Ignore invalid downloaded country codes · unho/python-stdnum@e901ac7 · GitHub
[go: up one dir, main page]

Skip to content

Commit e901ac7

Browse files
committed
Ignore invalid downloaded country codes
The page currently lists a country without a country code (is listed as "-"). This also ensures that lists of country codes are handled consistently.
1 parent 2cf78c2 commit e901ac7

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

update/my_bp.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# update/my_bp.py - script to download data from Malaysian government site
44
#
5-
# Copyright (C) 2013-2021 Arthur de Jong
5+
# Copyright (C) 2013-2022 Arthur de Jong
66
#
77
# This library is free software; you can redistribute it and/or
88
# modify it under the terms of the GNU Lesser General Public
@@ -55,9 +55,9 @@ def parse(content):
5555
tds = [clean(td) for td in tr.findall('td')]
5656
# table has two columns
5757
if len(tds) >= 2 and tds[0] and tds[1]:
58-
yield tds[0], tds[1]
58+
yield tds[0], [bp.strip() for bp in tds[1].split(',') if re.match(r' *[0-9]{2} *', bp)]
5959
if len(tds) >= 4 and tds[2] and tds[3]:
60-
yield tds[2], tds[3]
60+
yield tds[2], [bp.strip() for bp in tds[3].split(',') if re.match(r' *[0-9]{2} *', bp)]
6161

6262

6363
if __name__ == '__main__':
@@ -69,14 +69,15 @@ def parse(content):
6969
response = requests.get(state_list_url, headers=headers, verify='update/my_bp.crt', timeout=30)
7070
response.raise_for_status()
7171
for state, bps in parse(response.content):
72-
for bp in bps.split(','):
73-
results[bp.strip()]['state'] = state
74-
results[bp.strip()]['countries'].add('Malaysia')
72+
for bp in bps:
73+
results[bp]['state'] = state
74+
results[bp]['countries'].add('Malaysia')
7575
# read the countries
7676
response = requests.get(country_list_url, headers=headers, verify='update/my_bp.crt', timeout=30)
7777
response.raise_for_status()
78-
for country, bp in parse(response.content):
79-
results[bp]['countries'].add(country)
78+
for country, bps in parse(response.content):
79+
for bp in bps:
80+
results[bp]['countries'].add(country)
8081
# print the results
8182
print('# generated from National Registration Department of Malaysia, downloaded from')
8283
print('# %s' % state_list_url)

0 commit comments

Comments
 (0)
0