Switch postal code download to Austrian open-data portal · nuno-andre/python-stdnum@407a02f · GitHub
[go: up one dir, main page]

Skip to content

Commit 407a02f

Browse files
committed
Switch postal code download to Austrian open-data portal
This simplifies the process of downloading Austrian postal codes by downloading a JSON blob instead (see https://www.data.gv.at/katalog/dataset/f76ed887-00d6-450f-a158-9f8b1cbbeebf ). The list is filtered to only use addressable (adressierbar) postal codes because that matches the previous list. Thanks to Bernd Schlapsi for providing the pointer. Closes arthurdejong#235
1 parent 53f13b4 commit 407a02f

File tree

2 files changed

+15
-64
lines changed

2 files changed

+15
-64
lines changed

stdnum/at/postleitzahl.dat

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# generated from PLZ_Verzeichnis-07012021.xls downloaded from
2-
# https://www.post.at/g/c/postlexikon
1+
# generated from https://data.rtr.at/api/v1/tables/plz.json
2+
# version 20232 published 2020-07-06T10:40:00+02:00
33
1010 location="Wien" region="Wien"
44
1020 location="Wien" region="Wien"
55
1030 location="Wien" region="Wien"

update/at_postleitzahl.py

Lines changed: 13 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,12 @@
2424

2525
from __future__ import print_function, unicode_literals
2626

27-
import os
28-
import os.path
29-
30-
import lxml.html
3127
import requests
32-
import xlrd
33-
3428

35-
try:
36-
from urllib.parse import urljoin
37-
except ImportError:
38-
from urlparse import urljoin
3929

30+
# The URL of postal codes on the Austrian open-data portal in JSON format.
31+
download_url = 'https://data.rtr.at/api/v1/tables/plz.json'
4032

41-
# The page that contains a link to the downloadable spreadsheet with current
42-
# Austrian postal codes
43-
base_url = 'https://www.post.at/g/c/postlexikon'
4433

4534
# The list of regions that can be used in the document.
4635
regions = {
@@ -55,57 +44,19 @@
5544
'W': 'Wien',
5645
}
5746

58-
# The user agent that will be passed in requests
59-
user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)'
60-
61-
62-
# Custom headers that will be passed to requests
63-
headers = {
64-
'User-Agent': user_agent,
65-
}
66-
67-
68-
def find_download_url():
69-
"""Extract the spreadsheet URL from the Austrian Post website."""
70-
response = requests.get(base_url, headers=headers)
71-
response.raise_for_status()
72-
document = lxml.html.document_fromstring(response.content)
73-
url = [
74-
a.get('href')
75-
for a in document.findall('.//a[@href]')
76-
if 'Werben/PLZ_Verzeichnis' in a.get('href')][0]
77-
return urljoin(base_url, url.split('?')[0])
78-
79-
80-
def get_postal_codes(download_url):
81-
"""Download the Austrian postal codes spreadsheet."""
82-
response = requests.get(download_url, headers=headers)
83-
response.raise_for_status()
84-
workbook = xlrd.open_workbook(
85-
file_contents=response.content, logfile=open(os.devnull, 'w'))
86-
sheet = workbook.sheet_by_index(0)
87-
rows = sheet.get_rows()
88-
# the first row contains the column names
89-
columns = [column.value.lower() for column in next(rows)]
90-
# the other rows contain data
91-
for row in rows:
92-
data = dict(zip(
93-
columns,
94-
[column.value for column in row]))
95-
if data['adressierbar'].lower() == 'ja':
96-
yield (
97-
data['plz'],
98-
data['ort'],
99-
regions.get(data['bundesland']))
100-
10147

10248
if __name__ == '__main__':
103-
# download/parse the information
104-
download_url = find_download_url()
49+
response = requests.get(download_url)
50+
response.raise_for_status()
51+
data = response.json()
10552
# print header
106-
print('# generated from %s downloaded from' %
107-
os.path.basename(download_url))
108-
print('# %s' % base_url)
53+
print('# generated from %s' % download_url)
54+
print('# version %s published %s' % (
55+
data['version']['id'], data['version']['published']))
10956
# build an ordered list of postal codes
110-
for code, location, region in sorted(get_postal_codes(download_url)):
57+
results = []
58+
for row in data['data']:
59+
if row['adressierbar'] == 'Ja':
60+
results.append((str(row['plz']), row['ort'], regions[row['bundesland']]))
61+
for code, location, region in sorted(results):
11162
print('%s location="%s" region="%s"' % (code, location, region))

0 commit comments

Comments
 (0)
0