|
24 | 24 |
|
25 | 25 | from __future__ import print_function, unicode_literals
|
26 | 26 |
|
27 |
| -import os |
28 |
| -import os.path |
29 |
| - |
30 |
| -import lxml.html |
31 | 27 | import requests
|
32 |
| -import xlrd |
33 |
| - |
34 | 28 |
|
35 |
| -try: |
36 |
| - from urllib.parse import urljoin |
37 |
| -except ImportError: |
38 |
| - from urlparse import urljoin |
39 | 29 |
|
| 30 | +# The URL of postal codes on the Austrian open-data portal in JSON format. |
| 31 | +download_url = 'https://data.rtr.at/api/v1/tables/plz.json' |
40 | 32 |
|
41 |
| -# The page that contains a link to the downloadable spreadsheet with current |
42 |
| -# Austrian postal codes |
43 |
| -base_url = 'https://www.post.at/g/c/postlexikon' |
44 | 33 |
|
45 | 34 | # The list of regions that can be used in the document.
|
46 | 35 | regions = {
|
|
55 | 44 | 'W': 'Wien',
|
56 | 45 | }
|
57 | 46 |
|
58 |
| -# The user agent that will be passed in requests |
59 |
| -user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)' |
60 |
| - |
61 |
| - |
62 |
| -# Custom headers that will be passed to requests |
63 |
| -headers = { |
64 |
| - 'User-Agent': user_agent, |
65 |
| -} |
66 |
| - |
67 |
| - |
68 |
| -def find_download_url(): |
69 |
| - """Extract the spreadsheet URL from the Austrian Post website.""" |
70 |
| - response = requests.get(base_url, headers=headers) |
71 |
| - response.raise_for_status() |
72 |
| - document = lxml.html.document_fromstring(response.content) |
73 |
| - url = [ |
74 |
| - a.get('href') |
75 |
| - for a in document.findall('.//a[@href]') |
76 |
| - if 'Werben/PLZ_Verzeichnis' in a.get('href')][0] |
77 |
| - return urljoin(base_url, url.split('?')[0]) |
78 |
| - |
79 |
| - |
80 |
| -def get_postal_codes(download_url): |
81 |
| - """Download the Austrian postal codes spreadsheet.""" |
82 |
| - response = requests.get(download_url, headers=headers) |
83 |
| - response.raise_for_status() |
84 |
| - workbook = xlrd.open_workbook( |
85 |
| - file_contents=response.content, logfile=open(os.devnull, 'w')) |
86 |
| - sheet = workbook.sheet_by_index(0) |
87 |
| - rows = sheet.get_rows() |
88 |
| - # the first row contains the column names |
89 |
| - columns = [column.value.lower() for column in next(rows)] |
90 |
| - # the other rows contain data |
91 |
| - for row in rows: |
92 |
| - data = dict(zip( |
93 |
| - columns, |
94 |
| - [column.value for column in row])) |
95 |
| - if data['adressierbar'].lower() == 'ja': |
96 |
| - yield ( |
97 |
| - data['plz'], |
98 |
| - data['ort'], |
99 |
| - regions.get(data['bundesland'])) |
100 |
| - |
101 | 47 |
|
102 | 48 | if __name__ == '__main__':
|
103 |
| - # download/parse the information |
104 |
| - download_url = find_download_url() |
| 49 | + response = requests.get(download_url) |
| 50 | + response.raise_for_status() |
| 51 | + data = response.json() |
105 | 52 | # print header
|
106 |
| - print('# generated from %s downloaded from' % |
107 |
| - os.path.basename(download_url)) |
108 |
| - print('# %s' % base_url) |
| 53 | + print('# generated from %s' % download_url) |
| 54 | + print('# version %s published %s' % ( |
| 55 | + data['version']['id'], data['version']['published'])) |
109 | 56 | # build an ordered list of postal codes
|
110 |
| - for code, location, region in sorted(get_postal_codes(download_url)): |
| 57 | + results = [] |
| 58 | + for row in data['data']: |
| 59 | + if row['adressierbar'] == 'Ja': |
| 60 | + results.append((str(row['plz']), row['ort'], regions[row['bundesland']])) |
| 61 | + for code, location, region in sorted(results): |
111 | 62 | print('%s location="%s" region="%s"' % (code, location, region))
|
0 commit comments