8000 Switch to using openpyxl for parsing XLSX files · Daverball/python-stdnum@26fd25b · GitHub
[go: up one dir, main page]

Skip to content

Commit 26fd25b

Browse files
committed
Switch to using openpyxl for parsing XLSX files
The xlrd has dropped support for parsing XLSX files. We still use xlrd for update/be_banks.py because they use the classic XLS format and openpyxl does not support that format.
1 parent 9230604 commit 26fd25b

File tree

4 files changed

+19
-20
lines changed

4 files changed

+19
-20
lines changed

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,6 @@ lines_after_imports = 2
4747
multi_line_output = 4
4848
known_third_party =
4949
lxml
50+
openpyxl
5051
requests
5152
xlrd

update/cfi.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# update/cfi.py - script to download CFI code list from the SIX group
44
#
5-
# Copyright (C) 2022 Arthur de Jong
5+
# Copyright (C) 2022-2024 Arthur de Jong
66
#
77
# This library is free software; you can redistribute it and/or
88
# modify it under the terms of the GNU Lesser General Public
@@ -21,11 +21,12 @@
2121

2222
"""This script downloads the list of CFI codes as published by the SIX group."""
2323

24+
import io
2425
import re
2526

2627
import lxml.html
28+
import openpyxl
2729
import requests
28-
import xlrd
2930

3031

3132
# the location of the Statistical Classification file
@@ -39,8 +40,8 @@ def normalise(value):
3940

4041
def get_categories(sheet):
4142
"""Get the list of top-level CFI categories."""
42-
for row in sheet.get_rows():
43-
if len(row[0].value) == 1 and row[1].value:
43+
for row in sheet.iter_rows():
44+
if row[0].value and len(row[0].value) == 1 and row[1].value:
4445
yield (row[0].value, row[1].value)
4546

4647

@@ -49,7 +50,7 @@ def get_attributes(sheet):
4950
attribute = None
5051
value_list = []
5152
values = None
52-
for row in sheet.get_rows():
53+
for row in sheet.iter_rows():
5354
if row[0].value and not row[1].value and row[2].value:
5455
attribute = normalise(row[2].value)
5556
values = []
@@ -84,15 +85,15 @@ def print_attributes(attributes, index=0):
8485
# Download and parse the spreadsheet
8586
response = requests.get(link_url, timeout=30)
8687
response.raise_for_status()
87-
workbook = xlrd.open_workbook(file_contents=response.content)
88+
workbook = openpyxl.load_workbook(io.BytesIO(response.content), read_only=True)
8889

8990
print('# generated from %s, downloaded from' % link_url.split('/')[-1])
9091
print('# %s' % download_url)
9192

92-
groups = sorted(x for x in workbook.sheet_names() if len(x) == 6 and x.endswith('XXXX'))
93-
for category, name in sorted(get_categories(workbook.sheet_by_name('Categories'))):
93+
groups = sorted(x for x in workbook.sheetnames if len(x) == 6 and x.endswith('XXXX'))
94+
for category, name in sorted(get_categories(workbook['Categories'])):
9495
print('%s category="%s"' % (category, name))
9596
for group in (x for x in groups if x.startswith(category)):
96-
sheet = workbook.sheet_by_name(group)
97-
print(' %s group="%s"' % (group[1], normalise(sheet.cell(0, 0).value)))
97+
sheet = workbook[group]
98+
print(' %s group="%s"' % (group[1], normalise(sheet.cell(1, 1).value)))
9899
print_attributes(get_attributes(sheet))

update/nz_banks.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# update/nz_banks.py - script to download Bank list from Bank Branch Register
55
#
6-
# Copyright (C) 2019-2021 Arthur de Jong
6+
# Copyright (C) 2019-2024 Arthur de Jong
77
#
88
# This library is free software; you can redistribute it and/or
99
# modify it under the terms of the GNU Lesser General Public
@@ -23,16 +23,12 @@
2323
"""This script downloads the list of banks with bank codes as used in the
2424
New Zealand bank account numbers."""
2525

26+
import io
2627
import re
2728
from collections import OrderedDict, defaultdict
2829

30+
import openpyxl
2931
import requests
30-
import xlrd
31-
32-
33-
# Monkey patch xlrd avoiding bug in combination with Python 3.9
34-
xlrd.xlsx.ensure_elementtree_imported(False, None)
35-
xlrd.xlsx.Element_has_iter = True
3632

3733

3834
# The page that contains a link to the latest XLS version of the codes.
@@ -41,7 +37,7 @@
4137

4238
def get_values(sheet):
4339
"""Return rows from the worksheet as a dict per row."""
44-
rows = sheet.get_rows()
40+
rows = sheet.iter_rows()
4541
# the first row has column names
4642
columns = [column.value.lower().replace(' ', '_') for column in next(rows)]
4743
# go over rows with values
@@ -75,8 +71,8 @@ def branch_list(branches):
7571
response.raise_for_status()
7672
content_disposition = response.headers.get('content-disposition', '')
7773
filename = re.findall(r'filename=?(.+)"?', content_disposition)[0].strip('"')
78-
workbook = xlrd.open_workbook(file_contents=response.content)
79-
sheet = workbook.sheet_by_index(0)
74+
workbook = openpyxl.load_workbook(io.BytesIO(response.content), read_only=True)
75+
sheet = workbook.worksheets[0]
8076
# print header
8177
print('# generated from %s downloaded from' % filename)
8278
print('# %s' % download_url)

update/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
lxml
2+
openpyxl
23
requests
34
xlrd

0 commit comments

Comments
 (0)
0