2
2
3
3
# update/cfi.py - script to download CFI code list from the SIX group
4
4
#
5
- # Copyright (C) 2022 Arthur de Jong
5
+ # Copyright (C) 2022-2024 Arthur de Jong
6
6
#
7
7
# This library is free software; you can redistribute it and/or
8
8
# modify it under the terms of the GNU Lesser General Public
21
21
22
22
"""This script downloads the list of CFI codes as published by the SIX group."""
23
23
24
+ import io
24
25
import re
25
26
26
27
import lxml .html
28
+ import openpyxl
27
29
import requests
28
- import xlrd
29
30
30
31
31
32
# the location of the Statistical Classification file
@@ -39,8 +40,8 @@ def normalise(value):
39
40
40
41
def get_categories(sheet):
    """Get the list of top-level CFI categories.

    Iterates over the rows of the given worksheet (anything that provides
    an ``iter_rows()`` method yielding sequences of cells with a ``value``
    attribute) and yields a ``(code, name)`` tuple for every row whose
    first cell holds a single-character code and whose second cell holds
    a non-empty name.
    """
    for row in sheet.iter_rows():
        # Skip rows that are too short to hold a code and a name
        # (NOTE(review): read-only worksheets may yield such rows for
        # blank lines - confirm against the openpyxl documentation).
        if len(row) < 2:
            continue
        code = row[0].value
        name = row[1].value
        # Empty cells have a value of None and numeric cells are not
        # strings, so only take the length of actual text values.
        if isinstance(code, str) and len(code) == 1 and name:
            yield (code, name)
45
46
46
47
@@ -49,7 +50,7 @@ def get_attributes(sheet):
49
50
attribute = None
50
51
value_list = []
51
52
values = None
52
- for row in sheet .get_rows ():
53
+ for row in sheet .iter_rows ():
53
54
if row [0 ].value and not row [1 ].value and row [2 ].value :
54
55
attribute = normalise (row [2 ].value )
55
56
values = []
@@ -84,15 +85,15 @@ def print_attributes(attributes, index=0):
84
85
# Download and parse the spreadsheet
85
86
response = requests .get (link_url , timeout = 30 )
86
87
response .raise_for_status ()
87
- workbook = xlrd . open_workbook ( file_contents = response .content )
88
+ workbook = openpyxl . load_workbook ( io . BytesIO ( response .content ), read_only = True )
88
89
89
90
print ('# generated from %s, downloaded from' % link_url .split ('/' )[- 1 ])
90
91
print ('# %s' % download_url )
91
92
92
- groups = sorted (x for x in workbook .sheet_names () if len (x ) == 6 and x .endswith ('XXXX' ))
93
- for category , name in sorted (get_categories (workbook . sheet_by_name ( 'Categories' ) )):
93
+ groups = sorted (x for x in workbook .sheetnames if len (x ) == 6 and x .endswith ('XXXX' ))
94
+ for category , name in sorted (get_categories (workbook [ 'Categories' ] )):
94
95
print ('%s category="%s"' % (category , name ))
95
96
for group in (x for x in groups if x .startswith (category )):
96
- sheet = workbook . sheet_by_name ( group )
97
- print (' %s group="%s"' % (group [1 ], normalise (sheet .cell (0 , 0 ).value )))
97
+ sheet = workbook [ group ]
98
+ print (' %s group="%s"' % (group [1 ], normalise (sheet .cell (1 , 1 ).value )))
98
99
print_attributes (get_attributes (sheet ))
0 commit comments