2
2
3
3
# update/imsi.py - script to download from Wikipedia to build the database
4
4
#
5
- # Copyright (C) 2011-2019 Arthur de Jong
5
+ # Copyright (C) 2011-2021 Arthur de Jong
6
6
#
7
7
# This library is free software; you can redistribute it and/or
8
8
# modify it under the terms of the GNU Lesser General Public
@@ -126,15 +126,10 @@ def cleanup_value(val):
126
126
val = val .replace ('United Kingdom|UK' , 'United Kingdom' )
127
127
val = val .replace ('United States|US' , 'United States' )
128
128
val = val .replace ('New Zealand|NZ' , 'New Zealand' ).strip ()
129
+ val = val .replace ('</sup>' , '' ).strip ()
129
130
return cleanup_replacements .get (val , val )
130
131
131
132
132
- def update_mncs (data , mcc , mnc , ** kwargs ):
133
- """Merge provided mnc information with the data that is already stored
134
- in mccs."""
135
- data [mcc ][mnc ].update (dict ((k , cleanup_value (v )) for k , v in kwargs .items () if v ))
136
-
137
-
138
133
# This matches a heading on the Wikipedia page, e.g.
139
134
# ==== [[Albania]] - AL ====
140
135
_mnc_country_re = re .compile (
@@ -153,10 +148,10 @@ def update_mncs(data, mcc, mnc, **kwargs):
153
148
r')?)?)?)?)?' )
154
149
155
150
156
- def get_mncs_from_wikipedia (data ):
157
- """Update the collection of Mobile Country Codes from Wikipedia.
158
- This parses a Wikipedia page to extract the MCC and MNC, the first
159
- part of any IMSI, and stores the results ."""
151
+ def get_mncs_from_wikipedia ():
152
+ """Return the collection of Mobile Country Codes from Wikipedia.
153
+ This parses Wikipedia pages to extract the MCC and MNC, the first
154
+ part of any IMSI, and extracts other available data ."""
160
155
for page in wikipedia_pages :
161
156
url = 'https://en.wikipedia.org/w/index.php?title=%s&action=raw' % (
162
157
page .replace (' ' , '_' ))
@@ -175,11 +170,15 @@ def get_mncs_from_wikipedia(data):
175
170
match = _mnc_line_re .match (line )
176
171
if match :
177
172
for mnc in str2range (match .group ('mnc' )):
178
- update_mncs (data , match .group ('mcc' ), mnc ,
179
- country = country , cc = cc , brand = match .group ('brand' ),
180
- operator = match .group ('operator' ),
181
- status = match .group ('status' ),
182
- bands = match .group ('bands' ))
173
+ info = dict (
174
+ country = country ,
175
+ cc = cc ,
176
+ brand = match .group ('brand' ),
177
+ operator = match .group ('operator' ),
178
+ status = match .group ('status' ),
179
+ bands = match .group ('bands' ))
180
+ info = dict ((k , cleanup_value (v )) for k , v in info .items () if v )
181
+ yield (match .group ('mcc' ), mnc , info )
183
182
184
183
185
184
def str2range (x ):
@@ -200,7 +199,17 @@ def str2range(x):
200
199
if __name__ == '__main__' :
201
200
# download/parse the information
202
201
data = defaultdict (lambda : defaultdict (dict ))
203
- get_mncs_from_wikipedia (data )
202
+ not_operational = defaultdict (lambda : defaultdict (dict ))
203
+ for mcc , mnc , info in get_mncs_from_wikipedia ():
204
+ if info .get ('status' , '' ).lower () == 'not operational' :
205
+ not_operational [mcc ][mnc ].update (info )
206
+ else :
207
+ data [mcc ][mnc ].update (info )
208
+ # merge not operational entries as long as they do not conflict
209
+ for mcc , mncs in not_operational .items ():
210
+ for mnc , info in mncs .items ():
211
+ if not data [mcc ][mnc ] and not data [mcc ][mnc [:2 ]]:
212
+ data [mcc ][mnc ].update (info )
204
213
# print header
205
214
print ('# generated from various sources' )
206
215
print ('# https://en.wikipedia.org/wiki/Mobile_country_code' )
@@ -211,7 +220,7 @@ def str2range(x):
211
220
for mcc in mcc_list :
212
221
print ('%s' % mcc )
213
222
# build an ordered list of mncs
214
- mnc_list = sorted (data [mcc ].keys () )
223
+ mnc_list = sorted (mnc for mnc , info in data [mcc ].items () if info )
215
224
for mnc in mnc_list :
216
225
info = data [mcc ][mnc ]
217
226
infokeys = sorted (info .keys ())
0 commit comments