Stop non-operational MNCs from confusing IMSI dataset · cedk/python-stdnum@b7901d6 · GitHub
[go: up one dir, main page]

Skip to content
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_remote-form_dist_index_js-node_modules_delegated-events_dist_inde-94fd67-e789af5a4655.js"></script>

Commit b7901d6

Browse files
committed
Stop non-operational MNCs from confusing IMSI dataset
This only includes data from non-operational (status "Not operational" according to Wikipedia) Mobile Network Code operators in the generated data file if they would not confuse the lookup of operational numbers. This avoids problems with the "030" to "039" non-operational ranges conflicting with the "03" operational range, and ensures that only the "03" value is kept. For historical completeness we keep the other non-operational values. Closes arthurdejong#257
1 parent 7e69090 commit b7901d6

File tree

1 file changed

+27
-18
lines changed

1 file changed

+27
-18
lines changed

update/imsi.py

Lines changed: 27 additions & 18 deletions
205
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# update/imsi.py - script to download from Wikipedia to build the database
44
#
5-
# Copyright (C) 2011-2019 Arthur de Jong
5+
# Copyright (C) 2011-2021 Arthur de Jong
66
#
77
# This library is free software; you can redistribute it and/or
88
# modify it under the terms of the GNU Lesser General Public
@@ -126,15 +126,10 @@ def cleanup_value(val):
126126
val = val.replace('United Kingdom|UK', 'United Kingdom')
127127
val = val.replace('United States|US', 'United States')
128128
val = val.replace('New Zealand|NZ', 'New Zealand').strip()
129+
val = val.replace('</sup>', '').strip()
129130
return cleanup_replacements.get(val, val)
130131

131132

132-
def update_mncs(data, mcc, mnc, **kwargs):
133-
"""Merge provided mnc information with the data that is already stored
134-
in mccs."""
135-
data[mcc][mnc].update(dict((k, cleanup_value(v)) for k, v in kwargs.items() if v))
136-
137-
138133
# This matches a heading on the Wikipedia page, e.g.
139134
# ==== [[Albania]] - AL ====
140135
_mnc_country_re = re.compile(
@@ -153,10 +148,10 @@ def update_mncs(data, mcc, mnc, **kwargs):
153148
r')?)?)?)?)?')
154149

155150

156-
def get_mncs_from_wikipedia(data):
157-
"""Update the collection of Mobile Country Codes from Wikipedia.
158-
This parses a Wikipedia page to extract the MCC and MNC, the first
159-
part of any IMSI, and stores the results."""
151+
def get_mncs_from_wikipedia():
152+
"""Return the collection of Mobile Country Codes from Wikipedia.
153+
This parses Wikipedia pages to extract the MCC and MNC, the first
154+
part of any IMSI, and extracts other available data."""
160155
for page in wikipedia_pages:
161156
url = 'https://en.wikipedia.org/w/index.php?title=%s&action=raw' % (
162157
page.replace(' ', '_'))
@@ -175,11 +170,15 @@ def get_mncs_from_wikipedia(data):
175170
match = _mnc_line_re.match(line)
176171
if match:
177172
for mnc in str2range(match.group('mnc')):
178-
update_mncs(data, match.group('mcc'), mnc,
179-
country=country, cc=cc, brand=match.group('brand'),
180-
operator=match.group('operator'),
181-
status=match.group('status'),
182-
bands=match.group('bands'))
173+
info = dict(
174+
country=country,
175+
cc=cc,
176+
brand=match.group('brand'),
177+
operator=match.group('operator'),
178+
status=match.group('status'),
179+
bands=match.group('bands'))
180+
info = dict((k, cleanup_value(v)) for k, v in info.items() if v)
181+
yield (match.group('mcc'), mnc, info)
183182

184183

185184
def str2range(x):
@@ -200,7 +199,17 @@ def str2range(x):
200199
if __name__ == '__main__':
201200
# download/parse the information
202201
data = defaultdict(lambda: defaultdict(dict))
203-
get_mncs_from_wikipedia(data)
202+
not_operational = defaultdict(lambda: defaultdict(dict))
203+
for mcc, mnc, info in get_mncs_from_wikipedia():
204+
if info.get('status', '').lower() == 'not operational':
205+
not_operational[mcc][mnc].update(info)
206+
else:
207+
data[mcc][mnc].update(info)
208+
# merge not operational entries as long as they do not conflict
209+
for mcc, mncs in not_operational.items():
210+
for mnc, info in mncs.items():
211+
if not data[mcc][mnc] and not data[mcc][mnc[:2]]:
212+
data[mcc][mnc].update(info)
204213
# print header
205214
print('# generated from various sources')
206215
print('# https://en.wikipedia.org/wiki/Mobile_country_code')
@@ -211,7 +220,7 @@ def str2range(x):
211220
for mcc in mcc_list:
212221
print('%s' % mcc)
213222
# build an ordered list of mncs
214-
mnc_list = sorted(data[mcc].keys())
223+
mnc_list = sorted(mnc for mnc, info in data[mcc].items() if info)
215224
for mnc in mnc_list:
216225
info = data[mcc][mnc]
217226
infokeys = sorted(info.keys())

0 commit comments

Comments
 (0)
0