@@ -53,6 +53,8 @@ def fetch(f):
53
53
sys .stderr .write ("cannot load %s\n " % f )
54
54
exit (1 )
55
55
56
+ return f
57
+
56
58
# Download a UCD table file
57
59
def fetch_unidata (f ):
58
60
if not os .path .exists (os .path .basename (f )):
@@ -63,14 +65,14 @@ def fetch_unidata(f):
63
65
sys .stderr .write ("cannot load %s" % f )
64
66
exit (1 )
65
67
66
- # Loads code point data from IdentifierStatus.txt and
67
- # IdentifierType.txt
68
- # Implementation from unicode-segmentation
68
+ return f
69
+
70
+ # Loads code point data from provided filename f
71
+ # Implementation adapted from unicode-segmentation
69
72
def load_properties (f , interestingprops = None ):
70
- fetch (f )
71
73
props = {}
72
- re1 = re .compile (r"^ *([0-9A-F]+) *; *(\w+) " )
73
- re2 = re .compile (r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+) " )
74
+ re1 = re .compile (r"^ *([0-9A-F]+) *; *([^#\s]+) *# " )
75
+ re2 = re .compile (r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *([^#\s]+) *# " )
74
76
75
77
for line in fileinput .input (os .path .basename (f ), openhook = fileinput .hook_encoded ("utf-8" )):
76
78
prop = None
@@ -99,42 +101,6 @@ def load_properties(f, interestingprops = None):
99
101
100
102
return props
101
103
102
- # Loads script data from Scripts.txt
103
- def load_script_properties (f , interestingprops ):
104
- fetch_unidata (f )
105
- props = {}
106
- # Note: these regexes are different from those in unicode-segmentation,
107
- # because we need to handle spaces here
108
- re1 = re .compile (r"^ *([0-9A-F]+) *; *([^#]+) *#" )
109
- re2 = re .compile (r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *([^#]+) *#" )
110
-
111
- for line in fileinput .input (os .path .basename (f )):
112
- prop = None
113
- d_lo = 0
114
- d_hi = 0
115
- m = re1 .match (line )
116
- if m :
117
- d_lo = m .group (1 )
118
- d_hi = m .group (1 )
119
- prop = m .group (2 ).strip ()
120
- else :
121
- m = re2 .match (line )
122
- if m :
123
- d_lo = m .group (1 )
124
- d_hi = m .group (2 )
125
- prop = m .group (3 ).strip ()
126
- else :
127
- continue
128
- if interestingprops and prop not in interestingprops :
129
- continue
130
- d_lo = int (d_lo , 16 )
131
- d_hi = int (d_hi , 16 )
132
- if prop not in props :
133
- props [prop ] = []
134
- props [prop ].append ((d_lo , d_hi ))
135
-
136
- return props
137
-
138
104
# Loads confusables data from confusables.txt
139
105
def load_confusables (f ):
140
106
fetch (f )
@@ -189,7 +155,7 @@ def load_scripts(f):
189
155
# changes are introduced, update accordingly.
190
156
191
157
(longforms , shortforms ) = aliases ()
192
- scripts = load_script_properties ( f , [])
158
+ scripts = load_properties ( fetch_unidata ( f ) , [])
193
159
194
160
script_table = []
195
161
script_list = []
@@ -546,10 +512,10 @@ def emit_identifier_module(f):
546
512
""" )
547
513
548
514
f .write (" // Identifier status table:\n " )
549
- identifier_status_table = load_properties ("IdentifierStatus.txt" )
515
+ identifier_status_table = load_properties (fetch ( "IdentifierStatus.txt" ) )
550
516
emit_table (f , "IDENTIFIER_STATUS" , identifier_status_table ['Allowed' ], "&'static [(char, char)]" , is_pub = False ,
551
517
pfun = lambda x : "(%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ])))
552
- identifier_type = load_properties ("IdentifierType.txt" )
518
+ identifier_type = load_properties (fetch ( "IdentifierType.txt" ) )
553
519
type_table = []
554
520
for ty in identifier_type :
555
521
type_table .extend ([(x , y , ty ) for (x , y ) in identifier_type [ty ]])
@@ -601,7 +567,7 @@ def emit_potiential_mixed_script_confusable(f):
601
567
}
602
568
}
603
569
""" )
604
- identifier_status_table = load_properties ("IdentifierStatus.txt" )
570
+ identifier_status_table = load_properties (fetch ( "IdentifierStatus.txt" ) )
605
571
_ , scripts = load_scripts ("Scripts.txt" )
606
572
identifier_allowed = identifier_status_table ['Allowed' ]
607
573
(mixedscript_confusable , mixedscript_confusable_unresolved ) = load_potential_mixedscript_confusables ("confusables.txt" , identifier_allowed , scripts )
0 commit comments