Update to Unicode 12.1 · unicode-rs/unicode-width@eec14d2 · GitHub
[go: up one dir, main page]

Skip to content

Commit eec14d2

Browse files
committed
Update to Unicode 12.1
This updates the tables.rs file to be compatible with the latest available Unicode standard, allowing newly added characters such as the yawning face emoji to be recognized properly. The unicode.py script has also been ported to Python 3, since that should be supported on more systems.
1 parent ca01b2c commit eec14d2

File tree

3 files changed

+213
-193
lines changed

3 files changed

+213
-193
lines changed

scripts/unicode.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22
#
33
# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
44
# file at the top-level directory of this distribution and at
@@ -68,28 +68,28 @@ def load_unicode_data(f):
6868
fetch(f)
6969
gencats = {}
7070

71-
udict = {};
72-
range_start = -1;
71+
udict = {}
72+
range_start = -1
7373
for line in fileinput.input(f):
74-
data = line.split(';');
74+
data = line.split(';')
7575
if len(data) != 15:
7676
continue
77-
cp = int(data[0], 16);
77+
cp = int(data[0], 16)
7878
if is_surrogate(cp):
7979
continue
8080
if range_start >= 0:
81-
for i in xrange(range_start, cp):
82-
udict[i] = data;
83-
range_start = -1;
81+
for i in range(range_start, cp):
82+
udict[i] = data
83+
range_start = -1
8484
if data[1].endswith(", First>"):
85-
range_start = cp;
86-
continue;
87-
udict[cp] = data;
85+
range_start = cp
86+
continue
87+
udict[cp] = data
8888

8989
for code in udict:
9090
[code_org, name, gencat, combine, bidi,
9191
decomp, deci, digit, num, mirror,
92-
old, iso, upcase, lowcase, titlecase ] = udict[code];
92+
old, iso, upcase, lowcase, titlecase ] = udict[code]
9393

9494
# place letter in categories as appropriate
9595
for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []):
@@ -300,15 +300,15 @@ def optimize_width_table(wtable):
300300
### character width module
301301
width_table = []
302302
for zwcat in ["Me", "Mn", "Cf"]:
303-
width_table.extend(map(lambda (lo, hi): (lo, hi, 0, 0), gencats[zwcat]))
303+
width_table.extend([(lo_hi[0], lo_hi[1], 0, 0) for lo_hi in gencats[zwcat]])
304304
width_table.append((4448, 4607, 0, 0))
305305

306306
# get widths, except those that are explicitly marked zero-width above
307307
ea_widths = load_east_asian_width(["W", "F", "A"], ["Me", "Mn", "Cf"])
308308
# these are doublewidth
309309
for dwcat in ["W", "F"]:
310-
width_table.extend(map(lambda (lo, hi): (lo, hi, 2, 2), ea_widths[dwcat]))
311-
width_table.extend(map(lambda (lo, hi): (lo, hi, 1, 2), ea_widths["A"]))
310+
width_table.extend([(lo_hi1[0], lo_hi1[1], 2, 2) for lo_hi1 in ea_widths[dwcat]])
311+
width_table.extend([(lo_hi2[0], lo_hi2[1], 1, 2) for lo_hi2 in ea_widths["A"]])
312312

313313
width_table.sort(key=lambda w: w[0])
314314

0 commit comments

Comments
 (0)
0