Merge pull request #86 from glandium/tables · unicode-rs/unicode-normalization@e9dc93a · GitHub
[go: up one dir, main page]

Skip to content

Commit e9dc93a

Browse files
authored
Merge pull request #86 from glandium/tables
Avoid slices in entries of decomposition tables
2 parents 68f2f55 + 7c265f8 commit e9dc93a

File tree

3 files changed

+17803
-6760
lines changed

3 files changed

+17803
-6760
lines changed

scripts/unicode.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -391,9 +391,19 @@ def gen_composition_table(canon_comp, out):
391391
def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_decomp, out):
392392
tables = [(canon_decomp, 'canonical'), (compat_decomp, 'compatibility'), (cjk_compat_variants_decomp, 'cjk_compat_variants')]
393393
for table, name in tables:
394-
gen_mph_data(name + '_decomposed', table, "(u32, &'static [char])",
395-
lambda k: "(0x{:x}, &[{}])".format(k,
396-
", ".join("'\\u{%s}'" % hexify(c) for c in table[k])))
394+
offsets = {}
395+
offset = 0
396+
out.write("pub(crate) const %s_DECOMPOSED_CHARS: &[char] = &[\n" % name.upper())
397+
for k, v in table.items():
398+
offsets[k] = offset
399+
offset += len(v)
400+
for c in v:
401+
out.write(" '\\u{%s}',\n" % hexify(c))
402+
# The largest offset must fit in a u16.
403+
assert offset < 65536
404+
out.write("];\n")
405+
gen_mph_data(name + '_decomposed', table, "(u32, (u16, u16))",
406+
lambda k: "(0x{:x}, ({}, {}))".format(k, offsets[k], len(table[k])))
397407

398408
def gen_qc_match(prop_table, out):
399409
out.write(" match c {\n")

src/lookups.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ pub(crate) fn canonical_fully_decomposed(c: char) -> Option<&'static [char]> {
5151
pair_lookup_fv_opt,
5252
None,
5353
)
54+
.map(|(start, len)| &CANONICAL_DECOMPOSED_CHARS[start as usize..][..len as usize])
5455
}
5556

5657
pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]> {
@@ -62,6 +63,7 @@ pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]>
6263
pair_lookup_fv_opt,
6364
None,
6465
)
66+
.map(|(start, len)| &COMPATIBILITY_DECOMPOSED_CHARS[start as usize..][..len as usize])
6567
}
6668

6769
pub(crate) fn cjk_compat_variants_fully_decomposed(c: char) -> Option<&'static [char]> {
@@ -73,6 +75,7 @@ pub(crate) fn cjk_compat_variants_fully_decomposed(c: char) -> Option<&'static [
7375
pair_lookup_fv_opt,
7476
None,
7577
)
78+
.map(|(start, len)| &CJK_COMPAT_VARIANTS_DECOMPOSED_CHARS[start as usize..][..len as usize])
7679
}
7780

7881
/// Return whether the given character is a combining mark (`General_Category=Mark`)

0 commit comments

Comments
 (0)
0