unicode-script: Add default ignorable code point detection module · unicode-rs/unicode-security@4594fbc · GitHub
[go: up one dir, main page]

Skip to content

Commit 4594fbc

Browse files
committed
unicode-script: Add default ignorable code point detection module
1 parent 526c6f2 commit 4594fbc

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

scripts/unicode.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
# - confusables.txt
1818
# - ReadMe.txt
1919
# This script also uses the following Unicode UCD data:
20+
# - DerivedCoreProperties.txt
2021
# - Scripts.txt
2122
#
2223
# Since this should not require frequent updates, we just store this
@@ -526,6 +527,26 @@ def emit_identifier_module(f):
526527
pfun=lambda x: "(%s,%s, IdentifierType::%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))
527528
f.write("}\n\n")
528529

530+
# Write the Rust module `default_ignorable_code_point` to the output stream `f`.
#
# The emitted module holds a `default_ignorable_code_point(c: char) -> bool`
# function plus the private `DEFAULT_IGNORABLE` table it looks `c` up in via
# `super::util::bsearch_range_table`.
#
# f: writable object; only its `write` method is used here, and it is also
#    handed to `emit_table`.
def emit_default_ignorable_detection_module(f):
531+
f.write("pub mod default_ignorable_code_point {")
# Body of the lookup function, written verbatim as Rust source text.
# NOTE(review): the emitted `match c as usize` has only a wildcard arm, so
# every input goes straight to the table lookup.
532+
f.write("""
533+
534+
#[inline]
535+
pub fn default_ignorable_code_point(c: char) -> bool {
536+
match c as usize {
537+
_ => super::util::bsearch_range_table(c, DEFAULT_IGNORABLE)
538+
}
539+
}
540+
541+
""")
542+
543+
f.write(" // Default ignorable code point table:\n")
# Read only the Default_Ignorable_Code_Point property out of
# DerivedCoreProperties.txt; the result is indexed by property name below.
544+
default_ignorable_table = load_properties(fetch_unidata("DerivedCoreProperties.txt"), ["Default_Ignorable_Code_Point"])
# Emit the table as a non-public `&'static [(char, char)]`; each entry is
# rendered as a pair of escaped chars.
# Presumably each pair is a (first, last) code point range; confirm against
# load_properties / bsearch_range_table.
545+
emit_table(f, "DEFAULT_IGNORABLE", default_ignorable_table["Default_Ignorable_Code_Point"], "&'static [(char, char)]", is_pub=False,
546+
pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])))
547+
# Close the `pub mod default_ignorable_code_point {` opened above.
548+
f.write("}\n\n")
549+
529550
def emit_confusable_detection_module(f):
530551
f.write("pub mod confusable_detection {")
531552
f.write("""
@@ -654,6 +675,8 @@ def emit_util_mod(f):
654675
emit_util_mod(rf)
655676
### identifier module
656677
emit_identifier_module(rf)
678+
### default_ignorable_detection module
679+
emit_default_ignorable_detection_module(rf)
657680
### confusable_detection module
658681
emit_confusable_detection_module(rf)
659682
### mixed_script_confusable_detection module

0 commit comments

Comments
 (0)
0