@@ -239,7 +239,7 @@ def is_codepoint_identifier_allowed(c, identifier_allowed):
239
239
return True
240
240
return False
241
241
242
- def load_rustc_mixedscript_confusables (f , identifier_allowed , scripts ):
242
+ def load_potential_mixedscript_confusables (f , identifier_allowed , scripts ):
243
243
# First, load all confusables data from confusables.txt
10000
244
244
confusables = load_confusables (f )
245
245
@@ -248,15 +248,6 @@ def load_rustc_mixedscript_confusables(f, identifier_allowed, scripts):
248
248
# seen as substitutes to itself. So if the confusables.txt says A -> C, B -> C,
249
249
# and implicitly C -> C, it means A <-> B, A <-> C, B <-> C are confusable.
250
250
251
- # here we first make a dict that contains all As and Bs whose corresponding C is single code point.
252
- seekup_map = {}
253
- for item in confusables :
254
- d_proto_list = item [1 ]
255
- d_source = item [0 ]
256
- assert (len (d_proto_list ) > 0 )
257
- if len (d_proto_list ) == 1 :
258
- seekup_map [escape_char (d_source )] = d_proto_list
259
-
260
251
# Here we're dividing all confusable lhs and rhs(prototype) operands of the substitution into equivalence classes.
261
252
# Principally we'll be using the rhs operands as the representative element of their equivalence classes.
262
253
# However some rhs operands are single code point, while some others are not.
@@ -275,9 +266,8 @@ def load_rustc_mixedscript_confusables(f, identifier_allowed, scripts):
275
266
if d_proto not in codepoint_map :
276
267
codepoint_map [d_proto ] = []
277
268
# When we create a new equivalence class, we check whether the representative element should be collected.
278
- # i.e. if it is not subject to substituion, and not restricted from identifier usage,
279
- # we collect it into the equivalence class.
280
- if d_proto not in seekup_map and is_codepoint_identifier_allowed (d_proto_list [0 ], identifier_allowed ):
269
+ # i.e. if it is not restricted from identifier usage, we collect it into the equivalence class.
270
+ if is_codepoint_identifier_allowed (d_proto_list [0 ], identifier_allowed ):
281
271
codepoint_map [d_proto ].append (d_proto_list [0 ])
282
272
# we collect the original code point to be substituted into this list.
283
273
codepoint_map [d_proto ].append (d_source )
@@ -562,23 +552,20 @@ def emit_confusable_detection_module(f):
562
552
def escape_script_constant(name, longforms):
    """Build the Rust `Script::<LongName>` path for a script name.

    `name` is used as a key into `longforms`; surrounding whitespace on the
    looked-up value is stripped before it is appended to the `Script::` prefix.
    """
    long_name = longforms[name].strip()
    return "Script::" + long_name
564
554
565
- def emit_rustc_mixed_script_confusable_detection (f ):
566
- f .write ("pub mod rustc_mixed_script_confusable_detection {" )
555
+ def emit_potiential_mixed_script_confusable (f ):
556
+ f .write ("pub mod potential_mixed_script_confusable {" )
567
557
f .write ("""
568
- use unicode_script::Script;
569
-
570
558
#[inline]
571
- pub fn is_rustc_mixed_script_confusable (c: char) -> Option<Script> {
559
+ pub fn potential_mixed_script_confusable (c: char) -> bool {
572
560
match c as usize {
573
- _ => super::util::bsearch_value_table (c, CONFUSABLES)
561
+ _ => super::util::bsearch_table (c, CONFUSABLES)
574
562
}
575
563
}
576
-
577
564
""" )
578
565
identifier_status_table = load_properties ("IdentifierStatus.txt" )
579
- longforms , scripts = load_scripts ("Scripts.txt" )
566
+ _ , scripts = load_scripts ("Scripts.txt" )
580
567
identifier_allowed = identifier_status_table ['Allowed' ]
581
- (mixedscript_confusable , mixedscript_confusable_unresolved ) = load_rustc_mixedscript_confusables ("confusables.txt" , identifier_allowed , scripts )
568
+ (mixedscript_confusable , mixedscript_confusable_unresolved ) = load_potential_mixedscript_confusables ("confusables.txt" , identifier_allowed , scripts )
582
569
debug = False
583
570
if debug == True :
584
571
debug_emit_mixedscript_confusable (f , mixedscript_confusable , "mixedscript_confusable" , scripts )
@@ -589,16 +576,21 @@ def emit_rustc_mixed_script_confusable_detection(f):
589
576
source = pair [0 ]
590
577
confusable_table .append ((source , script ))
591
578
confusable_table .sort (key = lambda w : w [0 ])
592
- emit_table (f , "CONFUSABLES" , confusable_table , "&'static [( char, Script) ]" , is_pub = False ,
593
- pfun = lambda x : "(%s,%s) " % ( escape_char (x [0 ]), escape_script_constant ( x [ 1 ], longforms ) ))
579
+ emit_table (f , "CONFUSABLES" , confusable_table , "&'static [char]" , is_pub = False ,
580
+ pfun = lambda x : "%s " % escape_char (x [0 ]))
594
581
f .write ("}\n \n " )
595
582
596
583
597
584
def emit_util_mod (f ):
598
585
f .write ("""
599
586
pub mod util {
600
587
use core::result::Result::{Ok, Err};
601
-
588
+
589
+ #[inline]
590
+ pub fn bsearch_table(c: char, r: &'static [char]) -> bool {
591
+ r.binary_search(&c).is_ok()
592
+ }
593
+
602
594
#[inline]
603
595
pub fn bsearch_value_table<T: Copy>(c: char, r: &'static [(char, T)]) -> Option<T> {
604
596
match r.binary_search_by_key(&c, |&(k, _)| k) {
@@ -609,7 +601,7 @@ def emit_util_mod(f):
609
601
Err(_) => None
610
602
}
611
603
}
612
-
604
+
613
605
#[inline]
614
606
pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
615
607
use core::cmp::Ordering::{Equal, Less, Greater};
@@ -619,7 +611,7 @@ def emit_util_mod(f):
619
611
else { Greater }
620
612
}).is_ok()
621
613
}
622
-
614
+
623
615
pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
624
616
use core::cmp::Ordering::{Equal, Less, Greater};
625
617
match r.binary_search_by(|&(lo, hi, _)| {
@@ -660,4 +652,4 @@ def emit_util_mod(f):
660
652
### confusable_detection module
661
653
emit_confusable_detection_module (rf )
662
654
### potential_mixed_script_confusable module
663
- emit_rustc_mixed_script_confusable_detection (rf )
655
+ emit_potiential_mixed_script_confusable (rf )
0 commit comments