1
+ //! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
2
+ //! a string conforms to
3
+
4
+ use crate :: mixed_script:: AugmentedScriptSet ;
5
+ use unicode_script:: { Script , ScriptExtension } ;
6
+ use crate :: GeneralSecurityProfile ;
7
+
8
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// Variants are declared from most restrictive to least restrictive, and the
/// derived ordering follows declaration order, so `a <= b` means "`a` is at
/// least as restrictive as `b`". Do not reorder the variants: comparisons
/// between levels rely on this ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RestrictionLevel {
    /// Every character is in the ASCII range.
    ///
    /// See <https://www.unicode.org/reports/tr39/#ascii_only>.
    ASCIIOnly,
    /// All characters share at least one common script.
    ///
    /// See <https://www.unicode.org/reports/tr39/#single_script>.
    SingleScript,
    /// Apart from Latin, the string is covered by one of the Korean,
    /// Han-with-Bopomofo, or Japanese script combinations.
    ///
    /// See <https://www.unicode.org/reports/tr39/#highly_restrictive>.
    HighlyRestrictive,
    /// Apart from Latin, the string uses a single recommended script other
    /// than Cyrillic or Greek.
    ///
    /// See <https://www.unicode.org/reports/tr39/#moderately_restrictive>.
    ModeratelyRestrictive,
    /// Scripts may be mixed freely, but every character is still allowed in
    /// identifiers by the general security profile.
    ///
    /// See <https://www.unicode.org/reports/tr39/#minimally_restrictive>.
    MinimallyRestrictive,
    /// At least one character is not allowed by the general security profile.
    ///
    /// See <https://www.unicode.org/reports/tr39/#unrestricted>.
    Unrestricted,
}
25
+
26
+ /// Utilities for determining which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
27
+ /// a string satisfies
28
+ pub trait RestrictionLevelDetection : Sized {
29
+ /// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
30
+ ///
31
+ /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
32
+ fn detect_restriction_level ( self ) -> RestrictionLevel ;
33
+
34
+
35
+ /// Check if a string satisfies the supplied [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
36
+ ///
37
+ /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
38
+ fn check_restriction_level ( self , level : RestrictionLevel ) -> bool {
39
+ self . detect_restriction_level ( ) <= level
40
+ }
41
+ }
42
+
43
+ impl RestrictionLevelDetection for & ' _ str {
44
+ fn detect_restriction_level ( self ) -> RestrictionLevel {
45
+ let mut ascii_only = true ;
46
+ let mut set = AugmentedScriptSet :: default ( ) ;
47
+ let mut exclude_latin_set = AugmentedScriptSet :: default ( ) ;
48
+ for ch in self . chars ( ) {
49
+ if !GeneralSecurityProfile :: identifier_allowed ( ch) {
50
+ return RestrictionLevel :: Unrestricted ;
51
+ }
52
+ if ch. is_ascii ( ) {
53
+ ascii_only = false ;
54
+ }
55
+ let ch_set = ch. into ( ) ;
56
+ set. intersect_with ( ch_set) ;
57
+ if !ch_set. base . contains_script ( Script :: Latin ) {
58
+ exclude_latin_set. intersect_with ( ch_set) ;
59
+ }
60
+ }
61
+
62
+ if ascii_only {
63
+ return RestrictionLevel :: ASCIIOnly ;
64
+ } else if !set. is_empty ( ) {
65
+ return RestrictionLevel :: SingleScript ;
66
+ } else if exclude_latin_set. kore || exclude_latin_set. hanb || exclude_latin_set. jpan {
67
+ return RestrictionLevel :: HighlyRestrictive ;
68
+ } else if let ScriptExtension :: Single ( script) = exclude_latin_set. base {
69
+ if script. is_recommended ( ) && script != Script :: Cyrillic && script != Script :: Greek {
70
+ return RestrictionLevel :: ModeratelyRestrictive ;
71
+ }
72
+ }
73
+ return RestrictionLevel :: MinimallyRestrictive ;
74
+ }
75
+ }
0 commit comments