@@ -5,6 +5,7 @@ use unicode_script::{Script, ScriptExtension};
5
5
/// An Augmented script set, as defined by UTS 39
6
6
///
7
7
/// <https://www.unicode.org/reports/tr39/#def-augmented-script-set>
8
+ #[ derive( Copy , Clone , PartialEq , Debug , Hash ) ]
8
9
pub struct AugmentedScriptSet {
9
10
/// The base ScriptExtension value
10
11
pub base : ScriptExtension ,
@@ -106,6 +107,16 @@ impl AugmentedScriptSet {
106
107
}
107
108
}
108
109
110
/// The restriction level of a string, as defined by
/// [UTS 39](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection).
///
/// Variants are declared from the most restrictive level to the least
/// restrictive one, so the derived ordering satisfies
/// `ASCIIOnly < SingleScript < HighlyRestrictive < ModeratelyRestrictive
/// < MinimallyRestrictive < Unrestricted`.
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
pub enum RestrictionLevel {
    /// Every character of the string is in the ASCII range.
    ASCIIOnly,
    /// The string is single-script: all of its characters share at least
    /// one script.
    SingleScript,
    /// Per UTS 39: the string is covered by Latin combined with one of the
    /// CJK augmented sets (`Hanb`, `Jpan` or `Kore`).
    HighlyRestrictive,
    /// Per UTS 39: the string is covered by Latin plus one other Recommended
    /// script, with Cyrillic and Greek excluded.
    ModeratelyRestrictive,
    /// Per UTS 39: the string mixes scripts in a way that none of the
    /// stricter levels allow, while still using only identifier characters.
    MinimallyRestrictive,
    /// Per UTS 39: no restriction applies; the string may contain any
    /// characters.
    Unrestricted,
}
119
+
109
120
/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
110
121
pub trait MixedScript {
111
122
/// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
@@ -115,6 +126,9 @@ pub trait MixedScript {
115
126
116
127
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
117
128
fn resolve_script_set ( self ) -> AugmentedScriptSet ;
129
+
130
+ /// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) of a given string
131
+ fn detect_restriction_level ( self ) -> RestrictionLevel ;
118
132
}
119
133
120
134
impl MixedScript for & ' _ str {
@@ -125,4 +139,36 @@ impl MixedScript for &'_ str {
125
139
/// Resolve the script set of this string by converting it into an
/// `AugmentedScriptSet`.
fn resolve_script_set(self) -> AugmentedScriptSet {
    // The string-to-set conversion does all the work; the binding only
    // makes the target type explicit.
    let resolved: AugmentedScriptSet = self.into();
    resolved
}
142
+
143
+ fn detect_restriction_level ( self ) -> RestrictionLevel {
144
+ use crate :: GeneralSecurityProfile ;
145
+ let mut ascii_only = true ;
146
+ let mut set = AugmentedScriptSet :: default ( ) ;
147
+ let mut exclude_latin_set = AugmentedScriptSet :: default ( ) ;
148
+ for ch in self . chars ( ) {
149
+ if !GeneralSecurityProfile :: identifier_allowed ( ch) {
150
+ return RestrictionLevel :: Unrestricted ;
151
+ }
152
+ if ch as u32 > 0x7F {
153
+ ascii_only = false ;
154
+ }
155
+ let ch_set = ch. into ( ) ;
156
+ set = set. intersect ( ch_set) ;
157
+ if !ch_set. base . contains_script ( Script :: Latin ) {
158
+ exclude_latin_set. intersect ( ch_set) ;
159
+ }
160
+ }
161
+ if ascii_only {
162
+ return RestrictionLevel :: ASCIIOnly ;
163
+ } else if !set. is_empty ( ) {
164
+ return RestrictionLevel :: SingleScript ;
165
+ } else if exclude_latin_set. kore || exclude_latin_set. hanb || exclude_latin_set. jpan {
166
+ return RestrictionLevel :: HighlyRestrictive ;
167
+ } else if let ScriptExtension :: Single ( script) = exclude_latin_set. base {
168
+ if script. is_recommended ( ) && script != Script :: Cyrillic && script != Script :: Greek {
169
+ return RestrictionLevel :: ModeratelyRestrictive ;
170
+ }
171
+ }
172
+ return RestrictionLevel :: MinimallyRestrictive ;
173
+ }
128
174
}
0 commit comments