Implement detect_restriction_level(). · unicode-rs/unicode-security@e5da0e4 · GitHub
[go: up one dir, main page]

Skip to content

Commit e5da0e4

Browse files
crlf0710 authored and Manishearth committed
Implement detect_restriction_level().
1 parent 2299150 commit e5da0e4

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed

src/mixed_script.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use unicode_script::{Script, ScriptExtension};
55
/// An Augmented script set, as defined by UTS 39
66
///
77
/// https://www.unicode.org/reports/tr39/#def-augmented-script-set
8+
#[derive(Copy, Clone, PartialEq, Debug, Hash)]
89
pub struct AugmentedScriptSet {
910
/// The base ScriptExtension value
1011
pub base: ScriptExtension,
@@ -106,6 +107,16 @@ impl AugmentedScriptSet {
106107
}
107108
}
108109

110+
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// of a string, as defined by UTS 39.
///
/// `PartialOrd`/`Ord` are derived, so variants compare by declaration order:
/// a variant declared earlier is "less than" one declared later. Do not
/// reorder the variants without accounting for that.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RestrictionLevel {
    /// The UTS 39 "ASCII-Only" level.
    ASCIIOnly,
    /// The UTS 39 "Single Script" level.
    SingleScript,
    /// The UTS 39 "Highly Restrictive" level.
    HighlyRestrictive,
    /// The UTS 39 "Moderately Restrictive" level.
    ModeratelyRestrictive,
    /// The UTS 39 "Minimally Restrictive" level.
    MinimallyRestrictive,
    /// The UTS 39 "Unrestricted" level.
    Unrestricted,
}
119+
109120
/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
110121
pub trait MixedScript {
111122
/// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
@@ -115,6 +126,9 @@ pub trait MixedScript {
115126

116127
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
117128
fn resolve_script_set(self) -> AugmentedScriptSet;
129+
130+
/// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) of a given string
131+
fn detect_restriction_level(self) -> RestrictionLevel;
118132
}
119133

120134
impl MixedScript for &'_ str {
@@ -125,4 +139,36 @@ impl MixedScript for &'_ str {
125139
/// Resolve the script set of this string by converting it into an
/// `AugmentedScriptSet` through its `Into` implementation.
fn resolve_script_set(self) -> AugmentedScriptSet {
    Into::<AugmentedScriptSet>::into(self)
}
142+
143+
fn detect_restriction_level(self) -> RestrictionLevel {
144+
use crate::GeneralSecurityProfile;
145+
let mut ascii_only = true;
146+
let mut set = AugmentedScriptSet::default();
147+
let mut exclude_latin_set = AugmentedScriptSet::default();
148+
for ch in self.chars() {
149+
if !GeneralSecurityProfile::identifier_allowed(ch) {
150+
return RestrictionLevel::Unrestricted;
151+
}
152+
if ch as u32 > 0x7F {
153+
ascii_only = false;
154+
}
155+
let ch_set = ch.into();
156+
set = set.intersect(ch_set);
157+
if !ch_set.base.contains_script(Script::Latin) {
158+
exclude_latin_set.intersect(ch_set);
159+
}
160+
}
161+
if ascii_only {
162+
return RestrictionLevel::ASCIIOnly;
163+
} else if !set.is_empty() {
164+
return RestrictionLevel::SingleScript;
165+
} else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
166+
return RestrictionLevel::HighlyRestrictive;
167+
} else if let ScriptExtension::Single(script) = exclude_latin_set.base {
168+
if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
169+
return RestrictionLevel::ModeratelyRestrictive;
170+
}
171+
}
172+
return RestrictionLevel::MinimallyRestrictive;
173+
}
128174
}

0 commit comments

Comments
 (0)
0