Merge pull request #9 from unicode-rs/draft_detect_restriction_level · unicode-rs/unicode-security@dca4718 · GitHub
[go: up one dir, main page]

Skip to content

Commit dca4718

Browse files
authored
Merge pull request #9 from unicode-rs/draft_detect_restriction_level
Implement detect restriction level
2 parents 2299150 + b80d8f1 commit dca4718

File tree

3 files changed

+77
-0
lines changed

3 files changed

+77
-0
lines changed

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ pub use tables::UNICODE_VERSION;
5959

6060
pub mod mixed_script;
6161
pub mod general_security_profile;
62+
pub mod restriction_level;
6263

6364
pub use mixed_script::MixedScript;
6465
pub use general_security_profile::GeneralSecurityProfile;

src/mixed_script.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use unicode_script::{Script, ScriptExtension};
55
/// An Augmented script set, as defined by UTS 39
66
///
77
/// https://www.unicode.org/reports/tr39/#def-augmented-script-set
8+
#[derive(Copy, Clone, PartialEq, Debug, Hash)]
89
pub struct AugmentedScriptSet {
910
/// The base ScriptExtension value
1011
pub base: ScriptExtension,

src/restriction_level.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
2+
//! a string conforms to
3+
4+
use crate::mixed_script::AugmentedScriptSet;
5+
use unicode_script::{Script, ScriptExtension};
6+
use crate::GeneralSecurityProfile;
7+
8+
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// Variants are declared from the most restrictive level to the least
/// restrictive one. The derived `PartialOrd`/`Ord` follow declaration order,
/// so `ASCIIOnly` compares as the smallest value and `Unrestricted` as the
/// largest. Do not reorder the variants: comparisons between levels depend
/// on this ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RestrictionLevel {
    /// See <https://www.unicode.org/reports/tr39/#ascii_only>
    ASCIIOnly,
    /// See <https://www.unicode.org/reports/tr39/#single_script>
    SingleScript,
    /// See <https://www.unicode.org/reports/tr39/#highly_restrictive>
    HighlyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#moderately_restrictive>
    ModeratelyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#minimally_restrictive>
    MinimallyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#unrestricted>
    Unrestricted,
}
25+
26+
/// Utilities for determining which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
27+
/// a string satisfies
28+
pub trait RestrictionLevelDetection: Sized {
29+
/// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
30+
///
31+
/// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
32+
fn detect_restriction_level(self) -> RestrictionLevel;
33+
34+
35+
/// Check if a string satisfies the supplied [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
36+
///
37+
/// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
38+
fn check_restriction_level(self, level: RestrictionLevel) -> bool {
39+
self.detect_restriction_level() <= level
40+
}
41+
}
42+
43+
impl RestrictionLevelDetection for &'_ str {
44+
fn detect_restriction_level(self) -> RestrictionLevel {
45+
let mut ascii_only = true;
46+
let mut set = AugmentedScriptSet::default();
47+
let mut exclude_latin_set = AugmentedScriptSet::default();
48+
for ch in self.chars() {
49+
if !GeneralSecurityProfile::identifier_allowed(ch) {
50+
return RestrictionLevel::Unrestricted;
51+
}
52+
if ch.is_ascii() {
53+
ascii_only = false;
54+
}
55+
let ch_set = ch.into();
56+
set.intersect_with(ch_set);
57+
if !ch_set.base.contains_script(Script::Latin) {
58+
exclude_latin_set.intersect_with(ch_set);
59+
}
60+
}
61+
62+
if ascii_only {
63+
return RestrictionLevel::ASCIIOnly;
64+
} else if !set.is_empty() {
65+
return RestrictionLevel::SingleScript;
66+
} else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
67+
return RestrictionLevel::HighlyRestrictive;
68+
} else if let ScriptExtension::Single(script) = exclude_latin_set.base {
69+
if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
70+
return RestrictionLevel::ModeratelyRestrictive;
71+
}
72+
}
73+
return RestrictionLevel::MinimallyRestrictive;
74+
}
75+
}

0 commit comments

Comments
 (0)
0