Impl pretty-printing for AugmentedScriptSet by crlf0710 · Pull Request #19 · unicode-rs/unicode-security · GitHub
[go: up one dir, main page]

Skip to content

Impl pretty-printing for AugmentedScriptSet #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ according to Unicode Technical Standard #39 rules.
exclude = [ "target/*", "Cargo.lock" ]

[dependencies]
unicode-script = { version = "0.4.0", default-features = false }
unicode-script = { version = "0.5.1", default-features = false }
unicode-normalization = { version = "0.1.12", default-features = false }
std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
Expand Down
45 changes: 37 additions & 8 deletions src/mixed_script.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
//! [Mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)

use core::fmt::{self, Debug};
use unicode_script::{Script, ScriptExtension};

/// An Augmented script set, as defined by UTS 39
///
/// <https://www.unicode.org/reports/tr39/#def-augmented-script-set>
#[derive(Copy, Clone, PartialEq, Debug, Hash, Eq)]
#[derive(Copy, Clone, PartialEq, Hash, Eq)]
pub struct AugmentedScriptSet {
/// The base ScriptExtension value
pub base: ScriptExtension,
Expand All @@ -23,10 +24,7 @@ impl From<ScriptExtension> for AugmentedScriptSet {
let mut jpan = false;
let mut kore = false;

if ext == ScriptExtension::Single(Script::Common)
|| ext == ScriptExtension::Single(Script::Inherited)
|| ext.contains_script(Script::Han)
{
if ext.is_common() || ext.is_inherited() || ext.contains_script(Script::Han) {
hanb = true;
jpan = true;
kore = true;
Expand Down Expand Up @@ -67,14 +65,46 @@ impl From<&'_ str> for AugmentedScriptSet {
impl Default for AugmentedScriptSet {
/// Builds the default set: a `Common` base with the `hanb`, `jpan` and
/// `kore` flags all set to `true`.
fn default() -> Self {
AugmentedScriptSet {
// NOTE(review): the two `base:` lines below appear to be the before/after
// sides of a single changed line in this diff view — confirm that only one
// of them exists in the actual file.
base: ScriptExtension::Single(Script::Common),
base: Script::Common.into(),
hanb: true,
jpan: true,
kore: true,
}
}
}

impl Debug for AugmentedScriptSet {
    /// Pretty-prints the set as `AugmentedScriptSet {…}`.
    ///
    /// * `{∅}` is printed when `is_empty()` reports true.
    /// * `{ALL}` is printed when `is_all()` reports true.
    /// * Otherwise the braces hold a comma-separated list: the augmented
    ///   entries (`Hanb`, `Jpan`, `Kore`) whose flags are set, followed by the
    ///   short name of every script yielded by `base.iter()`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The two degenerate sets get a fixed spelling and need no listing.
        if self.is_empty() {
            return write!(f, "AugmentedScriptSet {{∅}}");
        }
        if self.is_all() {
            return write!(f, "AugmentedScriptSet {{ALL}}");
        }

        write!(f, "AugmentedScriptSet {{")?;

        // Augmented entries come first, in this fixed order, then the base scripts.
        let augmented = [
            (self.hanb, "Hanb"),
            (self.jpan, "Jpan"),
            (self.kore, "Kore"),
        ];
        let names = augmented
            .iter()
            .filter(|(enabled, _)| *enabled)
            .map(|(_, label)| *label)
            .chain(self.base.iter().map(Script::short_name));

        // Empty before the first entry, ", " before every later one.
        let mut separator = "";
        for name in names {
            write!(f, "{}{}", separator, name)?;
            separator = ", ";
        }

        write!(f, "}}")
    }
}

impl AugmentedScriptSet {
/// Intersect this set with another
pub fn intersect_with(&mut self, other: Self) {
Expand All @@ -91,8 +121,7 @@ impl AugmentedScriptSet {

/// Check if the set is "All" (Common or Inherited)
///
/// Returns `true` when `base` is the Common or the Inherited script
/// extension; the `hanb`, `jpan` and `kore` flags are not consulted.
pub fn is_all(&self) -> bool {
// NOTE(review): the first two lines of the body and the third look like the
// before/after sides of the same expression in this diff view — confirm that
// only one form exists in the actual file.
self.base == ScriptExtension::Single(Script::Common)
|| self.base == ScriptExtension::Single(Script::Inherited)
self.base.is_common() || self.base.is_inherited()
}

/// Construct an AugmentedScriptSet for a given character
Expand Down
5 changes: 3 additions & 2 deletions src/restriction_level.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

use crate::mixed_script::AugmentedScriptSet;
use crate::GeneralSecurityProfile;
use unicode_script::{Script, ScriptExtension};
use unicode_script::Script;

#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
/// The [Restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
Expand Down Expand Up @@ -64,7 +64,8 @@ impl RestrictionLevelDetection for &'_ str {
return RestrictionLevel::SingleScript;
} else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
return RestrictionLevel::HighlyRestrictive;
} else if let ScriptExtension::Single(script) = exclude_latin_set.base {
} else if exclude_latin_set.base.len() == 1 {
let script = exclude_latin_set.base.iter().next().unwrap();
if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
return RestrictionLevel::ModeratelyRestrictive;
}
Expand Down
37 changes: 37 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,40 @@ fn test_potential_mixed_script_detection() {
assert!(is_potential_mixed_script_confusable_char('A'));
assert!(!is_potential_mixed_script_confusable_char('D'));
}

#[test]
fn test_augmented_script_set() {
    use crate::mixed_script::AugmentedScriptSet;

    // Each case pairs a set with the exact text its `Debug` impl must produce.
    let cases = [
        (AugmentedScriptSet::default(), "AugmentedScriptSet {ALL}"),
        (AugmentedScriptSet::from('0'), "AugmentedScriptSet {ALL}"),
        (AugmentedScriptSet::from('a'), "AugmentedScriptSet {Latn}"),
        (AugmentedScriptSet::from('μ'), "AugmentedScriptSet {Grek}"),
        (
            AugmentedScriptSet::from('汉'),
            "AugmentedScriptSet {Hanb, Jpan, Kore, Hani}",
        ),
        (
            AugmentedScriptSet::from('ひ'),
            "AugmentedScriptSet {Jpan, Hira}",
        ),
        (
            AugmentedScriptSet::from('カ'),
            "AugmentedScriptSet {Jpan, Kana}",
        ),
        (
            AugmentedScriptSet::from('한'),
            "AugmentedScriptSet {Kore, Hang}",
        ),
        (AugmentedScriptSet::from("汉ひ"), "AugmentedScriptSet {Jpan}"),
        (AugmentedScriptSet::from("汉a"), "AugmentedScriptSet {∅}"),
        (AugmentedScriptSet::from("汉μ"), "AugmentedScriptSet {∅}"),
        (
            AugmentedScriptSet::from("〆切"),
            "AugmentedScriptSet {Hanb, Jpan, Kore, Hani}",
        ),
    ];

    for (script_set, expected) in cases.iter() {
        assert_eq!(format!("{:?}", script_set), *expected);
    }
}
0