diff --git a/Cargo.toml b/Cargo.toml index 5a15b32..85df296 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ according to Unicode Technical Standard #39 rules. exclude = [ "target/*", "Cargo.lock" ] [dependencies] -unicode-script = { version = "0.4.0", default-features = false } +unicode-script = { version = "0.5.1", default-features = false } unicode-normalization = { version = "0.1.12", default-features = false } std = { version = "1.0", package = "rustc-std-workspace-std", optional = true } core = { version = "1.0", package = "rustc-std-workspace-core", optional = true } diff --git a/src/mixed_script.rs b/src/mixed_script.rs index 52157fd..a1c6df1 100644 --- a/src/mixed_script.rs +++ b/src/mixed_script.rs @@ -1,11 +1,12 @@ //! [Mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection) +use core::fmt::{self, Debug}; use unicode_script::{Script, ScriptExtension}; /// An Augmented script set, as defined by UTS 39 /// /// https://www.unicode.org/reports/tr39/#def-augmented-script-set -#[derive(Copy, Clone, PartialEq, Debug, Hash, Eq)] +#[derive(Copy, Clone, PartialEq, Hash, Eq)] pub struct AugmentedScriptSet { /// The base ScriptExtension value pub base: ScriptExtension, @@ -23,10 +24,7 @@ impl From<ScriptExtension> for AugmentedScriptSet { let mut jpan = false; let mut kore = false; - if ext == ScriptExtension::Single(Script::Common) - || ext == ScriptExtension::Single(Script::Inherited) - || ext.contains_script(Script::Han) - { + if ext.is_common() || ext.is_inherited() || ext.contains_script(Script::Han) { hanb = true; jpan = true; kore = true; @@ -67,7 +65,7 @@ impl From<&'_ str> for AugmentedScriptSet { impl Default for AugmentedScriptSet { fn default() -> Self { AugmentedScriptSet { - base: ScriptExtension::Single(Script::Common), + base: Script::Common.into(), hanb: true, jpan: true, kore: true, @@ -75,6 +73,38 @@ impl Default for AugmentedScriptSet { } } +impl Debug for AugmentedScriptSet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + if self.is_empty() { + write!(f, "AugmentedScriptSet {{∅}}")?; + } else if self.is_all() { + write!(f, "AugmentedScriptSet {{ALL}}")?; + } else { + write!(f, "AugmentedScriptSet {{")?; + let mut first_entry = true; + let hanb = if self.hanb { Some("Hanb") } else { None }; + let jpan = if self.jpan { Some("Jpan") } else { None }; + let kore = if self.kore { Some("Kore") } else { None }; + for writing_system in None + .into_iter() + .chain(hanb) + .chain(jpan) + .chain(kore) + .chain(self.base.iter().map(Script::short_name)) + { + if !first_entry { + write!(f, ", ")?; + } else { + first_entry = false; + } + write!(f, "{}", writing_system)?; + } + write!(f, "}}")?; + } + Ok(()) + } +} + impl AugmentedScriptSet { /// Intersect this set with another pub fn intersect_with(&mut self, other: Self) { @@ -91,8 +121,7 @@ impl AugmentedScriptSet { /// Check if the set is "All" (Common or Inherited) pub fn is_all(&self) -> bool { - self.base == ScriptExtension::Single(Script::Common) - || self.base == ScriptExtension::Single(Script::Inherited) + self.base.is_common() || self.base.is_inherited() } /// Construct an AugmentedScriptSet for a given character diff --git a/src/restriction_level.rs b/src/restriction_level.rs index 292a4fa..adaf074 100644 --- a/src/restriction_level.rs +++ b/src/restriction_level.rs @@ -3,7 +3,7 @@ use crate::mixed_script::AugmentedScriptSet; use crate::GeneralSecurityProfile; -use unicode_script::{Script, ScriptExtension}; +use unicode_script::Script; #[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)] /// The [Restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) @@ -64,7 +64,8 @@ impl RestrictionLevelDetection for &'_ str { return RestrictionLevel::SingleScript; } else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan { return RestrictionLevel::HighlyRestrictive; - } else if let ScriptExtension::Single(script) = exclude_latin_set.base { + } else if 
exclude_latin_set.base.len() == 1 { + let script = exclude_latin_set.base.iter().next().unwrap(); if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek { return RestrictionLevel::ModeratelyRestrictive; } diff --git a/src/tests.rs b/src/tests.rs index 1753411..6409113 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -77,3 +77,40 @@ fn test_potential_mixed_script_detection() { assert!(is_potential_mixed_script_confusable_char('A')); assert!(!is_potential_mixed_script_confusable_char('D')); } + +#[test] +fn test_augmented_script_set() { + use crate::mixed_script::AugmentedScriptSet; + let augmented_script_sets = vec![ + AugmentedScriptSet::default(), + AugmentedScriptSet::from('0'), + AugmentedScriptSet::from('a'), + AugmentedScriptSet::from('μ'), + AugmentedScriptSet::from('汉'), + AugmentedScriptSet::from('ひ'), + AugmentedScriptSet::from('カ'), + AugmentedScriptSet::from('한'), + AugmentedScriptSet::from("汉ひ"), + AugmentedScriptSet::from("汉a"), + AugmentedScriptSet::from("汉μ"), + AugmentedScriptSet::from("〆切"), + ]; + let debug_output = vec![ + "AugmentedScriptSet {ALL}", + "AugmentedScriptSet {ALL}", + "AugmentedScriptSet {Latn}", + "AugmentedScriptSet {Grek}", + "AugmentedScriptSet {Hanb, Jpan, Kore, Hani}", + "AugmentedScriptSet {Jpan, Hira}", + "AugmentedScriptSet {Jpan, Kana}", + "AugmentedScriptSet {Kore, Hang}", + "AugmentedScriptSet {Jpan}", + "AugmentedScriptSet {∅}", + "AugmentedScriptSet {∅}", + "AugmentedScriptSet {Hanb, Jpan, Kore, Hani}", + ]; + + for (ss, output) in augmented_script_sets.into_iter().zip(debug_output) { + assert_eq!(format!("{:?}", ss), output); + } +}