Merge pull request #4 from unicode-rs/char_iter_alternative · froydnj/unicode-normalization@3933917 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3933917

Browse files
committed
Merge pull request unicode-rs#4 from unicode-rs/char_iter_alternative
Add APIs to normalize arbitrary `char` iterators rather than just `str`.
2 parents c4445c7 + 3cc8eff commit 3933917

File tree

6 files changed

+123
-102
lines changed

6 files changed

+123
-102
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22

33
name = "unicode-normalization"
4-
version = "0.0.3"
4+
version = "0.1.0"
55
authors = ["kwantam <kwantam@gmail.com>"]
66

77
homepage = "https://github.com/unicode-rs/unicode-normalization"

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ as described in
1010
extern crate unicode_normalization;
1111

1212
use unicode_normalization::char::compose;
13-
use unicode_normalization::str::UnicodeNormalization;
13+
use unicode_normalization::UnicodeNormalization;
1414

1515
fn main() {
1616
assert_eq!(compose('A','\u{30a}'), Some('Å'));
1717

1818
let s = "ÅΩ";
19-
let c = UnicodeNormalization::nfc_chars(s).collect::<String>();
19+
let c = s.nfc().collect::<String>();
2020
assert_eq!(c, "ÅΩ");
2121
}
2222
```
@@ -28,5 +28,5 @@ to your `Cargo.toml`:
2828

2929
```toml
3030
[dependencies]
31-
unicode-normalization = "0.0.3"
31+
unicode-normalization = "0.1.0"
3232
```

src/decompose.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use std::str::Chars;
1211

1312
// Helper functions used for Unicode normalization
1413
fn canonical_sort(comb: &mut [(char, u8)]) {
@@ -35,34 +34,34 @@ enum DecompositionType {
3534

3635
/// External iterator for a string decomposition's characters.
3736
#[derive(Clone)]
38-
pub struct Decompositions<'a> {
37+
pub struct Decompositions<I> {
3938
kind: DecompositionType,
40-
iter: Chars<'a>,
39+
iter: I,
4140
buffer: Vec<(char, u8)>,
4241
sorted: bool
4342
}
4443

4544
#[inline]
46-
pub fn new_canonical<'a>(s: &'a str) -> Decompositions<'a> {
45+
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
4746
Decompositions {
48-
iter: s.chars(),
47+
iter: iter,
4948
buffer: Vec::new(),
5049
sorted: false,
5150
kind: self::DecompositionType::Canonical,
5251
}
5352
}
5453

5554
#[inline]
56-
pub fn new_compatible<'a>(s: &'a str) -> Decompositions<'a> {
55+
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
5756
Decompositions {
58-
iter: s.chars(),
57+
iter: iter,
5958
buffer: Vec::new(),
6059
sorted: false,
6160
kind: self::DecompositionType::Compatible,
6261
}
6362
}
6463

65-
impl<'a> Iterator for Decompositions<'a> {
64+
impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
6665
type Item = char;
6766

6867
#[inline]

src/lib.rs

Lines changed: 72 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
//! extern crate unicode_normalization;
1717
//!
1818
//! use unicode_normalization::char::compose;
19-
//! use unicode_normalization::str::UnicodeNormalization;
19+
//! use unicode_normalization::UnicodeNormalization;
2020
//!
2121
//! fn main() {
2222
//! assert_eq!(compose('A','\u{30a}'), Some('Å'));
23-
//!
23+
//!
2424
//! let s = "ÅΩ";
25-
//! let c = UnicodeNormalization::nfc_chars(s).collect::<String>();
25+
//! let c = s.nfc().collect::<String>();
2626
//! assert_eq!(c, "ÅΩ");
2727
//! }
2828
//! ```
@@ -34,14 +34,17 @@
3434
//!
3535
//! ```toml
3636
//! [dependencies]
37-
//! unicode-normalization = "0.0.3"
37+
//! unicode-normalization = "0.1.0"
3838
//! ```
3939
4040
#![deny(missing_docs, unsafe_code)]
4141
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
4242
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
4343

4444
pub use tables::UNICODE_VERSION;
45+
pub use decompose::Decompositions;
46+
pub use recompose::Recompositions;
47+
use std::str::Chars;
4548

4649
mod decompose;
4750
mod normalize;
@@ -61,55 +64,72 @@ pub mod char {
6164
pub use tables::normalization::canonical_combining_class;
6265
}
6366

64-
/// Methods for applying composition and decomposition to strings.
65-
pub mod str {
66-
pub use super::decompose::Decompositions;
67-
pub use super::recompose::Recompositions;
68-
69-
/// Methods for iterating over strings while applying Unicode normalizations
70-
/// as described in
71-
/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
72-
pub trait UnicodeNormalization {
73-
/// Returns an iterator over the string in Unicode Normalization Form D
74-
/// (canonical decomposition).
75-
#[inline]
76-
fn nfd_chars(&self) -> Decompositions;
77-
78-
/// Returns an iterator over the string in Unicode Normalization Form KD
79-
/// (compatibility decomposition).
80-
#[inline]
81-
fn nfkd_chars(&self) -> Decompositions;
82-
83-
/// An Iterator over the string in Unicode Normalization Form C
84-
/// (canonical decomposition followed by canonical composition).
85-
#[inline]
86-
fn nfc_chars(&self) -> Recompositions;
87-
88-
/// An Iterator over the string in Unicode Normalization Form KC
89-
/// (compatibility decomposition followed by canonical composition).
90-
#[inline]
91-
fn nfkc_chars(&self) -> Recompositions;
67+
68+
/// Methods for iterating over strings while applying Unicode normalizations
69+
/// as described in
70+
/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
71+
pub trait UnicodeNormalization<I: Iterator<Item=char>> {
72+
/// Returns an iterator over the string in Unicode Normalization Form D
73+
/// (canonical decomposition).
74+
#[inline]
75+
fn nfd(self) -> Decompositions<I>;
76+
77+
/// Returns an iterator over the string in Unicode Normalization Form KD
78+
/// (compatibility decomposition).
79+
#[inline]
80+
fn nfkd(self) -> Decompositions<I>;
81+
82+
/// An Iterator over the string in Unicode Normalization Form C
83+
/// (canonical decomposition followed by canonical composition).
84+
#[inline]
85+
fn nfc(self) -> Recompositions<I>;
86+
87+
/// An Iterator over the string in Unicode Normalization Form KC
88+
/// (compatibility decomposition followed by canonical composition).
89+
#[inline]
90+
fn nfkc(self) -> Recompositions<I>;
91+
}
92+
93+
impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
94+
#[inline]
95+
fn nfd(self) -> Decompositions<Chars<'a>> {
96+
decompose::new_canonical(self.chars())
97+
}
98+
99+
#[inline]
100+
fn nfkd(self) -> Decompositions<Chars<'a>> {
101+
decompose::new_compatible(self.chars())
102+
}
103+
104+
#[inline]
105+
fn nfc(self) -> Recompositions<Chars<'a>> {
106+
recompose::new_canonical(self.chars())
107+
}
108+
109+
#[inline]
110+
fn nfkc(self) -> Recompositions<Chars<'a>> {
111+
recompose::new_compatible(self.chars())
112+
}
113+
}
114+
115+
impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I {
116+
#[inline]
117+
fn nfd(self) -> Decompositions<I> {
118+
decompose::new_canonical(self)
119+
}
120+
121+
#[inline]
122+
fn nfkd(self) -> Decompositions<I> {
123+
decompose::new_compatible(self)
124+
}
125+
126+
#[inline]
127+
fn nfc(self) -> Recompositions<I> {
128+
recompose::new_canonical(self)
92129
}
93130

94-
impl UnicodeNormalization for str {
95-
#[inline]
96-
fn nfd_chars(&self) -> Decompositions {
97-
super::decompose::new_canonical(self)
98-
}
99-
100-
#[inline]
101-
fn nfkd_chars(&self) -> Decompositions {
102-
super::decompose::new_compatible(self)
103-
}
104-
105-
#[inline]
106-
fn nfc_chars(&self) -> Recompositions {
107-
super::recompose::new_canonical(self)
108-
}
109-
110-
#[inline]
111-
fn nfkc_chars(&self) -> Recompositions {
112-
super::recompose::new_compatible(self)
113-
}
131+
#[inline]
132+
fn nfkc(self) -> Recompositions<I> {
133+
recompose::new_compatible(self)
114134
}
115135
}

src/recompose.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// except according to those terms.
1010

1111
use std::collections::VecDeque;
12-
use super::str::{Decompositions, UnicodeNormalization};
12+
use decompose::Decompositions;
1313

1414
#[derive(Clone)]
1515
enum RecompositionState {
@@ -20,18 +20,18 @@ enum RecompositionState {
2020

2121
/// External iterator for a string recomposition's characters.
2222
#[derive(Clone)]
23-
pub struct Recompositions<'a> {
24-
iter: Decompositions<'a>,
23+
pub struct Recompositions<I> {
24+
iter: Decompositions<I>,
2525
state: RecompositionState,
2626
buffer: VecDeque<char>,
2727
composee: Option<char>,
2828
last_ccc: Option<u8>
2929
}
3030

3131
#[inline]
32-
pub fn new_canonical<'a>(s: &'a str) -> Recompositions<'a> {
32+
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
3333
Recompositions {
34-
iter: UnicodeNormalization::nfd_chars(s),
34+
iter: super::decompose::new_canonical(iter),
3535
state: self::RecompositionState::Composing,
3636
buffer: VecDeque::new(),
3737
composee: None,
@@ -40,17 +40,17 @@ pub fn new_canonical<'a>(s: &'a str) -> Recompositions<'a> {
4040
}
4141

4242
#[inline]
43-
pub fn new_compatible<'a>(s: &'a str) -> Recompositions<'a> {
43+
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
4444
Recompositions {
45-
iter: UnicodeNormalization::nfkd_chars(s),
45+
iter: super::decompose::new_compatible(iter),
4646
state : self::RecompositionState::Composing,
4747
buffer: VecDeque::new(),
4848
composee: None,
4949
last_ccc: None,
5050
}
5151
}
5252

53-
impl<'a> Iterator for Recompositions<'a> {
53+
impl<I: Iterator<Item=char>> Iterator for Recompositions<I> {
5454
type Item = char;
5555

5656
#[inline]

0 commit comments

Comments
 (0)
0