Merge pull request #106 from ShE3py/iter-ctors · unicode-rs/unicode-normalization@9d5d794 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9d5d794

Browse files
authored
Merge pull request #106 from ShE3py/iter-ctors
Expose all iterator constructors, add hyperlinks to Unicode glossary/technical reports
2 parents c992130 + 5b31ba1 commit 9d5d794

File tree

5 files changed

+40
-18
lines changed

5 files changed

+40
-18
lines changed

src/decompose.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ impl<I: Iterator<Item = char>> Decompositions<I> {
5353

5454
/// Create a new decomposition iterator for compatibility decompositions (NFKD)
5555
///
56-
/// Note that this iterator can also be obtained by directly calling [`.nfd()`](crate::UnicodeNormalization::nfd)
56+
/// Note that this iterator can also be obtained by directly calling [`.nfkd()`](crate::UnicodeNormalization::nfkd)
5757
/// on the iterator.
5858
#[inline]
5959
pub fn new_compatible(iter: I) -> Decompositions<I> {

src/lib.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
113113
/// (compatibility decomposition followed by canonical composition).
114114
fn nfkc(self) -> Recompositions<I>;
115115

116-
/// A transformation which replaces CJK Compatibility Ideograph codepoints
117-
/// with normal forms using Standardized Variation Sequences. This is not
116+
/// A transformation which replaces [CJK Compatibility Ideograph] codepoints
117+
/// with normal forms using [Standardized Variation Sequences]. This is not
118118
/// part of the canonical or compatibility decomposition algorithms, but
119119
/// performing it before those algorithms produces normalized output which
120120
/// better preserves the intent of the original text.
@@ -123,10 +123,15 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
123123
/// may not immediately help text display as intended, but they at
124124
/// least preserve the information in a standardized form, giving
125125
/// implementations the option to recognize them.
126+
///
127+
/// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph
128+
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
126129
fn cjk_compat_variants(self) -> Replacements<I>;
127130

128131
/// An Iterator over the string with Conjoining Grapheme Joiner characters
129-
/// inserted according to the Stream-Safe Text Process (UAX15-D4)
132+
/// inserted according to the Stream-Safe Text Process ([UAX15-D4]).
133+
///
134+
/// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4
130135
fn stream_safe(self) -> StreamSafe<I>;
131136
}
132137

@@ -153,7 +158,7 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
153158

154159
#[inline]
155160
fn cjk_compat_variants(self) -> Replacements<Chars<'a>> {
156-
replace::new_cjk_compat_variants(self.chars())
161+
Replacements::new_cjk_compat_variants(self.chars())
157162
}
158163

159164
#[inline]
@@ -185,7 +190,7 @@ impl UnicodeNormalization<option::IntoIter<char>> for char {
185190

186191
#[inline]
187192
fn cjk_compat_variants(self) -> Replacements<option::IntoIter<char>> {
188-
replace::new_cjk_compat_variants(Some(self).into_iter())
193+
Replacements::new_cjk_compat_variants(Some(self).into_iter())
189194
}
190195

191196
#[inline]
@@ -217,7 +222,7 @@ impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
217222

218223
#[inline]
219224
fn cjk_compat_variants(self) -> Replacements<I> {
220-
replace::new_cjk_compat_variants(self)
225+
Replacements::new_cjk_compat_variants(self)
221226
}
222227

223228
#[inline]

src/normalize.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,12 @@ pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) {
4141
///
4242
/// [Standardized Variation Sequences] are used instead of the standard canonical
4343
/// decompositions, notably for CJK codepoints with singleton canonical decompositions,
44-
/// to avoid losing information. See the
45-
/// [Unicode Variation Sequence FAQ](http://unicode.org/faq/vs.html) and the
46-
/// "Other Enhancements" section of the
47-
/// [Unicode 6.3 Release Summary](https://www.unicode.org/versions/Unicode6.3.0/#Summary)
48-
/// for more information.
44+
/// to avoid losing information. See the [Unicode Variation Sequence FAQ] and the
45+
/// "Other Enhancements" section of the [Unicode 6.3 Release Summary] for more information.
46+
///
47+
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
48+
/// [Unicode Variation Sequence FAQ]: http://unicode.org/faq/vs.html
49+
/// [Unicode 6.3 Release Summary]: https://www.unicode.org/versions/Unicode6.3.0/#Summary
4950
#[inline]
5051
pub fn decompose_cjk_compat_variants<F>(c: char, mut emit_char: F)
5152
where

src/replace.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,18 @@ pub struct Replacements<I> {
2222
buffer: Option<char>,
2323
}
2424

25-
#[inline]
26-
pub fn new_cjk_compat_variants<I: Iterator<Item = char>>(iter: I) -> Replacements<I> {
27-
Replacements { iter, buffer: None }
25+
impl<I: Iterator<Item = char>> Replacements<I> {
26+
/// Create a new iterator that replaces [CJK Compatibility Ideograph] codepoints with normal forms using [Standardized Variation Sequences].
27+
///
28+
/// Note that this iterator can also be obtained by directly calling [`.cjk_compat_variants()`] on the iterator.
29+
///
30+
/// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph
31+
/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence
32+
/// [`.cjk_compat_variants()`]: crate::UnicodeNormalization::cjk_compat_variants
33+
#[inline]
34+
pub fn new_cjk_compat_variants(iter: I) -> Replacements<I> {
35+
Replacements { iter, buffer: None }
36+
}
2837
}
2938

3039
impl<I: Iterator<Item = char>> Iterator for Replacements<I> {

src/stream_safe.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,24 @@ use crate::tables::stream_safe_leading_nonstarters;
1010
pub(crate) const MAX_NONSTARTERS: usize = 30;
1111
const COMBINING_GRAPHEME_JOINER: char = '\u{034F}';
1212

13-
/// UAX15-D4: This iterator keeps track of how many non-starters there have been
13+
/// [UAX15-D4]: This iterator keeps track of how many non-starters there have been
1414
/// since the last starter in *NFKD* and will emit a Combining Grapheme Joiner
1515
/// (U+034F) if the count exceeds 30.
16+
///
17+
/// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4
1618
pub struct StreamSafe<I> {
1719
iter: I,
1820
nonstarter_count: usize,
1921
buffer: Option<char>,
2022
}
2123

22-
impl<I> StreamSafe<I> {
23-
pub(crate) fn new(iter: I) -> Self {
24+
impl<I: Iterator<Item = char>> StreamSafe<I> {
25+
/// Create a new stream-safe iterator.
26+
///
27+
/// Note that this iterator can also be obtained by directly calling [`.stream_safe()`](crate::UnicodeNormalization::stream_safe)
28+
/// on the iterator.
29+
#[inline]
30+
pub fn new(iter: I) -> Self {
2431
Self {
2532
iter,
2633
nonstarter_count: 0,

0 commit comments

Comments
 (0)
0