Don't decompose Hangul in the `svar` iterator. · emilio/unicode-normalization@41dc717 · GitHub
[go: up one dir, main page]

Skip to content

Commit 41dc717

Browse files
committed
Don't decompose Hangul in the `svar` iterator.
1 parent d0c3706 commit 41dc717

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

src/normalize.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,27 @@ pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) {
4747
/// [Unicode 6.3 Release Summary](https://www.unicode.org/versions/Unicode6.3.0/#Summary)
4848
/// for more information.
4949
#[inline]
50-
pub fn decompose_svar<F>(c: char, emit_char: F)
50+
pub fn decompose_svar<F>(c: char, mut emit_char: F)
5151
where
5252
F: FnMut(char),
5353
{
54-
decompose(c, svar_fully_decomposed, emit_char)
54+
// 7-bit ASCII never decomposes
55+
if c <= '\x7f' {
56+
emit_char(c);
57+
return;
58+
}
59+
60+
// Don't perform decomposition for Hangul
61+
62+
if let Some(decomposed) = svar_fully_decomposed(c) {
63+
for &d in decomposed {
64+
emit_char(d);
65+
}
66+
return;
67+
}
68+
69+
// Finally bottom out.
70+
emit_char(c);
5571
}
5672

5773
#[inline]

tests/svar.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,12 @@ fn test_standardized_variations_for_cjk_singleton_decompositions() {
6666
assert_eq!(svar_nfkd_iter.next(), Some('\u{fe00}'));
6767
assert_eq!(svar_nfkd_iter.next(), None);
6868
}
69+
70+
/// `svar` shouldn't decompose Hangul.
71+
#[test]
72+
fn test_svar_hangul() {
73+
assert_eq!(
74+
"중국어 (홍콩)".chars().svar().collect::<String>(),
75+
"중국어 (홍콩)"
76+
);
77+
}

0 commit comments

Comments
 (0)
0