Merge pull request #17 from mbrubeck/zwj · jmuk/unicode-segmentation@e86a69b · GitHub
[go: up one dir, main page]

Skip to content

Commit e86a69b

Browse files
authored
Merge pull request unicode-rs#17 from mbrubeck/zwj
Fix state of reverse iterator after ZWJ
2 parents 7bd5b4d + 3d593f8 commit e86a69b

File tree

2 files changed

+17
-27
lines changed

2 files changed

+17
-27
lines changed

src/grapheme.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,8 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
382382
take_curr = false;
383383
break;
384384
},
385-
Zwj => match cat { // char to right is (GAZ|EBG)
386-
gr::GC_ZWJ => continue, // rule GB11: ZWJ x (GAZ|EBG)
385+
Zwj => match cat { // char to right is (GAZ|EBG)
386+
gr::GC_ZWJ => FindExtend, // rule GB11: ZWJ x (GAZ|EBG)
387387
_ => {
388388
take_curr = false;
389389
break;

src/test.rs

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,40 +30,30 @@ fn test_graphemes() {
3030
&["\u{600}", "\u{20}", "\u{20}"]),
3131
];
3232

33-
for &(s, g) in TEST_SAME {
33+
pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[
34+
// family emoji (more than two emoji joined by ZWJ)
35+
("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
36+
&["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]),
37+
];
38+
39+
for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
3440
// test forward iterator
35-
assert!(UnicodeSegmentation::graphemes(s, true)
36-
.zip(g.iter().cloned())
37-
.all(|(a,b)| a == b));
38-
assert!(UnicodeSegmentation::graphemes(s, false)
39-
.zip(g.iter().cloned())
40-
.all(|(a,b)| a == b));
41+
assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned()));
42+
assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));
4143

4244
// test reverse iterator
43-
assert!(UnicodeSegmentation::graphemes(s, true).rev()
44-
.zip(g.iter().rev().cloned())
45-
.all(|(a,b)| a == b));
46-
assert!(UnicodeSegmentation::graphemes(s, false).rev()
47-
.zip(g.iter().rev().cloned())
48-
.all(|(a,b)| a == b));
45+
assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(g.iter().rev().cloned()));
46+
assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(g.iter().rev().cloned()));
4947
}
5048

5149
for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
5250
// test forward iterator
53-
assert!(UnicodeSegmentation::graphemes(s, true)
54-
.zip(gt.iter().cloned())
55-
.all(|(a,b)| a == b), "{:?}", s);
56-
assert!(UnicodeSegmentation::graphemes(s, false)
57-
.zip(gf.iter().cloned())
58-
.all(|(a,b)| a == b));
51+
assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned()));
52+
assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned()));
5953

6054
// test reverse iterator
61-
assert!(UnicodeSegmentation::graphemes(s, true).rev()
62-
.zip(gt.iter().rev().cloned())
63-
.all(|(a,b)| a == b));
64-
assert!(UnicodeSegmentation::graphemes(s, false).rev()
65-
.zip(gf.iter().rev().cloned())
66-
.all(|(a,b)| a == b));
55+
assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(gt.iter().rev().cloned()));
56+
assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(gf.iter().rev().cloned()));
6757
}
6858

6959
// test the indices iterators

0 commit comments

Comments
 (0)
0