Fix Emoji state in reverse iteration for words · cbarrick/unicode-segmentation@41b11e6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 41b11e6

Browse files
Manishearth authored and mbrubeck committed
Fix Emoji state in reverse iteration for words
1 parent 605dc7d commit 41b11e6

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

src/word.rs

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -375,6 +375,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
375375

376376
if cat == wd::WC_Extend || cat == wd::WC_Format || cat == wd::WC_ZWJ {
377377
if match state {
378+
Zwj(_) if cat == wd::WC_ZWJ => false,
378379
FormatExtend(_) | Start => false,
379380
_ => true
380381
} {
@@ -489,8 +490,6 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
489490
let count = self.string[..previdx]
490491
.chars().rev()
491492
.map(|c| wd::word_category(c))
492-
// Ignore because of WB4
493-
// Combining characters *inside* flag emoji. Yay.
494493
.filter(|&c| ! (c == wd::WC_ZWJ || c == wd::WC_Extend || c == wd::WC_Format))
495494
.take_while(|&c| c == wd::WC_Regional_Indicator)
496495
.count();
@@ -513,7 +512,10 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
513512
}
514513
},
515514
Emoji => match cat { // rule WB14
516-
wd::WC_E_Base | wd::WC_E_Base_GAZ => continue,
515+
wd::WC_E_Base | wd::WC_E_Base_GAZ => {
516+
state = Zwj(false);
517+
continue
518+
},
517519
_ => {
518520
take_curr = false;
519521
break;

0 commit comments

Comments
 (0)
0