@@ -83,6 +83,7 @@ enum UWordBoundsState {
83
83
Regional ,
84
84
FormatExtend ( FormatExtendType ) ,
85
85
Zwj ,
86
+ Emoji ,
86
87
}
87
88
88
89
// subtypes for FormatExtend state in UWordBoundsState
@@ -163,6 +164,7 @@ impl<'a> Iterator for UWordBounds<'a> {
163
164
wd:: WC_Regional_Indicator => Regional , // rule WB13c
164
165
wd:: WC_LF | wd:: WC_Newline => break , // rule WB3a
165
166
wd:: WC_ZWJ => Zwj , // rule WB3c
167
+ wd:: WC_E_Base | wd:: WC_E_Base_GAZ => Emoji , // rule WB14
166
168
_ => {
167
169
if let Some ( ncat) = self . get_next_cat ( idx) { // rule WB4
168
170
if ncat == wd:: WC_Format || ncat == wd:: WC_Extend || ncat == wd:: WC_ZWJ {
@@ -245,6 +247,13 @@ impl<'a> Iterator for UWordBounds<'a> {
245
247
break ;
246
248
}
247
249
} ,
250
+ Emoji => match cat { // rule WB14
251
+ wd:: WC_E_Modifier => continue ,
252
+ _ => {
253
+ take_curr = false ;
254
+ break ;
255
+ }
256
+ } ,
248
257
FormatExtend ( t) => match t { // handle FormatExtends depending on what type
249
258
RequireNumeric if cat == wd:: WC_Numeric => Numeric , // rule WB11
250
259
RequireLetter | AcceptQLetter if cat == wd:: WC_ALetter => Letter , // rule WB7
@@ -355,6 +364,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
355
364
saveidx = idx;
356
365
FormatExtend ( AcceptQLetter ) // rule WB7a
357
366
} ,
367
+ wd:: WC_E_Modifier => Emoji , // rule WB14
358
368
wd:: WC_CR | wd:: WC_LF | wd:: WC_Newline => {
359
369
if state == Start {
360
370
if cat == wd:: WC_LF {
@@ -435,6 +445,13 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
435
445
break ;
436
446
}
437
447
} ,
448
+ Emoji => match cat { // rule WB14
449
+ wd:: WC_E_Base | wd:: WC_E_Base_GAZ => continue ,
450
+ _ => {
451
+ take_curr = false ;
452
+ break ;
453
+ }
454
+ } ,
438
455
FormatExtend ( t) => match t {
439
456
RequireNumeric if cat == wd:: WC_Numeric => Numeric , // rule WB12
440
457
RequireLetter if cat == wd:: WC_ALetter => Letter , // rule WB6
0 commit comments