@@ -80,9 +80,9 @@ enum UWordBoundsState {
80
80
Numeric ,
81
81
Katakana ,
82
82
ExtendNumLet ,
83
- Regional ,
83
+ Regional ( /* half */ bool ) ,
84
84
FormatExtend ( FormatExtendType ) ,
85
- Zwj ( bool ) ,
85
+ Zwj ( /* tainted */ bool ) ,
86
86
Emoji ,
87
87
}
88
88
@@ -184,7 +184,7 @@ impl<'a> Iterator for UWordBounds<'a> {
184
184
wd:: WC_Numeric => Numeric , // rule WB8, WB10, WB12, WB13a
185
185
wd:: WC_Katakana => Katakana , // rule WB13, WB13a
186
186
wd:: WC_ExtendNumLet => ExtendNumLet , // rule WB13a, WB13b
187
- wd:: WC_Regional_Indicator => Regional , // rule WB13c
187
+ wd:: WC_Regional_Indicator => Regional ( /* half = */ true ) , // rule WB13c
188
188
wd:: WC_LF | wd:: WC_Newline => break , // rule WB3a
189
189
wd:: WC_ZWJ => Zwj ( false ) , // rule WB3c
190
190
wd:: WC_E_Base | wd:: WC_E_Base_GAZ => Emoji , // rule WB14
@@ -269,8 +269,15 @@ impl<'a> Iterator for UWordBounds<'a> {
269
269
break ;
270
270
}
271
271
} ,
272
- Regional => match cat {
273
- wd:: WC_Regional_Indicator => Regional , // rule WB13c
272
+ Regional ( false ) => {
273
+ // if it reaches here we've gone too far,
274
+ // a full flag can only compose with ZWJ/Extend/Format
275
+ // following it.
276
+ take_curr = false ;
277
+ break ;
278
+ }
279
+ Regional ( /* half */ true ) => match cat {
280
+ wd:: WC_Regional_Indicator => Regional ( false ) , // rule WB13c
274
281
_ => {
275
282
take_curr = false ;
276
283
break ;
@@ -385,7 +392,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
385
392
wd:: WC_Numeric => Numeric , // rule WB8, WB9, WB11, WB13b
386
393
wd:: WC_Katakana => Katakana , // rule WB13, WB13b
387
394
wd:: WC_ExtendNumLet => ExtendNumLet , // rule WB13a
388
- wd:: WC_Regional_Indicator => Regional , // rule WB13c
395
+ wd:: WC_Regional_Indicator => Regional ( true ) , // rule WB13c
389
396
wd:: WC_Glue_After_Zwj | wd:: WC_E_Base_GAZ => Zwj ( false ) , // rule WB3c
390
397
// rule WB4:
391
398
wd:: WC_Extend | wd:: WC_Format | wd:: WC_ZWJ => FormatExtend ( AcceptAny ) ,
@@ -467,8 +474,8 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
467
474
break ;
468
475
}
469
476
} ,
470
- Regional => match cat {
471
- wd:: WC_Regional_Indicator => Regional , // rule WB13c
477
+ Regional ( _ ) => match cat {
478
+ wd:: WC_Regional_Indicator => Regional ( true ) , // rule WB13c
472
479
_ => {
473
480
take_curr = false ;
474
481
break ;
0 commit comments