Support flags in forward iteration · jmuk/unicode-segmentation@f3ea31d · GitHub
[go: up one dir, main page]

Skip to content

Commit f3ea31d

Browse files
Manishearth authored and mbrubeck committed
Support flags in forward iteration
1 parent 858d594 commit f3ea31d

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

src/word.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ enum UWordBoundsState {
8080
Numeric,
8181
Katakana,
8282
ExtendNumLet,
83-
Regional,
83+
Regional(/* half */ bool),
8484
FormatExtend(FormatExtendType),
85-
Zwj(bool),
85+
Zwj(/* tainted */ bool),
8686
Emoji,
8787
}
8888

@@ -184,7 +184,7 @@ impl<'a> Iterator for UWordBounds<'a> {
184184
wd::WC_Numeric => Numeric, // rule WB8, WB10, WB12, WB13a
185185
wd::WC_Katakana => Katakana, // rule WB13, WB13a
186186
wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a, WB13b
187-
wd::WC_Regional_Indicator => Regional, // rule WB13c
187+
wd::WC_Regional_Indicator => Regional(/* half = */ true), // rule WB13c
188188
wd::WC_LF | wd::WC_Newline => break, // rule WB3a
189189
wd::WC_ZWJ => Zwj(false), // rule WB3c
190190
wd::WC_E_Base | wd::WC_E_Base_GAZ => Emoji, // rule WB14
@@ -269,8 +269,15 @@ impl<'a> Iterator for UWordBounds<'a> {
269269
break;
270270
}
271271
},
272-
Regional => match cat {
273-
wd::WC_Regional_Indicator => Regional, // rule WB13c
272+
Regional(false) => {
273+
// if it reaches here we've gone too far,
274+
// a full flag can only compose with ZWJ/Extend/Format
275+
// following it.
276+
take_curr = false;
277+
break;
278+
}
279+
Regional(/* half */ true) => match cat {
280+
wd::WC_Regional_Indicator => Regional(false), // rule WB13c
274281
_ => {
275282
take_curr = false;
276283
break;
@@ -385,7 +392,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
385392
wd::WC_Numeric => Numeric, // rule WB8, WB9, WB11, WB13b
386393
wd::WC_Katakana => Katakana, // rule WB13, WB13b
387394
wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a
388-
wd::WC_Regional_Indicator => Regional, // rule WB13c
395+
wd::WC_Regional_Indicator => Regional(true), // rule WB13c
389396
wd::WC_Glue_After_Zwj | wd::WC_E_Base_GAZ => Zwj(false), // rule WB3c
390397
// rule WB4:
391398
wd::WC_Extend | wd::WC_Format | wd::WC_ZWJ => FormatExtend(AcceptAny),
@@ -467,8 +474,8 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
467474
break;
468475
}
469476
},
470-
Regional => match cat {
471-
wd::WC_Regional_Indicator => Regional, // rule WB13c
477+
Regional(_) => match cat {
478+
wd::WC_Regional_Indicator => Regional(true), // rule WB13c
472479
_ => {
473480
take_curr = false;
474481
break;

0 commit comments

Comments
 (0)
0