@@ -58,6 +58,7 @@ enum GraphemeState {
58
58
HangulL ,
59
59
HangulLV ,
60
60
HangulLVT ,
61
+ Prepend ,
61
62
Regional ,
62
63
Emoji ,
63
64
Zwj ,
@@ -123,6 +124,7 @@ impl<'a> Iterator for Graphemes<'a> {
123
124
gr:: GC_L => HangulL ,
124
125
gr:: GC_LV | gr:: GC_V => HangulLV ,
125
126
gr:: GC_LVT | gr:: GC_T => HangulLVT ,
127
+ gr:: GC_Prepend if self . extended => Prepend ,
126
128
gr:: GC_Regional_Indicator => Regional ,
127
129
gr:: GC_E_Base | gr:: GC_E_Base_GAZ => Emoji ,
128
130
_ => FindExtend
@@ -155,6 +157,13 @@ impl<'a> Iterator for Graphemes<'a> {
155
157
break ;
156
158
}
157
159
} ,
160
+ Prepend => match cat { // rule GB9b
161
+ gr:: GC_Control => {
162
+ take_curr = false ;
163
+ break ;
164
+ }
165
+ _ => continue
166
+ } ,
158
167
Regional => match cat { // rule GB12/GB13
159
168
gr:: GC_Regional_Indicator => FindExtend ,
160
169
_ => {
@@ -276,6 +285,10 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
276
285
break ;
277
286
}
278
287
} ,
288
+ Prepend => {
289
+ // not used in reverse iteration
290
+ unreachable ! ( )
291
+ } ,
279
292
Regional => { // rule GB12/GB13
280
293
// Need to scan backward to find if this is preceded by an odd or even number
281
294
// of Regional_Indicator characters.
@@ -340,6 +353,17 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
340
353
Some ( cat)
341
354
} ;
342
355
356
+ if self . extended && cat != gr:: GC_Control {
357
+ // rule GB9b: include any preceding Prepend characters
358
+ for ( i, c) in self . string [ ..idx] . char_indices ( ) . rev ( ) {
359
+ // TODO: Cache this to avoid repeated lookups in the common case.
360
+ match gr:: grapheme_category ( c) {
361
+ gr:: GC_Prepend => idx = i,
362
+ _ => break
363
+ }
364
+ }
365
+ }
366
+
343
367
let retstr = & self . string [ idx..] ;
344
368
self . string = & self . string [ ..idx] ;
345
369
Some ( retstr)
0 commit comments