@@ -462,10 +462,21 @@ impl GraphemeCursor {
462
462
}
463
463
}
464
464
465
+ /// For handling rule GB9c:
466
+ ///
467
+ /// There's an `InCB=Consonant` after this, and we need to look back
468
+ /// to verify whether there should be a break.
469
+ ///
470
+ /// Seek backward to find an `InCB=Linker` preceded by an `InCB=Consonant`
471
+ /// (potentially separated by some number of `InCB=Linker` or `InCB=Extend`).
472
+ /// If we find the consonant in question, then there's no break; if we find a consonant
473
+ /// with no linker, or a non-linker non-extend non-consonant, or the start of text, there's a break;
474
+ /// otherwise we need more context
465
475
#[ inline]
466
476
fn handle_incb_consonant ( & mut self , chunk : & str , chunk_start : usize ) {
467
477
use crate :: tables:: { self , grapheme as gr} ;
468
478
479
+ // GB9c only applies to extended grapheme clusters
469
480
if !self . is_extended {
470
481
self . decide ( true ) ;
471
482
return ;
@@ -475,23 +486,28 @@ impl GraphemeCursor {
475
486
476
487
for ch in chunk. chars ( ) . rev ( ) {
477
488
if tables:: is_incb_linker ( ch) {
489
+ // We found an InCB linker
478
490
incb_linker_count += 1 ;
479
491
self . incb_linker_count = Some ( incb_linker_count) ;
480
492
} else if tables:: derived_property:: InCB_Extend ( ch) {
481
- // continue
493
+ // We ignore InCB extends, continue
482
494
} else {
495
+ // Prev character is neither linker nor extend; the break is suppressed iff it's InCB=Consonant and at least one linker was seen
483
496
let result = !( self . incb_linker_count . unwrap_or ( 0 ) > 0
484
497
&& self . grapheme_category ( ch) == gr:: GC_InCB_Consonant ) ;
485
498
self . decide ( result) ;
486
499
return ;
487
500
}
488
501
}
502
+
489
503
if chunk_start == 0 {
504
+ // Start of text and we still haven't found a consonant, so break
490
505
self . decide ( true ) ;
491
- return ;
506
+ } else {
507
+ // We need more context
508
+ self . pre_context_offset = Some ( chunk_start) ;
509
+ self . state = GraphemeState :: InCbConsonant ;
492
510
}
493
- self . pre_context_offset = Some ( chunk_start) ;
494
- self . state = GraphemeState :: InCbConsonant ;
495
511
}
496
512
497
513
#[ inline]
@@ -509,10 +525,10 @@ impl GraphemeCursor {
509
525
self . ris_count = Some ( ris_count) ;
510
526
if chunk_start == 0 {
511
527
self . decide ( ( ris_count % 2 ) == 0 ) ;
512
- return ;
528
+ } else {
529
+ self . pre_context_offset = Some ( chunk_start) ;
530
+ self . state = GraphemeState :: Regional ;
513
531
}
514
- self . pre_context_offset = Some ( chunk_start) ;
515
- self . state = GraphemeState :: Regional ;
516
532
}
517
533
518
534
#[ inline]
@@ -540,10 +556,10 @@ impl GraphemeCursor {
540
556
}
541
557
if chunk_start == 0 {
542
558
self . decide ( true ) ;
543
- return ;
559
+ } else {
560
+ self . pre_context_offset = Some ( chunk_start) ;
561
+ self . state = GraphemeState :: Emoji ;
544
562
}
545
- self . pre_context_offset = Some ( chunk_start) ;
546
- self . state = GraphemeState :: Emoji ;
547
563
}
548
564
549
565
#[ inline]
0 commit comments