@@ -462,10 +462,21 @@ impl GraphemeCursor {
462462 }
463463 }
464464
465+ /// For handling rule GB9c:
466+ ///
467+ /// There's an `InCB=Consonant` after this, and we need to look back
468+ /// to verify whether there should be a break.
469+ ///
470+ /// Seek backward to find an `InCB=Linker` preceded by an `InCB=Consonant`
471+ /// (potentially separated by some number of `InCB=Linker` or `InCB=Extend`).
472+ /// If we find the consonant in question, then there's no break; if we find a consonant
473+ /// with no linker, or a non-linker non-extend non-consonant, or the start of text, there's a break;
474+ /// otherwise we need more context.
465475 #[ inline]
466476 fn handle_incb_consonant ( & mut self , chunk : & str , chunk_start : usize ) {
467477 use crate :: tables:: { self , grapheme as gr} ;
468478
479+ // GB9c only applies to extended grapheme clusters
469480 if !self . is_extended {
470481 self . decide ( true ) ;
471482 return ;
@@ -475,23 +486,28 @@ impl GraphemeCursor {
475486
476487 for ch in chunk. chars ( ) . rev ( ) {
477488 if tables:: is_incb_linker ( ch) {
489+ // We found an InCB linker
478490 incb_linker_count += 1 ;
479491 self . incb_linker_count = Some ( incb_linker_count) ;
480492 } else if tables:: derived_property:: InCB_Extend ( ch) {
481- // continue
493+ // We ignore InCB extends, continue
482494 } else {
495+ // Prev character is neither linker nor extend; break suppressed iff we saw a linker and it's InCB=Consonant
483496 let result = !( self . incb_linker_count . unwrap_or ( 0 ) > 0
484497 && self . grapheme_category ( ch) == gr:: GC_InCB_Consonant ) ;
485498 self . decide ( result) ;
486499 return ;
487500 }
488501 }
502+
489503 if chunk_start == 0 {
504+ // Start of text and we still haven't found a consonant, so break
490505 self . decide ( true ) ;
491- return ;
506+ } else {
507+ // We need more context
508+ self . pre_context_offset = Some ( chunk_start) ;
509+ self . state = GraphemeState :: InCbConsonant ;
492510 }
493- self . pre_context_offset = Some ( chunk_start) ;
494- self . state = GraphemeState :: InCbConsonant ;
495511 }
496512
497513 #[ inline]
@@ -509,10 +525,10 @@ impl GraphemeCursor {
509525 self . ris_count = Some ( ris_count) ;
510526 if chunk_start == 0 {
511527 self . decide ( ( ris_count % 2 ) == 0 ) ;
512- return ;
528+ } else {
529+ self . pre_context_offset = Some ( chunk_start) ;
530+ self . state = GraphemeState :: Regional ;
513531 }
514- self . pre_context_offset = Some ( chunk_start) ;
515- self . state = GraphemeState :: Regional ;
516532 }
517533
518534 #[ inline]
@@ -540,10 +556,10 @@ impl GraphemeCursor {
540556 }
541557 if chunk_start == 0 {
542558 self . decide ( true ) ;
543- return ;
559+ } else {
560+ self . pre_context_offset = Some ( chunk_start) ;
561+ self . state = GraphemeState :: Emoji ;
544562 }
545- self . pre_context_offset = Some ( chunk_start) ;
546- self . state = GraphemeState :: Emoji ;
547563 }
548564
549565 #[ inline]
0 commit comments