@@ -478,8 +478,8 @@ enum PairResult {
478
478
}
479
479
480
480
fn check_pair ( before : GraphemeCat , after : GraphemeCat ) -> PairResult {
481
- use self :: PairResult :: * ;
482
481
use tables:: grapheme:: GraphemeCat :: * ;
482
+ use self :: PairResult :: * ;
483
483
match ( before, after) {
484
484
( GC_Control , GC_Control ) => CheckCrlf , // GB3
485
485
( GC_Control , _) => Break , // GB4
@@ -498,7 +498,7 @@ fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
498
498
( _, GC_ZWJ ) => NotBreak , // GB9
499
499
( _, GC_SpacingMark ) => Extended , // GB9a
500
500
( GC_Prepend , _) => Extended , // GB9a
501
- ( GC_Base , GC_E_Modifier ) => NotBreak, // GB10
501
+ ( GC_E_Base , GC_E_Modifier ) => NotBreak , // GB10
502
502
( GC_E_Base_GAZ , GC_E_Modifier ) => NotBreak , // GB10
503
503
( GC_Extend , GC_E_Modifier ) => Emoji , // GB10
504
504
( GC_ZWJ , GC_Glue_After_Zwj ) => NotBreak , // GB11
@@ -527,14 +527,25 @@ impl GraphemeCursor {
527
527
}
528
528
}
529
529
530
+ pub fn set_cursor ( & mut self , offset : usize ) {
531
+ if offset != self . offset {
532
+ self . offset = offset;
533
+ self . state = if offset == 0 || offset == self . len {
534
+ GraphemeCursorState :: Break
535
+ } else {
536
+ GraphemeCursorState :: Unknown
537
+ } ;
538
+ }
539
+ }
540
+
530
541
pub fn provide_context ( & mut self , chunk : & str , chunk_start : usize ) {
531
542
use tables:: grapheme as gr;
532
543
assert ! ( chunk_start + chunk. len( ) == self . pre_context_offset. unwrap( ) ) ;
533
544
self . pre_context_offset = None ;
534
545
if self . is_extended && chunk_start + chunk. len ( ) == self . offset {
535
546
let ch = chunk. chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
536
547
if gr:: grapheme_category ( ch) == gr:: GC_Prepend {
537
- self . decide ( false ) ;
548
+ self . decide ( false ) ; // GB9b
538
549
return ;
539
550
}
540
551
}
@@ -680,4 +691,54 @@ impl GraphemeCursor {
680
691
}
681
692
}
682
693
694
+ pub fn next_boundary ( & mut self , chunk : & str , chunk_start : usize ) -> Result < Option < usize > , GraphemeIncomplete > {
695
+ if self . offset == self . len {
696
+ return Ok ( None ) ;
697
+ }
698
+ loop {
699
+ let ch = chunk[ self . offset - chunk_start..] . chars ( ) . next ( ) . unwrap ( ) ;
700
+ self . offset += ch. len_utf8 ( ) ;
701
+ self . cat = self . catb . take ( ) ;
702
+ self . state = GraphemeCursorState :: Unknown ;
703
+ if let ( Some ( ris_count) , Some ( cat) ) = ( self . ris_count , self . cat ) {
704
+ if cat == GraphemeCat :: GC_Regional_Indicator {
705
+ self . ris_count = Some ( ris_count + 1 ) ;
706
+ } else {
707
+ self . ris_count = Some ( 0 ) ;
708
+ }
709
+ }
710
+ if self . offset == self . len {
711
+ self . decide ( true ) ;
712
+ } else if self . offset >= chunk_start + chunk. len ( ) {
713
+ return Err ( GraphemeIncomplete :: NextChunk ) ;
714
+ }
715
+ if self . is_boundary ( chunk, chunk_start) ? {
716
+ return Ok ( Some ( self . offset ) ) ;
717
+ }
718
+ }
719
+ }
720
+
721
+ pub fn prev_boundary ( & mut self , chunk : & str , chunk_start : usize ) -> Result < Option < usize > , GraphemeIncomplete > {
722
+ if self . offset == 0 {
723
+ return Ok ( None ) ;
724
+ }
725
+ loop {
726
+ if self . offset == chunk_start {
727
+ return Err ( GraphemeIncomplete :: PrevChunk ) ;
728
+ }
729
+ let ch = chunk[ ..self . offset - chunk_start] . chars ( ) . rev ( ) . next ( ) . unwrap ( ) ;
730
+ self . offset -= ch. len_utf8 ( ) ;
731
+ self . catb = self . cat . take ( ) ;
732
+ self . state = GraphemeCursorState :: Unknown ;
733
+ if let Some ( ris_count) = self . ris_count {
734
+ self . ris_count = if ris_count > 0 { Some ( ris_count - 1 ) } else { None } ;
735
+ }
736
+ if self . offset == 0 {
737
+ self . decide ( true ) ;
738
+ }
739
+ if self . is_boundary ( chunk, chunk_start) ? {
740
+ return Ok ( Some ( self . offset ) ) ;
741
+ }
742
+ }
743
+ }
683
744
}
0 commit comments