Continuing new grapheme boundary logic · jmuk/unicode-segmentation@aaf9da4 · GitHub
[go: up one dir, main page]

Skip to content

Commit aaf9da4

Browse files
committed
Continuing new grapheme boundary logic
Implemented next_boundary and prev_boundary functions in terms of is_boundary (plus fixups to the internal state when moving the cursor). Fixed various problems in previous commit. Still work in progress, not tested yet.
1 parent f0df6be commit aaf9da4

File tree

1 file changed

+64
-3
lines changed

1 file changed

+64
-3
lines changed

src/grapheme.rs

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,8 @@ enum PairResult {
478478
}
479479

480480
fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
481-
use self::PairResult::*;
482481
use tables::grapheme::GraphemeCat::*;
482+
use self::PairResult::*;
483483
match (before, after) {
484484
(GC_Control, GC_Control) => CheckCrlf, // GB3
485485
(GC_Control, _) => Break, // GB4
@@ -498,7 +498,7 @@ fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
498498
(_, GC_ZWJ) => NotBreak, // GB9
499499
(_, GC_SpacingMark) => Extended, // GB9a
500500
(GC_Prepend, _) => Extended, // GB9a
501-
(GC_Base, GC_E_Modifier) => NotBreak, // GB10
501+
(GC_E_Base, GC_E_Modifier) => NotBreak, // GB10
502502
(GC_E_Base_GAZ, GC_E_Modifier) => NotBreak, // GB10
503503
(GC_Extend, GC_E_Modifier) => Emoji, // GB10
504504
(GC_ZWJ, GC_Glue_After_Zwj) => NotBreak, // GB11
@@ -527,14 +527,25 @@ impl GraphemeCursor {
527527
}
528528
}
529529

530+
pub fn set_cursor(&mut self, offset: usize) {
531+
if offset != self.offset {
532+
self.offset = offset;
533+
self.state = if offset == 0 || offset == self.len {
534+
GraphemeCursorState::Break
535+
} else {
536+
GraphemeCursorState::Unknown
537+
};
538+
}
539+
}
540+
530541
pub fn provide_context(&mut self, chunk: &str, chunk_start: usize) {
531542
use tables::grapheme as gr;
532543
assert!(chunk_start + chunk.len() == self.pre_context_offset.unwrap());
533544
self.pre_context_offset = None;
534545
if self.is_extended && chunk_start + chunk.len() == self.offset {
535546
let ch = chunk.chars().rev().next().unwrap();
536547
if gr::grapheme_category(ch) == gr::GC_Prepend {
537-
self.decide(false);
548+
self.decide(false); // GB9b
538549
return;
539550
}
540551
}
@@ -680,4 +691,54 @@ impl GraphemeCursor {
680691
}
681692
}
682693

694+
pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
695+
if self.offset == self.len {
696+
return Ok(None);
697+
}
698+
loop {
699+
let ch = chunk[self.offset - chunk_start..].chars().next().unwrap();
700+
self.offset += ch.len_utf8();
701+
self.cat = self.catb.take();
702+
self.state = GraphemeCursorState::Unknown;
703+
if let (Some(ris_count), Some(cat)) = (self.ris_count, self.cat) {
704+
if cat == GraphemeCat::GC_Regional_Indicator {
705+
self.ris_count = Some(ris_count + 1);
706+
} else {
707+
self.ris_count = Some(0);
708+
}
709+
}
710+
if self.offset == self.len {
711+
self.decide(true);
712+
} else if self.offset >= chunk_start + chunk.len() {
713+
return Err(GraphemeIncomplete::NextChunk);
714+
}
715+
if self.is_boundary(chunk, chunk_start)? {
716+
return Ok(Some(self.offset));
717+
}
718+
}
719+
}
720+
721+
pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
722+
if self.offset == 0 {
723+
return Ok(None);
724+
}
725+
loop {
726+
if self.offset == chunk_start {
727+
return Err(GraphemeIncomplete::PrevChunk);
728+
}
729+
let ch = chunk[..self.offset - chunk_start].chars().rev().next().unwrap();
730+
self.offset -= ch.len_utf8();
731+
self.catb = self.cat.take();
732+
self.state = GraphemeCursorState::Unknown;
733+
if let Some(ris_count) = self.ris_count {
734+
self.ris_count = if ris_count > 0 { Some(ris_count - 1) } else { None };
735+
}
736+
if self.offset == 0 {
737+
self.decide(true);
738+
}
739+
if self.is_boundary(chunk, chunk_start)? {
740+
return Ok(Some(self.offset));
741+
}
742+
}
743+
}
683744
}

0 commit comments

Comments
 (0)
0