Add comments to `handle_incb_consonant` · unicode-rs/unicode-segmentation@dce3a34 · GitHub
[go: up one dir, main page]

Skip to content

Commit dce3a34

Browse files
Add comments to handle_incb_consonant
1 parent 4e4a7c6 commit dce3a34

File tree

1 file changed

+26
-10
lines changed

1 file changed

+26
-10
lines changed

src/grapheme.rs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -462,10 +462,21 @@ impl GraphemeCursor {
462462
}
463463
}
464464

465+
/// For handling rule GB9c:
466+
///
467+
/// There's an `InCB=Consonant` after this, and we need to look back
468+
/// to verify whether there should be a break.
469+
///
470+
/// Seek backward to find an `InCB=Linker` preceded by an `InCB=Consonant`
471+
/// (potentially separated by some number of `InCB=Linker` or `InCB=Extend`).
472+
/// If we find the consonant in question, then there's no break; if we find a consonant
473+
/// with no linker, or a non-linker non-extend non-consonant, or the start of text, there's a break;
474+
/// otherwise we need more context.
465475
#[inline]
466476
fn handle_incb_consonant(&mut self, chunk: &str, chunk_start: usize) {
467477
use crate::tables::{self, grapheme as gr};
468478

479+
// GB9c only applies to extended grapheme clusters
469480
if !self.is_extended {
470481
self.decide(true);
471482
return;
@@ -475,23 +486,28 @@ impl GraphemeCursor {
475486

476487
for ch in chunk.chars().rev() {
477488
if tables::is_incb_linker(ch) {
489+
// We found an InCB linker
478490
incb_linker_count += 1;
479491
self.incb_linker_count = Some(incb_linker_count);
480492
} else if tables::derived_property::InCB_Extend(ch) {
481-
// continue
493+
// We ignore InCB extends, continue
482494
} else {
495+
// Prev character is neither linker nor extend; break suppressed iff it's InCB=Consonant and we have seen at least one linker
483496
let result = !(self.incb_linker_count.unwrap_or(0) > 0
484497
&& self.grapheme_category(ch) == gr::GC_InCB_Consonant);
485498
self.decide(result);
486499
return;
487500
}
488501
}
502+
489503
if chunk_start == 0 {
504+
// Start of text and we still haven't found a consonant, so break
490505
self.decide(true);
491-
return;
506+
} else {
507+
// We need more context
508+
self.pre_context_offset = Some(chunk_start);
509+
self.state = GraphemeState::InCbConsonant;
492510
}
493-
self.pre_context_offset = Some(chunk_start);
494-
self.state = GraphemeState::InCbConsonant;
495511
}
496512

497513
#[inline]
@@ -509,10 +525,10 @@ impl GraphemeCursor {
509525
self.ris_count = Some(ris_count);
510526
if chunk_start == 0 {
511527
self.decide((ris_count % 2) == 0);
512-
return;
528+
} else {
529+
self.pre_context_offset = Some(chunk_start);
530+
self.state = GraphemeState::Regional;
513531
}
514-
self.pre_context_offset = Some(chunk_start);
515-
self.state = GraphemeState::Regional;
516532
}
517533

518534
#[inline]
@@ -540,10 +556,10 @@ impl GraphemeCursor {
540556
}
541557
if chunk_start == 0 {
542558
self.decide(true);
543-
return;
559+
} else {
560+
self.pre_context_offset = Some(chunk_start);
561+
self.state = GraphemeState::Emoji;
544562
}
545-
self.pre_context_offset = Some(chunk_start);
546-
self.state = GraphemeState::Emoji;
547563
}
548564

549565
#[inline]

0 commit comments

Comments
 (0)
0