Cache prepend lookbehinds · cmyr/unicode-segmentation@db6e78f · GitHub
[go: up one dir, main page]

Skip to content

Commit db6e78f

Browse files
Manishearth authored and mbrubeck committed
Cache prepend lookbehinds
1 parent 7c320b5 commit db6e78f

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

src/grapheme.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
223223
let mut state = Start;
224224
let mut cat = gr::GC_Any;
225225

226+
// caching used by next() should be invalidated
227+
self.cat = None;
228+
226229
'outer: for (curr, ch) in self.string.char_indices().rev() {
227230
previdx = idx;
228231
idx = curr;
@@ -365,10 +368,12 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
365368
if self.extended && cat != gr::GC_Control {
366369
// rule GB9b: include any preceding Prepend characters
367370
for (i, c) in self.string[..idx].char_indices().rev() {
368-
// TODO: Cache this to avoid repeated lookups in the common case.
369371
match gr::grapheme_category(c) {
370372
gr::GC_Prepend => idx = i,
371-
_ => break
373+
cat => {
374+
self.catb = Some(cat);
375+
break;
376+
}
372377
}
373378
}
374379
}

src/test.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@ use std::prelude::v1::*;
1616
fn test_graphemes() {
1717
use testdata::{TEST_SAME, TEST_DIFF};
1818

19+
pub const EXTRA_DIFF: &'static [(&'static str,
20+
&'static [&'static str],
21+
&'static [&'static str])] = &[
22+
// Official test suite doesn't include two Prepend chars between two other chars.
23+
("\u{20}\u{600}\u{600}\u{20}",
24+
&["\u{20}", "\u{600}\u{600}\u{20}"],
25+
&["\u{20}", "\u{600}", "\u{600}", "\u{20}"]),
26+
];
27+
1928
for &(s, g) in TEST_SAME {
2029
// test forward iterator
2130
assert!(UnicodeSegmentation::graphemes(s, true)
@@ -34,11 +43,11 @@ fn test_graphemes() {
3443
.all(|(a,b)| a == b));
3544
}
3645

37-
for &(s, gt, gf) in TEST_DIFF {
46+
for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
3847
// test forward iterator
3948
assert!(UnicodeSegmentation::graphemes(s, true)
4049
.zip(gt.iter().cloned())
41-
.all(|(a,b)| a == b));
50+
.all(|(a,b)| a == b), "{:?}", s);
4251
assert!(UnicodeSegmentation::graphemes(s, false)
4352
.zip(gf.iter().cloned())
4453
.all(|(a,b)| a == b));

0 commit comments

Comments
 (0)
0