Handle multiple starters in the stream-safe fuzzer. · unicode-rs/unicode-normalization@29193d1 · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit 29193d1

Browse files
committed
Handle multiple starters in the stream-safe fuzzer.
NFC compositions can involve multiple starters, such as `\u{11347}` and `\u{11357}`. Adjust the counting iterator in the streaming fuzzer to only count non-starters, so that it doesn't over-count. Fixes #76.
1 parent e3ed0fe commit 29193d1

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

fuzz/fuzz_targets/streaming.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ extern crate libfuzzer_sys;
1616
use std::str::Chars;
1717
use std::cell::RefCell;
1818
use std::rc::Rc;
19-
use unicode_normalization::UnicodeNormalization;
19+
use unicode_normalization::{char::canonical_combining_class, UnicodeNormalization};
2020

2121
const MAX_NONSTARTERS: u32 = 30;
2222

@@ -30,8 +30,13 @@ impl<'a> Iterator for Counter<'a> {
3030
type Item = char;
3131

3232
fn next(&mut self) -> Option<char> {
33-
*self.value.borrow_mut() += 1;
34-
self.iter.next()
33+
let next = self.iter.next();
34+
if let Some(c) = next {
35+
if canonical_combining_class(c) != 0 {
36+
*self.value.borrow_mut() += 1;
37+
}
38+
}
39+
next
3540
}
3641
}
3742

@@ -41,9 +46,8 @@ fuzz_target!(|input: String| {
4146
let mut value = Rc::new(RefCell::new(0));
4247
let counter = Counter { iter: stream_safe.chars(), value: Rc::clone(&mut value) };
4348
for _ in counter.nfc() {
44-
// Plus 2: one for the starter at the beginning of a sequence, and
45-
// one for a starter that begins the following sequence.
46-
assert!(*value.borrow() <= MAX_NONSTARTERS + 2);
49+
// Plus 1: The iterator may consume a starter that begins the next sequence.
50+
assert!(*value.borrow() <= MAX_NONSTARTERS + 1);
4751
*value.borrow_mut() = 0;
4852
}
4953
});

0 commit comments

Comments
 (0)
0