unicode-rs
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 6 additions & 0 deletions
diff --git a/benches/chars.rs b/benches/chars.rs
Lines changed: 2 additions & 2 deletions
diff --git a/benches/unicode_word_indices.rs b/benches/unicode_word_indices.rs
Lines changed: 37 additions & 0 deletions
diff --git a/benches/words.rs b/benches/words.rs
Lines changed: 2 additions & 2 deletions
@@ -24,6 +24,7 @@ no_std = [] # This is a no-op, preserved for backward compatibility only.
 [dev-dependencies]
 quickcheck = "0.7"
 criterion = "0.5"
+proptest = "1.7.0"
 
 [[bench]]
 name = "chars"
@@ -36,3 +37,8 @@ harness = false
 [[bench]]
 name = "word_bounds"
 harness = false
+
+[[bench]]
+name = "unicode_word_indices"
+harness = false
+
@@ -41,15 +41,15 @@ fn bench_all(c: &mut Criterion) {
     for file in FILES {
         group.bench_with_input(
             BenchmarkId::new("grapheme", file),
-            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
+            &fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
             |b, content| b.iter(|| grapheme(content)),
         );
     }
 
     for file in FILES {
         group.bench_with_input(
             BenchmarkId::new("scalar", file),
-            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
+            &fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
             |b, content| b.iter(|| scalar(content)),
         );
     }
 
@@ -0,0 +1,37 @@
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+
+use std::fs;
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Base names (without the `.txt` extension) of the input texts under
+/// `benches/texts/` that `bench_all` iterates over. Entries that are commented
+/// out are simply not part of the list and are therefore not benchmarked.
+const FILES: &[&str] = &[
+    "log", //"arabic",
+    "english",
+    //"hindi",
+    "japanese",
+    //"korean",
+    //"mandarin",
+    //"russian",
+    //"source_code",
+];
+
+/// Benchmark body: walks every item yielded by `unicode_word_indices` over
+/// `text` and hands each one to `black_box`.
+///
+/// NOTE(review): despite its name this function iterates word indices, not
+/// graphemes — the name looks carried over from a sibling benchmark; consider
+/// renaming it together with its call site in `bench_all`.
+#[inline(always)]
+fn grapheme(text: &str) {
+    for w in text.unicode_word_indices() {
+        // `black_box` is Criterion's optimization barrier: it keeps the
+        // otherwise unused items from being optimized away.
+        black_box(w);
+    }
+}
+
+/// Registers one `unicode_word_indices` benchmark per entry in `FILES`.
+///
+/// Each input is read from `benches/texts/{file}.txt` once, outside the timed
+/// closure, and its byte length is set as the group's throughput before the
+/// benchmark for that file is registered.
+///
+/// # Panics
+///
+/// Panics if an input file cannot be read; the message names the path.
+fn bench_all(c: &mut Criterion) {
+    let mut group = c.benchmark_group("unicode_word_indices");
+
+    for file in FILES {
+        let path = format!("benches/texts/{file}.txt");
+        // Name the offending path on failure; a bare `unwrap` would only show
+        // the underlying I/O error.
+        let input = fs::read_to_string(&path)
+            .unwrap_or_else(|err| panic!("failed to read benchmark input {path}: {err}"));
+        group.throughput(criterion::Throughput::Bytes(input.len() as u64));
+        group.bench_with_input(BenchmarkId::from_parameter(file), &input, |b, content| {
+            b.iter(|| grapheme(content))
+        });
+    }
+
+    // Finish the group explicitly, as Criterion recommends, instead of relying
+    // on the implicit finish that happens when the group is dropped.
+    group.finish();
+}
+
+// Collect `bench_all` into the `benches` group and let Criterion generate the
+// benchmark entry point (this bench target is declared with `harness = false`).
+criterion_group!(benches, bench_all);
+criterion_main!(benches);
@@ -27,7 +27,7 @@ fn bench_all(c: &mut Criterion) {
     for file in FILES {
         group.bench_with_input(
             BenchmarkId::new("grapheme", file),
-            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
+            &fs::read_to_string(format!("benches/texts/{file}.txt",)).unwrap(),
             |b, content| b.iter(|| grapheme(content)),
         );
     }
 
@@ -41,15 +41,15 @@ fn bench_all(c: &mut Criterion) {
     for file in FILES {
         group.bench_with_input(
             BenchmarkId::new("grapheme", file),
-            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
+            &fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
             |b, content| b.iter(|| grapheme(content)),
         );
     }
 
     for file in FILES {
         group.bench_with_input(
             BenchmarkId::new("scalar", file),
-            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
+            &fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
             |b, content| b.iter(|| scalar(content)),
         );
     }
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@ fn bench_all(c: &mut Criterion) {`
`27`	`27`	`for file in FILES {`
`28`	`28`	`group.bench_with_input(`
`29`	`29`	`BenchmarkId::new("grapheme", file),`
`30`		`- &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),`
	`30`	`+ &fs::read_to_string(format!("benches/texts/{file}.txt",)).unwrap(),`
`31`	`31`	`\|b, content\| b.iter(\|\| grapheme(content)),`
`32`	`32`	`);`
`33`	`33`	`}`