8000 Merge pull request #100 from timClicks/inline-functions · Jake-Shadle/unicode-segmentation@12fc8d9 · GitHub
[go: up one dir, main page]

Skip to content

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 12fc8d9

Browse files
authored
Merge pull request unicode-rs#100 from timClicks/inline-functions
Increase the #[inline] opportunities - 15-40% performance improvements
2 parents 87624ad + 9310f0f commit 12fc8d9

File tree

3 files changed

+38
-31
lines changed

3 files changed

+38
-31
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ no_std = [] # This is a no-op, preserved for backward compatibility only.
2424

2525
[dev-dependencies]
2626
quickcheck = "0.7"
27-
bencher = "0.1"
27+
criterion = "0.3"
2828

2929
[[bench]]
3030
name = "graphemes"

benches/graphemes.rs

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,54 @@
1-
#[macro_use]
2-
extern crate bencher;
3-
extern crate unicode_segmentation;
1+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2+
use unicode_segmentation;
43

5-
use bencher::Bencher;
6-
use unicode_segmentation::UnicodeSegmentation;
74
use std::fs;
5+
use unicode_segmentation::UnicodeSegmentation;
86

9-
fn graphemes(bench: &mut Bencher, path: &str) {
7+
fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
108
let text = fs::read_to_string(path).unwrap();
11-
bench.iter(|| {
12-
for g in UnicodeSegmentation::graphemes(&*text, true) {
13-
bencher::black_box(g);
14-
}
15-
});
169

17-
bench.bytes = text.len() as u64;
10+
c.bench_function(&format!("graphemes_{}",lang), |bench| {
11+
bench.iter(|| {
12+
for g in UnicodeSegmentation::graphemes(black_box(&*text), true) {
13+
black_box(g);
14+
}
15+
})
16+
});
1817
}
1918

20-
fn graphemes_arabic(bench: &mut Bencher) {
21-
graphemes(bench, "benches/texts/arabic.txt");
19+
fn graphemes_arabic(c: &mut Criterion) {
20+
graphemes(c, "arabic" ,"benches/texts/arabic.txt");
2221
}
2322

24-
fn graphemes_english(bench: &mut Bencher) {
25-
graphemes(bench, "benches/texts/english.txt");
23+
fn graphemes_english(c: &mut Criterion) {
24+
graphemes(c, "english" ,"benches/texts/english.txt");
2625
}
2726

28-
fn graphemes_hindi(bench: &mut Bencher) {
29-
graphemes(bench, "benches/texts/hindi.txt");
27+
fn graphemes_hindi(c: &mut Criterion) {
28+
graphemes(c, "hindi" ,"benches/texts/hindi.txt");
3029
}
3130

32-
fn graphemes_japanese(bench: &mut Bencher) {
33-
graphemes(bench, "benches/texts/japanese.txt");
31+
fn graphemes_japanese(c: &mut Criterion) {
32+
graphemes(c, "japanese" ,"benches/texts/japanese.txt");
3433
}
3534

36-
fn graphemes_korean(bench: &mut Bencher) {
37-
graphemes(bench, "benches/texts/korean.txt");
35+
fn graphemes_korean(c: &mut Criterion) {
36+
graphemes(c, "korean" ,"benches/texts/korean.txt");
3837
}
3938

40-
fn graphemes_mandarin(bench: &mut Bencher) {
41-
graphemes(bench, "benches/texts/mandarin.txt");
39+
fn graphemes_mandarin(c: &mut Criterion) {
40+
graphemes(c, "mandarin" ,"benches/texts/mandarin.txt");
4241
}
4342

44-
fn graphemes_russian(bench: &mut Bencher) {
45-
graphemes(bench, "benches/texts/russian.txt");
43+
fn graphemes_russian(c: &mut Criterion) {
44+
graphemes(c, "russian" ,"benches/texts/russian.txt");
4645
}
4746

48-
fn graphemes_source_code(bench: &mut Bencher) {
49-
graphemes(bench, "benches/texts/source_code.txt");
47+
fn graphemes_source_code(c: &mut Criterion) {
48+
graphemes(c, "source_code","benches/texts/source_code.txt");
5049
}
5150

52-
benchmark_group!(
51+
criterion_group!(
5352
benches,
5453
graphemes_arabic,
5554
graphemes_english,
@@ -61,4 +60,4 @@ benchmark_group!(
6160
graphemes_source_code,
6261
);
6362

64-
benchmark_main!(benches);
63+
criterion_main!(benches);

src/grapheme.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ enum PairResult {
228228
Emoji, // a break if preceded by emoji base and (Extend)*
229229
}
230230

231+
#[inline]
231232
fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
232233
use crate::tables::grapheme::GraphemeCat::*;
233234
use self::PairResult::*;
@@ -407,6 +408,7 @@ impl GraphemeCursor {
407408
}
408409
}
409410

411+
#[inline]
410412
fn decide(&mut self, is_break: bool) {
411413
self.state = if is_break {
412414
GraphemeState::Break
@@ -415,11 +417,13 @@ impl GraphemeCursor {
415417
};
416418
}
417419

420+
#[inline]
418421
fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> {
419422
self.decide(is_break);
420423
Ok(is_break)
421424
}
422425

426+
#[inline]
423427
fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> {
424428
if self.state == GraphemeState::Break {
425429
Ok(true)
@@ -432,6 +436,7 @@ impl GraphemeCursor {
432436
}
433437
}
434438

439+
#[inline]
435440
fn handle_regional(&mut self, chunk: &str, chunk_start: usize) {
436441
use crate::tables::grapheme as gr;
437442
let mut ris_count = self.ris_count.unwrap_or(0);
@@ -452,6 +457,7 @@ impl GraphemeCursor {
452457
self.state = GraphemeState::Regional;
453458
}
454459

460+
#[inline]
455461
fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) {
456462
use crate::tables::grapheme as gr;
457463
let mut iter = chunk.chars().rev();
@@ -482,6 +488,7 @@ impl GraphemeCursor {
482488
self.state = GraphemeState::Emoji;
483489
}
484490

491+
#[inline]
485492
/// Determine whether the current cursor location is a grapheme cluster boundary.
486493
/// Only a part of the string need be supplied. If `chunk_start` is nonzero or
487494
/// the length of `chunk` is not equal to `len` on creation, then this method
@@ -563,6 +570,7 @@ impl GraphemeCursor {
563570
}
564571
}
565572

573+
#[inline]
566574
/// Find the next boundary after the current cursor position. Only a part of
567575
/// the string need be supplied. If the chunk is incomplete, then this
568576
/// method might return `GraphemeIncomplete::PreContext` or

0 commit comments

Comments
 (0)
0