@@ -4,94 +4,49 @@ extern crate unicode_segmentation;
4
4
5
5
use bencher:: Bencher ;
6
6
use unicode_segmentation:: UnicodeSegmentation ;
7
+ use std:: fs;
7
8
8
- const TEXT_ARABIC : & str = include_str ! ( "texts/arabic.txt" ) ;
9
- const TEXT_ENGLISH : & str = include_str ! ( "texts/english.txt" ) ;
10
- const TEXT_HINDI : & str = include_str ! ( "texts/hindi.txt" ) ;
11
- const TEXT_JAPANESE : & str = include_str ! ( "texts/japanese.txt" ) ;
12
- const TEXT_KOREAN : & str = include_str ! ( "texts/korean.txt" ) ;
13
- const TEXT_MANDARIN : & str = include_str ! ( "texts/mandarin.txt" ) ;
14
- const TEXT_RUSSIAN : & str = include_str ! ( "texts/russian.txt" ) ;
15
- const TEXT_SOURCE_CODE : & str = include_str ! ( "texts/source_code.txt" ) ;
16
-
17
- fn graphemes_arabic ( bench : & mut Bencher ) {
9
+ fn graphemes ( bench : & mut Bencher , path : & str ) {
10
+ let text = fs:: read_to_string ( path) . unwrap ( ) ;
18
11
bench. iter ( || {
19
- for g in UnicodeSegmentation :: graphemes ( TEXT_ARABIC , true ) {
12
+ for g in UnicodeSegmentation :: graphemes ( & * text , true ) {
20
13
bencher:: black_box ( g) ;
21
14
}
22
15
} ) ;
23
16
24
- bench. bytes = TEXT_ARABIC . len ( ) as u64 ;
17
+ bench. bytes = text . len ( ) as u64 ;
25
18
}
26
19
27
- fn graphemes_english ( bench : & mut Bencher ) {
28
- bench. iter ( || {
29
- for g in UnicodeSegmentation :: graphemes ( TEXT_ENGLISH , true ) {
30
- bencher:: black_box ( g) ;
31
- }
32
- } ) ;
20
+ fn graphemes_arabic ( bench : & mut Bencher ) {
21
+ graphemes ( bench, "benches/texts/arabic.txt" ) ;
22
+ }
33
23
34
- bench. bytes = TEXT_ENGLISH . len ( ) as u64 ;
24
+ fn graphemes_english ( bench : & mut Bencher ) {
25
+ graphemes ( bench, "benches/texts/english.txt" ) ;
35
26
}
36
27
37
28
fn graphemes_hindi ( bench : & mut Bencher ) {
38
- bench. iter ( || {
39
- for g in UnicodeSegmentation :: graphemes ( TEXT_HINDI , true ) {
40
- bencher:: black_box ( g) ;
41
- }
42
- } ) ;
43
-
44
- bench. bytes = TEXT_HINDI . len ( ) as u64 ;
29
+ graphemes ( bench, "benches/texts/hindi.txt" ) ;
45
30
}
46
31
47
32
fn graphemes_japanese ( bench : & mut Bencher ) {
48
- bench. iter ( || {
49
- for g in UnicodeSegmentation :: graphemes ( TEXT_JAPANESE , true ) {
50
- bencher:: black_box ( g) ;
51
- }
52
- } ) ;
53
-
54
- bench. bytes = TEXT_JAPANESE . len ( ) as u64 ;
33
+ graphemes ( bench, "benches/texts/japanese.txt" ) ;
55
34
}
56
35
57
36
fn graphemes_korean ( bench : & mut Bencher ) {
58
- bench. iter ( || {
59
- for g in UnicodeSegmentation :: graphemes ( TEXT_KOREAN , true ) {
60
- bencher:: black_box ( g) ;
61
- }
62
- } ) ;
63
-
64
- bench. bytes = TEXT_KOREAN . len ( ) as u64 ;
37
+ graphemes ( bench, "benches/texts/korean.txt" ) ;
65
38
}
66
39
67
40
fn graphemes_mandarin ( bench : & mut Bencher ) {
68
- bench. iter ( || {
69
- for g in UnicodeSegmentation :: graphemes ( TEXT_MANDARIN , true ) {
70
- bencher:: black_box ( g) ;
71
- }
72
- } ) ;
73
-
74
- bench. bytes = TEXT_MANDARIN . len ( ) as u64 ;
41
+ graphemes ( bench, "benches/texts/mandarin.txt" ) ;
75
42
}
76
43
77
44
fn graphemes_russian ( bench : & mut Bencher ) {
78
- bench. iter ( || {
79
- for g in UnicodeSegmentation :: graphemes ( TEXT_RUSSIAN , true ) {
80
- bencher:: black_box ( g) ;
81
- }
82
- } ) ;
83
-
84
- bench. bytes = TEXT_RUSSIAN . len ( ) as u64 ;
45
+ graphemes ( bench, "benches/texts/russian.txt" ) ;
85
46
}
86
47
87
48
fn graphemes_source_code ( bench : & mut Bencher ) {
88
- bench. iter ( || {
89
- for g in UnicodeSegmentation :: graphemes ( TEXT_SOURCE_CODE , true ) {
90
- bencher:: black_box ( g) ;
91
- }
92
- } ) ;
93
-
94
- bench. bytes = TEXT_SOURCE_CODE . len ( ) as u64 ;
49
+ graphemes ( bench, "benches/texts/source_code.txt" ) ;
95
50
}
96
51
97
52
benchmark_group ! (
0 commit comments