7
7
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
-
11
10
use std:: fmt:: { self , Write } ;
12
11
13
- // Helper functions used for Unicode normalization
14
- fn canonical_sort ( comb : & mut [ ( char , u8 ) ] ) {
15
- let len = comb. len ( ) ;
16
- for i in 0 ..len {
17
- let mut swapped = false ;
18
- for j in 1 ..len-i {
19
- let class_a = comb[ j-1 ] . 1 ;
20
- let class_b = comb[ j] . 1 ;
21
- if class_a != 0 && class_b != 0 && class_a > class_b {
22
- comb. swap ( j-1 , j) ;
23
- swapped = true ;
24
- }
25
- }
26
- if !swapped { break ; }
27
- }
28
- }
29
-
30
12
#[ derive( Clone ) ]
31
13
enum DecompositionType {
32
14
Canonical ,
@@ -38,95 +20,94 @@ enum DecompositionType {
38
20
pub struct Decompositions < I > {
39
21
kind : DecompositionType ,
40
22
iter : I ,
41
- buffer : Vec < ( char , u8 ) > ,
42
- sorted : bool
23
+ done : bool ,
24
+
25
+ // This buffer stores pairs of (canonical combining class, character),
26
+ // pushed onto the end in text order.
27
+ //
28
+ // It's split into two contiguous regions by the `ready` offset. The first
29
+ // `ready` pairs are sorted and ready to emit on demand. The "pending"
30
+ // suffix afterwards still needs more characters for us to be able to sort
31
+ // in canonical order and is not safe to emit.
32
+ buffer : Vec < ( u8 , char ) > ,
33
+ ready : usize ,
43
34
}
44
35
45
36
#[ inline]
46
37
pub fn new_canonical < I : Iterator < Item =char > > ( iter : I ) -> Decompositions < I > {
47
38
Decompositions {
39
+ kind : self :: DecompositionType :: Canonical ,
48
40
iter : iter,
41
+ done : false ,
49
42
buffer : Vec :: new ( ) ,
50
- sorted : false ,
51
- kind : self :: DecompositionType :: Canonical ,
43
+ ready : 0 ,
52
44
}
53
45
}
54
46
55
47
#[ inline]
56
48
pub fn new_compatible < I : Iterator < Item =char > > ( iter : I ) -> Decompositions < I > {
57
49
Decompositions {
50
+ kind : self :: DecompositionType :: Compatible ,
58
51
iter : iter,
52
+ done : false ,
59
53
buffer : Vec :: new ( ) ,
60
- sorted : false ,
61
- kind : self :: DecompositionType :: Compatible ,
54
+ ready : 0 ,
62
55
}
63
56
}
64
57
65
- impl < I : Iterator < Item =char > > Iterator for Decompositions < I > {
66
- type Item = char ;
67
-
58
+ impl < I > Decompositions < I > {
68
59
#[ inline]
69
- fn next ( & mut self ) -> Option < char > {
70
- use self :: DecompositionType :: * ;
71
-
72
- match self . buffer . first ( ) {
73
- Some ( & ( c, 0 ) ) => {
74
- self . sorted = false ;
75
- self . buffer . remove ( 0 ) ;
76
- return Some ( c) ;
77
- }
78
- Some ( & ( c, _) ) if self . sorted => {
79
- self . buffer . remove ( 0 ) ;
80
- return Some ( c) ;
81
- }
82
- _ => self . sorted = false
60
+ fn push_back ( & mut self , ch : char ) {
61
+ let class = super :: char:: canonical_combining_class ( ch) ;
62
+ if class == 0 {
63
+ self . sort_pending ( ) ;
83
64
}
65
+ self . buffer . push ( ( class, ch) ) ;
66
+ }
84
67
85
- if !self . sorted {
86
- for ch in self . iter . by_ref ( ) {
87
- let buffer = & mut self . buffer ;
88
- let sorted = & mut self . sorted ;
89
- {
90
- let callback = |d| {
91
- let class =
92
- super :: char:: canonical_combining_class ( d) ;
93
- if class == 0 && !* sorted {
94
- canonical_sort ( buffer) ;
95
- * sorted = true ;
96
- }
97
- buffer. push ( ( d, class) ) ;
98
- } ;
99
- match self . kind {
100
- Canonical => {
101
- super :: char:: decompose_canonical ( ch, callback)
102
- }
103
- Compatible => {
104
- super :: char:: decompose_compatible ( ch, callback)
105
- }
106
- }
107
- }
108
- if * sorted {
109
- break
110
- }
111
- }
68
+ #[ inline]
69
+ fn sort_pending ( & mut self ) {
70
+ if self . ready == 0 && self . buffer . is_empty ( ) {
71
+ return ;
112
72
}
113
73
114
- if !self . sorted {
115
- canonical_sort ( & mut self . buffer ) ;
116
- self . sorted = true ;
117
- }
74
+ // NB: `sort_by_key` is stable, so it will preserve the original text's
75
+ // order within a combining class.
76
+ self . buffer [ self . ready ..] . sort_by_key ( |k| k. 0 ) ;
77
+ self . ready = self . buffer . len ( ) ;
78
+ }
118
79
119
- if self . buffer . is_empty ( ) {
80
+ #[ inline]
81
+ fn pop_front ( & mut self ) -> Option < char > {
82
+ if self . ready == 0 {
120
83
None
121
84
} else {
122
- match self . buffer . remove ( 0 ) {
123
- ( c, 0 ) => {
124
- self . sorted = false ;
125
- Some ( c)
126
- }
127
- ( c, _) => Some ( c) ,
85
+ self . ready -= 1 ;
86
+ Some ( self . buffer . remove ( 0 ) . 1 )
87
+ }
88
+ }
89
+ }
90
+
91
+ impl < I : Iterator < Item =char > > Iterator for Decompositions < I > {
92
+ type Item = char ;
93
+
94
+ #[ inline]
95
+ fn next ( & mut self ) -> Option < char > {
96
+ while self . ready == 0 && !self . done {
97
+ match ( self . iter . next ( ) , & self . kind ) {
98
+ ( Some ( ch) , & DecompositionType :: Canonical ) => {
99
+ super :: char:: decompose_canonical ( ch, |d| self . push_back ( d) ) ;
100
+ } ,
101
+ ( Some ( ch) , & DecompositionType :: Compatible ) => {
102
+ super :: char:: decompose_compatible ( ch, |d| self . push_back ( d) ) ;
103
+ } ,
104
+ ( None , _) => {
105
+ self . sort_pending ( ) ;
106
+ self . done = true ;
107
+ } ,
128
108
}
129
109
}
110
+ self . pop_front ( )
130
111
}
131
112
132
113
fn size_hint ( & self ) -> ( usize , Option < usize > ) {
0 commit comments