4
4
5
5
#[ deny( missing_docs) ]
6
6
#[ deny( clippy:: all) ]
7
+ use bit_field:: BitField ;
7
8
8
9
/// Possible errors returned by the API.
9
10
#[ derive( Debug , Copy , Clone ) ]
91
92
Ok ( ( ) )
92
93
}
93
94
95
+ /// Decode an input UCS-2 string into a UTF-8 string.
96
+ ///
97
+ /// The returned `usize` represents the length of the returned buffer,
98
+ /// in bytes.
99
+ pub fn decode ( input : & [ u16 ] , output : & mut [ u8 ] ) -> Result < usize > {
100
+ let buffer_size = output. len ( ) ;
101
+ let mut i = 0 ;
102
+
103
+ for & ch in input. iter ( ) {
104
+ /*
105
+ * We need to find how many bytes of UTF-8 this UCS-2 code-point needs. Because UCS-2 can only encode
106
+ * the Basic Multilingual Plane, a maximum of three bytes are needed.
107
+ */
108
+ if ( 0x0000 ..0x0080 ) . contains ( & ch) {
109
+ // Can be encoded in a single byte
110
+ if i >= buffer_size {
111
+ return Err ( Error :: BufferOverflow ) ;
112
+ }
113
+
114
+ output[ i] = ch as u8 ;
115
+ i += 1 ;
116
+ } else if ( 0x0080 ..0x0800 ) . contains ( & ch) {
117
+ // Can be encoded as two bytes
118
+ if ( i + 1 ) >= buffer_size {
119
+ return Err ( Error :: BufferOverflow ) ;
120
+ }
121
+
122
+ output[ i] = 0b11000000 + ch. get_bits ( 6 ..11 ) as u8 ;
123
+ output[ i + 1 ] = 0b10000000 + ch. get_bits ( 0 ..6 ) as u8 ;
124
+ i += 2 ;
125
+ } else {
126
+ // Can be encoded as three bytes
127
+ if ( i + 2 ) >= buffer_size {
128
+ return Err ( Error :: BufferOverflow ) ;
129
+ }
130
+
131
+ output[ i] = 0b11100000 + ch. get_bits ( 12 ..16 ) as u8 ;
132
+ output[ i + 1 ] = 0b10000000 + ch. get_bits ( 6 ..12 ) as u8 ;
133
+ output[ i + 2 ] = 0b10000000 + ch. get_bits ( 0 ..6 ) as u8 ;
134
+ i += 3 ;
135
+ }
136
+ }
137
+
138
+ Ok ( i)
139
+ }
140
+
94
141
#[ cfg( test) ]
95
142
mod tests {
96
143
use super :: * ;
@@ -105,4 +152,17 @@ mod tests {
105
152
106
153
assert_eq ! ( buffer[ ..] , [ 0x0151 , 0x044D , 0x254B ] ) ;
107
154
}
155
+
156
#[test]
fn decoding() {
    // Four characters whose UTF-8 forms are 1, 2, 3 and 3 bytes long
    // (9 bytes in total), so every branch of `decode` is exercised.
    let text = "$¢ह한";

    // Produce the UCS-2 input by running the text through `encode` first.
    let mut ucs2 = [0u16; 4];
    let unit_count = encode(text, &mut ucs2).unwrap();
    assert_eq!(unit_count, 4);

    // Decoding must fill the 9-byte buffer exactly and reproduce the text.
    let mut utf8 = [0u8; 9];
    let byte_count = decode(&ucs2, &mut utf8).unwrap();
    assert_eq!(byte_count, 9);
    assert_eq!(core::str::from_utf8(&utf8[..]), Ok(text));
}
108
168
}
0 commit comments