Add method to decode UCS-2 (#7) · rust-osdev/ucs2-rs@e0df913 · GitHub
[go: up one dir, main page]

Skip to content

Commit e0df913

Browse files
IsaacWoods authored and GabrielMajeri committed
Add method to decode UCS-2 (#7)
* Switch to 2018 edition * Add method to decode UCS-2 into UTF-8 * Add decoding test * Add docs to decode method * Bump version and add author field
1 parent f921425 commit e0df913

File tree

2 files changed

+66
-2
lines changed

2 files changed

+66
-2
lines changed

Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
[package]
22
name = "ucs2"
3-
version = "0.2.0"
4-
authors = ["Gabriel Majeri <gabriel.majeri6@gmail.com>", "Fredrik Aleksander"]
3+
version = "0.3.0"
4+
authors = ["Gabriel Majeri <gabriel.majeri6@gmail.com>", "Fredrik Aleksander", "Isaac Woods"]
55
description = "UCS-2 decoding and encoding functions"
66
repository = "https://github.com/GabrielMajeri/ucs2-rs"
77
readme = "README.md"
88
keywords = ["ucs2", "no-std", "encoding"]
99
categories = ["encoding", "no-std"]
1010
license = "MPL-2.0"
11+
edition = "2018"
12+
13+
[dependencies]
14+
bit_field = "0.10"
1115

1216
[badges]
1317
is-it-maintained-issue-resolution = { repository = "https://github.com/GabrielMajeri/ucs2-rs" }

src/lib.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#[deny(missing_docs)]
66
#[deny(clippy::all)]
7+
use bit_field::BitField;
78

89
/// Possible errors returned by the API.
910
#[derive(Debug, Copy, Clone)]
@@ -91,6 +92,52 @@ where
9192
Ok(())
9293
}
9394

95+
/// Decode an input UCS-2 string into a UTF-8 string.
96+
///
97+
/// The returned `usize` represents the length of the returned buffer,
98+
/// in bytes.
99+
pub fn decode(input: &[u16], output: &mut [u8]) -> Result<usize> {
100+
let buffer_size = output.len();
101+
let mut i = 0;
102+
103+
for &ch in input.iter() {
104+
/*
105+
* We need to find how many bytes of UTF-8 this UCS-2 code-point needs. Because UCS-2 can only encode
106+
* the Basic Multilingual Plane, a maximum of three bytes are needed.
107+
*/
108+
if (0x0000..0x0080).contains(&ch) {
109+
// Can be encoded in a single byte
110+
if i >= buffer_size {
111+
return Err(Error::BufferOverflow);
112+
}
113+
114+
output[i] = ch as u8;
115+
i += 1;
116+
} else if (0x0080..0x0800).contains(&ch) {
117+
// Can be encoded as two bytes
118+
if (i + 1) >= buffer_size {
119+
return Err(Error::BufferOverflow);
120+
}
121+
122+
output[i] = 0b11000000 + ch.get_bits(6..11) as u8;
123+
output[i + 1] = 0b10000000 + ch.get_bits(0..6) as u8;
124+
i += 2;
125+
} else {
126+
// Can be encoded as three bytes
127+
if (i + 2) >= buffer_size {
128+
return Err(Error::BufferOverflow);
129+
}
130+
131+
output[i] = 0b11100000 + ch.get_bits(12..16) as u8;
132+
output[i + 1] = 0b10000000 + ch.get_bits(6..12) as u8;
133+
output[i + 2] = 0b10000000 + ch.get_bits(0..6) as u8;
134+
i += 3;
135+
}
136+
}
137+
138+
Ok(i)
139+
}
140+
94141
#[cfg(test)]
95142
mod tests {
96143
use super::*;
@@ -105,4 +152,17 @@ mod tests {
105152

106153
assert_eq!(buffer[..], [0x0151, 0x044D, 0x254B]);
107154
}
155+
156+
#[test]
fn decoding() {
    // Four code points whose UTF-8 encodings take 1, 2, 3 and 3 bytes,
    // i.e. 9 bytes in total.
    let text = "$¢ह한";

    // Round-trip: UTF-8 -> UCS-2 first...
    let mut ucs2 = [0u16; 4];
    assert_eq!(encode(text, &mut ucs2).unwrap(), 4);

    // ...then UCS-2 -> UTF-8, which must reproduce the original string.
    let mut utf8 = [0u8; 9];
    let written = decode(&ucs2, &mut utf8).unwrap();
    assert_eq!(written, 9);
    assert_eq!(core::str::from_utf8(&utf8[..9]), Ok(text));
}
108168
}

0 commit comments

Comments
 (0)
0