|
2 | 2 | // |
3 | 3 | // For the full copyright and license information, please view the LICENSE |
4 | 4 | // file that was distributed with this source code. |
5 | | -use std::str::from_utf8; |
6 | 5 |
|
7 | 6 | use crate::formatteriteminfo::{FormatWriter, FormatterItemInfo}; |
8 | 7 |
|
@@ -51,33 +50,13 @@ fn format_item_c(bytes: &[u8]) -> String { |
51 | 50 | let b = bytes[0]; |
52 | 51 |
|
53 | 52 | if b & 0x80 == 0x00 { |
| 53 | + // ASCII byte (0xxxxxxx) |
54 | 54 | match C_CHARS.get(b as usize) { |
55 | 55 | Some(s) => format!("{s:>4}"), |
56 | 56 | None => format!("{b:>4}"), |
57 | 57 | } |
58 | | - } else if (b & 0xc0) == 0x80 { |
59 | | - // second or subsequent octet of an utf-8 sequence |
60 | | - String::from(" **") |
61 | | - } else if ((b & 0xe0) == 0xc0) && (bytes.len() >= 2) { |
62 | | - // start of a 2 octet utf-8 sequence |
63 | | - match from_utf8(&bytes[0..2]) { |
64 | | - Ok(s) => format!("{s:>4}"), |
65 | | - Err(_) => format!(" {b:03o}"), |
66 | | - } |
67 | | - } else if ((b & 0xf0) == 0xe0) && (bytes.len() >= 3) { |
68 | | - // start of a 3 octet utf-8 sequence |
69 | | - match from_utf8(&bytes[0..3]) { |
70 | | - Ok(s) => format!("{s:>4}"), |
71 | | - Err(_) => format!(" {b:03o}"), |
72 | | - } |
73 | | - } else if ((b & 0xf8) == 0xf0) && (bytes.len() >= 4) { |
74 | | - // start of a 4 octet utf-8 sequence |
75 | | - match from_utf8(&bytes[0..4]) { |
76 | | - Ok(s) => format!("{s:>4}"), |
77 | | - Err(_) => format!(" {b:03o}"), |
78 | | - } |
79 | 58 | } else { |
80 | | - // invalid utf-8 |
| 59 | + // Any non-ASCII byte (UTF-8 continuation, lead, or invalid byte) — print as raw octal |
81 | 60 | format!(" {b:03o}") |
82 | 61 | } |
83 | 62 | } |
@@ -125,27 +104,22 @@ fn test_format_item_c() { |
125 | 104 | assert_eq!(" 177", format_item_c(&[0x7f])); |
126 | 105 | assert_eq!(" A", format_item_c(&[0x41, 0x21])); |
127 | 106 |
|
128 | | - assert_eq!(" **", format_item_c(&[0x80])); |
129 | | - assert_eq!(" **", format_item_c(&[0x9f])); |
| 107 | + assert_eq!(" 200", format_item_c(&[0x80])); |
| 108 | + assert_eq!(" 237", format_item_c(&[0x9f])); |
130 | 109 |
|
131 | | - assert_eq!(" ß", format_item_c(&[0xc3, 0x9f])); |
132 | | - assert_eq!(" ß", format_item_c(&[0xc3, 0x9f, 0x21])); |
| 110 | + assert_eq!(" 303", format_item_c(&[0xc3, 0x9f])); |
| 111 | + assert_eq!(" 303", format_item_c(&[0xc3, 0x9f, 0x21])); |
133 | 112 |
|
134 | | - assert_eq!(" \u{1000}", format_item_c(&[0xe1, 0x80, 0x80])); |
135 | | - assert_eq!(" \u{1000}", format_item_c(&[0xe1, 0x80, 0x80, 0x21])); |
| 113 | + assert_eq!(" 341", format_item_c(&[0xe1, 0x80, 0x80])); |
136 | 114 |
|
137 | | - assert_eq!(" \u{1f496}", format_item_c(&[0xf0, 0x9f, 0x92, 0x96])); |
138 | | - assert_eq!( |
139 | | - " \u{1f496}", |
140 | | - format_item_c(&[0xf0, 0x9f, 0x92, 0x96, 0x21]) |
141 | | - ); |
| 115 | + assert_eq!(" 360", format_item_c(&[0xf0, 0x9f, 0x92, 0x96])); |
142 | 116 |
|
143 | 117 | assert_eq!(" 300", format_item_c(&[0xc0, 0x80])); // invalid utf-8 (UTF-8 null) |
144 | 118 | assert_eq!(" 301", format_item_c(&[0xc1, 0xa1])); // invalid utf-8 |
145 | 119 | assert_eq!(" 303", format_item_c(&[0xc3, 0xc3])); // invalid utf-8 |
146 | 120 | assert_eq!(" 360", format_item_c(&[0xf0, 0x82, 0x82, 0xac])); // invalid utf-8 (overlong) |
147 | 121 | assert_eq!(" 360", format_item_c(&[0xf0, 0x9f, 0x92])); // invalid utf-8 (missing octet) |
148 | | - assert_eq!(" \u{10FFFD}", format_item_c(&[0xf4, 0x8f, 0xbf, 0xbd])); // largest valid utf-8 // spell-checker:ignore 10FFFD FFFD |
| 122 | + assert_eq!(" 364", format_item_c(&[0xf4, 0x8f, 0xbf, 0xbd])); // largest valid utf-8 // spell-checker:ignore 10FFFD FFFD |
149 | 123 | assert_eq!(" 364", format_item_c(&[0xf4, 0x90, 0x00, 0x00])); // invalid utf-8 |
150 | 124 | assert_eq!(" 365", format_item_c(&[0xf5, 0x80, 0x80, 0x80])); // invalid utf-8 |
151 | 125 | assert_eq!(" 377", format_item_c(&[0xff])); // invalid utf-8 |
|
0 commit comments