Treat ambiguous `Modifier_Letter`s as narrow · unicode-rs/unicode-width@5a5c031 · GitHub
[go: up one dir, main page]

Skip to content

File tree

4 files changed

+19
-17
lines changed

4 files changed

+19
-17
lines changed

scripts/unicode.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,10 +442,10 @@ def load_east_asian_widths() -> list[EastAsianWidth]:
442442
),
443443
)
444444

445-
# Ambiguous `Modifier_Symbol`s are narrow
445+
# Ambiguous `Modifier_Letter`s and `Modifier_Symbol`s are narrow
446446
load_property(
447447
"extracted/DerivedGeneralCategory.txt",
448-
"Sk",
448+
r"(:?Lm|Sk)",
449449
lambda cp: (
450450
operator.setitem(width_map, cp, EastAsianWidth.NARROW)
451451
if width_map[cp] == EastAsianWidth.AMBIGUOUS

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@
122122
//! - Has an [`East_Asian_Width`] of [`Ambiguous`], or
123123
//! has a canonical decomposition to an [`Ambiguous`] character followed by [`'\u{0338}'` COMBINING LONG SOLIDUS OVERLAY], or
124124
//! is [`'\u{0387}'` GREEK ANO TELEIA](https://util.unicode.org/UnicodeJsps/character.jsp?a=0387), and
125-
//! - Does not have a [`General_Category`] of `Modifier_Symbol`, and
125+
//! - Does not have a [`General_Category`] of `Modifier_Letter` or `Modifier_Symbol`, and
126126
//! - Does not have a [`Script`] of `Latin`, `Greek`, or `Cyrillic`, or is a Roman numeral in the range `'\u{2160}'..='\u{217F}'`.
127127
//! 7. All other characters have width 1.
128128
//!

src/tables.rs

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,17 +1022,17 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([
10221022
],
10231023
#[cfg(feature = "cjk")]
10241024
[
1025-
0x00, 0x9D, 0x02, 0x02, 0x02, 0x9E, 0x9F, 0xA0, 0x02, 0x04, 0x02, 0x05, 0x06, 0x07, 0x08,
1025+
0x00, 0x9D, 0x02, 0x02, 0x02, 0x02, 0x9E, 0x9F, 0x02, 0x04, 0x02, 0x05, 0x06, 0x07, 0x08,
10261026
0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
10271027
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x02, 0x02, 0x1E, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
10281028
0x02, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x02, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x02, 0x2A,
10291029
0x02, 0x02, 0x02, 0x02,
10301030
],
10311031
#[cfg(feature = "cjk")]
10321032
[
1033-
0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0x2E, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE,
1034-
0x33, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0xAF, 0x02, 0x02, 0x35, 0x36, 0x37, 0x02, 0x38,
1035-
0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0xB0, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
1033+
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0x2E, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD,
1034+
0x33, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0xAE, 0x02, 0x02, 0x35, 0x36, 0x37, 0x02, 0x38,
1035+
0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0xAF, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10361036
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10371037
0x39, 0x39, 0x39, 0x39,
10381038
],
@@ -1042,23 +1042,23 @@ static WIDTH_MIDDLE: Align64<[[u8; 64]; WIDTH_MIDDLE_LEN]> = Align64([
10421042
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10431043
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
10441044
0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x4C, 0x02, 0x02, 0x02, 0x02, 0x02,
1045-
0xB1, 0x4E, 0x4F, 0xB2,
1045+
0xB0, 0x4E, 0x4F, 0xB1,
10461046
],
10471047
#[cfg(feature = "cjk")]
10481048
[
10491049
0x85, 0x86, 0x75, 0x02, 0x02, 0x87, 0x02, 0x02, 0x02, 0x88, 0x02, 0x02, 0x02, 0x02, 0x02,
10501050
0x02, 0x02, 0x89, 0x8A, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
1051-
0x02, 0x02, 0x8B, 0x8C, 0xB3, 0xB4, 0x8E, 0x02, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
1051+
0x02, 0x02, 0x8B, 0x8C, 0xB2, 0xB3, 0x8E, 0x02, 0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
10521052
0x96, 0x02, 0x97, 0x02, 0x02, 0x98, 0x99, 0x9A, 0x9B, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
10531053
0x02, 0x02, 0x02, 0x02,
10541054
],
10551055
]);
10561056

10571057
#[cfg(feature = "cjk")]
1058-
const WIDTH_LEAVES_LEN: usize = 181;
1058+
const WIDTH_LEAVES_LEN: usize = 180;
10591059
#[cfg(not(feature = "cjk"))]
10601060
const WIDTH_LEAVES_LEN: usize = 157;
1061-
/// Autogenerated. 181 sub-table(s). Consult [`lookup_width`] for layout info.
1061+
/// Autogenerated. 180 sub-table(s). Consult [`lookup_width`] for layout info.
10621062
static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([
10631063
[
10641064
0x55, 0x55, 0x75, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
@@ -1852,12 +1852,6 @@ static WIDTH_LEAVES: Align32<[[u8; 32]; WIDTH_LEAVES_LEN]> = Align32([
18521852
0x55, 0x55,
18531853
],
18541854
#[cfg(feature = "cjk")]
1855-
[
1856-
0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
1857-
0x55, 0x55, 0x95, 0xA9, 0x59, 0x56, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
1858-
0x55, 0x55,
1859-
],
1860-
#[cfg(feature = "cjk")]
18611855
[
18621856
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
18631857
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55,

tests/tests.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ fn test_default_ignorable() {
7878
assert_width!('\u{E0000}', Some(0), Some(0));
7979
}
8080

81+
#[test]
82+
fn test_ambiguous() {
83+
assert_width!("\u{B7}", 1, 2);
84+
assert_width!("\u{0387}", 1, 2);
85+
assert_width!("\u{A8}", 1, 1);
86+
assert_width!("\u{02C9}", 1, 1);
87+
}
88+
8189
#[test]
8290
fn test_jamo() {
8391
assert_width!('\u{1100}', Some(2), Some(2));

0 commit comments

Comments
 (0)
0