Ensure `tables.rs` passes rustfmt · unicode-rs/unicode-normalization@16becba · GitHub
[go: up one dir, main page]

Skip to content

Commit 16becba

Browse files
Ensure tables.rs passes rustfmt
1 parent a6a221a commit 16becba

File tree

3 files changed

+9281
-24578
lines changed

3 files changed

+9281
-24578
lines changed

.github/workflows/rust.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,9 @@ jobs:
6565
runs-on: ubuntu-latest
6666
steps:
6767
- uses: actions/checkout@v3
68+
- uses: actions/setup-python@v5
69+
with:
70+
python-version: '3.12'
6871
- name: Regen
6972
run: cd scripts && python3 unicode.py
7073
- name: Diff tables

scripts/unicode.py

Lines changed: 26 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
2121
import collections
2222
import urllib.request
23+
from itertools import batched
2324

2425
UNICODE_VERSION = "15.1.0"
2526
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -354,20 +355,26 @@ def is_first_and_last(first, last):
354355
return False
355356
return first[1:-8] == last[1:-7]
356357

357-
def gen_mph_data(name, d, kv_type, kv_callback):
358+
def gen_mph_data(name, d, kv_type, kv_callback, kv_row_width):
358359
(salt, keys) = minimal_perfect_hash(d)
359-
out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
360-
for s in salt:
361-
out.write(" 0x{:x},\n".format(s))
360+
out.write(f"\npub(crate) const {name.upper()}_SALT: &[u16] = &[\n")
361+
for s_row in batched(salt, 13):
362+
out.write(" ")
363+
for s in s_row:
364+
out.write(f" 0x{s:03X},")
365+
out.write("\n")
366+
out.write("];\n")
367+
out.write(f"pub(crate) const {name.upper()}_KV: &[{kv_type}] = &[\n")
368+
for k_row in batched(keys, kv_row_width):
369+
out.write(" ")
370+
for k in k_row:
371+
out.write(f" {kv_callback(k)},")
372+
out.write("\n")
362373
out.write("];\n")
363-
out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
364-
for k in keys:
365-
out.write(" {},\n".format(kv_callback(k)))
366-
out.write("];\n\n")
367374

368375
def gen_combining_class(combining_classes, out):
369376
gen_mph_data('canonical_combining_class', combining_classes, 'u32',
370-
lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))
377+
lambda k: f"0x{int(combining_classes[k]) | (k << 8):07X}", 8)
371378

372379
def gen_composition_table(canon_comp, out):
373380
table = {}
@@ -376,7 +383,7 @@ def gen_composition_table(canon_comp, out):
376383
table[(c1 << 16) | c2] = c3
377384
(salt, keys) = minimal_perfect_hash(table)
378385
gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
379-
lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))
386+
lambda k: f"(0x{k:08X}, '\\u{{{table[k]:06X}}}')", 1)
380387

381388
out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
382389
out.write(" match (c1, c2) {\n")
@@ -403,7 +410,7 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_de
403410
assert offset < 65536
404411
out.write("];\n")
405412
gen_mph_data(name + '_decomposed', table, "(u32, (u16, u16))",
406-
lambda k: "(0x{:x}, ({}, {}))".format(k, offsets[k], len(table[k])))
413+
lambda k: f"(0x{k:05X}, (0x{offsets[k]:03X}, 0x{len(table[k]):X}))", 1)
407414

408415
def gen_qc_match(prop_table, out):
409416
out.write(" match c {\n")
@@ -421,7 +428,7 @@ def gen_qc_match(prop_table, out):
421428
out.write(" }\n")
422429

423430
def gen_nfc_qc(prop_tables, out):
424-
out.write("#[inline]\n")
431+
out.write("\n#[inline]\n")
425432
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
426433
out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
427434
gen_qc_match(prop_tables['NFC_QC'], out)
@@ -450,7 +457,7 @@ def gen_nfkd_qc(prop_tables, out):
450457

451458
def gen_combining_mark(general_category_mark, out):
452459
gen_mph_data('combining_mark', general_category_mark, 'u32',
453-
lambda k: '0x{:04x}'.format(k))
460+
lambda k: '0x{:05X}'.format(k), 10)
454461

455462
def gen_public_assigned(general_category_public_assigned, out):
456463
# This could be done as a hash but the table is somewhat small.
@@ -464,17 +471,16 @@ def gen_public_assigned(general_category_public_assigned, out):
464471
out.write(" ")
465472
start = False
466473
else:
467-
out.write(" | ")
474+
out.write("\n | ")
468475
if first == last:
469-
out.write("'\\u{%s}'\n" % hexify(first))
476+
out.write("'\\u{%s}'" % hexify(first))
470477
else:
471-
out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
472-
out.write(" => true,\n")
478+
out.write("'\\u{%s}'..='\\u{%s}'" % (hexify(first), hexify(last)))
479+
out.write(" => true,\n")
473480

474481
out.write(" _ => false,\n")
475482
out.write(" }\n")
476483
out.write("}\n")
477-
out.write("\n")
478484

479485
def gen_stream_safe(leading, trailing, out):
480486
# This could be done as a hash but the table is very small.
@@ -488,10 +494,9 @@ def gen_stream_safe(leading, trailing, out):
488494
out.write(" _ => 0,\n")
489495
out.write(" }\n")
490496
out.write("}\n")
491-
out.write("\n")
492497

493498
gen_mph_data('trailing_nonstarters', trailing, 'u32',
494-
lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))
499+
lambda k: f"0x{int(trailing[k]) | (k << 8):07X}", 8)
495500

496501
def gen_tests(tests, out):
497502
out.write("""#[derive(Debug)]
@@ -579,43 +584,33 @@ def minimal_perfect_hash(d):
579584
data = UnicodeData()
580585
with open("tables.rs", "w", newline = "\n") as out:
581586
out.write(PREAMBLE)
582-
out.write("#![cfg_attr(rustfmt, rustfmt::skip)]\n")
583587
out.write("use crate::quick_check::IsNormalized;\n")
584588
out.write("use crate::quick_check::IsNormalized::*;\n")
585589
out.write("\n")
586590

587591
version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
588592
out.write("#[allow(unused)]\n")
589-
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)
593+
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n" % version)
590594

591595
gen_combining_class(data.combining_classes, out)
592-
out.write("\n")
593596

594597
gen_composition_table(data.canon_comp, out)
595-
out.write("\n")
596598

597599
gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)
598600

599601
gen_combining_mark(data.general_category_mark, out)
600-
out.write("\n")
601602

602603
gen_public_assigned(data.general_category_public_assigned, out)
603-
out.write("\n")
604604

605605
gen_nfc_qc(data.norm_props, out)
606-
out.write("\n")
607606

608607
gen_nfkc_qc(data.norm_props, out)
609-
out.write("\n")
610608

611609
gen_nfd_qc(data.norm_props, out)
612-
out.write("\n")
613610

614611
gen_nfkd_qc(data.norm_props, out)
615-
out.write("\n")
616612

617613
gen_stream_safe(data.ss_leading, data.ss_trailing, out)
618-
out.write("\n")
619614

620615
with open("normalization_tests.rs", "w", newline = "\n") as out:
621616
out.write(PREAMBLE)

0 commit comments

Comments
 (0)
0