Check that translations fit in expected character type · jepler/circuitpython@9c11bb2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9c11bb2

Browse files
committed
Check that translations fit in expected character type
1 parent 4671658 commit 9c11bb2

File tree

1 file changed

+13
-2
lines changed

1 file changed

+13
-2
lines changed

py/maketranslationdata.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,10 @@ def iter_substrings(s, minlen, maxlen):
146146
yield s[begin : begin + n]
147147

148148

149-
def compute_huffman_coding(translations, f):
149+
translation_requires_uint16 = {"cs", "el", "fr", "ja", "ko", "pl", "ru", "tr", "zh_Latn_pinyin"}
150+
151+
152+
def compute_huffman_coding(translation_name, translations, f):
150153
texts = [t[1] for t in translations]
151154
words = []
152155

@@ -163,6 +166,12 @@ def compute_huffman_coding(translations, f):
163166

164167
bits_per_codepoint = 16 if max_ord > 255 else 8
165168
values_type = "uint16_t" if max_ord > 255 else "uint8_t"
169+
translation_name = translation_name.split("/")[-1].split(".")[0]
170+
if max_ord > 255 and translation_name not in translation_requires_uint16:
171+
raise ValueError(
172+
f"Translation {translation_name} expected to fit in 8 bits but required 16 bits"
173+
)
174+
166175
while len(words) < max_words:
167176
# Until the dictionary is filled to capacity, use a heuristic to find
168177
# the best "word" (2- to 11-gram) to add to it.
@@ -522,5 +531,7 @@ def output_translation_data(encoding_table, i18ns, out):
522531
i18ns = parse_input_headers(args.infiles)
523532
i18ns = sorted(i18ns)
524533
translations = translate(args.translation, i18ns)
525-
encoding_table = compute_huffman_coding(translations, args.compression_filename)
534+
encoding_table = compute_huffman_coding(
535+
args.translation, translations, args.compression_filename
536+
)
526537
output_translation_data(encoding_table, translations, args.translation_filename)

0 commit comments

Comments
 (0)
0