35
35
// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
36
36
37
37
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
38
+
39
+ use super::ScriptExtension;
38
40
'''
39
41
40
42
UNICODE_VERSION = (12 , 0 , 0 )
@@ -183,44 +185,69 @@ def emit_search(f):
183
185
}
184
186
""" )
185
187
186
- def emit_enums (f , script_list , extension_list , longforms , intersections ):
188
+ def emit_enums (f , script_list , extension_list , longforms ):
187
189
"""
188
190
Emit the Script and ScriptExtension enums as well as any related utility functions
189
191
"""
192
+
190
193
f .write ("""
191
194
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
192
195
#[non_exhaustive]
193
196
#[allow(non_camel_case_types)]
197
+ #[repr(u8)]
194
198
/// A value of the `Script` property
195
199
pub enum Script {
196
200
/// Unknown script
197
- Unknown,
201
+ Unknown = 0xFF,
202
+ /// Zyyy
203
+ Common = 0xFE,
204
+ /// Zinh,
205
+ Inherited = 0xFD,
198
206
""" )
199
- for script in script_list :
200
- f .write (" /// %s\n %s,\n " % (script , longforms [script ]))
201
- f .write ("""}
202
- #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
203
- #[non_exhaustive]
204
- /// A value for the `Script_Extension` property
205
- ///
206
- /// [`ScriptExtension`] is one or more [`Script`]
207
- ///
208
- /// This is essentially an optimized version of `Vec<Script>`,
209
- /// optimized by script sets and intersections actually present in Unicode.
210
- pub enum ScriptExtension {
211
- /// A single script
212
- Single(Script),
207
+ for (i , script ) in enumerate (script_list ):
208
+ f .write (" /// %s\n %s = %s,\n " % (script , longforms [script ], i ))
209
+ f .write ("}\n " )
210
+ f .write ("pub const NEXT_SCRIPT: u8 = %s;" % len (script_list ))
211
+ f .write ("""
212
+
213
+ pub mod script_extensions {
214
+ use crate::ScriptExtension;
215
+ pub const COMMON: ScriptExtension = ScriptExtension::new_common();
216
+ pub const INHERITED: ScriptExtension = ScriptExtension::new_inherited();
217
+ pub const UNKNOWN: ScriptExtension = ScriptExtension::new_unknown();
213
218
""" )
219
+ for (i , script ) in enumerate (script_list ):
220
+ first = 0
221
+ second = 0
222
+ third = 0
223
+ # need to replace L because `hex()` will spit out an L suffix for larger numbers
224
+ if i < 64 :
225
+ first = hex (1 << i ).replace ("L" , "" )
226
+ elif i < 128 :
227
+ second = hex (1 << (i - 64 )).replace ("L" , "" )
228
+ else :
229
+ third = hex (1 << (i - 128 )).replace ("L" , "" )
230
+ f .write (" /// %s\n pub const %s: ScriptExtension = ScriptExtension::new(%s, %s, %s);\n " %
231
+ (longforms [script ], longforms [script ].upper (), first , second , third ))
232
+ if script != longforms [script ]:
233
+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " %
234
+ (longforms [script ], script .upper (), longforms [script ].upper ()))
214
235
for ext in extension_list :
215
236
longform = ", " .join ([longforms [s ] for s in ext ])
216
- f .write (" /// %s\n %s,\n " % (longform , "" .join (ext )))
237
+ name = "_" .join ([s .upper () for s in ext ])
238
+ expr = ext [0 ].upper ()
239
+ for e in ext [1 :]:
240
+ expr = "%s.union(%s)" % (expr , e .upper ())
241
+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " % (longform , name , expr ))
217
242
f .write ("""}
218
243
219
244
impl Script {
220
245
#[inline]
221
246
pub(crate) fn inner_full_name(self) -> &'static str {
222
247
match self {
223
248
Script::Unknown => "Unknown",
249
+ Script::Common => "Common",
250
+ Script::Inherited => "Inherited",
224
251
""" )
225
252
for script in script_list :
226
253
f .write (" Script::%s => \" %s\" ,\n " % (longforms [script ], longforms [script ]))
@@ -231,119 +258,29 @@ def emit_enums(f, script_list, extension_list, longforms, intersections):
231
258
pub(crate) fn inner_short_name(self) -> &'static str {
232
259
match self {
233
260
Script::Unknown => "",
261
+ Script::Common => "Zyyy",
262
+ Script::Inherited => "Zinh",
234
263
""" )
235
264
for script in script_list :
236
265
f .write (" Script::%s => \" %s\" ,\n " % (longforms [script ], script ))
237
266
f .write (""" }
238
267
}
239
- }
240
-
241
- impl ScriptExtension {
242
- #[inline]
243
- #[cfg(feature = "with_std")]
244
- pub(crate) fn inner_scripts(self) -> Vec<Script> {
245
- match self {
246
- ScriptExtension::Single(s) => vec![s],
247
- """ )
248
- for ext in extension_list :
249
- scripts = ", " .join (["Script::%s" % longforms [s ] for s in ext ])
250
- f .write (" %s => vec![%s],\n " % (extension_name (ext ), scripts ))
251
- f .write (""" _ => unreachable!()
252
- }
253
- }
254
-
255
- #[inline]
256
- pub(crate) fn inner_contains_script(self, other: Script) -> bool {
257
- match self {
258
- ScriptExtension::Single(s) => s == other,
259
- """ )
260
- for ext in extension_list :
261
- scripts = " || " .join (["other == Script::%s" % longforms [s ] for s in ext ])
262
- f .write (" %s => %s,\n " % (extension_name (ext ), scripts ))
263
- f .write (""" }
264
- }
265
268
266
269
#[inline]
267
- pub(crate) fn inner_intersect(self, other: Self) -> Self {
268
- match (self, other) {
269
- (ScriptExtension::Single(Script::Unknown), _) |
270
- (_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown),
271
- (a, b) if a == b => a,
272
- (ScriptExtension::Single(Script::Common), a) |
273
- (ScriptExtension::Single(Script::Inherited), a) |
274
- (a, ScriptExtension::Single(Script::Common)) |
275
- (a, ScriptExtension::Single(Script::Inherited)) => a,
276
- (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single(s),
270
+ pub(crate) fn for_integer(value: u8) -> Self {
271
+ match value {
277
272
""" )
278
- for (e1 , e2 , i ) in intersections :
279
- f .write (" (%s, %s) => %s,\n " % (extension_name ( e1 ), extension_name ( e2 ), extension_name ( i , longforms ) ))
280
- f .write (""" _ => ScriptExtension::Single(Script::Unknown ),
273
+ for (i , script ) in enumerate ( script_list ) :
274
+ f .write (" %s => Script:: %s,\n " % (i , longforms [ script ] ))
275
+ f .write (""" _ => unreachable!( ),
281
276
}
282
277
}
283
278
}
284
279
""" )
285
280
286
-
287
- def compute_intersections_elements (extension_list ):
288
- """
289
- Compute all intersections between the script extensions.
290
- This will add new elements to extension_list, be sure to call it first!
291
- """
292
-
293
- # This is the only third-level intersection
294
- # It's easier to hardcode things here rather than
295
- # do the below calculation in a loop
296
- extension_list .append (['Deva' , 'Knda' , 'Tirh' ])
297
- intersections = []
298
- # Some intersections will not exist in extension_list and we'll need to add them
299
- new_elements = []
300
- sets = [(e , set (e )) for e in extension_list ]
301
- for (e1 , s1 ) in sets :
302
- for (e2 , s2 ) in sets :
303
- if e1 == e2 :
304
- continue
305
- intersection = s1 .intersection (s2 )
306
- if len (intersection ) > 0 :
307
- intersection = [i for i in intersection ]
308
- intersection .sort ()
309
- if len (intersection ) > 1 and intersection not in extension_list and intersection not in new_elements :
310
- new_elements .append (intersection )
311
- if (e1 , e2 , intersection ) not in intersections :
312
- intersections .append ((e1 , e2 , intersection ))
313
- extension_list .extend (new_elements )
314
-
315
- # We now go through the newly added second-level extension values and calculate their intersections
316
- # with the original set and each other
317
- new_sets = [(e , set (e )) for e in new_elements ]
318
- sets = [(e , set (e )) for e in extension_list ]
319
- for (e1 , s1 ) in new_sets :
320
- for (e2 , s2 ) in sets :
321
- if e1 == e2 :
322
- continue
323
- intersection = s1 .intersection (s2 )
324
- if len (intersection ) > 0 :
325
- intersection = [i for i in intersection ]
326
- intersection .sort ()
327
- if len (intersection ) > 1 and intersection not in extension_list :
328
- raise "Found new third-level intersection, please hardcode it"
329
- # The previous routine would automatically get both versions
330
- # of an intersection because it would iterate each pair in both orders,
331
- # but here we're working on an asymmetric pair, so we insert both in order to not
332
- # miss anything
333
- if (e1 , e2 , intersection ) not in intersections :
334
- intersections .append ((e1 , e2 , intersection ))
335
- if (e2 , e1 , intersection ) not in intersections :
336
- intersections .append ((e2 , e1 , intersection ))
337
-
338
- intersections .sort ()
339
- return intersections
340
-
341
- def extension_name (ext , longforms = {}):
281
+ def extension_name (ext ):
342
282
"""Get the rust source for a given ScriptExtension"""
343
- if len (ext ) == 1 :
344
- return "ScriptExtension::Single(Script::%s)" % longforms [ext [0 ]]
345
- else :
346
- return "ScriptExtension::%s" % "" .join (ext )
283
+ return "script_extensions::%s" % "_" .join ([e .upper () for e in ext ])
347
284
348
285
349
286
@@ -370,8 +307,10 @@ def extension_name(ext, longforms={}):
370
307
script_list = []
371
308
372
309
for script in scripts :
373
- script_list .append (shortforms [script ])
310
+ if script not in ["Common" , "Unknown" , "Inherited" ]:
311
+ script_list .append (shortforms [script ])
374
312
script_table .extend ([(x , y , shortforms [script ]) for (x , y ) in scripts [script ]])
313
+ script_list .sort ()
375
314
script_table .sort (key = lambda w : w [0 ])
376
315
377
316
@@ -389,14 +328,13 @@ def extension_name(ext, longforms={}):
389
328
extension_table .extend ([(x , y , output_ext ) for (x , y ) in extensions [ext ]])
390
329
extension_table .sort (key = lambda w : w [0 ])
391
330
392
- intersections = compute_intersections_elements (extension_list )
393
331
394
- emit_enums (rf , script_list , extension_list , longforms , intersections )
332
+ emit_enums (rf , script_list , extension_list , longforms )
395
333
emit_search (rf )
396
334
397
335
emit_table (rf , "SCRIPTS" , script_table , t_type = "&'static [(char, char, Script)]" ,
398
336
is_pub = False , pfun = lambda x : "(%s,%s, Script::%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), longforms [x [2 ]]))
399
337
emit_table (rf , "SCRIPT_EXTENSIONS" , extension_table , t_type = "&'static [(char, char, ScriptExtension)]" ,
400
- is_pub = False , pfun = lambda x : "(%s,%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), extension_name (x [2 ], longforms )))
338
+ is_pub = False , pfun = lambda x : "(%s,%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), extension_name (x [2 ])))
401
339
402
340
# emit_table(rf, "FOObar", properties)
0 commit comments