35
35
// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
36
36
37
37
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
38
+
39
+ use super::ScriptExtension;
38
40
'''
39
41
40
42
UNICODE_VERSION = (12 , 0 , 0 )
@@ -183,182 +185,102 @@ def emit_search(f):
183
185
}
184
186
""" )
185
187
186
- def emit_enums (f , script_list , extension_list , longforms , intersections ):
188
+ def emit_enums (f , script_list , extension_list , longforms ):
187
189
"""
188
190
Emit the Script and ScriptExtension enums as well as any related utility functions
189
191
"""
192
+
190
193
f .write ("""
191
- use core::convert::TryFrom;
192
194
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
193
195
#[non_exhaustive]
194
196
#[allow(non_camel_case_types)]
195
- /// A value of the Script property
197
+ #[repr(u8)]
198
+ /// A value of the `Script` property
196
199
pub enum Script {
197
200
/// Unknown script
198
- Unknown,
201
+ Unknown = 0xFF,
202
+ /// Zyyy
203
+ Common = 0xFE,
204
+ /// Zinh,
205
+ Inherited = 0xFD,
199
206
""" )
200
- for script in script_list :
201
- f .write (" /// %s\n %s,\n " % (script , longforms [script ]))
202
- f .write ("""}
203
- #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
204
- #[non_exhaustive]
205
- /// A value for the Script_Extension property
206
- ///
207
- /// Script_Extension is one or more Script
208
- ///
209
- /// This is essentially an optimized version of Vec<Script>,
210
- /// optimized by script sets and intersections actually present in Unicode.
211
- pub enum ScriptExtension {
212
- /// A single script
213
- Single(Script),
207
+ for (i , script ) in enumerate (script_list ):
208
+ f .write (" /// %s\n %s = %s,\n " % (script , longforms [script ], i ))
209
+ f .write ("}\n " )
210
+ f .write ("pub const NEXT_SCRIPT: u8 = %s;" % len (script_list ))
211
+ f .write ("""
212
+
213
+ pub mod script_extensions {
214
+ use crate::ScriptExtension;
215
+ pub const COMMON: ScriptExtension = ScriptExtension::new_common();
216
+ pub const INHERITED: ScriptExtension = ScriptExtension::new_inherited();
217
+ pub const UNKNOWN: ScriptExtension = ScriptExtension::new_unknown();
214
218
""" )
219
+ for (i , script ) in enumerate (script_list ):
220
+ first = 0
221
+ second = 0
222
+ third = 0
223
+ # need to replace L because `hex()` will spit out an L suffix for larger numbers
224
+ if i < 64 :
225
+ first = hex (1 << i ).replace ("L" , "" )
226
+ elif i < 128 :
227
+ second = hex (1 << (i - 64 )).replace ("L" , "" )
228
+ else :
229
+ third = hex (1 << (i - 128 )).replace ("L" , "" )
230
+ f .write (" /// %s\n pub const %s: ScriptExtension = ScriptExtension::new(%s, %s, %s);\n " %
231
+ (longforms [script ], longforms [script ].upper (), first , second , third ))
232
+ if script != longforms [script ]:
233
+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " %
234
+ (longforms [script ], script .upper (), longforms [script ].upper ()))
215
235
for ext in extension_list :
216
236
longform = ", " .join ([longforms [s ] for s in ext ])
217
- f .write (" /// %s\n %s,\n " % (longform , "" .join (ext )))
237
+ name = "_" .join ([s .upper () for s in ext ])
238
+ expr = ext [0 ].upper ()
239
+ for e in ext [1 :]:
240
+ expr = "%s.union(%s)" % (expr , e .upper ())
241
+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " % (longform , name , expr ))
218
242
f .write ("""}
219
243
220
- impl From<Script> for ScriptExtension {
221
- fn from(script: Script) -> Self {
222
- ScriptExtension::Single(script)
223
- }
224
- }
225
-
226
- impl TryFrom<ScriptExtension> for Script {
227
- type Error = ();
228
- fn try_from(ext: ScriptExtension) -> Result<Self, ()> {
229
- match ext {
230
- ScriptExtension::Single(s) => Ok(s),
231
- _ => Err(())
232
- }
233
- }
234
- }
235
-
236
244
impl Script {
245
+ #[inline]
237
246
pub(crate) fn inner_full_name(self) -> &'static str {
238
247
match self {
239
248
Script::Unknown => "Unknown",
249
+ Script::Common => "Common",
250
+ Script::Inherited => "Inherited",
240
251
""" )
241
252
for script in script_list :
242
253
f .write (" Script::%s => \" %s\" ,\n " % (longforms [script ], longforms [script ]))
243
254
f .write (""" }
244
255
}
245
256
257
+ #[inline]
246
258
pub(crate) fn inner_short_name(self) -> &'static str {
247
259
match self {
248
260
Script::Unknown => "",
261
+ Script::Common => "Zyyy",
262
+ Script::Inherited => "Zinh",
249
263
""" )
250
264
for script in script_list :
251
265
f .write (" Script::%s => \" %s\" ,\n " % (longforms [script ], script ))
252
266
f .write (""" }
253
267
}
254
- }
255
268
256
- impl ScriptExtension {
257
269
#[inline]
258
- #[cfg(feature = "with_std")]
259
- pub(crate) fn inner_scripts(self) -> Vec<Script> {
260
- match self {
261
- ScriptExtension::Single(s) => vec![s],
270
+ pub(crate) fn for_integer(value: u8) -> Self {
271
+ match value {
262
272
""" )
263
- for ext in extension_list :
264
- scripts = ", " .join (["Script::%s" % longforms [s ] for s in ext ])
265
- f .write (" %s => vec![%s],\n " % (extension_name (ext ), scripts ))
266
- f .write (""" _ => unreachable!()
267
- }
268
- }
269
-
270
- #[inline]
271
- pub(crate) fn inner_contains_script(self, other: Script) -> bool {
272
- match self {
273
- ScriptExtension::Single(s) => s == other,
274
- """ )
275
- for ext in extension_list :
276
- scripts = " || " .join (["other == Script::%s" % longforms [s ] for s in ext ])
277
- f .write (" %s => %s,\n " % (extension_name (ext ), scripts ))
278
- f .write (""" }
279
- }
280
-
281
- #[inline]
282
- pub(crate) fn inner_intersect(self, other: Self) -> Self {
283
- match (self, other) {
284
- (ScriptExtension::Single(Script::Unknown), _) |
285
- (_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown),
286
- (a, b) if a == b => a,
287
- (ScriptExtension::Single(Script::Common), a) |
288
- (ScriptExtension::Single(Script::Inherited), a) |
289
- (a, ScriptExtension::Single(Script::Common)) |
290
- (a, ScriptExtension::Single(Script::Inherited)) => a,
291
- (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single(s),
292
- """ )
293
- for (e1 , e2 , i ) in intersections :
294
- f .write (" (%s, %s) => %s,\n " % (extension_name (e1 ), extension_name (e2 ), extension_name (i , longforms )))
295
- f .write (""" _ => ScriptExtension::Single(Script::Unknown),
273
+ for (i , script ) in enumerate (script_list ):
274
+ f .write (" %s => Script::%s,\n " % (i , longforms [script ]))
275
+ f .write (""" _ => unreachable!(),
296
276
}
297
277
}
298
278
}
299
279
""" )
300
280
301
-
302
- def compute_intersections_elements (extension_list ):
303
- """
304
- Compute all intersections between the script extensions.
305
- This will add new elements to extension_list, be sure to call it first!
306
- """
307
-
308
- # This is the only third-level intersection
309
- # It's easier to hardcode things here rather than
310
- # do the below calculation in a loop
311
- extension_list .append (['Deva' , 'Knda' , 'Tirh' ])
312
- intersections = []
313
- # Some intersections will not exist in extension_list and we'll need to add them
314
- new_elements = []
315
- sets = [(e , set (e )) for e in extension_list ]
316
- for (e1 , s1 ) in sets :
317
- for (e2 , s2 ) in sets :
318
- if e1 == e2 :
319
- continue
320
- intersection = s1 .intersection (s2 )
321
- if len (intersection ) > 0 :
322
- intersection = [i for i in intersection ]
323
- intersection .sort ()
324
- if len (intersection ) > 1 and intersection not in extension_list and intersection not in new_elements :
325
- new_elements .append (intersection )
326
- if (e1 , e2 , intersection ) not in intersections :
327
- intersections .append ((e1 , e2 , intersection ))
328
- extension_list .extend (new_elements )
329
-
330
- # We now go through the newly added second-level extension values and calculate their intersections
331
- # with the original set and each other
332
- new_sets = [(e , set (e )) for e in new_elements ]
333
- sets = [(e , set (e )) for e in extension_list ]
334
- for (e1 , s1 ) in new_sets :
335
- for (e2 , s2 ) in sets :
336
- if e1 == e2 :
337
- continue
338
- intersection = s1 .intersection (s2 )
339
- if len (intersection ) > 0 :
340
- intersection = [i for i in intersection ]
341
- intersection .sort ()
342
- if len (intersection ) > 1 and intersection not in extension_list :
343
- raise "Found new third-level intersection, please hardcode it"
344
- # The previous routine would automatically get both versions
345
- # of an intersection because it would iterate each pair in both orders,
346
- # but here we're working on an asymmetric pair, so we insert both in order to not
347
- # miss anything
348
- if (e1 , e2 , intersection ) not in intersections :
349
- intersections .append ((e1 , e2 , intersection ))
350
- if (e2 , e1 , intersection ) not in intersections :
351
- intersections .append ((e2 , e1 , intersection ))
352
-
353
- intersections .sort ()
354
- return intersections
355
-
356
- def extension_name (ext , longforms = {}):
281
+ def extension_name (ext ):
357
282
"""Get the rust source for a given ScriptExtension"""
358
- if len (ext ) == 1 :
359
- return "ScriptExtension::Single(Script::%s)" % longforms [ext [0 ]]
360
- else :
361
- return "ScriptExtension::%s" % "" .join (ext )
283
+ return "script_extensions::%s" % "_" .join ([e .upper () for e in ext ])
362
284
363
285
364
286
@@ -385,8 +307,10 @@ def extension_name(ext, longforms={}):
385
307
script_list = []
386
308
387
309
for script in scripts :
388
- script_list .append (shortforms [script ])
310
+ if script not in ["Common" , "Unknown" , "Inherited" ]:
311
+ script_list .append (shortforms [script ])
389
312
script_table .extend ([(x , y , shortforms [script ]) for (x , y ) in scripts [script ]])
313
+ script_list .sort ()
390
314
script_table .sort (key = lambda w : w [0 ])
391
315
392
316
@@ -404,14 +328,13 @@ def extension_name(ext, longforms={}):
404
328
extension_table .extend ([(x , y , output_ext ) for (x , y ) in extensions [ext ]])
405
329
extension_table .sort (key = lambda w : w [0 ])
406
330
407
- intersections = compute_intersections_elements (extension_list )
408
331
409
- emit_enums (rf , script_list , extension_list , longforms , intersections )
332
+ emit_enums (rf , script_list , extension_list , longforms )
410
333
emit_search (rf )
411
334
412
335
emit_table (rf , "SCRIPTS" , script_table , t_type = "&'static [(char, char, Script)]" ,
413
336
is_pub = False , pfun = lambda x : "(%s,%s, Script::%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), longforms [x [2 ]]))
414
337
emit_table (rf , "SCRIPT_EXTENSIONS" , extension_table , t_type = "&'static [(char, char, ScriptExtension)]" ,
415
- is_pub = False , pfun = lambda x : "(%s,%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), extension_name (x [2 ], longforms )))
338
+ is_pub = False , pfun = lambda x : "(%s,%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), extension_name (x [2 ])))
416
339
417
340
# emit_table(rf, "FOObar", properties)
0 commit comments