@@ -183,7 +183,7 @@ def emit_search(f):
183
183
}
184
184
""" )
185
185
186
- def emit_enums (f , script_list , extension_list , longforms ):
186
+ def emit_enums (f , script_list , extension_list , longforms , intersections ):
187
187
"""
188
188
Emit the Script and ScriptExtension enums as well as any related utility functions
189
189
"""
@@ -278,51 +278,81 @@ def emit_enums(f, script_list, extension_list, longforms):
278
278
}
279
279
280
280
#[inline]
281
- pub(crate) fn inner_intersects (self, other: Self) -> bool {
281
+ pub(crate) fn inner_intersect (self, other: Self) -> Self {
282
282
match (self, other) {
283
283
(ScriptExtension::Single(Script::Unknown), _) |
284
- (_, ScriptExtension::Single(Script::Unknown)) => false ,
285
- (a, b) if a == b => true ,
286
- (ScriptExtension::Single(Script::Common), _ ) |
287
- (ScriptExtension::Single(Script::Inherited), _ ) |
288
- (_ , ScriptExtension::Single(Script::Common)) |
289
- (_ , ScriptExtension::Single(Script::Inherited)) => true ,
290
- (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) => o.inner_contains_script(s),
284
+ (_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown) ,
285
+ (a, b) if a == b => a ,
286
+ (ScriptExtension::Single(Script::Common), a ) |
287
+ (ScriptExtension::Single(Script::Inherited), a ) |
288
+ (a , ScriptExtension::Single(Script::Common)) |
289
+ (a , ScriptExtension::Single(Script::Inherited)) => a ,
290
+ (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single (s),
291
291
""" )
292
- intersections = compute_intersections (extension_list )
293
- for (e1 , e2 ) in intersections :
294
- f .write (" (%s, %s) => true,\n " % (extension_name (e1 ), extension_name (e2 )))
295
- f .write (""" _ => false,
292
+ for (e1 , e2 , i ) in intersections :
293
+ f .write (" (%s, %s) => %s,\n " % (extension_name (e1 ), extension_name (e2 ), extension_name (i , longforms )))
294
+ f .write (""" _ => ScriptExtension::Single(Script::Unknown),
296
295
}
297
296
}
298
297
}
299
298
""" )
300
299
301
300
302
- # We currently do NOT have an optimized method to compute
303
- # the actual intersection between two script extensions, we
304
- # only check if they *do* intersect
305
- #
306
- # To add such a method we'd need to do an extra pass where we compute any
307
- # new ScriptExtension enums we'll need from the intersections. It doesn't
308
- # seem worth it for now
309
- def compute_intersections (extension_list ):
301
def compute_intersections_elements(extension_list):
    """
    Compute all intersections between the script extensions.

    This will add new elements to extension_list, be sure to call it first!

    Args:
        extension_list: list of script extensions, each one a list of script
            codes. It is mutated in place: the hard-coded third-level
            intersection and every multi-script intersection that is not
            already present are appended. Membership is tested with list
            equality, so entries are presumably kept sorted by the caller
            (TODO confirm).

    Returns:
        A sorted list of (e1, e2, intersection) tuples, one for each ordered
        pair of distinct extensions sharing at least one script, where
        intersection is the sorted list of shared scripts.

    Raises:
        ValueError: if intersecting a newly added second-level extension with
            another extension produces a multi-script set that is not in
            extension_list, i.e. a third-level intersection that has not been
            hard-coded below.
    """

    # This is the only third-level intersection
    # It's easier to hardcode things here rather than
    # do the below calculation in a loop
    third_level = ['Deva', 'Knda', 'Tirh']
    if third_level not in extension_list:
        # Guard against a duplicate entry if the input already contains it
        extension_list.append(third_level)

    intersections = []
    # Some intersections will not exist in extension_list and we'll need to add them
    new_elements = []
    sets = [(e, set(e)) for e in extension_list]
    for (e1, s1) in sets:
        for (e2, s2) in sets:
            if e1 == e2:
                continue
            intersection = sorted(s1.intersection(s2))
            if not intersection:
                continue
            # Single-script intersections need no new extension entry
            if (len(intersection) > 1
                    and intersection not in extension_list
                    and intersection not in new_elements):
                new_elements.append(intersection)
            if (e1, e2, intersection) not in intersections:
                intersections.append((e1, e2, intersection))
    extension_list.extend(new_elements)

    # We now go through the newly added second-level extension values and calculate their intersections
    # with the original set and each other
    new_sets = [(e, set(e)) for e in new_elements]
    sets = [(e, set(e)) for e in extension_list]
    for (e1, s1) in new_sets:
        for (e2, s2) in sets:
            if e1 == e2:
                continue
            intersection = sorted(s1.intersection(s2))
            if not intersection:
                continue
            if len(intersection) > 1 and intersection not in extension_list:
                # Raising a bare string is itself a TypeError; use a real exception
                raise ValueError("Found new third-level intersection, please hardcode it")
            # The previous routine would automatically get both versions
            # of an intersection because it would iterate each pair in both orders,
            # but here we're working on an asymmetric pair, so we insert both in order to not
            # miss anything
            if (e1, e2, intersection) not in intersections:
                intersections.append((e1, e2, intersection))
            if (e2, e1, intersection) not in intersections:
                intersections.append((e2, e1, intersection))

    intersections.sort()
    return intersections
324
354
325
- def extension_name (ext , longforms = [] ):
355
+ def extension_name (ext , longforms = {} ):
326
356
"""Get the rust source for a given ScriptExtension"""
327
357
if len (ext ) == 1 :
328
358
return "ScriptExtension::Single(Script::%s)" % longforms [ext [0 ]]
@@ -373,7 +403,9 @@ def extension_name(ext, longforms=[]):
373
403
extension_table .extend ([(x , y , output_ext ) for (x , y ) in extensions [ext ]])
374
404
extension_table .sort (key = lambda w : w [0 ])
375
405
376
- emit_enums (rf , script_list , extension_list , longforms )
406
+ intersections = compute_intersections_elements (extension_list )
407
+
408
+ emit_enums (rf , script_list , extension_list , longforms , intersections )
377
409
emit_search (rf )
378
410
379
411
emit_table (rf , "SCRIPTS" , script_table , t_type = "&'static [(char, char, Script)]" ,
0 commit comments