@@ -183,7 +183,7 @@ def emit_search(f):
183183}
184184""" )
185185
186- def emit_enums (f , script_list , extension_list , longforms ):
186+ def emit_enums (f , script_list , extension_list , longforms , intersections ):
187187 """
188188 Emit the Script and ScriptExtension enums as well as any related utility functions
189189 """
@@ -278,51 +278,81 @@ def emit_enums(f, script_list, extension_list, longforms):
278278 }
279279
280280 #[inline]
281- pub(crate) fn inner_intersects (self, other: Self) -> bool {
281+ pub(crate) fn inner_intersect (self, other: Self) -> Self {
282282 match (self, other) {
283283 (ScriptExtension::Single(Script::Unknown), _) |
284- (_, ScriptExtension::Single(Script::Unknown)) => false ,
285- (a, b) if a == b => true ,
286- (ScriptExtension::Single(Script::Common), _ ) |
287- (ScriptExtension::Single(Script::Inherited), _ ) |
288- (_ , ScriptExtension::Single(Script::Common)) |
289- (_ , ScriptExtension::Single(Script::Inherited)) => true ,
290- (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) => o.inner_contains_script(s),
284+ (_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown) ,
285+ (a, b) if a == b => a ,
286+ (ScriptExtension::Single(Script::Common), a ) |
287+ (ScriptExtension::Single(Script::Inherited), a ) |
288+ (a , ScriptExtension::Single(Script::Common)) |
289+ (a , ScriptExtension::Single(Script::Inherited)) => a ,
290+ (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single (s),
291291""" )
292- intersections = compute_intersections (extension_list )
293- for (e1 , e2 ) in intersections :
294- f .write (" (%s, %s) => true,\n " % (extension_name (e1 ), extension_name (e2 )))
295- f .write (""" _ => false,
292+ for (e1 , e2 , i ) in intersections :
293+ f .write (" (%s, %s) => %s,\n " % (extension_name (e1 ), extension_name (e2 ), extension_name (i , longforms )))
294+ f .write (""" _ => ScriptExtension::Single(Script::Unknown),
296295 }
297296 }
298297}
299298""" )
300299
301300
302- # We currently do NOT have an optimized method to compute
303- # the actual intersection between two script extensions, we
304- # only check if they *do* intersect
305- #
306- # To add such a method we'd need to do an extra pass where we compute any
307- # new ScriptExtension enums we'll need from the intersections. It doesn't
308- # seem worth it for now
309- def compute_intersections (extension_list ):
def compute_intersections_elements(extension_list):
    """
    Compute all pairwise intersections between the script extensions.

    NOTE: this mutates ``extension_list`` in place. Any intersection that has
    more than one script and is not already present is appended to it, so call
    this before emitting anything that depends on the full list of extensions.

    Args:
        extension_list: list of script extensions, each one a sorted list of
            script codes (e.g. ``['Beng', 'Deva']``).

    Returns:
        A sorted list of ``(e1, e2, intersection)`` tuples, where ``e1`` and
        ``e2`` are distinct extensions with at least one script in common and
        ``intersection`` is the sorted list of the scripts they share. Every
        intersecting pair is present in both orders.

    Raises:
        Exception: if intersecting a newly added (second-level) extension with
            another extension yields a multi-script set that is still not in
            ``extension_list``. Such third-level intersections have to be
            hardcoded below.
    """

    # This is the only third-level intersection.
    # It's easier to hardcode things here rather than
    # do the below calculation in a loop.
    # Guarded so that a duplicate entry is not created if the input data
    # already contains this extension.
    hardcoded = ['Deva', 'Knda', 'Tirh']
    if hardcoded not in extension_list:
        extension_list.append(hardcoded)

    intersections = []
    # Some intersections will not exist in extension_list and we'll need to add them
    new_elements = []

    # First pass: intersect every extension with every other extension.
    # Each pair is visited in both orders, so both (e1, e2) and (e2, e1) are recorded.
    sets = [(e, set(e)) for e in extension_list]
    for (e1, s1) in sets:
        for (e2, s2) in sets:
            if e1 == e2:
                continue
            intersection = sorted(s1 & s2)
            if not intersection:
                continue
            # Single-script intersections need no new extension entry;
            # only multi-script ones that are not known yet are collected.
            if (len(intersection) > 1
                    and intersection not in extension_list
                    and intersection not in new_elements):
                new_elements.append(intersection)
            if (e1, e2, intersection) not in intersections:
                intersections.append((e1, e2, intersection))
    extension_list.extend(new_elements)

    # Second pass: go through the newly added second-level extension values and
    # calculate their intersections with the original set and each other.
    new_sets = [(e, set(e)) for e in new_elements]
    sets = [(e, set(e)) for e in extension_list]
    for (e1, s1) in new_sets:
        for (e2, s2) in sets:
            if e1 == e2:
                continue
            intersection = sorted(s1 & s2)
            if not intersection:
                continue
            if len(intersection) > 1 and intersection not in extension_list:
                # A bare string is not a valid exception object; raise a real
                # exception so the message actually reaches the user.
                raise Exception(
                    "Found new third-level intersection, please hardcode it: %r"
                    % (intersection,))
            # The first pass automatically got both versions of an intersection
            # because it iterated each pair in both orders, but here we're working
            # on an asymmetric pair, so we insert both in order to not miss anything.
            if (e1, e2, intersection) not in intersections:
                intersections.append((e1, e2, intersection))
            if (e2, e1, intersection) not in intersections:
                intersections.append((e2, e1, intersection))

    intersections.sort()
    return intersections
324354
325- def extension_name (ext , longforms = [] ):
355+ def extension_name (ext , longforms = {} ):
326356 """Get the rust source for a given ScriptExtension"""
327357 if len (ext ) == 1 :
328358 return "ScriptExtension::Single(Script::%s)" % longforms [ext [0 ]]
@@ -373,7 +403,9 @@ def extension_name(ext, longforms=[]):
373403 extension_table .extend ([(x , y , output_ext ) for (x , y ) in extensions [ext ]])
374404 extension_table .sort (key = lambda w : w [0 ])
375405
376- emit_enums (rf , script_list , extension_list , longforms )
406+ intersections = compute_intersections_elements (extension_list )
407+
408+ emit_enums (rf , script_list , extension_list , longforms , intersections )
377409 emit_search (rf )
378410
379411 emit_table (rf , "SCRIPTS" , script_table , t_type = "&'static [(char, char, Script)]" ,
0 commit comments