@@ -455,6 +455,56 @@ freearc(struct nfa * nfa,
455455 from -> free = victim ;
456456}
457457
458+ /*
459+ * hasnonemptyout - Does state have a non-EMPTY out arc?
460+ */
461+ static int
462+ hasnonemptyout (struct state * s )
463+ {
464+ struct arc * a ;
465+
466+ for (a = s -> outs ; a != NULL ; a = a -> outchain )
467+ {
468+ if (a -> type != EMPTY )
469+ return 1 ;
470+ }
471+ return 0 ;
472+ }
473+
474+ /*
475+ * nonemptyouts - count non-EMPTY out arcs of a state
476+ */
477+ static int
478+ nonemptyouts (struct state * s )
479+ {
480+ int n = 0 ;
481+ struct arc * a ;
482+
483+ for (a = s -> outs ; a != NULL ; a = a -> outchain )
484+ {
485+ if (a -> type != EMPTY )
486+ n ++ ;
487+ }
488+ return n ;
489+ }
490+
491+ /*
492+ * nonemptyins - count non-EMPTY in arcs of a state
493+ */
494+ static int
495+ nonemptyins (struct state * s )
496+ {
497+ int n = 0 ;
498+ struct arc * a ;
499+
500+ for (a = s -> ins ; a != NULL ; a = a -> inchain )
501+ {
502+ if (a -> type != EMPTY )
503+ n ++ ;
504+ }
505+ return n ;
506+ }
507+
458508/*
459509 * findarc - find arc, if any, from given source with given type and color
460510 * If there is more than one such arc, the result is random.
@@ -511,19 +561,25 @@ moveins(struct nfa * nfa,
511561}
512562
513563/*
514- * copyins - copy all in arcs of a state to another state
564+ * copyins - copy in arcs of a state to another state
565+ *
566+ * Either all arcs, or only non-empty ones as determined by all value.
515567 */
516568static void
517569copyins (struct nfa * nfa ,
518570 struct state * oldState ,
519- struct state * newState )
571+ struct state * newState ,
572+ int all )
520573{
521574 struct arc * a ;
522575
523576 assert (oldState != newState );
524577
525578 for (a = oldState -> ins ; a != NULL ; a = a -> inchain )
526- cparc (nfa , a , a -> from , newState );
579+ {
580+ if (all || a -> type != EMPTY )
581+ cparc (nfa , a , a -> from , newState );
582+ }
527583}
528584
529585/*
@@ -546,19 +602,25 @@ moveouts(struct nfa * nfa,
546602}
547603
548604/*
549- * copyouts - copy all out arcs of a state to another state
605+ * copyouts - copy out arcs of a state to another state
606+ *
607+ * Either all arcs, or only non-empty ones as determined by all value.
550608 */
551609static void
552610copyouts (struct nfa * nfa ,
553611 struct state * oldState ,
554- struct state * newState )
612+ struct state * newState ,
613+ int all )
555614{
556615 struct arc * a ;
557616
558617 assert (oldState != newState );
559618
560619 for (a = oldState -> outs ; a != NULL ; a = a -> outchain )
561- cparc (nfa , a , newState , a -> to );
620+ {
621+ if (all || a -> type != EMPTY )
622+ cparc (nfa , a , newState , a -> to );
623+ }
562624}
563625
564626/*
@@ -881,7 +943,7 @@ pull(struct nfa * nfa,
881943 if (NISERR ())
882944 return 0 ;
883945 assert (to != from ); /* con is not an inarc */
884- copyins (nfa , from , s ); /* duplicate inarcs */
946+ copyins (nfa , from , s , 1 ); /* duplicate inarcs */
885947 cparc (nfa , con , s , to ); /* move constraint arc */
886948 freearc (nfa , con );
887949 from = s ;
@@ -1027,7 +1089,7 @@ push(struct nfa * nfa,
10271089 s = newstate (nfa );
10281090 if (NISERR ())
10291091 return 0 ;
1030- copyouts (nfa , to , s ); /* duplicate outarcs */
1092+ copyouts (nfa , to , s , 1 ); /* duplicate outarcs */
10311093 cparc (nfa , con , from , s ); /* move constraint */
10321094 freearc (nfa , con );
10331095 to = s ;
@@ -1134,91 +1196,205 @@ fixempties(struct nfa * nfa,
11341196 FILE * f ) /* for debug output; NULL none */
11351197{
11361198 struct state * s ;
1199+ struct state * s2 ;
11371200 struct state * nexts ;
11381201 struct arc * a ;
11391202 struct arc * nexta ;
1140- int progress ;
11411203
1142- /* find and eliminate empties until there are no more */
1143- do
1204+ /*
1205+ * First, get rid of any states whose sole out-arc is an EMPTY, since
1206+ * they're basically just aliases for their successor. The parsing
1207+ * algorithm creates enough of these that it's worth special-casing this.
1208+ */
1209+ for (s = nfa -> states ; s != NULL && !NISERR (); s = nexts )
11441210 {
1145- progress = 0 ;
1146- for (s = nfa -> states ; s != NULL && !NISERR () &&
1147- s -> no != FREESTATE ; s = nexts )
1211+ nexts = s -> next ;
1212+ if (s -> flag || s -> nouts != 1 )<
2D03
/div>
1213+ continue ;
1214+ a = s -> outs ;
1215+ assert (a != NULL && a -> outchain == NULL );
1216+ if (a -> type != EMPTY )
1217+ continue ;
1218+ if (s != a -> to )
1219+ moveins (nfa , s , a -> to );
1220+ dropstate (nfa , s );
1221+ }
1222+
1223+ /*
1224+ * Similarly, get rid of any state with a single EMPTY in-arc, by folding
1225+ * it into its predecessor.
1226+ */
1227+ for (s = nfa -> states ; s != NULL && !NISERR (); s = nexts )
1228+ {
1229+ nexts = s -> next ;
1230+ /* while we're at it, ensure tmp fields are clear for next step */
1231+ assert (s -> tmp == NULL );
1232+ if (s -> flag || s -> nins != 1 )
1233+ continue ;
1234+ a = s -> ins ;
1235+ assert (a != NULL && a -> inchain == NULL );
1236+ if (a -> type != EMPTY )
1237+ continue ;
1238+ if (s != a -> from )
1239+ moveouts (nfa , s , a -> from );
1240+ dropstate (nfa , s );
1241+ }
1242+
1243+ /*
1244+ * For each remaining NFA state, find all other states that are reachable
1245+ * from it by a chain of one or more EMPTY arcs. Then generate new arcs
1246+ * that eliminate the need for each such chain.
1247+ *
1248+ * If we just do this straightforwardly, the algorithm gets slow in
1249+ * complex graphs, because the same arcs get copied to all intermediate
1250+ * states of an EMPTY chain, and then uselessly pushed repeatedly to the
1251+ * chain's final state; we waste a lot of time in newarc's duplicate
1252+ * checking. To improve matters, we decree that any state with only EMPTY
1253+ * out-arcs is "doomed" and will not be part of the final NFA. That can be
1254+ * ensured by not adding any new out-arcs to such a state. Having ensured
1255+ * that, we need not update the state's in-arcs list either; all arcs that
1256+ * might have gotten pushed forward to it will just get pushed directly to
1257+ * successor states. This eliminates most of the useless duplicate arcs.
1258+ */
1259+ for (s = nfa -> states ; s != NULL && !NISERR (); s = s -> next )
1260+ {
1261+ for (s2 = emptyreachable (s , s ); s2 != s && !NISERR (); s2 = nexts )
11481262 {
1149- nexts = s -> next ;
1150- for (a = s -> outs ; a != NULL && !NISERR (); a = nexta )
1151- {
1152- nexta = a -> outchain ;
1153- if (a -> type == EMPTY && unempty (nfa , a ))
1154- progress = 1 ;
1155- assert (nexta == NULL || s -> no != FREESTATE );
1156- }
1263+ /*
1264+ * If s2 is doomed, we decide that (1) we will always push arcs
1265+ * forward to it, not pull them back to s; and (2) we can optimize
1266+ * away the push-forward, per comment above. So do nothing.
1267+ */
1268+ if (s2 -> flag || hasnonemptyout (s2 ))
1269+ replaceempty (nfa , s , s2 );
1270+
1271+ /* Reset the tmp fields as we walk back */
1272+ nexts = s2 -> tmp ;
1273+ s2 -> tmp = NULL ;
11571274 }
1158- if (progress && f != NULL )
1159- dumpnfa (nfa , f );
1160- } while (progress && !NISERR ());
1275+ s -> tmp = NULL ;
1276+ }
1277+
1278+ if (NISERR ())
1279+ return ;
1280+
1281+ /*
1282+ * Now remove all the EMPTY arcs, since we don't need them anymore.
1283+ */
1284+ for (s = nfa -> states ; s != NULL ; s = s -> next )
1285+ {
1286+ for (a = s -> outs ; a != NULL ; a = nexta )
1287+ {
1288+ nexta = a -> outchain ;
1289+ if (a -> type == EMPTY )
1290+ freearc (nfa , a );
1291+ }
1292+ }
1293+
1294+ /*
1295+ * And remove any states that have become useless. (This cleanup is not
1296+ * very thorough, and would be even less so if we tried to combine it with
1297+ * the previous step; but cleanup() will take care of anything we miss.)
1298+ */
1299+ for (s = nfa -> states ; s != NULL ; s = nexts )
1300+ {
1301+ nexts = s -> next ;
1302+ if ((s -> nins == 0 || s -> nouts == 0 ) && !s -> flag )
1303+ dropstate (nfa , s );
1304+ }
1305+
1306+ if (f != NULL )
1307+ dumpnfa (nfa , f );
11611308}
11621309
11631310/*
1164- * unempty - optimize out an EMPTY arc, if possible
1311+ * emptyreachable - recursively find all states reachable from s by EMPTY arcs
1312+ *
1313+ * The return value is the last such state found. Its tmp field links back
1314+ * to the next-to-last such state, and so on back to s, so that all these
1315+ * states can be located without searching the whole NFA.
11651316 *
1166- * Actually, as it stands this function always succeeds, but the return
1167- * value is kept with an eye on possible future changes.
1317+ * The maximum recursion depth here is equal to the length of the longest
1318+ * loop-free chain of EMPTY arcs, which is surely no more than the size of
1319+ * the NFA, and in practice will be a lot less than that.
11681320 */
1169- static int /* 0 couldn't, 1 could */
1170- unempty (struct nfa * nfa ,
1171- struct arc * a )
1321+ static struct state *
1322+ emptyreachable (struct state * s , struct state * lastfound )
11721323{
1173- struct state * from = a -> from ;
1174- struct state * to = a -> to ;
1175- int usefrom ; /* work on from, as opposed to to? */
1176-
1177- assert (a -> type == EMPTY );
1178- assert (from != nfa -> pre && to != nfa -> post );
1324+ struct arc * a ;
11791325
1180- if (from == to )
1181- { /* vacuous loop */
1182- freearc (nfa , a );
1183- return 1 ;
1326+ s -> tmp = lastfound ;
1327+ lastfound = s ;
1328+ for (a = s -> outs ; a != NULL ; a = a -> outchain )
1329+ {
1330+ if (a -> type == EMPTY && a -> to -> tmp == NULL )
1331+ lastfound = emptyreachable (a -> to , lastfound );
11841332 }
1333+ return lastfound ;
1334+ }
11851335
1186- /* decide which end to work on */
1187- usefrom = 1 ; /* default: attack from */
1188- if (from -> nouts > to -> nins )
1189- usefrom = 0 ;
1190- else if (from -> nouts == to -> nins )
1336+ /*
1337+ * replaceempty - replace an EMPTY arc chain with some non-empty arcs
1338+ *
1339+ * The EMPTY arc(s) should be deleted later, but we can't do it here because
1340+ * they may still be needed to identify other arc chains during fixempties().
1341+ */
1342+ static void
1343+ replaceempty (struct nfa * nfa ,
1344+ struct state * from ,
1345+ struct state * to )
1346+ {
1347+ int fromouts ;
1348+ int toins ;
1349+
1350+ assert (from != to );
1351+
1352+ /*
1353+ * Create replacement arcs that bypass the need for the EMPTY chain. We
1354+ * can do this either by pushing arcs forward (linking directly from
1355+ * "from"'s predecessors to "to") or by pulling them back (linking
1356+ * directly from "from" to "to"'s successors). In general, we choose
1357+ * whichever way creates greater fan-out or fan-in, so as to improve the
1358+ * odds of reducing the other state to zero in-arcs or out-arcs and
1359+ * thereby being able to delete it. However, if "from" is doomed (has no
1360+ * non-EMPTY out-arcs), we must keep it so, so always push forward in that
1361+ * case.
1362+ *
1363+ * The fan-out/fan-in comparison should count only non-EMPTY arcs. If
1364+ * "from" is doomed, we can skip counting "to"'s arcs, since we want to
1365+ * force taking the copyins path in that case.
1366+ */
1367+ fromouts = nonemptyouts (from );
1368+ toins = (fromouts == 0 ) ? 1 : nonemptyins (to );
1369+
1370+ if (fromouts > toins )
11911371 {
1192- /* decide on secondary issue: move/copy fewest arcs */
1193- if (from -> nins > to -> nouts )
1194- usefrom = 0 ;
1372+ copyouts (nfa , to , from , 0 );
1373+ return ;
1374+ }
1375+ if (fromouts < toins )
1376+ {
1377+ copyins (nfa , from , to , 0 );
1378+ return ;
11951379 }
11961380
1197- freearc (nfa , a );
1198- if (usefrom )
1381+ /*
1382+ * fromouts == toins. Decide on secondary issue: copy fewest arcs.
1383+ *
1384+ * Doesn't seem to be worth the trouble to exclude empties from these
1385+ * comparisons; that takes extra time and doesn't seem to improve the
1386+ * resulting graph much.
1387+ */
1388+ if (from -> nins > to -> nouts )
11991389 {
1200- if (from -> nouts == 0 )
1201- {
1202- /* was the state's only outarc */
1203- moveins (nfa , from , to );
1204- freestate (nfa , from );
1205- }
1206- else
1207- copyins (nfa , from , to );
1390+ copyouts (nfa , to , from , 0 );
1391+ return ;
12081392 }
12091393 else
12101394 {
1211- if (to -> nins == 0 )
1212- {
1213- /* was the state's only inarc */
1214- moveouts (nfa , to , from );
1215- freestate (nfa , to );
1216- }
1217- else
1218- copyouts (nfa , to , from );
1395+ copyins (nfa , from , to , 0 );
1396+ return ;
12191397 }
1220-
1221- return 1 ;
12221398}
12231399
12241400/*
0 commit comments