8000 Miscellaneous cleanup of regular-expression compiler. · highb/postgres@4083a52 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4083a52

Browse files
committed
Miscellaneous cleanup of regular-expression compiler.
Revert our previous addition of "all" flags to copyins() and copyouts(); they're no longer needed, and were never anything but an unsightly hack. Improve a couple of infelicities in the REG_DEBUG code for dumping the NFA data structure, including adding code to count the total number of states and arcs. Add a couple of missed error checks. Add some more documentation in the README file, and some regression tests illustrating cases that exceeded the state-count limit and/or took unreasonable amounts of time before this set of patches. Back-patch to all supported branches.
1 parent b94c2b6 commit 4083a52

File tree

2 files changed

+23
-43
lines changed

2 files changed

+23
-43
lines changed

src/backend/regex/regc_nfa.c

Lines changed: 17 additions & 38 deletions
< 8000 div data-testid="addition diffstat" class="DiffSquares-module__diffSquare--h5kjy DiffSquares-module__addition--jeNtt">
Original file line numberDiff line numberDiff line change
@@ -823,14 +823,11 @@ moveins(struct nfa * nfa,
823823

824824
/*
825825
* copyins - copy in arcs of a state to another state
826-
*
827-
* Either all arcs, or only non-empty ones as determined by all value.
828826
*/
829827
static void
830828
copyins(struct nfa * nfa,
831829
struct state * oldState,
832-
struct state * newState,
833-
int all)
830+
struct state * newState)
834831
{
835832
assert(oldState != newState);
836833

@@ -840,8 +837,7 @@ copyins(struct nfa * nfa,
840837
struct arc *a;
841838

842839
for (a = oldState->ins; a != NULL; a = a->inchain)
843-
if (all || a->type != EMPTY)
844-
cparc(nfa, a, a->from, newState);
840+
cparc(nfa, a, a->from, newState);
845841
}
846842
else
847843
{
@@ -873,12 +869,6 @@ copyins(struct nfa * nfa,
873869
{
874870
struct arc *a = oa;
875871

876-
if (!all && a->type == EMPTY)
877-
{
878-
oa = oa->inchain;
879-
continue;
880-
}
881-
882872
switch (sortins_cmp(&oa, &na))
883873
{
884874
case -1:
@@ -904,12 +894,6 @@ copyins(struct nfa * nfa,
904894
/* newState does not have anything matching oa */
905895
struct arc *a = oa;
906896

907-
if (!all && a->type == EMPTY)
908-
{
909-
oa = oa->inchain;
910-
continue;
911-
}
912-
913897
oa = oa->inchain;
914898
createarc(nfa, a->type, a->co, a->from, newState);
915899
}
@@ -1107,14 +1091,11 @@ moveouts(struct nfa * nfa,
11071091

11081092
/*
11091093
* copyouts - copy out arcs of a state to another state
1110-
*
1111-
* Either all arcs, or only non-empty ones as determined by all value.
11121094
*/
11131095
static void
11141096
copyouts(struct nfa * nfa,
11151097
struct state * oldState,
1116-
struct state * newState,
1117-
int all)
1098+
struct state * newState)
11181099
{
11191100
assert(oldState != newState);
11201101

@@ -1124,8 +1105,7 @@ copyouts(struct nfa * nfa,
11241105
struct arc *a;
11251106

11261107
for (a = oldState->outs; a != NULL; a = a->outchain)
1127-
if (all || a->type != EMPTY)
1128-
cparc(nfa, a, newState, a->to);
1108+
cparc(nfa, a, newState, a->to);
11291109
}
11301110
else
11311111
{
@@ -1157,12 +1137,6 @@ copyouts(struct nfa * nfa,
11571137
{
11581138
struct arc *a = oa;
11591139

1160-
if (!all && a->type == EMPTY)
1161-
{
1162-
oa = oa->outchain;
1163-
continue;
1164-
}
1165-
11661140
switch (sortouts_cmp(&oa, &na))
11671141
{
11681142
case -1:
@@ -1188,12 +1162,6 @@ copyouts(struct nfa * nfa,
11881162
/* newState does not have anything matching oa */
11891163
struct arc *a = oa;
11901164

1191-
if (!all && a->type == EMPTY)
1192-
{
1193-
oa = oa->outchain;
1194-
continue;
1195-
}
1196-
11971165
oa = oa->outchain;
11981166
createarc(nfa, a->type, a->co, newState, a->to);
11991167
}
@@ -1452,6 +1420,10 @@ optimize(struct nfa * nfa,
14521420
fprintf(f, "\nfinal cleanup:\n");
14531421
#endif A3E2
14541422
cleanup(nfa); /* final tidying */
1423+
#ifdef REG_DEBUG
1424+
if (verbose)
1425+
dumpnfa(nfa, f);
1426+
#endif
14551427
return analyze(nfa); /* and analysis */
14561428
}
14571429

@@ -1568,7 +1540,7 @@ pull(struct nfa * nfa,
15681540
s = newstate(nfa);
15691541
if (NISERR())
15701542
return 0;
1571-
copyins(nfa, from, s, 1); /* duplicate inarcs */
1543+
copyins(nfa, from, s); /* duplicate inarcs */
15721544
cparc(nfa, con, s, to); /* move constraint arc */
15731545
freearc(nfa, con);
15741546
if (NISERR())
@@ -1735,7 +1707,7 @@ push(struct nfa * nfa,
17351707
s = newstate(nfa);
17361708
if (NISERR())
17371709
return 0;
1738-
copyouts(nfa, to, s, 1); /* duplicate outarcs */
1710+
copyouts(nfa, to, s); /* duplicate outarcs */
17391711
cparc(nfa, con, from, s); /* move constraint arc */
17401712
freearc(nfa, con);
17411713
if (NISERR())
@@ -2952,6 +2924,8 @@ dumpnfa(struct nfa * nfa,
29522924
{
29532925
#ifdef REG_DEBUG
29542926
struct state *s;
2927+
int nstates = 0;
2928+
int narcs = 0;
29552929

29562930
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
29572931
if (nfa->bos[0] != COLORLESS)
@@ -2964,7 +2938,12 @@ dumpnfa(struct nfa * nfa,
29642938
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
29652939
fprintf(f, "\n");
29662940
for (s = nfa->states; s != NULL; s = s->next)
2941+
{
29672942
dumpstate(s, f);
2943+
nstates++;
2944+
narcs += s->nouts;
2945+
}
2946+
fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
29682947
if (nfa->parent == NULL)
29692948
dumpcolors(nfa->cm, f);
29702949
fflush(f);

src/backend/regex/regcomp.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,10 @@ static int sortins_cmp(const void *, const void *);
136136
static void sortouts(struct nfa *, struct state *);
137137
static int sortouts_cmp(const void *, const void *);
138138
static void moveins(struct nfa *, struct state *, struct state *);
139-
static void copyins(struct nfa *, struct state *, struct state *, int);
139+
static void copyins(struct nfa *, struct state *, struct state *);
140140
static void mergeins(struct nfa *, struct state *, struct arc **, int);
141141
static void moveouts(struct nfa *, struct state *, struct state *);
142-
static void copyouts(struct nfa *, struct state *, struct state *, int);
142+
static void copyouts(struct nfa *, struct state *, struct state *);
143143
static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
144144
static void delsub(struct nfa *, struct state *, struct state *);
145145
static void deltraverse(struct nfa *, struct state *, struct state *);
@@ -181,7 +181,6 @@ static void dumpnfa(struct nfa *, FILE *);
181181
#ifdef REG_DEBUG
182182
static void dumpstate(struct state *, FILE *);
183183
static void dumparcs(struct state *, FILE *);
184-
static int dumprarcs(struct arc *, struct state *, FILE *, int);
185184
static void dumparc(struct arc *, struct state *, FILE *);
186185
static void dumpcnfa(struct cnfa *, FILE *);
187186
static void dumpcstate(int, struct cnfa *, FILE *);
@@ -613,7 +612,9 @@ makesearch(struct vars * v,
613612
for (s = slist; s != NULL; s = s2)
614613
{
615614
s2 = newstate(nfa);
616-
copyouts(nfa, s, s2, 1);
615+
NOERR();
616+
copyouts(nfa, s, s2);
617+
NOERR();
617618
for (a = s->ins; a != NULL; a = b)
618619
{
619620
b = a->inchain;
@@ -1997,7 +1998,7 @@ dump(regex_t *re,
19971998
dumpcolors(&g->cmap, f);
19981999
if (!NULLCNFA(g->search))
19992000
{
2000-
printf("\nsearch:\n");
2001+
fprintf(f, "\nsearch:\n");
20012002
dumpcnfa(&g->search, f);
20022003
}
20032004
for (i = 1; i < g->nlacons; i++)

0 commit comments

Comments
 (0)
0