10000 Back-patch fix for extraction of fixed prefixes from regular expressi… · markokr/postgres@18c8dc3 · GitHub
[go: up one dir, main page]

Skip to content

Commit 18c8dc3

Browse files
committed
Back-patch fix for extraction of fixed prefixes from regular expressions.
Back-patch of commits 628cbb5 and c6aae30. This has been broken since 7.3, so back-patch to all supported branches.
1 parent f12960d commit 18c8dc3

File tree

14 files changed

+505
-210
lines changed

14 files changed

+505
-210
lines changed

src/backend/regex/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ subdir = src/backend/regex
1212
top_builddir = ../../..
1313
include $(top_builddir)/src/Makefile.global
1414

15-
OBJS = regcomp.o regerror.o regexec.o regfree.o
15+
OBJS = regcomp.o regerror.o regexec.o regfree.o regprefix.o
1616

1717
include $(top_srcdir)/src/backend/common.mk
1818

src/backend/regex/README

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@ So this file is an attempt to reverse-engineer some docs.
77
General source-file layout
88
--------------------------
99

10-
There are four separately-compilable source files, each exposing exactly
10+
There are five separately-compilable source files, each exposing exactly
1111
one exported function:
1212
regcomp.c: pg_regcomp
1313
regexec.c: pg_regexec
1414
regerror.c: pg_regerror
1515
regfree.c: pg_regfree
16+
regprefix.c: pg_regprefix
1617
(The pg_ prefixes were added by the Postgres project to distinguish this
1718
library version from any similar one that might be present on a particular
1819
system. They'd need to be removed or replaced in any standalone version
@@ -44,6 +45,7 @@ regexec.c Top-level regex execution code
4445
rege_dfa.c DFA creation and execution
4546
regerror.c pg_regerror: generate text for a regex error code
4647
regfree.c pg_regfree: API to free a no-longer-needed regex_t
48+
regprefix.c Code for extracting a common prefix from a regex_t
4749

4850
The locale-specific code is concerned primarily with case-folding and with
4951
expanding locale-specific character classes, such as [[:alnum:]]. It

src/backend/regex/regc_color.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,9 @@ initcm(struct vars * v,
6666
cd = cm->cd; /* cm->cd[WHITE] */
6767
cd->sub = NOSUB;
6868
cd->arcs = NULL;
69-
cd->flags = 0;
69+
cd->firstchr = CHR_MIN;
7070
cd->nchrs = CHR_MAX - CHR_MIN + 1;
71+
cd->flags = 0;
7172

7273
/* upper levels of tree */
7374
for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--)
@@ -272,6 +273,7 @@ newcolor(struct colormap * cm)
272273
cd->nchrs = 0;
273274
cd->sub = NOSUB;
274275
cd->arcs = NULL;
276+
cd->firstchr = CHR_MIN; /* in case never set otherwise */
275277
cd->flags = 0;
276278
cd->block = NULL;
277279

@@ -371,6 +373,8 @@ subcolor(struct colormap * cm, chr c)
371373
if (co == sco) /* already in an open subcolor */
372374
return co; /* rest is redundant */
373375
cm->cd[co].nchrs--;
376+
if (cm->cd[sco].nchrs == 0)
377+
cm->cd[sco].firstchr = c;
374378
cm->cd[sco].nchrs++;
375379
setcolor(cm, c, sco);
376380
return sco;
@@ -438,6 +442,11 @@ subrange(struct vars * v,
438442

439443
/*
440444
* subblock - allocate new subcolors for one tree block of chrs, fill in arcs
445+
*
446+
* Note: subcolors that are created during execution of this function
447+
* will not be given a useful value of firstchr; it'll be left as CHR_MIN.
448+
* For the current usage of firstchr in pg_regprefix, this does not matter
449+
* because such subcolors won't occur in the common prefix of a regex.
441450
*/
442451
static void
443452
subblock(struct vars * v,

src/backend/regex/regc_nfa.c

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,14 +1330,16 @@ compact(struct nfa * nfa,
13301330
for (s = nfa->states; s != NULL; s = s->next)
13311331
{
13321332
nstates++;
1333-
narcs += 1 + s->nouts + 1;
1334-
/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1333+
narcs += s->nouts + 1; /* need one extra for endmarker */
13351334
}
13361335

1336+
cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
13371337
cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
13381338
cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
1339-
if (cnfa->states == NULL || cnfa->arcs == NULL)
1339+
if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
13401340
{
1341+
if (cnfa->stflags != NULL)
1342+
FREE(cnfa->stflags);
13411343
if (cnfa->states != NULL)
13421344
FREE(cnfa->states);
13431345
if (cnfa->arcs != NULL)
@@ -1359,9 +1361,8 @@ compact(struct nfa * nfa,
13591361
for (s = nfa->states; s != NULL; s = s->next)
13601362
{
13611363
assert((size_t) s->no < nstates);
1364+
cnfa->stflags[s->no] = 0;
13621365
cnfa->states[s->no] = ca;
1363-
ca->co = 0; /* clear and skip flags "arc" */
1364-
ca++;
13651366
first = ca;
13661367
for (a = s->outs; a != NULL; a = a->outchain)
13671368
switch (a->type)
@@ -1392,8 +1393,8 @@ compact(struct nfa * nfa,
13921393

13931394
/* mark no-progress states */
13941395
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
1395-
cnfa->states[a->to->no]->co = 1;
1396-
cnfa->states[nfa->pre->no]->co = 1;
1396+
cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
1397+
cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
13971398
}
13981399

13991400
/*
@@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa)
14331434
{
14341435
assert(cnfa->nstates != 0); /* not empty already */
14351436
cnfa->nstates = 0;
1437+
FREE(cnfa->stflags);
14361438
FREE(cnfa->states);
14371439
FREE(cnfa->arcs);
14381440
}
@@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa,
16171619
fprintf(f, ", haslacons");
16181620
fprintf(f, "\n");
16191621
for (st = 0; st < cnfa->nstates; st++)
1620-
dumpcstate(st, cnfa->states[st], cnfa, f);
1622+
dumpcstate(st, cnfa, f);
16211623
fflush(f);
16221624
}
16231625
#endif
@@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa,
16291631
*/
16301632
static void
16311633
dumpcstate(int st,
1632-
struct carc * ca,
16331634
struct cnfa * cnfa,
16341635
FILE *f)
16351636
{
1636-
int i;
1637+
struct carc * ca;
16371638
int pos;
16381639

1639-
fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
1640+
fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
16401641
pos = 1;
1641-
for (i = 1; ca[i].co != COLORLESS; i++)
1642+
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
16421643
{
1643-
if (ca[i].co < cnfa->ncolors)
1644-
fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
1644+
if (ca->co < cnfa->ncolors)
1645+
fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
16451646
else
1646-
fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
1647-
ca[i].to);
1647+
fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
16481648
if (pos == 5)
16491649
{
16501650
fprintf(f, "\n");
@@ -1653,7 +1653,7 @@ dumpcstate(int st,
16531653
else
16541654
pos++;
16551655
}
1656-
if (i == 1 || pos != 1)
1656+
if (ca == cnfa->states[st] || pos != 1)
16571657
fprintf(f, "\n");
16581658
fflush(f);
16591659
}

src/backend/regex/regcomp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *);
162162
static int dumprarcs(struct arc *, struct state *, FILE *, int);
163163
static void dumparc(struct arc *, struct state *, FILE *);
164164
static void dumpcnfa(struct cnfa *, FILE *);
165-
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
165+
static void dumpcstate(int, struct cnfa *, FILE *);
166166
#endif
167167
/* === regc_cvec.c === */
168168
static struct cvec *newcvec(int, int);

src/backend/regex/rege_dfa.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -457,14 +457,14 @@ miss(struct vars * v, /* used only for debug flags */
457457
gotstate = 0;
458458
for (i = 0; i < d->nstates; i++)
459459
if (ISBSET(css->states, i))
460-
for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
460+
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
461461
if (ca->co == co)
462462
{
463463
BSET(d->work, ca->to);
464464
gotstate = 1;
465465
if (ca->to == cnfa->post)
466466
ispost = 1;
467-
if (!cnfa->states[ca->to]->co)
467+
if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
468468
noprogress = 0;
469469
FDEBUG(("%d -> %d\n", i, ca->to));
470470
}
@@ -475,10 +475,9 @@ miss(struct vars * v, /* used only for debug flags */
475475
dolacons = 0;
476476
for (i = 0; i < d->nstates; i++)
477477
if (ISBSET(d->work, i))
478-
for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
479-
ca++)
478+
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
480479
{
481-
if (ca->co <= cnfa->ncolors)
480+
if (ca->co < cnfa->ncolors)
482481
continue; /* NOTE CONTINUE */
483482
sawlacons = 1;
484483
if (ISBSET(d->work, ca->to))
@@ -489,7 +488,7 @@ miss(struct vars * v, /* used only for debug flags */
489488
dolacons = 1;
490489
if (ca->to == cnfa->post)
491490
ispost = 1;
492-
if (!cnfa->states[ca->to]->co)
491+
if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
493492
noprogress = 0;
494493
FDEBUG(("%d :> %d\n", i, ca->to));
495494
}

0 commit comments

Comments
 (0)
0