10000 extmod/re1.5: Check and report byte overflow errors in _compilecode. · sstobbe/micropython@172fb52 · GitHub
[go: up one dir, main page]

Skip to content

Commit 172fb52

Browse files
jepler authored and dpgeorge committed
extmod/re1.5: Check and report byte overflow errors in _compilecode.
The generated regex code is limited in the range of jumps and counts, and this commit checks all cases which can overflow given the right kind of input regex, and returns an error in such a case. This change assumes that the results that overflow an int8_t do not overflow a platform int. Closes: micropython#7078 Signed-off-by: Jeff Epler <jepler@gmail.com>
1 parent d35f12f commit 172fb52

File tree

3 files changed

+61
-11
lines changed

3 files changed

+61
-11
lines changed

extmod/re1.5/compilecode.c

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,20 @@
88
((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num)
99
#define REL(at, to) (to - at - 2)
1010
#define EMIT(at, byte) (code ? (code[at] = byte) : (at))
11+
#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
1112
#define PC (prog->bytelen)
1213

14+
/*
 * Store `val` as a single byte of generated code and record overflow.
 *
 * at:   index into `code` at which the byte is written.
 * code: output buffer, or NULL, in which case nothing is written and only
 *       the range check is performed.
 * val:  value to emit; it must fit in an int8_t.
 * err:  sticky error flag. It is set to true when `val` is out of range and
 *       is never cleared here, so one flag can accumulate over many calls.
 */
static void _emit_checked(int at, char *code, int val, bool *err) {
    // Test against the limits directly: converting an out-of-range int to
    // int8_t is implementation-defined, so a cast round-trip is not a
    // portable way to detect overflow.
    if (val < INT8_MIN || val > INT8_MAX) {
        *err = true;
    }
    if (code) {
        // The byte is still written on overflow; the caller is expected to
        // discard the output once `*err` is set.
        code[at] = (char)val;
    }
}
20+
1321
static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
1422
{
1523
char *code = sizecode ? NULL : prog->insts;
24+
bool err = false;
1625
int start = PC;
1726
int term = PC;
1827
int alt_label = 0;
@@ -64,7 +73,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
6473
}
6574
EMIT(PC++, *re);
6675
}
67-
EMIT(term + 1, cnt);
76+
EMIT_CHECKED(term + 1, cnt);
6877
break;
6978
}
7079
case '(': {
@@ -75,7 +84,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
7584
if (capture) {
7685
sub = ++prog->sub;
7786
EMIT(PC++, Save);
78-
EMIT(PC++, 2 * sub);
87+
EMIT_CHECKED(PC++, 2 * sub);
7988
prog->len++;
8089
} else {
8190
re += 2;
@@ -86,7 +95,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
8695

8796
if (capture) {
8897
EMIT(PC++, Save);
89-
EMIT(PC++, 2 * sub + 1);
98+
EMIT_CHECKED(PC++, 2 * sub + 1);
9099
prog->len++;
91100
}
92101

@@ -101,23 +110,23 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
101110
} else {
102111
EMIT(term, Split);
103112
}
104-
EMIT(term + 1, REL(term, PC));
113+
EMIT_CHECKED(term + 1, REL(term, PC));
105114
prog->len++;
106115
term = PC;
107116
break;
108117
case '*':
109118
if (PC == term) return NULL; // nothing to repeat
110119
INSERT_CODE(term, 2, PC);
111120
EMIT(PC, Jmp);
112-
EMIT(PC + 1, REL(PC, term));
121+
EMIT_CHECKED(PC + 1, REL(PC, term));
113122
PC += 2;
114123
if (re[1] == '?') {
115124
EMIT(term, RSplit);
116125
re++;
117126
} else {
118127
EMIT(term, Split);
119128
}
120-
EMIT(term + 1, REL(term, PC));
129+
EMIT_CHECKED(term + 1, REL(term, PC));
121130
prog->len += 2;
122131
term = PC;
123132
break;
@@ -129,20 +138,20 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
129138
} else {
130139
EMIT(PC, RSplit);
131140
}
132-
EMIT(PC + 1, REL(PC, term));
141+
EMIT_CHECKED(PC + 1, REL(PC, term));
133142
PC += 2;
134143
prog->len++;
135144
term = PC;
136145
break;
137146
case '|':
138147
if (alt_label) {
139-
EMIT(alt_label, REL(alt_label, PC) + 1);
148+
EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
140149
}
141150
INSERT_CODE(start, 2, PC);
142151
EMIT(PC++, Jmp);
143152
alt_label = PC++;
144153
EMIT(start, Split);
145-
EMIT(start + 1, REL(start, PC));
154+
EMIT_CHECKED(start + 1, REL(start, PC));
146155
prog->len += 2;
147156
term = PC;
148157
break;
@@ -160,9 +169,9 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
160169
}
161170

162171
if (alt_label) {
163-
EMIT(alt_label, REL(alt_label, PC) + 1);
172+
EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
164173
}
165-
return re;
174+
return err ? NULL : re;
166175
}
167176

168177
int re1_5_sizecode(const char *re)

tests/extmod/ure_limit.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Test overflow in ure.compile output code.
2+
3+
try:
4+
import ure as re
5+
except ImportError:
6+
print("SKIP")
7+
raise SystemExit
8+
9+
10+
def test_re(r):
11+
try:
12+
re.compile(r)
13+
except:
14+
print("Error")
15+
16+
17+
# too many chars in []
18+
test_re("[" + "a" * 256 + "]")
19+
20+
# too many groups
21+
test_re("(a)" * 256)
22+
23+
# jump too big for ?
24+
test_re("(" + "a" * 62 + ")?")
25+
26+
# jump too big for *
27+
test_re("(" + "a" * 60 + ".)*")
28+
test_re("(" + "a" * 60 + "..)*")
29+
30+
# jump too big for +
31+
test_re("(" + "a" * 62 + ")+")
32+
33+
# jump too big for |
34+
test_re("b" * 63 + "|a")

tests/extmod/ure_limit.py.exp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Error
2+
Error
3+
Error
4+
Error
5+
Error
6+
Error
7+
Error

0 commit comments

Comments
 (0)
0