modure: Update to re1.5 v0.6, support for char sets/classes ([a-c]). · lable/micropython@95908b0 · GitHub
[go: up one dir, main page]

Skip to content

Commit 95908b0

Browse files
committed
modure: Update to re1.5 v0.6, support for char sets/classes ([a-c]).
1 parent d27c0bb commit 95908b0

File tree

7 files changed

+74
-4
lines changed

7 files changed

+74
-4
lines changed

extmod/modure.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
#if MICROPY_PY_URE
4040

41-
#include "re1.5/regexp.h"
41+
#include "re1.5/re1.5.h"
4242

4343
#define FLAG_DEBUG 0x1000
4444

@@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = {
245245
#include "re1.5/compilecode.c"
246246
#include "re1.5/dumpcode.c"
247247
#include "re1.5/recursiveloop.c"
248+
#include "re1.5/charclass.c"
248249

249250
#endif //MICROPY_PY_URE

extmod/re1.5/charclass.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#include "re1.5.h"
2+
3+
/*
 * Test whether the character at *sp belongs to a compiled character class.
 *
 * pc points to the "cnt" byte after the Class opcode; that byte is followed
 * by cnt (low, high) byte pairs, each describing one inclusive range.
 *
 * Returns 1 if *sp lies within at least one of the ranges, 0 otherwise
 * (an empty class, cnt == 0, matches nothing).
 */
int _re1_5_classmatch(const char *pc, const char *sp)
{
    // Read the bytecode and subject as unsigned bytes so that a pair count
    // above 127 and ranges involving high-bit characters behave the same
    // regardless of whether plain char is signed on the target.
    const unsigned char *range = (const unsigned char *)pc;
    unsigned char ch = (unsigned char)*sp;
    int cnt = *range++;

    // A class matches when ANY of its ranges contains the character, so
    // step through every pair instead of re-testing only the first one.
    for (; cnt > 0; cnt--, range += 2) {
        if (ch >= range[0] && ch <= range[1]) {
            return 1;
        }
    }
    return 0;
}

extmod/re1.5/compilecode.c

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
#include "regexp.h"
5+
#include "re1.5.h"
66

77
static void insert_code(char *code, int at, int num, int *pc)
88
{
@@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re)
4545
break;
4646
case ')':
4747
break;
48+
case '[': {
49+
pc += 2;
50+
re++;
51+
while (*re != ']') {
52+
if (!*re) return -1;
53+
if (re[1] == '-') {
54+
re += 2;
55+
}
56+
pc += 2;
57+
re++;
58+
}
59+
}
4860
}
4961
}
5062

@@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog)
7688
EMIT(pc++, Any);
7789
prog->len++;
7890
break;
91+
case '[': {
92+
int cnt;
93+
term = pc;
94+
EMIT(pc++, Class);
95+
pc++; // Skip # of pair byte
96+
prog->len++;
97+
re++;
98+
for (cnt = 0; *re != ']'; re++, cnt++) {
99+
if (!*re) return NULL;
100+
EMIT(pc++, *re);
101+
if (re[1] == '-') {
102+
re += 2;
103+
}
104+
EMIT(pc++, *re);
105+
}
106+
EMIT(term + 1, cnt);
107+
break;
108+
}
79109
case '(':
80110
term = pc;
81111

extmod/re1.5/dumpcode.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
#include "regexp.h"
5+
#include "re1.5.h"
66

77
void re1_5_dumpcode(ByteProg *prog)
88
{
@@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog)
3232
case Any:
3333
printf("any\n");
3434
break;
35+
case Class: {
36+
int num = code[pc++];
37+
printf("class %d", num);
38+
while (num--) {
39+
printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
40+
pc += 2;
41+
}
42+
printf("\n");
43+
break;
44+
}
3545
case Match:
3646
printf("match\n");
3747
break;

extmod/re1.5/regexp.h renamed to extmod/re1.5/re1.5.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,18 @@ enum /* Inst.opcode */
8080
CONSUMERS = 1,
8181
Char = CONSUMERS,
8282
Any,
83+
Class,
84+
8385
ASSERTS = 0x50,
8486
Bol = ASSERTS,
8587
Eol,
88+
8689
// Instructions which take relative offset as arg
8790
JUMPS = 0x60,
8891
Jmp = JUMPS,
8992
Split,
9093
RSplit,
94+
9195
// Other (special) instructions
9296
Save = 0x7e,
9397
Match = 0x7f,
@@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re);
139143
int re1_5_compilecode(ByteProg *prog, const char *re);
140144
void re1_5_dumpcode(ByteProg *prog);
141145
void cleanmarks(ByteProg *prog);
146+
int _re1_5_classmatch(const char *pc, const char *sp);
142147

143148
#endif /*_RE1_5_REGEXP__H*/

extmod/re1.5/recursiveloop.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
#include "regexp.h"
5+
#include "re1.5.h"
66

77
static int
88
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
@@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
2323
case Any:
2424
sp++;
2525
continue;
26+
case Class:
27+
if (!_re1_5_classmatch(pc, sp))
28+
return 0;
29+
pc += *(unsigned char*)pc * 2 + 1;
30+
sp++;
31+
continue;
2632
case Match:
2733
return 1;
2834
case Jmp:

tests/extmod/ure1.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@
2020
except IndexError:
2121
print("IndexError")
2222

23+
r = re.compile("[a-c]")
24+
m = r.match("a")
25+
print(m.group(0))
26+
m = r.match("d")
27+
print(m)
28+
m = r.match("A")
29+
print(m)
2330

2431
r = re.compile("o+")
2532
m = r.search("foobar")

0 commit comments

Comments
 (0)
0