|
28 | 28 | POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
|
29 | 29 | }
|
30 | 30 |
|
| 31 | +_CHARSET_ALL = [(NEGATE, None)] |
| 32 | + |
31 | 33 | def _combine_flags(flags, add_flags, del_flags,
|
32 | 34 | TYPE_FLAGS=_parser.TYPE_FLAGS):
|
33 | 35 | if add_flags & TYPE_FLAGS:
|
@@ -84,17 +86,22 @@ def _compile(code, pattern, flags):
|
84 | 86 | code[skip] = _len(code) - skip
|
85 | 87 | elif op is IN:
|
86 | 88 | charset, hascased = _optimize_charset(av, iscased, tolower, fixes)
|
87 |
| - if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE: |
88 |
| - emit(IN_LOC_IGNORE) |
89 |
| - elif not hascased: |
90 |
| - emit(IN) |
91 |
| - elif not fixes: # ascii |
92 |
| - emit(IN_IGNORE) |
| 89 | + if not charset: |
| 90 | + emit(FAILURE) |
| 91 | + elif charset == _CHARSET_ALL: |
| 92 | + emit(ANY_ALL) |
93 | 93 | else:
|
94 |
| - emit(IN_UNI_IGNORE) |
95 |
| - skip = _len(code); emit(0) |
96 |
| - _compile_charset(charset, flags, code) |
97 |
| - code[skip] = _len(code) - skip |
| 94 | + if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE: |
| 95 | + emit(IN_LOC_IGNORE) |
| 96 | + elif not hascased: |
| 97 | + emit(IN) |
| 98 | + elif not fixes: # ascii |
| 99 | + emit(IN_IGNORE) |
| 100 | + else: |
| 101 | + emit(IN_UNI_IGNORE) |
| 102 | + skip = _len(code); emit(0) |
| 103 | + _compile_charset(charset, flags, code) |
| 104 | + code[skip] = _len(code) - skip |
98 | 105 | elif op is ANY:
|
99 | 106 | if flags & SRE_FLAG_DOTALL:
|
100 | 107 | emit(ANY_ALL)
|
@@ -277,6 +284,10 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
|
277 | 284 | charmap[i] = 1
|
278 | 285 | elif op is NEGATE:
|
279 | 286 | out.append((op, av))
|
| 287 | + elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail: |
| 288 | + # Optimize [\s\S] etc. |
| 289 | + out = [] if out else _CHARSET_ALL |
| 290 | + return out, False |
280 | 291 | else:
|
281 | 292 | tail.append((op, av))
|
282 | 293 | except IndexError:
|
@@ -519,13 +530,18 @@ def _compile_info(code, pattern, flags):
|
519 | 530 | # look for a literal prefix
|
520 | 531 | prefix = []
|
521 | 532 | prefix_skip = 0
|
522 |
| - charset = [] # not used |
| 533 | + charset = None # not used |
523 | 534 | if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE):
|
524 | 535 | # look for literal prefix
|
525 | 536 | prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
|
526 | 537 | # if no prefix, look for charset prefix
|
527 | 538 | if not prefix:
|
528 | 539 | charset = _get_charset_prefix(pattern, flags)
|
| 540 | + if charset: |
| 541 | + charset, hascased = _optimize_charset(charset) |
| 542 | + assert not hascased |
| 543 | + if charset == _CHARSET_ALL: |
| 544 | + charset = None |
529 | 545 | ## if prefix:
|
530 | 546 | ## print("*** PREFIX", prefix, prefix_skip)
|
531 | 547 | ## if charset:
|
@@ -560,8 +576,6 @@ def _compile_info(code, pattern, flags):
|
560 | 576 | # generate overlap table
|
561 | 577 | code.extend(_generate_overlap_table(prefix))
|
562 | 578 | elif charset:
|
563 |
| - charset, hascased = _optimize_charset(charset) |
564 |
| - assert not hascased |
565 | 579 | _compile_charset(charset, flags, code)
|
566 | 580 | code[skip] = len(code) - skip
|
567 | 581 |
|
|
0 commit comments