Removed cached_property definition of mayReturnEmpty (conflicted with… · pyparsing/pyparsing@4bb24ba · GitHub
[go: up one dir, main page]

Skip to content

Commit 4bb24ba

Browse files
committed
Removed cached_property definition of mayReturnEmpty (conflicted with attribute defined in ParserElement.__init__), moved to computation of cached_property re; added check in Regex parseImpl* methods to raise ParseException if expr.mayReturnEmpty and matching beyond the end of the input string; added tests for deferred re compile in Regex, and refactored repetitive code in testParseUsingRegex
1 parent fccc7a8 commit 4bb24ba

File tree

4 files changed

+88
-71
lines changed

4 files changed

+88
-71
lines changed

CHANGES

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ Version 3.2.2 - under development
5757
question https://stackoverflow.com/questions/79327649 by Ben Alan). Also addressed
5858
bug in resolving PEP8 compliant argument name and legacy argument name.
5959

60+
- Fixed bug in `rest_of_line` and the underlying `Regex` class, in which matching a
61+
pattern that could match an empty string (such as `".*"` or `"[A-Z]*"`) would not raise
62+
a `ParseException` at or beyond the end of the input string. This could cause an
63+
infinite parsing loop when parsing `rest_of_line` at the end of the input string.
64+
Reported by user Kylotan, thanks! (Issue #593)
65+
6066
- Better exception message for `MatchFirst` and `Or` expressions, showing all alternatives
6167
rather than just the first one. Fixes Issue #592, reported by Focke, thanks!
6268

pyparsing/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def __repr__(self):
121121

122122

123123
__version_info__ = version_info(3, 2, 2, "final", 1)
124-
__version_time__ = "14 Mar 2025 00:02 UTC"
124+
__version_time__ = "16 Mar 2025 21:18 UTC"
125125
__version__ = __version_info__.__version__
126126
__versionTime__ = __version_time__
127127
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"

pyparsing/core.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3123,23 +3123,27 @@ def re(self) -> re.Pattern:
31233123

31243124
try:
31253125
self._re = re.compile(self.pattern, self.flags)
3126-
return self._re
31273126
except re.error:
31283127
raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")
3128+
else:
3129+
self.mayReturnEmpty = self.re.match("", pos=0) is not None
3130+
return self._re
31293131

31303132
@cached_property
31313133
def re_match(self) -> Callable[[str, int], Any]:
31323134
return self.re.match
31333135

3134-
@cached_property
3135-
def mayReturnEmpty(self) -> bool: # type: ignore[override]
3136-
return self.re_match("", 0) is not None
3137-
31383136
def _generateDefaultName(self) -> str:
31393137
unescaped = repr(self.pattern).replace("\\\\", "\\")
31403138
return f"Re:({unescaped})"
31413139

31423140
def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
3141+
# explicit check for matching past the length of the string;
3142+
# this is done because the re module will not complain about
3143+
# a match with `pos > len(instring)`, it will just return ""
3144+
if loc > len(instring) and self.mayReturnEmpty:
3145+
raise ParseException(instring, loc, self.errmsg, self)
3146+
31433147
result = self.re_match(instring, loc)
31443148
if not result:
31453149
raise ParseException(instring, loc, self.errmsg, self)
@@ -3154,6 +3158,9 @@ def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
31543158
return loc, ret
31553159

31563160
def parseImplAsGroupList(self, instring, loc, do_actions=True):
3161+
if loc > len(instring) and self.mayReturnEmpty:
3162+
raise ParseException(instring, loc, self.errmsg, self)
3163+
31573164
result = self.re_match(instring, loc)
31583165
if not result:
31593166
raise ParseException(instring, loc, self.errmsg, self)
@@ -3163,6 +3170,9 @@ def parseImplAsGroupList(self, instring, loc, do_actions=True):
31633170
return loc, ret
31643171

31653172
def parseImplAsMatch(self, instring, loc, do_actions=True):
3173+
if loc > len(instring) and self.mayReturnEmpty:
3174+
raise ParseException(instring, loc, self.errmsg, self)
3175+
31663176
result = self.re_match(instring, loc)
31673177
if not result:
31683178
raise ParseException(instring, loc, self.errmsg, self)

tests/test_unit.py

Lines changed: 66 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -4208,6 +4208,16 @@ def testUpcaseDowncaseUnicode(self):
42084208
+ td_end.suppress()
42094209
)
42104210

4211+
def testRegexDeferredCompile(self):
4212+
"""test deferred compilation of Regex patterns"""
4213+
re_expr = pp.Regex(r"[A-Z]*")
4214+
self.assertFalse(re_expr.mayReturnEmpty, "failed to initialize mayReturnEmpty flag to False")
4215+
self.assertEqual(re_expr._re, None)
4216+
4217+
compiled = re_expr.re
4218+
self.assertTrue(re_expr.mayReturnEmpty, "failed to set mayReturnEmpty flag to True")
4219+
self.assertEqual(re_expr._re, compiled)
4220+
42114221
def testParseUsingRegex(self):
42124222
signedInt = pp.Regex(r"[-+][0-9]+")
42134223
unsignedInt = pp.Regex(r"[0-9]+")
@@ -4225,6 +4235,7 @@ def testMatch(expression, instring, shouldPass, expectedString=None):
42254235
print(
42264236
f"\tproduced {repr(result[0])} instead of {repr(expectedString)}"
42274237
)
4238+
return False
42284239
return True
42294240
except pp.ParseException:
42304241
print(f"{expression!r} incorrectly failed to match {instring!r}")
@@ -4239,73 +4250,56 @@ def testMatch(expression, instring, shouldPass, expectedString=None):
42394250
return False
42404251

42414252
# These should fail
4242-
self.assertTrue(
4243-
testMatch(signedInt, "1234 foo", False), "Re: (1) passed, expected fail"
4244-
)
4245-
self.assertTrue(
4246-
testMatch(signedInt, " +foo", False), "Re: (2) passed, expected fail"
4247-
)
4248-
self.assertTrue(
4249-
testMatch(unsignedInt, "abc", False), "Re: (3) passed, expected fail"
4250-
)
4251-
self.assertTrue(
4252-
testMatch(unsignedInt, "+123 foo", False), "Re: (4) passed, expected fail"
4253-
)
4254-
self.assertTrue(
4255-
testMatch(simpleString, "foo", False), "Re: (5) passed, expected fail"
4256-
)
4257-
self.assertTrue(
4258-
testMatch(simpleString, "\"foo bar'", False),
4259-
"Re: (6) passed, expected fail",
4260-
)
4261-
self.assertTrue(
4262-
testMatch(simpleString, "'foo bar\"", False),
4263-
"Re: (7) passed, expected fail",
4264-
)
4253+
for i, (test_expr, test_string) in enumerate(
4254+
[
4255+
(signedInt, "1234 foo"),
4256+
(signedInt, " +foo"),
4257+
(unsignedInt, "abc"),
4258+
(unsignedInt, "+123 foo"),
4259+
(simpleString, "foo"),
4260+
(simpleString, "\"foo bar'"),
4261+
(simpleString, "'foo bar\""),
4262+
(compiledRE, "blah"),
4263+
],
4264+
start = 1
4265+
):
4266+
with self.subTest(test_expr=test_expr, test_string=test_string):
4267+
self.assertTrue(
4268+
testMatch(
4269+
test_expr,
4270+
test_string,
4271+
False,
4272+
),
4273+
f"Re: ({i}) passed, expected fail",
4274+
)
42654275

42664276
# These should pass
4267-
self.assertTrue(
4268-
testMatch(signedInt, " +123", True, "+123"),
4269-
"Re: (8) failed, expected pass",
4270-
)
4271-
self.assertTrue(
4272-
testMatch(signedInt, "+123", True, "+123"), "Re: (9) failed, expected pass"
4273-
)
4274-
self.assertTrue(
4275-
testMatch(signedInt, "+123 foo", True, "+123"),
4276-
"Re: (10) failed, expected pass",
4277-
)
4278-
self.assertTrue(
4279-
testMatch(signedInt, "-0 foo", True, "-0"), "Re: (11) failed, expected pass"
4280-
)
4281-
self.assertTrue(
4282-
testMatch(unsignedInt, "123 foo", True, "123"),
4283-
"Re: (12) failed, expected pass",
4284-
)
4285-
self.assertTrue(
4286-
testMatch(unsignedInt, "0 foo", True, "0"), "Re: (13) failed, expected pass"
4287-
)
4288-
self.assertTrue(
4289-
testMatch(simpleString, '"foo"', True, '"foo"'),
4290-
"Re: (14) failed, expected pass",
4291-
)
4292-
self.assertTrue(
4293-
testMatch(simpleString, "'foo bar' baz", True, "'foo bar'"),
4294-
"Re: (15) failed, expected pass",
4295-
)
4296-
4297-
self.assertTrue(
4298-
testMatch(compiledRE, "blah", False), "Re: (16) passed, expected fail"
4299-
)
4300-
self.assertTrue(
4301-
testMatch(compiledRE, "BLAH", True, "BLAH"),
4302-
"Re: (17) failed, expected pass",
4303-
)
4277+
for i, (test_expr, test_string, expected_match) in enumerate(
4278+
[
4279+
(signedInt, " +123", "+123"),
4280+
(signedInt, "+123", "+123"),
4281+
(signedInt, "+123 foo", "+123"),
4282+
(signedInt, "-0 foo", "-0"),
4283+
(unsignedInt, "123 foo", "123"),
4284+
(unsignedInt, "0 foo", "0"),
4285+
(simpleString, '"foo"', '"foo"'),
4286+
(simpleString, "'foo bar' baz", "'foo bar'"),
4287+
(compiledRE, "BLAH", "BLAH"),
4288+
(namedGrouping, '"foo bar" baz', '"foo bar"'),
4289+
],
4290+
start = i + 1
4291+
):
4292+
with self.subTest(test_expr=test_expr, test_string=test_string):
4293+
self.assertTrue(
4294+
testMatch(
4295+
test_expr,
4296+
test_string,
4297+
True,
4298+
expected_match,
4299+
),
4300+
f"Re: ({i}) failed, expected pass",
4301+
)
43044302

4305-
self.assertTrue(
4306-
testMatch(namedGrouping, '"foo bar" baz', True, '"foo bar"'),
4307-
"Re: (16) failed, expected pass",
4308-
)
43094303
ret = namedGrouping.parseString('"zork" blah', parseAll=False)
43104304
print(ret)
43114305
print(list(ret.items()))
@@ -4330,7 +4324,7 @@ def testMatch(expression, instring, shouldPass, expectedString=None):
43304324
with self.assertRaises(
43314325
ValueError, msg="failed to warn empty string passed to Regex"
43324326
):
4333-
pp.Regex("").re
4327+
pp.Regex("").re # noqa
43344328

43354329
def testRegexAsType(self):
43364330
test_str = "sldkjfj 123 456 lsdfkj"
@@ -4426,6 +4420,13 @@ def testRegexInvalidType(self):
44264420
with self.assertRaises(TypeError, msg="issue with Regex of type int"):
44274421
expr = pp.Regex(12)
44284422

4423+
def testRegexLoopPastEndOfString(self):
4424+
"""test Regex matching after end of string"""
4425+
NL = pp.LineEnd().suppress()
4426+
empty_line = pp.rest_of_line() + NL
4427+
result = empty_line[1, 10].parse_string("\n\n")
4428+
self.assertEqual(3, len(result))
4429+
44294430
def testPrecededBy(self):
44304431
num = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
44314432
interesting_num = pp.PrecededBy(pp.Char("abc")("prefix*")) + num

0 commit comments

Comments
 (0)
0