Merge branch 'pyparsing_3.1.x' · pyparsing/pyparsing@1355e76 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1355e76

Browse files
committed
Merge branch 'pyparsing_3.1.x'
2 parents 173bc16 + 5b939cc commit 1355e76

File tree

7 files changed

+169
-114
lines changed

7 files changed

+169
-114
lines changed

CHANGES

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
1212

1313
Version 3.2.0 will also discontinue support for Python versions 3.6 and 3.7.
1414

15+
16+
Version 3.1.1 - (in development)
17+
--------------------------------
18+
- Some general internal code cleanup. (Instigated by Michal Čihař, Issue #488)
19+
20+
1521
Version 3.1.0 - June, 2023
1622
--------------------------
1723
- Added `tag_emitter.py` to examples. This example demonstrates how to insert

examples/booleansearchparser.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,11 @@
9090
Suppress,
9191
OneOrMore,
9292
one_of,
93+
ParserElement,
9394
)
9495
import re
9596

97+
ParserElement.enablePackrat()
9698

9799
# Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt
98100
# (includes characters not found in the BasicMultilingualPlane)

examples/stackish.py

Lines changed: 32 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -28,72 +28,60 @@
2828
separation character and perform reasonable diffs on two structures.
2929
"""
3030

31-
from pyparsing import (
32-
Suppress,
33-
Word,
34-
nums,
35-
alphas,
36-
alphanums,
37-
Combine,
38-
oneOf,
39-
Optional,
40-
QuotedString,
41-
Forward,
42-
Group,
43-
ZeroOrMore,
44-
srange,
45-
pyparsing_common as ppc,
46-
)
31+
import pyparsing as pp
32+
ppc = pp.common
4733

48-
MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'")
34+
MARK, UNMARK, AT, COLON, QUOTE = pp.Suppress.using_each("[]@:'")
4935

5036
NUMBER = ppc.integer()
5137
FLOAT = ppc.real()
52-
STRING = QuotedString('"', multiline=True) | QuotedString("'", multiline=True)
53-
WORD = Word(alphas, alphanums + "_:")
54-
ATTRIBUTE = Combine(AT + WORD)
38+
STRING = pp.QuotedString('"', multiline=True) | pp.QuotedString("'", multiline=True)
39+
WORD = pp.DelimitedList(pp.Word(pp.alphas, pp.alphanums + "_"), delim=":", combine=True)
40+
ATTRIBUTE = pp.Combine(AT + WORD)
5541

56-
strBody = Forward()
42+
str_body = pp.Forward()
5743

5844

59-
def setBodyLength(tokens):
60-
strBody << Word(srange(r"[\0x00-\0xffff]"), exact=int(tokens[0]))
45+
def set_body_length(tokens):
46+
str_body << pp.Word(pp.srange(r"[\0x00-\0xffff]"), exact=int(tokens[0]))
6147
return ""
6248

6349

64-
BLOB = Combine(
65-
QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE
50+
BLOB = pp.Combine(
51+
QUOTE + pp.Word(pp.nums).set_parse_action(set_body_length) + COLON + str_body + QUOTE
6652
)
6753

68-
item = Forward()
69-
7054

71-
def assignUsing(s):
72-
def assignPA(tokens):
55+
def assign_using(s):
56+
def assign_pa(tokens):
7357
if s in tokens:
7458
tokens[tokens[s]] = tokens[0]
7559
del tokens[s]
7660

77-
return assignPA
61+
return assign_pa
7862

7963

64+
item = pp.Forward()
65+
8066
GROUP = (
8167
MARK
82-
+ Group(
83-
ZeroOrMore(
84-
(item + Optional(ATTRIBUTE)("attr")).setParseAction(assignUsing("attr"))
85-
)
68+
+ pp.Group(
69+
(item + ATTRIBUTE[0, 1]("attr")).set_parse_action(assign_using("attr"))[...]
8670
)
8771
+ (WORD("name") | UNMARK)
88-
).setParseAction(assignUsing("name"))
72+
).set_parse_action(assign_using("name"))
8973
item <<= FLOAT | NUMBER | STRING | BLOB | GROUP
9074

91-
item.runTests(
92-
"""\
93-
[ '10:1234567890' @name 25 @age +0.45 @percentage person:zed
94-
[ [ "hello" 1 child root
95-
[ "child" [ 200 '4:like' "I" "hello" things root
96-
[ [ "data" [ 2 1 ] @numbers child root
97-
[ [ 1 2 3 ] @test 4 5 6 root
98-
"""
99-
)
75+
if __name__ == '__main__':
76+
77+
success, _ = item.run_tests(
78+
"""\
79+
[ '10:1234567890' @name 25 @age +0.45 @percentage person:zed
80+
[ [ "hello" 1 child root
81+
[ "child" [ 200 '4:like' "I" "hello" things root
82+
[ [ "data" [ 2 1 ] @numbers child root
83+
[ [ 1 2 3 ] @test 4 5 6 root
84+
"""
85+
)
86+
87+
assert success

pyparsing/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ def __repr__(self):
120120
return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})"
121121

122122

123-
__version_info__ = version_info(3, 1, 0, "final", 1)
124-
__version_time__ = "18 Jun 2023 14:05 UTC"
123+
__version_info__ = version_info(3, 1, 1, "final", 1)
124+
__version_time__ = "30 Jun 2023 05:39 UTC"
125125
__version__ = __version_info__.__version__
126126
__versionTime__ = __version_time__
127127
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
@@ -319,4 +319,7 @@ def __repr__(self):
319319
"unicodeString",
320320
"withAttribute",
321321
"withClass",
322+
"common",
323+
"unicode",
324+
"testing",
322325
]

pyparsing/core.py

Lines changed: 83 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -3224,98 +3224,100 @@ def __init__(
32243224
convertWhitespaceEscapes: bool = True,
32253225
):
32263226
super().__init__()
3227-
escChar = escChar or esc_char
3228-
escQuote = escQuote or esc_quote
3229-
unquoteResults = unquoteResults and unquote_results
3230-
endQuoteChar = endQuoteChar or end_quote_char
3231-
convertWhitespaceEscapes = (
3227+
esc_char = escChar or esc_char
3228+
esc_quote = escQuote or esc_quote
3229+
unquote_results = unquoteResults and unquote_results
3230+
end_quote_char = endQuoteChar or end_quote_char
3231+
convert_whitespace_escapes = (
32323232
convertWhitespaceEscapes and convert_whitespace_escapes
32333233
)
32343234
quote_char = quoteChar or quote_char
32353235

3236-
# remove white space from quote chars - wont work anyway
3236+
# remove white space from quote chars
32373237
quote_char = quote_char.strip()
32383238
if not quote_char:
32393239
raise ValueError("quote_char cannot be the empty string")
32403240

3241-
if endQuoteChar is None:
3242-
endQuoteChar = quote_char
3241+
if end_quote_char is None:
3242+
end_quote_char = quote_char
32433243
else:
3244-
endQuoteChar = endQuoteChar.strip()
3245-
if not endQuoteChar:
3244+
end_quote_char = end_quote_char.strip()
3245+
if not end_quote_char:
32463246
raise ValueError("end_quote_char cannot be the empty string")
32473247

3248-
self.quoteChar: str = quote_char
3249-
self.quoteCharLen: int = len(quote_char)
3250-
self.firstQuoteChar: str = quote_char[0]
3251-
self.endQuoteChar: str = endQuoteChar
3252-
self.endQuoteCharLen: int = len(endQuoteChar)
3253-
self.escChar: str = escChar or ""
3254-
self.escQuote: str = escQuote or ""
3255-
self.unquoteResults: bool = unquoteResults
3256-
self.convertWhitespaceEscapes: bool = convertWhitespaceEscapes
3248+
self.quote_char: str = quote_char
3249+
self.quote_char_len: int = len(quote_char)
3250+
self.first_quote_char: str = quote_char[0]
3251+
self.end_quote_char: str = end_quote_char
3252+
self.end_quote_char_len: int = len(end_quote_char)
3253+
self.esc_char: str = esc_char or ""
3254+
self.has_esc_char: bool = esc_char is not None
3255+
self.esc_quote: str = esc_quote or ""
3256+
self.unquote_results: bool = unquote_results
3257+
self.convert_whitespace_escapes: bool = convert_whitespace_escapes
32573258
self.multiline = multiline
3259+
self.re_flags = re.RegexFlag(0)
32583260

3259-
sep = ""
3260-
inner_pattern = ""
3261+
# fmt: off
3262+
# build up re pattern for the content between the quote delimiters
3263+
inner_pattern = []
32613264

3262-
if escQuote:
3263-
inner_pattern += rf"{sep}(?:{re.escape(escQuote)})"
3264-
sep = "|"
3265+
if esc_quote:
3266+
inner_pattern.append(rf"(?:{re.escape(esc_quote)})")
32653267

3266-
if escChar:
3267-
inner_pattern += rf"{sep}(?:{re.escape(escChar)}.)"
3268-
sep = "|"
3269-
self.escCharReplacePattern = re.escape(escChar) + "(.)"
3268+
if esc_char:
3269+
inner_pattern.append(rf"(?:{re.escape(esc_char)}.)")
32703270

3271-
if len(self.endQuoteChar) > 1:
3272-
inner_pattern += (
3273-
f"{sep}(?:"
3271+
if len(self.end_quote_char) > 1:
3272+
inner_pattern.append(
3273+
"(?:"
32743274
+ "|".join(
3275-
f"(?:{re.escape(self.endQuoteChar[:i])}(?!{re.escape(self.endQuoteChar[i:])}))"
3276-
for i in range(len(self.endQuoteChar) - 1, 0, -1)
3275+
f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))"
3276+
for i in range(len(self.end_quote_char) - 1, 0, -1)
32773277
)
32783278
+ ")"
32793279
)
3280-
sep = "|"
32813280

3282-
self.flags = re.RegexFlag(0)
3283-
3284-
if multiline:
3285-
self.flags = re.MULTILINE | re.DOTALL
3286-
inner_pattern += (
3287-
rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}"
3288-
rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
3281+
if self.multiline:
3282+
self.re_flags |= re.MULTILINE | re.DOTALL
3283+
inner_pattern.append(
3284+
rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}"
3285+
rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
32893286
)
32903287
else:
3291-
inner_pattern += (
3292-
rf"{sep}(?:[^{_escape_regex_range_chars(self.endQuoteChar[0])}\n\r"
3293-
rf"{(_escape_regex_range_chars(escChar) if escChar is not None else '')}])"
3288+
inner_pattern.append(
3289+
rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r"
3290+
rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])"
32943291
)
32953292

32963293
self.pattern = "".join(
32973294
[
3298-
re.escape(self.quoteChar),
3295+
re.escape(self.quote_char),
32993296
"(?:",
3300-
inner_pattern,
3297+
'|'.join(inner_pattern),
33013298
")*",
3302-
re.escape(self.endQuoteChar),
3299+
re.escape(self.end_quote_char),
33033300
]
33043301
)
33053302

3306-
if self.unquoteResults:
3307-
if self.convertWhitespaceEscapes:
3303+
if self.unquote_results:
3304+
if self.convert_whitespace_escapes:
33083305
self.unquote_scan_re = re.compile(
3309-
rf"({'|'.join(re.escape(k) for k in self.ws_map)})|({re.escape(self.escChar)}.)|(\n|.)",
3310-
flags=self.flags,
3306+
rf"({'|'.join(re.escape(k) for k in self.ws_map)})"
3307+
rf"|({re.escape(self.esc_char)}.)"
3308+
rf"|(\n|.)",
3309+
flags=self.re_flags,
33113310
)
33123311
else:
33133312
self.unquote_scan_re = re.compile(
3314-
rf"({re.escape(self.escChar)}.)|(\n|.)", flags=self.flags
3313+
rf"({re.escape(self.esc_char)}.)"
3314+
rf"|(\n|.)",
3315+
flags=self.re_flags
33153316
)
3317+
# fmt: on
33163318

33173319
try:
3318-
self.re = re.compile(self.pattern, self.flags)
3320+
self.re = re.compile(self.pattern, self.re_flags)
33193321
self.reString = self.pattern
33203322
self.re_match = self.re.match
33213323
except re.error:
@@ -3326,46 +3328,60 @@ def __init__(
33263328
self.mayReturnEmpty = True
33273329

33283330
def _generateDefaultName(self) -> str:
3329-
if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type):
3330-
return f"string enclosed in {self.quoteChar!r}"
3331+
if self.quote_char == self.end_quote_char and isinstance(
3332+
self.quote_char, str_type
3333+
):
3334+
return f"string enclosed in {self.quote_char!r}"
33313335

3332-
return f"quoted string, starting with {self.quoteChar} ending with {self.endQuoteChar}"
3336+
return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}"
33333337

33343338
def parseImpl(self, instring, loc, doActions=True):
3339+
# check first character of opening quote to see if that is a match
3340+
# before doing the more complicated regex match
33353341
result = (
3336-
instring[loc] == self.firstQuoteChar
3342+
instring[loc] == self.first_quote_char
33373343
and self.re_match(instring, loc)
33383344
or None
33393345
)
33403346
if not result:
33413347
raise ParseException(instring, loc, self.errmsg, self)
33423348

3349+
# get ending loc and matched string from regex matching result
33433350
loc = result.end()
33443351
ret = result.group()
33453352

3346-
if self.unquoteResults:
3353+
if self.unquote_results:
33473354
# strip off quotes
3348-
ret = ret[self.quoteCharLen : -self.endQuoteCharLen]
3355+
ret = ret[self.quote_char_len : -self.end_quote_char_len]
33493356

33503357
if isinstance(ret, str_type):
3351-
if self.convertWhitespaceEscapes:
3358+
# fmt: off
3359+
if self.convert_whitespace_escapes:
3360+
# as we iterate over matches in the input string,
3361+
# collect from whichever match group of the unquote_scan_re
3362+
# regex matches (only 1 group will match at any given time)
33523363
ret = "".join(
3353-
self.ws_map[match.group(1)]
3354-
if match.group(1)
3355-
else match.group(2)[-1]
3356-
if match.group(2)
3364+
# match group 1 matches \t, \n, etc.
3365+
self.ws_map[match.group(1)] if match.group(1)
3366+
# match group 2 matches escaped characters
3367+
else match.group(2)[-1] if match.group(2)
3368+
# match group 3 matches any character
33573369
else match.group(3)
33583370
for match in self.unquote_scan_re.finditer(ret)
33593371
)
33603372
else:
33613373
ret = "".join(
3362-
match.group(1)[-1] if match.group(1) else match.group(2)
3374+
# match group 1 matches escaped characters
3375+
match.group(1)[-1] if match.group(1)
3376+
# match group 2 matches any character
3377+
else match.group(2)
33633378
for match in self.unquote_scan_re.finditer(ret)
33643379
)
3380+
# fmt: on
33653381

33663382
# replace escaped quotes
3367-
if self.escQuote:
3368-
ret = ret.replace(self.escQuote, self.endQuoteChar)
3383+
if self.esc_quote:
3384+
ret = ret.replace(self.esc_quote, self.end_quote_char)
33693385

33703386
return loc, ret
33713387

0 commit comments

Comments
 (0)
0