gh-106628: email parsing speedup (gh-106629) · python/cpython@7e6ce48 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7e6ce48

Browse files
authored
gh-106628: email parsing speedup (gh-106629)
1 parent af51bd7 commit 7e6ce48

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

Lib/email/feedparser.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
3838
EMPTYSTRING = ''
3939
NL = '\n'
40+
boundaryendRE = re.compile(
41+
r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
4042

4143
NeedMoreData = object()
4244

@@ -327,9 +329,10 @@ def _parsegen(self):
327329
# this onto the input stream until we've scanned past the
328330
# preamble.
329331
separator = '--' + boundary
330-
boundaryre = re.compile(
331-
'(?P<sep>' + re.escape(separator) +
332-
r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
332+
def boundarymatch(line):
333+
if not line.startswith(separator):
334+
return None
335+
return boundaryendRE.match(line, len(separator))
333336
capturing_preamble = True
334337
preamble = []
335338
linesep = False
@@ -341,7 +344,7 @@ def _parsegen(self):
341344
continue
342345
if line == '':
343346
break
344-
mo = boundaryre.match(line)
347+
mo = boundarymatch(line)
345348
if mo:
346349
# If we're looking at the end boundary, we're done with
347350
# this multipart. If there was a newline at the end of
@@ -373,13 +376,13 @@ def _parsegen(self):
373376
if line is NeedMoreData:
374377
yield NeedMoreData
375378
continue
376-
mo = boundaryre.match(line)
379+
mo = boundarymatch(line)
377380
if not mo:
378381
self._input.unreadline(line)
379382
break
380383
# Recurse to parse this subpart; the input stream points
381384
# at the subpart's first line.
382-
self._input.push_eof_matcher(boundaryre.match)
385+
self._input.push_eof_matcher(boundarymatch)
383386
for retval in self._parsegen():
384387
if retval is NeedMoreData:
385388
yield NeedMoreData
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Speed up parsing of emails by about 20% by not compiling a new regular
2+
expression for every single email.

0 commit comments

Comments
 (0)
0