Issue #26032: Optimized globbing in pathlib by using os.scandir(); it… · python/cpython@680cb15 · GitHub
[go: up one dir, main page]

Skip to content

Commit 680cb15

Browse files
Issue #26032: Optimized globbing in pathlib by using os.scandir(); it is now
about 1.5--4 times faster.
1 parent 1194c6d commit 680cb15

File tree

3 files changed

+45
-55
lines changed

3 files changed

+45
-55
lines changed

Doc/whatsnew/3.6.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,9 @@ Optimizations
808808
:mod:`glob` module; they are now about 3--6 times faster.
809809
(Contributed by Serhiy Storchaka in :issue:`25596`).
810810

811+
* Optimized globbing in :mod:`pathlib` by using :func:`os.scandir`;
812+
it is now about 1.5--4 times faster.
813+
(Contributed by Serhiy Storchaka in :issue:`26032`).
811814

812815
Build and C API Changes
813816
=======================

Lib/pathlib.py

Lines changed: 39 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,8 @@ def wrapped(pathobjA, pathobjB, *args):
385385

386386
listdir = _wrap_strfunc(os.listdir)
387387

388+
scandir = _wrap_strfunc(os.scandir)
389+
388390
chmod = _wrap_strfunc(os.chmod)
389391

390392
if hasattr(os, "lchmod"):
@@ -429,25 +431,6 @@ def readlink(self, path):
429431
# Globbing helpers
430432
#
431433

432-
@contextmanager
433-
def _cached(func):
434-
try:
435-
func.__cached__
436-
yield func
437-
except AttributeError:
438-
cache = {}
439-
def wrapper(*args):
440-
try:
441-
return cache[args]
442-
except KeyError:
443-
value = cache[args] = func(*args)
444-
return value
445-
wrapper.__cached__ = True
446-
try:
447-
yield wrapper
448-
finally:
449-
cache.clear()
450-
451434
def _make_selector(pattern_parts):
452435
pat = pattern_parts[0]
453436
child_parts = pattern_parts[1:]
@@ -473,22 +456,26 @@ def __init__(self, child_parts):
473456
self.child_parts = child_parts
474457
if child_parts:
475458
self.successor = _make_selector(child_parts)
459+
self.dironly = True
476460
else:
477461
self.successor = _TerminatingSelector()
462+
self.dironly = False
478463

479464
def select_from(self, parent_path):
480465
"""Iterate over all child paths of `parent_path` matched by this
481466
selector. This can contain parent_path itself."""
482467
path_cls = type(parent_path)
483468
is_dir = path_cls.is_dir
484469
exists = path_cls.exists
485-
listdir = parent_path._accessor.listdir
486-
return self._select_from(parent_path, is_dir, exists, listdir)
470+
scandir = parent_path._accessor.scandir
471+
if not is_dir(parent_path):
472+
return iter([])
473+
return self._select_from(parent_path, is_dir, exists, scandir)
487474

488475

489476
class _TerminatingSelector:
490477

491-
def _select_from(self, parent_path, is_dir, exists, listdir):
478+
def _select_from(self, parent_path, is_dir, exists, scandir):
492479
yield parent_path
493480

494481

@@ -498,13 +485,11 @@ def __init__(self, name, child_parts):
498485
self.name = name
499486
_Selector.__init__(self, child_parts)
500487

501-
def _select_from(self, parent_path, is_dir, exists, listdir):
488+
def _select_from(self, parent_path, is_dir, exists, scandir):
502489
try:
503-
if not is_dir(parent_path):
504-
return
505490
path = parent_path._make_child_relpath(self.name)
506-
if exists(path):
507-
for p in self.successor._select_from(path, is_dir, exists, listdir):
491+
if (is_dir if self.dironly else exists)(path):
492+
for p in self.successor._select_from(path, is_dir, exists, scandir):
508493
yield p
509494
except PermissionError:
510495
return
@@ -516,17 +501,18 @@ def __init__(self, pat, child_parts):
516501
self.pat = re.compile(fnmatch.translate(pat))
517502
_Selector.__init__(self, child_parts)
518503

519-
def _select_from(self, parent_path, is_dir, exists, listdir):
504+
def _select_from(self, parent_path, is_dir, exists, scandir):
520505
try:
521-
if not is_dir(parent_path):
522-
return
523506
cf = parent_path._flavour.casefold
524-
for name in listdir(parent_path):
525-
casefolded = cf(name)
526-
if self.pat.match(casefolded):
527-
path = parent_path._make_child_relpath(name)
528-
for p in self.successor._select_from(path, is_dir, exists, listdir):
529-
yield p
507+
entries = list(scandir(parent_path))
508+
for entry in entries:
509+
if not self.dironly or entry.is_dir():
510+
name = entry.name
511+
casefolded = cf(name)
512+
if self.pat.match(casefolded):
513+
path = parent_path._make_child_relpath(name)
514+
for p in self.successor._select_from(path, is_dir, exists, scandir):
515+
yield p
530516
except PermissionError:
531517
return
532518

@@ -537,32 +523,30 @@ class _RecursiveWildcardSelector(_Selector):
537523
def __init__(self, pat, child_parts):
538524
_Selector.__init__(self, child_parts)
539525

540-
def _iterate_directories(self, parent_path, is_dir, listdir):
526+
def _iterate_directories(self, parent_path, is_dir, scandir):
541527
yield parent_path
542528
try:
543-
for name in listdir(parent_path):
544-
path = parent_path._make_child_relpath(name)
545-
if is_dir(path) and not path.is_symlink():
546-
for p in self._iterate_directories(path, is_dir, listdir):
529+
entries = list(scandir(parent_path))
530+
for entry in entries:
531+
if entry.is_dir() and not entry.is_symlink():
532+
path = parent_path._make_child_relpath(entry.name)
533+
for p in self._iterate_directories(path, is_dir, scandir):
547534
yield p
548535
except PermissionError:
549536
return
550537

551-
def _select_from(self, parent_path, is_dir, exists, listdir):
538+
def _select_from(self, parent_path, is_dir, exists, scandir):
552539
try:
553-
if not is_dir(parent_path):
554-
return
555-
with _cached(listdir) as listdir:
556-
yielded = set()
557-
try:
558-
successor_select = self.successor._select_from
559-
for starting_point in self._iterate_directories(parent_path, is_dir, listdir):
560-
for p in successor_select(starting_point, is_dir, exists, listdir):
561-
if p not in yielded:
562-
yield p
563-
yielded.add(p)
564-
finally:
565-
yielded.clear()
540+
yielded = set()
541+
try:
542+
successor_select = self.successor._select_from
543+
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
544+
for p in successor_select(starting_point, is_dir, exists, scandir):
545+
if p not in yielded:
546+
yield p
547+
yielded.add(p)
548+
finally:
549+
yielded.clear()
566550
except PermissionError:
567551
return
568552

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ Library
109109

110110
- Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib.
111111

112+
- Issue #26032: Optimized globbing in pathlib by using os.scandir(); it is now
113+
about 1.5--4 times faster.
114+
112115
- Issue #25596: Optimized glob() and iglob() functions in the
113116
glob module; they are now about 3--6 times faster.
114117

0 commit comments

Comments
 (0)
0