GH-117586: Speed up `pathlib.Path.glob()` by working with strings by barneygale · Pull Request #117589 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-117586: Speed up pathlib.Path.glob() by working with strings #117589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 10, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Slightly speed up path renormalisation.
  • Loading branch information
barneygale committed Apr 7, 2024
commit ebcd7fcca8db0a077d8cb2c4eea689de2c921a47
58 changes: 31 additions & 27 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import glob
import io
import ntpath
import operator
import os
import posixpath
import sys
Expand Down Expand Up @@ -255,14 +256,17 @@ def _format_parsed_parts(cls, drv, root, tail):
return cls.parser.sep.join(tail)

def _from_parsed_parts(self, drv, root, tail):
path_str = self._format_parsed_parts(drv, root, tail)
path = self.with_segments(path_str)
path._str = path_str or '.'
path = self._from_parsed_string(self._format_parsed_parts(drv, root, tail))
path._drv = drv
path._root = root
path._tail_cached = tail
return path

def _from_parsed_string(self, path_str):
path = self.with_segments(path_str)
path._str = path_str or '.'
return path

@classmethod
def _parse_path(cls, path):
if not path:
Expand Down Expand Up @@ -563,6 +567,17 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
encoding = io.text_encoding(encoding)
return _abc.PathBase.write_text(self, data, encoding, errors, newline)

_remove_leading_dot = operator.itemgetter(slice(2, None))
_remove_trailing_slash = operator.itemgetter(slice(-1))

def _filter_trailing_slash(self, paths):
sep = self.parser.sep
anchor_len = len(self.anchor)
for path_str in paths:
if len(path_str) > anchor_len and path_str[-1] == sep:
path_str = path_str[:-1]
yield path_str

def iterdir(self):
"""Yield path objects of the directory contents.

Expand Down Expand Up @@ -602,19 +617,6 @@ def _make_child_relpath(self, name):
path._tail_cached = tail + [name]
return path

def _make_glob_paths(self, paths):
"""Yields normalized path objects from the given iterable of string
glob results."""
sep = self.parser.sep
prefix_len = len(self.anchor)
for path_str in paths:
if len(path_str) > prefix_len and path_str[-1] == sep:
# Strip trailing slash.
path_str = path_str[:-1]
path = self.with_segments(path_str)
path._str = path_str or '.'
yield path

def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
Expand All @@ -631,16 +633,20 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
if raw[-1] in (self.parser.sep, self.parser.altsep):
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
parts.append('')
parts.reverse()
if not self.is_dir():
return iter([])
select = self._glob_selector(parts, case_sensitive, recurse_symlinks)
path = str(self)
paths = select(path, exists=True)
if path == '.':
# Strip leading './'.
paths = map(lambda p: p[2:], paths)
paths = self._make_glob_paths(paths)
select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
root = str(self)
paths = select(root, exists=True)

# Normalize results
if root == '.':
paths = map(self._remove_leading_dot, paths)
if parts[-1] == '':
paths = map(self._remove_trailing_slash, paths)
elif parts[-1] == '**':
paths = self._filter_trailing_slash(paths)
paths = map(self._from_parsed_string, paths)
return paths

def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
Expand Down Expand Up @@ -682,9 +688,7 @@ def absolute(self):
# of joining, and we exploit the fact that getcwd() returns a
# fully-normalized string by storing it in _str. This is used to
# implement Path.cwd().
result = self.with_segments(cwd)
result._str = cwd
return result
return self._from_parsed_string(cwd)
drive, root, rel = os.path.splitroot(cwd)
if not rel:
return self._from_parsed_parts(drive, root, self._tail)
Expand Down
0