GH-77609: Add follow_symlinks argument to `pathlib.Path.glob()` by barneygale · Pull Request #102616 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-77609: Add follow_symlinks argument to pathlib.Path.glob() #102616

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
May 29, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' into gh-77609-glob-follow-symlinks
  • Loading branch information
barneygale committed May 10, 2023
commit d3a33966a665c2610c0f48398552c8a9cecfa63f
42 changes: 30 additions & 12 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. method:: Path.glob(pattern, *, follow_symlinks=None)
.. method:: Path.glob(pattern, *, case_sensitive=None, follow_symlinks=None)

Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind)::
Expand All @@ -901,9 +901,15 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, :meth:`Path.glob` follows symlinks except when expanding
"``**``" wildcards. Set *follow_symlinks* to true to always follow
symlinks, or false to treat all symlinks as files.
By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
typically, case-sensitive on POSIX, and case-insensitive on Windows.
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.

By default, or when the *follow_symlinks* keyword-only argument is set to
``None``, this method follows symlinks except when expanding "``**``"
wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
``False`` to treat all symlinks as files.

.. note::
Using the "``**``" pattern in large directory trees may consume
Expand All @@ -915,8 +921,11 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname components
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *follow_symlinks* parameter was added.
.. versionadded:: 3.12
The *case_sensitive* argument.

.. versionadded:: 3.12
The *follow_symlinks* argument.

.. method:: Path.group()

Expand Down Expand Up @@ -1303,7 +1312,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.6
The *strict* argument (pre-3.6 behavior is strict).

.. method:: Path.rglob(pattern, *, follow_symlinks=None)
.. method:: Path.rglob(pattern, *, case_sensitive=None, follow_symlinks=None)

Glob the given relative *pattern* recursively. This is like calling
:func:`Path.glob` with "``**/``" added in front of the *pattern*, where
Expand All @@ -1316,18 +1325,27 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, :meth:`Path.rglob` follows symlinks except when expanding
"``**``" wildcards. Set *follow_symlinks* to true to always follow
symlinks, or false to treat all symlinks as files.
By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
typically, case-sensitive on POSIX, and case-insensitive on Windows.
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.

By default, or when the *follow_symlinks* keyword-only argument is set to
``None``, this method follows symlinks except when expanding "``**``"
wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
``False`` to treat all symlinks as files.

.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob

.. versionchanged:: 3.11
Return only directories if *pattern* ends with a pathname components
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *follow_symlinks* parameter was added.
.. versionadded:: 3.12
The *case_sensitive* argument.

.. versionadded:: 3.12
The *follow_symlinks* argument.

.. method:: Path.rmdir()

Expand Down
188 changes: 10 additions & 178 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,16 +193,10 @@ def _iterate_directories(self, parent_path, scandir, follow_symlinks):
def _select_from(self, parent_path, scandir, follow_symlinks):
follow_dirlinks = False if follow_symlinks is None else follow_symlinks
try:
yielded = set()
try:
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, scandir, follow_dirlinks):
for p in successor_select(starting_point, scandir, follow_symlinks):
if p not in yielded:
yield p
yielded.add(p)
finally:
yielded.clear()
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, scandir, follow_dirlinks):
for p in successor_select(starting_point, scandir, follow_symlinks):
yield p
except PermissionError:
return

Expand All @@ -214,10 +208,10 @@ class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
multiple non-adjacent '**' segments.
"""

def _select_from(self, parent_path, scandir):
def _select_from(self, parent_path, scandir, follow_symlinks):
yielded = set()
try:
for p in super()._select_from(parent_path, scandir):
for p in super()._select_from(parent_path, scandir, follow_symlinks):
if p not in yielded:
yield p
yielded.add(p)
Expand Down Expand Up @@ -768,168 +762,6 @@ class Path(PurePath):
"""
__slots__ = ()

def __init__(self, *args, **kwargs):
if kwargs:
msg = ("support for supplying keyword arguments to pathlib.PurePath "
"is deprecated and scheduled for removal in Python {remove}")
warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14))
super().__init__(*args)

def __new__(cls, *args, **kwargs):
if cls is Path:
cls = WindowsPath if os.name == 'nt' else PosixPath
return object.__new__(cls)

def _make_child_relpath(self, name):
path_str = str(self)
tail = self._tail
if tail:
path_str = f'{path_str}{self._flavour.sep}{name}'
elif path_str != '.':
path_str = f'{path_str}{name}'
else:
path_str = name
path = type(self)(path_str)
path._str = path_str
path._drv = self.drive
path._root = self.root
path._tail_cached = tail + [name]
return path

def __enter__(self):
# In previous versions of pathlib, __exit__() marked this path as
# closed; subsequent attempts to perform I/O would raise an IOError.
# This functionality was never documented, and had the effect of
# making Path objects mutable, contrary to PEP 428.
# In Python 3.9 __exit__() was made a no-op.
# In Python 3.11 __enter__() began emitting DeprecationWarning.
# In Python 3.13 __enter__() and __exit__() should be removed.
warnings.warn("pathlib.Path.__enter__() is deprecated and scheduled "
"for removal in Python 3.13; Path objects as a context "
"manager is a no-op",
DeprecationWarning, stacklevel=2)
return self

def __exit__(self, t, v, tb):
pass

# Public API

@classmethod
def cwd(cls):
"""Return a new path pointing to the current working directory."""
# We call 'absolute()' rather than using 'os.getcwd()' directly to
# enable users to replace the implementation of 'absolute()' in a
# subclass and benefit from the new behaviour here. This works because
# os.path.abspath('.') == os.getcwd().
return cls().absolute()

@classmethod
def home(cls):
"""Return a new path pointing to the user's home directory (as
returned by os.path.expanduser('~')).
"""
return cls("~").expanduser()

def samefile(self, other_path):
"""Return whether other_path is the same or not as this file
(as returned by os.path.samefile()).
"""
st = self.stat()
try:
other_st = other_path.stat()
except AttributeError:
other_st = self.__class__(other_path).stat()
return self._flavour.samestat(st, other_st)

def iterdir(self):
"""Yield path objects of the directory contents.

The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
for name in os.listdir(self):
yield self._make_child_relpath(name)

def _scandir(self):
# bpo-24132: a future version of pathlib will support subclassing of
# pathlib.Path to customize how the filesystem is accessed. This
# includes scandir(), which is used to implement glob().
return os.scandir(self)

def glob(self, pattern, *, follow_symlinks=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
sys.audit("pathlib.Path.glob", self, pattern)
if not pattern:
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour)
for p in selector.select_from(self, follow_symlinks):
yield p

def rglob(self, pattern, *, follow_symlinks=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
"""
sys.audit("pathlib.Path.rglob", self, pattern)
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
for p in selector.select_from(self, follow_symlinks):
yield p

def absolute(self):
"""Return an absolute version of this path by prepending the current
working directory. No normalization or symlink resolution is performed.

Use resolve() to get the canonical path to a file.
"""
if self.is_absolute():
return self
elif self.drive:
# There is a CWD on each drive-letter drive.
cwd = self._flavour.abspath(self.drive)
else:
cwd = os.getcwd()
return type(self)(cwd, self)

def resolve(self, strict=False):
"""
Make the path absolute, resolving all symlinks on the way and also
normalizing it.
"""

def check_eloop(e):
winerror = getattr(e, 'winerror', 0)
if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME:
raise RuntimeError("Symlink loop from %r" % e.filename)

try:
s = self._flavour.realpath(self, strict=strict)
except OSError as e:
check_eloop(e)
raise
p = type(self)(s)

# In non-strict mode, realpath() doesn't raise on symlink loops.
# Ensure we get an exception by calling stat()
if not strict:
try:
p.stat()
except OSError as e:
check_eloop(e)
return p

def stat(self, *, follow_symlinks=True):
"""
Return the result of the stat() system call on this path, like
Expand Down Expand Up @@ -1176,7 +1008,7 @@ def _make_child_relpath(self, name):
path._tail_cached = tail + [name]
return path

def glob(self, pattern, *, case_sensitive=None):
def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
Expand All @@ -1189,10 +1021,10 @@ def glob(self, pattern, *, case_sensitive=None):
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self):
for p in selector.select_from(self, follow_symlinks):
yield p

def rglob(self, pattern, *, case_sensitive=None):
def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
Expand All @@ -1204,7 +1036,7 @@ def rglob(self, pattern, *, case_sensitive=None):
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self):
for p in selector.select_from(self, follow_symlinks):
yield p

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
Expand Down
12 changes: 12 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,6 +1839,18 @@ def _check(glob, expected):
else:
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])

def test_glob_case_sensitive(self):
P = self.cls
def _check(path, pattern, case_sensitive, expected):
actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)}
expected = {str(P(BASE, q)) for q in expected}
self.assertEqual(actual, expected)
path = P(BASE)
_check(path, "DIRB/FILE*", True, [])
_check(path, "DIRB/FILE*", False, ["dirB/fileB"])
_check(path, "dirb/file*", True, [])
_check(path, "dirb/file*", False, ["dirB/fileB"])

@os_helper.skip_unless_symlink
def test_glob_follow_symlinks_common(self):
def _check(path, glob, expected):
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.