GH-77609: Add follow_symlinks argument to `pathlib.Path.glob()` by barneygale · Pull Request #102616 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-77609: Add follow_symlinks argument to pathlib.Path.glob() #102616

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
May 29, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
GH-77609: Support following symlinks in pathlib.Path.glob()
Add a keyword-only *follow_symlinks* parameter to `pathlib.Path.glob()` and
`rglob()`, defaulting to false. When set to true, symlinks to directories
are followed as if they were directories.

Previously these methods followed symlinks except when evaluating "`**`"
wildcards; on Windows they returned paths in filesystem casing except when
evaluating non-wildcard tokens. Both these problems are solved here. This
will allow us to address GH-102613 and GH-81079 in future commits.
  • Loading branch information
barneygale committed Mar 12, 2023
commit 59dcdb124bc78ea56dc033b3c6e17bbc2ca13e07
18 changes: 16 additions & 2 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. method:: Path.glob(pattern)
.. method:: Path.glob(pattern, *, follow_symlinks=False)

Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind)::
Expand All @@ -873,6 +873,9 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, :meth:`Path.glob` does not follow symlinks. Set
*follow_symlinks* to true to visit symlinks to directories.

.. note::
Using the "``**``" pattern in large directory trees may consume
an inordinate amount of time.
Expand All @@ -883,6 +886,10 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *follow_symlinks* parameter was added. In previous versions,
symlinks were followed except when expanding "``**``" wildcards.

.. method:: Path.group()

Return the name of the group owning the file. :exc:`KeyError` is raised
Expand Down Expand Up @@ -1268,7 +1275,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.6
The *strict* argument (pre-3.6 behavior is strict).

.. method:: Path.rglob(pattern)
.. method:: Path.rglob(pattern, *, follow_symlinks=False)

Glob the given relative *pattern* recursively. This is like calling
:meth:`Path.glob` with "``**/``" added in front of the *pattern*, where
Expand All @@ -1281,12 +1288,19 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, :meth:`Path.rglob` does not follow symlinks. Set
*follow_symlinks* to true to visit symlinks to directories.

.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob

.. versionchanged:: 3.11
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *follow_symlinks* parameter was added. In previous versions,
symlinks were followed except when expanding "``**``" wildcards.

.. method:: Path.rmdir()

Remove this directory. The directory must be empty.
Expand Down
64 changes: 18 additions & 46 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ def _ignore_error(exception):
return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)


def _is_wildcard_pattern(pat):
# Whether this pattern needs actual matching using fnmatch, or can
# be looked up directly as a file.
return "*" in pat or "?" in pat or "[" in pat

#
# Globbing helpers
#
Expand All @@ -74,10 +68,8 @@ def _make_selector(pattern_parts, flavour):
cls = _RecursiveWildcardSelector
elif '**' in pat:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
elif _is_wildcard_pattern(pat):
cls = _WildcardSelector
else:
cls = _PreciseSelector
cls = _WildcardSelector
return cls(pat, child_parts, flavour)


Expand All @@ -94,48 +86,28 @@ def __init__(self, child_parts, flavour):
self.successor = _TerminatingSelector()
self.dironly = False

def select_from(self, parent_path):
def select_from(self, parent_path, follow_symlinks):
"""Iterate over all child paths of `parent_path` matched by this
selector. This can contain parent_path itself."""
path_cls = type(parent_path)
is_dir = path_cls.is_dir
exists = path_cls.exists
scandir = path_cls._scandir
normcase = path_cls._flavour.normcase
if not is_dir(parent_path):
return iter([])
return self._select_from(parent_path, is_dir, exists, scandir, normcase)
return self._select_from(parent_path, follow_symlinks, scandir, normcase)


class _TerminatingSelector:

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, follow_symlinks, scandir, normcase):
yield parent_path


class _PreciseSelector(_Selector):

def __init__(self, name, child_parts, flavour):
self.name = name
_Selector.__init__(self, child_parts, flavour)

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
try:
path = parent_path._make_child_relpath(self.name)
if (is_dir if self.dironly else exists)(path):
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
yield p
except PermissionError:
return


class _WildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
_Selector.__init__(self, child_parts, flavour)

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, follow_symlinks, scandir, normcase):
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
Expand All @@ -147,7 +119,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
# "entry.is_dir()" can raise PermissionError
# in some cases (see bpo-38894), which is not
# among the errors ignored by _ignore_error()
if not entry.is_dir():
if not entry.is_dir(follow_symlinks=follow_symlinks):
continue
except OSError as e:
if not _ignore_error(e):
Expand All @@ -156,7 +128,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
name = entry.name
if self.match(normcase(name)):
path = parent_path._make_child_relpath(name)
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
for p in self.successor._select_from(path, follow_symlinks, scandir, normcase):
yield p
except PermissionError:
return
Expand All @@ -167,7 +139,7 @@ class _RecursiveWildcardSelector(_Selector):
def __init__(self, pat, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour)

def _iterate_directories(self, parent_path, is_dir, scandir):
def _iterate_directories(self, parent_path, follow_symlinks, scandir):
yield parent_path
try:
# We must close the scandir() object before proceeding to
Expand All @@ -177,24 +149,24 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
for entry in entries:
entry_is_dir = False
try:
entry_is_dir = entry.is_dir()
entry_is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
except OSError as e:
if not _ignore_error(e):
raise
if entry_is_dir and not entry.is_symlink():
if entry_is_dir:
path = parent_path._make_child_relpath(entry.name)
for p in self._iterate_directories(path, is_dir, scandir):
for p in self._iterate_directories(path, follow_symlinks, scandir):
yield p
except PermissionError:
return

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, follow_symlinks, scandir, normcase):
try:
yielded = set()
try:
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
for starting_point in self._iterate_directories(parent_path, follow_symlinks, scandir):
for p in successor_select(starting_point, follow_symlinks, scandir, normcase):
if p not in yielded:
yield p
yielded.add(p)
Expand Down Expand Up @@ -763,7 +735,7 @@ def _scandir(self):
# includes scandir(), which is used to implement glob().
return os.scandir(self)

def glob(self, pattern):
def glob(self, pattern, *, follow_symlinks=False):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
Expand All @@ -776,10 +748,10 @@ def glob(self, pattern):
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour)
for p in selector.select_from(self):
for p in selector.select_from(self, follow_symlinks):
yield p

def rglob(self, pattern):
def rglob(self, pattern, *, follow_symlinks=False):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
Expand All @@ -791,7 +763,7 @@ def rglob(self, pattern):
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
for p in selector.select_from(self):
for p in selector.select_from(self, follow_symlinks):
yield p

def absolute(self):
Expand Down
80 changes: 49 additions & 31 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1760,22 +1760,25 @@ def _check(glob, expected):
_check(p.glob("dir*/file*"), ["dirB/fileB", "dirC/fileC"])
if not os_helper.can_symlink():
_check(p.glob("*A"), ['dirA', 'fileA'])
else:
_check(p.glob("*A"), ['dirA', 'fileA', 'linkA'])
if not os_helper.can_symlink():
_check(p.glob("*B/*"), ['dirB/fileB'])
else:
_check(p.glob("*B/*"), ['dirB/fileB', 'dirB/linkD',
'linkB/fileB', 'linkB/linkD'])
if not os_helper.can_symlink():
_check(p.glob("*/fileB"), ['dirB/fileB'])
else:
_check(p.glob("*/fileB"), ['dirB/fileB', 'linkB/fileB'])
_check(p.glob("*A"), ['dirA', 'fileA', 'linkA'])
_check(p.glob("*B/*"), ['dirB/fileB', 'dirB/linkD'])
_check(p.glob("*/fileB"), ['dirB/fileB'])
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE"])

if not os_helper.can_symlink():
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE"])
else:
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])
@os_helper.skip_unless_symlink
def test_glob_follow_symlinks_common(self):
def _check(path, glob, expected):
self.assertEqual(set(path.glob(glob, follow_symlinks=True)), { P(BASE, q) for q in expected })
P = self.cls
p = P(BASE)
_check(p, "fileB", [])
_check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
_check(p, "*A", ['dirA', 'fileA', 'linkA'])
_check(p, "*B/*", ['dirB/fileB', 'dirB/linkD', 'linkB/fileB', 'linkB/linkD'])
_check(p, "*/fileB", ['dirB/fileB', 'linkB/fileB'])
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])

def test_rglob_common(self):
def _check(glob, expected):
Expand All @@ -1787,22 +1790,10 @@ def _check(glob, expected):
_check(it, ["fileA"])
_check(p.rglob("fileB"), ["dirB/fileB"])
_check(p.rglob("*/fileA"), [])
if not os_helper.can_symlink():
_check(p.rglob("*/fileB"), ["dirB/fileB"])
else:
_check(p.rglob("*/fileB"), ["dirB/fileB", "dirB/linkD/fileB",
"linkB/fileB", "dirA/linkC/fileB"])
_check(p.rglob("*/fileB"), ["dirB/fileB"])
_check(p.rglob("file*"), ["fileA", "dirB/fileB",
"dirC/fileC", "dirC/dirD/fileD"])
if not os_helper.can_symlink():
_check(p.rglob("*/"), [
"dirA", "dirB", "dirC", "dirC/dirD", "dirE",
])
else:
_check(p.rglob("*/"), [
"dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC",
"dirC/dirD", "dirE", "linkB",
])
_check(p.rglob("*/"), ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
_check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"])

p = P(BASE, "dirC")
Expand All @@ -1816,6 +1807,33 @@ def _check(glob, expected):
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
_check(p.rglob("*.*"), ["dirC/novel.txt"])

@os_helper.skip_unless_symlink
def test_rglob_follow_symlinks_common(self):
def _check(path, glob, expected):
actual = {path for path in path.rglob(glob, follow_symlinks=True)
if 'linkD' not in path.parts} # exclude symlink loop.
self.assertEqual(actual, { P(BASE, q) for q in expected })
P = self.cls
p = P(BASE)
_check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
_check(p, "*/fileA", [])
_check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
_check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB",
"dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"])
_check(p, "*/", ["dirA", "dirA/linkC", "dirB", "dirC", "dirC/dirD", "dirE", "linkB"])
_check(p, "", ["", "dirA", "dirA/linkC", "dirB", "dirC", "dirE", "dirC/dirD", "linkB"])

p = P(BASE, "dirC")
_check(p, "*", ["dirC/fileC", "dirC/novel.txt",
"dirC/dirD", "dirC/dirD/fileD"])
_check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
_check(p, "*/*", ["dirC/dirD/fileD"])
_check(p, "*/", ["dirC/dirD"])
_check(p, "", ["dirC", "dirC/dirD"])
# gh-91616, a re module regression
_check(p, "*.txt", ["dirC/novel.txt"])
_check(p, "*.*", ["dirC/novel.txt"])

@os_helper.skip_unless_symlink
def test_rglob_symlink_loop(self):
# Don't get fooled by symlink loops (Issue #26012).
Expand Down Expand Up @@ -1856,8 +1874,8 @@ def test_glob_dotdot(self):
# ".." is not special in globs.
P = self.cls
p = P(BASE)
self.assertEqual(set(p.glob("..")), { P(BASE, "..") })
self.assertEqual(set(p.glob("dirA/../file*")), { P(BASE, "dirA/../fileA") })
self.assertEqual(set(p.glob("..")), set())
self.assertEqual(set(p.glob("dirA/../file*")), set())
self.assertEqual(set(p.glob("../xyzzy")), set())

@os_helper.skip_unless_symlink
Expand Down Expand Up @@ -3053,15 +3071,15 @@ def test_glob(self):
self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") })
self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") })
self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") })
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"})
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})

def test_rglob(self):
P = self.cls
p = P(BASE, "dirC")
self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") })
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"})
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})

def test_expanduser(self):
P = self.cls
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add *follow_symlinks* argument to :meth:`pathlib.Path.glob` and
:meth:`~pathlib.Path.rglob`, defaulting to false.
0