GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282) · python/cpython@713df2c · GitHub
[go: up one dir, main page]

Skip to content

Commit 713df2c

Browse files
zmievsa, barneygale, brettcannon
authored
GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)
Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees. Co-authored-by: Barney Gale <barney.gale@gmail.com> Co-authored-by: Brett Cannon <brett@python.org>
1 parent af9c34f commit 713df2c

File tree

3 files changed

+54
-38
lines changed

3 files changed

+54
-38
lines changed

Lib/pathlib.py

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,45 +1197,47 @@ def expanduser(self):
11971197
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
11981198
"""Walk the directory tree from this directory, similar to os.walk()."""
11991199
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
1200-
return self._walk(top_down, on_error, follow_symlinks)
1201-
1202-
def _walk(self, top_down, on_error, follow_symlinks):
1203-
# We may not have read permission for self, in which case we can't
1204-
# get a list of the files the directory contains. os.walk
1205-
# always suppressed the exception then, rather than blow up for a
1206-
# minor reason when (say) a thousand readable directories are still
1207-
# left to visit. That logic is copied here.
1208-
try:
1209-
scandir_it = self._scandir()
1210-
except OSError as error:
1211-
if on_error is not None:
1212-
on_error(error)
1213-
return
1214-
1215-
with scandir_it:
1216-
dirnames = []
1217-
filenames = []
1218-
for entry in scandir_it:
1219-
try:
1220-
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
1221-
except OSError:
1222-
# Carried over from os.path.isdir().
1223-
is_dir = False
1224-
1225-
if is_dir:
1226-
dirnames.append(entry.name)
1227-
else:
1228-
filenames.append(entry.name)
1229-
1230-
if top_down:
1231-
yield self, dirnames, filenames
1232-
1233-
for dirname in dirnames:
1234-
dirpath = self._make_child_relpath(dirname)
1235-
yield from dirpath._walk(top_down, on_error, follow_symlinks)
1200+
paths = [self]
1201+
1202+
while paths:
1203+
path = paths.pop()
1204+
if isinstance(path, tuple):
1205+
yield path
1206+
continue
1207+
1208+
# We may not have read permission for self, in which case we can't
1209+
# get a list of the files the directory contains. os.walk()
1210+
# always suppressed the exception in that instance, rather than
1211+
# blow up for a minor reason when (say) a thousand readable
1212+
# directories are still left to visit. That logic is copied here.
1213+
try:
1214+
scandir_it = path._scandir()
1215+
except OSError as error:
1216+
if on_error is not None:
1217+
on_error(error)
1218+
continue
1219+
1220+
with scandir_it:
1221+
dirnames = []
1222+
filenames = []
1223+
for entry in scandir_it:
1224+
try:
1225+
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
1226+
except OSError:
1227+
# Carried over from os.path.isdir().
1228+
is_dir = False
1229+
1230+
if is_dir:
1231+
dirnames.append(entry.name)
1232+
else:
1233+
filenames.append(entry.name)
1234+
1235+
if top_down:
1236+
yield path, dirnames, filenames
1237+
else:
1238+
paths.append((path, dirnames, filenames))
12361239

1237-
if not top_down:
1238-
yield self, dirnames, filenames
1240+
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
12391241

12401242

12411243
class PosixPath(Path, PurePosixPath):

Lib/test/test_pathlib.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from unittest import mock
1414

1515
from test.support import import_helper
16+
from test.support import set_recursion_limit
1617
from test.support import is_emscripten, is_wasi
1718
from test.support import os_helper
1819
from test.support.os_helper import TESTFN, FakePath
@@ -2793,6 +2794,18 @@ def test_walk_many_open_files(self):
27932794
self.assertEqual(next(it), expected)
27942795
path = path / 'd'
27952796

2797+
def test_walk_above_recursion_limit(self):
2798+
recursion_limit = 40
2799+
# directory_depth > recursion_limit
2800+
directory_depth = recursion_limit + 10
2801+
base = pathlib.Path(os_helper.TESTFN, 'deep')
2802+
path = pathlib.Path(base, *(['d'] * directory_depth))
2803+
path.mkdir(parents=True)
2804+
2805+
with set_recursion_limit(recursion_limit):
2806+
list(base.walk())
2807+
list(base.walk(top_down=False))
2808+
27962809

27972810
class PathTest(_BasePathTest, unittest.TestCase):
27982811
cls = pathlib.Path
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix pathlib.Path.walk RecursionError on deep directory trees by rewriting it using iteration instead of recursion.

0 commit comments

Comments
 (0)
0