8000 gh-89727: Fix os.fwalk RecursionError on deep trees by jonburdo · Pull Request #100347 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-89727: Fix os.fwalk RecursionError on deep trees #100347

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d9f6f44
fix os.walk RecursionError on deep trees
jonburdo Dec 19, 2022
61b8078
use _WalkAction enum
jonburdo Dec 19, 2022
d5e2042
Merge branch 'main' into iterative-os-fwalk
jonburdo Dec 19, 2022
90c123b
fix formatting
jonburdo Dec 20, 2022
570818b
remove enum from os
jonburdo Dec 20, 2022
30aeed7
add blurb
jonburdo Dec 20, 2022
55e17b8
gh-89727: Fix os.walk RecursionError on deep trees (#99803)
jonburdo Dec 19, 2022
f41cef6
gh-69929: re docs: Add more specific definition of \w (#92015)
slateny Dec 20, 2022
33ba6a5
gh-89051: Add ssl.OP_LEGACY_SERVER_CONNECT (#93927)
graingert Dec 20, 2022
f9b6796
gh-88211: Change lower-case and upper-case to match recommendations i…
tbwolfe Dec 20, 2022
77d160f
gh-100348: Fix ref cycle in `asyncio._SelectorSocketTransport` with `…
rkojedzinszky Dec 20, 2022
db820a2
gh-99925: Fix inconsistency in `json.dumps()` error messages (GH-99926)
fnesveda Dec 20, 2022
c39ce63
Clarify that every thread has its own default context in contextvars …
pablogsal Dec 20, 2022
8d2befb
run test_walk_above_recursion_limit for fwalk 8000
jonburdo Dec 20, 2022
d29188e
set stack outside try-except in fwalk
jonburdo Dec 20, 2022
2cf7550
fix comments
jonburdo Dec 20, 2022
f2cca94
Merge branch 'main' into iterative-os-fwalk
jonburdo Dec 20, 2022
af18a1d
change ValueError to AssertionError
jonburdo Dec 20, 2022
9eedf9a
Merge branch 'main' into iterative-os-fwalk
jonburdo Mar 23, 2023
5ee50c6
add more reliable file descriptor closing logic in os.fwalk
jonburdo Mar 23, 2023
f0f9330
use a separate fd_stack for simpler cleanup and error handling
jonburdo Mar 23, 2023
ccfb955
Merge branch 'main' into iterative-os-fwalk
jonburdo Mar 23, 2023
598bdf9
run test_walk_above_recursion_limit with fwalk tests
jonburdo Mar 23, 2023
c158be3
Merge branch 'main' into iterative-os-fwalk
jonburdo Mar 24, 2023
6608a6a
make sure we don't close the same fd twice
jonburdo Mar 26, 2023
d026146
revert to using a single stack instead of a separate ffd stack
jonburdo Mar 26, 2023
762c03a
remove unused variable
jonburdo Mar 26, 2023
045fd88
get rid of unnecessary os._fwalk
jonburdo Apr 1, 2023
f3f793a
change except clause to finally clause in os.fwalk
jonburdo Apr 1, 2023
dfb9685
Merge branch 'main' into iterative-os-fwalk
AlexWaygood Apr 25, 2023
0ebc475
Merge branch 'main' into iterative-os-fwalk
erlend-aasland Jan 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
gh-89727: Fix os.walk RecursionError on deep trees (#99803)
Use a stack to implement os.walk iteratively instead of recursively to
avoid hitting recursion limits on deeply nested trees.
  • Loading branch information
jonburdo committed Dec 20, 2022
commit 55e17b83551db9414bd5418f577351333ff6292b
162 changes: 84 additions & 78 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,89 +340,95 @@ def walk(top, topdown=True, onerror=None, followlinks=False):

"""
sys.audit("os.walk", top, topdown, onerror, followlinks)
return _walk(fspath(top), topdown, onerror, followlinks)

def _walk(top, topdown, onerror, followlinks):
dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
# Note that scandir is global in this module due
# to earlier import-*.
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
return

with scandir_it:
while True:
try:
stack = [(False, fspath(top))]
islink, join = path.islink, path.join
while stack:
must_yield, top = stack.pop()
if must_yield:
yield top
continue

dirs = []
nondirs = []
walk_dirs = []

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains.
# We suppress the exception here, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit.
try:
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
continue

cont = False
with scandir_it:
while True:
try:
entry = next(scandir_it)
except StopIteration:
try:
entry = next(scandir_it)
except StopIteration:
break
except OSError as error:
if onerror is not None:
onerror(error)
cont = True
break
except OSError as error:
if onerror is not None:
onerror(error)
return

try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
is_dir = False

if is_dir:
dirs.append(entry.name)
else:
nondirs.append(entry.name)

if not topdown and is_dir:
# Bottom-up: recurse into sub-directory, but exclude symlinks to
# directories if followlinks is False
if followlinks:
walk_into = True
try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider the entry not to
# be a directory, same behaviour as os.path.isdir().
is_dir = False

if is_dir:
dirs.append(entry.name)
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolic link, same behaviour than
# os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)

# Yield before recursion if going top down
if topdown:
yield top, dirs, nondirs

# Recurse into sub-directories
islink, join = path.islink, path.join
for dirname in dirs:
new_path = join(top, dirname)
# Issue #23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
yield from _walk(new_path, topdown, onerror, followlinks)
else:
# Recurse into sub-directories
for new_path in walk_dirs:
yield from _walk(new_path, topdown, onerror, followlinks)
# Yield after recursion if going bottom up
yield top, dirs, nondirs
nondirs.append(entry.name)

if not topdown and is_dir:
# Bottom-up: traverse into sub-directory, but exclude
# symlinks to directories if followlinks is False
if followlinks:
walk_into = True
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider the
# entry not to be a symbolic link, same behaviour
# as os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)
if cont:
continue

if topdown:
# Yield before sub-directory traversal if going top down
yield top, dirs, nondirs
# Traverse into sub-directories
for dirname in reversed(dirs):
new_path = join(top, dirname)
# bpo-23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
stack.append((False, new_path))
else:
# Yield after sub-directory traversal if going bottom up
stack.append((True, (top, dirs, nondirs)))
# Traverse into sub-directories
for new_path in reversed(walk_dirs):
stack.append((False, new_path))

__all__.append("walk")

Expand Down
16 changes: 10 additions & 6 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2178,19 +2178,23 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds):
testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds)

@contextlib.contextmanager
def set_recursion_limit(limit):
"""Temporarily change the recursion limit."""
original_limit = sys.getrecursionlimit()
try:
sys.setrecursionlimit(limit)
yield
finally:
sys.setrecursionlimit(original_limit)

def infinite_recursion(max_depth=75):
"""Set a lower limit for tests that interact with infinite recursions
(e.g test_ast.ASTHelpers_Test.test_recursion_direct) since on some
debug windows builds, due to not enough functions being inlined the
stack size might not handle the default recursion limit (1000). See
bpo-11105 for details."""
return set_recursion_limit(max_depth)

original_depth = sys.getrecursionlimit()
try:
sys.setrecursionlimit(max_depth)
yield
finally:
sys.setrecursionlimit(original_depth)

def ignore_deprecations_from(module: str, *, like: str) -> object:
token = object()
Expand Down
43 changes: 43 additions & 0 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from test.support import import_helper
from test.support import os_helper
from test.support import socket_helper
from test.support import set_recursion_limit
from test.support import warnings_helper
from platform import win32_is_iot

Expand Down Expand Up @@ -1471,6 +1472,46 @@ def test_walk_many_open_files(self):
self.assertEqual(next(it), expected)
p = os.path.join(p, 'd')

def test_walk_above_recursion_limit(self):
depth = 50
os.makedirs(os.path.join(self.walk_path, *(['d'] * depth)))
with set_recursion_limit(depth - 5):
all = list(self.walk(self.walk_path))

sub2_path = self.sub2_tree[0]
for root, dirs, files in all:
if root == sub2_path:
dirs.sort()
files.sort()

d_entries = []
d_path = self.walk_path
for _ in range(depth):
d_path = os.path.join(d_path, "d")
d_entries.append((d_path, ["d"], []))
d_entries[-1][1].clear()

# Sub-sequences where the order is known
sections = {
"SUB1": [
(self.sub1_path, ["SUB11"], ["tmp2"]),
(self.sub11_path, [], []),
],
"SUB2": [self.sub2_tree],
"d": d_entries,
}

# The ordering of sub-dirs is arbitrary but determines the order in
# which sub-sequences appear
dirs = all[0][1]
expected = [(self.walk_path, dirs, ["tmp1"])]
for d in dirs:
expected.extend(sections[d])

self.assertEqual(len(all), depth + 4)
self.assertEqual(sorted(dirs), ["SUB1", "SUB2", "d"])
self.assertEqual(all, expected)


@unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()")
class FwalkTests(WalkTests):
Expand Down Expand Up @@ -1545,6 +1586,8 @@ def test_fd_leak(self):

# fwalk() keeps file descriptors open
test_walk_many_open_files = None
# fwalk() still uses recursion
test_walk_above_recursion_limit = None


class BytesWalkTests(WalkTests):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue with :func:`os.walk` where a :exc:`RecursionError` would occur on
deep directory structures by adjusting the implementation of
:func:`os.walk` to be iterative instead of recursive.
0