GH-125413: Add `pathlib.Path.scandir()` method (#126060) · python/cpython@260843d · GitHub
[go: up one dir, main page]

Skip to content

Commit 260843d

Browse files
authored
GH-125413: Add pathlib.Path.scandir() method (#126060)
Add `pathlib.Path.scandir()` as a trivial wrapper of `os.scandir()`. This will be used to implement several `PathBase` methods more efficiently, including methods that provide `Path.copy()`.
1 parent d0abd0b commit 260843d

File tree

6 files changed

+114
-11
lines changed

6 files changed

+114
-11
lines changed

Doc/library/pathlib.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,6 +1289,35 @@ Reading directories
12891289
raised.
12901290

12911291

1292+
.. method:: Path.scandir()
1293+
1294+
When the path points to a directory, return an iterator of
1295+
:class:`os.DirEntry` objects corresponding to entries in the directory. The
1296+
returned iterator supports the :term:`context manager` protocol. It is
1297+
implemented using :func:`os.scandir` and gives the same guarantees.
1298+
1299+
Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
1300+
significantly increase the performance of code that also needs file type or
1301+
file attribute information, because :class:`os.DirEntry` objects expose
1302+
this information if the operating system provides it when scanning a
1303+
directory.
1304+
1305+
The following example displays the names of subdirectories. The
1306+
``entry.is_dir()`` check will generally not make an additional system call::
1307+
1308+
>>> p = Path('docs')
1309+
>>> with p.scandir() as entries:
1310+
... for entry in entries:
1311+
... if entry.is_dir():
1312+
... entry.name
1313+
...
1314+
'_templates'
1315+
'_build'
1316+
'_static'
1317+
1318+
.. versionadded:: 3.14
1319+
1320+
12921321
.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)
12931322

12941323
Glob the given relative *pattern* in the directory represented by this path,

Doc/whatsnew/3.14.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,12 @@ pathlib
380380

381381
(Contributed by Barney Gale in :gh:`73991`.)
382382

383+
* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
384+
of :class:`os.DirEntry` objects. This is exactly equivalent to calling
385+
:func:`os.scandir` on a path object.
386+
387+
(Contributed by Barney Gale in :gh:`125413`.)
388+
383389

384390
pdb
385391
---

Lib/pathlib/_abc.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -639,13 +639,23 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
639639
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
640640
return f.write(data)
641641

642+
def scandir(self):
643+
"""Yield os.DirEntry objects of the directory contents.
644+
645+
The children are yielded in arbitrary order, and the
646+
special entries '.' and '..' are not included.
647+
"""
648+
raise UnsupportedOperation(self._unsupported_msg('scandir()'))
649+
642650
def iterdir(self):
643651
"""Yield path objects of the directory contents.
644652
645653
The children are yielded in arbitrary order, and the
646654
special entries '.' and '..' are not included.
647655
"""
648-
raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
656+
with self.scandir() as entries:
657+
names = [entry.name for entry in entries]
658+
return map(self.joinpath, names)
649659

650660
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
651661
if case_sensitive is None:

Lib/pathlib/_local.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,14 @@ def _filter_trailing_slash(self, paths):
615615
path_str = path_str[:-1]
616616
yield path_str
617617

618+
def scandir(self):
619+
"""Yield os.DirEntry objects of the directory contents.
620+
621+
The children are yielded in arbitrary order, and the
622+
special entries '.' and '..' are not included.
623+
"""
624+
return os.scandir(self)
625+
618626
def iterdir(self):
619627
"""Yield path objects of the directory contents.
620628

Lib/test/test_pathlib/test_pathlib_abc.py

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import collections
2+
import contextlib
23
import io
34
import os
45
import errno
@@ -1424,6 +1425,24 @@ def close(self):
14241425
'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime')
14251426

14261427

1428+
class DummyDirEntry:
1429+
"""
1430+
Minimal os.DirEntry-like object. Returned from DummyPath.scandir().
1431+
"""
1432+
__slots__ = ('name', '_is_symlink', '_is_dir')
1433+
1434+
def __init__(self, name, is_symlink, is_dir):
1435+
self.name = name
1436+
self._is_symlink = is_symlink
1437+
self._is_dir = is_dir
1438+
1439+
def is_symlink(self):
1440+
return self._is_symlink
1441+
1442+
def is_dir(self, *, follow_symlinks=True):
1443+
return self._is_dir and (follow_symlinks or not self._is_symlink)
1444+
1445+
14271446
class DummyPath(PathBase):
14281447
"""
14291448
Simple implementation of PathBase that keeps files and directories in
@@ -1491,14 +1510,25 @@ def open(self, mode='r', buffering=-1, encoding=None,
14911510
stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline)
14921511
return stream
14931512

1494-
def iterdir(self):
1495-
path = str(self.resolve())
1496-
if path in self._files:
1497-
raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
1498-
elif path in self._directories:
1499-
return iter([self / name for name in self._directories[path]])
1513+
@contextlib.contextmanager
1514+
def scandir(self):
1515+
path = self.resolve()
1516+
path_str = str(path)
1517+
if path_str in self._files:
1518+
raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str)
1519+
elif path_str in self._directories:
1520+
yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]])
15001521
else:
1501-
raise FileNotFoundError(errno.ENOENT, "File not found", path)
1522+
raise FileNotFoundError(errno.ENOENT, "File not found", path_str)
1523+
1524+
@property
1525+
def _dir_entry(self):
1526+
path_str = str(self)
1527+
is_symlink = path_str in self._symlinks
1528+
is_directory = (path_str in self._directories
1529+
if not is_symlink
1530+
else self._symlinks[path_str][1])
1531+
return DummyDirEntry(self.name, is_symlink, is_directory)
15021532

15031533
def mkdir(self, mode=0o777, parents=False, exist_ok=False):
15041534
path = str(self.parent.resolve() / self.name)
@@ -1602,7 +1632,7 @@ def setUp(self):
16021632
if self.can_symlink:
16031633
p.joinpath('linkA').symlink_to('fileA')
16041634
p.joinpath('brokenLink').symlink_to('non-existing')
1605-
p.joinpath('linkB').symlink_to('dirB')
1635+
p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
16061636
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
16071637
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
16081638
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
@@ -2187,6 +2217,23 @@ def test_iterdir_nodir(self):
21872217
self.assertIn(cm.exception.errno, (errno.ENOTDIR,
21882218
errno.ENOENT, errno.EINVAL))
21892219

2220+
def test_scandir(self):
2221+
p = self.cls(self.base)
2222+
with p.scandir() as entries:
2223+
self.assertTrue(list(entries))
2224+
with p.scandir() as entries:
2225+
for entry in entries:
2226+
child = p / entry.name
2227+
self.assertIsNotNone(entry)
2228+
self.assertEqual(entry.name, child.name)
2229+
self.assertEqual(entry.is_symlink(),
2230+
child.is_symlink())
2231+
self.assertEqual(entry.is_dir(follow_symlinks=False),
2232+
child.is_dir(follow_symlinks=False))
2233+
if entry.name != 'brokenLinkLoop':
2234+
self.assertEqual(entry.is_dir(), child.is_dir())
2235+
2236+
21902237
def test_glob_common(self):
21912238
def _check(glob, expected):
21922239
self.assertEqual(set(glob), { P(self.base, q) for q in expected })
@@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath):
30383085
def readlink(self):
30393086
path = str(self.parent.resolve() / self.name)
30403087
if path in self._symlinks:
3041-
return self.with_segments(self._symlinks[path])
3088+
return self.with_segments(self._symlinks[path][0])
30423089
elif path in self._files or path in self._directories:
30433090
raise OSError(errno.EINVAL, "Not a symlink", path)
30443091
else:
@@ -3050,7 +3097,7 @@ def symlink_to(self, target, target_is_directory=False):
30503097
if path in self._symlinks:
30513098
raise FileExistsError(errno.EEXIST, "File exists", path)
30523099
self._directories[parent].add(self.name)
3053-
self._symlinks[path] = str(target)
3100+
self._symlinks[path] = str(target), target_is_directory
30543101

30553102

30563103
class DummyPathWithSymlinksTest(DummyPathTest):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory
2+
children and their file attributes. This is a trivial wrapper of
3+
:func:`os.scandir`.

0 commit comments

Comments
 (0)
0