bpo-43757: Make pathlib use os.path.realpath() to resolve symlinks in… · python/cpython@baecfbd · GitHub
[go: up one dir, main page]

Skip to content

Commit baecfbd

Browse files
authored
bpo-43757: Make pathlib use os.path.realpath() to resolve symlinks in a path (GH-25264)
Also adds a new "strict" argument to realpath() to avoid changing the default behaviour of pathlib while sharing the implementation.
1 parent 859577c commit baecfbd

File tree

7 files changed

+184
-109
lines changed

7 files changed

+184
-109
lines changed

Doc/library/os.path.rst

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -344,22 +344,34 @@ the :mod:`glob` module.)
344344
Accepts a :term:`path-like object`.
345345

346346

347-
.. function:: realpath(path)
347+
.. function:: realpath(path, *, strict=False)
348348

349349
Return the canonical path of the specified filename, eliminating any symbolic
350350
links encountered in the path (if they are supported by the operating
351351
system).
352352

353+
If a path doesn't exist or a symlink loop is encountered, and *strict* is
354+
``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is
355+
resolved as far as possible and any remainder is appended without checking
356+
whether it exists.
357+
353358
.. note::
354-
When symbolic link cycles occur, the returned path will be one member of
355-
the cycle, but no guarantee is made about which member that will be.
359+
This function emulates the operating system's procedure for making a path
360+
canonical, which differs slightly between Windows and UNIX with respect
361+
to how links and subsequent path components interact.
362+
363+
Operating system APIs make paths canonical as needed, so it's not
364+
normally necessary to call this function.
356365

357366
.. versionchanged:: 3.6
358367
Accepts a :term:`path-like object`.
359368

360369
.. versionchanged:: 3.8
361370
Symbolic links and junctions are now resolved on Windows.
362371

372+
.. versionchanged:: 3.10
373+
The *strict* parameter was added.
374+
363375

364376
.. function:: relpath(path, start=os.curdir)
365377

Lib/ntpath.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,7 @@ def _getfinalpathname_nonstrict(path):
635635
tail = join(name, tail) if tail else name
636636
return tail
637637

638-
def realpath(path):
638+
def realpath(path, *, strict=False):
639639
path = normpath(path)
640640
if isinstance(path, bytes):
641641
prefix = b'\\\\?\\'
@@ -660,6 +660,8 @@ def realpath(path):
660660
path = _getfinalpathname(path)
661661
initial_winerror = 0
662662
except OSError as ex:
663+
if strict:
664+
raise
663665
initial_winerror = ex.winerror
664666
path = _getfinalpathname_nonstrict(path)
665667
# The path returned by _getfinalpathname will always start with \\?\ -

Lib/pathlib.py

Lines changed: 30 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,6 @@
1414
from urllib.parse import quote_from_bytes as urlquote_from_bytes
1515

1616

17-
if os.name == 'nt':
18-
from nt import _getfinalpathname
19-
else:
20-
_getfinalpathname = None
21-
22-
2317
__all__ = [
2418
"PurePath", "PurePosixPath", "PureWindowsPath",
2519
"Path", "PosixPath", "WindowsPath",
@@ -29,14 +23,17 @@
2923
# Internals
3024
#
3125

26+
_WINERROR_NOT_READY = 21 # drive exists but is not accessible
27+
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
28+
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
29+
3230
# EBADF - guard against macOS `stat` throwing EBADF
3331
_IGNORED_ERROS = (ENOENT, ENOTDIR, EBADF, ELOOP)
3432

3533
_IGNORED_WINERRORS = (
36-
21, # ERROR_NOT_READY - drive exists but is not accessible
37-
123, # ERROR_INVALID_NAME - fix for bpo-35306
38-
1921, # ERROR_CANT_RESOLVE_FILENAME - fix for broken symlink pointing to itself
39-
)
34+
_WINERROR_NOT_READY,
35+
_WINERROR_INVALID_NAME,
36+
_WINERROR_CANT_RESOLVE_FILENAME)
4037

4138
def _ignore_error(exception):
4239
return (getattr(exception, 'errno', None) in _IGNORED_ERROS or
@@ -186,30 +183,6 @@ def casefold_parts(self, parts):
186183
def compile_pattern(self, pattern):
187184
return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch
188185

189-
def resolve(self, path, strict=False):
190-
s = str(path)
191-
if not s:
192-
return path._accessor.getcwd()
193-
previous_s = None
194-
if _getfinalpathname is not None:
195-
if strict:
196-
return self._ext_to_normal(_getfinalpathname(s))
197-
else:
198-
tail_parts = [] # End of the path after the first one not found
199-
while True:
200-
try:
201-
s = self._ext_to_normal(_getfinalpathname(s))
202-
except FileNotFoundError:
203-
previous_s = s
204-
s, tail = os.path.split(s)
205-
tail_parts.append(tail)
206-
if previous_s == s:
207-
return path
208-
else:
209-
return os.path.join(s, *reversed(tail_parts))
210-
# Means fallback on absolute
211-
return None
212-
213186
def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):
214187
prefix = ''
215188
if s.startswith(ext_prefix):
@@ -220,10 +193,6 @@ def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):
220193
s = '\\' + s[3:]
221194
return prefix, s
222195

223-
def _ext_to_normal(self, s):
224-
# Turn back an extended path into a normal DOS-like path
225-
return self._split_extended_path(s)[1]
226-
227196
def is_reserved(self, parts):
228197
# NOTE: the rules for reserved names seem somewhat complicated
229198
# (e.g. r"..\NUL" is reserved but not r"foo\NUL").
@@ -281,54 +250,6 @@ def casefold_parts(self, parts):
281250
def compile_pattern(self, pattern):
282251
return re.compile(fnmatch.translate(pattern)).fullmatch
283252

284-
def resolve(self, path, strict=False):
285-
sep = self.sep
286-
accessor = path._accessor
287-
seen = {}
288-
def _resolve(path, rest):
289-
if rest.startswith(sep):
290-
path = ''
291-
292-
for name in rest.split(sep):
293-
if not name or name == '.':
294-
# current dir
295-
continue
296-
if name == '..':
297-
# parent dir
298-
path, _, _ = path.rpartition(sep)
299-
continue
300-
if path.endswith(sep):
301-
newpath = path + name
302-
else:
303-
newpath = path + sep + name
304-
if newpath in seen:
305-
# Already seen this path
306-
path = seen[newpath]
307-
if path is not None:
308-
# use cached value
309-
continue
310-
# The symlink is not resolved, so we must have a symlink loop.
311-
raise RuntimeError("Symlink loop from %r" % newpath)
312-
# Resolve the symbolic link
313-
try:
314-
target = accessor.readlink(newpath)
315-
except OSError as e:
316-
if e.errno != EINVAL and strict:
317-
raise
318-
# Not a symlink, or non-strict mode. We just leave the path
319-
# untouched.
320-
path = newpath
321-
else:
322-
seen[newpath] = None # not resolved symlink
323-
path = _resolve(path, target)
324-
seen[newpath] = path # resolved symlink
325-
326-
return path
327-
# NOTE: according to POSIX, getcwd() cannot contain path components
328-
# which are symlinks.
329-
base = '' if path.is_absolute() else accessor.getcwd()
330-
return _resolve(base, str(path)) or sep
331-
332253
def is_reserved(self, parts):
333254
return False
334255

@@ -424,6 +345,8 @@ def group(self, path):
424345

425346
expanduser = staticmethod(os.path.expanduser)
426347

348+
realpath = staticmethod(os.path.realpath)
349+
427350

428351
_normal_accessor = _NormalAccessor()
429352

@@ -1132,15 +1055,27 @@ def resolve(self, strict=False):
11321055
normalizing it (for example turning slashes into backslashes under
11331056
Windows).
11341057
"""
1135-
s = self._flavour.resolve(self, strict=strict)
1136-
if s is None:
1137-
# No symlink resolution => for consistency, raise an error if
1138-
# the path doesn't exist or is forbidden
1139-
self.stat()
1140-
s = str(self.absolute())
1141-
# Now we have no symlinks in the path, it's safe to normalize it.
1142-
normed = self._flavour.pathmod.normpath(s)
1143-
return self._from_parts((normed,))
1058+
1059+
def check_eloop(e):
1060+
winerror = getattr(e, 'winerror', 0)
1061+
if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME:
1062+
raise RuntimeError("Symlink loop from %r" % e.filename)
1063+
1064+
try:
1065+
s = self._accessor.realpath(self, strict=strict)
1066+
except OSError as e:
1067+
check_eloop(e)
1068+
raise
1069+
p = self._from_parts((s,))
1070+
1071+
# In non-strict mode, realpath() doesn't raise on symlink loops.
1072+
# Ensure we get an exception by calling stat()
1073+
if not strict:
1074+
try:
1075+
p.stat()
1076+
except OSError as e:
1077+
check_eloop(e)
1078+
return p
11441079

11451080
def stat(self, *, follow_symlinks=True):
11461081
"""

Lib/posixpath.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -387,16 +387,16 @@ def abspath(path):
387387
# Return a canonical path (i.e. the absolute location of a file on the
388388
# filesystem).
389389

390-
def realpath(filename):
390+
def realpath(filename, *, strict=False):
391391
"""Return the canonical path of the specified filename, eliminating any
392392
symbolic links encountered in the path."""
393393
filename = os.fspath(filename)
394-
path, ok = _joinrealpath(filename[:0], filename, {})
394+
path, ok = _joinrealpath(filename[:0], filename, strict, {})
395395
return abspath(path)
396396

397397
# Join two paths, normalizing and eliminating any symbolic links
398398
# encountered in the second path.
399-
def _joinrealpath(path, rest, seen):
399+
def _joinrealpath(path, rest, strict, seen):
400400
if isinstance(path, bytes):
401401
sep = b'/'
402402
curdir = b'.'
@@ -425,7 +425,15 @@ def _joinrealpath(path, rest, seen):
425425
path = pardir
426426
continue
427427
newpath = join(path, name)
428-
if not islink(newpath):
428+
try:
429+
st = os.lstat(newpath)
430+
except OSError:
431+
if strict:
432+
raise
433+
is_link = False
434+
else:
435+
is_link = stat.S_ISLNK(st.st_mode)
436+
if not is_link:
429437
path = newpath
430438
continue
431439
# Resolve the symbolic link
@@ -436,10 +444,14 @@ def _joinrealpath(path, rest, seen):
436444
# use cached value
437445
continue
438446
# The symlink is not resolved, so we must have a symlink loop.
439-
# Return already resolved part + rest of the path unchanged.
440-
return join(newpath, rest), False
447+
if strict:
448+
# Raise OSError(errno.ELOOP)
449+
os.stat(newpath)
450+
else:
451+
# Return already resolved part + rest of the path unchanged.
452+
return join(newpath, rest), False
441453
seen[newpath] = None # not resolved symlink
442-
path, ok = _joinrealpath(path, os.readlink(newpath), seen)
454+
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
443455
if not ok:
444456
return join(path, rest), False
445457
seen[newpath] = path # resolved symlink

Lib/test/test_ntpath.py

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,17 @@ def test_realpath_basic(self):
269269
self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")),
270270
os.fsencode(ABSTFN))
271271

272+
@os_helper.skip_unless_symlink
273+
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
274+
def test_realpath_strict(self):
275+
# Bug #43757: raise FileNotFoundError in strict mode if we encounter
276+
# a path that does not exist.
277+
ABSTFN = ntpath.abspath(os_helper.TESTFN)
278+
os.symlink(ABSTFN + "1", ABSTFN)
279+
self.addCleanup(os_helper.unlink, ABSTFN)
280+
self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN, strict=True)
281+
self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN + "2", strict=True)
282+
272283
@os_helper.skip_unless_symlink
273284
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
274285
def test_realpath_relative(self):
@@ -340,8 +351,9 @@ def test_realpath_broken_symlinks(self):
340351
@os_helper.skip_unless_symlink
341352
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
342353
def test_realpath_symlink_loops(self):
343-
# Symlink loops are non-deterministic as to which path is returned, but
344-
# it will always be the fully resolved path of one member of the cycle
354+
# Symlink loops in non-strict mode are non-deterministic as to which
355+
# path is returned, but it will always be the fully resolved path of
356+
# one member of the cycle
345357
ABSTFN = ntpath.abspath(os_helper.TESTFN)
346358
self.addCleanup(os_helper.unlink, ABSTFN)
347359
self.addCleanup(os_helper.unlink, ABSTFN + "1")
@@ -383,6 +395,50 @@ def test_realpath_symlink_loops(self):
383395
# Test using relative path as well.
384396
self.assertPathEqual(ntpath.realpath(ntpath.basename(ABSTFN)), ABSTFN)
385397

398+
@os_helper.skip_unless_symlink
399+
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
400+
def test_realpath_symlink_loops_strict(self):
401+
# Symlink loops raise OSError in strict mode
402+
ABSTFN = ntpath.abspath(os_helper.TESTFN)
403+
self.addCleanup(os_helper.unlink, ABSTFN)
404+
self.addCleanup(os_helper.unlink, ABSTFN + "1")
405+
self.addCleanup(os_helper.unlink, ABSTFN + "2")
406+
self.addCleanup(os_helper.unlink, ABSTFN + "y")
407+
self.addCleanup(os_helper.unlink, ABSTFN + "c")
408+
self.addCleanup(os_helper.unlink, ABSTFN + "a")
409+
410+
os.symlink(ABSTFN, ABSTFN)
411+
self.assertRaises(OSError, ntpath.realpath, ABSTFN, strict=True)
412+
413+
os.symlink(ABSTFN + "1", ABSTFN + "2")
414+
os.symlink(ABSTFN + "2", ABSTFN + "1")
415+
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1", strict=True)
416+
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "2", strict=True)
417+
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\x", strict=True)
418+
# Windows eliminates '..' components before resolving links, so the
419+
# following call is not expected to raise.
420+
self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..", strict=True),
421+
ntpath.dirname(ABSTFN))
422+
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\..\\x", strict=True)
423+
os.symlink(ABSTFN + "x", ABSTFN + "y")
424+
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1\\..\\"
425+
+ ntpath.basename(ABSTFN) + "y",
426+
strict=True)
427+
self.assertRaises(OSError, ntpath.realpath,
428+
ABSTFN + "1\\..\\" + ntpath.basename(ABSTFN) + "1",
429+
strict=True)
430+
431+
os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a")
432+
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "a", strict=True)
433+
434+
os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN))
435+
+ "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c")
436+
self.assertRaises(OSError, ntpath.realpath, ABSTFN + "c", strict=True)
437+
438+
# Test using relative path as well.
439+
self.assertRaises(OSError, ntpath.realpath, ntpath.basename(ABSTFN),
440+
strict=True)
441+
386442
@os_helper.skip_unless_symlink
387443
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
388444
def test_realpath_symlink_prefix(self):

0 commit comments

Comments
 (0)
0