bpo-9949: Enable symlink traversal for ntpath.realpath (GH-15287) · python/cpython@c30c869 · GitHub
[go: up one dir, main page]

Skip to content

Commit c30c869

Browse files
bpo-9949: Enable symlink traversal for ntpath.realpath (GH-15287)
(cherry picked from commit 75e0649) Co-authored-by: Steve Dower <steve.dower@python.org>
1 parent 7e293f5 commit c30c869

File tree

8 files changed

+304
-32
lines changed

8 files changed

+304
-32
lines changed

Doc/library/os.path.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,11 +350,19 @@ the :mod:`glob` module.)
350350
.. function:: realpath(path)
351351

352352
Return the canonical path of the specified filename, eliminating any symbolic
353-
links encountered in the path (if they are supported by the operating system).
353+
links encountered in the path (if they are supported by the operating
354+
system).
355+
356+
.. note::
357+
When symbolic link cycles occur, the returned path will be one member of
358+
the cycle, but no guarantee is made about which member that will be.
354359

355360
.. versionchanged:: 3.6
356361
Accepts a :term:`path-like object`.
357362

363+
.. versionchanged:: 3.8
364+
Symbolic links and junctions are now resolved on Windows.
365+
358366

359367
.. function:: relpath(path, start=os.curdir)
360368

Doc/whatsnew/3.8.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,9 @@ characters or bytes unrepresentable at the OS level.
824824
environment variable and does not use :envvar:`HOME`, which is not normally set
825825
for regular user accounts.
826826

827+
:func:`~os.path.realpath` on Windows now resolves reparse points, including
828+
symlinks and directory junctions.
829+
827830

828831
ncurses
829832
-------

Lib/ntpath.py

Lines changed: 88 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -519,8 +519,94 @@ def abspath(path):
519519
except (OSError, ValueError):
520520
return _abspath_fallback(path)
521521

522-
# realpath is a no-op on systems without islink support
523-
realpath = abspath
522+
try:
523+
from nt import _getfinalpathname, readlink as _nt_readlink
524+
except ImportError:
525+
# realpath is a no-op on systems without _getfinalpathname support.
526+
realpath = abspath
527+
else:
528+
def _readlink_deep(path, seen=None):
529+
if seen is None:
530+
seen = set()
531+
532+
while normcase(path) not in seen:
533+
seen.add(normcase(path))
534+
try:
535+
path = _nt_readlink(path)
536+
except OSError as ex:
537+
# Stop on file (2) or directory (3) not found, or
538+
# paths that are not reparse points (4390)
539+
if ex.winerror in (2, 3, 4390):
540+
break
541+
raise
542+
except ValueError:
543+
# Stop on reparse points that are not symlinks
544+
break
545+
return path
546+
547+
def _getfinalpathname_nonstrict(path):
548+
# Fast path to get the final path name. If this succeeds, there
549+
# is no need to go any further.
550+
try:
551+
return _getfinalpathname(path)
552+
except OSError:
553+
pass
554+
555+
# Allow file (2) or directory (3) not found, invalid syntax (123),
556+
# and symlinks that cannot be followed (1921)
557+
allowed_winerror = 2, 3, 123, 1921
558+
559+
# Non-strict algorithm is to find as much of the target directory
560+
# as we can and join the rest.
561+
tail = ''
562+
seen = set()
563+
while path:
564+
try:
565+
path = _readlink_deep(path, seen)
566+
path = _getfinalpathname(path)
567+
return join(path, tail) if tail else path
568+
except OSError as ex:
569+
if ex.winerror not in allowed_winerror:
570+
raise
571+
path, name = split(path)
572+
if path and not name:
573+
return abspath(path + tail)
574+
tail = join(name, tail) if tail else name
575+
return abspath(tail)
576+
577+
def realpath(path):
578+
path = os.fspath(path)
579+
if isinstance(path, bytes):
580+
prefix = b'\\\\?\\'
581+
unc_prefix = b'\\\\?\\UNC\\'
582+
new_unc_prefix = b'\\\\'
583+
cwd = os.getcwdb()
584+
else:
585+
prefix = '\\\\?\\'
586+
unc_prefix = '\\\\?\\UNC\\'
587+
new_unc_prefix = '\\\\'
588+
cwd = os.getcwd()
589+
had_prefix = path.startswith(prefix)
590+
path = _getfinalpathname_nonstrict(path)
591+
# The path returned by _getfinalpathname will always start with \\?\ -
592+
# strip off that prefix unless it was already provided on the original
593+
# path.
594+
if not had_prefix and path.startswith(prefix):
595+
# For UNC paths, the prefix will actually be \\?\UNC\
596+
# Handle that case as well.
597+
if path.startswith(unc_prefix):
598+
spath = new_unc_prefix + path[len(unc_prefix):]
599+
else:
600+
spath = path[len(prefix):]
601+
# Ensure that the non-prefixed path resolves to the same path
602+
try:
603+
if _getfinalpathname(spath) == path:
604+
path = spath
605+
except OSError as ex:
606+
pass
607+
return path
608+
609+
524610
# Win9x family and earlier have no Unicode filename support.
525611
supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
526612
sys.getwindowsversion()[3] >= 2)
@@ -633,23 +719,6 @@ def commonpath(paths):
633719
raise
634720

635721

636-
# determine if two files are in fact the same file
637-
try:
638-
# GetFinalPathNameByHandle is available starting with Windows 6.0.
639-
# Windows XP and non-Windows OS'es will mock _getfinalpathname.
640-
if sys.getwindowsversion()[:2] >= (6, 0):
641-
from nt import _getfinalpathname
642-
else:
643-
raise ImportError
644-
except (AttributeError, ImportError):
645-
# On Windows XP and earlier, two files are the same if their absolute
646-
# pathnames are the same.
647-
# Non-Windows operating systems fake this method with an XP
648-
# approximation.
649-
def _getfinalpathname(f):
650-
return normcase(abspath(f))
651-
652-
653722
try:
654723
# The genericpath.isdir implementation uses os.stat and checks the mode
655724
# attribute to tell whether or not the path is a directory.

Lib/test/test_ntpath.py

Lines changed: 195 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,22 @@
77
from test import support, test_genericpath
88
from tempfile import TemporaryFile
99

10+
1011
try:
1112
import nt
1213
except ImportError:
1314
# Most tests can complete without the nt module,
1415
# but for those that require it we import here.
1516
nt = None
1617

18+
try:
19+
ntpath._getfinalpathname
20+
except AttributeError:
21+
HAVE_GETFINALPATHNAME = False
22+
else:
23+
HAVE_GETFINALPATHNAME = True
24+
25+
1726
def tester(fn, wantResult):
1827
fn = fn.replace("\\", "\\\\")
1928
gotResult = eval(fn)
@@ -194,6 +203,189 @@ def test_normpath(self):
194203
tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL')
195204
tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z')
196205

206+
def test_realpath_curdir(self):
207+
expected = ntpath.normpath(os.getcwd())
208+
tester("ntpath.realpath('.')", expected)
209+
tester("ntpath.realpath('./.')", expected)
210+
tester("ntpath.realpath('/'.join(['.'] * 100))", expected)
211+
tester("ntpath.realpath('.\\.')", expected)
212+
tester("ntpath.realpath('\\'.join(['.'] * 100))", expected)
213+
214+
def test_realpath_pardir(self):
215+
expected = ntpath.normpath(os.getcwd())
216+
tester("ntpath.realpath('..')", ntpath.dirname(expected))
217+
tester("ntpath.realpath('../..')",
218+
ntpath.dirname(ntpath.dirname(expected)))
219+
tester("ntpath.realpath('/'.join(['..'] * 50))",
220+
ntpath.splitdrive(expected)[0] + '\\')
221+
tester("ntpath.realpath('..\\..')",
222+
ntpath.dirname(ntpath.dirname(expected)))
223+
tester("ntpath.realpath('\\'.join(['..'] * 50))",
224+
ntpath.splitdrive(expected)[0] + '\\')
225+
226+
@support.skip_unless_symlink
227+
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
228+
def test_realpath_basic(self):
229+
ABSTFN = ntpath.abspath(support.TESTFN)
230+
open(ABSTFN, "wb").close()
231+
self.addCleanup(support.unlink, ABSTFN)
232+
self.addCleanup(support.unlink, ABSTFN + "1")
233+
234+
os.symlink(ABSTFN, ABSTFN + "1")
235+
self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
236+
self.assertEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")),
237+
os.fsencode(ABSTFN))
238+
239+
@support.skip_unless_symlink
240+
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
241+
def test_realpath_relative(self):
242+
ABSTFN = ntpath.abspath(support.TESTFN)
243+
open(ABSTFN, "wb").close()
244+
self.addCleanup(support.unlink, ABSTFN)
245+
self.addCleanup(support.unlink, ABSTFN + "1")
246+
247+
os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1"))
248+
self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
249+
250+
@support.skip_unless_symlink
251+
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
252+
def test_realpath_broken_symlinks(self):
253+
ABSTFN = ntpath.abspath(support.TESTFN)
254+
os.mkdir(ABSTFN)
255+
self.addCleanup(support.rmtree, ABSTFN)
256+
257+
with support.change_cwd(ABSTFN):
258+
os.mkdir("subdir")
259+
os.chdir("subdir")
260+
os.symlink(".", "recursive")
261+
os.symlink("..", "parent")
262+
os.chdir("..")
263+
os.symlink(".", "self")
264+
os.symlink("missing", "broken")
265+
os.symlink(r"broken\bar", "broken1")
266+
os.symlink(r"self\self\broken", "broken2")
267+
os.symlink(r"subdir\parent\subdir\parent\broken", "broken3")
268+
os.symlink(ABSTFN + r"\broken", "broken4")
269+
os.symlink(r"recursive\..\broken", "broken5")
270+
271+
self.assertEqual(ntpath.realpath("broken"),
272+
ABSTFN + r"\missing")
273+
self.assertEqual(ntpath.realpath(r"broken\foo"),
274+
ABSTFN + r"\missing\foo")
275+
self.assertEqual(ntpath.realpath(r"broken1"),
276+
ABSTFN + r"\missing\bar")
277+
self.assertEqual(ntpath.realpath(r"broken1\baz"),
278+
ABSTFN + r"\missing\bar\baz")
279+
self.assertEqual(ntpath.realpath("broken2"),
280+
ABSTFN + r"\missing")
281+
self.assertEqual(ntpath.realpath("broken3"),
282+
ABSTFN + r"\missing")
283+
self.assertEqual(ntpath.realpath("broken4"),
284+
ABSTFN + r"\missing")
285+
self.assertEqual(ntpath.realpath("broken5"),
286+
ABSTFN + r"\missing")
287+
288+
self.assertEqual(ntpath.realpath(b"broken"),
289+
os.fsencode(ABSTFN + r"\missing"))
290+
self.assertEqual(ntpath.realpath(rb"broken\foo"),
291+
os.fsencode(ABSTFN + r"\missing\foo"))
292+
self.assertEqual(ntpath.realpath(rb"broken1"),
293+
os.fsencode(ABSTFN + r"\missing\bar"))
294+
self.assertEqual(ntpath.realpath(rb"broken1\baz"),
295+
os.fsencode(ABSTFN + r"\missing\bar\baz"))
296+
self.assertEqual(ntpath.realpath(b"broken2"),
297+
os.fsencode(ABSTFN + r"\missing"))
298+
self.assertEqual(ntpath.realpath(rb"broken3"),
299+
os.fsencode(ABSTFN + r"\missing"))
300+
self.assertEqual(ntpath.realpath(b"broken4"),
301+
os.fsencode(ABSTFN + r"\missing"))
302+
self.assertEqual(ntpath.realpath(b"broken5"),
303+
os.fsencode(ABSTFN + r"\missing"))
304+
305+
@support.skip_unless_symlink
306+
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
307+
def test_realpath_symlink_loops(self):
308+
# Bug #930024, return the path unchanged if we get into an infinite
309+
# symlink loop.
310+
ABSTFN = ntpath.abspath(support.TESTFN)
311+
self.addCleanup(support.unlink, ABSTFN)
312+
self.addCleanup(support.unlink, ABSTFN + "1")
313+
self.addCleanup(support.unlink, ABSTFN + "2")
314+
self.addCleanup(support.unlink, ABSTFN + "y")
315+
self.addCleanup(support.unlink, ABSTFN + "c")
316+
self.addCleanup(support.unlink, ABSTFN + "a")
317+
318+
P = "\\\\?\\"
319+
320+
os.symlink(ABSTFN, ABSTFN)
321+
self.assertEqual(ntpath.realpath(ABSTFN), P + ABSTFN)
322+
323+
# cycles are non-deterministic as to which path is returned, but
324+
# it will always be the fully resolved path of one member of the cycle
325+
os.symlink(ABSTFN + "1", ABSTFN + "2")
326+
os.symlink(ABSTFN + "2", ABSTFN + "1")
327+
expected = (P + ABSTFN + "1", P + ABSTFN + "2")
328+
self.assertIn(ntpath.realpath(ABSTFN + "1"), expected)
329+
self.assertIn(ntpath.realpath(ABSTFN + "2"), expected)
330+
331+
self.assertIn(ntpath.realpath(ABSTFN + "1\\x"),
332+
(ntpath.join(r, "x") for r in expected))
333+
self.assertEqual(ntpath.realpath(ABSTFN + "1\\.."),
334+
ntpath.dirname(ABSTFN))
335+
self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\x"),
336+
ntpath.dirname(P + ABSTFN) + "\\x")
337+
os.symlink(ABSTFN + "x", ABSTFN + "y")
338+
self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\"
339+
+ ntpath.basename(ABSTFN) + "y"),
340+
P + ABSTFN + "x")
341+
self.assertIn(ntpath.realpath(ABSTFN + "1\\..\\"
342+
+ ntpath.basename(ABSTFN) + "1"),
343+
expected)
344+
345+
os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a")
346+
self.assertEqual(ntpath.realpath(ABSTFN + "a"), P + ABSTFN + "a")
347+
348+
os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN))
349+
+ "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c")
350+
self.assertEqual(ntpath.realpath(ABSTFN + "c"), P + ABSTFN + "c")
351+
352+
# Test using relative path as well.
353+
self.assertEqual(ntpath.realpath(ntpath.basename(ABSTFN)), P + ABSTFN)
354+
355+
@support.skip_unless_symlink
356+
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
357+
def test_realpath_symlink_prefix(self):
358+
ABSTFN = ntpath.abspath(support.TESTFN)
359+
self.addCleanup(support.unlink, ABSTFN + "3")
360+
self.addCleanup(support.unlink, "\\\\?\\" + ABSTFN + "3.")
361+
self.addCleanup(support.unlink, ABSTFN + "3link")
362+
self.addCleanup(support.unlink, ABSTFN + "3.link")
363+
364+
with open(ABSTFN + "3", "wb") as f:
365+
f.write(b'0')
366+
os.symlink(ABSTFN + "3", ABSTFN + "3link")
367+
368+
with open("\\\\?\\" + ABSTFN + "3.", "wb") as f:
369+
f.write(b'1')
370+
os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link")
371+
372+
self.assertEqual(ntpath.realpath(ABSTFN + "3link"),
373+
ABSTFN + "3")
374+
self.assertEqual(ntpath.realpath(ABSTFN + "3.link"),
375+
"\\\\?\\" + ABSTFN + "3.")
376+
377+
# Resolved paths should be usable to open target files
378+
with open(ntpath.realpath(ABSTFN + "3link"), "rb") as f:
379+
self.assertEqual(f.read(), b'0')
380+
with open(ntpath.realpath(ABSTFN + "3.link"), "rb") as f:
381+
self.assertEqual(f.read(), b'1')
382+
383+
# When the prefix is included, it is not stripped
384+
self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"),
385+
"\\\\?\\" + ABSTFN + "3")
386+
self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"),
387+
"\\\\?\\" + ABSTFN + "3.")
388+
197389
def test_expandvars(self):
198390
with support.EnvironmentVarGuard() as env:
199391
env.clear()
@@ -288,11 +480,11 @@ def test_abspath(self):
288480

289481
def test_relpath(self):
290482
tester('ntpath.relpath("a")', 'a')
291-
tester('ntpath.relpath(os.path.abspath("a"))', 'a')
483+
tester('ntpath.relpath(ntpath.abspath("a"))', 'a')
292484
tester('ntpath.relpath("a/b")', 'a\\b')
293485
tester('ntpath.relpath("../a/b")', '..\\a\\b')
294486
with support.temp_cwd(support.TESTFN) as cwd_dir:
295-
currentdir = os.path.basename(cwd_dir)
487+
currentdir = ntpath.basename(cwd_dir)
296488
tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a')
297489
tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b')
298490
tester('ntpath.relpath("a", "b/c")', '..\\..\\a')
@@ -417,7 +609,7 @@ def test_ismount(self):
417609
# locations below cannot then refer to mount points
418610
#
419611
drive, path = ntpath.splitdrive(sys.executable)
420-
with support.change_cwd(os.path.dirname(sys.executable)):
612+
with support.change_cwd(ntpath.dirname(sys.executable)):
421613
self.assertFalse(ntpath.ismount(drive.lower()))
422614
self.assertFalse(ntpath.ismount(drive.upper()))
423615

Lib/test/test_os.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3358,10 +3358,7 @@ def test_oserror_filename(self):
33583358
if hasattr(os, "lchmod"):
33593359
funcs.append((self.filenames, os.lchmod, 0o777))
33603360
if hasattr(os, "readlink"):
3361-
if sys.platform == "win32":
3362-
funcs.append((self.unicode_filenames, os.readlink,))
3363-
else:
3364-
funcs.append((self.filenames, os.readlink,))
3361+
funcs.append((self.filenames, os.readlink,))
33653362

33663363

33673364
for filenames, func, *func_args in funcs:

0 commit comments

Comments
 (0)
0