GH-101362: Omit path anchor from `pathlib.PurePath()._parts` by barneygale · Pull Request #102476 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-101362: Omit path anchor from pathlib.PurePath()._parts #102476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
GH-101362: Omit path anchor from pathlib.PurePath()._parts
Improve performance of path construction by skipping the addition of the
path anchor (`drive + root`) to the internal `_parts` list. This change
allows us to simplify the implementations of `joinpath()`, `name`,
`parent`, and `parents` a little. The public `parts` tuple is unaffected.
  • Loading branch information
barneygale committed Mar 6, 2023
commit 8bf4600d524fc3e1f6bdad10843b06805f870c0d
40 changes: 20 additions & 20 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,7 @@ def __init__(self, path):
self._parts = path._parts

def __len__(self):
if self._drv or self._root:
return len(self._parts) - 1
else:
return len(self._parts)
return len(self._parts)

def __getitem__(self, idx):
if isinstance(idx, slice):
Expand Down Expand Up @@ -269,7 +266,7 @@ def __new__(cls, *args):
def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
return (self.__class__, tuple(self._parts))
return (self.__class__, self.parts)

@classmethod
def _parse_parts(cls, parts):
Expand All @@ -295,8 +292,7 @@ def _parse_parts(cls, parts):
if drv.startswith(sep):
# pathlib assumes that UNC paths always have a root.
root = sep
unfiltered_parsed = [drv + root] + rel.split(sep)
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
parsed = [sys.intern(x) for x in rel.split(sep) if x and x != '.']
return drv, root, parsed

@classmethod
Expand All @@ -318,10 +314,11 @@ def _from_parsed_parts(cls, drv, root, parts):

@classmethod
def _format_parsed_parts(cls, drv, root, parts):
tail = cls._flavour.sep.join(parts)
if drv or root:
return drv + root + cls._flavour.sep.join(parts[1:])
return f'{drv}{root}{tail}'
else:
return cls._flavour.sep.join(parts)
return tail

def __str__(self):
"""Return the string representation of the path, suitable for
Expand Down Expand Up @@ -376,7 +373,7 @@ def _parts_normcase(self):
try:
return self._parts_normcase_cached
except AttributeError:
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self._parts]
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self.parts]
return self._parts_normcase_cached

def __eq__(self, other):
Expand Down Expand Up @@ -427,7 +424,7 @@ def anchor(self):
def name(self):
"""The final path component, if any."""
parts = self._parts
if len(parts) == (1 if (self._drv or self._root) else 0):
if not parts:
return ''
return parts[-1]

Expand Down Expand Up @@ -551,7 +548,10 @@ def parts(self):
try:
return self._parts_tuple
except AttributeError:
self._parts_tuple = tuple(self._parts)
if self._drv or self._root:
self._parts_tuple = (self._drv + self._root,) + tuple(self._parts)
else:
self._parts_tuple = tuple(self._parts)
return self._parts_tuple

def joinpath(self, *args):
Expand All @@ -564,13 +564,13 @@ def joinpath(self, *args):
drv2, root2, parts2 = self._parse_parts(args)
if root2:
if not drv2 and drv1:
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
return self._from_parsed_parts(drv1, root2, parts2)
else:
return self._from_parsed_parts(drv2, root2, parts2)
elif drv2:
if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
# Same drive => second path is relative to the first.
return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
return self._from_parsed_parts(drv1, root1, parts1 + parts2)
else:
return self._from_parsed_parts(drv2, root2, parts2)
else:
Expand All @@ -595,7 +595,7 @@ def parent(self):
drv = self._drv
root = self._root
parts = self._parts
if len(parts) == 1 and (drv or root):
if not parts:
return self
return self._from_parsed_parts(drv, root, parts[:-1])

Expand All @@ -622,7 +622,7 @@ def is_reserved(self):
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True for paths
# which are not considered reserved by Windows.
if self._parts[0].startswith('\\\\'):
if self._drv.startswith('\\\\'):
# UNC paths are never reserved.
return False
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
Expand All @@ -632,12 +632,12 @@ def match(self, path_pattern):
"""
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_parts((path_pattern,))
pat = type(self)(path_pattern)
pat_parts = pat._parts_normcase
if not pat_parts:
raise ValueError("empty pattern")
parts = self._parts_normcase
if drv or root:
if pat._drv or pat._root:
if len(pat_parts) != len(parts):
return False
elif len(pat_parts) > len(parts):
Expand Down Expand Up @@ -806,7 +806,7 @@ def absolute(self):
cwd = self._flavour.abspath(self._drv)
else:
cwd = os.getcwd()
return self._from_parts([cwd] + self._parts)
return self._from_parts((cwd,) + self.parts)

def resolve(self, strict=False):
"""
Expand Down
64 changes: 32 additions & 32 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ def test_parse_parts_common(self):
check(['a', '.', 'b'], ('', '', ['a', 'b']))
check(['a', '.', '.'], ('', '', ['a']))
# The first part is anchored.
check(['/a/b'], ('', sep, [sep, 'a', 'b']))
check(['/a', 'b'], ('', sep, [sep, 'a', 'b']))
check(['/a/', 'b'], ('', sep, [sep, 'a', 'b']))
check(['/a/b'], ('', sep, ['a', 'b']))
check(['/a', 'b'], ('', sep, ['a', 'b']))
check(['/a/', 'b'], ('', sep, ['a', 'b']))
# Ignoring parts before an anchored part.
check(['a', '/b', 'c'], ('', sep, [sep, 'b', 'c']))
check(['a', '/b', '/c'], ('', sep, [sep, 'c']))
check(['a', '/b', 'c'], ('', sep, ['b', 'c']))
check(['a', '/b', '/c'], ('', sep, ['c']))


class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase):
Expand All @@ -72,9 +72,9 @@ def test_parse_parts(self):
check = self._check_parse_parts
# Collapsing of excess leading slashes, except for the double-slash
# special case.
check(['//a', 'b'], ('', '//', ['//', 'a', 'b']))
check(['///a', 'b'], ('', '/', ['/', 'a', 'b']))
check(['////a', 'b'], ('', '/', ['/', 'a', 'b']))
check(['//a', 'b'], ('', '//', ['a', 'b']))
check(['///a', 'b'], ('', '/', ['a', 'b']))
check(['////a', 'b'], ('', '/', ['a', 'b']))
# Paths which look like NT paths aren't treated specially.
check(['c:a'], ('', '', ['c:a']))
check(['c:\\a'], ('', '', ['c:\\a']))
Expand All @@ -88,40 +88,40 @@ class NTFlavourTest(_BaseFlavourTest, unittest.TestCase):
def test_parse_parts(self):
check = self._check_parse_parts
# First part is anchored.
check(['c:'], ('c:', '', ['c:']))
check(['c:/'], ('c:', '\\', ['c:\\']))
check(['/'], ('', '\\', ['\\']))
check(['c:a'], ('c:', '', ['c:', 'a']))
check(['c:/a'], ('c:', '\\', ['c:\\', 'a']))
check(['/a'], ('', '\\', ['\\', 'a']))
check(['c:'], ('c:', '', []))
check(['c:/'], ('c:', '\\', []))
check(['/'], ('', '\\', []))
check(['c:a'], ('c:', '', ['a']))
check(['c:/a'], ('c:', '\\', ['a']))
check(['/a'], ('', '\\', ['a']))
# UNC paths.
check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c']))
check(['//a/b'], ('\\\\a\\b', '\\', []))
check(['//a/b/'], ('\\\\a\\b', '\\', []))
check(['//a/b/c'], ('\\\\a\\b', '\\', ['c']))
# Second part is anchored, so that the first part is ignored.
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c']))
check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
check(['a', 'Z:b', 'c'], ('Z:', '', ['b', 'c']))
check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['b', 'c']))
# UNC paths.
check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['d']))
# Collapsing and stripping excess slashes.
check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd']))
check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['b', 'c', 'd']))
# UNC paths.
check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['d']))
# Extended paths.
check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\']))
check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a']))
check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b']))
check(['//?/c:/'], ('\\\\?\\c:', '\\', []))
check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['a']))
check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['b']))
# Extended UNC paths (format is "\\?\UNC\server\share").
check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\']))
check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd']))
check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', []))
check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['d']))
# Second part has a root but not drive.
check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c']))
check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c']))
check(['a', '/b', 'c'], ('', '\\', ['b', 'c']))
check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['b', 'c']))
check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['b', 'c']))
# Joining with the same drive => the first path is appended to if
# the second path is relative.
check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['c:\\', 'a', 'b', 'x', 'y']))
check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['c:\\', 'x', 'y']))
check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['a', 'b', 'x', 'y']))
check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['x', 'y']))


#
Expand Down