GH-101362: Omit path anchor from pathlib.PurePath()._parts

Improve performance of path construction by skipping the addition of the path anchor (`drive + root`) to the internal `_parts` list. This change allows us to simplify the implementations of `joinpath()`, `name`, `parent`, and `parents` a little. The public `parts` tuple is unaffected.
python · barneygale · Apr 9, 2023 · Mar 6, 2023 · Mar 6, 2023 · Mar 6, 2023
commit 8bf4600d524fc3e1f6bdad10843b06805f870c0d
@@ -221,10 +221,7 @@ def __init__(self, path):
         self._parts = path._parts
 
     def __len__(self):
-        if self._drv or self._root:
-            return len(self._parts) - 1
-        else:
-            return len(self._parts)
+        return len(self._parts)
 
     def __getitem__(self, idx):
         if isinstance(idx, slice):
@@ -269,7 +266,7 @@ def __new__(cls, *args):
     def __reduce__(self):
         # Using the parts tuple helps share interned path parts
         # when pickling related paths.
-        return (self.__class__, tuple(self._parts))
+        return (self.__class__, self.parts)
 
     @classmethod
     def _parse_parts(cls, parts):
@@ -295,8 +292,7 @@ def _parse_parts(cls, parts):
         if drv.startswith(sep):
             # pathlib assumes that UNC paths always have a root.
             root = sep
-        unfiltered_parsed = [drv + root] + rel.split(sep)
-        parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
+        parsed = [sys.intern(x) for x in rel.split(sep) if x and x != '.']
         return drv, root, parsed
 
     @classmethod
@@ -318,10 +314,11 @@ def _from_parsed_parts(cls, drv, root, parts):
 
     @classmethod
     def _format_parsed_parts(cls, drv, root, parts):
+        tail = cls._flavour.sep.join(parts)
         if drv or root:
-            return drv + root + cls._flavour.sep.join(parts[1:])
+            return f'{drv}{root}{tail}'
         else:
-            return cls._flavour.sep.join(parts)
+            return tail
 
     def __str__(self):
         """Return the string representation of the path, suitable for
@@ -376,7 +373,7 @@ def _parts_normcase(self):
         try:
             return self._parts_normcase_cached
         except AttributeError:
-            self._parts_normcase_cached = [self._flavour.normcase(p) for p in self._parts]
+            self._parts_normcase_cached = [self._flavour.normcase(p) for p in self.parts]
             return self._parts_normcase_cached
 
     def __eq__(self, other):
@@ -427,7 +424,7 @@ def anchor(self):
     def name(self):
         """The final path component, if any."""
         parts = self._parts
-        if len(parts) == (1 if (self._drv or self._root) else 0):
+        if not parts:
             return ''
         return parts[-1]
 
@@ -551,7 +548,10 @@ def parts(self):
         try:
             return self._parts_tuple
         except AttributeError:
-            self._parts_tuple = tuple(self._parts)
+            if self._drv or self._root:
+                self._parts_tuple = (self._drv + self._root,) + tuple(self._parts)
+            else:
+                self._parts_tuple = tuple(self._parts)
             return self._parts_tuple
 
     def joinpath(self, *args):
@@ -564,13 +564,13 @@ def joinpath(self, *args):
         drv2, root2, parts2 = self._parse_parts(args)
         if root2:
             if not drv2 and drv1:
-                return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
+                return self._from_parsed_parts(drv1, root2, parts2)
             else:
                 return self._from_parsed_parts(drv2, root2, parts2)
         elif drv2:
             if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
                 # Same drive => second path is relative to the first.
-                return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
+                return self._from_parsed_parts(drv1, root1, parts1 + parts2)
             else:
                 return self._from_parsed_parts(drv2, root2, parts2)
         else:
@@ -595,7 +595,7 @@ def parent(self):
         drv = self._drv
         root = self._root
         parts = self._parts
-        if len(parts) == 1 and (drv or root):
+        if not parts:
             return self
         return self._from_parsed_parts(drv, root, parts[:-1])
 
@@ -622,7 +622,7 @@ def is_reserved(self):
         # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
         # exist). We err on the side of caution and return True for paths
         # which are not considered reserved by Windows.
-        if self._parts[0].startswith('\\\\'):
+        if self._drv.startswith('\\\\'):
             # UNC paths are never reserved.
             return False
         name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
@@ -632,12 +632,12 @@ def match(self, path_pattern):
         """
         Return True if this path matches the given pattern.
         """
-        path_pattern = self._flavour.normcase(path_pattern)
-        drv, root, pat_parts = self._parse_parts((path_pattern,))
+        pat = type(self)(path_pattern)
+        pat_parts = pat._parts_normcase
         if not pat_parts:
             raise ValueError("empty pattern")
         parts = self._parts_normcase
-        if drv or root:
+        if pat._drv or pat._root:
             if len(pat_parts) != len(parts):
                 return False
         elif len(pat_parts) > len(parts):
@@ -806,7 +806,7 @@ def absolute(self):
             cwd = self._flavour.abspath(self._drv)
         else:
             cwd = os.getcwd()
-        return self._from_parts([cwd] + self._parts)
+        return self._from_parts((cwd,) + self.parts)
 
     def resolve(self, strict=False):
         """

@@ -56,12 +56,12 @@ def test_parse_parts_common(self):
         check(['a', '.', 'b'],      ('', '', ['a', 'b']))
         check(['a', '.', '.'],      ('', '', ['a']))
         # The first part is anchored.
-        check(['/a/b'],             ('', sep, [sep, 'a', 'b']))
-        check(['/a', 'b'],          ('', sep, [sep, 'a', 'b']))
-        check(['/a/', 'b'],         ('', sep, [sep, 'a', 'b']))
+        check(['/a/b'],             ('', sep, ['a', 'b']))
+        check(['/a', 'b'],          ('', sep, ['a', 'b']))
+        check(['/a/', 'b'],         ('', sep, ['a', 'b']))
         # Ignoring parts before an anchored part.
-        check(['a', '/b', 'c'],     ('', sep, [sep, 'b', 'c']))
-        check(['a', '/b', '/c'],    ('', sep, [sep, 'c']))
+        check(['a', '/b', 'c'],     ('', sep, ['b', 'c']))
+        check(['a', '/b', '/c'],    ('', sep, ['c']))
 
 
 class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase):
@@ -72,9 +72,9 @@ def test_parse_parts(self):
         check = self._check_parse_parts
         # Collapsing of excess leading slashes, except for the double-slash
         # special case.
-        check(['//a', 'b'],             ('', '//', ['//', 'a', 'b']))
-        check(['///a', 'b'],            ('', '/', ['/', 'a', 'b']))
-        check(['////a', 'b'],           ('', '/', ['/', 'a', 'b']))
+        check(['//a', 'b'],             ('', '//', ['a', 'b']))
+        check(['///a', 'b'],            ('', '/', ['a', 'b']))
+        check(['////a', 'b'],           ('', '/', ['a', 'b']))
         # Paths which look like NT paths aren't treated specially.
         check(['c:a'],                  ('', '', ['c:a']))
         check(['c:\\a'],                ('', '', ['c:\\a']))
@@ -88,40 +88,40 @@ class NTFlavourTest(_BaseFlavourTest, unittest.TestCase):
     def test_parse_parts(self):
         check = self._check_parse_parts
         # First part is anchored.
-        check(['c:'],                   ('c:', '', ['c:']))
-        check(['c:/'],                  ('c:', '\\', ['c:\\']))
-        check(['/'],                    ('', '\\', ['\\']))
-        check(['c:a'],                  ('c:', '', ['c:', 'a']))
-        check(['c:/a'],                 ('c:', '\\', ['c:\\', 'a']))
-        check(['/a'],                   ('', '\\', ['\\', 'a']))
+        check(['c:'],                   ('c:', '', []))
+        check(['c:/'],                  ('c:', '\\', []))
+        check(['/'],                    ('', '\\', []))
+        check(['c:a'],                  ('c:', '', ['a']))
+        check(['c:/a'],                 ('c:', '\\', ['a']))
+        check(['/a'],                   ('', '\\', ['a']))
         # UNC paths.
-        check(['//a/b'],                ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
-        check(['//a/b/'],               ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
-        check(['//a/b/c'],              ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c']))
+        check(['//a/b'],                ('\\\\a\\b', '\\', []))
+        check(['//a/b/'],               ('\\\\a\\b', '\\', []))
+        check(['//a/b/c'],              ('\\\\a\\b', '\\', ['c']))
         # Second part is anchored, so that the first part is ignored.
-        check(['a', 'Z:b', 'c'],        ('Z:', '', ['Z:', 'b', 'c']))
-        check(['a', 'Z:/b', 'c'],       ('Z:', '\\', ['Z:\\', 'b', 'c']))
+        check(['a', 'Z:b', 'c'],        ('Z:', '', ['b', 'c']))
+        check(['a', 'Z:/b', 'c'],       ('Z:', '\\', ['b', 'c']))
         # UNC paths.
-        check(['a', '//b/c', 'd'],      ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
+        check(['a', '//b/c', 'd'],      ('\\\\b\\c', '\\', ['d']))
         # Collapsing and stripping excess slashes.
-        check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd']))
+        check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['b', 'c', 'd']))
         # UNC paths.
-        check(['a', '//b/c//', 'd'],    ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
+        check(['a', '//b/c//', 'd'],    ('\\\\b\\c', '\\', ['d']))
         # Extended paths.
-        check(['//?/c:/'],              ('\\\\?\\c:', '\\', ['\\\\?\\c:\\']))
-        check(['//?/c:/a'],             ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a']))
-        check(['//?/c:/a', '/b'],       ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b']))
+        check(['//?/c:/'],              ('\\\\?\\c:', '\\', []))
+        check(['//?/c:/a'],             ('\\\\?\\c:', '\\', ['a']))
+        check(['//?/c:/a', '/b'],       ('\\\\?\\c:', '\\', ['b']))
         # Extended UNC paths (format is "\\?\UNC\server\share").
-        check(['//?/UNC/b/c'],          ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\']))
-        check(['//?/UNC/b/c/d'],        ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd']))
+        check(['//?/UNC/b/c'],          ('\\\\?\\UNC\\b\\c', '\\', []))
+        check(['//?/UNC/b/c/d'],        ('\\\\?\\UNC\\b\\c', '\\', ['d']))
         # Second part has a root but not drive.
-        check(['a', '/b', 'c'],         ('', '\\', ['\\', 'b', 'c']))
-        check(['Z:/a', '/b', 'c'],      ('Z:', '\\', ['Z:\\', 'b', 'c']))
-        check(['//?/Z:/a', '/b', 'c'],  ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c']))
+        check(['a', '/b', 'c'],         ('', '\\', ['b', 'c']))
+        check(['Z:/a', '/b', 'c'],      ('Z:', '\\', ['b', 'c']))
+        check(['//?/Z:/a', '/b', 'c'],  ('\\\\?\\Z:', '\\', ['b', 'c']))
         # Joining with the same drive => the first path is appended to if
         # the second path is relative.
-        check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['c:\\', 'a', 'b', 'x', 'y']))
-        check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['c:\\', 'x', 'y']))
+        check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['a', 'b', 'x', 'y']))
+        check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['x', 'y']))
 
 
 #