From ea989440509f2bc81658a5a63a3fd856988825be Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 4 Nov 2024 19:37:52 +0000 Subject: [PATCH 1/2] pathlib ABCs: defer path joining Defer joining of path segments in the private `PurePathBase` ABC. The new behaviour matches how the public `PurePath` class handles path segments. This slightly reduces the size of `PurePath` objects by eliminating a `_raw_path` slot. --- Lib/pathlib/_abc.py | 66 +++++++++++++++-------- Lib/pathlib/_local.py | 21 -------- Lib/test/test_pathlib/test_pathlib_abc.py | 5 -- 3 files changed, 44 insertions(+), 48 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f5eed6f025c250..287b7b2bc41936 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -99,7 +99,7 @@ class PathGlobber(_GlobberBase): @staticmethod def concat_path(path, text): """Appends text to the given path.""" - return path.with_segments(path._raw_path + text) + return path.with_segments(str(path) + text) class PurePathBase: @@ -112,9 +112,13 @@ class PurePathBase: """ __slots__ = ( - # The `_raw_path` slot store a joined string path. This is set in the - # `__init__()` method. - '_raw_path', + # The `_raw_paths` slot stores unjoined string paths. This is set in + # the `__init__()` method. + '_raw_paths', + + # The `_str` slot stores the string representation of the path, + # computed when `__str__()` is called for the first time. + '_str', # The '_resolving' slot stores a boolean indicating whether the path # is being processed by `PathBase.resolve()`. This prevents duplicate @@ -124,11 +128,14 @@ class PurePathBase: parser = ParserBase() _globber = PathGlobber - def __init__(self, path, *paths): - self._raw_path = self.parser.join(path, *paths) if paths else path - if not isinstance(self._raw_path, str): - raise TypeError( - f"path should be a str, not {type(self._raw_path).__name__!r}") + def __init__(self, arg, *args): + paths = [arg] + paths.extend(args) + for path in paths: + if not isinstance(path, str): + raise TypeError( + f"path should be a str, not {type(path).__name__!r}") + self._raw_paths = paths self._resolving = False def with_segments(self, *pathsegments): @@ -138,10 +145,25 @@ def with_segments(self, *pathsegments): """ return type(self)(*pathsegments) + @property + def _raw_path(self): + paths = self._raw_paths + if len(paths) == 0: + path = '' + elif len(paths) == 1: + path = paths[0] + else: + path = self.parser.join(*paths) + return path + def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" - return self._raw_path + try: + return self._str + except AttributeError: + self._str = self._raw_path + return self._str def as_posix(self): """Return the string representation of the path with forward (/) @@ -166,7 +188,7 @@ def anchor(self): @property def name(self): """The final path component, if any.""" - return self.parser.split(self._raw_path)[1] + return self.parser.split(str(self))[1] @property def suffix(self): @@ -202,7 +224,7 @@ def with_name(self, name): split = self.parser.split if split(name)[0]: raise ValueError(f"Invalid name {name!r}") - return self.with_segments(split(self._raw_path)[0], name) + return self.with_segments(split(str(self))[0], name) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -242,7 +264,7 @@ def relative_to(self, other, *, walk_up=False): anchor0, parts0 = self._stack anchor1, parts1 = other._stack if anchor0 != anchor1: - raise ValueError(f"{self._raw_path!r} and {other._raw_path!r} have different anchors") + raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") while parts0 and parts1 and parts0[-1] == parts1[-1]: parts0.pop() parts1.pop() @@ -250,9 +272,9 @@ def relative_to(self, other, *, walk_up=False): if not part or part == '.': pass elif not walk_up: - raise ValueError(f"{self._raw_path!r} is not in the subpath of {other._raw_path!r}") + raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") elif part == '..': - raise ValueError(f"'..' segment in {other._raw_path!r} cannot be walked") + raise ValueError(f"'..' segment in {str(other)!r} cannot be walked") else: parts0.append('..') return self.with_segments('', *reversed(parts0)) @@ -289,17 +311,17 @@ def joinpath(self, *pathsegments): paths) or a totally different path (if one of the arguments is anchored). """ - return self.with_segments(self._raw_path, *pathsegments) + return self.with_segments(*self._raw_paths, *pathsegments) def __truediv__(self, key): try: - return self.with_segments(self._raw_path, key) + return self.with_segments(*self._raw_paths, key) except TypeError: return NotImplemented def __rtruediv__(self, key): try: - return self.with_segments(key, self._raw_path) + return self.with_segments(key, *self._raw_paths) except TypeError: return NotImplemented @@ -311,7 +333,7 @@ def _stack(self): *parts* is a reversed list of parts following the anchor. """ split = self.parser.split - path = self._raw_path + path = str(self) parent, name = split(path) names = [] while path != parent: @@ -323,7 +345,7 @@ def _stack(self): @property def parent(self): """The logical parent of the path.""" - path = self._raw_path + path = str(self) parent = self.parser.split(path)[0] if path != parent: parent = self.with_segments(parent) @@ -335,7 +357,7 @@ def parent(self): def parents(self): """A sequence of this path's logical parents.""" split = self.parser.split - path = self._raw_path + path = str(self) parent = split(path)[0] parents = [] while path != parent: @@ -347,7 +369,7 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - return self.parser.isabs(self._raw_path) + return self.parser.isabs(str(self)) @property def _pattern_str(self): diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 99474e1f71a307..bf52ac4d4c8a8b 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -68,10 +68,6 @@ class PurePath(PurePathBase): """ __slots__ = ( - # The `_raw_paths` slot stores unnormalized string paths. This is set - # in the `__init__()` method. - '_raw_paths', - # The `_drv`, `_root` and `_tail_cached` slots store parsed and # normalized parts of the path. They are set when any of the `drive`, # `root` or `_tail` properties are accessed for the first time. The @@ -81,11 +77,6 @@ class PurePath(PurePathBase): # tail are normalized. '_drv', '_root', '_tail_cached', - # The `_str` slot stores the string representation of the path, - # computed from the drive, root and tail when `__str__()` is called - # for the first time. It's used to implement `_str_normcase` - '_str', - # The `_str_normcase_cached` slot stores the string path with # normalized case. It is set when the `_str_normcase` property is # accessed for the first time. It's used to implement `__eq__()` @@ -299,18 +290,6 @@ def _parse_pattern(cls, pattern): parts.append('') return parts - @property - def _raw_path(self): - """The joined but unnormalized path.""" - paths = self._raw_paths - if len(paths) == 0: - path = '' - elif len(paths) == 1: - path = paths[0] - else: - path = self.parser.join(*paths) - return path - @property def drive(self): """The drive prefix (letter or UNC path), if any.""" diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 4ab804850e9c3e..d155e7c5bb9935 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -86,11 +86,6 @@ def test_unsupported_operation_pure(self): p.suffix with self.assertRaises(e): p.suffixes - with self.assertRaises(e): - p / 'bar' - with self.assertRaises(e): - 'bar' / p - self.assertRaises(e, p.joinpath, 'bar') self.assertRaises(e, p.with_name, 'bar') self.assertRaises(e, p.with_stem, 'bar') self.assertRaises(e, p.with_suffix, '.txt') From 47e5a500c9d7db1c3b3f592d2bbd31c95affa97d Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 4 Nov 2024 19:49:07 +0000 Subject: [PATCH 2/2] Simplify joined path caching. --- Lib/pathlib/_abc.py | 33 +++++++++++++-------------------- Lib/pathlib/_local.py | 14 +++++++++++--- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 287b7b2bc41936..43e6624934b045 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -116,10 +116,6 @@ class PurePathBase: # the `__init__()` method. '_raw_paths', - # The `_str` slot stores the string representation of the path, - # computed when `__str__()` is called for the first time. - '_str', - # The '_resolving' slot stores a boolean indicating whether the path # is being processed by `PathBase.resolve()`. This prevents duplicate # work from occurring when `resolve()` calls `stat()` or `readlink()`. @@ -145,25 +141,22 @@ def with_segments(self, *pathsegments): """ return type(self)(*pathsegments) - @property - def _raw_path(self): - paths = self._raw_paths - if len(paths) == 0: - path = '' - elif len(paths) == 1: - path = paths[0] - else: - path = self.parser.join(*paths) - return path - def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" - try: - return self._str - except AttributeError: - self._str = self._raw_path - return self._str + paths = self._raw_paths + if len(paths) == 1: + return paths[0] + elif paths: + # Join path segments from the initializer. + path = self.parser.join(*paths) + # Cache the joined path. + paths.clear() + paths.append(path) + return path + else: + paths.append('') + return '' def as_posix(self): """Return the string representation of the path with forward (/) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index bf52ac4d4c8a8b..b27f456d375225 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -77,6 +77,11 @@ class PurePath(PurePathBase): # tail are normalized. '_drv', '_root', '_tail_cached', + # The `_str` slot stores the string representation of the path, + # computed from the drive, root and tail when `__str__()` is called + # for the first time. It's used to implement `_str_normcase` + '_str', + # The `_str_normcase_cached` slot stores the string path with # normalized case. It is set when the `_str_normcase` property is # accessed for the first time. It's used to implement `__eq__()` @@ -296,7 +301,8 @@ def drive(self): try: return self._drv except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + raw_path = PurePathBase.__str__(self) + self._drv, self._root, self._tail_cached = self._parse_path(raw_path) return self._drv @property @@ -305,7 +311,8 @@ def root(self): try: return self._root except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + raw_path = PurePathBase.__str__(self) + self._drv, self._root, self._tail_cached = self._parse_path(raw_path) return self._root @property @@ -313,7 +320,8 @@ def _tail(self): try: return self._tail_cached except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + raw_path = PurePathBase.__str__(self) + self._drv, self._root, self._tail_cached = self._parse_path(raw_path) return self._tail_cached @property