From e987de93474d4d4dac85be9f5adc36c508bb3fb9 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 11 May 2024 20:26:38 +0100 Subject: [PATCH 1/3] GH-82805: Fix handling of single-dot file extensions in pathlib pathlib now treats "`.`" as a valid file extension (suffix). This brings it in line with `os.path.splitext()`. In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method that splits a path into a `(root, ext)` pair, like `os.path.splitext()`. This method is called by `PurePathBase.stem`, `suffix`, etc. In a future version of pathlib, we might make these base classes public, and so users will be able to define their own `splitext()` method to control file extension splitting. In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes` properties that don't use `splitext()`, which avoids computing the path base name twice. --- Doc/library/pathlib.rst | 13 ++++++ Lib/pathlib/_abc.py | 34 +++++++-------- Lib/pathlib/_local.py | 33 ++++++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 43 +++++++++++-------- ...4-05-11-20-23-45.gh-issue-82805.F9bz4J.rst | 5 +++ 5 files changed, 93 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 15e9eaa5190256..fc9adc5939d1b5 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -449,6 +449,10 @@ Pure paths provide the following methods and properties: This is commonly called the file extension. + .. versionchanged:: 3.14 + + A single dot ("``.``") is considered a valid suffix. + .. attribute:: PurePath.suffixes A list of the path's suffixes, often called file extensions:: @@ -460,6 +464,10 @@ Pure paths provide the following methods and properties: >>> PurePosixPath('my/library').suffixes [] + .. versionchanged:: 3.14 + + A single dot ("``.``") is considered a valid suffix. + .. attribute:: PurePath.stem @@ -713,6 +721,11 @@ Pure paths provide the following methods and properties: >>> p.with_suffix('') PureWindowsPath('README') + .. versionchanged:: 3.14 + + A single dot ("``.``") is considered a valid suffix. In previous + versions, :exc:`ValueError` is raised if a single dot is supplied. + .. method:: PurePath.with_segments(*pathsegments) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 06c10e8e4612ca..fe96c76795704e 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -88,6 +88,12 @@ def splitdrive(self, path): drive. Either part may be empty.""" raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) + def splitext(self, path): + """Split the path into a pair (root, ext), where *ext* is empty or + begins with a begins with a period and contains at most one period, + and *root* is everything before the extension.""" + raise UnsupportedOperation(self._unsupported_msg('splitext()')) + def normcase(self, path): """Normalize the case of the path.""" raise UnsupportedOperation(self._unsupported_msg('normcase()')) @@ -171,12 +177,7 @@ def suffix(self): This includes the leading period. For example: '.txt' """ - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[i:] - else: - return '' + return self.parser.splitext(self.name)[1] @property def suffixes(self): @@ -185,21 +186,18 @@ def suffixes(self): These include the leading periods. For example: ['.tar', '.gz'] """ - name = self.name - if name.endswith('.'): - return [] - name = name.lstrip('.') - return ['.' + suffix for suffix in name.split('.')[1:]] + split = self.parser.splitext + stem, suffix = split(self.name) + suffixes = [] + while suffix: + suffixes.append(suffix) + stem, suffix = split(stem) + return suffixes[::-1] @property def stem(self): """The final path component, minus its last suffix.""" - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[:i] - else: - return name + return self.parser.splitext(self.name)[0] def with_name(self, name): """Return a new path with the file name changed.""" @@ -230,7 +228,7 @@ def with_suffix(self, suffix): elif not stem: # If the stem is empty, we can't make the suffix non-empty. raise ValueError(f"{self!r} has an empty name") - elif suffix.startswith('.') and len(suffix) > 1: + elif suffix.startswith('.'): return self.with_name(stem + suffix) else: raise ValueError(f"Invalid suffix {suffix!r}") diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index f2c627319d520f..e3cc1695632700 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -362,6 +362,39 @@ def with_name(self, name): tail[-1] = name return self._from_parsed_parts(self.drive, self.root, tail) + @property + def stem(self): + """The final path component, minus its last suffix.""" + name = self.name + i = name.rfind('.') + if name[:i].strip('.'): + return name[:i] + else: + return name + + @property + def suffix(self): + """ + The final component's last suffix, if any. + + This includes the leading period. For example: '.txt' + """ + name = self.name + i = name.rfind('.') + if name[:i].strip('.'): + return name[i:] + else: + return '' + + @property + def suffixes(self): + """ + A list of the final component's suffixes, if any. + + These include the leading periods. For example: ['.tar', '.gz'] + """ + return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]] + def relative_to(self, other, *, walk_up=False): """Return the relative path to another path identified by the passed arguments. If the operation is not possible (because this is not diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index aadecbc142cca6..519cceafbf1461 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -50,6 +50,7 @@ def test_unsupported_operation(self): self.assertRaises(e, m.join, 'foo') self.assertRaises(e, m.split, 'foo') self.assertRaises(e, m.splitdrive, 'foo') + self.assertRaises(e, m.splitext, 'foo') self.assertRaises(e, m.normcase, 'foo') self.assertRaises(e, m.isabs, 'foo') @@ -789,8 +790,10 @@ def test_suffix_common(self): self.assertEqual(P('/a/.hg.rc').suffix, '.rc') self.assertEqual(P('a/b.tar.gz').suffix, '.gz') self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('a/trailing.dot.').suffix, '.') + self.assertEqual(P('/a/trailing.dot.').suffix, '.') + self.assertEqual(P('a/..d.o.t..').suffix, '.') + self.assertEqual(P('a/inn.er..dots').suffix, '.dots') @needs_windows def test_suffix_windows(self): @@ -807,8 +810,8 @@ def test_suffix_windows(self): self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('c:a/trailing.dot.').suffix, '.') + self.assertEqual(P('c:/a/trailing.dot.').suffix, '.') self.assertEqual(P('//My.py/Share.php').suffix, '') self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') @@ -828,8 +831,10 @@ def test_suffixes_common(self): self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.']) + self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots']) @needs_windows def test_suffixes_windows(self): @@ -848,8 +853,8 @@ def test_suffixes_windows(self): self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('//My.py/Share.php').suffixes, []) self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.']) + self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.']) def test_stem_empty(self): P = self.cls @@ -865,8 +870,9 @@ def test_stem_common(self): self.assertEqual(P('a/.hgrc').stem, '.hgrc') self.assertEqual(P('a/.hg.rc').stem, '.hg') self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('a/Some name. Ending with a dot.').stem, - 'Some name. Ending with a dot.') + self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot') + self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.') + self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.') @needs_windows def test_stem_windows(self): @@ -880,8 +886,7 @@ def test_stem_windows(self): self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') self.assertEqual(P('c:a/.hg.rc').stem, '.hg') self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, - 'Some name. Ending with a dot.') + self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot') def test_with_name_common(self): P = self.cls self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) @@ -929,16 +934,16 @@ def test_with_stem_common(self): self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py')) self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py')) self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) - self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) - self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) + self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.')) + self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.')) @needs_windows def test_with_stem_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) - self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) + self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.')) + self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.')) self.assertRaises(ValueError, P('c:').with_stem, 'd') self.assertRaises(ValueError, P('c:/').with_stem, 'd') self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') @@ -974,6 +979,11 @@ def test_with_suffix_common(self): # Stripping suffix. self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) + # Single dot + self.assertEqual(P('a/b').with_suffix('.'), P('a/b.')) + self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.')) + self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.')) + self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.')) @needs_windows def test_with_suffix_windows(self): @@ -1011,7 +1021,6 @@ def test_with_suffix_seps(self): # Invalid suffix. self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') self.assertRaises(ValueError, P('a/b').with_suffix, '/') - self.assertRaises(ValueError, P('a/b').with_suffix, '.') self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') diff --git a/Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst b/Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst new file mode 100644 index 00000000000000..8715deda7d9c41 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-11-20-23-45.gh-issue-82805.F9bz4J.rst @@ -0,0 +1,5 @@ +Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and +related attributes and methods. For example, the +:attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now +``['.bar', '.']`` rather than ``[]``. This brings file extension splitting +in line with :func:`os.path.splitext`. From ee29f29030a548f90bfcc2ad913da71c2991062b Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 11 May 2024 21:17:16 +0100 Subject: [PATCH 2/3] Fix bugs in stem/suffix. --- Lib/pathlib/_local.py | 16 ++++++++-------- Lib/test/test_pathlib/test_pathlib_abc.py | 7 +++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index e3cc1695632700..41e60156d9b556 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -367,10 +367,11 @@ def stem(self): """The final path component, minus its last suffix.""" name = self.name i = name.rfind('.') - if name[:i].strip('.'): - return name[:i] - else: - return name + if i != -1: + stem = name[:i] + if stem.strip('.'): + return stem + return name @property def suffix(self): @@ -379,12 +380,11 @@ def suffix(self): This includes the leading period. For example: '.txt' """ - name = self.name + name = self.name.lstrip('.') i = name.rfind('.') - if name[:i].strip('.'): + if i != -1: return name[i:] - else: - return '' + return '' @property def suffixes(self): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 519cceafbf1461..be83112582da81 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -794,6 +794,8 @@ def test_suffix_common(self): self.assertEqual(P('/a/trailing.dot.').suffix, '.') self.assertEqual(P('a/..d.o.t..').suffix, '.') self.assertEqual(P('a/inn.er..dots').suffix, '.dots') + self.assertEqual(P('photo').suffix, '') + self.assertEqual(P('photo.jpg').suffix, '.jpg') @needs_windows def test_suffix_windows(self): @@ -835,6 +837,8 @@ def test_suffixes_common(self): self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.']) self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.']) self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots']) + self.assertEqual(P('photo').suffixes, []) + self.assertEqual(P('photo.jpg').suffixes, ['.jpg']) @needs_windows def test_suffixes_windows(self): @@ -873,6 +877,8 @@ def test_stem_common(self): self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot') self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.') self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.') + self.assertEqual(P('photo').stem, 'photo') + self.assertEqual(P('photo.jpg').stem, 'photo') @needs_windows def test_stem_windows(self): @@ -887,6 +893,7 @@ def test_stem_windows(self): self.assertEqual(P('c:a/.hg.rc').stem, '.hg') self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot') + def test_with_name_common(self): P = self.cls self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) From 8d4ff7b6303511f6970bdc637966cd345341201d Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 12 May 2024 20:03:09 +0100 Subject: [PATCH 3/3] strip() --> lstrip(), comment. --- Lib/pathlib/_local.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 41e60156d9b556..23ce25d0bde477 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -369,7 +369,8 @@ def stem(self): i = name.rfind('.') if i != -1: stem = name[:i] - if stem.strip('.'): + # Stem must contain at least one non-dot character. + if stem.lstrip('.'): return stem return name