GH-73435: Implement recursive wildcards in `pathlib.PurePath.match()` by barneygale · Pull Request #101398 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-73435: Implement recursive wildcards in pathlib.PurePath.match() #101398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 40 commits into from
May 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
90eebcc
Make better use of path object caching.
barneygale Apr 9, 2023
4b5fffd
Add performance tip to docs
barneygale Apr 9, 2023
5e8bc28
Skip re-initialisation of PurePath patterns.
barneygale Apr 20, 2023
e81ab5a
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale Apr 29, 2023
afb8047
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale May 2, 2023
722a1ab
Use `re.IGNORECASE` rather than `os.path.normcase()`
barneygale May 2, 2023
0ccf3df
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale May 6, 2023
ccea5e1
Add whats new entry
barneygale May 11, 2023
dd04294
Update Doc/whatsnew/3.12.rst
barneygale May 11, 2023
b258641
Apply suggestions from code review
barneygale May 14, 2023
ced8998
Explain _FNMATCH_SLICE
barneygale May 14, 2023
a33c7b6
Accidentally a word.
barneygale May 14, 2023
4b3bddb
Cache pattern compilation
barneygale May 14, 2023
6ad30dd
Remove unneeded `from None` suffix, whoops.
barneygale May 14, 2023
052890f
Tiny performance improvement: avoid accessing path.parts
barneygale May 14, 2023
d789b6d
Typo fix
barneygale May 14, 2023
4fe77c6
Avoid hashing path object when compiling pattern.
barneygale May 14, 2023
4770c13
More performance tweaks
barneygale May 14, 2023
559787d
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale May 18, 2023
9c09fc4
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale May 23, 2023
eb35dbc
Re-target to 3.13.
barneygale May 23, 2023
8959dfd
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale May 27, 2023
fec7702
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale May 29, 2023
89bc380
Merge branch 'main' into gh-73435-pathlib-match-recursive
barneygale May 29, 2023
9211297
Add more comments!
barneygale May 30, 2023
73bb309
Update Lib/pathlib.py
barneygale May 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' into gh-73435-pathlib-match-recursive
  • Loading branch information
barneygale committed May 18, 2023
commit 559787dac96ad5b1695aa44f05783e3ad2f95db9
5 changes: 5 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,11 @@ Pure paths provide the following methods and properties:
>>> PureWindowsPath('b.py').match('*.PY')
True

Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.

.. versionadded:: 3.12
The *case_sensitive* argument.

.. versionchanged:: 3.12
Support for the recursive wildcard "``**``" was added. In previous
versions, it acted like the non-recursive wildcard "``*``".
Expand Down
4 changes: 4 additions & 0 deletions Doc/whatsnew/3.12.rst
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,10 @@ pathlib
* Add :meth:`pathlib.Path.is_junction` as a proxy to :func:`os.path.isjunction`.
(Contributed by Charles Machalow in :gh:`99547`.)

* Add *case_sensitive* optional parameter to :meth:`pathlib.Path.glob`,
:meth:`pathlib.Path.rglob` and :meth:`pathlib.PurePath.match`. Set it to
``True`` or ``False`` to override the default case sensitivity used when matching.

* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
(Contributed by Barney Gale in :gh:`73435`.)

Expand Down
45 changes: 23 additions & 22 deletions Lib/pathlib.py
<td id="diff-fa525485738fc33d05b06c159172ff1f319c26e88d8c6bb39f7dbaae4dc4105cR749" data-line-number="749" class="blob-num blob-num-context js-linkable-line-number js-blob-rnum">
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,6 @@ def _is_case_sensitive(flavour):
}


@functools.lru_cache()
def _compile_pattern(pattern_lines, case_sensitive):
    """Compile newline-separated pattern components into a single regex.

    *pattern_lines* is split into lines (line endings kept) and each line is
    converted to a regex fragment:

    * a line that is exactly ``**`` becomes a wildcard that also matches
      newlines, so it can span any number of lines;
    * any other line containing ``**`` is rejected;
    * everything else is translated with ``fnmatch.translate()`` and trimmed
      with ``_FNMATCH_SLICE`` (defined elsewhere in this module).

    The result is compiled with ``re.MULTILINE``, plus ``re.IGNORECASE`` when
    *case_sensitive* is false.

    Raises ValueError if ``**`` is only part of a component.
    """
    if case_sensitive:
        regex_flags = re.MULTILINE
    else:
        regex_flags = re.MULTILINE | re.IGNORECASE

    # Under re.MULTILINE, '^' anchors at the beginning of any line.
    pieces = ['^']
    for line in pattern_lines.splitlines(keepends=True):
        if line == '**\n':
            # Recursive wildcard followed by more components: consume
            # anything (newlines included), then re-anchor at a line start.
            pieces.append(r'[\s\S]*^')
        elif line == '**':
            # Recursive wildcard as the last component.
            pieces.append(r'[\s\S]*')
        elif '**' in line:
            raise ValueError("Invalid pattern: '**' can only be an entire path component")
        else:
            pieces.append(fnmatch.translate(line)[_FNMATCH_SLICE])
    # The pattern must always run to the very end of the string.
    pieces.append(r'\Z')
    return re.compile(''.join(pieces), flags=regex_flags)


@functools.lru_cache()
def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0]
Expand Down Expand Up @@ -128,6 +108,26 @@ def _compile_pattern(pat, case_sensitive):
return re.compile(fnmatch.translate(pat), flags).match


@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    """Build one compiled regex from a newline-separated glob pattern.

    Each line of *pattern_lines* is handled as one pattern component. A
    component equal to ``**`` is a recursive wildcard that may match across
    line breaks; a component that merely contains ``**`` raises ValueError.
    Every other component goes through ``fnmatch.translate()`` and is then
    sliced with ``_FNMATCH_SLICE`` (defined elsewhere in this module).

    The regex is anchored with ``^`` at the front and ``\\Z`` at the back and
    compiled with ``re.MULTILINE``; ``re.IGNORECASE`` is added when
    *case_sensitive* is false.
    """
    def to_regex(component):
        # 'component' still carries its trailing newline, if it had one.
        if component == '**\n':
            # Match anything, newlines included, then land on a line start.
            return r'[\s\S]*^'
        if component == '**':
            return r'[\s\S]*'
        if '**' in component:
            raise ValueError("Invalid pattern: '**' can only be an entire path component")
        return fnmatch.translate(component)[_FNMATCH_SLICE]

    components = pattern_lines.splitlines(keepends=True)
    body = ''.join(map(to_regex, components))
    flags = re.MULTILINE if case_sensitive else re.MULTILINE | re.IGNORECASE
    return re.compile('^' + body + r'\Z', flags=flags)


class _Selector:
"""A selector matches a specific glob pattern part against the children
of a given path."""
Expand Down Expand Up @@ -741,8 +741,9 @@ def match(self, path_pattern, *, case_sensitive=None):
"""
if not isinstance(path_pattern, PurePath):
path_pattern = self.with_segments(path_pattern)
case_sensitive = _is_case_sensitive(self._flavour)
pattern = _compile_pattern(path_pattern._lines, case_sensitive)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self._flavour)
pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
if path_pattern.drive or path_pattern.root:
return pattern.match(self._lines) is not None
elif path_pattern._tail:
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,11 @@ def test_match_common(self):
self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py'))
self.assertRaises(ValueError, P('a').match, '**a/b/c')
self.assertRaises(ValueError, P('a').match, 'a/b/c**')
# Case-sensitive flag
self.assertFalse(P('A.py').match('a.PY', case_sensitive=True))
self.assertTrue(P('A.py').match('a.PY', case_sensitive=False))
self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True))
self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False))

def test_ordering_common(self):
# Ordering is tuple-alike.
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.
0