From 473eeaf2a003b9b8f34f2bc0077575c4041983f2 Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Wed, 17 May 2023 02:51:06 +0530 Subject: [PATCH 01/11] Add parameter @case_sensitive to pathlib.PurePath.match() function --- Lib/pathlib.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index ef7c47c9e775e4..c87a588f81d445 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -680,7 +680,7 @@ def is_reserved(self): name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ') return name.upper() in _WIN_RESERVED_NAMES - def match(self, path_pattern): + def match(self, path_pattern, case_sensitive=True): """ Return True if this path matches the given pattern. """ @@ -695,6 +695,9 @@ def match(self, path_pattern): elif len(pat_parts) > len(parts): return False for part, pat in zip(reversed(parts), reversed(pat_parts)): + if not case_sensitive: + # Convert the 'part' and 'pattern' to lowercase to ensure case insensitivity. + part, pat = part.lower(), pat.lower() if not fnmatch.fnmatchcase(part, pat): return False return True From 17e721f415c6aaacef991ab552492bebf2d9512e Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Wed, 17 May 2023 03:15:07 +0530 Subject: [PATCH 02/11] Add NEWS entry --- .../next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst diff --git a/Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst b/Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst new file mode 100644 index 00000000000000..383d62d5b416ee --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst @@ -0,0 +1 @@ +Added case_sensitive argument to pathlib.PurePath.match() From 73907a68368fa0f5fafeb50e28e2572df66bd595 Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Wed, 17 May 2023 03:58:03 +0530 Subject: [PATCH 
03/11] Refactor case sensitive check --- Lib/pathlib.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c87a588f81d445..526c6bd073e699 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -695,10 +695,10 @@ def match(self, path_pattern, case_sensitive=True): elif len(pat_parts) > len(parts): return False for part, pat in zip(reversed(parts), reversed(pat_parts)): - if not case_sensitive: - # Convert the 'part' and 'pattern' to lowercase to ensure case insensitivity. - part, pat = part.lower(), pat.lower() - if not fnmatch.fnmatchcase(part, pat): + # If none of the flags are applied, the value of 'flags' would be 0. + flags = 0 if case_sensitive else re.I + match = re.compile(fnmatch.translate(pat), flags).match + if not match(part): return False return True From 8c3f4a463ac1899c3f00ee32e311771705ca225e Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Thu, 18 May 2023 01:29:51 +0530 Subject: [PATCH 04/11] Handle platform specific case sensitivity and add test cases --- Lib/pathlib.py | 11 ++++++----- Lib/test/test_pathlib.py | 8 ++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 526c6bd073e699..a425e8798d1d68 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -680,23 +680,24 @@ def is_reserved(self): name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ') return name.upper() in _WIN_RESERVED_NAMES - def match(self, path_pattern, case_sensitive=True): + def match(self, path_pattern, case_sensitive=None): """ Return True if this path matches the given pattern. 
""" + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) + flags = re.NOFLAG if case_sensitive else re.IGNORECASE pat = self.with_segments(path_pattern) if not pat.parts: raise ValueError("empty pattern") - pat_parts = pat._parts_normcase - parts = self._parts_normcase + pat_parts = str(pat).split(pat._flavour.sep) + parts = str(self).split(self._flavour.sep) if pat.drive or pat.root: if len(pat_parts) != len(parts): return False elif len(pat_parts) > len(parts): return False for part, pat in zip(reversed(parts), reversed(pat_parts)): - # If none of the flags are applied, the value of 'flags' would be 0. - flags = 0 if case_sensitive else re.I match = re.compile(fnmatch.translate(pat), flags).match if not match(part): return False diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 46a5248499c5d0..ba75619fa3c1cc 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -744,6 +744,9 @@ def test_as_uri_non_ascii(self): def test_match(self): P = self.cls self.assertFalse(P('A.py').match('a.PY')) + self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) + self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) + self.assertTrue(P('/a/b/c.py').match('**/*.py', case_sensitive=False)) def test_is_absolute(self): P = self.cls @@ -938,6 +941,11 @@ def test_match_common(self): self.assertTrue(P('B.py').match('b.PY')) self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) + # Case-sensitivity + self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) + self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) + self.assertFalse(P('c:/a/B/c.PY').match('C:/A/**/*.pY', case_sensitive=True)) + self.assertTrue(P('c:/a/B/c.pY').match('c:/a/**/*.pY', case_sensitive=True)) # Path anchor doesn't match pattern anchor self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' 
self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' From 1e638fe578484f5dcd9782e601bc2914fe112677 Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Thu, 18 May 2023 02:56:02 +0530 Subject: [PATCH 05/11] Code formatting --- Doc/library/pathlib.rst | 10 +++++++++- Lib/pathlib.py | 14 ++++++++++---- Lib/test/test_pathlib.py | 13 +++++-------- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 93af07ae5ac10f..0deaf137397abd 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -546,7 +546,7 @@ Pure paths provide the following methods and properties: PureWindowsPath('c:/Program Files') -.. method:: PurePath.match(pattern) +.. method:: PurePath.match(pattern, *, case_sensitive=None) Match this path against the provided glob-style pattern. Return ``True`` if matching is successful, ``False`` otherwise. @@ -576,6 +576,14 @@ Pure paths provide the following methods and properties: >>> PureWindowsPath('b.py').match('*.PY') True + By default, or when the *case_sensitive* keyword-only argument is set to + ``None``, this method matches paths using platform-specific casing rules: + typically, case-sensitive on POSIX, and case-insensitive on Windows. + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + + .. versionadded:: 3.12 + The *case_sensitive* argument. + .. 
method:: PurePath.relative_to(other, walk_up=False) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a425e8798d1d68..daf361a6b089e4 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -86,6 +86,12 @@ def _make_selector(pattern_parts, flavour, case_sensitive): return cls(pat, child_parts, flavour, case_sensitive) +@functools.lru_cache(maxsize=256, typed=True) +def _compile_pattern(pat, flags): + re_pat = fnmatch.translate(pat) + return re.compile(re_pat, flags).match + + class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" @@ -680,7 +686,7 @@ def is_reserved(self): name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ') return name.upper() in _WIN_RESERVED_NAMES - def match(self, path_pattern, case_sensitive=None): + def match(self, path_pattern, *, case_sensitive=None): """ Return True if this path matches the given pattern. """ @@ -690,15 +696,15 @@ def match(self, path_pattern, case_sensitive=None): pat = self.with_segments(path_pattern) if not pat.parts: raise ValueError("empty pattern") - pat_parts = str(pat).split(pat._flavour.sep) - parts = str(self).split(self._flavour.sep) + pat_parts = pat.parts + parts = self.parts if pat.drive or pat.root: if len(pat_parts) != len(parts): return False elif len(pat_parts) > len(parts): return False for part, pat in zip(reversed(parts), reversed(pat_parts)): - match = re.compile(fnmatch.translate(pat), flags).match + match = _compile_pattern(pat, flags) if not match(part): return False return True diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ba75619fa3c1cc..8cb64e2a2f1a2d 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -312,6 +312,11 @@ def test_match_common(self): # Multi-part glob-style pattern. 
self.assertFalse(P('/a/b/c.py').match('/**/*.py')) self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) + # Case-sensitive flag + self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) def test_ordering_common(self): # Ordering is tuple-alike. @@ -744,9 +749,6 @@ def test_as_uri_non_ascii(self): def test_match(self): P = self.cls self.assertFalse(P('A.py').match('a.PY')) - self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) - self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) - self.assertTrue(P('/a/b/c.py').match('**/*.py', case_sensitive=False)) def test_is_absolute(self): P = self.cls @@ -941,11 +943,6 @@ def test_match_common(self): self.assertTrue(P('B.py').match('b.PY')) self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) - # Case-sensitivity - self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) - self.assertFalse(P('c:/a/B.Py').match('C:/A/*.pY', case_sensitive=True)) - self.assertFalse(P('c:/a/B/c.PY').match('C:/A/**/*.pY', case_sensitive=True)) - self.assertTrue(P('c:/a/B/c.pY').match('c:/a/**/*.pY', case_sensitive=True)) # Path anchor doesn't match pattern anchor self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' From 53e0bb8a050d479fbf52608ec0d9de0212c9697d Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Thu, 18 May 2023 04:08:53 +0530 Subject: [PATCH 06/11] Resolve threads --- Doc/library/pathlib.rst | 8 +++++++- Doc/whatsnew/3.12.rst | 3 +++ Lib/pathlib.py | 13 +++++++------ Lib/test/test_pathlib.py | 2 +- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 
0deaf137397abd..8179068c118fa7 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -578,7 +578,13 @@ Pure paths provide the following methods and properties: By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: - typically, case-sensitive on POSIX, and case-insensitive on Windows. + typically, case-sensitive on POSIX, and case-insensitive on Windows:: + + >>> PurePosixPath('b.py').match('*.PY') + False + >>> PureWindowsPath('b.py').match('*.PY') + True + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. versionadded:: 3.12 diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 3e55b3fa0f4734..50b9c87b83783c 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -395,6 +395,9 @@ pathlib * Add :meth:`pathlib.Path.is_junction` as a proxy to :func:`os.path.isjunction`. (Contributed by Charles Machalow in :gh:`99547`.) +* Add *case_sensitive* optional parameter to :meth:`pathlib.PurePath.glob`, + :meth:`pathlib.PurePath.rglob` and :meth:`pathlib.PurePath.match` for matching + the path's case sensitivity, allowing for more precise control over the matching process. dis --- diff --git a/Lib/pathlib.py b/Lib/pathlib.py index daf361a6b089e4..c5af9587e2d82a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -86,10 +86,9 @@ def _make_selector(pattern_parts, flavour, case_sensitive): return cls(pat, child_parts, flavour, case_sensitive) -@functools.lru_cache(maxsize=256, typed=True) +@functools.lru_cache(maxsize=256) def _compile_pattern(pat, flags): - re_pat = fnmatch.translate(pat) - return re.compile(re_pat, flags).match + return re.compile(fnmatch.translate(pat), flags).match class _Selector: @@ -690,9 +689,6 @@ def match(self, path_pattern, *, case_sensitive=None): """ Return True if this path matches the given pattern. 
""" - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self._flavour) - flags = re.NOFLAG if case_sensitive else re.IGNORECASE pat = self.with_segments(path_pattern) if not pat.parts: raise ValueError("empty pattern") @@ -703,6 +699,11 @@ def match(self, path_pattern, *, case_sensitive=None): return False elif len(pat_parts) > len(parts): return False + # Generate regex flag based on |case_sensitive| parameter. + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + for part, pat in zip(reversed(parts), reversed(pat_parts)): match = _compile_pattern(pat, flags) if not match(part): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 8cb64e2a2f1a2d..ab2c2b232a0411 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -921,7 +921,7 @@ def test_as_uri(self): self.assertEqual(P('//some/share/a/b%#c\xe9').as_uri(), 'file://some/share/a/b%25%23c%C3%A9') - def test_match_common(self): + def test_match(self): P = self.cls # Absolute patterns. self.assertTrue(P('c:/b.py').match('*:/*.py')) From 90ed5c6f8f7260f0ddacdfce778799b3211ee145 Mon Sep 17 00:00:00 2001 From: thirumurugan <67573527+thirumurugan-git@users.noreply.github.com> Date: Thu, 18 May 2023 07:44:12 +0530 Subject: [PATCH 07/11] Update Doc/whatsnew/3.12.rst Co-authored-by: Barney Gale --- Doc/whatsnew/3.12.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 50b9c87b83783c..25f0a4c3ca26da 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -395,8 +395,8 @@ pathlib * Add :meth:`pathlib.Path.is_junction` as a proxy to :func:`os.path.isjunction`. (Contributed by Charles Machalow in :gh:`99547`.) 
-* Add *case_sensitive* optional parameter to :meth:`pathlib.PurePath.glob`, - :meth:`pathlib.PurePath.rglob` and :meth:`pathlib.PurePath.match` for matching +* Add *case_sensitive* optional parameter to :meth:`pathlib.Path.glob`, + :meth:`pathlib.Path.rglob` and :meth:`pathlib.PurePath.match` for matching the path's case sensitivity, allowing for more precise control over the matching process. dis From 563172cab6b81f00eccd76639949a62cf53500df Mon Sep 17 00:00:00 2001 From: thirumurugan <67573527+thirumurugan-git@users.noreply.github.com> Date: Thu, 18 May 2023 07:47:26 +0530 Subject: [PATCH 08/11] Update Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst Co-authored-by: Barney Gale --- .../next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst b/Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst index 383d62d5b416ee..6d42078c35dd26 100644 --- a/Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst +++ b/Misc/NEWS.d/next/Library/2023-05-17-03-14-07.gh-issue-104484.y6KxL6.rst @@ -1 +1 @@ -Added case_sensitive argument to pathlib.PurePath.match() +Added *case_sensitive* argument to :meth:`pathlib.PurePath.match` From be06795a405fdec2bceac50a1aeefd51f8b39dd2 Mon Sep 17 00:00:00 2001 From: thirumurugan <67573527+thirumurugan-git@users.noreply.github.com> Date: Thu, 18 May 2023 07:47:48 +0530 Subject: [PATCH 09/11] Update Lib/pathlib.py Co-authored-by: Barney Gale --- Lib/pathlib.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c5af9587e2d82a..fa8fbb39b4cd2d 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -87,7 +87,8 @@ def _make_selector(pattern_parts, flavour, case_sensitive): @functools.lru_cache(maxsize=256) -def _compile_pattern(pat, flags): +def _compile_pattern(pat, case_sensitive): + flags = 
re.NOFLAG if case_sensitive else re.IGNORECASE return re.compile(fnmatch.translate(pat), flags).match From 466abc2e819c73cf1c8994917d0bb38f69082d1f Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Thu, 18 May 2023 08:00:21 +0530 Subject: [PATCH 10/11] Trim match() docs and pass case_sensitive to _compile_pattern() --- Doc/library/pathlib.rst | 9 --------- Lib/pathlib.py | 9 +++------ 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8179068c118fa7..627f2df9263dec 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -576,15 +576,6 @@ Pure paths provide the following methods and properties: >>> PureWindowsPath('b.py').match('*.PY') True - By default, or when the *case_sensitive* keyword-only argument is set to - ``None``, this method matches paths using platform-specific casing rules: - typically, case-sensitive on POSIX, and case-insensitive on Windows:: - - >>> PurePosixPath('b.py').match('*.PY') - False - >>> PureWindowsPath('b.py').match('*.PY') - True - Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. versionadded:: 3.12 diff --git a/Lib/pathlib.py b/Lib/pathlib.py index fa8fbb39b4cd2d..6d040448cbeb8d 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -690,6 +690,8 @@ def match(self, path_pattern, *, case_sensitive=None): """ Return True if this path matches the given pattern. """ + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) pat = self.with_segments(path_pattern) if not pat.parts: raise ValueError("empty pattern") @@ -700,13 +702,8 @@ def match(self, path_pattern, *, case_sensitive=None): return False elif len(pat_parts) > len(parts): return False - # Generate regex flag based on |case_sensitive| parameter.
- if case_sensitive is None: - case_sensitive = _is_case_sensitive(self._flavour) - flags = re.NOFLAG if case_sensitive else re.IGNORECASE - for part, pat in zip(reversed(parts), reversed(pat_parts)): - match = _compile_pattern(pat, flags) + match = _compile_pattern(pat, case_sensitive) if not match(part): return False return True From afee333983b292f3a4235af3f8ceaa21064c682a Mon Sep 17 00:00:00 2001 From: thirumurugan-git Date: Thu, 18 May 2023 13:15:04 +0530 Subject: [PATCH 11/11] Reuse _compile_pattern() in selector __init__() --- Lib/pathlib.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 6d040448cbeb8d..3d68c161603d08 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -139,8 +139,7 @@ def __init__(self, pat, child_parts, flavour, case_sensitive): if case_sensitive is None: # TODO: evaluate case-sensitivity of each directory in _select_from() case_sensitive = _is_case_sensitive(flavour) - flags = re.NOFLAG if case_sensitive else re.IGNORECASE - self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch + self.match = _compile_pattern(pat, case_sensitive) def _select_from(self, parent_path, scandir): try: