From 6855cd697f9490fcc227d42c006dcb7239fff9ce Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Thu, 20 Jul 2023 12:44:02 +0200 Subject: [PATCH 01/30] feat(py_wheel)!: Normalize name and version Instead of following the obsolete PEP 427 escaping procedure for distribution names and versions, use the rules specified by https://packaging.python.org/en/latest/specifications (sections "Package name normalization" and "Binary distribution format"). For the versions, this means normalizing them according to PEP 440. This adds full support for PEP 440-compliant version identifiers, including local version identifiers (the part after "+" in versions such as "1.0+ubuntu.1"). BREAKING CHANGE: - Distribution names have stronger requirements now: "A valid name consists only of ASCII letters and numbers, period, underscore and hyphen. It must start and end with a letter or number." https://packaging.python.org/en/latest/specifications/name-normalization/ - Versions must be valid PEP 440 version identifiers. Previously versions such as "0.1-2-3" would have been accepted; that is no longer the case. - The file name of generated wheels may have changed, if the distribution name or the version identifier wasn't in normalized form. Fixes bazelbuild/rules_python#883 --- examples/wheel/BUILD.bazel | 11 +- examples/wheel/wheel_test.py | 16 +- python/private/py_wheel.bzl | 425 +++++++++++++++++++++++++++++- tests/py_wheel/py_wheel_tests.bzl | 106 ++++++++ tools/BUILD.bazel | 1 + tools/wheelmaker.py | 32 ++- 6 files changed, 568 insertions(+), 23 deletions(-) diff --git a/examples/wheel/BUILD.bazel b/examples/wheel/BUILD.bazel index f56a41b370..1d9f2a700a 100644 --- a/examples/wheel/BUILD.bazel +++ b/examples/wheel/BUILD.bazel @@ -259,12 +259,13 @@ py_wheel( py_wheel( name = "filename_escaping", - # Per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode - # runs of non-alphanumeric, non-digit symbols should be replaced with a single underscore. - # Unicode non-ascii letters should *not* be replaced with underscore. - distribution = "file~~name-escaping", + # Per https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode + # runs of "-", "_" and "." should be replaced with a single underscore. + # Unicode non-ascii letters aren't allowed according to + # https://packaging.python.org/en/latest/specifications/name-normalization/. + distribution = "File--Name-Escaping", python_tag = "py3", - version = "0.0.1-r7", + version = "v0.0.1.RC1+ubuntu-r7", deps = [":example_pkg"], ) diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py index f51a0ecedc..aaee08eb6f 100644 --- a/examples/wheel/wheel_test.py +++ b/examples/wheel/wheel_test.py @@ -159,7 +159,7 @@ def test_filename_escaping(self): "rules_python", "examples", "wheel", - "file_name_escaping-0.0.1_r7-py3-none-any.whl", + "file_name_escaping-0.0.1rc1+ubuntu.r7-py3-none-any.whl", ) with zipfile.ZipFile(filename) as zf: self.assertEqual( @@ -172,20 +172,20 @@ def test_filename_escaping(self): # PEP calls for replacing only in the archive filename. # Alas setuptools also escapes in the dist-info directory # name, so let's be compatible. - "file_name_escaping-0.0.1_r7.dist-info/WHEEL", - "file_name_escaping-0.0.1_r7.dist-info/METADATA", - "file_name_escaping-0.0.1_r7.dist-info/RECORD", + "file_name_escaping-0.0.1rc1+ubuntu.r7.dist-info/WHEEL", + "file_name_escaping-0.0.1rc1+ubuntu.r7.dist-info/METADATA", + "file_name_escaping-0.0.1rc1+ubuntu.r7.dist-info/RECORD", ], ) metadata_contents = zf.read( - "file_name_escaping-0.0.1_r7.dist-info/METADATA" + "file_name_escaping-0.0.1rc1+ubuntu.r7.dist-info/METADATA" ) self.assertEqual( metadata_contents, b"""\ Metadata-Version: 2.1 -Name: file~~name-escaping -Version: 0.0.1-r7 +Name: File--Name-Escaping +Version: 0.0.1rc1+ubuntu.r7 UNKNOWN """, @@ -384,7 +384,7 @@ def test_rule_expands_workspace_status_keys_in_wheel_metadata(self): "rules_python", "examples", "wheel", - "example_minimal_library_BUILD_USER_-0.1._BUILD_TIMESTAMP_-py3-none-any.whl", + "example_minimal_library{build_user}-0.1.{BUILD_TIMESTAMP}-py3-none-any.whl", ) with zipfile.ZipFile(filename) as zf: diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl index d8bceabcb8..421773ba63 100644 --- a/python/private/py_wheel.bzl +++ b/python/private/py_wheel.bzl @@ -203,6 +203,427 @@ _DESCRIPTION_FILE_EXTENSION_TO_TYPE = { } _DEFAULT_DESCRIPTION_FILE_TYPE = "text/plain" +def _escape_filename_distribution_name(name): + """Escape the distribution name component of a filename. + + See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode + and https://packaging.python.org/en/latest/specifications/name-normalization/. + + Apart from the valid names according to the above, we also accept + '{' and '}', which may be used as placeholders for stamping. + """ + escaped = "" + for character in name.elems(): + if character.isalnum() or character in ["{", "}"]: + escaped += character.lower() + elif character in ["-", "_", "."]: + if escaped == "": + fail( + "A valid name must start with a letter or number.", + "Name '%s' does not." % name, + ) + elif escaped.endswith("_"): + pass + else: + escaped += "_" + else: + fail( + "A valid name consists only of ASCII letters ", + "and numbers, period, underscore and hyphen.", + "Name '%s' has bad character '%s'." % (name, character), + ) + if escaped.endswith("_"): + fail( + "A valid name must end with a letter or number.", + "Name '%s' does not." % name, + ) + return escaped + +def normalize_pep440(version): + """Escape the version component of a filename. + + See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode + and https://peps.python.org/pep-0440/ + + Args: + version: version string to be normalized according to PEP 440. + + Returns: + string containing the normalized version. + """ + + version = version.strip() # PEP 440: Leading and Trailing Whitespace + contexts = [] + + def open_context(start): + """Open an new parsing context. + + If the current parsing step succeeds, call close_context(). + If the current parsing step fails, call contexts.pop() to + go back to how it was before we opened a new context. + + Args: + start: index into `version` indicating where the current + parsing step starts. + """ + contexts.append({"norm": "", "start": start}) + return contexts[-1] + + def close_context(): + """Close the current context successfully and merge the results.""" + finished = contexts.pop() + contexts[-1]["norm"] += finished["norm"] + contexts[-1]["start"] = finished["start"] + + def is_(reference): + """Predicate testing a token for equality with `reference`.""" + return lambda token: token == reference + + def is_not(reference): + """Predicate testing a token for inequality with `reference`.""" + return lambda token: token != reference + + def in_(reference): + """Predicate testing if a token is in the list `reference`.""" + return lambda token: token in reference + + def accept(predicate, value): + """If `predicate` matches the next token, accept the token. + + Accepting the token means adding it (according to `value`) to + the running results maintained in context["norm"] and + advancing the cursor in context["start"] to the next token in + `version`. + + Args: + predicate: function taking a token and returning a boolean + saying if we want to accept the token. + value: the string to add if there's a match, or, if `value` + is a function, the function to apply to the current token + to get the string to add. + + Returns: + whether a token was accepted. + """ + + context = contexts[-1] + + if context["start"] >= len(version): + return False + + token = version[context["start"]] + + if predicate(token): + if type(value) in ["function", "builtin_function_or_method"]: + value = value(token) + + context["norm"] += value + context["start"] += 1 + return True + + return False + + def accept_placeholder(): + """Accept a Bazel placeholder. + + Placeholders aren't actually part of PEP 440, but are used for + stamping purposes. A placeholder might be + ``{BUILD_TIMESTAMP}``, for instance. We'll accept these as + they are, assuming they will expand to something that makes + sense where they appear. Before the stamping has happened, a + resulting wheel file name containing a placeholder will not + actually be valid. + + """ + context = open_context(contexts[-1]["start"]) + + if not accept(is_("{"), str): + contexts.pop() + return False + + start = context["start"] + for _ in range(start, len(version) + 1): + if not accept(is_not("}"), str): + break + + if not accept(is_("}"), str): + contexts.pop() + return False + + close_context() + return True + + def accept_digits(): + """Accept multiple digits (or placeholders).""" + + def isdigit(token): + return token.isdigit() + + context = open_context(contexts[-1]["start"]) + start = context["start"] + + for i in range(start, len(version) + 1): + if not accept(isdigit, str) and not accept_placeholder(): + if i - start >= 1: + if context["norm"].isdigit(): + # PEP 440: Integer Normalization + context["norm"] = str(int(context["norm"])) + close_context() + return True + break + + contexts.pop() + return False + + def accept_string(string, replacement): + """Accept a `string` in the input. Output `replacement`.""" + context = open_context(contexts[-1]["start"]) + + for character in string.elems(): + if not accept(in_([character, character.upper()]), ""): + contexts.pop() + return False + + context["norm"] = replacement + + close_context() + return True + + def accept_alnum(): + """Accept an alphanumeric sequence.""" + + def isalnum(token): + return token.isalnum() + + # PEP 440: Case sensitivity + def lower(token): + return token.lower() + + context = open_context(contexts[-1]["start"]) + start = context["start"] + + for i in range(start, len(version) + 1): + if not accept(isalnum, lower) and not accept_placeholder(): + if i - start >= 1: + close_context() + return True + break + + contexts.pop() + return False + + def accept_dot_number(): + """Accept a dot followed by digits.""" + open_context(contexts[-1]["start"]) + + if accept(is_("."), ".") and accept_digits(): + close_context() + return True + else: + contexts.pop() + return False + + def accept_dot_number_sequence(): + """Accept a sequence of dot+digits.""" + context = contexts[-1] + start = context["start"] + i = start + + for i in range(start, len(version) + 1): + if not accept_dot_number(): + break + return i - start >= 1 + + def accept_separator_alnum(): + """Accept a separator followed by an alphanumeric string.""" + open_context(contexts[-1]["start"]) + + # PEP 440: Local version segments + if ( + accept(in_([".", "-", "_"]), ".") and + (accept_digits() or accept_alnum()) + ): + close_context() + return True + + contexts.pop() + return False + + def accept_separator_alnum_sequence(): + """Accept a sequence of separator+alphanumeric.""" + context = contexts[-1] + start = context["start"] + i = start + + for i in range(start, len(version) + 1): + if not accept_separator_alnum(): + break + + return i - start >= 1 + + def accept_epoch(): + """PEP 440: Version epochs.""" + context = open_context(contexts[-1]["start"]) + if accept_digits() and accept(is_("!"), "!"): + if context["norm"] == "0!": + contexts.pop() + contexts[-1]["start"] = context["start"] + else: + close_context() + return True + else: + contexts.pop() + return False + + def accept_release(): + """Accept the release segment, numbers separated by dots.""" + open_context(contexts[-1]["start"]) + + if not accept_digits(): + contexts.pop() + return False + + accept_dot_number_sequence() + close_context() + return True + + def accept_pre_l(): + """PEP 440: Pre-release spelling.""" + open_context(contexts[-1]["start"]) + + if ( + accept_string("alpha", "a") or + accept_string("a", "a") or + accept_string("beta", "b") or + accept_string("b", "b") or + accept_string("c", "rc") or + accept_string("preview", "rc") or + accept_string("pre", "rc") or + accept_string("rc", "rc") + ): + close_context() + return True + else: + contexts.pop() + return False + + def accept_prerelease(): + """PEP 440: Pre-releases.""" + context = open_context(contexts[-1]["start"]) + + # PEP 440: Pre-release separators + accept(in_(["-", "_", "."]), "") + + if not accept_pre_l(): + contexts.pop() + return False + + accept(in_(["-", "_", "."]), "") + + if not accept_digits(): + # PEP 440: Implicit pre-release number + context["norm"] += "0" + + close_context() + return True + + def accept_implicit_postrelease(): + """PEP 440: Implicit post releases.""" + context = open_context(contexts[-1]["start"]) + + if accept(is_("-"), "") and accept_digits(): + context["norm"] = ".post" + context["norm"] + close_context() + return True + + contexts.pop() + return False + + def accept_explicit_postrelease(): + """PEP 440: Post-releases.""" + context = open_context(contexts[-1]["start"]) + + # PEP 440: Post release separators + if not accept(in_(["-", "_", "."]), "."): + context["norm"] += "." + + # PEP 440: Post release spelling + if ( + accept_string("post", "post") or + accept_string("rev", "post") or + accept_string("r", "post") + ): + accept(in_(["-", "_", "."]), "") + + if not accept_digits(): + # PEP 440: Implicit post release number + context["norm"] += "0" + + close_context() + return True + + contexts.pop() + return False + + def accept_postrelease(): + """PEP 440: Post-releases.""" + open_context(contexts[-1]["start"]) + + if accept_implicit_postrelease() or accept_explicit_postrelease(): + close_context() + return True + + contexts.pop() + return False + + def accept_devrelease(): + """PEP 440: Developmental releases.""" + context = open_context(contexts[-1]["start"]) + + # PEP 440: Development release separators + if not accept(in_(["-", "_", "."]), "."): + context["norm"] += "." + + if accept_string("dev", "dev"): + accept(in_(["-", "_", "."]), "") + + if not accept_digits(): + # PEP 440: Implicit development release number + context["norm"] += "0" + + close_context() + return True + + contexts.pop() + return False + + def accept_local(): + """PEP 440: Local version identifiers.""" + open_context(contexts[-1]["start"]) + + if accept(is_("+"), "+") and accept_alnum(): + accept_separator_alnum_sequence() + close_context() + return True + + contexts.pop() + return False + + open_context(0) + accept(is_("v"), "") # PEP 440: Preceding v character + accept_epoch() + accept_release() + accept_prerelease() + accept_postrelease() + accept_devrelease() + accept_local() + if version[contexts[-1]["start"]:]: + fail( + "Failed to parse PEP 440 version identifier '%s'." % version, + "Parse error at '%s'" % version[contexts[-1]["start"]:], + ) + return contexts[-1]["norm"] + def _escape_filename_segment(segment): """Escape a segment of the wheel filename. @@ -238,8 +659,8 @@ def _py_wheel_impl(ctx): version = _replace_make_variables(ctx.attr.version, ctx) outfile = ctx.actions.declare_file("-".join([ - _escape_filename_segment(ctx.attr.distribution), - _escape_filename_segment(version), + _escape_filename_distribution_name(ctx.attr.distribution), + normalize_pep440(version), _escape_filename_segment(python_tag), _escape_filename_segment(abi), _escape_filename_segment(ctx.attr.platform), diff --git a/tests/py_wheel/py_wheel_tests.bzl b/tests/py_wheel/py_wheel_tests.bzl index e580732aac..75fd38fb8e 100644 --- a/tests/py_wheel/py_wheel_tests.bzl +++ b/tests/py_wheel/py_wheel_tests.bzl @@ -16,7 +16,12 @@ load("@rules_testing//lib:analysis_test.bzl", "analysis_test", "test_suite") load("@rules_testing//lib:util.bzl", rt_util = "util") load("//python:packaging.bzl", "py_wheel") +load( + "//python/private:py_wheel.bzl", + "normalize_pep440", +) # buildifier: disable=bzl-visibility +_basic_tests = [] _tests = [] def _test_metadata(name): @@ -92,8 +97,109 @@ def _test_content_type_from_description_impl(env, target): _tests.append(_test_content_type_from_description) +def _test_pep440_normalization(env): + prefixes = ["v", " v", " \t\r\nv"] + epochs = { + "": ["", "0!", "00!"], + "1!": ["1!", "001!"], + "200!": ["200!", "00200!"], + } + releases = { + "0.1": ["0.1", "0.01"], + "2023.7.19": ["2023.7.19", "2023.07.19"], + } + pres = { + "": [""], + "a0": ["a", ".a", "-ALPHA0", "_alpha0", ".a0"], + "a4": ["alpha4", ".a04"], + "b0": ["b", ".b", "-BETA0", "_beta0", ".b0"], + "b5": ["beta05", ".b5"], + "rc0": ["C", "_c0", "RC", "_rc0", "-preview_0"], + } + explicit_posts = { + "": [""], + ".post0": [], + ".post1": [".post1", "-r1", "_rev1"], + } + implicit_posts = [[".post1", "-1"], [".post2", "-2"]] + devs = { + "": [""], + ".dev0": ["dev", "-DEV", "_Dev-0"], + ".dev9": ["DEV9", ".dev09", ".dev9"], + ".dev{BUILD_TIMESTAMP}": [ + "-DEV{BUILD_TIMESTAMP}", + "_dev_{BUILD_TIMESTAMP}", + ], + } + locals = { + "": [""], + "+ubuntu.7": ["+Ubuntu_7", "+ubuntu-007"], + "+ubuntu.r007": ["+Ubuntu_R007"], + } + epochs = [ + [normalized_epoch, input_epoch] + for normalized_epoch, input_epochs in epochs.items() + for input_epoch in input_epochs + ] + releases = [ + [normalized_release, input_release] + for normalized_release, input_releases in releases.items() + for input_release in input_releases + ] + pres = [ + [normalized_pre, input_pre] + for normalized_pre, input_pres in pres.items() + for input_pre in input_pres + ] + explicit_posts = [ + [normalized_post, input_post] + for normalized_post, input_posts in explicit_posts.items() + for input_post in input_posts + ] + pres_and_posts = [ + [normalized_pre + normalized_post, input_pre + input_post] + for normalized_pre, input_pre in pres + for normalized_post, input_post in explicit_posts + ] + [ + [normalized_pre + normalized_post, input_pre + input_post] + for normalized_pre, input_pre in pres + for normalized_post, input_post in implicit_posts + if input_pre == "" or input_pre[-1].isdigit() + ] + devs = [ + [normalized_dev, input_dev] + for normalized_dev, input_devs in devs.items() + for input_dev in input_devs + ] + locals = [ + [normalized_local, input_local] + for normalized_local, input_locals in locals.items() + for input_local in input_locals + ] + postfixes = ["", " ", " \t\r\n"] + i = 0 + for nepoch, iepoch in epochs: + for nrelease, irelease in releases: + for nprepost, iprepost in pres_and_posts: + for ndev, idev in devs: + for nlocal, ilocal in locals: + prefix = prefixes[i % len(prefixes)] + postfix = postfixes[(i // len(prefixes)) % len(postfixes)] + env.expect.that_str( + normalize_pep440( + prefix + iepoch + irelease + iprepost + + idev + ilocal + postfix, + ), + ).equals( + nepoch + nrelease + nprepost + ndev + nlocal, + ) + i += 1 + +_basic_tests.append(_test_pep440_normalization) + def py_wheel_test_suite(name): test_suite( name = name, + basic_tests = _basic_tests, tests = _tests, ) diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index fd951d9086..51bd56df0a 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -21,6 +21,7 @@ licenses(["notice"]) py_binary( name = "wheelmaker", srcs = ["wheelmaker.py"], + deps = ["@pypi__packaging//:lib"], ) filegroup( diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py index 63b833fc5d..c6c21bfd0d 100644 --- a/tools/wheelmaker.py +++ b/tools/wheelmaker.py @@ -22,6 +22,8 @@ import zipfile from pathlib import Path +import packaging.version + def commonpath(path1, path2): ret = [] @@ -32,9 +34,20 @@ def commonpath(path1, path2): return os.path.sep.join(ret) -def escape_filename_segment(segment): - """Escapes a filename segment per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode""" - return re.sub(r"[^\w\d.]+", "_", segment, re.UNICODE) +def normalize_package_name(name): + """Normalize a package name according to the Python Packaging User Guide. + + See https://packaging.python.org/en/latest/specifications/name-normalization/ + """ + return re.sub(r"[-_.]+", "-", name).lower() + + +def escape_filename_distribution_name(name): + """Escape the distribution name component of a filename. + + See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode + """ + return normalize_package_name(name).replace("-", "_") class WheelMaker(object): @@ -50,7 +63,7 @@ def __init__( strip_path_prefixes=None, ): self._name = name - self._version = version + self._version = str(packaging.version.Version(version)) self._build_tag = build_tag self._python_tag = python_tag self._abi = abi @@ -61,9 +74,9 @@ def __init__( ) self._distinfo_dir = ( - escape_filename_segment(self._name) + escape_filename_distribution_name(self._name) + "-" - + escape_filename_segment(self._version) + + self._version + ".dist-info/" ) self._zipfile = None @@ -81,7 +94,10 @@ def __exit__(self, type, value, traceback): self._zipfile = None def wheelname(self) -> str: - components = [self._name, self._version] + components = [ + escape_filename_distribution_name(self._name), + self._version, + ] if self._build_tag: components.append(self._build_tag) components += [self._python_tag, self._abi, self._platform] @@ -172,7 +188,7 @@ def add_metadata(self, metadata, name, description, version): # https://www.python.org/dev/peps/pep-0566/ # https://packaging.python.org/specifications/core-metadata/ metadata = re.sub("^Name: .*$", "Name: %s" % name, metadata, flags=re.MULTILINE) - metadata += "Version: %s\n\n" % version + metadata += "Version: %s\n\n" % str(packaging.version.Version(version)) # setuptools seems to insert UNKNOWN as description when none is # provided. metadata += description if description else "UNKNOWN" From f6695f7b73d357ab1338c80a491a2c31f76025e9 Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Tue, 8 Aug 2023 15:04:14 +0200 Subject: [PATCH 02/30] Handle placeholders in wheelmaker.py We need a valid PEP 440 version even if we're doing a non-stamped build, even if there are placeholders (stamping keys) in the version string. In that case, replace the placeholders with 0, and append the original version string, sanitized to dot-separated alphanumerics. --- tools/wheelmaker.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py index c6c21bfd0d..97eac20350 100644 --- a/tools/wheelmaker.py +++ b/tools/wheelmaker.py @@ -50,6 +50,40 @@ def escape_filename_distribution_name(name): return normalize_package_name(name).replace("-", "_") +def normalize_pep440(version): + """Normalize version according to PEP 440, with fallback for placeholders. + + If there's a placeholder in braces, such as {BUILD_TIMESTAMP}, + replace it with 0. Such placeholders can be used with stamping, in + which case they would have been resolved already by now; if they + haven't, we're doing an unstamped build, but we still need to + produce a valid version. If such replacements are made, the + original version string, sanitized to dot-separated alphanumerics, + is appended as a local version segment, so you understand what + placeholder was involved. + + If that still doesn't produce a valid version, use version 0 and + append the original version string, sanitized to dot-separated + alphanumerics, as a local version segment. + + """ + + try: + return str(packaging.version.Version(version)) + except packaging.version.InvalidVersion: + pass + + sanitized = re.sub(r'[^a-z0-9]+', '.', version.lower()).strip('.') + substituted = re.sub(r'\{\w+\}', '0', version) + delimiter = '.' if '+' in substituted else '+' + try: + return str( + packaging.version.Version(f'{substituted}{delimiter}{sanitized}') + ) + except packaging.version.InvalidVersion: + return str(packaging.version.Version(f'0+{sanitized}')) + + class WheelMaker(object): def __init__( self, @@ -63,7 +97,7 @@ def __init__( strip_path_prefixes=None, ): self._name = name - self._version = str(packaging.version.Version(version)) + self._version = normalize_pep440(version) self._build_tag = build_tag self._python_tag = python_tag self._abi = abi @@ -188,7 +222,7 @@ def add_metadata(self, metadata, name, description, version): # https://www.python.org/dev/peps/pep-0566/ # https://packaging.python.org/specifications/core-metadata/ metadata = re.sub("^Name: .*$", "Name: %s" % name, metadata, flags=re.MULTILINE) - metadata += "Version: %s\n\n" % str(packaging.version.Version(version)) + metadata += "Version: %s\n\n" % normalize_pep440(version) # setuptools seems to insert UNKNOWN as description when none is # provided. metadata += description if description else "UNKNOWN" From 0705d108e6d85a9fbcc821beb115badbb850debc Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:00:30 +0900 Subject: [PATCH 03/30] refactor: move normalization into a separate file --- python/private/py_wheel.bzl | 386 +----------------- python/private/py_wheel_normalize_pep440.bzl | 400 +++++++++++++++++++ tests/py_wheel/py_wheel_tests.bzl | 2 +- 3 files changed, 402 insertions(+), 386 deletions(-) create mode 100644 python/private/py_wheel_normalize_pep440.bzl diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl index 421773ba63..9fbba2d776 100644 --- a/python/private/py_wheel.bzl +++ b/python/private/py_wheel.bzl @@ -16,6 +16,7 @@ load("//python/private:stamp.bzl", "is_stamping_enabled") load(":py_package.bzl", "py_package_lib") +load(":py_wheel_normalize_pep440.bzl", "normalize_pep440") PyWheelInfo = provider( doc = "Information about a wheel produced by `py_wheel`", @@ -239,391 +240,6 @@ def _escape_filename_distribution_name(name): ) return escaped -def normalize_pep440(version): - """Escape the version component of a filename. - - See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode - and https://peps.python.org/pep-0440/ - - Args: - version: version string to be normalized according to PEP 440. - - Returns: - string containing the normalized version. - """ - - version = version.strip() # PEP 440: Leading and Trailing Whitespace - contexts = [] - - def open_context(start): - """Open an new parsing context. - - If the current parsing step succeeds, call close_context(). - If the current parsing step fails, call contexts.pop() to - go back to how it was before we opened a new context. - - Args: - start: index into `version` indicating where the current - parsing step starts. - """ - contexts.append({"norm": "", "start": start}) - return contexts[-1] - - def close_context(): - """Close the current context successfully and merge the results.""" - finished = contexts.pop() - contexts[-1]["norm"] += finished["norm"] - contexts[-1]["start"] = finished["start"] - - def is_(reference): - """Predicate testing a token for equality with `reference`.""" - return lambda token: token == reference - - def is_not(reference): - """Predicate testing a token for inequality with `reference`.""" - return lambda token: token != reference - - def in_(reference): - """Predicate testing if a token is in the list `reference`.""" - return lambda token: token in reference - - def accept(predicate, value): - """If `predicate` matches the next token, accept the token. - - Accepting the token means adding it (according to `value`) to - the running results maintained in context["norm"] and - advancing the cursor in context["start"] to the next token in - `version`. - - Args: - predicate: function taking a token and returning a boolean - saying if we want to accept the token. - value: the string to add if there's a match, or, if `value` - is a function, the function to apply to the current token - to get the string to add. - - Returns: - whether a token was accepted. - """ - - context = contexts[-1] - - if context["start"] >= len(version): - return False - - token = version[context["start"]] - - if predicate(token): - if type(value) in ["function", "builtin_function_or_method"]: - value = value(token) - - context["norm"] += value - context["start"] += 1 - return True - - return False - - def accept_placeholder(): - """Accept a Bazel placeholder. - - Placeholders aren't actually part of PEP 440, but are used for - stamping purposes. A placeholder might be - ``{BUILD_TIMESTAMP}``, for instance. We'll accept these as - they are, assuming they will expand to something that makes - sense where they appear. Before the stamping has happened, a - resulting wheel file name containing a placeholder will not - actually be valid. - - """ - context = open_context(contexts[-1]["start"]) - - if not accept(is_("{"), str): - contexts.pop() - return False - - start = context["start"] - for _ in range(start, len(version) + 1): - if not accept(is_not("}"), str): - break - - if not accept(is_("}"), str): - contexts.pop() - return False - - close_context() - return True - - def accept_digits(): - """Accept multiple digits (or placeholders).""" - - def isdigit(token): - return token.isdigit() - - context = open_context(contexts[-1]["start"]) - start = context["start"] - - for i in range(start, len(version) + 1): - if not accept(isdigit, str) and not accept_placeholder(): - if i - start >= 1: - if context["norm"].isdigit(): - # PEP 440: Integer Normalization - context["norm"] = str(int(context["norm"])) - close_context() - return True - break - - contexts.pop() - return False - - def accept_string(string, replacement): - """Accept a `string` in the input. Output `replacement`.""" - context = open_context(contexts[-1]["start"]) - - for character in string.elems(): - if not accept(in_([character, character.upper()]), ""): - contexts.pop() - return False - - context["norm"] = replacement - - close_context() - return True - - def accept_alnum(): - """Accept an alphanumeric sequence.""" - - def isalnum(token): - return token.isalnum() - - # PEP 440: Case sensitivity - def lower(token): - return token.lower() - - context = open_context(contexts[-1]["start"]) - start = context["start"] - - for i in range(start, len(version) + 1): - if not accept(isalnum, lower) and not accept_placeholder(): - if i - start >= 1: - close_context() - return True - break - - contexts.pop() - return False - - def accept_dot_number(): - """Accept a dot followed by digits.""" - open_context(contexts[-1]["start"]) - - if accept(is_("."), ".") and accept_digits(): - close_context() - return True - else: - contexts.pop() - return False - - def accept_dot_number_sequence(): - """Accept a sequence of dot+digits.""" - context = contexts[-1] - start = context["start"] - i = start - - for i in range(start, len(version) + 1): - if not accept_dot_number(): - break - return i - start >= 1 - - def accept_separator_alnum(): - """Accept a separator followed by an alphanumeric string.""" - open_context(contexts[-1]["start"]) - - # PEP 440: Local version segments - if ( - accept(in_([".", "-", "_"]), ".") and - (accept_digits() or accept_alnum()) - ): - close_context() - return True - - contexts.pop() - return False - - def accept_separator_alnum_sequence(): - """Accept a sequence of separator+alphanumeric.""" - context = contexts[-1] - start = context["start"] - i = start - - for i in range(start, len(version) + 1): - if not accept_separator_alnum(): - break - - return i - start >= 1 - - def accept_epoch(): - """PEP 440: Version epochs.""" - context = open_context(contexts[-1]["start"]) - if accept_digits() and accept(is_("!"), "!"): - if context["norm"] == "0!": - contexts.pop() - contexts[-1]["start"] = context["start"] - else: - close_context() - return True - else: - contexts.pop() - return False - - def accept_release(): - """Accept the release segment, numbers separated by dots.""" - open_context(contexts[-1]["start"]) - - if not accept_digits(): - contexts.pop() - return False - - accept_dot_number_sequence() - close_context() - return True - - def accept_pre_l(): - """PEP 440: Pre-release spelling.""" - open_context(contexts[-1]["start"]) - - if ( - accept_string("alpha", "a") or - accept_string("a", "a") or - accept_string("beta", "b") or - accept_string("b", "b") or - accept_string("c", "rc") or - accept_string("preview", "rc") or - accept_string("pre", "rc") or - accept_string("rc", "rc") - ): - close_context() - return True - else: - contexts.pop() - return False - - def accept_prerelease(): - """PEP 440: Pre-releases.""" - context = open_context(contexts[-1]["start"]) - - # PEP 440: Pre-release separators - accept(in_(["-", "_", "."]), "") - - if not accept_pre_l(): - contexts.pop() - return False - - accept(in_(["-", "_", "."]), "") - - if not accept_digits(): - # PEP 440: Implicit pre-release number - context["norm"] += "0" - - close_context() - return True - - def accept_implicit_postrelease(): - """PEP 440: Implicit post releases.""" - context = open_context(contexts[-1]["start"]) - - if accept(is_("-"), "") and accept_digits(): - context["norm"] = ".post" + context["norm"] - close_context() - return True - - contexts.pop() - return False - - def accept_explicit_postrelease(): - """PEP 440: Post-releases.""" - context = open_context(contexts[-1]["start"]) - - # PEP 440: Post release separators - if not accept(in_(["-", "_", "."]), "."): - context["norm"] += "." - - # PEP 440: Post release spelling - if ( - accept_string("post", "post") or - accept_string("rev", "post") or - accept_string("r", "post") - ): - accept(in_(["-", "_", "."]), "") - - if not accept_digits(): - # PEP 440: Implicit post release number - context["norm"] += "0" - - close_context() - return True - - contexts.pop() - return False - - def accept_postrelease(): - """PEP 440: Post-releases.""" - open_context(contexts[-1]["start"]) - - if accept_implicit_postrelease() or accept_explicit_postrelease(): - close_context() - return True - - contexts.pop() - return False - - def accept_devrelease(): - """PEP 440: Developmental releases.""" - context = open_context(contexts[-1]["start"]) - - # PEP 440: Development release separators - if not accept(in_(["-", "_", "."]), "."): - context["norm"] += "." - - if accept_string("dev", "dev"): - accept(in_(["-", "_", "."]), "") - - if not accept_digits(): - # PEP 440: Implicit development release number - context["norm"] += "0" - - close_context() - return True - - contexts.pop() - return False - - def accept_local(): - """PEP 440: Local version identifiers.""" - open_context(contexts[-1]["start"]) - - if accept(is_("+"), "+") and accept_alnum(): - accept_separator_alnum_sequence() - close_context() - return True - - contexts.pop() - return False - - open_context(0) - accept(is_("v"), "") # PEP 440: Preceding v character - accept_epoch() - accept_release() - accept_prerelease() - accept_postrelease() - accept_devrelease() - accept_local() - if version[contexts[-1]["start"]:]: - fail( - "Failed to parse PEP 440 version identifier '%s'." % version, - "Parse error at '%s'" % version[contexts[-1]["start"]:], - ) - return contexts[-1]["norm"] - def _escape_filename_segment(segment): """Escape a segment of the wheel filename. diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl new file mode 100644 index 0000000000..399e7c5e7e --- /dev/null +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -0,0 +1,400 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"Implementation of PEP440 version string normalization" + +def normalize_pep440(version): + """Escape the version component of a filename. + + See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode + and https://peps.python.org/pep-0440/ + + Args: + version: version string to be normalized according to PEP 440. + + Returns: + string containing the normalized version. + """ + + version = version.strip() # PEP 440: Leading and Trailing Whitespace + contexts = [] + + def open_context(start): + """Open an new parsing context. + + If the current parsing step succeeds, call close_context(). + If the current parsing step fails, call contexts.pop() to + go back to how it was before we opened a new context. + + Args: + start: index into `version` indicating where the current + parsing step starts. + """ + contexts.append({"norm": "", "start": start}) + return contexts[-1] + + def close_context(): + """Close the current context successfully and merge the results.""" + finished = contexts.pop() + contexts[-1]["norm"] += finished["norm"] + contexts[-1]["start"] = finished["start"] + + def is_(reference): + """Predicate testing a token for equality with `reference`.""" + return lambda token: token == reference + + def is_not(reference): + """Predicate testing a token for inequality with `reference`.""" + return lambda token: token != reference + + def in_(reference): + """Predicate testing if a token is in the list `reference`.""" + return lambda token: token in reference + + def accept(predicate, value): + """If `predicate` matches the next token, accept the token. + + Accepting the token means adding it (according to `value`) to + the running results maintained in context["norm"] and + advancing the cursor in context["start"] to the next token in + `version`. + + Args: + predicate: function taking a token and returning a boolean + saying if we want to accept the token. + value: the string to add if there's a match, or, if `value` + is a function, the function to apply to the current token + to get the string to add. + + Returns: + whether a token was accepted. + """ + + context = contexts[-1] + + if context["start"] >= len(version): + return False + + token = version[context["start"]] + + if predicate(token): + if type(value) in ["function", "builtin_function_or_method"]: + value = value(token) + + context["norm"] += value + context["start"] += 1 + return True + + return False + + def accept_placeholder(): + """Accept a Bazel placeholder. + + Placeholders aren't actually part of PEP 440, but are used for + stamping purposes. A placeholder might be + ``{BUILD_TIMESTAMP}``, for instance. We'll accept these as + they are, assuming they will expand to something that makes + sense where they appear. Before the stamping has happened, a + resulting wheel file name containing a placeholder will not + actually be valid. + + """ + context = open_context(contexts[-1]["start"]) + + if not accept(is_("{"), str): + contexts.pop() + return False + + start = context["start"] + for _ in range(start, len(version) + 1): + if not accept(is_not("}"), str): + break + + if not accept(is_("}"), str): + contexts.pop() + return False + + close_context() + return True + + def accept_digits(): + """Accept multiple digits (or placeholders).""" + + def isdigit(token): + return token.isdigit() + + context = open_context(contexts[-1]["start"]) + start = context["start"] + + for i in range(start, len(version) + 1): + if not accept(isdigit, str) and not accept_placeholder(): + if i - start >= 1: + if context["norm"].isdigit(): + # PEP 440: Integer Normalization + context["norm"] = str(int(context["norm"])) + close_context() + return True + break + + contexts.pop() + return False + + def accept_string(string, replacement): + """Accept a `string` in the input. Output `replacement`.""" + context = open_context(contexts[-1]["start"]) + + for character in string.elems(): + if not accept(in_([character, character.upper()]), ""): + contexts.pop() + return False + + context["norm"] = replacement + + close_context() + return True + + def accept_alnum(): + """Accept an alphanumeric sequence.""" + + def isalnum(token): + return token.isalnum() + + # PEP 440: Case sensitivity + def lower(token): + return token.lower() + + context = open_context(contexts[-1]["start"]) + start = context["start"] + + for i in range(start, len(version) + 1): + if not accept(isalnum, lower) and not accept_placeholder(): + if i - start >= 1: + close_context() + return True + break + + contexts.pop() + return False + + def accept_dot_number(): + """Accept a dot followed by digits.""" + open_context(contexts[-1]["start"]) + + if accept(is_("."), ".") and accept_digits(): + close_context() + return True + else: + contexts.pop() + return False + + def accept_dot_number_sequence(): + """Accept a sequence of dot+digits.""" + context = contexts[-1] + start = context["start"] + i = start + + for i in range(start, len(version) + 1): + if not accept_dot_number(): + break + return i - start >= 1 + + def accept_separator_alnum(): + """Accept a separator followed by an alphanumeric string.""" + open_context(contexts[-1]["start"]) + + # PEP 440: Local version segments + if ( + accept(in_([".", "-", "_"]), ".") and + (accept_digits() or accept_alnum()) + ): + close_context() + return True + + contexts.pop() + return False + + def accept_separator_alnum_sequence(): + """Accept a sequence of separator+alphanumeric.""" + context = contexts[-1] + start = context["start"] + i = start + + for i in range(start, len(version) + 1): + if not accept_separator_alnum(): + break + + return i - start >= 1 + + def accept_epoch(): + """PEP 440: Version epochs.""" + context = open_context(contexts[-1]["start"]) + if accept_digits() and accept(is_("!"), "!"): + if context["norm"] == "0!": + contexts.pop() + contexts[-1]["start"] = context["start"] + else: + close_context() + return True + else: + contexts.pop() + return False + + def accept_release(): + """Accept the release segment, numbers separated by dots.""" + open_context(contexts[-1]["start"]) + + if not accept_digits(): + contexts.pop() + return False + + accept_dot_number_sequence() + close_context() + return True + + def accept_pre_l(): + """PEP 440: Pre-release spelling.""" + open_context(contexts[-1]["start"]) + + if ( + accept_string("alpha", "a") or + accept_string("a", "a") or + accept_string("beta", "b") or + accept_string("b", "b") or + accept_string("c", "rc") or + accept_string("preview", "rc") or + accept_string("pre", "rc") or + accept_string("rc", "rc") + ): + close_context() + return True + else: + contexts.pop() + return False + + def accept_prerelease(): + """PEP 440: Pre-releases.""" + context = open_context(contexts[-1]["start"]) + + # PEP 440: Pre-release separators + accept(in_(["-", "_", "."]), "") + + if not accept_pre_l(): + contexts.pop() + return False + + accept(in_(["-", "_", "."]), "") + + if not accept_digits(): + # PEP 440: Implicit pre-release number + context["norm"] += "0" + + close_context() + return True + + def accept_implicit_postrelease(): + """PEP 440: Implicit post releases.""" + context = open_context(contexts[-1]["start"]) + + if accept(is_("-"), "") and accept_digits(): + context["norm"] = ".post" + context["norm"] + close_context() + return True + + contexts.pop() + return False + + def accept_explicit_postrelease(): + """PEP 440: Post-releases.""" + context = open_context(contexts[-1]["start"]) + + # PEP 440: Post release separators + if not accept(in_(["-", "_", "."]), "."): + context["norm"] += "." + + # PEP 440: Post release spelling + if ( + accept_string("post", "post") or + accept_string("rev", "post") or + accept_string("r", "post") + ): + accept(in_(["-", "_", "."]), "") + + if not accept_digits(): + # PEP 440: Implicit post release number + context["norm"] += "0" + + close_context() + return True + + contexts.pop() + return False + + def accept_postrelease(): + """PEP 440: Post-releases.""" + open_context(contexts[-1]["start"]) + + if accept_implicit_postrelease() or accept_explicit_postrelease(): + close_context() + return True + + contexts.pop() + return False + + def accept_devrelease(): + """PEP 440: Developmental releases.""" + context = open_context(contexts[-1]["start"]) + + # PEP 440: Development release separators + if not accept(in_(["-", "_", "."]), "."): + context["norm"] += "." + + if accept_string("dev", "dev"): + accept(in_(["-", "_", "."]), "") + + if not accept_digits(): + # PEP 440: Implicit development release number + context["norm"] += "0" + + close_context() + return True + + contexts.pop() + return False + + def accept_local(): + """PEP 440: Local version identifiers.""" + open_context(contexts[-1]["start"]) + + if accept(is_("+"), "+") and accept_alnum(): + accept_separator_alnum_sequence() + close_context() + return True + + contexts.pop() + return False + + open_context(0) + accept(is_("v"), "") # PEP 440: Preceding v character + accept_epoch() + accept_release() + accept_prerelease() + accept_postrelease() + accept_devrelease() + accept_local() + if version[contexts[-1]["start"]:]: + fail( + "Failed to parse PEP 440 version identifier '%s'." % version, + "Parse error at '%s'" % version[contexts[-1]["start"]:], + ) + return contexts[-1]["norm"] diff --git a/tests/py_wheel/py_wheel_tests.bzl b/tests/py_wheel/py_wheel_tests.bzl index 75fd38fb8e..5a8588e259 100644 --- a/tests/py_wheel/py_wheel_tests.bzl +++ b/tests/py_wheel/py_wheel_tests.bzl @@ -17,7 +17,7 @@ load("@rules_testing//lib:analysis_test.bzl", "analysis_test", "test_suite") load("@rules_testing//lib:util.bzl", rt_util = "util") load("//python:packaging.bzl", "py_wheel") load( - "//python/private:py_wheel.bzl", + "//python/private:py_wheel_normalize_pep440.bzl", "normalize_pep440", ) # buildifier: disable=bzl-visibility From d014bc67271626e10ee7beb2cde5e0d0be6de68e Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:29:02 +0900 Subject: [PATCH 04/30] refactor: move helper nested defs to be private defs --- python/private/py_wheel_normalize_pep440.bzl | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 399e7c5e7e..dd6139d9b9 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -14,6 +14,16 @@ "Implementation of PEP440 version string normalization" +def _isdigit(token): + return token.isdigit() + +def _isalnum(token): + return token.isalnum() + +def _lower(token): + # PEP 440: Case sensitivity + return token.lower() + def normalize_pep440(version): """Escape the version component of a filename. @@ -131,14 +141,11 @@ def normalize_pep440(version): def accept_digits(): """Accept multiple digits (or placeholders).""" - def isdigit(token): - return token.isdigit() - context = open_context(contexts[-1]["start"]) start = context["start"] for i in range(start, len(version) + 1): - if not accept(isdigit, str) and not accept_placeholder(): + if not accept(_isdigit, str) and not accept_placeholder(): if i - start >= 1: if context["norm"].isdigit(): # PEP 440: Integer Normalization @@ -167,18 +174,11 @@ def normalize_pep440(version): def accept_alnum(): """Accept an alphanumeric sequence.""" - def isalnum(token): - return token.isalnum() - - # PEP 440: Case sensitivity - def lower(token): - return token.lower() - context = open_context(contexts[-1]["start"]) start = context["start"] for i in range(start, len(version) + 1): - if not accept(isalnum, lower) and not accept_placeholder(): + if not accept(_isalnum, _lower) and not accept_placeholder(): if i - start >= 1: close_context() return True From 1c79b2683cb817b12f4454c760c64e00bfbbf3e2 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:30:22 +0900 Subject: [PATCH 05/30] refactor: move version and contexts to a self struct --- python/private/py_wheel_normalize_pep440.bzl | 110 ++++++++++--------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index dd6139d9b9..5ddc27b17b 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -37,8 +37,10 @@ def normalize_pep440(version): string containing the normalized version. """ - version = version.strip() # PEP 440: Leading and Trailing Whitespace - contexts = [] + self = struct( + version = version.strip(), # PEP 440: Leading and Trailing Whitespace + contexts = [], + ) def open_context(start): """Open an new parsing context. @@ -51,14 +53,14 @@ def normalize_pep440(version): start: index into `version` indicating where the current parsing step starts. """ - contexts.append({"norm": "", "start": start}) - return contexts[-1] + self.contexts.append({"norm": "", "start": start}) + return self.contexts[-1] def close_context(): """Close the current context successfully and merge the results.""" - finished = contexts.pop() - contexts[-1]["norm"] += finished["norm"] - contexts[-1]["start"] = finished["start"] + finished = self.contexts.pop() + self.contexts[-1]["norm"] += finished["norm"] + self.contexts[-1]["start"] = finished["start"] def is_(reference): """Predicate testing a token for equality with `reference`.""" @@ -91,12 +93,12 @@ def normalize_pep440(version): whether a token was accepted. """ - context = contexts[-1] + context = self.contexts[-1] - if context["start"] >= len(version): + if context["start"] >= len(self.version): return False - token = version[context["start"]] + token = self.version[context["start"]] if predicate(token): if type(value) in ["function", "builtin_function_or_method"]: @@ -120,19 +122,19 @@ def normalize_pep440(version): actually be valid. """ - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) if not accept(is_("{"), str): - contexts.pop() + self.contexts.pop() return False start = context["start"] - for _ in range(start, len(version) + 1): + for _ in range(start, len(self.version) + 1): if not accept(is_not("}"), str): break if not accept(is_("}"), str): - contexts.pop() + self.contexts.pop() return False close_context() @@ -141,10 +143,10 @@ def normalize_pep440(version): def accept_digits(): """Accept multiple digits (or placeholders).""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) start = context["start"] - for i in range(start, len(version) + 1): + for i in range(start, len(self.version) + 1): if not accept(_isdigit, str) and not accept_placeholder(): if i - start >= 1: if context["norm"].isdigit(): @@ -154,16 +156,16 @@ def normalize_pep440(version): return True break - contexts.pop() + self.contexts.pop() return False def accept_string(string, replacement): """Accept a `string` in the input. Output `replacement`.""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) for character in string.elems(): if not accept(in_([character, character.upper()]), ""): - contexts.pop() + self.contexts.pop() return False context["norm"] = replacement @@ -174,44 +176,44 @@ def normalize_pep440(version): def accept_alnum(): """Accept an alphanumeric sequence.""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) start = context["start"] - for i in range(start, len(version) + 1): + for i in range(start, len(self.version) + 1): if not accept(_isalnum, _lower) and not accept_placeholder(): if i - start >= 1: close_context() return True break - contexts.pop() + self.contexts.pop() return False def accept_dot_number(): """Accept a dot followed by digits.""" - open_context(contexts[-1]["start"]) + open_context(self.contexts[-1]["start"]) if accept(is_("."), ".") and accept_digits(): close_context() return True else: - contexts.pop() + self.contexts.pop() return False def accept_dot_number_sequence(): """Accept a sequence of dot+digits.""" - context = contexts[-1] + context = self.contexts[-1] start = context["start"] i = start - for i in range(start, len(version) + 1): + for i in range(start, len(self.version) + 1): if not accept_dot_number(): break return i - start >= 1 def accept_separator_alnum(): """Accept a separator followed by an alphanumeric string.""" - open_context(contexts[-1]["start"]) + open_context(self.contexts[-1]["start"]) # PEP 440: Local version segments if ( @@ -221,16 +223,16 @@ def normalize_pep440(version): close_context() return True - contexts.pop() + self.contexts.pop() return False def accept_separator_alnum_sequence(): """Accept a sequence of separator+alphanumeric.""" - context = contexts[-1] + context = self.contexts[-1] start = context["start"] i = start - for i in range(start, len(version) + 1): + for i in range(start, len(self.version) + 1): if not accept_separator_alnum(): break @@ -238,24 +240,24 @@ def normalize_pep440(version): def accept_epoch(): """PEP 440: Version epochs.""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) if accept_digits() and accept(is_("!"), "!"): if context["norm"] == "0!": - contexts.pop() - contexts[-1]["start"] = context["start"] + self.contexts.pop() + self.contexts[-1]["start"] = context["start"] else: close_context() return True else: - contexts.pop() + self.contexts.pop() return False def accept_release(): """Accept the release segment, numbers separated by dots.""" - open_context(contexts[-1]["start"]) + open_context(self.contexts[-1]["start"]) if not accept_digits(): - contexts.pop() + self.contexts.pop() return False accept_dot_number_sequence() @@ -264,7 +266,7 @@ def normalize_pep440(version): def accept_pre_l(): """PEP 440: Pre-release spelling.""" - open_context(contexts[-1]["start"]) + open_context(self.contexts[-1]["start"]) if ( accept_string("alpha", "a") or @@ -279,18 +281,18 @@ def normalize_pep440(version): close_context() return True else: - contexts.pop() + self.contexts.pop() return False def accept_prerelease(): """PEP 440: Pre-releases.""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) # PEP 440: Pre-release separators accept(in_(["-", "_", "."]), "") if not accept_pre_l(): - contexts.pop() + self.contexts.pop() return False accept(in_(["-", "_", "."]), "") @@ -304,19 +306,19 @@ def normalize_pep440(version): def accept_implicit_postrelease(): """PEP 440: Implicit post releases.""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) if accept(is_("-"), "") and accept_digits(): context["norm"] = ".post" + context["norm"] close_context() return True - contexts.pop() + self.contexts.pop() return False def accept_explicit_postrelease(): """PEP 440: Post-releases.""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) # PEP 440: Post release separators if not accept(in_(["-", "_", "."]), "."): @@ -337,23 +339,23 @@ def normalize_pep440(version): close_context() return True - contexts.pop() + self.contexts.pop() return False def accept_postrelease(): """PEP 440: Post-releases.""" - open_context(contexts[-1]["start"]) + open_context(self.contexts[-1]["start"]) if accept_implicit_postrelease() or accept_explicit_postrelease(): close_context() return True - contexts.pop() + self.contexts.pop() return False def accept_devrelease(): """PEP 440: Developmental releases.""" - context = open_context(contexts[-1]["start"]) + context = open_context(self.contexts[-1]["start"]) # PEP 440: Development release separators if not accept(in_(["-", "_", "."]), "."): @@ -369,19 +371,19 @@ def normalize_pep440(version): close_context() return True - contexts.pop() + self.contexts.pop() return False def accept_local(): """PEP 440: Local version identifiers.""" - open_context(contexts[-1]["start"]) + open_context(self.contexts[-1]["start"]) if accept(is_("+"), "+") and accept_alnum(): accept_separator_alnum_sequence() close_context() return True - contexts.pop() + self.contexts.pop() return False open_context(0) @@ -392,9 +394,9 @@ def normalize_pep440(version): accept_postrelease() accept_devrelease() accept_local() - if version[contexts[-1]["start"]:]: + if self.version[self.contexts[-1]["start"]:]: fail( - "Failed to parse PEP 440 version identifier '%s'." % version, - "Parse error at '%s'" % version[contexts[-1]["start"]:], + "Failed to parse PEP 440 version identifier '%s'." % self.version, + "Parse error at '%s'" % self.version[self.contexts[-1]["start"]:], ) - return contexts[-1]["norm"] + return self.contexts[-1]["norm"] From a9d06f04a8b23900fadea5d9e47ad6c0c4a08222 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:34:42 +0900 Subject: [PATCH 06/30] refactor: create a normalize function --- python/private/py_wheel_normalize_pep440.bzl | 31 +++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 5ddc27b17b..90cb8db869 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -386,17 +386,20 @@ def normalize_pep440(version): self.contexts.pop() return False - open_context(0) - accept(is_("v"), "") # PEP 440: Preceding v character - accept_epoch() - accept_release() - accept_prerelease() - accept_postrelease() - accept_devrelease() - accept_local() - if self.version[self.contexts[-1]["start"]:]: - fail( - "Failed to parse PEP 440 version identifier '%s'." % self.version, - "Parse error at '%s'" % self.version[self.contexts[-1]["start"]:], - ) - return self.contexts[-1]["norm"] + def normalize(self): + open_context(0) + accept(is_("v"), "") # PEP 440: Preceding v character + accept_epoch() + accept_release() + accept_prerelease() + accept_postrelease() + accept_devrelease() + accept_local() + if self.version[self.contexts[-1]["start"]:]: + fail( + "Failed to parse PEP 440 version identifier '%s'." % self.version, + "Parse error at '%s'" % self.version[self.contexts[-1]["start"]:], + ) + return self.contexts[-1]["norm"] + + return normalize(self) From df2f124a17b30ab443765221b885c1415f73c01a Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:41:55 +0900 Subject: [PATCH 07/30] refactor: move open_context to be a method on the normalizer struct --- python/private/py_wheel_normalize_pep440.bzl | 89 ++++++++++++-------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 90cb8db869..3267da73ee 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -14,6 +14,13 @@ "Implementation of PEP440 version string normalization" +def mkmethod(self, method): + """Bind a struct as the first arg to a function. + + This is loosely equivalent to creating a bound method of a class. + """ + return lambda *args, **kwargs: method(self, *args, **kwargs) + def _isdigit(token): return token.isdigit() @@ -24,6 +31,37 @@ def _lower(token): # PEP 440: Case sensitivity return token.lower() +def _open_context(self, start): + """Open an new parsing context. + + If the current parsing step succeeds, call close_context(). + If the current parsing step fails, call contexts.pop() to + go back to how it was before we opened a new context. + + Args: + self: The normalizer. + start: index into `version` indicating where the current + parsing step starts. + """ + self.contexts.append({"norm": "", "start": start}) + return self.contexts[-1] + +def _new(version): + """Create a new normalizer""" + self = struct( + version = version.strip(), # PEP 440: Leading and Trailing Whitespace + contexts = [], + ) + public = struct( + # methods: keep sorted + open_context = mkmethod(self, _open_context), + + # attributes: keep sorted + contexts = self.contexts, + version = self.version, + ) + return public + def normalize_pep440(version): """Escape the version component of a filename. @@ -37,24 +75,7 @@ def normalize_pep440(version): string containing the normalized version. """ - self = struct( - version = version.strip(), # PEP 440: Leading and Trailing Whitespace - contexts = [], - ) - - def open_context(start): - """Open an new parsing context. - - If the current parsing step succeeds, call close_context(). - If the current parsing step fails, call contexts.pop() to - go back to how it was before we opened a new context. - - Args: - start: index into `version` indicating where the current - parsing step starts. - """ - self.contexts.append({"norm": "", "start": start}) - return self.contexts[-1] + self = _new(version) def close_context(): """Close the current context successfully and merge the results.""" @@ -122,7 +143,7 @@ def normalize_pep440(version): actually be valid. """ - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) if not accept(is_("{"), str): self.contexts.pop() @@ -143,7 +164,7 @@ def normalize_pep440(version): def accept_digits(): """Accept multiple digits (or placeholders).""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) start = context["start"] for i in range(start, len(self.version) + 1): @@ -161,7 +182,7 @@ def normalize_pep440(version): def accept_string(string, replacement): """Accept a `string` in the input. Output `replacement`.""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) for character in string.elems(): if not accept(in_([character, character.upper()]), ""): @@ -176,7 +197,7 @@ def normalize_pep440(version): def accept_alnum(): """Accept an alphanumeric sequence.""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) start = context["start"] for i in range(start, len(self.version) + 1): @@ -191,7 +212,7 @@ def normalize_pep440(version): def accept_dot_number(): """Accept a dot followed by digits.""" - open_context(self.contexts[-1]["start"]) + self.open_context(self.contexts[-1]["start"]) if accept(is_("."), ".") and accept_digits(): close_context() @@ -213,7 +234,7 @@ def normalize_pep440(version): def accept_separator_alnum(): """Accept a separator followed by an alphanumeric string.""" - open_context(self.contexts[-1]["start"]) + self.open_context(self.contexts[-1]["start"]) # PEP 440: Local version segments if ( @@ -240,7 +261,7 @@ def normalize_pep440(version): def accept_epoch(): """PEP 440: Version epochs.""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) if accept_digits() and accept(is_("!"), "!"): if context["norm"] == "0!": self.contexts.pop() @@ -254,7 +275,7 @@ def normalize_pep440(version): def accept_release(): """Accept the release segment, numbers separated by dots.""" - open_context(self.contexts[-1]["start"]) + self.open_context(self.contexts[-1]["start"]) if not accept_digits(): self.contexts.pop() @@ -266,7 +287,7 @@ def normalize_pep440(version): def accept_pre_l(): """PEP 440: Pre-release spelling.""" - open_context(self.contexts[-1]["start"]) + self.open_context(self.contexts[-1]["start"]) if ( accept_string("alpha", "a") or @@ -286,7 +307,7 @@ def normalize_pep440(version): def accept_prerelease(): """PEP 440: Pre-releases.""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Pre-release separators accept(in_(["-", "_", "."]), "") @@ -306,7 +327,7 @@ def normalize_pep440(version): def accept_implicit_postrelease(): """PEP 440: Implicit post releases.""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) if accept(is_("-"), "") and accept_digits(): context["norm"] = ".post" + context["norm"] @@ -318,7 +339,7 @@ def normalize_pep440(version): def accept_explicit_postrelease(): """PEP 440: Post-releases.""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Post release separators if not accept(in_(["-", "_", "."]), "."): @@ -344,7 +365,7 @@ def normalize_pep440(version): def accept_postrelease(): """PEP 440: Post-releases.""" - open_context(self.contexts[-1]["start"]) + self.open_context(self.contexts[-1]["start"]) if accept_implicit_postrelease() or accept_explicit_postrelease(): close_context() @@ -355,7 +376,7 @@ def normalize_pep440(version): def accept_devrelease(): """PEP 440: Developmental releases.""" - context = open_context(self.contexts[-1]["start"]) + context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Development release separators if not accept(in_(["-", "_", "."]), "."): @@ -376,7 +397,7 @@ def normalize_pep440(version): def accept_local(): """PEP 440: Local version identifiers.""" - open_context(self.contexts[-1]["start"]) + self.open_context(self.contexts[-1]["start"]) if accept(is_("+"), "+") and accept_alnum(): accept_separator_alnum_sequence() @@ -387,7 +408,7 @@ def normalize_pep440(version): return False def normalize(self): - open_context(0) + self.open_context(0) accept(is_("v"), "") # PEP 440: Preceding v character accept_epoch() accept_release() From 1355341aa616f2fabb0f0d20904edd2b45ce190e Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:43:43 +0900 Subject: [PATCH 08/30] refactor: move more predicates outside --- python/private/py_wheel_normalize_pep440.bzl | 56 ++++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 3267da73ee..7f594fa6f4 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -31,6 +31,18 @@ def _lower(token): # PEP 440: Case sensitivity return token.lower() +def _is(reference): + """Predicate testing a token for equality with `reference`.""" + return lambda token: token == reference + +def _is_not(reference): + """Predicate testing a token for inequality with `reference`.""" + return lambda token: token != reference + +def _in(reference): + """Predicate testing if a token is in the list `reference`.""" + return lambda token: token in reference + def _open_context(self, start): """Open an new parsing context. @@ -83,18 +95,6 @@ def normalize_pep440(version): self.contexts[-1]["norm"] += finished["norm"] self.contexts[-1]["start"] = finished["start"] - def is_(reference): - """Predicate testing a token for equality with `reference`.""" - return lambda token: token == reference - - def is_not(reference): - """Predicate testing a token for inequality with `reference`.""" - return lambda token: token != reference - - def in_(reference): - """Predicate testing if a token is in the list `reference`.""" - return lambda token: token in reference - def accept(predicate, value): """If `predicate` matches the next token, accept the token. @@ -145,16 +145,16 @@ def normalize_pep440(version): """ context = self.open_context(self.contexts[-1]["start"]) - if not accept(is_("{"), str): + if not accept(_is("{"), str): self.contexts.pop() return False start = context["start"] for _ in range(start, len(self.version) + 1): - if not accept(is_not("}"), str): + if not accept(_is_not("}"), str): break - if not accept(is_("}"), str): + if not accept(_is("}"), str): self.contexts.pop() return False @@ -185,7 +185,7 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) for character in string.elems(): - if not accept(in_([character, character.upper()]), ""): + if not accept(_in([character, character.upper()]), ""): self.contexts.pop() return False @@ -214,7 +214,7 @@ def normalize_pep440(version): """Accept a dot followed by digits.""" self.open_context(self.contexts[-1]["start"]) - if accept(is_("."), ".") and accept_digits(): + if accept(_is("."), ".") and accept_digits(): close_context() return True else: @@ -238,7 +238,7 @@ def normalize_pep440(version): # PEP 440: Local version segments if ( - accept(in_([".", "-", "_"]), ".") and + accept(_in([".", "-", "_"]), ".") and (accept_digits() or accept_alnum()) ): close_context() @@ -262,7 +262,7 @@ def normalize_pep440(version): def accept_epoch(): """PEP 440: Version epochs.""" context = self.open_context(self.contexts[-1]["start"]) - if accept_digits() and accept(is_("!"), "!"): + if accept_digits() and accept(_is("!"), "!"): if context["norm"] == "0!": self.contexts.pop() self.contexts[-1]["start"] = context["start"] @@ -310,13 +310,13 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Pre-release separators - accept(in_(["-", "_", "."]), "") + accept(_in(["-", "_", "."]), "") if not accept_pre_l(): self.contexts.pop() return False - accept(in_(["-", "_", "."]), "") + accept(_in(["-", "_", "."]), "") if not accept_digits(): # PEP 440: Implicit pre-release number @@ -329,7 +329,7 @@ def normalize_pep440(version): """PEP 440: Implicit post releases.""" context = self.open_context(self.contexts[-1]["start"]) - if accept(is_("-"), "") and accept_digits(): + if accept(_is("-"), "") and accept_digits(): context["norm"] = ".post" + context["norm"] close_context() return True @@ -342,7 +342,7 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Post release separators - if not accept(in_(["-", "_", "."]), "."): + if not accept(_in(["-", "_", "."]), "."): context["norm"] += "." # PEP 440: Post release spelling @@ -351,7 +351,7 @@ def normalize_pep440(version): accept_string("rev", "post") or accept_string("r", "post") ): - accept(in_(["-", "_", "."]), "") + accept(_in(["-", "_", "."]), "") if not accept_digits(): # PEP 440: Implicit post release number @@ -379,11 +379,11 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Development release separators - if not accept(in_(["-", "_", "."]), "."): + if not accept(_in(["-", "_", "."]), "."): context["norm"] += "." if accept_string("dev", "dev"): - accept(in_(["-", "_", "."]), "") + accept(_in(["-", "_", "."]), "") if not accept_digits(): # PEP 440: Implicit development release number @@ -399,7 +399,7 @@ def normalize_pep440(version): """PEP 440: Local version identifiers.""" self.open_context(self.contexts[-1]["start"]) - if accept(is_("+"), "+") and accept_alnum(): + if accept(_is("+"), "+") and accept_alnum(): accept_separator_alnum_sequence() close_context() return True @@ -409,7 +409,7 @@ def normalize_pep440(version): def normalize(self): self.open_context(0) - accept(is_("v"), "") # PEP 440: Preceding v character + accept(_is("v"), "") # PEP 440: Preceding v character accept_epoch() accept_release() accept_prerelease() From 0a93c81774aad6498a7fc1cad6eea93fb3817fe6 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:51:13 +0900 Subject: [PATCH 09/30] refactor: move close_context to self.close_context --- python/private/py_wheel_normalize_pep440.bzl | 45 ++++++++++---------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 7f594fa6f4..ea19c36bf1 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -46,7 +46,7 @@ def _in(reference): def _open_context(self, start): """Open an new parsing context. - If the current parsing step succeeds, call close_context(). + If the current parsing step succeeds, call self.close_context(). If the current parsing step fails, call contexts.pop() to go back to how it was before we opened a new context. @@ -58,6 +58,12 @@ def _open_context(self, start): self.contexts.append({"norm": "", "start": start}) return self.contexts[-1] +def _close_context(self): + """Close the current context successfully and merge the results.""" + finished = self.contexts.pop() + self.contexts[-1]["norm"] += finished["norm"] + self.contexts[-1]["start"] = finished["start"] + def _new(version): """Create a new normalizer""" self = struct( @@ -66,6 +72,7 @@ def _new(version): ) public = struct( # methods: keep sorted + close_context = mkmethod(self, _close_context), open_context = mkmethod(self, _open_context), # attributes: keep sorted @@ -89,12 +96,6 @@ def normalize_pep440(version): self = _new(version) - def close_context(): - """Close the current context successfully and merge the results.""" - finished = self.contexts.pop() - self.contexts[-1]["norm"] += finished["norm"] - self.contexts[-1]["start"] = finished["start"] - def accept(predicate, value): """If `predicate` matches the next token, accept the token. @@ -158,7 +159,7 @@ def normalize_pep440(version): self.contexts.pop() return False - close_context() + self.close_context() return True def accept_digits(): @@ -173,7 +174,7 @@ def normalize_pep440(version): if context["norm"].isdigit(): # PEP 440: Integer Normalization context["norm"] = str(int(context["norm"])) - close_context() + self.close_context() return True break @@ -191,7 +192,7 @@ def normalize_pep440(version): context["norm"] = replacement - close_context() + self.close_context() return True def accept_alnum(): @@ -203,7 +204,7 @@ def normalize_pep440(version): for i in range(start, len(self.version) + 1): if not accept(_isalnum, _lower) and not accept_placeholder(): if i - start >= 1: - close_context() + self.close_context() return True break @@ -215,7 +216,7 @@ def normalize_pep440(version): self.open_context(self.contexts[-1]["start"]) if accept(_is("."), ".") and accept_digits(): - close_context() + self.close_context() return True else: self.contexts.pop() @@ -241,7 +242,7 @@ def normalize_pep440(version): accept(_in([".", "-", "_"]), ".") and (accept_digits() or accept_alnum()) ): - close_context() + self.close_context() return True self.contexts.pop() @@ -267,7 +268,7 @@ def normalize_pep440(version): self.contexts.pop() self.contexts[-1]["start"] = context["start"] else: - close_context() + self.close_context() return True else: self.contexts.pop() @@ -282,7 +283,7 @@ def normalize_pep440(version): return False accept_dot_number_sequence() - close_context() + self.close_context() return True def accept_pre_l(): @@ -299,7 +300,7 @@ def normalize_pep440(version): accept_string("pre", "rc") or accept_string("rc", "rc") ): - close_context() + self.close_context() return True else: self.contexts.pop() @@ -322,7 +323,7 @@ def normalize_pep440(version): # PEP 440: Implicit pre-release number context["norm"] += "0" - close_context() + self.close_context() return True def accept_implicit_postrelease(): @@ -331,7 +332,7 @@ def normalize_pep440(version): if accept(_is("-"), "") and accept_digits(): context["norm"] = ".post" + context["norm"] - close_context() + self.close_context() return True self.contexts.pop() @@ -357,7 +358,7 @@ def normalize_pep440(version): # PEP 440: Implicit post release number context["norm"] += "0" - close_context() + self.close_context() return True self.contexts.pop() @@ -368,7 +369,7 @@ def normalize_pep440(version): self.open_context(self.contexts[-1]["start"]) if accept_implicit_postrelease() or accept_explicit_postrelease(): - close_context() + self.close_context() return True self.contexts.pop() @@ -389,7 +390,7 @@ def normalize_pep440(version): # PEP 440: Implicit development release number context["norm"] += "0" - close_context() + self.close_context() return True self.contexts.pop() @@ -401,7 +402,7 @@ def normalize_pep440(version): if accept(_is("+"), "+") and accept_alnum(): accept_separator_alnum_sequence() - close_context() + self.close_context() return True self.contexts.pop() From 9ed69bef102fa66d43151448cdeaa4bd965489f9 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 10:57:30 +0900 Subject: [PATCH 10/30] refactor: move accept to self.accept --- python/private/py_wheel_normalize_pep440.bzl | 110 ++++++++++--------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index ea19c36bf1..a4a8b9dc05 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -64,6 +64,43 @@ def _close_context(self): self.contexts[-1]["norm"] += finished["norm"] self.contexts[-1]["start"] = finished["start"] +def _accept(self, predicate, value): + """If `predicate` matches the next token, accept the token. + + Accepting the token means adding it (according to `value`) to + the running results maintained in context["norm"] and + advancing the cursor in context["start"] to the next token in + `version`. + + Args: + self: The normalizer. + predicate: function taking a token and returning a boolean + saying if we want to accept the token. + value: the string to add if there's a match, or, if `value` + is a function, the function to apply to the current token + to get the string to add. + + Returns: + whether a token was accepted. + """ + + context = self.contexts[-1] + + if context["start"] >= len(self.version): + return False + + token = self.version[context["start"]] + + if predicate(token): + if type(value) in ["function", "builtin_function_or_method"]: + value = value(token) + + context["norm"] += value + context["start"] += 1 + return True + + return False + def _new(version): """Create a new normalizer""" self = struct( @@ -72,6 +109,7 @@ def _new(version): ) public = struct( # methods: keep sorted + accept = mkmethod(self, _accept), close_context = mkmethod(self, _close_context), open_context = mkmethod(self, _open_context), @@ -96,42 +134,6 @@ def normalize_pep440(version): self = _new(version) - def accept(predicate, value): - """If `predicate` matches the next token, accept the token. - - Accepting the token means adding it (according to `value`) to - the running results maintained in context["norm"] and - advancing the cursor in context["start"] to the next token in - `version`. - - Args: - predicate: function taking a token and returning a boolean - saying if we want to accept the token. - value: the string to add if there's a match, or, if `value` - is a function, the function to apply to the current token - to get the string to add. - - Returns: - whether a token was accepted. - """ - - context = self.contexts[-1] - - if context["start"] >= len(self.version): - return False - - token = self.version[context["start"]] - - if predicate(token): - if type(value) in ["function", "builtin_function_or_method"]: - value = value(token) - - context["norm"] += value - context["start"] += 1 - return True - - return False - def accept_placeholder(): """Accept a Bazel placeholder. @@ -146,16 +148,16 @@ def normalize_pep440(version): """ context = self.open_context(self.contexts[-1]["start"]) - if not accept(_is("{"), str): + if not self.accept(_is("{"), str): self.contexts.pop() return False start = context["start"] for _ in range(start, len(self.version) + 1): - if not accept(_is_not("}"), str): + if not self.accept(_is_not("}"), str): break - if not accept(_is("}"), str): + if not self.accept(_is("}"), str): self.contexts.pop() return False @@ -169,7 +171,7 @@ def normalize_pep440(version): start = context["start"] for i in range(start, len(self.version) + 1): - if not accept(_isdigit, str) and not accept_placeholder(): + if not self.accept(_isdigit, str) and not accept_placeholder(): if i - start >= 1: if context["norm"].isdigit(): # PEP 440: Integer Normalization @@ -186,7 +188,7 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) for character in string.elems(): - if not accept(_in([character, character.upper()]), ""): + if not self.accept(_in([character, character.upper()]), ""): self.contexts.pop() return False @@ -202,7 +204,7 @@ def normalize_pep440(version): start = context["start"] for i in range(start, len(self.version) + 1): - if not accept(_isalnum, _lower) and not accept_placeholder(): + if not self.accept(_isalnum, _lower) and not accept_placeholder(): if i - start >= 1: self.close_context() return True @@ -215,7 +217,7 @@ def normalize_pep440(version): """Accept a dot followed by digits.""" self.open_context(self.contexts[-1]["start"]) - if accept(_is("."), ".") and accept_digits(): + if self.accept(_is("."), ".") and accept_digits(): self.close_context() return True else: @@ -239,7 +241,7 @@ def normalize_pep440(version): # PEP 440: Local version segments if ( - accept(_in([".", "-", "_"]), ".") and + self.accept(_in([".", "-", "_"]), ".") and (accept_digits() or accept_alnum()) ): self.close_context() @@ -263,7 +265,7 @@ def normalize_pep440(version): def accept_epoch(): """PEP 440: Version epochs.""" context = self.open_context(self.contexts[-1]["start"]) - if accept_digits() and accept(_is("!"), "!"): + if accept_digits() and self.accept(_is("!"), "!"): if context["norm"] == "0!": self.contexts.pop() self.contexts[-1]["start"] = context["start"] @@ -311,13 +313,13 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Pre-release separators - accept(_in(["-", "_", "."]), "") + self.accept(_in(["-", "_", "."]), "") if not accept_pre_l(): self.contexts.pop() return False - accept(_in(["-", "_", "."]), "") + self.accept(_in(["-", "_", "."]), "") if not accept_digits(): # PEP 440: Implicit pre-release number @@ -330,7 +332,7 @@ def normalize_pep440(version): """PEP 440: Implicit post releases.""" context = self.open_context(self.contexts[-1]["start"]) - if accept(_is("-"), "") and accept_digits(): + if self.accept(_is("-"), "") and accept_digits(): context["norm"] = ".post" + context["norm"] self.close_context() return True @@ -343,7 +345,7 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Post release separators - if not accept(_in(["-", "_", "."]), "."): + if not self.accept(_in(["-", "_", "."]), "."): context["norm"] += "." # PEP 440: Post release spelling @@ -352,7 +354,7 @@ def normalize_pep440(version): accept_string("rev", "post") or accept_string("r", "post") ): - accept(_in(["-", "_", "."]), "") + self.accept(_in(["-", "_", "."]), "") if not accept_digits(): # PEP 440: Implicit post release number @@ -380,11 +382,11 @@ def normalize_pep440(version): context = self.open_context(self.contexts[-1]["start"]) # PEP 440: Development release separators - if not accept(_in(["-", "_", "."]), "."): + if not self.accept(_in(["-", "_", "."]), "."): context["norm"] += "." if accept_string("dev", "dev"): - accept(_in(["-", "_", "."]), "") + self.accept(_in(["-", "_", "."]), "") if not accept_digits(): # PEP 440: Implicit development release number @@ -400,7 +402,7 @@ def normalize_pep440(version): """PEP 440: Local version identifiers.""" self.open_context(self.contexts[-1]["start"]) - if accept(_is("+"), "+") and accept_alnum(): + if self.accept(_is("+"), "+") and accept_alnum(): accept_separator_alnum_sequence() self.close_context() return True @@ -410,7 +412,7 @@ def normalize_pep440(version): def normalize(self): self.open_context(0) - accept(_is("v"), "") # PEP 440: Preceding v character + self.accept(_is("v"), "") # PEP 440: Preceding v character accept_epoch() accept_release() accept_prerelease() From 7f80890767e01aa6731f7a58269854e05f002424 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 11:35:39 +0900 Subject: [PATCH 11/30] refactor: move all of the methods to be functions instead --- python/private/py_wheel_normalize_pep440.bzl | 490 +++++++++---------- 1 file changed, 243 insertions(+), 247 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index a4a8b9dc05..51ef80b1f6 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -119,311 +119,307 @@ def _new(version): ) return public -def normalize_pep440(version): - """Escape the version component of a filename. +def accept_placeholder(parser): + """Accept a Bazel placeholder. - See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode - and https://peps.python.org/pep-0440/ + Placeholders aren't actually part of PEP 440, but are used for + stamping purposes. A placeholder might be + ``{BUILD_TIMESTAMP}``, for instance. We'll accept these as + they are, assuming they will expand to something that makes + sense where they appear. Before the stamping has happened, a + resulting wheel file name containing a placeholder will not + actually be valid. - Args: - version: version string to be normalized according to PEP 440. - - Returns: - string containing the normalized version. """ + context = parser.open_context(parser.contexts[-1]["start"]) - self = _new(version) + if not parser.accept(_is("{"), str): + parser.contexts.pop() + return False + + start = context["start"] + for _ in range(start, len(parser.version) + 1): + if not parser.accept(_is_not("}"), str): + break - def accept_placeholder(): - """Accept a Bazel placeholder. + if not parser.accept(_is("}"), str): + parser.contexts.pop() + return False - Placeholders aren't actually part of PEP 440, but are used for - stamping purposes. A placeholder might be - ``{BUILD_TIMESTAMP}``, for instance. We'll accept these as - they are, assuming they will expand to something that makes - sense where they appear. Before the stamping has happened, a - resulting wheel file name containing a placeholder will not - actually be valid. + parser.close_context() + return True - """ - context = self.open_context(self.contexts[-1]["start"]) +def accept_digits(parser): + """Accept multiple digits (or placeholders).""" - if not self.accept(_is("{"), str): - self.contexts.pop() - return False + context = parser.open_context(parser.contexts[-1]["start"]) + start = context["start"] - start = context["start"] - for _ in range(start, len(self.version) + 1): - if not self.accept(_is_not("}"), str): - break + for i in range(start, len(parser.version) + 1): + if not parser.accept(_isdigit, str) and not accept_placeholder(parser): + if i - start >= 1: + if context["norm"].isdigit(): + # PEP 440: Integer Normalization + context["norm"] = str(int(context["norm"])) + parser.close_context() + return True + break - if not self.accept(_is("}"), str): - self.contexts.pop() + parser.contexts.pop() + return False + +def accept_string(parser, string, replacement): + """Accept a `string` in the input. Output `replacement`.""" + context = parser.open_context(parser.contexts[-1]["start"]) + + for character in string.elems(): + if not parser.accept(_in([character, character.upper()]), ""): + parser.contexts.pop() return False - self.close_context() - return True + context["norm"] = replacement - def accept_digits(): - """Accept multiple digits (or placeholders).""" + parser.close_context() + return True - context = self.open_context(self.contexts[-1]["start"]) - start = context["start"] +def accept_alnum(parser): + """Accept an alphanumeric sequence.""" - for i in range(start, len(self.version) + 1): - if not self.accept(_isdigit, str) and not accept_placeholder(): - if i - start >= 1: - if context["norm"].isdigit(): - # PEP 440: Integer Normalization - context["norm"] = str(int(context["norm"])) - self.close_context() - return True - break + context = parser.open_context(parser.contexts[-1]["start"]) + start = context["start"] - self.contexts.pop() - return False + for i in range(start, len(parser.version) + 1): + if not parser.accept(_isalnum, _lower) and not accept_placeholder(parser): + if i - start >= 1: + parser.close_context() + return True + break - def accept_string(string, replacement): - """Accept a `string` in the input. Output `replacement`.""" - context = self.open_context(self.contexts[-1]["start"]) + parser.contexts.pop() + return False - for character in string.elems(): - if not self.accept(_in([character, character.upper()]), ""): - self.contexts.pop() - return False +def accept_dot_number(parser): + """Accept a dot followed by digits.""" + parser.open_context(parser.contexts[-1]["start"]) - context["norm"] = replacement + if parser.accept(_is("."), ".") and accept_digits(parser): + parser.close_context() + return True + else: + parser.contexts.pop() + return False - self.close_context() +def accept_dot_number_sequence(parser): + """Accept a sequence of dot+digits.""" + context = parser.contexts[-1] + start = context["start"] + i = start + + for i in range(start, len(parser.version) + 1): + if not accept_dot_number(parser): + break + return i - start >= 1 + +def accept_separator_alnum(parser): + """Accept a separator followed by an alphanumeric string.""" + parser.open_context(parser.contexts[-1]["start"]) + + # PEP 440: Local version segments + if ( + parser.accept(_in([".", "-", "_"]), ".") and + (accept_digits(parser) or accept_alnum(parser)) + ): + parser.close_context() return True - def accept_alnum(): - """Accept an alphanumeric sequence.""" + parser.contexts.pop() + return False + +def accept_separator_alnum_sequence(parser): + """Accept a sequence of separator+alphanumeric.""" + context = parser.contexts[-1] + start = context["start"] + i = start + + for i in range(start, len(parser.version) + 1): + if not accept_separator_alnum(parser): + break + + return i - start >= 1 + +def accept_epoch(parser): + """PEP 440: Version epochs.""" + context = parser.open_context(parser.contexts[-1]["start"]) + if accept_digits(parser) and parser.accept(_is("!"), "!"): + if context["norm"] == "0!": + parser.contexts.pop() + parser.contexts[-1]["start"] = context["start"] + else: + parser.close_context() + return True + else: + parser.contexts.pop() + return False - context = self.open_context(self.contexts[-1]["start"]) - start = context["start"] +def accept_release(parser): + """Accept the release segment, numbers separated by dots.""" + parser.open_context(parser.contexts[-1]["start"]) - for i in range(start, len(self.version) + 1): - if not self.accept(_isalnum, _lower) and not accept_placeholder(): - if i - start >= 1: - self.close_context() - return True - break + if not accept_digits(parser): + parser.contexts.pop() + return False - self.contexts.pop() + accept_dot_number_sequence(parser) + parser.close_context() + return True + +def accept_pre_l(parser): + """PEP 440: Pre-release spelling.""" + parser.open_context(parser.contexts[-1]["start"]) + + if ( + accept_string(parser, "alpha", "a") or + accept_string(parser, "a", "a") or + accept_string(parser, "beta", "b") or + accept_string(parser, "b", "b") or + accept_string(parser, "c", "rc") or + accept_string(parser, "preview", "rc") or + accept_string(parser, "pre", "rc") or + accept_string(parser, "rc", "rc") + ): + parser.close_context() + return True + else: + parser.contexts.pop() return False - def accept_dot_number(): - """Accept a dot followed by digits.""" - self.open_context(self.contexts[-1]["start"]) +def accept_prerelease(parser): + """PEP 440: Pre-releases.""" + context = parser.open_context(parser.contexts[-1]["start"]) - if self.accept(_is("."), ".") and accept_digits(): - self.close_context() - return True - else: - self.contexts.pop() - return False + # PEP 440: Pre-release separators + parser.accept(_in(["-", "_", "."]), "") - def accept_dot_number_sequence(): - """Accept a sequence of dot+digits.""" - context = self.contexts[-1] - start = context["start"] - i = start - - for i in range(start, len(self.version) + 1): - if not accept_dot_number(): - break - return i - start >= 1 - - def accept_separator_alnum(): - """Accept a separator followed by an alphanumeric string.""" - self.open_context(self.contexts[-1]["start"]) - - # PEP 440: Local version segments - if ( - self.accept(_in([".", "-", "_"]), ".") and - (accept_digits() or accept_alnum()) - ): - self.close_context() - return True - - self.contexts.pop() + if not accept_pre_l(parser): + parser.contexts.pop() return False - def accept_separator_alnum_sequence(): - """Accept a sequence of separator+alphanumeric.""" - context = self.contexts[-1] - start = context["start"] - i = start - - for i in range(start, len(self.version) + 1): - if not accept_separator_alnum(): - break - - return i - start >= 1 - - def accept_epoch(): - """PEP 440: Version epochs.""" - context = self.open_context(self.contexts[-1]["start"]) - if accept_digits() and self.accept(_is("!"), "!"): - if context["norm"] == "0!": - self.contexts.pop() - self.contexts[-1]["start"] = context["start"] - else: - self.close_context() - return True - else: - self.contexts.pop() - return False + parser.accept(_in(["-", "_", "."]), "") - def accept_release(): - """Accept the release segment, numbers separated by dots.""" - self.open_context(self.contexts[-1]["start"]) + if not accept_digits(parser): + # PEP 440: Implicit pre-release number + context["norm"] += "0" - if not accept_digits(): - self.contexts.pop() - return False + parser.close_context() + return True - accept_dot_number_sequence() - self.close_context() - return True +def accept_implicit_postrelease(parser): + """PEP 440: Implicit post releases.""" + context = parser.open_context(parser.contexts[-1]["start"]) - def accept_pre_l(): - """PEP 440: Pre-release spelling.""" - self.open_context(self.contexts[-1]["start"]) - - if ( - accept_string("alpha", "a") or - accept_string("a", "a") or - accept_string("beta", "b") or - accept_string("b", "b") or - accept_string("c", "rc") or - accept_string("preview", "rc") or - accept_string("pre", "rc") or - accept_string("rc", "rc") - ): - self.close_context() - return True - else: - self.contexts.pop() - return False + if parser.accept(_is("-"), "") and accept_digits(parser): + context["norm"] = ".post" + context["norm"] + parser.close_context() + return True - def accept_prerelease(): - """PEP 440: Pre-releases.""" - context = self.open_context(self.contexts[-1]["start"]) + parser.contexts.pop() + return False - # PEP 440: Pre-release separators - self.accept(_in(["-", "_", "."]), "") +def accept_explicit_postrelease(parser): + """PEP 440: Post-releases.""" + context = parser.open_context(parser.contexts[-1]["start"]) - if not accept_pre_l(): - self.contexts.pop() - return False + # PEP 440: Post release separators + if not parser.accept(_in(["-", "_", "."]), "."): + context["norm"] += "." - self.accept(_in(["-", "_", "."]), "") + # PEP 440: Post release spelling + if ( + accept_string(parser, "post", "post") or + accept_string(parser, "rev", "post") or + accept_string(parser, "r", "post") + ): + parser.accept(_in(["-", "_", "."]), "") - if not accept_digits(): - # PEP 440: Implicit pre-release number + if not accept_digits(parser): + # PEP 440: Implicit post release number context["norm"] += "0" - self.close_context() + parser.close_context() return True - def accept_implicit_postrelease(): - """PEP 440: Implicit post releases.""" - context = self.open_context(self.contexts[-1]["start"]) - - if self.accept(_is("-"), "") and accept_digits(): - context["norm"] = ".post" + context["norm"] - self.close_context() - return True - - self.contexts.pop() - return False - - def accept_explicit_postrelease(): - """PEP 440: Post-releases.""" - context = self.open_context(self.contexts[-1]["start"]) - - # PEP 440: Post release separators - if not self.accept(_in(["-", "_", "."]), "."): - context["norm"] += "." + parser.contexts.pop() + return False - # PEP 440: Post release spelling - if ( - accept_string("post", "post") or - accept_string("rev", "post") or - accept_string("r", "post") - ): - self.accept(_in(["-", "_", "."]), "") +def accept_postrelease(parser): + """PEP 440: Post-releases.""" + parser.open_context(parser.contexts[-1]["start"]) - if not accept_digits(): - # PEP 440: Implicit post release number - context["norm"] += "0" + if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser): + parser.close_context() + return True - self.close_context() - return True + parser.contexts.pop() + return False - self.contexts.pop() - return False +def accept_devrelease(parser): + """PEP 440: Developmental releases.""" + context = parser.open_context(parser.contexts[-1]["start"]) - def accept_postrelease(): - """PEP 440: Post-releases.""" - self.open_context(self.contexts[-1]["start"]) + # PEP 440: Development release separators + if not parser.accept(_in(["-", "_", "."]), "."): + context["norm"] += "." - if accept_implicit_postrelease() or accept_explicit_postrelease(): - self.close_context() - return True + if accept_string(parser, "dev", "dev"): + parser.accept(_in(["-", "_", "."]), "") - self.contexts.pop() - return False + if not accept_digits(parser): + # PEP 440: Implicit development release number + context["norm"] += "0" - def accept_devrelease(): - """PEP 440: Developmental releases.""" - context = self.open_context(self.contexts[-1]["start"]) + parser.close_context() + return True - # PEP 440: Development release separators - if not self.accept(_in(["-", "_", "."]), "."): - context["norm"] += "." + parser.contexts.pop() + return False - if accept_string("dev", "dev"): - self.accept(_in(["-", "_", "."]), "") +def accept_local(parser): + """PEP 440: Local version identifiers.""" + parser.open_context(parser.contexts[-1]["start"]) - if not accept_digits(): - # PEP 440: Implicit development release number - context["norm"] += "0" + if parser.accept(_is("+"), "+") and accept_alnum(parser): + accept_separator_alnum_sequence(parser) + parser.close_context() + return True - self.close_context() - return True + parser.contexts.pop() + return False - self.contexts.pop() - return False +def normalize_pep440(version): + """Escape the version component of a filename. - def accept_local(): - """PEP 440: Local version identifiers.""" - self.open_context(self.contexts[-1]["start"]) + See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode + and https://peps.python.org/pep-0440/ - if self.accept(_is("+"), "+") and accept_alnum(): - accept_separator_alnum_sequence() - self.close_context() - return True + Args: + version: version string to be normalized according to PEP 440. - self.contexts.pop() - return False + Returns: + string containing the normalized version. + """ - def normalize(self): - self.open_context(0) - self.accept(_is("v"), "") # PEP 440: Preceding v character - accept_epoch() - accept_release() - accept_prerelease() - accept_postrelease() - accept_devrelease() - accept_local() - if self.version[self.contexts[-1]["start"]:]: - fail( - "Failed to parse PEP 440 version identifier '%s'." % self.version, - "Parse error at '%s'" % self.version[self.contexts[-1]["start"]:], - ) - return self.contexts[-1]["norm"] - - return normalize(self) + parser = _new(version) + parser.open_context(0) + parser.accept(_is("v"), "") # PEP 440: Preceding v character + accept_epoch(parser) + accept_release(parser) + accept_prerelease(parser) + accept_postrelease(parser) + accept_devrelease(parser) + accept_local(parser) + if parser.version[parser.contexts[-1]["start"]:]: + fail( + "Failed to parse PEP 440 version identifier '%s'." % parser.version, + "Parse error at '%s'" % parser.version[parser.contexts[-1]["start"]:], + ) + return parser.contexts[-1]["norm"] From f4be867575826d4e0c828f9852a94990dfe60add Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 11:44:07 +0900 Subject: [PATCH 12/30] refactor: restrict usage of contexts to the internal parser functions --- python/private/py_wheel_normalize_pep440.bzl | 90 +++++++++++--------- 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 51ef80b1f6..0d5d280141 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -101,10 +101,16 @@ def _accept(self, predicate, value): return False +def _tail(self): + return self.contexts[-1] + +def _pop(self): + return self.contexts.pop() + def _new(version): """Create a new normalizer""" self = struct( - version = version.strip(), # PEP 440: Leading and Trailing Whitespace + version = version, contexts = [], ) public = struct( @@ -112,9 +118,10 @@ def _new(version): accept = mkmethod(self, _accept), close_context = mkmethod(self, _close_context), open_context = mkmethod(self, _open_context), + tail = mkmethod(self, _tail), + pop = mkmethod(self, _pop), # attributes: keep sorted - contexts = self.contexts, version = self.version, ) return public @@ -131,10 +138,10 @@ def accept_placeholder(parser): actually be valid. """ - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) if not parser.accept(_is("{"), str): - parser.contexts.pop() + parser.pop() return False start = context["start"] @@ -143,7 +150,7 @@ def accept_placeholder(parser): break if not parser.accept(_is("}"), str): - parser.contexts.pop() + parser.pop() return False parser.close_context() @@ -152,7 +159,7 @@ def accept_placeholder(parser): def accept_digits(parser): """Accept multiple digits (or placeholders).""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) start = context["start"] for i in range(start, len(parser.version) + 1): @@ -165,16 +172,16 @@ def accept_digits(parser): return True break - parser.contexts.pop() + parser.pop() return False def accept_string(parser, string, replacement): """Accept a `string` in the input. Output `replacement`.""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) for character in string.elems(): if not parser.accept(_in([character, character.upper()]), ""): - parser.contexts.pop() + parser.pop() return False context["norm"] = replacement @@ -185,7 +192,7 @@ def accept_string(parser, string, replacement): def accept_alnum(parser): """Accept an alphanumeric sequence.""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) start = context["start"] for i in range(start, len(parser.version) + 1): @@ -195,23 +202,23 @@ def accept_alnum(parser): return True break - parser.contexts.pop() + parser.pop() return False def accept_dot_number(parser): """Accept a dot followed by digits.""" - parser.open_context(parser.contexts[-1]["start"]) + parser.open_context(parser.tail()["start"]) if parser.accept(_is("."), ".") and accept_digits(parser): parser.close_context() return True else: - parser.contexts.pop() + parser.pop() return False def accept_dot_number_sequence(parser): """Accept a sequence of dot+digits.""" - context = parser.contexts[-1] + context = parser.tail() start = context["start"] i = start @@ -222,7 +229,7 @@ def accept_dot_number_sequence(parser): def accept_separator_alnum(parser): """Accept a separator followed by an alphanumeric string.""" - parser.open_context(parser.contexts[-1]["start"]) + parser.open_context(parser.tail()["start"]) # PEP 440: Local version segments if ( @@ -232,12 +239,12 @@ def accept_separator_alnum(parser): parser.close_context() return True - parser.contexts.pop() + parser.pop() return False def accept_separator_alnum_sequence(parser): """Accept a sequence of separator+alphanumeric.""" - context = parser.contexts[-1] + context = parser.tail() start = context["start"] i = start @@ -249,24 +256,24 @@ def accept_separator_alnum_sequence(parser): def accept_epoch(parser): """PEP 440: Version epochs.""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) if accept_digits(parser) and parser.accept(_is("!"), "!"): if context["norm"] == "0!": - parser.contexts.pop() - parser.contexts[-1]["start"] = context["start"] + parser.pop() + parser.tail()["start"] = context["start"] else: parser.close_context() return True else: - parser.contexts.pop() + parser.pop() return False def accept_release(parser): """Accept the release segment, numbers separated by dots.""" - parser.open_context(parser.contexts[-1]["start"]) + parser.open_context(parser.tail()["start"]) if not accept_digits(parser): - parser.contexts.pop() + parser.pop() return False accept_dot_number_sequence(parser) @@ -275,7 +282,7 @@ def accept_release(parser): def accept_pre_l(parser): """PEP 440: Pre-release spelling.""" - parser.open_context(parser.contexts[-1]["start"]) + parser.open_context(parser.tail()["start"]) if ( accept_string(parser, "alpha", "a") or @@ -290,18 +297,18 @@ def accept_pre_l(parser): parser.close_context() return True else: - parser.contexts.pop() + parser.pop() return False def accept_prerelease(parser): """PEP 440: Pre-releases.""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) # PEP 440: Pre-release separators parser.accept(_in(["-", "_", "."]), "") if not accept_pre_l(parser): - parser.contexts.pop() + parser.pop() return False parser.accept(_in(["-", "_", "."]), "") @@ -315,19 +322,19 @@ def accept_prerelease(parser): def accept_implicit_postrelease(parser): """PEP 440: Implicit post releases.""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) if parser.accept(_is("-"), "") and accept_digits(parser): context["norm"] = ".post" + context["norm"] parser.close_context() return True - parser.contexts.pop() + parser.pop() return False def accept_explicit_postrelease(parser): """PEP 440: Post-releases.""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) # PEP 440: Post release separators if not parser.accept(_in(["-", "_", "."]), "."): @@ -348,23 +355,23 @@ def accept_explicit_postrelease(parser): parser.close_context() return True - parser.contexts.pop() + parser.pop() return False def accept_postrelease(parser): """PEP 440: Post-releases.""" - parser.open_context(parser.contexts[-1]["start"]) + parser.open_context(parser.tail()["start"]) if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser): parser.close_context() return True - parser.contexts.pop() + parser.pop() return False def accept_devrelease(parser): """PEP 440: Developmental releases.""" - context = parser.open_context(parser.contexts[-1]["start"]) + context = parser.open_context(parser.tail()["start"]) # PEP 440: Development release separators if not parser.accept(_in(["-", "_", "."]), "."): @@ -380,19 +387,19 @@ def accept_devrelease(parser): parser.close_context() return True - parser.contexts.pop() + parser.pop() return False def accept_local(parser): """PEP 440: Local version identifiers.""" - parser.open_context(parser.contexts[-1]["start"]) + parser.open_context(parser.tail()["start"]) if parser.accept(_is("+"), "+") and accept_alnum(parser): accept_separator_alnum_sequence(parser) parser.close_context() return True - parser.contexts.pop() + parser.pop() return False def normalize_pep440(version): @@ -407,8 +414,7 @@ def normalize_pep440(version): Returns: string containing the normalized version. """ - - parser = _new(version) + parser = _new(version.strip()) # PEP 440: Leading and Trailing Whitespace parser.open_context(0) parser.accept(_is("v"), "") # PEP 440: Preceding v character accept_epoch(parser) @@ -417,9 +423,9 @@ def normalize_pep440(version): accept_postrelease(parser) accept_devrelease(parser) accept_local(parser) - if parser.version[parser.contexts[-1]["start"]:]: + if parser.version[parser.tail()["start"]:]: fail( "Failed to parse PEP 440 version identifier '%s'." % parser.version, - "Parse error at '%s'" % parser.version[parser.contexts[-1]["start"]:], + "Parse error at '%s'" % parser.version[parser.tail()["start"]:], ) - return parser.contexts[-1]["norm"] + return parser.tail()["norm"] From efc66d4a107f14fbb1040dbc13a8b1f56d120668 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 11:47:21 +0900 Subject: [PATCH 13/30] refactor: move accept back to be a regular function --- python/private/py_wheel_normalize_pep440.bzl | 94 ++++++++++---------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 0d5d280141..04116913b1 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -64,7 +64,31 @@ def _close_context(self): self.contexts[-1]["norm"] += finished["norm"] self.contexts[-1]["start"] = finished["start"] -def _accept(self, predicate, value): +def _tail(self): + return self.contexts[-1] + +def _pop(self): + return self.contexts.pop() + +def _new(version): + """Create a new normalizer""" + self = struct( + version = version, + contexts = [], + ) + public = struct( + # methods: keep sorted + close_context = mkmethod(self, _close_context), + open_context = mkmethod(self, _open_context), + tail = mkmethod(self, _tail), + pop = mkmethod(self, _pop), + + # attributes: keep sorted + version = self.version, + ) + return public + +def accept(parser, predicate, value): """If `predicate` matches the next token, accept the token. Accepting the token means adding it (according to `value`) to @@ -73,7 +97,6 @@ def _accept(self, predicate, value): `version`. Args: - self: The normalizer. predicate: function taking a token and returning a boolean saying if we want to accept the token. value: the string to add if there's a match, or, if `value` @@ -84,12 +107,12 @@ def _accept(self, predicate, value): whether a token was accepted. """ - context = self.contexts[-1] + context = parser.tail() - if context["start"] >= len(self.version): + if context["start"] >= len(parser.version): return False - token = self.version[context["start"]] + token = parser.version[context["start"]] if predicate(token): if type(value) in ["function", "builtin_function_or_method"]: @@ -101,31 +124,6 @@ def _accept(self, predicate, value): return False -def _tail(self): - return self.contexts[-1] - -def _pop(self): - return self.contexts.pop() - -def _new(version): - """Create a new normalizer""" - self = struct( - version = version, - contexts = [], - ) - public = struct( - # methods: keep sorted - accept = mkmethod(self, _accept), - close_context = mkmethod(self, _close_context), - open_context = mkmethod(self, _open_context), - tail = mkmethod(self, _tail), - pop = mkmethod(self, _pop), - - # attributes: keep sorted - version = self.version, - ) - return public - def accept_placeholder(parser): """Accept a Bazel placeholder. @@ -140,16 +138,16 @@ def accept_placeholder(parser): """ context = parser.open_context(parser.tail()["start"]) - if not parser.accept(_is("{"), str): + if not accept(parser, _is("{"), str): parser.pop() return False start = context["start"] for _ in range(start, len(parser.version) + 1): - if not parser.accept(_is_not("}"), str): + if not accept(parser, _is_not("}"), str): break - if not parser.accept(_is("}"), str): + if not accept(parser, _is("}"), str): parser.pop() return False @@ -163,7 +161,7 @@ def accept_digits(parser): start = context["start"] for i in range(start, len(parser.version) + 1): - if not parser.accept(_isdigit, str) and not accept_placeholder(parser): + if not accept(parser, _isdigit, str) and not accept_placeholder(parser): if i - start >= 1: if context["norm"].isdigit(): # PEP 440: Integer Normalization @@ -180,7 +178,7 @@ def accept_string(parser, string, replacement): context = parser.open_context(parser.tail()["start"]) for character in string.elems(): - if not parser.accept(_in([character, character.upper()]), ""): + if not accept(parser, _in([character, character.upper()]), ""): parser.pop() return False @@ -196,7 +194,7 @@ def accept_alnum(parser): start = context["start"] for i in range(start, len(parser.version) + 1): - if not parser.accept(_isalnum, _lower) and not accept_placeholder(parser): + if not accept(parser, _isalnum, _lower) and not accept_placeholder(parser): if i - start >= 1: parser.close_context() return True @@ -209,7 +207,7 @@ def accept_dot_number(parser): """Accept a dot followed by digits.""" parser.open_context(parser.tail()["start"]) - if parser.accept(_is("."), ".") and accept_digits(parser): + if accept(parser, _is("."), ".") and accept_digits(parser): parser.close_context() return True else: @@ -233,7 +231,7 @@ def accept_separator_alnum(parser): # PEP 440: Local version segments if ( - parser.accept(_in([".", "-", "_"]), ".") and + accept(parser, _in([".", "-", "_"]), ".") and (accept_digits(parser) or accept_alnum(parser)) ): parser.close_context() @@ -257,7 +255,7 @@ def accept_separator_alnum_sequence(parser): def accept_epoch(parser): """PEP 440: Version epochs.""" context = parser.open_context(parser.tail()["start"]) - if accept_digits(parser) and parser.accept(_is("!"), "!"): + if accept_digits(parser) and accept(parser, _is("!"), "!"): if context["norm"] == "0!": parser.pop() parser.tail()["start"] = context["start"] @@ -305,13 +303,13 @@ def accept_prerelease(parser): context = parser.open_context(parser.tail()["start"]) # PEP 440: Pre-release separators - parser.accept(_in(["-", "_", "."]), "") + accept(parser, _in(["-", "_", "."]), "") if not accept_pre_l(parser): parser.pop() return False - parser.accept(_in(["-", "_", "."]), "") + accept(parser, _in(["-", "_", "."]), "") if not accept_digits(parser): # PEP 440: Implicit pre-release number @@ -324,7 +322,7 @@ def accept_implicit_postrelease(parser): """PEP 440: Implicit post releases.""" context = parser.open_context(parser.tail()["start"]) - if parser.accept(_is("-"), "") and accept_digits(parser): + if accept(parser, _is("-"), "") and accept_digits(parser): context["norm"] = ".post" + context["norm"] parser.close_context() return True @@ -337,7 +335,7 @@ def accept_explicit_postrelease(parser): context = parser.open_context(parser.tail()["start"]) # PEP 440: Post release separators - if not parser.accept(_in(["-", "_", "."]), "."): + if not accept(parser, _in(["-", "_", "."]), "."): context["norm"] += "." # PEP 440: Post release spelling @@ -346,7 +344,7 @@ def accept_explicit_postrelease(parser): accept_string(parser, "rev", "post") or accept_string(parser, "r", "post") ): - parser.accept(_in(["-", "_", "."]), "") + accept(parser, _in(["-", "_", "."]), "") if not accept_digits(parser): # PEP 440: Implicit post release number @@ -374,11 +372,11 @@ def accept_devrelease(parser): context = parser.open_context(parser.tail()["start"]) # PEP 440: Development release separators - if not parser.accept(_in(["-", "_", "."]), "."): + if not accept(parser, _in(["-", "_", "."]), "."): context["norm"] += "." if accept_string(parser, "dev", "dev"): - parser.accept(_in(["-", "_", "."]), "") + accept(parser, _in(["-", "_", "."]), "") if not accept_digits(parser): # PEP 440: Implicit development release number @@ -394,7 +392,7 @@ def accept_local(parser): """PEP 440: Local version identifiers.""" parser.open_context(parser.tail()["start"]) - if parser.accept(_is("+"), "+") and accept_alnum(parser): + if accept(parser, _is("+"), "+") and accept_alnum(parser): accept_separator_alnum_sequence(parser) parser.close_context() return True @@ -416,7 +414,7 @@ def normalize_pep440(version): """ parser = _new(version.strip()) # PEP 440: Leading and Trailing Whitespace parser.open_context(0) - parser.accept(_is("v"), "") # PEP 440: Preceding v character + accept(parser, _is("v"), "") # PEP 440: Preceding v character accept_epoch(parser) accept_release(parser) accept_prerelease(parser) From d2f6b74cec98e9eacb9f5d0537419c8af582a699 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:00:05 +0900 Subject: [PATCH 14/30] refactor: rename context -> ctx --- python/private/py_wheel_normalize_pep440.bzl | 74 ++++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 04116913b1..579a1c43a8 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -44,11 +44,11 @@ def _in(reference): return lambda token: token in reference def _open_context(self, start): - """Open an new parsing context. + """Open an new parsing ctx. If the current parsing step succeeds, call self.close_context(). If the current parsing step fails, call contexts.pop() to - go back to how it was before we opened a new context. + go back to how it was before we opened a new ctx. Args: self: The normalizer. @@ -59,7 +59,7 @@ def _open_context(self, start): return self.contexts[-1] def _close_context(self): - """Close the current context successfully and merge the results.""" + """Close the current ctx successfully and merge the results.""" finished = self.contexts.pop() self.contexts[-1]["norm"] += finished["norm"] self.contexts[-1]["start"] = finished["start"] @@ -92,8 +92,8 @@ def accept(parser, predicate, value): """If `predicate` matches the next token, accept the token. Accepting the token means adding it (according to `value`) to - the running results maintained in context["norm"] and - advancing the cursor in context["start"] to the next token in + the running results maintained in ctx["norm"] and + advancing the cursor in ctx["start"] to the next token in `version`. Args: @@ -107,19 +107,19 @@ def accept(parser, predicate, value): whether a token was accepted. """ - context = parser.tail() + ctx = parser.tail() - if context["start"] >= len(parser.version): + if ctx["start"] >= len(parser.version): return False - token = parser.version[context["start"]] + token = parser.version[ctx["start"]] if predicate(token): if type(value) in ["function", "builtin_function_or_method"]: value = value(token) - context["norm"] += value - context["start"] += 1 + ctx["norm"] += value + ctx["start"] += 1 return True return False @@ -136,13 +136,13 @@ def accept_placeholder(parser): actually be valid. """ - context = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.tail()["start"]) if not accept(parser, _is("{"), str): parser.pop() return False - start = context["start"] + start = ctx["start"] for _ in range(start, len(parser.version) + 1): if not accept(parser, _is_not("}"), str): break @@ -157,15 +157,15 @@ def accept_placeholder(parser): def accept_digits(parser): """Accept multiple digits (or placeholders).""" - context = parser.open_context(parser.tail()["start"]) - start = context["start"] + ctx = parser.open_context(parser.tail()["start"]) + start = ctx["start"] for i in range(start, len(parser.version) + 1): if not accept(parser, _isdigit, str) and not accept_placeholder(parser): if i - start >= 1: - if context["norm"].isdigit(): + if ctx["norm"].isdigit(): # PEP 440: Integer Normalization - context["norm"] = str(int(context["norm"])) + ctx["norm"] = str(int(ctx["norm"])) parser.close_context() return True break @@ -175,14 +175,14 @@ def accept_digits(parser): def accept_string(parser, string, replacement): """Accept a `string` in the input. Output `replacement`.""" - context = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.tail()["start"]) for character in string.elems(): if not accept(parser, _in([character, character.upper()]), ""): parser.pop() return False - context["norm"] = replacement + ctx["norm"] = replacement parser.close_context() return True @@ -190,8 +190,8 @@ def accept_string(parser, string, replacement): def accept_alnum(parser): """Accept an alphanumeric sequence.""" - context = parser.open_context(parser.tail()["start"]) - start = context["start"] + ctx = parser.open_context(parser.tail()["start"]) + start = ctx["start"] for i in range(start, len(parser.version) + 1): if not accept(parser, _isalnum, _lower) and not accept_placeholder(parser): @@ -216,8 +216,8 @@ def accept_dot_number(parser): def accept_dot_number_sequence(parser): """Accept a sequence of dot+digits.""" - context = parser.tail() - start = context["start"] + ctx = parser.tail() + start = ctx["start"] i = start for i in range(start, len(parser.version) + 1): @@ -242,8 +242,8 @@ def accept_separator_alnum(parser): def accept_separator_alnum_sequence(parser): """Accept a sequence of separator+alphanumeric.""" - context = parser.tail() - start = context["start"] + ctx = parser.tail() + start = ctx["start"] i = start for i in range(start, len(parser.version) + 1): @@ -254,11 +254,11 @@ def accept_separator_alnum_sequence(parser): def accept_epoch(parser): """PEP 440: Version epochs.""" - context = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.tail()["start"]) if accept_digits(parser) and accept(parser, _is("!"), "!"): - if context["norm"] == "0!": + if ctx["norm"] == "0!": parser.pop() - parser.tail()["start"] = context["start"] + parser.tail()["start"] = ctx["start"] else: parser.close_context() return True @@ -300,7 +300,7 @@ def accept_pre_l(parser): def accept_prerelease(parser): """PEP 440: Pre-releases.""" - context = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.tail()["start"]) # PEP 440: Pre-release separators accept(parser, _in(["-", "_", "."]), "") @@ -313,17 +313,17 @@ def accept_prerelease(parser): if not accept_digits(parser): # PEP 440: Implicit pre-release number - context["norm"] += "0" + ctx["norm"] += "0" parser.close_context() return True def accept_implicit_postrelease(parser): """PEP 440: Implicit post releases.""" - context = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.tail()["start"]) if accept(parser, _is("-"), "") and accept_digits(parser): - context["norm"] = ".post" + context["norm"] + ctx["norm"] = ".post" + ctx["norm"] parser.close_context() return True @@ -332,11 +332,11 @@ def accept_implicit_postrelease(parser): def accept_explicit_postrelease(parser): """PEP 440: Post-releases.""" - context = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.tail()["start"]) # PEP 440: Post release separators if not accept(parser, _in(["-", "_", "."]), "."): - context["norm"] += "." + ctx["norm"] += "." # PEP 440: Post release spelling if ( @@ -348,7 +348,7 @@ def accept_explicit_postrelease(parser): if not accept_digits(parser): # PEP 440: Implicit post release number - context["norm"] += "0" + ctx["norm"] += "0" parser.close_context() return True @@ -369,18 +369,18 @@ def accept_postrelease(parser): def accept_devrelease(parser): """PEP 440: Developmental releases.""" - context = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.tail()["start"]) # PEP 440: Development release separators if not accept(parser, _in(["-", "_", "."]), "."): - context["norm"] += "." + ctx["norm"] += "." if accept_string(parser, "dev", "dev"): accept(parser, _in(["-", "_", "."]), "") if not accept_digits(parser): # PEP 440: Implicit development release number - context["norm"] += "0" + ctx["norm"] += "0" parser.close_context() return True From 03f6348a55576345ff0a0d4c9a6ea2aa378d7c4e Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:00:38 +0900 Subject: [PATCH 15/30] refactor: rename tail -> context --- python/private/py_wheel_normalize_pep440.bzl | 48 ++++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 579a1c43a8..d476f20406 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -64,7 +64,7 @@ def _close_context(self): self.contexts[-1]["norm"] += finished["norm"] self.contexts[-1]["start"] = finished["start"] -def _tail(self): +def _context(self): return self.contexts[-1] def _pop(self): @@ -79,8 +79,8 @@ def _new(version): public = struct( # methods: keep sorted close_context = mkmethod(self, _close_context), + context = mkmethod(self, _context), open_context = mkmethod(self, _open_context), - tail = mkmethod(self, _tail), pop = mkmethod(self, _pop), # attributes: keep sorted @@ -107,7 +107,7 @@ def accept(parser, predicate, value): whether a token was accepted. """ - ctx = parser.tail() + ctx = parser.context() if ctx["start"] >= len(parser.version): return False @@ -136,7 +136,7 @@ def accept_placeholder(parser): actually be valid. """ - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) if not accept(parser, _is("{"), str): parser.pop() @@ -157,7 +157,7 @@ def accept_placeholder(parser): def accept_digits(parser): """Accept multiple digits (or placeholders).""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) start = ctx["start"] for i in range(start, len(parser.version) + 1): @@ -175,7 +175,7 @@ def accept_digits(parser): def accept_string(parser, string, replacement): """Accept a `string` in the input. Output `replacement`.""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) for character in string.elems(): if not accept(parser, _in([character, character.upper()]), ""): @@ -190,7 +190,7 @@ def accept_string(parser, string, replacement): def accept_alnum(parser): """Accept an alphanumeric sequence.""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) start = ctx["start"] for i in range(start, len(parser.version) + 1): @@ -205,7 +205,7 @@ def accept_alnum(parser): def accept_dot_number(parser): """Accept a dot followed by digits.""" - parser.open_context(parser.tail()["start"]) + parser.open_context(parser.context()["start"]) if accept(parser, _is("."), ".") and accept_digits(parser): parser.close_context() @@ -216,7 +216,7 @@ def accept_dot_number(parser): def accept_dot_number_sequence(parser): """Accept a sequence of dot+digits.""" - ctx = parser.tail() + ctx = parser.context() start = ctx["start"] i = start @@ -227,7 +227,7 @@ def accept_dot_number_sequence(parser): def accept_separator_alnum(parser): """Accept a separator followed by an alphanumeric string.""" - parser.open_context(parser.tail()["start"]) + parser.open_context(parser.context()["start"]) # PEP 440: Local version segments if ( @@ -242,7 +242,7 @@ def accept_separator_alnum(parser): def accept_separator_alnum_sequence(parser): """Accept a sequence of separator+alphanumeric.""" - ctx = parser.tail() + ctx = parser.context() start = ctx["start"] i = start @@ -254,11 +254,11 @@ def accept_separator_alnum_sequence(parser): def accept_epoch(parser): """PEP 440: Version epochs.""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) if accept_digits(parser) and accept(parser, _is("!"), "!"): if ctx["norm"] == "0!": parser.pop() - parser.tail()["start"] = ctx["start"] + parser.context()["start"] = ctx["start"] else: parser.close_context() return True @@ -268,7 +268,7 @@ def accept_epoch(parser): def accept_release(parser): """Accept the release segment, numbers separated by dots.""" - parser.open_context(parser.tail()["start"]) + parser.open_context(parser.context()["start"]) if not accept_digits(parser): parser.pop() @@ -280,7 +280,7 @@ def accept_release(parser): def accept_pre_l(parser): """PEP 440: Pre-release spelling.""" - parser.open_context(parser.tail()["start"]) + parser.open_context(parser.context()["start"]) if ( accept_string(parser, "alpha", "a") or @@ -300,7 +300,7 @@ def accept_pre_l(parser): def accept_prerelease(parser): """PEP 440: Pre-releases.""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) # PEP 440: Pre-release separators accept(parser, _in(["-", "_", "."]), "") @@ -320,7 +320,7 @@ def accept_prerelease(parser): def accept_implicit_postrelease(parser): """PEP 440: Implicit post releases.""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) if accept(parser, _is("-"), "") and accept_digits(parser): ctx["norm"] = ".post" + ctx["norm"] @@ -332,7 +332,7 @@ def accept_implicit_postrelease(parser): def accept_explicit_postrelease(parser): """PEP 440: Post-releases.""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) # PEP 440: Post release separators if not accept(parser, _in(["-", "_", "."]), "."): @@ -358,7 +358,7 @@ def accept_explicit_postrelease(parser): def accept_postrelease(parser): """PEP 440: Post-releases.""" - parser.open_context(parser.tail()["start"]) + parser.open_context(parser.context()["start"]) if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser): parser.close_context() @@ -369,7 +369,7 @@ def accept_postrelease(parser): def accept_devrelease(parser): """PEP 440: Developmental releases.""" - ctx = parser.open_context(parser.tail()["start"]) + ctx = parser.open_context(parser.context()["start"]) # PEP 440: Development release separators if not accept(parser, _in(["-", "_", "."]), "."): @@ -390,7 +390,7 @@ def accept_devrelease(parser): def accept_local(parser): """PEP 440: Local version identifiers.""" - parser.open_context(parser.tail()["start"]) + parser.open_context(parser.context()["start"]) if accept(parser, _is("+"), "+") and accept_alnum(parser): accept_separator_alnum_sequence(parser) @@ -421,9 +421,9 @@ def normalize_pep440(version): accept_postrelease(parser) accept_devrelease(parser) accept_local(parser) - if parser.version[parser.tail()["start"]:]: + if parser.version[parser.context()["start"]:]: fail( "Failed to parse PEP 440 version identifier '%s'." % parser.version, - "Parse error at '%s'" % parser.version[parser.tail()["start"]:], + "Parse error at '%s'" % parser.version[parser.context()["start"]:], ) - return parser.tail()["norm"] + return parser.context()["norm"] From dca427c19672abc1cd61aa126872336b0abacedf Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:14:38 +0900 Subject: [PATCH 16/30] refactor: drop parameters from open_context --- python/private/py_wheel_normalize_pep440.bzl | 41 +++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index d476f20406..d9ada6b914 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -43,7 +43,10 @@ def _in(reference): """Predicate testing if a token is in the list `reference`.""" return lambda token: token in reference -def _open_context(self, start): +def _ctx(start): + return {"norm": "", "start": start} + +def _open_context(self): """Open an new parsing ctx. If the current parsing step succeeds, call self.close_context(). @@ -55,7 +58,7 @@ def _open_context(self, start): start: index into `version` indicating where the current parsing step starts. """ - self.contexts.append({"norm": "", "start": start}) + self.contexts.append(_ctx(_context(self)["start"])) return self.contexts[-1] def _close_context(self): @@ -74,8 +77,9 @@ def _new(version): """Create a new normalizer""" self = struct( version = version, - contexts = [], + contexts = [_ctx(0)], ) + public = struct( # methods: keep sorted close_context = mkmethod(self, _close_context), @@ -136,7 +140,7 @@ def accept_placeholder(parser): actually be valid. """ - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() if not accept(parser, _is("{"), str): parser.pop() @@ -157,7 +161,7 @@ def accept_placeholder(parser): def accept_digits(parser): """Accept multiple digits (or placeholders).""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() start = ctx["start"] for i in range(start, len(parser.version) + 1): @@ -175,7 +179,7 @@ def accept_digits(parser): def accept_string(parser, string, replacement): """Accept a `string` in the input. Output `replacement`.""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() for character in string.elems(): if not accept(parser, _in([character, character.upper()]), ""): @@ -190,7 +194,7 @@ def accept_string(parser, string, replacement): def accept_alnum(parser): """Accept an alphanumeric sequence.""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() start = ctx["start"] for i in range(start, len(parser.version) + 1): @@ -205,7 +209,7 @@ def accept_alnum(parser): def accept_dot_number(parser): """Accept a dot followed by digits.""" - parser.open_context(parser.context()["start"]) + parser.open_context() if accept(parser, _is("."), ".") and accept_digits(parser): parser.close_context() @@ -227,7 +231,7 @@ def accept_dot_number_sequence(parser): def accept_separator_alnum(parser): """Accept a separator followed by an alphanumeric string.""" - parser.open_context(parser.context()["start"]) + parser.open_context() # PEP 440: Local version segments if ( @@ -254,7 +258,7 @@ def accept_separator_alnum_sequence(parser): def accept_epoch(parser): """PEP 440: Version epochs.""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() if accept_digits(parser) and accept(parser, _is("!"), "!"): if ctx["norm"] == "0!": parser.pop() @@ -268,7 +272,7 @@ def accept_epoch(parser): def accept_release(parser): """Accept the release segment, numbers separated by dots.""" - parser.open_context(parser.context()["start"]) + parser.open_context() if not accept_digits(parser): parser.pop() @@ -280,7 +284,7 @@ def accept_release(parser): def accept_pre_l(parser): """PEP 440: Pre-release spelling.""" - parser.open_context(parser.context()["start"]) + parser.open_context() if ( accept_string(parser, "alpha", "a") or @@ -300,7 +304,7 @@ def accept_pre_l(parser): def accept_prerelease(parser): """PEP 440: Pre-releases.""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() # PEP 440: Pre-release separators accept(parser, _in(["-", "_", "."]), "") @@ -320,7 +324,7 @@ def accept_prerelease(parser): def accept_implicit_postrelease(parser): """PEP 440: Implicit post releases.""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() if accept(parser, _is("-"), "") and accept_digits(parser): ctx["norm"] = ".post" + ctx["norm"] @@ -332,7 +336,7 @@ def accept_implicit_postrelease(parser): def accept_explicit_postrelease(parser): """PEP 440: Post-releases.""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() # PEP 440: Post release separators if not accept(parser, _in(["-", "_", "."]), "."): @@ -358,7 +362,7 @@ def accept_explicit_postrelease(parser): def accept_postrelease(parser): """PEP 440: Post-releases.""" - parser.open_context(parser.context()["start"]) + parser.open_context() if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser): parser.close_context() @@ -369,7 +373,7 @@ def accept_postrelease(parser): def accept_devrelease(parser): """PEP 440: Developmental releases.""" - ctx = parser.open_context(parser.context()["start"]) + ctx = parser.open_context() # PEP 440: Development release separators if not accept(parser, _in(["-", "_", "."]), "."): @@ -390,7 +394,7 @@ def accept_devrelease(parser): def accept_local(parser): """PEP 440: Local version identifiers.""" - parser.open_context(parser.context()["start"]) + parser.open_context() if accept(parser, _is("+"), "+") and accept_alnum(parser): accept_separator_alnum_sequence(parser) @@ -413,7 +417,6 @@ def normalize_pep440(version): string containing the normalized version. """ parser = _new(version.strip()) # PEP 440: Leading and Trailing Whitespace - parser.open_context(0) accept(parser, _is("v"), "") # PEP 440: Preceding v character accept_epoch(parser) accept_release(parser) From fcfff02e6ca24bfac97241b959568c73b88ea504 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:20:02 +0900 Subject: [PATCH 17/30] refactor: rename pop -> pop_context --- python/private/py_wheel_normalize_pep440.bzl | 38 ++++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index d9ada6b914..3c0035ffae 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -70,7 +70,7 @@ def _close_context(self): def _context(self): return self.contexts[-1] -def _pop(self): +def _pop_context(self): return self.contexts.pop() def _new(version): @@ -85,7 +85,7 @@ def _new(version): close_context = mkmethod(self, _close_context), context = mkmethod(self, _context), open_context = mkmethod(self, _open_context), - pop = mkmethod(self, _pop), + pop_context = mkmethod(self, _pop_context), # attributes: keep sorted version = self.version, @@ -143,7 +143,7 @@ def accept_placeholder(parser): ctx = parser.open_context() if not accept(parser, _is("{"), str): - parser.pop() + parser.pop_context() return False start = ctx["start"] @@ -152,7 +152,7 @@ def accept_placeholder(parser): break if not accept(parser, _is("}"), str): - parser.pop() + parser.pop_context() return False parser.close_context() @@ -174,7 +174,7 @@ def accept_digits(parser): return True break - parser.pop() + parser.pop_context() return False def accept_string(parser, string, replacement): @@ -183,7 +183,7 @@ def accept_string(parser, string, replacement): for character in string.elems(): if not accept(parser, _in([character, character.upper()]), ""): - parser.pop() + parser.pop_context() return False ctx["norm"] = replacement @@ -204,7 +204,7 @@ def accept_alnum(parser): return True break - parser.pop() + parser.pop_context() return False def accept_dot_number(parser): @@ -215,7 +215,7 @@ def accept_dot_number(parser): parser.close_context() return True else: - parser.pop() + parser.pop_context() return False def accept_dot_number_sequence(parser): @@ -241,7 +241,7 @@ def accept_separator_alnum(parser): parser.close_context() return True - parser.pop() + parser.pop_context() return False def accept_separator_alnum_sequence(parser): @@ -261,13 +261,13 @@ def accept_epoch(parser): ctx = parser.open_context() if accept_digits(parser) and accept(parser, _is("!"), "!"): if ctx["norm"] == "0!": - parser.pop() + parser.pop_context() parser.context()["start"] = ctx["start"] else: parser.close_context() return True else: - parser.pop() + parser.pop_context() return False def accept_release(parser): @@ -275,7 +275,7 @@ def accept_release(parser): parser.open_context() if not accept_digits(parser): - parser.pop() + parser.pop_context() return False accept_dot_number_sequence(parser) @@ -299,7 +299,7 @@ def accept_pre_l(parser): parser.close_context() return True else: - parser.pop() + parser.pop_context() return False def accept_prerelease(parser): @@ -310,7 +310,7 @@ def accept_prerelease(parser): accept(parser, _in(["-", "_", "."]), "") if not accept_pre_l(parser): - parser.pop() + parser.pop_context() return False accept(parser, _in(["-", "_", "."]), "") @@ -331,7 +331,7 @@ def accept_implicit_postrelease(parser): parser.close_context() return True - parser.pop() + parser.pop_context() return False def accept_explicit_postrelease(parser): @@ -357,7 +357,7 @@ def accept_explicit_postrelease(parser): parser.close_context() return True - parser.pop() + parser.pop_context() return False def accept_postrelease(parser): @@ -368,7 +368,7 @@ def accept_postrelease(parser): parser.close_context() return True - parser.pop() + parser.pop_context() return False def accept_devrelease(parser): @@ -389,7 +389,7 @@ def accept_devrelease(parser): parser.close_context() return True - parser.pop() + parser.pop_context() return False def accept_local(parser): @@ -401,7 +401,7 @@ def accept_local(parser): parser.close_context() return True - parser.pop() + parser.pop_context() return False def normalize_pep440(version): From ac27a62e4a16769d910a2f698224100b07932a7e Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:21:43 +0900 Subject: [PATCH 18/30] refactor: rename version -> input in the parser --- python/private/py_wheel_normalize_pep440.bzl | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 3c0035ffae..2545913596 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -73,10 +73,10 @@ def _context(self): def _pop_context(self): return self.contexts.pop() -def _new(version): +def _new(input): """Create a new normalizer""" self = struct( - version = version, + input = input, contexts = [_ctx(0)], ) @@ -88,7 +88,7 @@ def _new(version): pop_context = mkmethod(self, _pop_context), # attributes: keep sorted - version = self.version, + input = self.input, ) return public @@ -113,10 +113,10 @@ def accept(parser, predicate, value): ctx = parser.context() - if ctx["start"] >= len(parser.version): + if ctx["start"] >= len(parser.input): return False - token = parser.version[ctx["start"]] + token = parser.input[ctx["start"]] if predicate(token): if type(value) in ["function", "builtin_function_or_method"]: @@ -147,7 +147,7 @@ def accept_placeholder(parser): return False start = ctx["start"] - for _ in range(start, len(parser.version) + 1): + for _ in range(start, len(parser.input) + 1): if not accept(parser, _is_not("}"), str): break @@ -164,7 +164,7 @@ def accept_digits(parser): ctx = parser.open_context() start = ctx["start"] - for i in range(start, len(parser.version) + 1): + for i in range(start, len(parser.input) + 1): if not accept(parser, _isdigit, str) and not accept_placeholder(parser): if i - start >= 1: if ctx["norm"].isdigit(): @@ -197,7 +197,7 @@ def accept_alnum(parser): ctx = parser.open_context() start = ctx["start"] - for i in range(start, len(parser.version) + 1): + for i in range(start, len(parser.input) + 1): if not accept(parser, _isalnum, _lower) and not accept_placeholder(parser): if i - start >= 1: parser.close_context() @@ -224,7 +224,7 @@ def accept_dot_number_sequence(parser): start = ctx["start"] i = start - for i in range(start, len(parser.version) + 1): + for i in range(start, len(parser.input) + 1): if not accept_dot_number(parser): break return i - start >= 1 @@ -250,7 +250,7 @@ def accept_separator_alnum_sequence(parser): start = ctx["start"] i = start - for i in range(start, len(parser.version) + 1): + for i in range(start, len(parser.input) + 1): if not accept_separator_alnum(parser): break @@ -424,9 +424,9 @@ def normalize_pep440(version): accept_postrelease(parser) accept_devrelease(parser) accept_local(parser) - if parser.version[parser.context()["start"]:]: + if parser.input[parser.context()["start"]:]: fail( - "Failed to parse PEP 440 version identifier '%s'." % parser.version, - "Parse error at '%s'" % parser.version[parser.context()["start"]:], + "Failed to parse PEP 440 version identifier '%s'." % parser.input, + "Parse error at '%s'" % parser.input[parser.context()["start"]:], ) return parser.context()["norm"] From 872a64895709ee214d12e161dc8af1b5e6428a25 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:32:13 +0900 Subject: [PATCH 19/30] refactor: rename pop_context -> discard --- python/private/py_wheel_normalize_pep440.bzl | 40 ++++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 2545913596..c65e0bf8ff 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -50,7 +50,7 @@ def _open_context(self): """Open an new parsing ctx. If the current parsing step succeeds, call self.close_context(). - If the current parsing step fails, call contexts.pop() to + If the current parsing step fails, call self.discard() to go back to how it was before we opened a new ctx. Args: @@ -70,7 +70,7 @@ def _close_context(self): def _context(self): return self.contexts[-1] -def _pop_context(self): +def _discard(self): return self.contexts.pop() def _new(input): @@ -84,8 +84,8 @@ def _new(input): # methods: keep sorted close_context = mkmethod(self, _close_context), context = mkmethod(self, _context), + discard = mkmethod(self, _discard), open_context = mkmethod(self, _open_context), - pop_context = mkmethod(self, _pop_context), # attributes: keep sorted input = self.input, @@ -143,7 +143,7 @@ def accept_placeholder(parser): ctx = parser.open_context() if not accept(parser, _is("{"), str): - parser.pop_context() + parser.discard() return False start = ctx["start"] @@ -152,7 +152,7 @@ def accept_placeholder(parser): break if not accept(parser, _is("}"), str): - parser.pop_context() + parser.discard() return False parser.close_context() @@ -174,7 +174,7 @@ def accept_digits(parser): return True break - parser.pop_context() + parser.discard() return False def accept_string(parser, string, replacement): @@ -183,7 +183,7 @@ def accept_string(parser, string, replacement): for character in string.elems(): if not accept(parser, _in([character, character.upper()]), ""): - parser.pop_context() + parser.discard() return False ctx["norm"] = replacement @@ -204,7 +204,7 @@ def accept_alnum(parser): return True break - parser.pop_context() + parser.discard() return False def accept_dot_number(parser): @@ -215,7 +215,7 @@ def accept_dot_number(parser): parser.close_context() return True else: - parser.pop_context() + parser.discard() return False def accept_dot_number_sequence(parser): @@ -241,7 +241,7 @@ def accept_separator_alnum(parser): parser.close_context() return True - parser.pop_context() + parser.discard() return False def accept_separator_alnum_sequence(parser): @@ -261,13 +261,13 @@ def accept_epoch(parser): ctx = parser.open_context() if accept_digits(parser) and accept(parser, _is("!"), "!"): if ctx["norm"] == "0!": - parser.pop_context() + parser.discard() parser.context()["start"] = ctx["start"] else: parser.close_context() return True else: - parser.pop_context() + parser.discard() return False def accept_release(parser): @@ -275,7 +275,7 @@ def accept_release(parser): parser.open_context() if not accept_digits(parser): - parser.pop_context() + parser.discard() return False accept_dot_number_sequence(parser) @@ -299,7 +299,7 @@ def accept_pre_l(parser): parser.close_context() return True else: - parser.pop_context() + parser.discard() return False def accept_prerelease(parser): @@ -310,7 +310,7 @@ def accept_prerelease(parser): accept(parser, _in(["-", "_", "."]), "") if not accept_pre_l(parser): - parser.pop_context() + parser.discard() return False accept(parser, _in(["-", "_", "."]), "") @@ -331,7 +331,7 @@ def accept_implicit_postrelease(parser): parser.close_context() return True - parser.pop_context() + parser.discard() return False def accept_explicit_postrelease(parser): @@ -357,7 +357,7 @@ def accept_explicit_postrelease(parser): parser.close_context() return True - parser.pop_context() + parser.discard() return False def accept_postrelease(parser): @@ -368,7 +368,7 @@ def accept_postrelease(parser): parser.close_context() return True - parser.pop_context() + parser.discard() return False def accept_devrelease(parser): @@ -389,7 +389,7 @@ def accept_devrelease(parser): parser.close_context() return True - parser.pop_context() + parser.discard() return False def accept_local(parser): @@ -401,7 +401,7 @@ def accept_local(parser): parser.close_context() return True - parser.pop_context() + parser.discard() return False def normalize_pep440(version): From 4237439e81c3b43e638cba749499a626a050c70a Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:33:31 +0900 Subject: [PATCH 20/30] refactor: rename close_context -> accept --- python/private/py_wheel_normalize_pep440.bzl | 36 ++++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index c65e0bf8ff..d88ceae855 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -49,7 +49,7 @@ def _ctx(start): def _open_context(self): """Open an new parsing ctx. - If the current parsing step succeeds, call self.close_context(). + If the current parsing step succeeds, call self.accept(). If the current parsing step fails, call self.discard() to go back to how it was before we opened a new ctx. @@ -61,7 +61,7 @@ def _open_context(self): self.contexts.append(_ctx(_context(self)["start"])) return self.contexts[-1] -def _close_context(self): +def _accept(self): """Close the current ctx successfully and merge the results.""" finished = self.contexts.pop() self.contexts[-1]["norm"] += finished["norm"] @@ -82,7 +82,7 @@ def _new(input): public = struct( # methods: keep sorted - close_context = mkmethod(self, _close_context), + accept = mkmethod(self, _accept), context = mkmethod(self, _context), discard = mkmethod(self, _discard), open_context = mkmethod(self, _open_context), @@ -155,7 +155,7 @@ def accept_placeholder(parser): parser.discard() return False - parser.close_context() + parser.accept() return True def accept_digits(parser): @@ -170,7 +170,7 @@ def accept_digits(parser): if ctx["norm"].isdigit(): # PEP 440: Integer Normalization ctx["norm"] = str(int(ctx["norm"])) - parser.close_context() + parser.accept() return True break @@ -188,7 +188,7 @@ def accept_string(parser, string, replacement): ctx["norm"] = replacement - parser.close_context() + parser.accept() return True def accept_alnum(parser): @@ -200,7 +200,7 @@ def accept_alnum(parser): for i in range(start, len(parser.input) + 1): if not accept(parser, _isalnum, _lower) and not accept_placeholder(parser): if i - start >= 1: - parser.close_context() + parser.accept() return True break @@ -212,7 +212,7 @@ def accept_dot_number(parser): parser.open_context() if accept(parser, _is("."), ".") and accept_digits(parser): - parser.close_context() + parser.accept() return True else: parser.discard() @@ -238,7 +238,7 @@ def accept_separator_alnum(parser): accept(parser, _in([".", "-", "_"]), ".") and (accept_digits(parser) or accept_alnum(parser)) ): - parser.close_context() + parser.accept() return True parser.discard() @@ -264,7 +264,7 @@ def accept_epoch(parser): parser.discard() parser.context()["start"] = ctx["start"] else: - parser.close_context() + parser.accept() return True else: parser.discard() @@ -279,7 +279,7 @@ def accept_release(parser): return False accept_dot_number_sequence(parser) - parser.close_context() + parser.accept() return True def accept_pre_l(parser): @@ -296,7 +296,7 @@ def accept_pre_l(parser): accept_string(parser, "pre", "rc") or accept_string(parser, "rc", "rc") ): - parser.close_context() + parser.accept() return True else: parser.discard() @@ -319,7 +319,7 @@ def accept_prerelease(parser): # PEP 440: Implicit pre-release number ctx["norm"] += "0" - parser.close_context() + parser.accept() return True def accept_implicit_postrelease(parser): @@ -328,7 +328,7 @@ def accept_implicit_postrelease(parser): if accept(parser, _is("-"), "") and accept_digits(parser): ctx["norm"] = ".post" + ctx["norm"] - parser.close_context() + parser.accept() return True parser.discard() @@ -354,7 +354,7 @@ def accept_explicit_postrelease(parser): # PEP 440: Implicit post release number ctx["norm"] += "0" - parser.close_context() + parser.accept() return True parser.discard() @@ -365,7 +365,7 @@ def accept_postrelease(parser): parser.open_context() if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser): - parser.close_context() + parser.accept() return True parser.discard() @@ -386,7 +386,7 @@ def accept_devrelease(parser): # PEP 440: Implicit development release number ctx["norm"] += "0" - parser.close_context() + parser.accept() return True parser.discard() @@ -398,7 +398,7 @@ def accept_local(parser): if accept(parser, _is("+"), "+") and accept_alnum(parser): accept_separator_alnum_sequence(parser) - parser.close_context() + parser.accept() return True parser.discard() From 1076fd7bb8a072f8144e065e1e6cf2bd1be95ec7 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 27 Aug 2023 12:38:13 +0900 Subject: [PATCH 21/30] refactor: use parser.discard and parser.accept methods --- python/private/py_wheel_normalize_pep440.bzl | 97 +++++++------------- 1 file changed, 34 insertions(+), 63 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index d88ceae855..25fafe5b40 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -66,12 +66,14 @@ def _accept(self): finished = self.contexts.pop() self.contexts[-1]["norm"] += finished["norm"] self.contexts[-1]["start"] = finished["start"] + return True def _context(self): return self.contexts[-1] def _discard(self): - return self.contexts.pop() + self.contexts.pop() + return False def _new(input): """Create a new normalizer""" @@ -143,8 +145,7 @@ def accept_placeholder(parser): ctx = parser.open_context() if not accept(parser, _is("{"), str): - parser.discard() - return False + return parser.discard() start = ctx["start"] for _ in range(start, len(parser.input) + 1): @@ -152,11 +153,9 @@ def accept_placeholder(parser): break if not accept(parser, _is("}"), str): - parser.discard() - return False + return parser.discard() - parser.accept() - return True + return parser.accept() def accept_digits(parser): """Accept multiple digits (or placeholders).""" @@ -170,12 +169,10 @@ def accept_digits(parser): if ctx["norm"].isdigit(): # PEP 440: Integer Normalization ctx["norm"] = str(int(ctx["norm"])) - parser.accept() - return True + return parser.accept() break - parser.discard() - return False + return parser.discard() def accept_string(parser, string, replacement): """Accept a `string` in the input. Output `replacement`.""" @@ -183,13 +180,11 @@ def accept_string(parser, string, replacement): for character in string.elems(): if not accept(parser, _in([character, character.upper()]), ""): - parser.discard() - return False + return parser.discard() ctx["norm"] = replacement - parser.accept() - return True + return parser.accept() def accept_alnum(parser): """Accept an alphanumeric sequence.""" @@ -200,23 +195,19 @@ def accept_alnum(parser): for i in range(start, len(parser.input) + 1): if not accept(parser, _isalnum, _lower) and not accept_placeholder(parser): if i - start >= 1: - parser.accept() - return True + return parser.accept() break - parser.discard() - return False + return parser.discard() def accept_dot_number(parser): """Accept a dot followed by digits.""" parser.open_context() if accept(parser, _is("."), ".") and accept_digits(parser): - parser.accept() - return True + return parser.accept() else: - parser.discard() - return False + return parser.discard() def accept_dot_number_sequence(parser): """Accept a sequence of dot+digits.""" @@ -238,11 +229,9 @@ def accept_separator_alnum(parser): accept(parser, _in([".", "-", "_"]), ".") and (accept_digits(parser) or accept_alnum(parser)) ): - parser.accept() - return True + return parser.accept() - parser.discard() - return False + return parser.discard() def accept_separator_alnum_sequence(parser): """Accept a sequence of separator+alphanumeric.""" @@ -264,23 +253,19 @@ def accept_epoch(parser): parser.discard() parser.context()["start"] = ctx["start"] else: - parser.accept() - return True + return parser.accept() else: - parser.discard() - return False + return parser.discard() def accept_release(parser): """Accept the release segment, numbers separated by dots.""" parser.open_context() if not accept_digits(parser): - parser.discard() - return False + return parser.discard() accept_dot_number_sequence(parser) - parser.accept() - return True + return parser.accept() def accept_pre_l(parser): """PEP 440: Pre-release spelling.""" @@ -296,11 +281,9 @@ def accept_pre_l(parser): accept_string(parser, "pre", "rc") or accept_string(parser, "rc", "rc") ): - parser.accept() - return True + return parser.accept() else: - parser.discard() - return False + return parser.discard() def accept_prerelease(parser): """PEP 440: Pre-releases.""" @@ -310,8 +293,7 @@ def accept_prerelease(parser): accept(parser, _in(["-", "_", "."]), "") if not accept_pre_l(parser): - parser.discard() - return False + return parser.discard() accept(parser, _in(["-", "_", "."]), "") @@ -319,8 +301,7 @@ def accept_prerelease(parser): # PEP 440: Implicit pre-release number ctx["norm"] += "0" - parser.accept() - return True + return parser.accept() def accept_implicit_postrelease(parser): """PEP 440: Implicit post releases.""" @@ -328,11 +309,9 @@ def accept_implicit_postrelease(parser): if accept(parser, _is("-"), "") and accept_digits(parser): ctx["norm"] = ".post" + ctx["norm"] - parser.accept() - return True + return parser.accept() - parser.discard() - return False + return parser.discard() def accept_explicit_postrelease(parser): """PEP 440: Post-releases.""" @@ -354,22 +333,18 @@ def accept_explicit_postrelease(parser): # PEP 440: Implicit post release number ctx["norm"] += "0" - parser.accept() - return True + return parser.accept() - parser.discard() - return False + return parser.discard() def accept_postrelease(parser): """PEP 440: Post-releases.""" parser.open_context() if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser): - parser.accept() - return True + return parser.accept() - parser.discard() - return False + return parser.discard() def accept_devrelease(parser): """PEP 440: Developmental releases.""" @@ -386,11 +361,9 @@ def accept_devrelease(parser): # PEP 440: Implicit development release number ctx["norm"] += "0" - parser.accept() - return True + return parser.accept() - parser.discard() - return False + return parser.discard() def accept_local(parser): """PEP 440: Local version identifiers.""" @@ -398,11 +371,9 @@ def accept_local(parser): if accept(parser, _is("+"), "+") and accept_alnum(parser): accept_separator_alnum_sequence(parser) - parser.accept() - return True + return parser.accept() - parser.discard() - return False + return parser.discard() def normalize_pep440(version): """Escape the version component of a filename. From 2fb94570603bb5145a49bd44ffbaf86012ed9f63 Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Mon, 28 Aug 2023 13:00:41 +0200 Subject: [PATCH 22/30] Make pep440 file accessible by docs targets //python/private:py_wheel.bzl depends on a new file, py_wheel_normalize_pep440.bzl, which therefore needs to be exported by the python/private package and listed in the //docs:packaging_bzl sources. --- docs/BUILD.bazel | 1 + python/private/BUILD.bazel | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/BUILD.bazel b/docs/BUILD.bazel index 1fb4f81484..3205ca4fa1 100644 --- a/docs/BUILD.bazel +++ b/docs/BUILD.bazel @@ -79,6 +79,7 @@ bzl_library( "//python:packaging.bzl", "//python/private:py_package.bzl", "//python/private:py_wheel.bzl", + "//python/private:py_wheel_normalize_pep440.bzl", "//python/private:stamp.bzl", "//python/private:util.bzl", ], diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index 10af17e630..045db43089 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -92,6 +92,7 @@ exports_files( "coverage.patch", "py_package.bzl", "py_wheel.bzl", + "py_wheel_normalize_pep440.bzl", "reexports.bzl", "stamp.bzl", "util.bzl", From 2728e9fdff5b3b61cb45753f73996a1b451d6b75 Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Mon, 28 Aug 2023 13:33:22 +0200 Subject: [PATCH 23/30] Fix branch with missing return in accept_epoch In the case where the epoch parsed to "0!", it should be reset to the empty string. The previous way of doing that forgot to return acceptance to the caller; the new way of doing it is clearer than manipulating the "start" of the context. --- python/private/py_wheel_normalize_pep440.bzl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index 25fafe5b40..cefc228fe7 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -250,10 +250,8 @@ def accept_epoch(parser): ctx = parser.open_context() if accept_digits(parser) and accept(parser, _is("!"), "!"): if ctx["norm"] == "0!": - parser.discard() - parser.context()["start"] = ctx["start"] - else: - return parser.accept() + ctx["norm"] = "" + return parser.accept() else: return parser.discard() From 2e57719f62ba3990d9819fb531a24b140825e5bc Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Mon, 28 Aug 2023 13:34:59 +0200 Subject: [PATCH 24/30] Documentation fixes according to buildifier-lint The buildifier-lint checker wants arguments and return values to be documented. --- python/private/py_wheel_normalize_pep440.bzl | 154 ++++++++++++++++--- 1 file changed, 136 insertions(+), 18 deletions(-) diff --git a/python/private/py_wheel_normalize_pep440.bzl b/python/private/py_wheel_normalize_pep440.bzl index cefc228fe7..9566348987 100644 --- a/python/private/py_wheel_normalize_pep440.bzl +++ b/python/private/py_wheel_normalize_pep440.bzl @@ -55,8 +55,6 @@ def _open_context(self): Args: self: The normalizer. - start: index into `version` indicating where the current - parsing step starts. """ self.contexts.append(_ctx(_context(self)["start"])) return self.contexts[-1] @@ -103,6 +101,7 @@ def accept(parser, predicate, value): `version`. Args: + parser: The normalizer. predicate: function taking a token and returning a boolean saying if we want to accept the token. value: the string to add if there's a match, or, if `value` @@ -141,6 +140,11 @@ def accept_placeholder(parser): resulting wheel file name containing a placeholder will not actually be valid. + Args: + parser: The normalizer. + + Returns: + whether a placeholder was accepted. """ ctx = parser.open_context() @@ -158,7 +162,14 @@ def accept_placeholder(parser): return parser.accept() def accept_digits(parser): - """Accept multiple digits (or placeholders).""" + """Accept multiple digits (or placeholders). + + Args: + parser: The normalizer. + + Returns: + whether some digits (or placeholders) were accepted. + """ ctx = parser.open_context() start = ctx["start"] @@ -175,7 +186,16 @@ def accept_digits(parser): return parser.discard() def accept_string(parser, string, replacement): - """Accept a `string` in the input. Output `replacement`.""" + """Accept a `string` in the input. Output `replacement`. + + Args: + parser: The normalizer. + string: The string to search for in the parser input. + replacement: The normalized string to use if the string was found. + + Returns: + whether the string was accepted. + """ ctx = parser.open_context() for character in string.elems(): @@ -187,7 +207,14 @@ def accept_string(parser, string, replacement): return parser.accept() def accept_alnum(parser): - """Accept an alphanumeric sequence.""" + """Accept an alphanumeric sequence. + + Args: + parser: The normalizer. + + Returns: + whether an alphanumeric sequence was accepted. + """ ctx = parser.open_context() start = ctx["start"] @@ -201,7 +228,14 @@ def accept_alnum(parser): return parser.discard() def accept_dot_number(parser): - """Accept a dot followed by digits.""" + """Accept a dot followed by digits. + + Args: + parser: The normalizer. + + Returns: + whether a dot+digits pair was accepted. + """ parser.open_context() if accept(parser, _is("."), ".") and accept_digits(parser): @@ -210,7 +244,14 @@ def accept_dot_number(parser): return parser.discard() def accept_dot_number_sequence(parser): - """Accept a sequence of dot+digits.""" + """Accept a sequence of dot+digits. + + Args: + parser: The normalizer. + + Returns: + whether a sequence of dot+digits pairs was accepted. + """ ctx = parser.context() start = ctx["start"] i = start @@ -221,7 +262,14 @@ def accept_dot_number_sequence(parser): return i - start >= 1 def accept_separator_alnum(parser): - """Accept a separator followed by an alphanumeric string.""" + """Accept a separator followed by an alphanumeric string. + + Args: + parser: The normalizer. + + Returns: + whether a separator and an alphanumeric string were accepted. + """ parser.open_context() # PEP 440: Local version segments @@ -234,7 +282,14 @@ def accept_separator_alnum(parser): return parser.discard() def accept_separator_alnum_sequence(parser): - """Accept a sequence of separator+alphanumeric.""" + """Accept a sequence of separator+alphanumeric. + + Args: + parser: The normalizer. + + Returns: + whether a sequence of separator+alphanumerics was accepted. + """ ctx = parser.context() start = ctx["start"] i = start @@ -246,7 +301,14 @@ def accept_separator_alnum_sequence(parser): return i - start >= 1 def accept_epoch(parser): - """PEP 440: Version epochs.""" + """PEP 440: Version epochs. + + Args: + parser: The normalizer. + + Returns: + whether a PEP 440 epoch identifier was accepted. + """ ctx = parser.open_context() if accept_digits(parser) and accept(parser, _is("!"), "!"): if ctx["norm"] == "0!": @@ -256,7 +318,14 @@ def accept_epoch(parser): return parser.discard() def accept_release(parser): - """Accept the release segment, numbers separated by dots.""" + """Accept the release segment, numbers separated by dots. + + Args: + parser: The normalizer. + + Returns: + whether a release segment was accepted. + """ parser.open_context() if not accept_digits(parser): @@ -266,7 +335,14 @@ def accept_release(parser): return parser.accept() def accept_pre_l(parser): - """PEP 440: Pre-release spelling.""" + """PEP 440: Pre-release spelling. + + Args: + parser: The normalizer. + + Returns: + whether a prerelease keyword was accepted. + """ parser.open_context() if ( @@ -284,7 +360,14 @@ def accept_pre_l(parser): return parser.discard() def accept_prerelease(parser): - """PEP 440: Pre-releases.""" + """PEP 440: Pre-releases. + + Args: + parser: The normalizer. + + Returns: + whether a prerelease identifier was accepted. + """ ctx = parser.open_context() # PEP 440: Pre-release separators @@ -302,7 +385,14 @@ def accept_prerelease(parser): return parser.accept() def accept_implicit_postrelease(parser): - """PEP 440: Implicit post releases.""" + """PEP 440: Implicit post releases. + + Args: + parser: The normalizer. + + Returns: + whether an implicit postrelease identifier was accepted. + """ ctx = parser.open_context() if accept(parser, _is("-"), "") and accept_digits(parser): @@ -312,7 +402,14 @@ def accept_implicit_postrelease(parser): return parser.discard() def accept_explicit_postrelease(parser): - """PEP 440: Post-releases.""" + """PEP 440: Post-releases. + + Args: + parser: The normalizer. + + Returns: + whether an explicit postrelease identifier was accepted. + """ ctx = parser.open_context() # PEP 440: Post release separators @@ -336,7 +433,14 @@ def accept_explicit_postrelease(parser): return parser.discard() def accept_postrelease(parser): - """PEP 440: Post-releases.""" + """PEP 440: Post-releases. + + Args: + parser: The normalizer. + + Returns: + whether a postrelease identifier was accepted. + """ parser.open_context() if accept_implicit_postrelease(parser) or accept_explicit_postrelease(parser): @@ -345,7 +449,14 @@ def accept_postrelease(parser): return parser.discard() def accept_devrelease(parser): - """PEP 440: Developmental releases.""" + """PEP 440: Developmental releases. + + Args: + parser: The normalizer. + + Returns: + whether a developmental release identifier was accepted. + """ ctx = parser.open_context() # PEP 440: Development release separators @@ -364,7 +475,14 @@ def accept_devrelease(parser): return parser.discard() def accept_local(parser): - """PEP 440: Local version identifiers.""" + """PEP 440: Local version identifiers. + + Args: + parser: The normalizer. + + Returns: + whether a local version identifier was accepted. + """ parser.open_context() if accept(parser, _is("+"), "+") and accept_alnum(parser): From cd3f9054fcef3012996ba1d26cf933105f913808 Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Mon, 28 Aug 2023 17:08:36 +0200 Subject: [PATCH 25/30] Put load of normalize_pep440 on a single line The reviewers apparently like long lines. Not sold on that, but contributors should follow the style of the projects they contribute to. --- tests/py_wheel/py_wheel_tests.bzl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/py_wheel/py_wheel_tests.bzl b/tests/py_wheel/py_wheel_tests.bzl index 5a8588e259..3c03a1b8e4 100644 --- a/tests/py_wheel/py_wheel_tests.bzl +++ b/tests/py_wheel/py_wheel_tests.bzl @@ -16,10 +16,7 @@ load("@rules_testing//lib:analysis_test.bzl", "analysis_test", "test_suite") load("@rules_testing//lib:util.bzl", rt_util = "util") load("//python:packaging.bzl", "py_wheel") -load( - "//python/private:py_wheel_normalize_pep440.bzl", - "normalize_pep440", -) # buildifier: disable=bzl-visibility +load("//python/private:py_wheel_normalize_pep440.bzl", "normalize_pep440") # buildifier: disable=bzl-visibility _basic_tests = [] _tests = [] From b2756dcaf9af2021717516ff926ae619b8d5f753 Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Tue, 3 Oct 2023 15:55:09 +0200 Subject: [PATCH 26/30] Add feature flags normalizing version or name To avoid having breaking changes, add feature flags "incompatible_normalize_name" and "incompatible_normalize_version", which default to False for the time being. --- examples/wheel/BUILD.bazel | 38 ++++++++++++++++++++ examples/wheel/wheel_test.py | 40 ++++++++++++++++++++- python/private/py_wheel.bzl | 40 ++++++++++++++++++--- tools/wheelmaker.py | 67 +++++++++++++++++++++++++++++------- 4 files changed, 168 insertions(+), 17 deletions(-) diff --git a/examples/wheel/BUILD.bazel b/examples/wheel/BUILD.bazel index 1d9f2a700a..81422d37c3 100644 --- a/examples/wheel/BUILD.bazel +++ b/examples/wheel/BUILD.bazel @@ -54,6 +54,8 @@ py_wheel( testonly = True, # Set this to verify the generated .dist target doesn't break things # Package data. We're building "example_minimal_library-0.0.1-py3-none-any.whl" distribution = "example_minimal_library", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "py3", version = "0.0.1", deps = [ @@ -76,6 +78,8 @@ py_wheel( testonly = True, abi = "$(ABI)", distribution = "example_minimal_library", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "$(PYTHON_TAG)", toolchains = ["//examples/wheel:make_variable_tags"], version = "$(VERSION)", @@ -95,6 +99,8 @@ py_wheel( name = "minimal_with_py_library_with_stamp", # Package data. We're building "example_minimal_library-0.0.1-py3-none-any.whl" distribution = "example_minimal_library{BUILD_USER}", + incompatible_normalize_name = False, + incompatible_normalize_version = False, python_tag = "py3", stamp = 1, version = "0.1.{BUILD_TIMESTAMP}", @@ -123,6 +129,8 @@ py_wheel( name = "minimal_with_py_package", # Package data. We're building "example_minimal_package-0.0.1-py3-none-any.whl" distribution = "example_minimal_package", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "py3", version = "0.0.1", deps = [":example_pkg"], @@ -156,6 +164,8 @@ py_wheel( "//examples/wheel:README.md": "README", }, homepage = "www.example.com", + incompatible_normalize_name = True, + incompatible_normalize_version = True, license = "Apache 2.0", project_urls = { "Bug Tracker": "www.example.com/issues", @@ -177,6 +187,8 @@ py_wheel( entry_points = { "console_scripts": ["main = foo.bar:baz"], }, + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "py3", strip_path_prefixes = [ "examples", @@ -191,6 +203,8 @@ py_wheel( name = "custom_package_root_multi_prefix", # Package data. We're building "custom_custom_package_root_multi_prefix-0.0.1-py3-none-any.whl" distribution = "example_custom_package_root_multi_prefix", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "py3", strip_path_prefixes = [ "examples/wheel/lib", @@ -206,6 +220,8 @@ py_wheel( name = "custom_package_root_multi_prefix_reverse_order", # Package data. We're building "custom_custom_package_root_multi_prefix_reverse_order-0.0.1-py3-none-any.whl" distribution = "example_custom_package_root_multi_prefix_reverse_order", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "py3", strip_path_prefixes = [ "examples/wheel", @@ -220,6 +236,8 @@ py_wheel( py_wheel( name = "python_requires_in_a_package", distribution = "example_python_requires_in_a_package", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_requires = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", python_tag = "py3", version = "0.0.1", @@ -231,6 +249,8 @@ py_wheel( py_wheel( name = "use_rule_with_dir_in_outs", distribution = "use_rule_with_dir_in_outs", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "py3", version = "0.0.1", deps = [ @@ -244,6 +264,8 @@ py_wheel( name = "python_abi3_binary_wheel", abi = "abi3", distribution = "example_python_abi3_binary_wheel", + incompatible_normalize_name = True, + incompatible_normalize_version = True, # these platform strings must line up with test_python_abi3_binary_wheel() in wheel_test.py platform = select({ ":aarch64-apple-darwin": "macosx_11_0_arm64", @@ -257,6 +279,19 @@ py_wheel( version = "0.0.1", ) +py_wheel( + name = "legacy_filename_escaping", + # Per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode + # runs of non-alphanumeric, non-digit symbols should be replaced with a single underscore. + # Unicode non-ascii letters should *not* be replaced with underscore. + distribution = "file~~name-escaping", + incompatible_normalize_name = False, + incompatible_normalize_version = False, + python_tag = "py3", + version = "0.0.1-r7", + deps = [":example_pkg"], +) + py_wheel( name = "filename_escaping", # Per https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode @@ -264,6 +299,8 @@ py_wheel( # Unicode non-ascii letters aren't allowed according to # https://packaging.python.org/en/latest/specifications/name-normalization/. distribution = "File--Name-Escaping", + incompatible_normalize_name = True, + incompatible_normalize_version = True, python_tag = "py3", version = "v0.0.1.RC1+ubuntu-r7", deps = [":example_pkg"], @@ -278,6 +315,7 @@ py_test( ":custom_package_root_multi_prefix_reverse_order", ":customized", ":filename_escaping", + ":legacy_filename_escaping", ":minimal_with_py_library", ":minimal_with_py_library_with_stamp", ":minimal_with_py_package", diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py index aaee08eb6f..671bd8ad84 100644 --- a/examples/wheel/wheel_test.py +++ b/examples/wheel/wheel_test.py @@ -153,6 +153,44 @@ def test_customized_wheel(self): second = second.main:s""", ) + def test_legacy_filename_escaping(self): + filename = os.path.join( + os.environ['TEST_SRCDIR'], + 'rules_python', + 'examples', + 'wheel', + 'file_name_escaping-0.0.1_r7-py3-none-any.whl', + ) + with zipfile.ZipFile(filename) as zf: + self.assertEquals( + zf.namelist(), + [ + 'examples/wheel/lib/data.txt', + 'examples/wheel/lib/module_with_data.py', + 'examples/wheel/lib/simple_module.py', + 'examples/wheel/main.py', + # PEP calls for replacing only in the archive filename. + # Alas setuptools also escapes in the dist-info directory + # name, so let's be compatible. + 'file_name_escaping-0.0.1_r7.dist-info/WHEEL', + 'file_name_escaping-0.0.1_r7.dist-info/METADATA', + 'file_name_escaping-0.0.1_r7.dist-info/RECORD', + ], + ) + metadata_contents = zf.read( + 'file_name_escaping-0.0.1_r7.dist-info/METADATA' + ) + self.assertEquals( + metadata_contents, + b"""\ +Metadata-Version: 2.1 +Name: file~~name-escaping +Version: 0.0.1-r7 + +UNKNOWN +""", + ) + def test_filename_escaping(self): filename = os.path.join( os.environ["TEST_SRCDIR"], @@ -384,7 +422,7 @@ def test_rule_expands_workspace_status_keys_in_wheel_metadata(self): "rules_python", "examples", "wheel", - "example_minimal_library{build_user}-0.1.{BUILD_TIMESTAMP}-py3-none-any.whl", + "example_minimal_library_BUILD_USER_-0.1._BUILD_TIMESTAMP_-py3-none-any.whl", ) with zipfile.ZipFile(filename) as zf: diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl index 9fbba2d776..76c49d4e03 100644 --- a/python/private/py_wheel.bzl +++ b/python/private/py_wheel.bzl @@ -118,6 +118,21 @@ See [`py_wheel_dist`](/docs/packaging.md#py_wheel_dist) for more info. ), } +_feature_flags = { + "incompatible_normalize_name": attr.bool( + default = False, + doc = "Normalize the package distribution name according to current " + + "standards.", + ), + "incompatible_normalize_version": attr.bool( + default = False, + doc = "Normalize the package version according to PEP440 standard. " + + "With this option set to True, if the user wants to pass any " + + "stamp variables, they have to be enclosed in '{}', e.g. " + + "'{BUILD_TIMESTAMP}'.", + ), +} + _requirement_attrs = { "extra_requires": attr.string_list_dict( doc = "List of optional requirements for this package", @@ -274,13 +289,25 @@ def _py_wheel_impl(ctx): python_tag = _replace_make_variables(ctx.attr.python_tag, ctx) version = _replace_make_variables(ctx.attr.version, ctx) - outfile = ctx.actions.declare_file("-".join([ - _escape_filename_distribution_name(ctx.attr.distribution), - normalize_pep440(version), + filename_segments = [] + + if ctx.attr.incompatible_normalize_name: + filename_segments.append(_escape_filename_distribution_name(ctx.attr.distribution)) + else: + filename_segments.append(_escape_filename_segment(ctx.attr.distribution)) + + if ctx.attr.incompatible_normalize_version: + filename_segments.append(normalize_pep440(version)) + else: + filename_segments.append(_escape_filename_segment(version)) + + filename_segments.extend([ _escape_filename_segment(python_tag), _escape_filename_segment(abi), _escape_filename_segment(ctx.attr.platform), - ]) + ".whl") + ]) + + outfile = ctx.actions.declare_file("-".join(filename_segments) + ".whl") name_file = ctx.actions.declare_file(ctx.label.name + ".name") @@ -309,6 +336,10 @@ def _py_wheel_impl(ctx): args.add("--out", outfile) args.add("--name_file", name_file) args.add_all(ctx.attr.strip_path_prefixes, format_each = "--strip_path_prefix=%s") + if ctx.attr.incompatible_normalize_name: + args.add("--incompatible_normalize_name") + if ctx.attr.incompatible_normalize_version: + args.add("--incompatible_normalize_version") # Pass workspace status files if stamping is enabled if is_stamping_enabled(ctx.attr): @@ -460,6 +491,7 @@ tries to locate `.runfiles` directory which is not packaged in the wheel. ), }, _distribution_attrs, + _feature_flags, _requirement_attrs, _entrypoint_attrs, _other_attrs, diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py index 97eac20350..dce5406093 100644 --- a/tools/wheelmaker.py +++ b/tools/wheelmaker.py @@ -22,8 +22,6 @@ import zipfile from pathlib import Path -import packaging.version - def commonpath(path1, path2): ret = [] @@ -34,6 +32,16 @@ def commonpath(path1, path2): return os.path.sep.join(ret) +def escape_filename_segment(segment): + """Escapes a filename segment per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode + + This is a legacy function, kept for backwards compatibility, + and may be removed in the future. See `escape_filename_distribution_name` + and `normalize_pep440` for the modern alternatives. + """ + return re.sub(r"[^\w\d.]+", "_", segment, re.UNICODE) + + def normalize_package_name(name): """Normalize a package name according to the Python Packaging User Guide. @@ -68,6 +76,8 @@ def normalize_pep440(version): """ + import packaging.version + try: return str(packaging.version.Version(version)) except packaging.version.InvalidVersion: @@ -95,9 +105,11 @@ def __init__( platform, outfile=None, strip_path_prefixes=None, + incompatible_normalize_name=False, + incompatible_normalize_version=False, ): self._name = name - self._version = normalize_pep440(version) + self._version = version self._build_tag = build_tag self._python_tag = python_tag self._abi = abi @@ -107,12 +119,30 @@ def __init__( strip_path_prefixes if strip_path_prefixes is not None else [] ) - self._distinfo_dir = ( - escape_filename_distribution_name(self._name) - + "-" - + self._version - + ".dist-info/" - ) + if incompatible_normalize_version: + self._version = normalize_pep440(self._version) + self._escaped_version = self._version + else: + self._escaped_version = escape_filename_segment(self._version) + + if incompatible_normalize_name: + escaped_name = escape_filename_distribution_name(self._name) + self._distinfo_dir = ( + escaped_name + "-" + self._escaped_version + ".dist-info/" + ) + self._wheelname_fragment_distribution_name = escaped_name + else: + # The legacy behavior escapes the distinfo dir but not the + # wheel name. Enable incompatible_normalize_name to fix it. + # https://github.com/bazelbuild/rules_python/issues/1132 + self._distinfo_dir = ( + escape_filename_segment(self._name) + + "-" + + self._escaped_version + + ".dist-info/" + ) + self._wheelname_fragment_distribution_name = self._name + self._zipfile = None # Entries for the RECORD file as (filename, hash, size) tuples. self._record = [] @@ -129,7 +159,7 @@ def __exit__(self, type, value, traceback): def wheelname(self) -> str: components = [ - escape_filename_distribution_name(self._name), + self._wheelname_fragment_distribution_name, self._version, ] if self._build_tag: @@ -222,7 +252,7 @@ def add_metadata(self, metadata, name, description, version): # https://www.python.org/dev/peps/pep-0566/ # https://packaging.python.org/specifications/core-metadata/ metadata = re.sub("^Name: .*$", "Name: %s" % name, metadata, flags=re.MULTILINE) - metadata += "Version: %s\n\n" % normalize_pep440(version) + metadata += "Version: %s\n\n" % version # setuptools seems to insert UNKNOWN as description when none is # provided. metadata += description if description else "UNKNOWN" @@ -380,6 +410,10 @@ def parse_args() -> argparse.Namespace: help="Pass in the stamp info file for stamping", ) + feature_group = parser.add_argument_group("Feature flags") + feature_group.add_argument("--incompatible_normalize_name", action="store_true") + feature_group.add_argument("--incompatible_normalize_version", action="store_true") + return parser.parse_args(sys.argv[1:]) @@ -436,6 +470,8 @@ def main() -> None: platform=arguments.platform, outfile=arguments.out, strip_path_prefixes=strip_prefixes, + incompatible_normalize_name=arguments.incompatible_normalize_name, + incompatible_normalize_version=arguments.incompatible_normalize_version, ) as maker: for package_filename, real_filename in all_files: maker.add_file(package_filename, real_filename) @@ -460,8 +496,15 @@ def main() -> None: with open(arguments.metadata_file, "rt", encoding="utf-8") as metadata_file: metadata = metadata_file.read() + if arguments.incompatible_normalize_version: + version_in_metadata = normalize_pep440(version) + else: + version_in_metadata = version maker.add_metadata( - metadata=metadata, name=name, description=description, version=version + metadata=metadata, + name=name, + description=description, + version=version_in_metadata, ) if arguments.entry_points_file: From 943be1b72a9b41d91515e307ab2ae9e30dec7d60 Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Tue, 3 Oct 2023 16:04:17 +0200 Subject: [PATCH 27/30] Add feature flags to documentation Add the "incompatible_normalize_version" and "incompatible_normalize_name" feature flags to the documentation. --- docs/packaging.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/packaging.md b/docs/packaging.md index 0e8e110ef5..456f251c1f 100755 --- a/docs/packaging.md +++ b/docs/packaging.md @@ -59,8 +59,9 @@ This also has the advantage that stamping information is included in the wheel's
 py_wheel_rule(name, abi, author, author_email, classifiers, console_scripts, deps,
               description_content_type, description_file, distribution, entry_points,
-              extra_distinfo_files, extra_requires, homepage, license, platform, project_urls,
-              python_requires, python_tag, requires, stamp, strip_path_prefixes, summary, version)
+              extra_distinfo_files, extra_requires, homepage, incompatible_normalize_name,
+              incompatible_normalize_version, license, platform, project_urls, python_requires,
+              python_tag, requires, stamp, strip_path_prefixes, summary, version)
 
Internal rule used by the [py_wheel macro](/docs/packaging.md#py_wheel). @@ -89,6 +90,8 @@ in the way they expect. | extra_distinfo_files | Extra files to add to distinfo directory in the archive. | Dictionary: Label -> String | optional | {} | | extra_requires | List of optional requirements for this package | Dictionary: String -> List of strings | optional | {} | | homepage | A string specifying the URL for the package homepage. | String | optional | "" | +| incompatible_normalize_name | Normalize the package distribution name according to current standards. | Boolean | optional | False | +| incompatible_normalize_version | Normalize the package version according to PEP440 standard. With this option set to True, if the user wants to pass any stamp variables, they have to be enclosed in '{}', e.g. '{BUILD_TIMESTAMP}'. | Boolean | optional | False | | license | A string specifying the license of the package. | String | optional | "" | | platform | Supported platform. Use 'any' for pure-Python wheel.

If you have included platform-specific data, such as a .pyd or .so extension module, you will need to specify the platform in standard pip format. If you support multiple platforms, you can define platform constraints, then use a select() to specify the appropriate specifier, eg:

platform = select({ "//platforms:windows_x86_64": "win_amd64", "//platforms:macos_x86_64": "macosx_10_7_x86_64", "//platforms:linux_x86_64": "manylinux2014_x86_64", }) | String | optional | "any" | | project_urls | A string dict specifying additional browsable URLs for the project and corresponding labels, where label is the key and url is the value. e.g {{"Bug Tracker": "http://bitbucket.org/tarek/distribute/issues/"}} | Dictionary: String -> String | optional | {} | From 88c549bd6ec15a86cd8edadcc0f2debc4df6c149 Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Wed, 4 Oct 2023 09:46:35 +0200 Subject: [PATCH 28/30] Update "incompatible_normalize_name" doc string Add links to the specifications. Note that we have support for placeholders. Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com> --- python/private/py_wheel.bzl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl index 76c49d4e03..4152e08c18 100644 --- a/python/private/py_wheel.bzl +++ b/python/private/py_wheel.bzl @@ -121,8 +121,16 @@ See [`py_wheel_dist`](/docs/packaging.md#py_wheel_dist) for more info. _feature_flags = { "incompatible_normalize_name": attr.bool( default = False, - doc = "Normalize the package distribution name according to current " + - "standards.", + doc = """\ +Normalize the package distribution name according to latest +Python packaging standards. + +See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode +and https://packaging.python.org/en/latest/specifications/name-normalization/. + +Apart from the valid names according to the above, we also accept +'{' and '}', which may be used as placeholders for stamping. +""", ), "incompatible_normalize_version": attr.bool( default = False, From ea417905271dd9ad2ec01d26151f3468cab50a2d Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Wed, 4 Oct 2023 09:50:30 +0200 Subject: [PATCH 29/30] Update feature flag description in documentation Just a `bazelisk run //docs:update` after having changed the description of "incompatible_normalize_name". --- docs/packaging.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/packaging.md b/docs/packaging.md index 456f251c1f..90c66dc1de 100755 --- a/docs/packaging.md +++ b/docs/packaging.md @@ -90,7 +90,7 @@ in the way they expect. | extra_distinfo_files | Extra files to add to distinfo directory in the archive. | Dictionary: Label -> String | optional | {} | | extra_requires | List of optional requirements for this package | Dictionary: String -> List of strings | optional | {} | | homepage | A string specifying the URL for the package homepage. | String | optional | "" | -| incompatible_normalize_name | Normalize the package distribution name according to current standards. | Boolean | optional | False | +| incompatible_normalize_name | Normalize the package distribution name according to latest Python packaging standards.

See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode and https://packaging.python.org/en/latest/specifications/name-normalization/.

Apart from the valid names according to the above, we also accept '{' and '}', which may be used as placeholders for stamping. | Boolean | optional | False | | incompatible_normalize_version | Normalize the package version according to PEP440 standard. With this option set to True, if the user wants to pass any stamp variables, they have to be enclosed in '{}', e.g. '{BUILD_TIMESTAMP}'. | Boolean | optional | False | | license | A string specifying the license of the package. | String | optional | "" | | platform | Supported platform. Use 'any' for pure-Python wheel.

If you have included platform-specific data, such as a .pyd or .so extension module, you will need to specify the platform in standard pip format. If you support multiple platforms, you can define platform constraints, then use a select() to specify the appropriate specifier, eg:

platform = select({ "//platforms:windows_x86_64": "win_amd64", "//platforms:macos_x86_64": "macosx_10_7_x86_64", "//platforms:linux_x86_64": "manylinux2014_x86_64", }) | String | optional | "any" | From 08d555b64b4ce7ede3a86f855f2a39cd6851793d Mon Sep 17 00:00:00 2001 From: Christian von Schultz Date: Thu, 5 Oct 2023 14:59:02 +0200 Subject: [PATCH 30/30] Add an entry to the change log Add a note about the two new feature flags in the change log: incompatible_normalize_name and incompatible_normalize_version. --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59bdac1b06..3c421a9d33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,14 @@ A brief description of the categories of changes: authentication against private HTTP hosts serving Python toolchain binaries. * `//python:packaging_bzl` added, a `bzl_library` for the Starlark files `//python:packaging.bzl` requires. +* (py_wheel) Added the `incompatible_normalize_name` feature flag to + normalize the package distribution name according to latest Python + packaging standards. Defaults to `False` for the time being. +* (py_wheel) Added the `incompatible_normalize_version` feature flag + to normalize the package version according to PEP440 standard. This + also adds support for local version specifiers (versions with a `+` + in them), in accordance with PEP440. Defaults to `False` for the + time being. ### Removed