From d4ecd4c744eb5d633c6e1853f853b5c34f29c315 Mon Sep 17 00:00:00 2001 From: Cory Paik Date: Sun, 28 Aug 2022 22:20:05 -0600 Subject: [PATCH] feat: add alias generation for pip parse Generate alias packages and targets for `pip_parse` under the parent repository. This serves two purposes. It allows for users to reference packages using a simpler `@pypi//package_name` without the import issues associated with putting the contents of the package there (as may be the case with `pip_install` following this naming pattern). Additionally, this allows users to override individual libraries either by default or based on select statements. --- docs/pip.md | 22 ++++ docs/pip_repository.md | 30 +++++- examples/pip_parse/BUILD | 1 + examples/pip_parse/WORKSPACE | 16 ++- examples/pip_parse_vendored/requirements.bzl | 2 +- python/pip.bzl | 3 +- python/pip_install/extract_wheels/bazel.py | 12 ++- .../extract_wheels/extract_single_wheel.py | 1 + .../parse_requirements_to_bzl.py | 102 +++++++++++++++++- .../parse_requirements_to_bzl_test.py | 5 +- .../extract_wheels/whl_filegroup_test.py | 5 +- python/pip_install/pip_repository.bzl | 34 ++++++ 12 files changed, 221 insertions(+), 12 deletions(-) diff --git a/docs/pip.md b/docs/pip.md index fc38f0fea3..33cb378f5b 100644 --- a/docs/pip.md +++ b/docs/pip.md @@ -71,6 +71,28 @@ Annotations to apply to the BUILD file content from package generated from a `pi str: A json encoded string of the provided content. + + +## package_overrides + +
+package_overrides(overrides)
+
+ +Overrides apply to alias rules generated from a `pip_repository` rule. + +**PARAMETERS** + + +| Name | Description | Default Value | +| :------------- | :------------- | :------------- | +| overrides | Dictionary mapping package names to either alternative default targets, or selection dictionaries. In this case, the dictionary will be merged with the default generated by pip_repository. | none | + +**RETURNS** + +str: A json encoded string of the provided overrides. + + ## pip_install diff --git a/docs/pip_repository.md b/docs/pip_repository.md index ae9100a315..dbd9adb0a7 100644 --- a/docs/pip_repository.md +++ b/docs/pip_repository.md @@ -8,9 +8,10 @@
 pip_repository(name, annotations, bzlmod, download_only, enable_implicit_namespace_pkgs,
-               environment, extra_pip_args, isolated, pip_data_exclude, python_interpreter,
-               python_interpreter_target, quiet, repo_mapping, repo_prefix, requirements_darwin,
-               requirements_linux, requirements_lock, requirements_windows, timeout)
+               environment, extra_pip_args, isolated, library_overrides, pip_data_exclude,
+               python_interpreter, python_interpreter_target, quiet, repo_mapping, repo_prefix,
+               requirements_darwin, requirements_linux, requirements_lock, requirements_windows,
+               timeout)
 
A rule for importing `requirements.txt` dependencies into Bazel. @@ -66,6 +67,7 @@ py_binary( | environment | Environment variables to set in the pip subprocess. Can be used to set common variables such as http_proxy, https_proxy and no_proxy Note that pip is run with "--isolated" on the CLI so PIP_<VAR>_<NAME> style env vars are ignored, but env vars that control requests and urllib3 can be passed. | Dictionary: String -> String | optional | {} | | extra_pip_args | Extra arguments to pass on to pip. Must not contain spaces. | List of strings | optional | [] | | isolated | Whether or not to pass the [--isolated](https://pip.pypa.io/en/stable/cli/pip/#cmdoption-isolated) flag to the underlying pip command. Alternatively, the RULES_PYTHON_PIP_ISOLATED enviornment varaible can be used to control this flag. | Boolean | optional | True | +| library_overrides | Optional library overrides to apply to packages. | String | optional | "" | | pip_data_exclude | Additional data exclusion parameters to add to the pip packages BUILD file. | List of strings | optional | [] | | python_interpreter | The python interpreter to use. This can either be an absolute path or the name of a binary found on the host's PATH environment variable. If no value is set python3 is defaulted for Unix systems and python.exe for Windows. | String | optional | "" | | python_interpreter_target | If you are using a custom python interpreter built by another repository rule, use this attribute to specify its BUILD target. This allows pip_repository to invoke pip using the same interpreter as your toolchain. If set, takes precedence over python_interpreter. | Label | optional | None | @@ -170,6 +172,28 @@ Annotations to apply to the BUILD file content from package generated from a `pi str: A json encoded string of the provided content. + + +## package_overrides + +
+package_overrides(overrides)
+
+ +Overrides apply to alias rules generated from a `pip_repository` rule. + +**PARAMETERS** + + +| Name | Description | Default Value | +| :------------- | :------------- | :------------- | +| overrides | Dictionary mapping package names to either alternative default targets, or selection dictionaries. In this case, the dictionary will be merged with the default generated by pip_repository. | none | + +**RETURNS** + +str: A json encoded string of the provided overrides. + + ## use_isolated diff --git a/examples/pip_parse/BUILD b/examples/pip_parse/BUILD index 1b6ba55f5e..507c10edb7 100644 --- a/examples/pip_parse/BUILD +++ b/examples/pip_parse/BUILD @@ -81,6 +81,7 @@ py_test( ":yamllint", data_requirement("s3cmd"), dist_info_requirement("requests"), + "@pypi//yamllint", ], env = { "WHEEL_DATA_CONTENTS": "$(rootpaths {})".format(data_requirement("s3cmd")), diff --git a/examples/pip_parse/WORKSPACE b/examples/pip_parse/WORKSPACE index e96db9f844..d66edd5a9e 100644 --- a/examples/pip_parse/WORKSPACE +++ b/examples/pip_parse/WORKSPACE @@ -13,7 +13,7 @@ python_register_toolchains( ) load("@python39//:defs.bzl", "interpreter") -load("@rules_python//python:pip.bzl", "pip_parse") +load("@rules_python//python:pip.bzl", "package_overrides", "pip_parse") pip_parse( # (Optional) You can set an environment in the pip process to control its @@ -22,6 +22,20 @@ pip_parse( # can be passed # environment = {"HTTPS_PROXY": "http://my.proxy.fun/"}, name = "pypi", + + # Pip parse uses alias packages to create targets configurable aliases such as @pypi//package. + # You can then override these packages, either by default or in certain cases (under the hood, this uses select). + # Note that this will override it for everyone who uses @pypi//mypackage, including other pip packages. This + # will not affect callers who use @pypi_mypackage//:pkg directly. + library_overrides = package_overrides({ + # Shorthand to override the default. + # "pip_package_1" : "//:my_pip_package_1", + # # Any valid select would work two (default not required). + # "pip_package_2" : { + # "//:mycondition": "//:my_pip_package_2", + # } + }), + # (Optional) You can provide extra parameters to pip. # Here, make pip output verbose (this is usable with `quiet = False`). # extra_pip_args = ["-v"], diff --git a/examples/pip_parse_vendored/requirements.bzl b/examples/pip_parse_vendored/requirements.bzl index 5a0dcf85f5..dbcb78a93d 100644 --- a/examples/pip_parse_vendored/requirements.bzl +++ b/examples/pip_parse_vendored/requirements.bzl @@ -7,7 +7,7 @@ from //:requirements.txt load("@python39//:defs.bzl", "interpreter") load("@rules_python//python/pip_install:pip_repository.bzl", "whl_library") -all_requirements = ["@pip_certifi//:pkg", "@pip_charset_normalizer//:pkg", "@pip_idna//:pkg", "@pip_requests//:pkg", "@pip_urllib3//:pkg"] +all_requirements = ["@pip//certifi", "@pip//charset_normalizer", "@pip//idna", "@pip//requests", "@pip//urllib3"] all_whl_requirements = ["@pip_certifi//:whl", "@pip_charset_normalizer//:whl", "@pip_idna//:whl", "@pip_requests//:whl", "@pip_urllib3//:whl"] diff --git a/python/pip.bzl b/python/pip.bzl index 02ea538c38..bfdffab204 100644 --- a/python/pip.bzl +++ b/python/pip.bzl @@ -13,12 +13,13 @@ # limitations under the License. """Import pip requirements into Bazel.""" -load("//python/pip_install:pip_repository.bzl", "pip_repository", _package_annotation = "package_annotation") +load("//python/pip_install:pip_repository.bzl", "pip_repository", _package_annotation = "package_annotation", _package_overrides = "package_overrides") load("//python/pip_install:repositories.bzl", "pip_install_dependencies") load("//python/pip_install:requirements.bzl", _compile_pip_requirements = "compile_pip_requirements") compile_pip_requirements = _compile_pip_requirements package_annotation = _package_annotation +package_overrides = _package_overrides def pip_install(requirements = None, name = "pip", **kwargs): """Accepts a locked/compiled requirements file and installs the dependencies listed within. diff --git a/python/pip_install/extract_wheels/bazel.py b/python/pip_install/extract_wheels/bazel.py index 013e4a23e1..4b58c4b3fa 100644 --- a/python/pip_install/extract_wheels/bazel.py +++ b/python/pip_install/extract_wheels/bazel.py @@ -326,6 +326,10 @@ def sanitised_repo_file_label(whl_name: str, repo_prefix: str) -> str: ) +def sanitised_alias_repo_library_label(repo: str, name: str) -> str: + return '"@{}//{}"'.format(sanitise_name(repo, ""), sanitise_name(name, "")) + + def extract_wheel( wheel_file: str, extras: Dict[str, Set[str]], @@ -335,6 +339,7 @@ def extract_wheel( incremental: bool = False, incremental_dir: Path = Path("."), annotation: Optional[annotation.Annotation] = None, + parent_repo_name: Optional[str] = None, ) -> Optional[str]: """Extracts wheel into given directory and creates py_library and filegroup targets. @@ -347,6 +352,7 @@ def extract_wheel( effects the names of libraries and their dependencies, which point to other external repositories. incremental_dir: An optional override for the working directory of incremental builds. annotation: An optional set of annotations to apply to the BUILD contents of the wheel. + parent_repo_name: The parent repo name, required when `incremental=True` and ignored otherwise. Returns: The Bazel label for the extracted wheel, in the form '//path/to/wheel'. @@ -373,8 +379,12 @@ def extract_wheel( whl_deps = sorted(whl.dependencies(extras_requested) - self_edge_dep) if incremental: + assert ( + parent_repo_name is not None + ), '"parent_repo_name" is required when incremental=True.' sanitised_dependencies = [ - sanitised_repo_library_label(d, repo_prefix=repo_prefix) for d in whl_deps + sanitised_alias_repo_library_label(repo=parent_repo_name, name=d) + for d in whl_deps ] sanitised_wheel_file_dependencies = [ sanitised_repo_file_label(d, repo_prefix=repo_prefix) for d in whl_deps diff --git a/python/pip_install/extract_wheels/extract_single_wheel.py b/python/pip_install/extract_wheels/extract_single_wheel.py index ff64291024..c43d7186cc 100644 --- a/python/pip_install/extract_wheels/extract_single_wheel.py +++ b/python/pip_install/extract_wheels/extract_single_wheel.py @@ -98,6 +98,7 @@ def main() -> None: incremental=True, repo_prefix=args.repo_prefix, annotation=args.annotation, + parent_repo_name=args.repo, ) diff --git a/python/pip_install/extract_wheels/parse_requirements_to_bzl.py b/python/pip_install/extract_wheels/parse_requirements_to_bzl.py index 686a57d8b2..acf0d568aa 100644 --- a/python/pip_install/extract_wheels/parse_requirements_to_bzl.py +++ b/python/pip_install/extract_wheels/parse_requirements_to_bzl.py @@ -1,8 +1,10 @@ import argparse import json +import os import shlex import sys import textwrap +import warnings from pathlib import Path from typing import Any, Dict, List, TextIO, Tuple @@ -86,6 +88,7 @@ def parse_whl_library_args(args: argparse.Namespace) -> Dict[str, Any]: "requirements_lock_label", "annotations", "bzlmod", + "library_overrides", ): if arg in whl_library_args: whl_library_args.pop(arg) @@ -93,10 +96,85 @@ def parse_whl_library_args(args: argparse.Namespace) -> Dict[str, Any]: return whl_library_args +def _create_alias_package( + alias_name: str, + py_library_selection: Dict[str, str], +): + # The directory name is just the sanitized name. + os.mkdir(alias_name) + + py_library_selection = json.dumps(py_library_selection) + + build_file_content = textwrap.dedent( + """\ + + package(default_visibility = ["//visibility:public"]) + + alias( + name = "{py_library_label}", + actual = select({py_library_selection}), + ) + """.format( + py_library_label=alias_name, + py_library_selection=py_library_selection, + ) + ) + + with open(os.path.join(alias_name, "BUILD.bazel"), "w", encoding="utf-8") as f: + f.write(build_file_content) + + +def create_alias_packages( + repo_prefix: str, + install_requirements: List[Tuple[InstallRequirement, str]], + library_overrides: Dict[str, Dict[str, str]], +): + """Create alias packages and targets for each requirement.""" + for ir, _ in install_requirements: + # The 'actual' library is the one in the incrementally fetched repo. + # We need to strip the quotes here as we will encode w/ json. + actual_library = bazel.sanitised_repo_library_label( + ir.name, repo_prefix=repo_prefix + ) + actual_library = actual_library.replace('"', "") + + alias_name = bazel.sanitise_name(ir.name, "") + + # Apply any overrides on top. We pop the keys here so we can report + # unused libraries to the user. Currently this only accepts overrides + # which use the alias (sanitized) name. + py_library_selection = { + "//conditions:default": actual_library, + **library_overrides.pop(alias_name, {}), + } + + _create_alias_package( + alias_name=alias_name, py_library_selection=py_library_selection + ) + + # By default, warn about overrides which aren't present in the requirements + # file, but create repos for these overrides anyway. This is useful in cases + # where the actual combination of libraries is unsupported but it works with + # the user's custom override(s). + # TODO(corypaik): Parameterize this behavior so that the users can whitelist + # specific libraries or throw an error instead. + if len(library_overrides) > 0: + warnings.warn( + "Ignoring library overrides for packages not present in the " + f"requirements file: {library_overrides}" + ) + + for alias_name, py_library_selection in library_overrides.items(): + _create_alias_package( + alias_name=alias_name, py_library_selection=py_library_selection + ) + + def generate_parsed_requirements_contents( requirements_lock: Path, repo: str, repo_prefix: str, + parent_repo_name: str, whl_library_args: Dict[str, Any], annotations: Dict[str, str] = dict(), bzlmod: bool = False, @@ -114,10 +192,12 @@ def generate_parsed_requirements_contents( repo_names_and_reqs = repo_names_and_requirements( install_req_and_lines, repo_prefix ) - + # Use the alias targets for `all_requirements`. all_requirements = ", ".join( [ - bazel.sanitised_repo_library_label(ir.name, repo_prefix=repo_prefix) + bazel.sanitised_alias_repo_library_label( + repo=parent_repo_name, name=ir.name + ) for ir, _ in install_req_and_lines ] ) @@ -212,6 +292,11 @@ def coerce_to_bool(option): return str(option).lower() == "true" +def parse_json_from_file(option): + content = Path(option).read_text() + return json.loads(content) if content.strip() != "" else {} + + def main(output: TextIO) -> None: """Args: @@ -266,6 +351,11 @@ def main(output: TextIO) -> None: default=False, help="Whether this script is run under bzlmod. Under bzlmod we don't generate the install_deps() macro as it isn't needed.", ) + parser.add_argument( + "--library_overrides", + type=parse_json_from_file, + help="A json encoded file containing library overrides for packages.", + ) arguments.parse_common_args(parser) args = parser.parse_args() @@ -278,6 +368,13 @@ def main(output: TextIO) -> None: req_names = sorted([req.name for req, _ in install_requirements]) annotations = args.annotations.collect(req_names) if args.annotations else {} + # Generate build files for each library. + create_alias_packages( + repo_prefix=args.repo_prefix, + install_requirements=install_requirements, + library_overrides=args.library_overrides, + ) + # Write all rendered annotation files and generate a list of the labels to write to the requirements file annotated_requirements = dict() for name, content in annotations.items(): @@ -310,6 +407,7 @@ def main(output: TextIO) -> None: requirements_lock=args.requirements_lock, repo=args.repo, repo_prefix=args.repo_prefix, + parent_repo_name=args.repo, whl_library_args=whl_library_args, annotations=annotated_requirements, bzlmod=args.bzlmod, diff --git a/python/pip_install/extract_wheels/parse_requirements_to_bzl_test.py b/python/pip_install/extract_wheels/parse_requirements_to_bzl_test.py index c4879f65c1..97a0fe103e 100644 --- a/python/pip_install/extract_wheels/parse_requirements_to_bzl_test.py +++ b/python/pip_install/extract_wheels/parse_requirements_to_bzl_test.py @@ -27,7 +27,7 @@ def test_generated_requirements_bzl(self) -> None: ) args = argparse.Namespace() args.requirements_lock = str(requirements_lock.resolve()) - args.repo = ("pip_parsed_deps_pypi__",) + args.repo = "pip_parsed_deps_pypi" args.repo_prefix = "pip_parsed_deps_pypi__" extra_pip_args = ["--index-url=pypi.org/simple"] pip_data_exclude = ["**.foo"] @@ -42,8 +42,9 @@ def test_generated_requirements_bzl(self) -> None: repo=args.repo, repo_prefix=args.repo_prefix, whl_library_args=whl_library_args, + parent_repo_name=args.repo, ) - library_target = "@pip_parsed_deps_pypi__foo//:pkg" + library_target = "@pip_parsed_deps_pypi//foo" whl_target = "@pip_parsed_deps_pypi__foo//:whl" all_requirements = 'all_requirements = ["{library_target}"]'.format( library_target=library_target diff --git a/python/pip_install/extract_wheels/whl_filegroup_test.py b/python/pip_install/extract_wheels/whl_filegroup_test.py index 2a7ade3b27..5a55f041e2 100644 --- a/python/pip_install/extract_wheels/whl_filegroup_test.py +++ b/python/pip_install/extract_wheels/whl_filegroup_test.py @@ -3,6 +3,7 @@ import tempfile import unittest from pathlib import Path +from typing import Optional from python.pip_install.extract_wheels import bazel @@ -21,6 +22,7 @@ def _run( self, repo_prefix: str, incremental: bool = False, + repo_name: Optional[str] = None, ) -> None: generated_bazel_dir = bazel.extract_wheel( self.wheel_path, @@ -28,6 +30,7 @@ def _run( pip_data_exclude=[], enable_implicit_namespace_pkgs=False, incremental=incremental, + parent_repo_name=repo_name, repo_prefix=repo_prefix, incremental_dir=Path(self.wheel_dir), ) @@ -46,7 +49,7 @@ def test_nonincremental(self) -> None: self._run(repo_prefix="prefix_") def test_incremental(self) -> None: - self._run(incremental=True, repo_prefix="prefix_") + self._run(incremental=True, repo_prefix="prefix_", repo_name="prefix") if __name__ == "__main__": diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 7fbf503992..af52374c08 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -300,6 +300,11 @@ def _pip_repository_impl(rctx): rctx.file(annotations_file, json.encode_indent(annotations, indent = " " * 4)) requirements_txt = locked_requirements_label(rctx, rctx.attr) + + # Write library overrides file + library_overrides_file = rctx.path("library_overrides.json") + rctx.file(library_overrides_file, rctx.attr.library_overrides) + args = [ python_interpreter, "-m", @@ -317,6 +322,8 @@ def _pip_repository_impl(rctx): annotations_file, "--bzlmod", str(rctx.attr.bzlmod).lower(), + "--library_overrides", + library_overrides_file, ] args += ["--python_interpreter", _get_python_interpreter_attr(rctx)] @@ -447,6 +454,9 @@ pip_repository_attrs = { we do not create the install_deps() macro. """, ), + "library_overrides": attr.string( + doc = "Optional library overrides to apply to packages.", + ), "requirements_darwin": attr.label( allow_single_file = True, doc = "Override the requirements_lock attribute when the host platform is Mac OS", @@ -613,3 +623,27 @@ def package_annotation( data_exclude_glob = data_exclude_glob, srcs_exclude_glob = srcs_exclude_glob, )) + +def package_overrides(overrides): + """Overrides apply to alias rules generated from a `pip_repository` rule. + + Args: + overrides (dict, required): Dictionary mapping package names to either + alternative default targets, or selection dictionaries. In this case, + the dictionary will be merged with the default generated by `pip_repository`. + + Returns: + str: A json encoded string of the provided overrides. + """ + processed_overrides = {} + + for name, label_or_select in overrides.items(): + current = {} + if type(label_or_select) == type(""): + current["//conditions:default"] = label_or_select + else: + for select_key, select_name in label_or_select.items(): + current[select_key] = select_name + + processed_overrides[name] = current + return json.encode(processed_overrides)