diff --git a/.bazelrc b/.bazelrc index 1afaedbc24..ddba1f3b1a 100644 --- a/.bazelrc +++ b/.bazelrc @@ -3,7 +3,7 @@ # This lets us glob() up all the files inside the examples to make them inputs to tests # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, run tools/bazel_integration_test/update_deleted_packages.sh -build --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install -query --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install +build --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install,examples/pip_parse +query --deleted_packages=examples/legacy_pip_import/boto,examples/legacy_pip_import/extras,examples/legacy_pip_import/helloworld,examples/pip_install,examples/pip_parse test --test_output=errors diff --git a/.gitignore b/.gitignore index dcfa539a21..cc8decd9a1 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,7 @@ bazel-bin bazel-genfiles bazel-out bazel-testlogs + +# vim swap files +*.swp +*.swo diff --git a/README.md b/README.md index 591c403554..85df2c9fcf 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ target in the appropriate wheel repo. ### Importing `pip` dependencies -To add pip dependencies to your `WORKSPACE` is you load +To add pip dependencies to your `WORKSPACE` load the `pip_install` function, and call it to create the individual wheel repos. @@ -136,6 +136,40 @@ re-executed in order to pick up a non-hermetic change to your environment (e.g., updating your system `python` interpreter), you can completely flush out your repo cache with `bazel clean --expunge`. 
+### Fetch `pip` dependencies lazily (experimental) + +One pain point with `pip_install` is the need to download all dependencies resolved by +your requirements.txt before the bazel analysis phase can start. For large python monorepos +this can take a long time, especially on slow connections. + +`pip_parse` provides a solution to this problem. If you can provide a lock +file of all your python dependencies `pip_parse` will translate each requirement into its own external repository. +Bazel will only fetch/build wheels for the requirements in the subgraph of your build target. + +There are API differences between `pip_parse` and `pip_install`: +1. `pip_parse` requires a fully resolved lock file of your python dependencies. You can generate this using + `pip-compile`, or a virtualenv and `pip freeze`. `pip_parse` uses a label argument called `requirements_lock` instead of `requirements` + to make this distinction clear. +2. `pip_parse` translates your requirements into a starlark macro called `install_deps`. You must call this macro in your WORKSPACE to + declare your dependencies. + + +```python +load("@rules_python//python:pip.bzl", "pip_parse") + +# Create a central repo that knows about the dependencies needed from +# requirements_lock.txt. +pip_parse( + name = "my_deps", + requirements_lock = "//path/to:requirements_lock.txt", +) + +# Load the starlark macro which will define your dependencies. +load("@my_deps//:requirements.bzl", "install_deps") +# Call it to define repos for your requirements. +install_deps() +``` + ### Importing `pip` dependencies with `pip_import` (legacy) The deprecated `pip_import` can still be used if needed. 
diff --git a/examples/BUILD b/examples/BUILD index 092ad40902..5b798d53a7 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -26,3 +26,8 @@ bazel_integration_test( name = "pip_install_example", timeout = "long", ) + +bazel_integration_test( + name = "pip_parse_example", + timeout = "long", +) diff --git a/examples/pip_parse/BUILD b/examples/pip_parse/BUILD new file mode 100644 index 0000000000..ca56af9c0d --- /dev/null +++ b/examples/pip_parse/BUILD @@ -0,0 +1,42 @@ +load("@pip_parsed_deps//:requirements.bzl", "requirement") +load("@rules_python//python:defs.bzl", "py_binary", "py_test") + +# Toolchain setup, this is optional. +# Demonstrate that we can use the same python interpreter for the toolchain and executing pip in pip install (see WORKSPACE). +# +#load("@rules_python//python:defs.bzl", "py_runtime_pair") +# +#py_runtime( +# name = "python3_runtime", +# files = ["@python_interpreter//:files"], +# interpreter = "@python_interpreter//:python_bin", +# python_version = "PY3", +# visibility = ["//visibility:public"], +#) +# +#py_runtime_pair( +# name = "my_py_runtime_pair", +# py2_runtime = None, +# py3_runtime = ":python3_runtime", +#) +# +#toolchain( +# name = "my_py_toolchain", +# toolchain = ":my_py_runtime_pair", +# toolchain_type = "@bazel_tools//tools/python:toolchain_type", +#) +# End of toolchain setup. 
+ +py_binary( + name = "main", + srcs = ["main.py"], + deps = [ + requirement("requests"), + ], +) + +py_test( + name = "test", + srcs = ["test.py"], + deps = [":main"], +) diff --git a/examples/pip_parse/WORKSPACE b/examples/pip_parse/WORKSPACE new file mode 100644 index 0000000000..418e762d0b --- /dev/null +++ b/examples/pip_parse/WORKSPACE @@ -0,0 +1,39 @@ +workspace(name = "example_repo") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_python", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", + sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", +) + +load("@rules_python//python:pip.bzl", "pip_parse") + +pip_parse( + # (Optional) You can provide extra parameters to pip. + # Here, make pip output verbose (this is usable with `quiet = False`). + # extra_pip_args = ["-v"], + + # (Optional) You can exclude custom elements in the data section of the generated BUILD files for pip packages. + # Exclude directories with spaces in their names in this example (avoids build errors if there are such directories). + #pip_data_exclude = ["**/* */**"], + + # (Optional) You can provide a python_interpreter (path) or a python_interpreter_target (a Bazel target, that + # acts as an executable). The latter can be anything that could be used as Python interpreter. E.g.: + # 1. Python interpreter that you compile in the build file (as above in @python_interpreter). + # 2. Pre-compiled python interpreter included with http_archive + # 3. Wrapper script, like in the autodetecting python toolchain. + #python_interpreter_target = "@python_interpreter//:python_bin", + + # (Optional) You can set quiet to False if you want to see pip output. 
+ #quiet = False, + + # Uses the default repository name "pip_parsed_deps" + requirements_lock = "//:requirements_lock.txt", +) + +load("@pip_parsed_deps//:requirements.bzl", "install_deps") + +# Initialize repositories for all packages in requirements_lock.txt. +install_deps() diff --git a/examples/pip_parse/main.py b/examples/pip_parse/main.py new file mode 100644 index 0000000000..79e1c1219b --- /dev/null +++ b/examples/pip_parse/main.py @@ -0,0 +1,5 @@ +import requests + + +def version(): + return requests.__version__ diff --git a/examples/pip_parse/requirements.txt b/examples/pip_parse/requirements.txt new file mode 100644 index 0000000000..989b995c68 --- /dev/null +++ b/examples/pip_parse/requirements.txt @@ -0,0 +1 @@ +requests==2.24.0 diff --git a/examples/pip_parse/requirements_lock.txt b/examples/pip_parse/requirements_lock.txt new file mode 100644 index 0000000000..b0d5b9ed51 --- /dev/null +++ b/examples/pip_parse/requirements_lock.txt @@ -0,0 +1,16 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --output-file=requirements_lock.txt requirements.txt +# +certifi==2020.12.5 + # via requests +chardet==3.0.4 + # via requests +idna==2.10 + # via requests +requests==2.24.0 + # via -r requirements.txt +urllib3==1.25.11 + # via requests diff --git a/examples/pip_parse/test.py b/examples/pip_parse/test.py new file mode 100644 index 0000000000..3cfb9bb91e --- /dev/null +++ b/examples/pip_parse/test.py @@ -0,0 +1,11 @@ +import unittest +import main + + +class ExampleTest(unittest.TestCase): + def test_main(self): + self.assertEqual("2.24.0", main.version()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/pip.bzl b/python/pip.bzl index 44e4167f1d..32a8901856 100644 --- a/python/pip.bzl +++ b/python/pip.bzl @@ -56,6 +56,17 @@ def pip_install(requirements, name = "pip", **kwargs): **kwargs ) +def pip_parse(requirements_lock, name = "pip_parsed_deps", **kwargs): + # Just in case our dependencies weren't 
already fetched + pip_install_dependencies() + + pip_repository( + name = name, + requirements_lock = requirements_lock, + incremental = True, + **kwargs + ) + def pip_repositories(): # buildifier: disable=print print("DEPRECATED: the pip_repositories rule has been replaced with pip_install, please see rules_python 0.1 release notes") diff --git a/python/pip_install/BUILD b/python/pip_install/BUILD index c24e6c7d31..bd53f5be32 100644 --- a/python/pip_install/BUILD +++ b/python/pip_install/BUILD @@ -3,6 +3,7 @@ filegroup( srcs = glob(["*.bzl"]) + [ "BUILD", "//python/pip_install/extract_wheels:distribution", + "//python/pip_install/parse_requirements_to_bzl:distribution", ], visibility = ["//:__pkg__"], ) diff --git a/python/pip_install/extract_wheels/__init__.py b/python/pip_install/extract_wheels/__init__.py index fe8b8ef7ea..96913cdad7 100644 --- a/python/pip_install/extract_wheels/__init__.py +++ b/python/pip_install/extract_wheels/__init__.py @@ -12,7 +12,7 @@ import sys import json -from python.pip_install.extract_wheels.lib import bazel, requirements +from python.pip_install.extract_wheels.lib import bazel, requirements, arguments def configure_reproducible_wheels() -> None: @@ -58,25 +58,7 @@ def main() -> None: required=True, help="Path to requirements.txt from where to install dependencies", ) - parser.add_argument( - "--repo", - action="store", - required=True, - help="The external repo name to install dependencies. 
def parse_common_args(parser: ArgumentParser) -> ArgumentParser:
    """Register the command-line flags shared by the pip repository tools.

    Despite the name, nothing is parsed here: the four flags below are added
    to ``parser`` in place, and the same object is handed back so the call can
    be chained.

    Args:
        parser: The parser to extend.

    Returns:
        The ``parser`` that was passed in, now carrying ``--repo``,
        ``--extra_pip_args``, ``--pip_data_exclude`` and
        ``--enable_implicit_namespace_pkgs``.
    """
    parser.add_argument(
        "--repo",
        action="store",
        required=True,
        # The value is the bare repository name; consumers add the leading
        # "@" themselves when they build labels from it.
        help="The external repo name to install dependencies. Given without the leading '@'.",
    )
    parser.add_argument(
        "--extra_pip_args", action="store", help="Extra arguments to pass down to pip.",
    )
    parser.add_argument(
        "--pip_data_exclude",
        action="store",
        help="Additional data exclusion parameters to add to the pip packages BUILD file.",
    )
    parser.add_argument(
        "--enable_implicit_namespace_pkgs",
        action="store_true",
        help="Disables conversion of implicit namespace packages into pkg-util style packages.",
    )
    return parser
args_dict) + self.assertEqual(args_dict["pip_data_exclude"], None) + self.assertEqual(args_dict["enable_implicit_namespace_pkgs"], False) + self.assertEqual(args_dict["repo"], repo_name) + self.assertEqual(args_dict["extra_pip_args"], index_url) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/pip_install/extract_wheels/lib/bazel.py b/python/pip_install/extract_wheels/lib/bazel.py index 0affa20b6d..95e85531ba 100644 --- a/python/pip_install/extract_wheels/lib/bazel.py +++ b/python/pip_install/extract_wheels/lib/bazel.py @@ -2,13 +2,15 @@ import os import textwrap import json -from typing import Iterable, List, Dict, Set +from typing import Iterable, List, Dict, Set, Optional import shutil from python.pip_install.extract_wheels.lib import namespace_pkgs, wheel, purelib WHEEL_FILE_LABEL = "whl" +PY_LIBRARY_LABEL = "pkg" + def generate_build_file_contents( name: str, dependencies: List[str], whl_file_deps: List[str], pip_data_exclude: List[str], @@ -91,6 +93,9 @@ def requirement(name): def whl_requirement(name): return requirement(name) + ":whl" + + def install_deps(): + fail("install_deps() only works if you are creating an incremental repo. Did you mean to use pip_install_incremental()?") """.format( repo=repo_name, requirement_labels=requirement_labels, @@ -99,7 +104,17 @@ def whl_requirement(name): ) -def sanitise_name(name: str) -> str: +DEFAULT_PACKAGE_PREFIX = "pypi__" + + +def whl_library_repo_prefix(parent_repo: str) -> str: + return "{parent}_{default_package_prefix}".format( + parent=parent_repo, + default_package_prefix=DEFAULT_PACKAGE_PREFIX + ) + + +def sanitise_name(name: str, prefix: str = DEFAULT_PACKAGE_PREFIX) -> str: """Sanitises the name to be compatible with Bazel labels. There are certain requirements around Bazel labels that we need to consider. 
def sanitised_library_label(whl_name: str) -> str:
    """Return the double-quoted ``//<sanitised name>`` label for a wheel."""
    package = sanitise_name(whl_name)
    return '"//%s"' % package


def sanitised_file_label(whl_name: str) -> str:
    """Return the double-quoted ``//<sanitised name>:<wheel file target>`` label."""
    label_parts = (sanitise_name(whl_name), WHEEL_FILE_LABEL)
    return '"//%s:%s"' % label_parts


def _whl_name_to_repo_root(whl_name: str, repo_prefix: str) -> str:
    """Return ``@<repo>//`` where the repo name is the wheel name sanitised with ``repo_prefix``."""
    repo_name = sanitise_name(whl_name, prefix=repo_prefix)
    return "@{}//".format(repo_name)


def sanitised_repo_library_label(whl_name: str, repo_prefix: str) -> str:
    """Return the double-quoted label of the library target in the wheel's own repository."""
    repo_root = _whl_name_to_repo_root(whl_name, repo_prefix)
    return '"{}:{}"'.format(repo_root, PY_LIBRARY_LABEL)


def sanitised_repo_file_label(whl_name: str, repo_prefix: str) -> str:
    """Return the double-quoted label of the wheel-file target in the wheel's own repository."""
    repo_root = _whl_name_to_repo_root(whl_name, repo_prefix)
    return '"{}:{}"'.format(repo_root, WHEEL_FILE_LABEL)
@@ -155,17 +192,24 @@ def extract_wheel( extras: a list of extras to add as dependencies for the installed wheel pip_data_exclude: list of file patterns to exclude from the generated data section of the py_library enable_implicit_namespace_pkgs: if true, disables conversion of implicit namespace packages and will unzip as-is + incremental: If true the extract the wheel in a format suitable for an external repository. This + effects the names of libraries and their dependencies, which point to other external repositories. + incremental_repo_prefix: If incremental is true, use this prefix when creating labels from wheel + names instead of the default. Returns: The Bazel label for the extracted wheel, in the form '//path/to/wheel'. """ whl = wheel.Wheel(wheel_file) - directory = sanitise_name(whl.name) - - os.mkdir(directory) - # copy the original wheel - shutil.copy(whl.path, directory) + if incremental: + directory = "." + else: + directory = sanitise_name(whl.name) + + os.mkdir(directory) + # copy the original wheel + shutil.copy(whl.path, directory) whl.unzip(directory) # Note: Order of operations matters here @@ -177,16 +221,30 @@ def extract_wheel( extras_requested = extras[whl.name] if whl.name in extras else set() whl_deps = sorted(whl.dependencies(extras_requested)) - sanitised_dependencies = [ - '"//%s"' % sanitise_name(d) for d in whl_deps - ] - sanitised_wheel_file_dependencies = [ - '"//%s:%s"' % (sanitise_name(d), WHEEL_FILE_LABEL) for d in whl_deps - ] + if incremental: + # check for mypy Optional validity + if incremental_repo_prefix is None: + raise TypeError("incremental_repo_prefix arguement cannot be None if incremental == True") + sanitised_dependencies = [ + sanitised_repo_library_label(d, repo_prefix=incremental_repo_prefix) for d in whl_deps + ] + sanitised_wheel_file_dependencies = [ + sanitised_repo_file_label(d, repo_prefix=incremental_repo_prefix) for d in whl_deps + ] + else: + sanitised_dependencies = [ + sanitised_library_label(d) for d 
in whl_deps + ] + sanitised_wheel_file_dependencies = [ + sanitised_file_label(d) for d in whl_deps + ] with open(os.path.join(directory, "BUILD.bazel"), "w") as build_file: contents = generate_build_file_contents( - sanitise_name(whl.name), sanitised_dependencies, sanitised_wheel_file_dependencies, pip_data_exclude + PY_LIBRARY_LABEL if incremental else sanitise_name(whl.name), + sanitised_dependencies, + sanitised_wheel_file_dependencies, + pip_data_exclude ) build_file.write(contents) diff --git a/python/pip_install/extract_wheels/lib/purelib.py b/python/pip_install/extract_wheels/lib/purelib.py index 99f6299317..4e9eb3f3ef 100644 --- a/python/pip_install/extract_wheels/lib/purelib.py +++ b/python/pip_install/extract_wheels/lib/purelib.py @@ -27,7 +27,7 @@ def spread_purelib_into_root(wheel_dir: str) -> None: return dot_data_dir = wheel.get_dot_data_directory(wheel_dir) - # 'Root-Is-Purelib: false' is no guarantee a .date directory exists with + # 'Root-Is-Purelib: false' is no guarantee a .data directory exists with # package code in it. eg. the 'markupsafe' package. 
if not dot_data_dir: return diff --git a/python/pip_install/parse_requirements_to_bzl/BUILD b/python/pip_install/parse_requirements_to_bzl/BUILD new file mode 100644 index 0000000000..61bde474fc --- /dev/null +++ b/python/pip_install/parse_requirements_to_bzl/BUILD @@ -0,0 +1,40 @@ +load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") +load("//python/pip_install:repositories.bzl", "requirement") + +py_binary( + name = "parse_requirements_to_bzl", + srcs = [ + "__init__.py", + "__main__.py", + ], + main = "__main__.py", + deps = ["//python/pip_install/extract_wheels/lib"], +) + +py_library( + name = "lib", + srcs = ["__init__.py"], + deps = [requirement("pip")], + visibility = ["//python/pip_install/extract_wheels:__subpackages__"], +) + +py_test( + name = "parse_requirements_to_bzl_test", + size = "small", + srcs = [ + "parse_requirements_to_bzl_test.py", + ], + tags = ["unit"], + deps = [ + ":lib", + "//python/pip_install/extract_wheels/lib" + ], +) + +filegroup( + name = "distribution", + srcs = glob(["*"], exclude = ["*_test.py"]) + [ + "//python/pip_install/parse_requirements_to_bzl/extract_single_wheel:distribution", + ], + visibility = ["//python/pip_install:__subpackages__"], +) diff --git a/python/pip_install/parse_requirements_to_bzl/__init__.py b/python/pip_install/parse_requirements_to_bzl/__init__.py new file mode 100644 index 0000000000..e38f9b042b --- /dev/null +++ b/python/pip_install/parse_requirements_to_bzl/__init__.py @@ -0,0 +1,132 @@ +import argparse +import json +import textwrap +import sys +from typing import List, Tuple + +from python.pip_install.extract_wheels.lib import bazel, arguments +from pip._internal.req import parse_requirements, constructors +from pip._internal.req.req_install import InstallRequirement +from pip._internal.network.session import PipSession + + +def parse_install_requirements(requirements_lock: str) -> List[InstallRequirement]: + return [ + constructors.install_req_from_parsed_requirement(pr) 
def deserialize_structured_args(args):
    """Decode the JSON-wrapped list arguments passed from the starlark rules.

    ``extra_pip_args`` and ``pip_data_exclude`` arrive as JSON objects that
    wrap a list under a single key. Every such argument that is present and
    not ``None`` is replaced, in place, by the wrapped value.

    Args:
        args: dict of parsed command line arguments. It is modified in place.

    Returns:
        The same ``args`` dict, for convenience.

    Raises:
        KeyError: if a wrapped argument carries none of the accepted keys.
    """
    # The single-wheel extractor reads ``pip_data_exclude`` from an "exclude"
    # wrapper, so that spelling is accepted here too. "args" stays accepted
    # for both arguments, so payloads that decoded before still decode.
    wrapper_keys = {
        "extra_pip_args": ("args",),
        "pip_data_exclude": ("exclude", "args"),
    }
    for arg_name, accepted in wrapper_keys.items():
        encoded = args.get(arg_name)
        if encoded is None:
            continue
        payload = json.loads(encoded)
        for key in accepted:
            if key in payload:
                args[arg_name] = payload[key]
                break
        else:
            # No accepted wrapper key: fail with a KeyError, as a plain
            # payload["args"] lookup would have.
            raise KeyError(accepted[-1])
    return args
def _parse_bool_flag(value: str) -> bool:
    """Convert the textual boolean received on the command line into a bool.

    ``type=bool`` cannot be used for this: ``bool("False")`` is ``True``
    because any non-empty string is truthy.
    """
    lowered = value.strip().lower()
    if lowered in ("true", "1", "yes"):
        return True
    if lowered in ("false", "0", "no", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % value)


def main() -> None:
    """Command-line entry point: parse the flags and write ``requirements.bzl``.

    The generated file is written to the current working directory and holds
    the text returned by ``generate_parsed_requirements_contents``.
    """
    parser = argparse.ArgumentParser(
        description="Create rules to incrementally fetch needed "
        "dependencies from a fully resolved requirements lock file."
    )
    parser.add_argument(
        "--requirements_lock",
        action="store",
        required=True,
        help="Path to fully resolved requirements.txt to use as the source of repos.",
    )
    parser.add_argument(
        "--quiet",
        # The repository rule sends str(<bool>), i.e. "True" or "False"; parse
        # the text so that "False" really turns quiet mode off.
        type=_parse_bool_flag,
        action="store",
        required=True,
        help="Whether to print stdout / stderr from child repos.",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        action="store",
        required=True,
        help="timeout to use for pip operation.",
    )
    arguments.parse_common_args(parser)
    args = parser.parse_args()

    with open("requirements.bzl", "w") as requirement_file:
        requirement_file.write(
            generate_parsed_requirements_contents(args)
        )
def main() -> None:
    """Build or fetch the wheel for one requirement and unpack it in place.

    Runs ``pip wheel --no-deps`` for the ``--requirement`` specifier, then
    hands the ``*.whl`` file found in the current working directory to
    ``bazel.extract_wheel`` in incremental mode.

    Raises:
        subprocess.CalledProcessError: if the pip invocation fails.
        RuntimeError: if pip succeeded but no ``*.whl`` file is present.
    """
    parser = argparse.ArgumentParser(
        description="Build and/or fetch a single wheel based on the requirement passed in"
    )
    parser.add_argument(
        "--requirement",
        action="store",
        required=True,
        help="A single PEP508 requirement specifier string.",
    )
    arguments.parse_common_args(parser)
    args = parser.parse_args()

    configure_reproducible_wheels()

    pip_args = [sys.executable, "-m", "pip", "--isolated", "wheel", "--no-deps"]
    if args.extra_pip_args:
        pip_args += json.loads(args.extra_pip_args)["args"]

    pip_args.append(args.requirement)

    # Assumes any errors are logged by pip so do nothing. This command will fail if pip fails
    subprocess.run(pip_args, check=True)

    name, extras_for_pkg = requirements._parse_requirement_for_extra(args.requirement)
    extras = {name: extras_for_pkg} if extras_for_pkg and name else dict()

    if args.pip_data_exclude:
        pip_data_exclude = json.loads(args.pip_data_exclude)["exclude"]
    else:
        pip_data_exclude = []

    # glob() returns matches in arbitrary order, so sort for a deterministic
    # pick, and fail with a readable message instead of a bare StopIteration
    # when there is nothing to extract.
    wheels = sorted(glob.glob("*.whl"))
    if not wheels:
        raise RuntimeError(
            "pip did not produce a wheel for requirement %r" % args.requirement
        )

    bazel.extract_wheel(
        wheels[0],
        extras,
        pip_data_exclude,
        args.enable_implicit_namespace_pkgs,
        incremental=True,
        incremental_repo_prefix=bazel.whl_library_repo_prefix(args.repo)
    )
0000000000..4b474d4f3c --- /dev/null +++ b/python/pip_install/parse_requirements_to_bzl/parse_requirements_to_bzl_test.py @@ -0,0 +1,39 @@ +import unittest +import argparse +import json +from tempfile import NamedTemporaryFile + +from python.pip_install.parse_requirements_to_bzl import generate_parsed_requirements_contents +from python.pip_install.extract_wheels.lib.bazel import ( + sanitised_repo_library_label, + whl_library_repo_prefix, + sanitised_repo_file_label +) + + +class TestParseRequirementsToBzl(unittest.TestCase): + + def test_generated_requirements_bzl(self) -> None: + with NamedTemporaryFile() as requirements_lock: + requirement_string = "foo==0.0.0" + requirements_lock.write(bytes(requirement_string, encoding="utf-8")) + requirements_lock.flush() + args = argparse.Namespace() + args.requirements_lock = requirements_lock.name + args.repo = "pip_parsed_deps" + extra_pip_args = ["--index-url=pypi.org/simple"] + args.extra_pip_args = json.dumps({"args": extra_pip_args}) + contents = generate_parsed_requirements_contents(args) + library_target = "@pip_parsed_deps_pypi__foo//:pkg" + whl_target = "@pip_parsed_deps_pypi__foo//:whl" + all_requirements = 'all_requirements = ["{library_target}"]'.format(library_target=library_target) + all_whl_requirements = 'all_whl_requirements = ["{whl_target}"]'.format(whl_target=whl_target) + self.assertIn(all_requirements, contents, contents) + self.assertIn(all_whl_requirements, contents, contents) + self.assertIn(requirement_string, contents, contents) + self.assertIn(requirement_string, contents, contents) + self.assertIn("'extra_pip_args': {}".format(repr(extra_pip_args)), contents, contents) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index b2cb0a8446..c07c9cdd86 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -2,17 +2,16 @@ load("//python/pip_install:repositories.bzl", 
"all_requirements") -def _pip_repository_impl(rctx): - python_interpreter = rctx.attr.python_interpreter - if rctx.attr.python_interpreter_target != None: - target = rctx.attr.python_interpreter_target - python_interpreter = rctx.path(target) - else: - if "/" not in python_interpreter: - python_interpreter = rctx.which(python_interpreter) - if not python_interpreter: - fail("python interpreter not found") +def _construct_pypath(rctx): + """Helper function to construct a PYTHONPATH. + + Contains entries for code in this repo as well as packages downloaded from //python/pip_install:repositories.bzl. + This allows us to run python code inside repository rule implementations. + Args: + rctx: Handle to the repository_context. + Returns: String of the PYTHONPATH. + """ rctx.file("BUILD", "") # Get the root directory of these rules @@ -24,17 +23,16 @@ def _pip_repository_impl(rctx): ] separator = ":" if not "windows" in rctx.os.name.lower() else ";" pypath = separator.join([str(p) for p in [rules_root] + thirdparty_roots]) + return pypath - args = [ - python_interpreter, - "-m", - "python.pip_install.extract_wheels", - "--requirements", - rctx.path(rctx.attr.requirements), - "--repo", - "@%s" % rctx.attr.name, - ] +def _parse_optional_attrs(rctx, args): + """Helper function to parse common attributes of pip_repository and whl_library repository rules. + Args: + rctx: Handle to the rule repository context. + args: A list of parsed args for the rule. + Returns: Augmented args list. 
+ """ if rctx.attr.extra_pip_args: args += [ "--extra_pip_args", @@ -50,6 +48,49 @@ def _pip_repository_impl(rctx): if rctx.attr.enable_implicit_namespace_pkgs: args.append("--enable_implicit_namespace_pkgs") + return args + +def _pip_repository_impl(rctx): + python_interpreter = rctx.attr.python_interpreter + if rctx.attr.python_interpreter_target != None: + target = rctx.attr.python_interpreter_target + python_interpreter = rctx.path(target) + else: + if "/" not in python_interpreter: + python_interpreter = rctx.which(python_interpreter) + if not python_interpreter: + fail("python interpreter not found") + + if rctx.attr.incremental and not rctx.attr.requirements_lock: + fail("Incremental mode requires a requirements_lock attribute be specified.") + + pypath = _construct_pypath(rctx) + + if rctx.attr.incremental: + args = [ + python_interpreter, + "-m", + "python.pip_install.parse_requirements_to_bzl", + "--requirements_lock", + rctx.path(rctx.attr.requirements_lock), + # pass quiet and timeout args through to child repos. 
+ "--quiet", + str(rctx.attr.quiet), + "--timeout", + str(rctx.attr.timeout), + ] + else: + args = [ + python_interpreter, + "-m", + "python.pip_install.extract_wheels", + "--requirements", + rctx.path(rctx.attr.requirements), + ] + + args += ["--repo", rctx.attr.name] + args = _parse_optional_attrs(rctx, args) + result = rctx.execute( args, environment = { @@ -59,52 +100,73 @@ def _pip_repository_impl(rctx): timeout = rctx.attr.timeout, quiet = rctx.attr.quiet, ) + if result.return_code: - fail("rules_python_external failed: %s (%s)" % (result.stdout, result.stderr)) + fail("rules_python failed: %s (%s)" % (result.stdout, result.stderr)) return -pip_repository = repository_rule( - attrs = { - "enable_implicit_namespace_pkgs": attr.bool( - default = False, - doc = """ +common_attrs = { + "enable_implicit_namespace_pkgs": attr.bool( + default = False, + doc = """ If true, disables conversion of native namespace packages into pkg-util style namespace packages. When set all py_binary and py_test targets must specify either `legacy_create_init=False` or the global Bazel option `--incompatible_default_to_explicit_init_py` to prevent `__init__.py` being automatically generated in every directory. This option is required to support some packages which cannot handle the conversion to pkg-util style. """, - ), - "extra_pip_args": attr.string_list( - doc = "Extra arguments to pass on to pip. Must not contain spaces.", - ), - "pip_data_exclude": attr.string_list( - doc = "Additional data exclusion parameters to add to the pip packages BUILD file.", - ), - "python_interpreter": attr.string(default = "python3"), - "python_interpreter_target": attr.label(allow_single_file = True, doc = """ + ), + "extra_pip_args": attr.string_list( + doc = "Extra arguments to pass on to pip. 
Must not contain spaces.", + ), + "pip_data_exclude": attr.string_list( + doc = "Additional data exclusion parameters to add to the pip packages BUILD file.", + ), + "python_interpreter": attr.string(default = "python3"), + "python_interpreter_target": attr.label( + allow_single_file = True, + doc = """ If you are using a custom python interpreter built by another repository rule, use this attribute to specify its BUILD target. This allows pip_repository to invoke pip using the same interpreter as your toolchain. If set, takes precedence over python_interpreter. -"""), - "quiet": attr.bool( - default = True, - doc = "If True, suppress printing stdout and stderr output to the terminal.", - ), - "requirements": attr.label( - allow_single_file = True, - mandatory = True, - doc = "A 'requirements.txt' pip requirements file.", - ), - # 600 is documented as default here: https://docs.bazel.build/versions/master/skylark/lib/repository_ctx.html#execute - "timeout": attr.int( - default = 600, - doc = "Timeout (in seconds) on the rule's execution duration.", - ), - }, - implementation = _pip_repository_impl, +""", + ), + "quiet": attr.bool( + default = True, + doc = "If True, suppress printing stdout and stderr output to the terminal.", + ), + # 600 is documented as default here: https://docs.bazel.build/versions/master/skylark/lib/repository_ctx.html#execute + "timeout": attr.int( + default = 600, + doc = "Timeout (in seconds) on the rule's execution duration.", + ), +} + +pip_repository_attrs = { + "incremental": attr.bool( + default = False, + doc = "Create the repository in incremental mode.", + ), + "requirements": attr.label( + allow_single_file = True, + doc = "A 'requirements.txt' pip requirements file.", + ), + "requirements_lock": attr.label( + allow_single_file = True, + doc = """ +A fully resolved 'requirements.txt' pip requirement file containing the transitive set of your dependencies. 
If this file is passed instead +of 'requirements' no resolve will take place and pip_repository will create individual repositories for each of your dependencies so that +wheels are fetched/built only for the targets specified by 'build/run/test'. +""", + ), +} + +pip_repository_attrs.update(**common_attrs) + +pip_repository = repository_rule( + attrs = pip_repository_attrs, doc = """A rule for importing `requirements.txt` dependencies into Bazel. This rule imports a `requirements.txt` file and generates a new @@ -144,4 +206,55 @@ py_binary( ) ``` """, + implementation = _pip_repository_impl, +) + +def _impl_whl_library(rctx): + # pointer to parent repo so these rules rerun if the definitions in requirements.bzl change. + _parent_repo_label = Label("@{parent}//:requirements.bzl".format(parent=rctx.attr.repo)) + pypath = _construct_pypath(rctx) + args = [ + rctx.attr.python_interpreter, + "-m", + "python.pip_install.parse_requirements_to_bzl.extract_single_wheel", + "--requirement", + rctx.attr.requirement, + "--repo", + rctx.attr.repo, + ] + args = _parse_optional_attrs(rctx, args) + result = rctx.execute( + args, + environment = { + # Manually construct the PYTHONPATH since we cannot use the toolchain here + "PYTHONPATH": pypath, + }, + quiet = rctx.attr.quiet, + timeout = rctx.attr.timeout, + ) + + if result.return_code: + fail("whl_library %s failed: %s (%s)" % (rctx.attr.name, result.stdout, result.stderr)) + + return + +whl_library_attrs = { + "repo": attr.string( + mandatory = True, + doc = "Pointer to parent repo name. Used to make these rules rerun if the parent repo changes.", + ), + "requirement": attr.string( + mandatory = True, + doc = "Python requirement string describing the package to make available", + ), +} + +whl_library_attrs.update(**common_attrs) + +whl_library = repository_rule( + attrs = whl_library_attrs, + doc = """ +Downloads and extracts a single wheel into a bazel repo based on the requirement string passed in.
+Instantiated from pip_repository and inherits config options from there.""", + implementation = _impl_whl_library, )