Smarter namespace pkg detection in pip_install (issue #381) (#483) · kmARC/rules_python@03c4523 · GitHub
[go: up one dir, main page]

Skip to content

Commit 03c4523

Browse files
author
Jonathon Belotti
authored
Smarter namespace pkg detection in pip_install (issue bazel-contrib#381) (bazel-contrib#483)
1 parent 6202ed2 commit 03c4523

File tree

2 files changed

+123
-11
lines changed

2 files changed

+123
-11
lines changed

python/pip_install/extract_wheels/lib/namespace_pkgs.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Utility functions to discover python package types"""
22
import os
3+
import pathlib # supported in >= 3.4
34
import textwrap
45
from typing import Set, List, Optional
56

@@ -19,26 +20,30 @@ def implicit_namespace_packages(
1920
Returns:
2021
The set of directories found under root to be packages using the native namespace method.
2122
"""
22-
namespace_pkg_dirs = set()
23-
for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
24-
# We are only interested in dirs with no __init__.py file
23+
namespace_pkg_dirs: Set[str] = set()
24+
standard_pkg_dirs: Set[str] = set()
25+
# Traverse bottom-up because a directory can be a namespace pkg because its child contains module files.
26+
for dirpath, dirnames, filenames in os.walk(directory, topdown=False):
2527
if "__init__.py" in filenames:
26-
dirnames[:] = [] # Remove dirnames from search
28+
standard_pkg_dirs.add(dirpath)
2729
continue
30+
elif ignored_dirnames:
31+
is_ignored_dir = dirpath in ignored_dirnames
32+
child_of_ignored_dir = any(d in pathlib.Path(dirpath).parents for d in ignored_dirnames)
33+
if is_ignored_dir or child_of_ignored_dir:
34+
continue
2835

29-
for ignored_dir in ignored_dirnames or []:
30-
if ignored_dir in dirnames:
31-
dirnames.remove(ignored_dir)
32-
33-
non_empty_directory = dirnames or filenames
36+
dir_includes_py_modules = _includes_python_modules(filenames)
37+
parent_of_namespace_pkg = any(str(pathlib.Path(dirpath, d)) in namespace_pkg_dirs for d in dirnames)
38+
parent_of_standard_pkg = any(str(pathlib.Path(dirpath, d)) in standard_pkg_dirs for d in dirnames)
39+
parent_of_pkg = parent_of_namespace_pkg or parent_of_standard_pkg
3440
if (
35-
non_empty_directory
41+
(dir_includes_py_modules or parent_of_pkg)
3642
and
3743
# The root of the directory should never be an implicit namespace
3844
dirpath != directory
3945
):
4046
namespace_pkg_dirs.add(dirpath)
41-
4247
return namespace_pkg_dirs
4348

4449

@@ -68,3 +73,29 @@ def add_pkgutil_style_namespace_pkg_init(dir_path: str) -> None:
6873
"""
6974
)
7075
)
76+
77+
78+
def _includes_python_modules(files: List[str]) -> bool:
79+
"""
80+
In order to only transform directories that Python actually considers namespace pkgs
81+
we need to detect if a directory includes Python modules.
82+
83+
Which files are loadable as modules is extension based, and the particular set of extensions
84+
varies by platform.
85+
86+
See:
87+
1. https://github.com/python/cpython/blob/7d9d25dbedfffce61fc76bc7ccbfa9ae901bf56f/Lib/importlib/machinery.py#L19
88+
2. PEP 420 -- Implicit Namespace Packages, Specification - https://www.python.org/dev/peps/pep-0420/#specification
89+
3. dynload_shlib.c and dynload_win.c in python/cpython.
90+
"""
91+
module_suffixes = {
92+
".py", # Source modules
93+
".pyc", # Compiled bytecode modules
94+
".so", # Unix extension modules
95+
".pyd" # https://docs.python.org/3/faq/windows.html#is-a-pyd-file-the-same-as-a-dll
96+
}
97+
return any(
98+
pathlib.Path(f).suffix in module_suffixes
99+
for f
100+
in files
101+
)

python/pip_install/extract_wheels/lib/namespace_pkgs_test.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,87 @@ def test_empty_case(self) -> None:
6868
actual = namespace_pkgs.implicit_namespace_packages(directory.root())
6969
self.assertEqual(actual, set())
7070

71+
def test_ignores_non_module_files_in_directories(self) -> None:
    """A directory holding only `.pyi` / `py.typed` files must not be reported."""
    tmp = TempDir()
    for relative_path in ("foo/__init__.pyi", "foo/py.typed"):
        tmp.add_file(relative_path)

    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, set())
78+
79+
def test_parent_child_relationship_of_namespace_pkgs(self):
    """A module-bearing directory and each of its ancestors below the root are expected."""
    tmp = TempDir()
    for relative_path in (
        "foo/bar/biff/my_module.py",
        "foo/bar/biff/another_module.py",
    ):
        tmp.add_file(relative_path)

    wanted = {
        tmp.root() + suffix
        for suffix in ("/foo", "/foo/bar", "/foo/bar/biff")
    }
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)
91+
92+
def test_parent_child_relationship_of_namespace_and_standard_pkgs(self):
    """A dir with `__init__.py` is not expected itself, but its ancestors are."""
    tmp = TempDir()
    for relative_path in (
        "foo/bar/biff/__init__.py",
        "foo/bar/biff/another_module.py",
    ):
        tmp.add_file(relative_path)

    wanted = {tmp.root() + suffix for suffix in ("/foo", "/foo/bar")}
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)
103+
104+
def test_parent_child_relationship_of_namespace_and_nested_standard_pkgs(self):
    """Only `foo` (parent of a standard pkg) and `fim` (holds a module) are expected."""
    tmp = TempDir()
    for relative_path in (
        "foo/bar/__init__.py",
        "foo/bar/biff/another_module.py",
        "foo/bar/biff/__init__.py",
        "foo/bar/boof/big_module.py",
        "foo/bar/boof/__init__.py",
        "fim/in_a_ns_pkg.py",
    ):
        tmp.add_file(relative_path)

    wanted = {tmp.root() + suffix for suffix in ("/foo", "/fim")}
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)
119+
120+
def test_recognized_all_nonstandard_module_types(self):
    """`.pyc`, `.so` and `.pyd` files are each expected to mark their dir chain as namespace pkgs."""
    tmp = TempDir()
    for relative_path in (
        "ayy/my_module.pyc",
        "bee/ccc/dee/eee.so",
        "eff/jee/aych.pyd",
    ):
        tmp.add_file(relative_path)

    wanted = {
        tmp.root() + suffix
        for suffix in (
            "/ayy",
            "/bee",
            "/bee/ccc",
            "/bee/ccc/dee",
            "/eff",
            "/eff/jee",
        )
    }
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)
136+
137+
def test_skips_ignored_directories(self):
    """A directory passed via `ignored_dirnames` is left out; its sibling and parent are still expected."""
    tmp = TempDir()
    for relative_path in ("foo/boo/my_module.py", "foo/bar/another_module.py"):
        tmp.add_file(relative_path)

    wanted = {tmp.root() + suffix for suffix in ("/foo", "/foo/bar")}
    search_root = tmp.root()
    skipped = [tmp.root() + "/foo/boo"]
    found = namespace_pkgs.implicit_namespace_packages(
        search_root,
        ignored_dirnames=skipped,
    )
    self.assertEqual(found, wanted)
151+
71152

72153
if __name__ == "__main__":
73154
unittest.main()

0 commit comments

Comments
 (0)
0