-
-
Notifications
You must be signed in to change notification settings - Fork 32.5k
gh-114763: Protect lazy loading modules from attribute access race #114781
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
d57f4eb
3db717c
5424d0c
96c08c5
3642ddc
46238e5
6accf0c
bb2292f
57fe084
40383a0
6322dd6
023d65d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
|
||
import _imp | ||
import sys | ||
import threading | ||
import types | ||
|
||
|
||
|
@@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType): | |
|
||
def __getattribute__(self, attr): | ||
"""Trigger the load of the module and return the attribute.""" | ||
# All module metadata must be garnered from __spec__ in order to avoid | ||
# using mutated values. | ||
# Stop triggering this method. | ||
self.__class__ = types.ModuleType | ||
# Get the original name to make sure no object substitution occurred | ||
# in sys.modules. | ||
original_name = self.__spec__.name | ||
# Figure out exactly what attributes were mutated between the creation | ||
# of the module and now. | ||
attrs_then = self.__spec__.loader_state['__dict__'] | ||
attrs_now = self.__dict__ | ||
attrs_updated = {} | ||
for key, value in attrs_now.items(): | ||
# Code that set the attribute may have kept a reference to the | ||
# assigned object, making identity more important than equality. | ||
if key not in attrs_then: | ||
attrs_updated[key] = value | ||
elif id(attrs_now[key]) != id(attrs_then[key]): | ||
attrs_updated[key] = value | ||
self.__spec__.loader.exec_module(self) | ||
# If exec_module() was used directly there is no guarantee the module | ||
# object was put into sys.modules. | ||
if original_name in sys.modules: | ||
if id(self) != id(sys.modules[original_name]): | ||
raise ValueError(f"module object for {original_name!r} " | ||
"substituted in sys.modules during a lazy " | ||
"load") | ||
# Update after loading since that's what would happen in an eager | ||
# loading situation. | ||
self.__dict__.update(attrs_updated) | ||
__spec__ = object.__getattribute__(self, '__spec__') | ||
loader_state = __spec__.loader_state | ||
with loader_state['lock']: | ||
# Only the first thread to get the lock should trigger the load | ||
# and reset the module's class. The rest can now getattr(). | ||
if object.__getattribute__(self, '__class__') is _LazyModule: | ||
# The first thread comes here multiple times as it descends the | ||
# call stack. The first time, it sets is_loading and triggers | ||
# exec_module(), which will access module.__dict__, module.__name__, | ||
# and/or module.__spec__, reentering this method. These accesses | ||
# need to be allowed to proceed without triggering the load again. | ||
if loader_state['is_loading'].is_set() and attr.startswith('__') and attr.endswith('__'): | ||
return object.__getattribute__(self, attr) | ||
loader_state['is_loading'].set() | ||
effigies marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
__dict__ = object.__getattribute__(self, '__dict__') | ||
|
||
# All module metadata must be gathered from __spec__ in order to avoid | ||
# using mutated values. | ||
# Get the original name to make sure no object substitution occurred | ||
# in sys.modules. | ||
original_name = __spec__.name | ||
# Figure out exactly what attributes were mutated between the creation | ||
# of the module and now. | ||
attrs_then = loader_state['__dict__'] | ||
attrs_now = __dict__ | ||
attrs_updated = {} | ||
for key, value in attrs_now.items(): | ||
# Code that set an attribute may have kept a reference to the | ||
# assigned object, making identity more important than equality. | ||
if key not in attrs_then: | ||
attrs_updated[key] = value | ||
elif id(attrs_now[key]) != id(attrs_then[key]): | ||
attrs_updated[key] = value | ||
__spec__.loader.exec_module(self) | ||
# If exec_module() was used directly there is no guarantee the module | ||
# object was put into sys.modules. | ||
if original_name in sys.modules: | ||
if id(self) != id(sys.modules[original_name]): | ||
raise ValueError(f"module object for {original_name!r} " | ||
"substituted in sys.modules during a lazy " | ||
"load") | ||
# Update after loading since that's what would happen in an eager | ||
# loading situation. | ||
__dict__.update(attrs_updated) | ||
# Finally, stop triggering this method. | ||
self.__class__ = types.ModuleType | ||
|
||
return getattr(self, attr) | ||
|
||
def __delattr__(self, attr): | ||
|
@@ -244,5 +263,7 @@ def exec_module(self, module): | |
loader_state = {} | ||
loader_state['__dict__'] = module.__dict__.copy() | ||
loader_state['__class__'] = module.__class__ | ||
loader_state['lock'] = threading.RLock() | ||
loader_state['is_loading'] = threading.Event() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, can we use a Using an additional There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, good catch. I will switch to a bool and push later today. |
||
module.__spec__.loader_state = loader_state | ||
module.__class__ = _LazyModule |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Protect modules loaded with :class:`importlib.util.LazyLoader` from race | ||
conditions when multiple threads try to access attributes before the loading | ||
is complete. |
Uh oh!
There was an error while loading. Please reload this page.