8000 [3.12] gh-117378: Fix multiprocessing forkserver preload sys.path inh… · python/cpython@7bb92ed · GitHub
[go: up one dir, main page]

Skip to content

Commit 7bb92ed

Browse files
[3.12] gh-117378: Fix multiprocessing forkserver preload sys.path inheritance. (GH-126538) (GH-126633)
gh-117378: Fix multiprocessing forkserver preload sys.path inheritance. `sys.path` was not properly being sent from the parent process when launching the multiprocessing forkserver process to preload imports. This bug has been there since the forkserver start method was introduced in Python 3.4. It was always _supposed_ to inherit `sys.path` the same way the spawn method does. Observable behavior change: A `''` value in `sys.path` will now be replaced in the forkserver's `sys.path` with an absolute pathname `os.path.abspath(os.getcwd())` saved at the time that `multiprocessing` was imported in the parent process as it already was when using the spawn start method. **This will only be observable during forkserver preload imports**. The code invoked before calling things in another process already correctly sets `sys.path`. Which is likely why this went unnoticed for so long as a mere performance issue in some configurations. A workaround for the bug on impacted Pythons is to set PYTHONPATH in the environment before multiprocessing's forkserver process was started. Not perfect as that is then inherited by other children, etc, but likely good enough for many people's purposes. (cherry picked from commit 9d08423) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 5fb443d commit 7bb92ed

File tree

3 files changed

+97
-0
lines changed

3 files changed

+97
-0
lines changed

Lib/multiprocessing/forkserver.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ def ensure_running(self):
167167
def main(listener_fd, alive_r, preload, main_path=None, sys_path=None):
168168
'''Run forkserver.'''
169169
if preload:
170+
if sys_path is not None:
171+
sys.path[:] = sys_path
170172
if '__main__' in preload and main_path is not None:
171173
process.current_process()._inheriting = True
172174
try:

Lib/test/_test_multiprocessing.py

Lines changed: 78 additions & 0 deletions
< 8000 td data-grid-cell-id="diff-c3db62f7c1928499afd71830c22c8bb8f56843a4533cecf69137973d73c489fc-6198-6239-0" data-selected="false" role="gridcell" style="background-color:var(--diffBlob-additionNum-bgColor, var(--diffBlob-addition-bgColor-num));text-align:center" tabindex="-1" valign="top" class="focusable-grid-cell diff-line-number position-relative left-side">
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import sys
1313
import os
1414
import gc
15+
import importlib
1516
import errno
1617
import functools
1718
import signal
@@ -20,8 +21,10 @@
2021
import socket
2122
import random
2223
import logging
24+
import shutil
2325
import subprocess
2426
import struct
27+
import tempfile
2528
import operator
2629
import pickle
2730
import weakref
@@ -6196,6 +6199,81 @@ def submain(): pass
61966199
self.assertFalse(err, msg=err.decode('utf-8'))
61976200

61986201

6202+
class _TestSpawnedSysPath(BaseTestCase):
6203+
"""Test that sys.path is setup in forkserver and spawn processes."""
6204+
6205+
ALLOWED_TYPES = ('processes',)
6206+
6207+
def setUp(self):
6208+
self._orig_sys_path = list(sys.path)
6209+
self._temp_dir = tempfile.mkdtemp(prefix="test_sys_path-")
6210+
self._mod_name = "unique_test_mod"
6211+
module_path = os.path.join(self._temp_dir, f"{self._mod_name}.py")
6212+
with open(module_path, "w", encoding="utf-8") as mod:
6213+
mod.write("# A simple test module\n")
6214+
sys.path[:] = [p for p in sys.path if p] # remove any existing ""s
6215+
sys.path.insert(0, self._temp_dir)
6216+
sys.path.insert(0, "") # Replaced with an abspath in child.
6217+
try:
6218+
self._ctx_forkserver = multiprocessing.get_context("forkserver")
6219+
except ValueError:
6220+
self._ctx_forkserver = None
6221+
self._ctx_spawn = multiprocessing.get_context("spawn")
6222+
6223+
def tearDown(self):
6224+
sys.path[:] = self._orig_sys_path
6225+
shutil.rmtree(self._temp_dir, ignore_errors=True)
6226+
6227+
@staticmethod
6228+
def enq_imported_module_names(queue):
6229+
queue.put(tuple(sys.modules))
6230+
6231+
def test_forkserver_preload_imports_sys_path(self):
6232+
ctx = self._ctx_forkserver
6233+
if not ctx:
6234+
self.skipTest("requires forkserver start method.")
6235+
self.assertNotIn(self._mod_name, sys.modules)
6236+
multiprocessing.forkserver._forkserver._stop() # Must be fresh.
6237+
ctx.set_forkserver_preload(
6238+
["test.test_multiprocessing_forkserver", self._mod_name])
6239+
q = ctx.Queue()
6240+
proc = ctx.Process(target=self.enq_imported_module_names, args=(q,))
6241+
proc.start()
6242+
proc.join()
6243+
child_imported_modules = q.get()
6244+
q.close()
6245+
self.assertIn(self._mod_name, child_imported_modules)
6246+
6247+
@staticmethod
6248+
def enq_sys_path_and_import(queue, mod_name):
6249+
queue.put(sys.path)
6250+
try:
6251+
importlib.import_module(mod_name)
6252+
except ImportError as exc:
6253+
queue.put(exc)
6254+
else:
6255+
queue.put(None)
6256+
6257+
def test_child_sys_path(self):
6258+
for ctx in (self._ctx_spawn, self._ctx_forkserver):
6259+
if not ctx:
6260+
continue
6261+
with self.subTest(f"{ctx.get_start_method()} start method"):
6262+
q = ctx.Queue()
6263+
proc = ctx.Process(target=self.enq_sys_path_and_import,
6264+
args=(q, self._mod_name))
6265+
proc.start()
6266+
proc.join()
6267+
child_sys_path = q.get()
6268+
import_error = q.get()
6269+
q.close()
6270+
self.assertNotIn("", child_sys_path) # replaced by an abspath
6271+
self.assertIn(self._temp_dir, child_sys_path) # our addition
6272+
# ignore the first element, it is the absolute "" replacement
6273+
self.assertEqual(child_sys_path[1:], sys.path[1:])
6274+
self.assertIsNone(import_error, msg=f"child could not import {self._mod_name}")
6275+
6276+
61996277
class MiscTestCase(unittest.TestCase):
62006278
def test__all__(self):
62016279
# Just make sure names in not_exported are excluded
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Fixed the :mod:`multiprocessing` ``"forkserver"`` start method forkserver
2+
process to correctly inherit the parent's :data:`sys.path` during the importing
3+
of :func:`multiprocessing.set_forkserver_preload` modules in the same manner as
4+
:data:`sys.path` is configured in workers before executing work items.
5+
6+
This bug caused some forkserver module preloading to silently fail to preload.
7+
This manifested as a performance degration in child processes when the
8+
``sys.path`` was required due to additional repeated work in every worker.
9+
10+
It could also have a side effect of ``""`` remaining in :data:`sys.path` during
11+
forkserver preload imports instead of the absolute path from :func:`os.getcwd`
12+
at multiprocessing import time used in the worker ``sys.path``.
13+
14+
Potentially leading to incorrect imports from the wrong location during
15+
preload. We are unaware of that actually happening. The issue was discovered
16+
by someone observing unexpected preload performance gains.
17+

0 commit comments

Comments
 (0)
0