From 2d54d8ef91085cc7ac6484e22b21713df78afeb5 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 22 May 2023 18:39:56 -0700 Subject: [PATCH 1/4] gh-104372: Drop the GIL around the vfork() call. On Linux where the `subprocess` module can use the `vfork` syscall for faster spawning, prevent the parent process from blocking other threads by dropping the GIL while it waits for the vfork'ed child process `exec` outcome. This prevents spawning a binary from a slow filesystem from blocking the rest of the application. Fixes #104372. --- ...-05-22-18-39-53.gh-issue-104372.7tDRaK.rst | 5 +++++ Modules/_posixsubprocess.c | 21 ++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-22-18-39-53.gh-issue-104372.7tDRaK.rst diff --git a/Misc/NEWS.d/next/Library/2023-05-22-18-39-53.gh-issue-104372.7tDRaK.rst b/Misc/NEWS.d/next/Library/2023-05-22-18-39-53.gh-issue-104372.7tDRaK.rst new file mode 100644 index 00000000000000..ea13ec85543ca2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-22-18-39-53.gh-issue-104372.7tDRaK.rst @@ -0,0 +1,5 @@ +On Linux where :mod:`subprocess` can use the ``vfork()`` syscall for faster +spawning, prevent the parent process from blocking other threads by dropping +the GIL while it waits for the vfork'ed child process ``exec()`` outcome. +This prevents spawning a binary from a slow filesystem from blocking the +rest of the application. diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c index 1b7fe71186a163..af79f032a77dad 100644 --- a/Modules/_posixsubprocess.c +++ b/Modules/_posixsubprocess.c @@ -559,7 +559,7 @@ reset_signal_handlers(const sigset_t *child_sigmask) * required by POSIX but not supported natively on Linux. Another reason to * avoid this family of functions is that sharing an address space between * processes running with different privileges is inherently insecure. - * See bpo-35823 for further discussion and references. + * See https://bugs.python.org/issue35823 for discussion and references. * * In some C libraries, setrlimit() has the same thread list/signalling * behavior since resource limits were per-thread attributes before @@ -798,6 +798,7 @@ do_fork_exec(char *const exec_array[], pid_t pid; #ifdef VFORK_USABLE + PyThreadState *vfork_tstate_save = NULL; if (child_sigmask) { /* These are checked by our caller; verify them in debug builds. */ assert(uid == (uid_t)-1); @@ -805,8 +806,20 @@ do_fork_exec(char *const exec_array[], assert(extra_group_size < 0); assert(preexec_fn == Py_None); + /* Drop the GIL so that other threads can continue execution while this + * thread in the parent remains blocked per vfork-semantics on the + * child's exec syscall outcome. Exec requires filesystem access which + * can take an arbitrarily long time. This addresses GH-104372. + * + * The vfork'ed child still runs in our address space. Per POSIX it + * must be limited to nothing but exec, but the Linux implementation + * is a little more usable. See the child_exec() comment. + */ + vfork_tstate_save = PyEval_SaveThread(); pid = vfork(); if (pid == (pid_t)-1) { + PyEval_RestoreThread(vfork_tstate_save); + vfork_tstate_save = NULL; /* If vfork() fails, fall back to using fork(). When it isn't * allowed in a process by the kernel, vfork can return -1 * with errno EINVAL. https://bugs.python.org/issue47151. */ @@ -819,6 +832,12 @@ do_fork_exec(char *const exec_array[], } if (pid != 0) { + // Parent process. +#ifdef VFORK_USABLE + if (vfork_tstate_save != NULL) { + PyEval_RestoreThread(vfork_tstate_save); + } +#endif return pid; } From eb385733ce276040999187f711a6520e2578c828 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 23 May 2023 22:56:09 -0700 Subject: [PATCH 2/4] Simplify: avoid -Wclobbered the compiler warning. Don't write to the vfork_tstate_save pointer local after vfork. --- Modules/_posixsubprocess.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c index af79f032a77dad..719b0b7b739aa8 100644 --- a/Modules/_posixsubprocess.c +++ b/Modules/_posixsubprocess.c @@ -798,7 +798,7 @@ do_fork_exec(char *const exec_array[], pid_t pid; #ifdef VFORK_USABLE - PyThreadState *vfork_tstate_save = NULL; + PyThreadState *vfork_tstate_save; if (child_sigmask) { /* These are checked by our caller; verify them in debug builds. */ assert(uid == (uid_t)-1); @@ -808,7 +808,7 @@ do_fork_exec(char *const exec_array[], /* Drop the GIL so that other threads can continue execution while this * thread in the parent remains blocked per vfork-semantics on the - * child's exec syscall outcome. Exec requires filesystem access which + * child's exec syscall outcome. Exec does filesystem access which * can take an arbitrarily long time. This addresses GH-104372. * * The vfork'ed child still runs in our address space. Per POSIX it @@ -817,9 +817,11 @@ do_fork_exec(char *const exec_array[], */ vfork_tstate_save = PyEval_SaveThread(); pid = vfork(); - if (pid == (pid_t)-1) { + if (pid != 0) { + // Not in the child process, reacquire the GIL. PyEval_RestoreThread(vfork_tstate_save); - vfork_tstate_save = NULL; + } + if (pid == (pid_t)-1) { /* If vfork() fails, fall back to using fork(). When it isn't * allowed in a process by the kernel, vfork can return -1 * with errno EINVAL. https://bugs.python.org/issue47151. */ @@ -833,11 +835,6 @@ do_fork_exec(char *const exec_array[], if (pid != 0) { // Parent process. -#ifdef VFORK_USABLE - if (vfork_tstate_save != NULL) { - PyEval_RestoreThread(vfork_tstate_save); - } -#endif return pid; } From 9197a4f474abc5caa202aa1ae6dbc1402a4c84c6 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 23 May 2023 23:32:56 -0700 Subject: [PATCH 3/4] Docs: clarify that timeout= can't cover process creation. --- Doc/library/subprocess.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index 53dfbf827260c9..738e611c05adbf 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -57,10 +57,13 @@ underlying :class:`Popen` interface can be used directly. and combine both streams into one, use ``stdout=PIPE`` and ``stderr=STDOUT`` instead of *capture_output*. - The *timeout* argument is passed to :meth:`Popen.communicate`. If the timeout - expires, the child process will be killed and waited for. The - :exc:`TimeoutExpired` exception will be re-raised after the child process - has terminated. + A *timeout* may be specified in seconds, it is internally passed on to + :meth:`Popen.communicate`. If the timeout expires, the child process will be + killed and waited for. The :exc:`TimeoutExpired` exception will be + re-raised after the child process has terminated. The initial process + creation itself cannot be interrupted on many platform APIs so you are not + guaranteed to see a timeout exception until at least after however long + process creation takes. The *input* argument is passed to :meth:`Popen.communicate` and thus to the subprocess's stdin. If used it must be a byte sequence, or a string if @@ -734,7 +737,7 @@ arguments. code. All of the functions and methods that accept a *timeout* parameter, such as -:func:`call` and :meth:`Popen.communicate` will raise :exc:`TimeoutExpired` if +:func:`run` and :meth:`Popen.communicate` will raise :exc:`TimeoutExpired` if the timeout expires before the process exits. Exceptions defined in this module all inherit from :exc:`SubprocessError`. From df7f7ab4587b5cc59bfe94dfe258e9ab8b19e974 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 25 May 2023 12:51:34 -0700 Subject: [PATCH 4/4] update the comment --- Modules/_posixsubprocess.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c index 5807b0c56a94e1..36470804c6a165 100644 --- a/Modules/_posixsubprocess.c +++ b/Modules/_posixsubprocess.c @@ -813,7 +813,8 @@ do_fork_exec(char *const exec_array[], * * The vfork'ed child still runs in our address space. Per POSIX it * must be limited to nothing but exec, but the Linux implementation - * is a little more usable. See the child_exec() comment. + * is a little more usable. See the child_exec() comment - The child + * MUST NOT re-acquire the GIL. */ vfork_tstate_save = PyEval_SaveThread(); pid = vfork();