8000 bpo-6721: Hold logging locks across fork() (GH-4071) (#9291) · python/cpython@3b69993 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3b69993

Browse files
miss-islingtongpshead
authored andcommitted
bpo-6721: Hold logging locks across fork() (GH-4071) (#9291)
bpo-6721: When os.fork() was called while another thread holds a logging lock, the child process may deadlock when it tries to log. This fixes that by acquiring all logging locks before fork and releasing them afterwards. A regression test that fails before this change is included. Within the new unittest itself: There is a small _potential_ due to mixing of fork and a thread in the child process if the parent's thread happened to hold a non-reentrant library call lock (malloc?) when the os.fork() happens. buildbots and time will tell if this actually manifests itself in this test or not. :/ A functionality test that avoids that would be a challenge. An alternate test that isn't trying to produce the deadlock itself but just checking that the release and acquire calls are made would be the next best alternative if so. (cherry picked from commit 1900384) Co-authored-by: Gregory P. Smith <greg@krypto.org> [Google]
1 parent 1a21893 commit 3b69993

File tree

3 files changed

+119
-0
lines changed

3 files changed

+119
-0
lines changed

Lib/logging/__init__.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,55 @@ def _releaseLock():
225225
if _lock:
226226
_lock.release()
227227

228+
229+
# Prevent a held logging lock from blocking a child from logging.
230+
231+
if not hasattr(os, 'register_at_fork'): # Windows and friends.
232+
def _register_at_fork_acquire_release(instance):
233+
pass # no-op when os.register_at_fork does not exist.
234+
else: # The os.register_at_fork API exists
235+
os.register_at_fork(before=_acquireLock,
236+
after_in_child=_releaseLock,
237+
after_in_parent=_releaseLock)
238+
239+
# A collection of instances with acquire and release methods (logging.Handler)
240+
# to be called before and after fork. The weakref avoids us keeping discarded
241+
# Handler instances alive forever in case an odd program creates and destroys
242+
# many over its lifetime.
243+
_at_fork_acquire_release_weakset = weakref.WeakSet()
244+
245+
246+
def _register_at_fork_acquire_release(instance):
247+
# We put the instance itself in a single WeakSet as we MUST have only
248+
# one atomic weak ref. used by both before and after atfork calls to
249+
# guarantee matched pairs of acquire and release calls.
250+
_at_fork_acquire_release_weakset.add(instance)
251+
252+
253+
def _at_fork_weak_calls(method_name):
254+
for instance in _at_fork_acquire_release_weakset:
255+
method = getattr(instance, method_name)
256+
try:
257+
method()
258+
except Exception as err:
259+
# Similar to what PyErr_WriteUnraisable does.
260+
print("Ignoring exception from logging atfork", instance,
261+
method_name, "method:", err, file=sys.stderr)
262+
263+
264+
def _before_at_fork_weak_calls():
265+
_at_fork_weak_calls('acquire')
266+
267+
268+
def _after_at_fork_weak_calls():
269+
_at_fork_weak_calls('release')
270+
271+
272+
os.register_at_fork(before=_before_at_fork_weak_calls,
273+
after_in_child=_after_at_fork_weak_calls,
274+
after_in_parent=_after_at_fork_weak_calls)
275+
276+
228277
#---------------------------------------------------------------------------
229278
# The logging record
230279
#---------------------------------------------------------------------------
@@ -795,6 +844,7 @@ def createLock(self):
795844
Acquire a thread lock for serializing access to the underlying I/O.
796845
"""
797846
self.lock = threading.RLock()
847+
_register_at_fork_acquire_release(self)
798848

799849
def acquire(self):
800850
"""

Lib/test/test_logging.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import queue
3636
import random
3737
import re
38+
import< 8000 /span> signal
3839
import socket
3940
import struct
4041
import sys
@@ -666,6 +667,72 @@ def remove_loop(fname, tries):
666667
if os.path.exists(fn):
667668
os.unlink(fn)
668669

670+
# The implementation relies on os.register_at_fork existing, but we test
671+
# based on os.fork existing because that is what users and this test use.
672+
# This helps ensure that when fork exists (the important concept) that the
673+
# register_at_fork mechanism is also present and used.
674+
@unittest.skipIf(not hasattr(os, 'fork'), 'Test requires os.fork().')
675+
def test_post_fork_child_no_deadlock(self):
676+
"""Ensure forked child logging locks are not held; bpo-6721."""
677+
refed_h = logging.Handler()
678+
refed_h.name = 'because we need at least one for this test'
679+
self.assertGreater(len(logging._handlers), 0)
680+
681+
locks_held__ready_to_fork = threading.Event()
682+
fork_happened__release_locks_and_end_thread = threading.Event()
683+
684+
def lock_holder_thread_fn():
685+
logging._acquireLock()
686+
try:
687+
refed_h.acquire()
688+
try:
689+
# Tell the main thread to do the fork.
690+
locks_held__ready_to_fork.set()
691+
692+
# If the deadlock bug exists, the fork will happen
693+
# without dealing with the locks we hold, deadlocking
694+
# the child.
695+
696+
# Wait for a successful fork or an unreasonable amount of
697+
# time before releasing our locks. To avoid a timing based
698+
# test we'd need communication from os.fork() as to when it
699+
# has actually happened. Given this is a regression test
700+
# for a fixed issue, potentially less reliably detecting
701+
# regression via timing is acceptable for simplicity.
702+
# The test will always take at least this long. :(
703+
fork_happened__release_locks_and_end_thread.wait(0.5)
704+
finally:
705+
refed_h.release()
706+
finally:
707+
logging._releaseLock()
708+
709+
lock_holder_thread = threading.Thread(
710+
target=lock_holder_thread_fn,
711+
name='test_post_fork_child_no_deadlock lock holder')
712+
lock_holder_thread.start()
713+
714+
locks_held__ready_to_fork.wait()
715+
pid = os.fork()
716+
if pid == 0: # Child.
717+
logging.error(r'Child process did not deadlock. \o/')
718+
os._exit(0)
719+
else: # Parent.
720+
fork_happened__release_locks_and_end_thread.set()
721+
lock_holder_thread.join()
722+
start_time = time.monotonic()
723+
while True:
724+
waited_pid, status = os.waitpid(pid, os.WNOHANG)
725+
if waited_pid == pid:
726+
break # child process exited.
727+
if time.monotonic() - start_time > 7:
728+
break # so long? implies child deadlock.
729+
time.sleep(0.05)
730+
if waited_pid != pid:
731+
os.kill(pid, signal.SIGKILL)
732+
waited_pid, status = os.waitpid(pid, 0)
733+
self.fail("child process deadlocked.")
734+
self.assertEqual(status, 0, msg="child process error")
735+
669736

670737
class BadStream(object):
671738
def write(self, data):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Acquire the logging module's commonly used internal locks while fork()ing to
2+
avoid deadlocks in the child process.

0 commit comments

Comments
 (0)
0