8000 bpo-6721: Hold logging locks across fork() (GH-4071) · python/cpython@1900384 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1900384

Browse files
authored
bpo-6721: Hold logging locks across fork() (GH-4071)
bpo-6721: When os.fork() was called while another thread holds a logging lock, the child process may deadlock when it tries to log. This fixes that by acquiring all logging locks before fork and releasing them afterwards. A regression test that fails before this change is included. Within the new unittest itself: There is a small _potential_ due to mixing of fork and a thread in the child process if the parent's thread happened to hold a non-reentrant library call lock (malloc?) when the os.fork() happens. buildbots and time will tell if this actually manifests itself in this test or not. :/ A functionality test that avoids that would be a challenge. An alternate test that isn't trying to produce the deadlock itself but just checking that the release and acquire calls are made would be the next best alternative if so.
1 parent ea13740 commit 1900384

File tree

3 files changed

+119
-0
lines changed

3 files changed

+119
-0
lines changed

Lib/logging/__init__.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,55 @@ def _releaseLock():
225225
if _lock:
226226
_lock.release()
227227

228+
229+
# Prevent a held logging lock from blocking a child from logging.
230+
231+
if not hasattr(os, 'register_at_fork'): # Windows and friends.
232+
def _register_at_fork_acquire_release(instance):
233+
pass # no-op when os.register_at_fork does not exist.
234+
else: # The os.register_at_fork API exists
235+
os.register_at_fork(before=_acquireLock,
236+
after_in_child=_releaseLock,
237+
after_in_parent=_releaseLock)
238+
239+
# A collection of instances with acquire and release methods (logging.Handler)
240+
# to be called before and after fork. The weakref avoids us keeping discarded
241+
# Handler instances alive forever in case an odd program creates and destroys
242+
# many over its lifetime.
243+
_at_fork_acquire_release_weakset = weakref.WeakSet()
244+
245+
246+
def _register_at_fork_acquire_release(instance):
247+
# We put the instance itself in a single WeakSet as we MUST have only
248+
# one atomic weak ref. used by both before and after atfork calls to
249+
# guarantee matched pairs of acquire and release calls.
250+
_at_fork_acquire_release_weakset.add(instance)
251+
252+
253+
def _at_fork_weak_calls(method_name):
254+
for instance in _at_fork_acquire_release_weakset:
255+
method = getattr(instance, method_name)
256+
try:
257+
method()
258+
except Exception as err:
259+
# Similar to what PyErr_WriteUnraisable does.
260+
print("Ignoring exception from logging atfork", instance,
261+
method_name, "method:", err, file=sys.stderr)
262+
263+
264+
def _before_at_fork_weak_calls():
265+
_at_fork_weak_calls('acquire')
266+
267+
268+
def _after_at_fork_weak_calls():
269+
_at_fork_weak_calls('release')
270+
271+
272+
os.register_at_fork(before=_before_at_fork_weak_calls,
273+
after_in_child=_after_at_fork_weak_calls,
274+
after_in_parent=_after_at_fork_weak_calls)
275+
276+
228277
#---------------------------------------------------------------------------
229278
# The logging record
230279
#---------------------------------------------------------------------------
@@ -795,6 +844,7 @@ def createLock(self):
795844
Acquire a thread lock for serializing access to the underlying I/O.
796845
"""
797846
self.lock = threading.RLock()
847+
_register_at_fork_acquire_release(self)
798848

799849
def acquire(self):
800850
"""

Lib/test/test_logging.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import queue
3636
import random
3737
import re
38+
import signal
3839
import socket
3940
import struct
4041
import sys
@@ -666,6 +667,72 @@ def remove_loop(fname, tries):
666667
if os.path.exists(fn):
667668
os.unlink(fn)
668669

670+
# The implementation relies on os.register_at_fork existing, but we test
671+
# based on os.fork existing because that is what users and this test use.
672+
# This helps ensure that when fork exists (the important concept) that the
673+
# register_at_fork mechanism is also present and used.
674+
@unittest.skipIf(not hasattr(os, 'fork'), 'Test requires os.fork().')
675+
def test_post_fork_child_no_deadlock(self):
676+
"""Ensure forked child logging locks are not held; bpo-6721."""
677+
refed_h = logging.Handler()
678+
refed_h.name = 'because we need at least one for this test'
679+
self.assertGreater(len(logging._handlers), 0)
680+
681+
locks_held__ready_to_fork = threading.Event()
682+
fork_happened__release_locks_and_end_thread = threading.Event()
683+
684+
def lock_holder_thread_fn():
685+
logging._acquireLock()
686+
try:
687+
refed_h.acquire()
688+
try:
689+
# Tell the main thread to do the fork.
690+
locks_held__ready_to_fork.set()
691+
692+
# If the deadlock bug exists, the fork will happen
693+
# without dealing with the locks we hold, deadlocking
694+
# the child.
695+
696+
# Wait for a successful fork or an unreasonable amount of
697+
# time before releasing our locks. To avoid a timing based
698+
# test we'd need communication from os.fork() as to when it
699+
# has actually happened. Given this is a regression test
700+
# for a fixed issue, potentially less reliably detecting
701+
# regression via timing is acceptable for simplicity.
702+
# The test will always take at least this long. :(
703+
fork_happened__release_locks_and_end_thread.wait(0.5)
704+
finally:
705+
refed_h.release()
706+
finally:
707+
logging._releaseLock()
708+
709+
lock_holder_thread = threading.Thread(
710+
target=lock_holder_thread_fn,
711+
name='test_post_fork_child_no_deadlock lock holder')
712+
lock_holder_thread.start()
713+
714+
locks_held__ready_to_fork.wait()
715+
pid = os.fork()
716+
if pid == 0: # Child.
717+
logging.error(r'Child process did not deadlock. \o/')
718+
os._exit(0)
719+
else: # Parent.
720+
fork_happened__release_locks_and_end_thread.set()
721+
lock_holder_thread.join()
722+
start_time = time.monotonic()
723+
while True:
724+
waited_pid, status = os.waitpid(pid, os.WNOHANG)
725+
if waited_pid == pid:
726+
break # child process exited.
727+
if time.monotonic() - start_time > 7:
728+
break # so long? implies child deadlock.
729+
time.sleep(0.05)
730+
if waited_pid != pid:
731+
os.kill(pid, signal.SIGKILL)
732+
waited_pid, status = os.waitpid(pid, 0)
733+
self.fail("child process deadlocked.")
734+
self.assertEqual(status, 0, msg="child process error")
735+
669736

670737
class BadStream(object):
671738
def write(self, data):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Acquire the logging module's commonly used internal locks while fork()ing to
2+
avoid deadlocks in the child process.

0 commit comments

Comments
 (0)
0