10000 bpo-37531: regrtest ignores output on timeout (GH-16659) · python/cpython@598bfa4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 598bfa4

Browse files
bpo-37531: regrtest ignores output on timeout (GH-16659)
bpo-37531, bpo-38207: On timeout, regrtest no longer attempts to call `popen.communicate() again: it can hang until all child processes using stdout and stderr pipes completes. Kill the worker process and ignores its output. Reenable test_regrtest.test_multiprocessing_timeout(). bpo-37531: Change also the faulthandler timeout of the main process from 1 minute to 5 minutes, for Python slowest buildbots. (cherry picked from commit 0ec618a) Co-authored-by: Victor Stinner <vstinner@python.org>
1 parent 0b354fc commit 598bfa4

File tree

2 files changed

+42
-27
lines changed

2 files changed

+42
-27
lines changed

Lib/test/libregrtest/runtest_mp.py

Lines changed: 37 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
PROGRESS_UPDATE = 30.0 # seconds
2323
assert PROGRESS_UPDATE >= PROGRESS_MIN_TIME
2424

25+
# Kill the main process after 5 minutes. It is supposed to write an update
26+
# every PROGRESS_UPDATE seconds. Tolerate 5 minutes for Python slowest
27+
# buildbot workers.
28+
MAIN_PROCESS_TIMEOUT = 5 * 60.0
29+
assert MAIN_PROCESS_TIMEOUT >= PROGRESS_UPDATE
30+
2531
# Time to wait until a worker completes: should be immediate
2632
JOIN_TIMEOUT = 30.0 # seconds
2733

@@ -159,20 +165,6 @@ def mp_result_error(self, test_name, error_type, stdout='', stderr='',
159165
result = TestResult(test_name, error_type, test_time, None)
160166
return MultiprocessResult(result, stdout, stderr, err_msg)
161167

162-
def _timedout(self, test_name):
163-
self._kill()
164-
165-
stdout = stderr = ''
166-
popen = self._popen
167-
try:
168-
stdout, stderr = popen.communicate(timeout=JOIN_TIMEOUT)
169-
except (subprocess.TimeoutExpired, OSError) as exc:
170-
print_warning(f"Failed to read {self} output "
171-
f"(timeout={format_duration(JOIN_TIMEOUT)}): "
172-
f"{exc!r}")
173-
174-
return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)
175-
176168
def _run_process(self, test_name):
177169
self.start_time = time.monotonic()
178170

@@ -195,23 +187,32 @@ def _run_process(self, test_name):
195187

196188
try:
197189
stdout, stderr = popen.communicate(timeout=self.timeout)
190+
retcode = popen.returncode
191+
assert retcode is not None
198192
except subprocess.TimeoutExpired:
199193
if self._stopped:
200194
# kill() has been called: communicate() fails
201195
# on reading closed stdout/stderr
202196
raise ExitThread
203197

204-
return self._timedout(test_name)
198+
# On timeout, kill the process
199+
self._kill()
200+
201+
# None means TIMEOUT for the caller
202+
retcode = None
203+
# bpo-38207: Don't attempt to call communicate() again: on it
204+
# can hang until all child processes using stdout and stderr
205+
# pipes completes.
206+
stdout = stderr = ''
205207
except OSError:
206208
if self._stopped:
207209
# kill() has been called: communicate() fails
208210
# on reading closed stdout/stderr
209211
raise ExitThread
210212
raise
211-
212-
retcode = popen.returncode
213-
stdout = stdout.strip()
214-
stderr = stderr.rstrip()
213+
else:
214+
stdout = stdout.strip()
215+
stderr = stderr.rstrip()
215216

216217
return (retcode, stdout, stderr)
217218
except:
@@ -223,13 +224,10 @@ def _run_process(self, test_name):
223224
self.current_test_name = None
224225

225226
def _runtest(self, test_name):
226-
result = self._run_process(test_name)
227-
228-
if isinstance(result, MultiprocessResult):
229-
# _timedout() case
230-
return result
227+
retcode, stdout, stderr = self._run_process(test_name)
231228

232-
retcode, stdout, stderr = result
229+
if retcode is None:
230+
return self.mp_result_error(test_name, TIMEOUT, stdout, stderr)
233231

234232
err_msg = None
235233
if retcode != 0:
@@ -248,7 +246,8 @@ def _runtest(self, test_name):
248246
err_msg = "Failed to parse worker JSON: %s" % exc
249247

250248
if err_msg is not None:
251-
return self.mp_result_error(test_name, CHILD_ERROR, stdout, stderr, err_msg)
249+
return self.mp_result_error(test_name, CHILD_ERROR,
250+
stdout, stderr, err_msg)
252251

253252
return MultiprocessResult(result, stdout, stderr, err_msg)
254253

@@ -287,6 +286,16 @@ def _wait_completed(self):
287286
f"{exc!r}")
288287

289288
def wait_stopped(self, start_time):
289+
# bpo-38207: MultiprocessTestRunner.stop_workers() called self.stop()
290+
# which killed the process. Sometimes, killing the process from the
291+
# main thread does not interrupt popen.communicate() in
292+
# TestWorkerProcess thread. This loop with a timeout is a workaround
293+
# for that.
294+
#
295+
# Moreover, if this method fails to join the thread, it is likely
296+
# that Python will hang at exit while calling threading._shutdown()
297+
# which tries again to join the blocked thread. Regrtest.main()
298+
# uses EXIT_TIMEOUT to workaround this second bug.
290299
while True:
291300
# Write a message every second
292301
self.join(1.0)
@@ -353,7 +362,8 @@ def _get_result(self):
353362
timeout = PROGRESS_UPDATE
354363
while True:
355364
if use_faulthandler:
356-
faulthandler.dump_traceback_later(timeout * 2.0, exit=True)
365+
faulthandler.dump_traceback_later(MAIN_PROCESS_TIMEOUT,
366+
exit=True)
357367

358368
# wait for a thread
359369
try:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
On timeout, regrtest no longer attempts to call ``popen.communicate()``
2+
again: it can hang until all child processes using stdout and stderr pipes
3+
completes. Kill the worker process and ignores its output. Change also the
4+
faulthandler timeout of the main process from 1 minute to 5 minutes, for Python
5+
slowest buildbots.

0 commit comments

Comments
 (0)
0