8000 [3.5] bpo-29537: Tolerate legacy invalid bytecode (#169) · python/cpython@93602e3 · GitHub
[go: up one dir, main page]

Skip to content {"props":{"docsUrl":"https://docs.github.com/get-started/accessibility/keyboard-shortcuts"}}

Commit 93602e3

Browse files
authored
[3.5] bpo-29537: Tolerate legacy invalid bytecode (#169)
bpo-27286 fixed a problem where BUILD_MAP_UNPACK_WITH_CALL could be emitted with an incorrect oparg value, causing the eval loop to access the wrong stack entry when attempting to read the function name. The associated magic number change caused significant problems when attempting to upgrade to 3.5.3 for anyone that relies on pre-cached bytecode remaining valid across maintenance releases. This patch restores the ability to import legacy bytecode generated by 3.5.0, 3.5.1 or 3.5.2, and modifies the eval loop to avoid any harmful consequences from the potentially malformed legacy bytecode. Original import patch by Petr Viktorin, eval loop patch by Serhiy Storchaka, and tests and integration by Nick Coghlan.
1 parent bef209d commit 93602e3

File tree

13 files changed

+2823
-2607
lines changed

13 files changed

+2823
-2607
lines changed

Lib/importlib/_bootstrap_external.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def _write_atomic(path, data, mode=0o666):
230230
# Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations)
231231
# Python 3.5b2 3340 (fix dictionary display evaluation order #11205)
232232
# Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400)
233-
# Python 3.5.2 3351 (fix BUILD_MAP_UNPACK_WITH_CALL opcode #27286)
233+
# Python 3.5.3 3351 (fix BUILD_MAP_UNPACK_WITH_CALL opcode #27286)
234234
#
235235
# MAGIC must change whenever the bytecode emitted by the compiler may no
236236
# longer be understood by older implementations of the eval loop (usually
@@ -242,6 +242,28 @@ def _write_atomic(path, data, mode=0o666):
242242
MAGIC_NUMBER = (3351).to_bytes(2, 'little') + b'\r\n'
243243
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
244244

245+
# Issue #29537: handle issue27286 bytecode incompatibility
246+
#
247+
# The magic number bump in Python 3.5.3 for issue27286 turned out to create
248+
# significant backwards compatibility problems for redistributors and
249+
# other folks that rely on the bytecode format remaining stable within a
250+
# given maintenance release series. See http://bugs.python.org/issue29514
251+
# for more discussion of the problems that the original change caused.
252+
#
253+
# The _BACKCOMPAT_MAGIC_NUMBER below and any other changes marked with
254+
# "Issue #29537" comments allow Python 3.5.4+ to load bytecode files with both
255+
# the original 3.5.0 magic number and those with the updated magic number used
256+
# since 3.5.3.
257+
#
258+
# This is expected to be a one-off change used solely to restore legacy
259+
# bytecode compatibility within the 3.5.x series, so it avoids any changes
260+
# that would prompt a rebuild of C extension modules.
261+
#
262+
if _RAW_MAGIC_NUMBER != 168627479:
263+
_msg = 'Magic number mismatch (the issue27286 workaround is for 3.5 only)'
264+
raise SystemError(_msg)
265+
_BACKCOMPAT_MAGIC_NUMBER = (3350).to_bytes(2, 'little') + b'\r\n'
266+
245267
_PYCACHE = '__pycache__'
246268
_OPT = 'opt-'
247269

@@ -446,7 +468,9 @@ def _validate_bytecode_header(data, source_stats=None, name=None, path=None):
446468
magic = data[:4]
447469
raw_timestamp = data[4:8]
448470
raw_size = data[8:12]
449-
if magic != MAGIC_NUMBER:
471+
if (magic != MAGIC_NUMBER
472+
# Issue #29537: handle issue27286 bytecode incompatibility
473+
and magic != _BACKCOMPAT_MAGIC_NUMBER):
450474
message = 'bad magic number in {!r}: {!r}'.format(name, magic)
451475
_verbose_message('{}', message)
452476
raise ImportError(message, **exc_details)

Lib/importlib/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from ._bootstrap import _resolve_name
55
from ._bootstrap import spec_from_loader
66
from ._bootstrap import _find_spec
7-
from ._bootstrap_external import MAGIC_NUMBER
7+
from ._bootstrap_external import MAGIC_NUMBER, _BACKCOMPAT_MAGIC_NUMBER
88
from ._bootstrap_external import cache_from_source
99
from ._bootstrap_external import decode_source
1010
from ._bootstrap_external import source_from_cache

Lib/pkgutil.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@ def read_code(stream):
3737
import marshal
3838

3939
magic = stream.read(4)
40-
if magic != importlib.util.MAGIC_NUMBER:
40+
if (magic != importlib.util.MAGIC_NUMBER
41+
# Issue #29537: handle issue27286 bytecode incompatibility
42+
# See Lib/importlib/_bootstrap_external.py
43+
and magic != importlib.util._BACKCOMPAT_MAGIC_NUMBER):
4144
return None
4245

4346
stream.read(8) # Skip timestamp and size

Lib/pydoc.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,12 @@ def importfile(path):
289289
"""Import a Python source file or compiled file given its path."""
290290
magic = importlib.util.MAGIC_NUMBER
291291
with open(path, 'rb') as file:
292-
is_bytecode = magic == file.read(len(magic))
292+
first_bytes = file.read(len(magic))
293+
is_bytecode = first_bytes in (magic,
294+
# Issue #29537: handle issue27286
295+
# bytecode incompatibility
296+
# See Lib/importlib/_bootstrap_external.py
297+
importlib.util._BACKCOMPAT_MAGIC_NUMBER)
293298
filename = os.path.basename(path)
294299
name, ext = os.path.splitext(filename)
295300
if is_bytecode:

Lib/test/test_extcall.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@
5252
>>> f(1, 2, **{'a': -1, 'b': 5}, **{'a': 4, 'c': 6})
5353
Traceback (most recent call last):
5454
...
55-
TypeError: f() got multiple values for keyword argument 'a'
55+
TypeError: function got multiple values for keyword argument 'a'
5656
>>> f(1, 2, **{'a': -1, 'b': 5}, a=4, c=6)
5757
Traceback (most recent call last):
5858
...
59-
TypeError: f() got multiple values for keyword argument 'a'
59+
TypeError: function got multiple values for keyword argument 'a'
6060
>>> f(1, 2, a=3, **{'a': 4}, **{'a': 5})
6161
Traceback (most recent call last):
6262
...
63-
TypeError: f() got multiple values for keyword argument 'a'
63+
TypeError: function got multiple values for keyword argument 'a'
6464
>>> f(1, 2, 3, *[4, 5], **{'a':6, 'b':7})
6565
(1, 2, 3, 4, 5) {'a': 6, 'b': 7}
6666
>>> f(1, 2, 3, x=4, y=5, *(6, 7), **{'a':8, 'b': 9})

Lib/test/test_importlib/source/test_file_loader.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,5 +603,109 @@ class SourcelessLoaderBadBytecodeTestPEP302(SourcelessLoaderBadBytecodeTest,
603603
util=importlib_util)
604604

605605

606+
###########################################################################
607+
# Issue #29537: Test backwards compatibility with legacy 3.5.0/1/2 bytecode
608+
###########################################################################
609+
610+
class LegacyBytecodeTest:
611+
612+
def _test_legacy_magic(self, test, *, del_source=False):
613+
# Replace the default magic number with one copied from a pyc file
614+
# generated by Python 3.5.2
615+
with util.create_modules('_temp') as mapping:
616+
bc_path = self.manipulate_bytecode('_temp', mapping,
617+
lambda bc: b'\x16\r\r\n' + bc[4:])
618+
test('_temp', mapping, bc_path)
619+
620+
LegacyBytecodeTestPEP451 = BadBytecodeTestPEP451
621+
LegacyBytecodeTestPEP302 = BadBytecodeTestPEP302
622+
623+
# SourceLoader via both PEP 451 and 302 hooks
624+
625+
class SourceLoaderLegacyBytecodeTest(LegacyBytecodeTest):
626+
F987 627+
@classmethod
628+
def setUpClass(cls):
629+
cls.loader = cls.machinery.SourceFileLoader
630+
631+
@util.writes_bytecode_files
632+
def test_legacy_magic(self):
633+
# The magic number from 3.5.0/1/2 should be accepted as is
634+
def test(name, mapping, bytecode_path):
635+
self.import_(mapping[name], name)
636+
with open(bytecode_path, 'rb') as bytecode_file:
637+
self.assertEqual(bytecode_file.read(4),
638+
self.util._BACKCOMPAT_MAGIC_NUMBER)
639+
640+
self._test_legacy_magic(test)
641+
642+
643+
class SourceLoaderLegacyBytecodeTestPEP451(
644+
SourceLoaderLegacyBytecodeTest, LegacyBytecodeTestPEP451):
645+
pass
646+
647+
648+
(Frozen_SourceLegacyBytecodePEP451,
649+
Source_SourceLegacyBytecodePEP451
650+
) = util.test_both(SourceLoaderLegacyBytecodeTestPEP451, importlib=importlib,
651+
machinery=machinery, abc=importlib_abc,
652+
util=importlib_util)
653+
654+
655+
class SourceLoaderLegacyBytecodeTestPEP302(
656+
SourceLoaderLegacyBytecodeTest, LegacyBytecodeTestPEP302):
657+
pass
658+
659+
660+
(Frozen_SourceLegacyBytecodePEP302,
661+
Source_SourceLegacyBytecodePEP302
662+
) = util.test_both(SourceLoaderLegacyBytecodeTestPEP302, importlib=importlib,
663+
machinery=machinery, abc=importlib_abc,
664+
util=importlib_util)
665+
666+
# SourcelessLoader via both PEP 451 and 302 hooks
667+
668+
class SourcelessLoaderLegacyBytecodeTest(LegacyBytecodeTest):
669+
670+
@classmethod
671+
def setUpClass(cls):
672+
cls.loader = cls.machinery.SourcelessFileLoader
673+
674+
675+
@util.writes_bytecode_files
676+
def test_legacy_magic(self):
677+
# The magic number from 3.5.0/1/2 should be accepted as is
678+
def test(name, mapping, bytecode_path):
679+
self.import_(bytecode_path, name)
680+
with open(bytecode_path, 'rb') as bytecode_file:
681+
self.assertEqual(bytecode_file.read(4),
682+
self.util._BACKCOMPAT_MAGIC_NUMBER)
683+
684+
self._test_legacy_magic(test)
685+
686+
class SourcelessLoaderLegacyBytecodeTestPEP451(
687+
SourcelessLoaderLegacyBytecodeTest, LegacyBytecodeTestPEP451):
688+
pass
689+
690+
(Frozen_SourcelessLegacyBytecodePEP451,
691+
Source_SourcelessLegacyBytecodePEP451
692+
) = util.test_both(SourcelessLoaderLegacyBytecodeTestPEP451, importlib=importlib,
693+
machinery=machinery, abc=importlib_abc,
694+
util=importlib_util)
695+
696+
697+
class SourcelessLoaderLegacyBytecodeTestPEP302(SourcelessLoaderLegacyBytecodeTest,
698+
LegacyBytecodeTestPEP302):
699+
pass
700+
701+
702+
(Frozen_SourcelessLegacyBytecodePEP302,
703+
Source_SourcelessLegacyBytecodePEP302
704+
) = util.test_both(SourcelessLoaderLegacyBytecodeTestPEP302, importlib=importlib,
705+
machinery=machinery, abc=importlib_abc,
706+
util=importlib_util)
707+
708+
# End of Issue #29537 legacy bytecode compatibility tests
709+
606710
if __name__ == '__main__':
607711
unittest.main()

Lib/test/test_unpack_ex.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@
251251
>>> f(x=5, **{'x': 3}, **{'x': 2})
252252
Traceback (most recent call last):
253253
...
254-
TypeError: f() got multiple values for keyword argument 'x'
254+
TypeError: function got multiple values for keyword argument 'x'
255255
256256
>>> f(**{1: 3}, **{1: 5})
257257
Traceback (most recent call last):

Misc/NEWS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ Release date: XXXX-XX-XX
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #29537: Restore runtime compatibility with bytecode files generated by
14+
CPython 3.5.0 and 3.5.1, and adjust the eval loop to avoid the problems that
15+
could be caused by the malformed variant of the BUILD_MAP_UNPACK_WITH_CALL
16+
opcode that they may contain. Patch by Petr Viktorin, Serhiy Storchaka,
17+
and Nick Coghlan.
18+
1319
- Issue #28598: Support __rmod__ for subclasses of str being called before
1420
str.__mod__. Patch by Martijn Pieters.
1521

Modules/zipimport.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1263,6 +1263,11 @@ eq_mtime(time_t t1, time_t t2)
12631263
return d <= 1;
12641264
}
12651265

1266+
/* Issue #29537: handle issue27286 bytecode incompatibility
1267+
* See Lib/importlib/_bootstrap_external.py for general discussion
1268+
*/
1269+
extern PY_UINT32_T _Py_BACKCOMPAT_MAGIC_NUMBER;
1270+
12661271
/* Given the contents of a .py[co] file in a buffer, unmarshal the data
12671272
and return the code object. Return None if it the magic word doesn't
12681273
match (we do this instead of raising an exception as we fall back
@@ -1274,14 +1279,18 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
12741279
PyObject *code;
12751280
unsigned char *buf = (unsigned char *)PyBytes_AsString(data);
12761281
Py_ssize_t size = PyBytes_Size(data);
1282+
PY_UINT32_T magic;
12771283

12781284
if (size < 12) {
12791285
PyErr_SetString(ZipImportError,
12801286
"bad pyc data");
12811287
return NULL;
12821288
}
12831289

1284-
if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1290+
magic = get_uint32(buf);
1291+
if (magic != (unsigned int)PyImport_GetMagicNumber()
1292+
/* Issue #29537: handle issue27286 bytecode incompatibility */
1293+
&& magic != _Py_BACKCOMPAT_MAGIC_NUMBER) {
12851294
if (Py_VerboseFlag) {
12861295
PySys_FormatStderr("# %R has bad magic\n",
12871296
pathname);

Python/ceval.c

Lines changed: 57 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2672,14 +2672,22 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
26722672
if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
26732673
!PyMapping_Check(arg)) {
26742674
int function_location = (oparg>>8) & 0xff;
2675-
PyObject *func = (
2676-
PEEK(function_location + num_maps));
2677-
PyErr_Format(PyExc_TypeError,
2678-
"%.200s%.200s argument after ** "
2679-
"must be a mapping, not %.200s",
2680-
PyEval_GetFuncName(func),
2681-
PyEval_GetFuncDesc(func),
2682-
arg->ob_type->tp_name);
2675+
if (function_location == 1) {
2676+
PyObject *func = (
2677+
PEEK(function_location + num_maps));
2678+
PyErr_Format(PyExc_TypeError,
2679+
"%.200s%.200s argument after ** "
2680+
"must be a mapping, not %.200s",
2681+
PyEval_GetFuncName(func),
2682+
PyEval_GetFuncDesc(func),
2683+
arg->ob_type->tp_name);
2684+
}
2685+
else {
2686+
PyErr_Format(PyExc_TypeError,
2687+
"argument after ** "
2688+
"must be a mapping, not %.200s",
2689+
arg->ob_type->tp_name);
2690+
}
26832691
}
26842692
Py_DECREF(sum);
26852693
goto error;
@@ -2689,21 +2697,34 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
26892697
Py_ssize_t idx = 0;
26902698
PyObject *key;
26912699
int function_location = (oparg>>8) & 0xff;
2692-
PyObject *func = PEEK(function_location + num_maps);
26932700
Py_hash_t hash;
26942701
_PySet_NextEntry(intersection, &idx, &key, &hash);
2695-
if (!PyUnicode_Check(key)) {
2696-
PyErr_Format(PyExc_TypeError,
2697-
"%.200s%.200s keywords must be strings",
2698-
PyEval_GetFuncName(func),
2699-
PyEval_GetFuncDesc(func));
2700-
} else {
2701-
PyErr_Format(PyExc_TypeError,
2702-
"%.200s%.200s got multiple "
2703-
"values for keyword argument '%U'",
2704-
PyEval_GetFuncName(func),
2705-
PyEval_GetFuncDesc(func),
2706-
key);
2702+
if (function_location == 1) {
2703+
PyObject *func = PEEK(function_location + num_maps);
2704+
if (!PyUnicode_Check(key)) {
2705+
PyErr_Format(PyExc_TypeError,
2706+
"%.200s%.200s keywords must be strings",
2707+
PyEval_GetFuncName(func),
2708+
PyEval_GetFuncDesc(func));
2709+
} else {
2710+
PyErr_Format(PyExc_TypeError,
2711+
"%.200s%.200s got multiple "
2712+
"values for keyword argument '%U'",
2713+
PyEval_GetFuncName(func),
2714+
PyEval_GetFuncDesc(func),
2715+
key);
2716+
}
2717+
}
2718+
else {
2719+
if (!PyUnicode_Check(key)) {
2720+
PyErr_SetString(PyExc_TypeError,
2721+
"keywords must be strings");
2722+
} else {
2723+
PyErr_Format(PyExc_TypeError,
2724+
"function got multiple "
2725+
"values for keyword argument '%U'",
2726+
key);
2727+
}
27072728
}
27082729
Py_DECREF(intersection);
27092730
Py_DECREF(sum);
@@ -2716,13 +2737,21 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
27162737
if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
27172738
if (with_call) {
27182739
int function_location = (oparg>>8) & 0xff;
2719-
PyObject *func = PEEK(function_location + num_maps);
2720-
PyErr_Format(PyExc_TypeError,
2721-
"%.200s%.200s argument after ** "
2722-
"must be a mapping, not %.200s",
2723-
PyEval_GetFuncName(func),
2724-
PyEval_GetFuncDesc(func),
2725-
arg->ob_type->tp_name);
2740+
if (function_location == 1) {
2741+
PyObject *func = PEEK(function_location + num_maps);
2742+
PyErr_Format(PyExc_TypeError,
2743+
"%.200s%.200s argument after ** "
2744+
"must be a mapping, not %.200s",
2745+
PyEval_GetFuncName(func),
2746+
PyEval_GetFuncDesc(func),
2747+
arg->ob_type->tp_name);
2748+
}
2749+
else {
2750+
PyErr_Format(PyExc_TypeError,
2751+
"argument after ** "
2752+
"must be a mapping, not %.200s",
2753+
arg->ob_type->tp_name);
2754+
}
27262755
}
27272756
else {
27282757
PyErr_Format(PyExc_TypeError,

Python/import.c

Lines changed: 9 additions & 1 deletion
< 9F18 thead class="sr-only">
Original file line numberDiff line numberDiff line change
@@ -483,9 +483,17 @@ PyImport_Cleanup(void)
483483
#undef STORE_MODULE_WEAKREF
484484
}
485485

486+
/* Issue #29537: handle issue27286 bytecode incompatibility
487+
*
488+
* In order to avoid forcing recompilation of all extension modules, we export
489+
* the legacy 3.5.0 magic number here rather than putting it in a header file.
490+
*
491+
* See Lib/importlib/_bootstrap_external.py for general discussion
492+
*/
493+
PY_UINT32_T _Py_BACKCOMPAT_MAGIC_NUMBER = 168627478;
494+
PY_UINT32_T _Py_BACKCOMPAT_HALF_MAGIC = 3350;
486495

487496
/* Helper for pythonrun.c -- return magic number and tag. */
488-
489497
long
490498
PyImport_GetMagicNumber(void)
491499
{

0 commit comments

Comments
 (0)
0