From 3b933f0966b1e53ea50418970950de294ebbea76 Mon Sep 17 00:00:00 2001 From: faerot Date: Thu, 22 May 2014 11:32:54 +0300 Subject: [PATCH 001/349] added distinguish_tuple argument to Packer This will make precise python types serialization possible. --- msgpack/_packer.pyx | 12 ++++++++++-- msgpack/fallback.py | 11 +++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 82e4a63d..86e460fc 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -56,6 +56,10 @@ cdef class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') + :param bool distinguish_tuple: + If set to true, tuples will not be serialized as lists + and will be treated as unsupported type. This is useful when trying + to implement accurate serialization for python types. :param bool use_single_float: Use single precision float type for float. (default: False) :param bool autoreset: @@ -71,6 +75,7 @@ cdef class Packer(object): cdef object _berrors cdef char *encoding cdef char *unicode_errors + cdef bool distinguish_tuple cdef bool use_float cdef bint autoreset @@ -83,10 +88,12 @@ cdef class Packer(object): self.pk.length = 0 def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, bint autoreset=1, bint use_bin_type=0): + distinguish_tuple=False, use_single_float=False, bint autoreset=1, + bint use_bin_type=0): """ """ self.use_float = use_single_float + self.distinguish_tuple = distinguish_tuple self.autoreset = autoreset self.pk.use_bin_type = use_bin_type if default is not None: @@ -122,6 +129,7 @@ cdef class Packer(object): cdef dict d cdef size_t L cdef int default_used = 0 + cdef bool distinguish_tuple = self.distinguish_tuple if nest_limit < 0: raise PackValueError("recursion limit exceeded.") @@ -204,7 +212,7 @@ cdef class Packer(object): raise ValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyTuple_Check(o) or PyList_Check(o): + elif (PyTuple_Check(o) and not distinguish_tuple) or PyList_Check(o): L = len(o) if L > (2**32)-1: raise ValueError("list is too large") diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 49323e63..1d668c20 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -485,6 +485,10 @@ class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') + :param bool distinguish_tuple: + If set to true, tuples will not be serialized as lists + and will be treated as unsupported type. This is useful when trying + to implement accurate serialization for python types. :param bool use_single_float: Use single precision float type for float. (default: False) :param bool autoreset: @@ -495,7 +499,9 @@ class Packer(object): It also enable str8 type for unicode. 
""" def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, autoreset=True, use_bin_type=False): + distinguish_tuple=False, use_single_float=False, autoreset=True, + use_bin_type=False): + self._distinguish_tuple = distinguish_tuple self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type @@ -509,6 +515,7 @@ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): default_used = False + list_type = list if self._distinguish_tuple else (list, tuple) while True: if nest_limit < 0: raise PackValueError("recursion limit exceeded") @@ -599,7 +606,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): self._buffer.write(struct.pack("b", code)) self._buffer.write(data) return - if isinstance(obj, (list, tuple)): + if isinstance(obj, list_type): n = len(obj) self._fb_pack_array_header(n) for i in xrange(n): From b877ce2afadd4a4c96d7c0542f7b29836785de71 Mon Sep 17 00:00:00 2001 From: faerot Date: Thu, 22 May 2014 16:45:26 +0300 Subject: [PATCH 002/349] precise_mode instead of distinguish_tuple When precise_mode flag is set, serialization will be as precise as possible - type checks will be exact (type(..) is ... instead of isinstance(..., ...) and tuple will be treated as undefined type. This mode is to make accurate object serialization possible. --- msgpack/_packer.pyx | 37 ++++++++++++++++++---------------- msgpack/fallback.py | 48 +++++++++++++++++++++++++++++---------------- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 86e460fc..ec34cd84 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -56,10 +56,13 @@ cdef class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') - :param bool distinguish_tuple: - If set to true, tuples will not be serialized as lists - and will be treated as unsupported type. This is useful when trying - to implement accurate serialization for python types. + :param bool precise_mode: + If set to true, types will be checked to be exact. Derived classes + from serializeable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. :param bool use_single_float: Use single precision float type for float. 
(default: False) :param bool autoreset: @@ -75,7 +78,7 @@ cdef class Packer(object): cdef object _berrors cdef char *encoding cdef char *unicode_errors - cdef bool distinguish_tuple + cdef bint precise_mode cdef bool use_float cdef bint autoreset @@ -88,12 +91,12 @@ cdef class Packer(object): self.pk.length = 0 def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - distinguish_tuple=False, use_single_float=False, bint autoreset=1, - bint use_bin_type=0): + use_single_float=False, bint autoreset=1, bint use_bin_type=0, + bint precise_mode=0): """ """ self.use_float = use_single_float - self.distinguish_tuple = distinguish_tuple + self.precise_mode = precise_mode self.autoreset = autoreset self.pk.use_bin_type = use_bin_type if default is not None: @@ -129,7 +132,7 @@ cdef class Packer(object): cdef dict d cdef size_t L cdef int default_used = 0 - cdef bool distinguish_tuple = self.distinguish_tuple + cdef bint precise = self.precise_mode if nest_limit < 0: raise PackValueError("recursion limit exceeded.") @@ -137,12 +140,12 @@ cdef class Packer(object): while True: if o is None: ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): + elif PyBool_Check(o) if precise else isinstance(o, bool): if o: ret = msgpack_pack_true(&self.pk) else: ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): + elif PyLong_CheckExact(o) if precise else PyLong_Check(o): # PyInt_Check(long) is True for Python 3. # Sow we should test long before int. if o > 0: @@ -151,17 +154,17 @@ cdef class Packer(object): else: llval = o ret = msgpack_pack_long_long(&self.pk, llval) - elif PyInt_Check(o): + elif PyInt_CheckExact(o) if precise else PyInt_Check(o): longval = o ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): + elif PyFloat_CheckExact(o) if precise else PyFloat_Check(o): if self.use_float: fval = o ret = msgpack_pack_float(&self.pk, fval) else: dval = o ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_Check(o): + elif PyBytes_CheckExact(o) if precise else PyBytes_Check(o): L = len(o) if L > (2**32)-1: raise ValueError("bytes is too large") @@ -169,7 +172,7 @@ cdef class Packer(object): ret = msgpack_pack_bin(&self.pk, L) if ret == 0: ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_Check(o): + elif PyUnicode_CheckExact(o) if precise else PyUnicode_Check(o): if not self.encoding: raise TypeError("Can't encode unicode string: no encoding is specified") o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) @@ -192,7 +195,7 @@ cdef class Packer(object): if ret != 0: break ret = self._pack(v, nest_limit-1) if ret != 0: break - elif PyDict_Check(o): + elif not precise and PyDict_Check(o): L = len(o) if L > (2**32)-1: raise ValueError("dict is too large") @@ -212,7 +215,7 @@ cdef class Packer(object): raise ValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif (PyTuple_Check(o) and not distinguish_tuple) or PyList_Check(o): + elif PyList_CheckExact(o) if precise else (PyTuple_Check(o) or PyList_Check(o)): L = len(o) if L > (2**32)-1: raise ValueError("list is too large") diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 1d668c20..77922f7e 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -485,10 +485,13 @@ class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. 
(default: 'strict') - :param bool distinguish_tuple: - If set to true, tuples will not be serialized as lists - and will be treated as unsupported type. This is useful when trying - to implement accurate serialization for python types. + :param bool precise_mode: + If set to true, types will be checked to be exact. Derived classes + from serializeable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. :param bool use_single_float: Use single precision float type for float. (default: False) :param bool autoreset: @@ -499,9 +502,9 @@ class Packer(object): It also enable str8 type for unicode. """ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - distinguish_tuple=False, use_single_float=False, autoreset=True, + precise_mode=False, use_single_float=False, autoreset=True, use_bin_type=False): - self._distinguish_tuple = distinguish_tuple + self._precise_mode = precise_mode self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type @@ -513,19 +516,30 @@ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', raise TypeError("default must be callable") self._default = default - def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): + def _check_precise(obj, t, type=type, tuple=tuple): + if type(t) is tuple: + return type(obj) in t + else: + return type(obj) is t + + def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, + check=isinstance, check_precise=_check_precise): default_used = False - list_type = list if self._distinguish_tuple else (list, tuple) + if self._precise_mode: + check = check_precise + list_types = list + else: + list_types = (list, tuple) while True: if nest_limit < 0: raise PackValueError("recursion limit exceeded") if obj is None: return self._buffer.write(b"\xc0") - if isinstance(obj, bool): + if check(obj, bool): if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if isinstance(obj, int_types): + if check(obj, int_types): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: @@ -547,7 +561,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): if -0x8000000000000000 <= obj < -0x80000000: return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) raise PackValueError("Integer value out of range") - if self._use_bin_type and isinstance(obj, bytes): + if self._use_bin_type and check(obj, bytes): n = len(obj) if n <= 0xff: self._buffer.write(struct.pack('>BB', 0xc4, n)) @@ -558,8 +572,8 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): else: raise PackValueError("Bytes is too large") return self._buffer.write(obj) - if isinstance(obj, (Unicode, bytes)): - if isinstance(obj, Unicode): + if check(obj, (Unicode, bytes)): + if check(obj, Unicode): if self._encoding is None: raise TypeError( "Can't encode unicode string: " @@ -577,11 +591,11 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): else: raise PackValueError("String is too large") return self._buffer.write(obj) - if isinstance(obj, float): + if check(obj, float): if self._use_float: return self._buffer.write(struct.pack(">Bf", 0xca, obj)) return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) - if isinstance(obj, ExtType): + if check(obj, ExtType): code = obj.code data = 
obj.data assert isinstance(code, int) @@ -606,13 +620,13 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): self._buffer.write(struct.pack("b", code)) self._buffer.write(data) return - if isinstance(obj, list_type): + if check(obj, list_types): n = len(obj) self._fb_pack_array_header(n) for i in xrange(n): self._pack(obj[i], nest_limit - 1) return - if isinstance(obj, dict): + if check(obj, dict): return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) if not default_used and self._default is not None: From 9fe19cc4089467fff185399c659ffe72f2f52995 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Mar 2015 03:51:14 +0900 Subject: [PATCH 003/349] 0.4.6 --- ChangeLog.rst | 11 +++++++++++ msgpack/_version.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 797fa666..34f4cd4f 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,14 @@ +0.4.6 +===== +:release date: 2015-03-13 + +Bugs fixed +---------- + +* fallback.Unpacker: Fix Data corruption when OutOfData. + This bug only affects "Streaming unpacking." + + 0.4.5 ===== :release date: 2015-01-25 diff --git a/msgpack/_version.py b/msgpack/_version.py index ac12f4e8..2c1c96c0 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 4, 5) +version = (0, 4, 6) From b19e336108ed86ba344eeaccc9476244848e4dd4 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Mar 2015 04:05:44 +0900 Subject: [PATCH 004/349] travis: Cython 0.22 --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index dad7e87a..ad54ee0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,8 @@ env: install: - pip install wheel tox - ls -la wheelhouse - - if [ ! -f wheelhouse/Cython-0.21.2-cp27-none-linux_x86_64.whl ] ; then pip wheel cython ; fi - - pip install wheelhouse/Cython-0.21.2-cp27-none-linux_x86_64.whl + - if [ ! -f wheelhouse/Cython-0.22-cp27-none-linux_x86_64.whl ] ; then pip wheel cython==0.22 ; fi + - pip install wheelhouse/Cython-0.22-cp27-none-linux_x86_64.whl - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx script: tox From 2dda8fc4a58dd9be0c8d6f472342fd777d92886d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Mar 2015 04:18:10 +0900 Subject: [PATCH 005/349] travis: Build only master --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index ad54ee0a..ddd52549 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,10 @@ language: python python: - 2.7 +branches: + only: + - master + env: - TOXENV=py26-c,py27-c - TOXENV=py32-c,py33-c,py34-c From b7806a6e6eceb13153df6907b744af3be0e1075e Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Mar 2015 04:23:04 +0900 Subject: [PATCH 006/349] README: Update version --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 3c1957d1..8cee3061 100644 --- a/README.rst +++ b/README.rst @@ -3,8 +3,8 @@ MessagePack for Python ======================= :author: INADA Naoki -:version: 0.4.5 -:date: 2015-01-25 +:version: 0.4.6 +:date: 2015-03-13 .. 
image:: https://secure.travis-ci.org/msgpack/msgpack-python.png :target: https://travis-ci.org/#!/msgpack/msgpack-python From 2d05b40b030cb6b5da0913e72b59a91b09faccab Mon Sep 17 00:00:00 2001 From: Pramukta Kumar Date: Tue, 17 Mar 2015 15:02:40 -0400 Subject: [PATCH 007/349] Test to demonstrate that the default function isn't always called (#133) --- test/test_extension.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/test_extension.py b/test/test_extension.py index 2f85ce39..c552498f 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -55,3 +55,22 @@ def ext_hook(code, data): s = msgpack.packb(obj, default=default) obj2 = msgpack.unpackb(s, ext_hook=ext_hook) assert obj == obj2 + +import sys +if sys.version > '3': + long = int + +def test_overriding_hooks(): + def default(obj): + if isinstance(obj, long): + return {"__type__": "long", "__data__": str(obj)} + else: + return obj + + obj = {"testval": long(1823746192837461928374619)} + refobj = {"testval": default(obj["testval"])} + refout = msgpack.packb(refobj) + assert isinstance(refout, (str, bytes)) + testout = msgpack.packb(obj, default=default) + + assert refout == testout From 10cd2d2ebf6390e844c2bf59e9efd765f9b60e40 Mon Sep 17 00:00:00 2001 From: Pramukta Kumar Date: Tue, 17 Mar 2015 15:05:04 -0400 Subject: [PATCH 008/349] calling the default function upon integer overflow in the fallback routine --- msgpack/fallback.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 235c2017..eb20002c 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -655,6 +655,10 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) if -0x8000000000000000 <= obj < -0x80000000: return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) + if not default_used and self._default is not None: + obj = self._default(obj) + default_used = True + continue raise PackValueError("Integer value out of range") if self._use_bin_type and isinstance(obj, bytes): n = len(obj) From 6f02d252e1dc66d67861b45c5bead8392ed822d4 Mon Sep 17 00:00:00 2001 From: Pramukta Kumar Date: Tue, 17 Mar 2015 15:16:17 -0400 Subject: [PATCH 009/349] corresponding change to cython implementation --- msgpack/_packer.pyx | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index fcd20a7e..7129208b 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -136,12 +136,20 @@ cdef class Packer(object): elif PyLong_Check(o): # PyInt_Check(long) is True for Python 3. # Sow we should test long before int. 
- if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) + try: + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + except OverflowError, oe: + if not default_used and self._default is not None: + o = self._default(o) + default_used = True + continue + else: + raise elif PyInt_Check(o): longval = o ret = msgpack_pack_long(&self.pk, longval) From 734cb71dac66b9760cd9704667df92d0a097c1a1 Mon Sep 17 00:00:00 2001 From: tbeu Date: Sun, 22 Mar 2015 21:35:21 +0100 Subject: [PATCH 010/349] Update README.rst Fix typo --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8cee3061..ba2c1b51 100644 --- a/README.rst +++ b/README.rst @@ -37,7 +37,7 @@ Windows When you can't use binary distribution, you need to install Visual Studio or Windows SDK on Windows. (NOTE: Visual C++ Express 2010 doesn't support -amd64. Windows SDK is recommanded way to build amd64 msgpack without any fee.) +amd64. Windows SDK is recommended way to build amd64 msgpack without any fee.) Without extension, using pure python implementation on CPython runs slowly. From 4eb4c7a9940a5424ded190c1e0b9a0bb466047fb Mon Sep 17 00:00:00 2001 From: Johannes Dollinger Date: Mon, 27 Jul 2015 20:29:43 +0200 Subject: [PATCH 011/349] Accept ext_hook for unpack() --- msgpack/_unpacker.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index f5e7d95a..d53f7241 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -147,7 +147,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", - object_pairs_hook=None, + object_pairs_hook=None, ext_hook=ExtType ): """ Unpack an object from `stream`. 
@@ -158,7 +158,7 @@ def unpack(object stream, object object_hook=None, object list_hook=None, """ return unpackb(stream.read(), use_list=use_list, object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, - encoding=encoding, unicode_errors=unicode_errors, + encoding=encoding, unicode_errors=unicode_errors, ext_hook=ext_hook ) From c3a3f9b0a5c3a380018aa42e2f81b0c6752afcf0 Mon Sep 17 00:00:00 2001 From: TW Date: Fri, 30 Oct 2015 00:36:12 +0100 Subject: [PATCH 012/349] fix typo in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1055a61c..37729bd5 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ def build_extension(self, ext): try: return build_ext.build_extension(self, ext) except Exception as e: - print("WARNING: Failed to compile extensiom modules.") + print("WARNING: Failed to compile extension modules.") print("msgpack uses fallback pure python implementation.") print(e) From 672b220a3ff0c84eec1c8b39abcdedcd1e4cb329 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 7 Nov 2015 13:17:28 +0900 Subject: [PATCH 013/349] remove unused bat file --- upload_windows.bat | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 upload_windows.bat diff --git a/upload_windows.bat b/upload_windows.bat deleted file mode 100644 index 5cd9a7c5..00000000 --- a/upload_windows.bat +++ /dev/null @@ -1,4 +0,0 @@ -c:\Python27\python setup.py bdist_egg bdist_wininst upload -c:\Python33\python setup.py bdist_egg bdist_wininst upload -c:\Python27_amd64\python setup.py bdist_egg bdist_wininst upload -c:\Python33_amd64\python setup.py bdist_egg bdist_wininst upload From 52a38c6e9d6cb5206732231c50d1b9a5bd0be586 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 7 Nov 2015 14:26:14 +0900 Subject: [PATCH 014/349] remove unused bat file --- build_windows.bat | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 build_windows.bat diff --git a/build_windows.bat b/build_windows.bat deleted file mode 100644 index a71c0e07..00000000 --- a/build_windows.bat +++ /dev/null @@ -1,24 +0,0 @@ -set MSSdk=1 -set DISTUTILS_USE_SDK=1 - -rem Python27 x86 -rem call "C:\Program Files\Microsoft SDKs\Windows\v6.1\Bin\SetEnv.cmd" /Release /x86 /xp -call "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\vcvars32.bat" -c:\Python27\python setup.py build_ext -f build install -pause - -rem Python27 amd64 -rem call "C:\Program Files\Microsoft SDKs\Windows\v6.1\Bin\SetEnv.cmd" /Release /x64 /xp -call "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\vcvars64.bat" -c:\Python27_amd64\python setup.py build_ext -f build install -pause - -rem Python33 x86 -call "C:\Program Files\Microsoft SDKs\Windows\v7.1\bin\SetEnv.cmd" /Release /x86 /xp -c:\Python33\python setup.py build_ext -f build install -pause - -rem Python33 amd64 -call "C:\Program Files\Microsoft SDKs\Windows\v7.1\bin\SetEnv.cmd" /Release /x64 /xp -c:\Python33_amd64\python setup.py build_ext -f build install -pause From c102e6cee58df543d352e36f6d2d0bdd595e1063 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 7 Nov 2015 14:30:05 +0900 Subject: [PATCH 015/349] executable setup.py --- setup.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 setup.py diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 From ab359e333044fc89559b946cfec8efb83c2c6122 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 7 Nov 2015 16:45:30 +0900 Subject: [PATCH 016/349] Update travis setting --- .travis.yml | 11 
+++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index ddd52549..76951843 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,5 @@ sudo: false -cache: - directories: - - wheelhouse - +cache: pip language: python python: - 2.7 @@ -19,10 +16,8 @@ env: - TOXENV=pypy-pure,pypy3-pure install: - - pip install wheel tox - - ls -la wheelhouse - - if [ ! -f wheelhouse/Cython-0.22-cp27-none-linux_x86_64.whl ] ; then pip wheel cython==0.22 ; fi - - pip install wheelhouse/Cython-0.22-cp27-none-linux_x86_64.whl + - pip install tox + - pip install cython --install-option=--cython-with-refnanny --install-option=--no-cython-compile - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx script: tox From e9ab4d8824fddd070bac7cedca332130bf2028b0 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 7 Nov 2015 16:30:18 +0900 Subject: [PATCH 017/349] Fix warnings fixes #146 --- msgpack/_unpacker.pyx | 24 ++++++++++++------------ msgpack/unpack.h | 2 +- msgpack/unpack_template.h | 10 +++++----- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index d53f7241..997979c1 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -37,10 +37,10 @@ cdef extern from "unpack.h": ctypedef struct unpack_context: msgpack_user user PyObject* obj - size_t count + Py_ssize_t count ctypedef int (*execute_fn)(unpack_context* ctx, const char* data, - size_t len, size_t* off) except? -1 + Py_ssize_t len, Py_ssize_t* off) except? -1 execute_fn unpack_construct execute_fn unpack_skip execute_fn read_array_header @@ -112,7 +112,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, See :class:`Unpacker` for options. """ cdef unpack_context ctx - cdef size_t off = 0 + cdef Py_ssize_t off = 0 cdef int ret cdef char* buf @@ -142,7 +142,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj else: - raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + raise UnpackValueError("Unpack failed: error = %s" % (ret,)) def unpack(object stream, object object_hook=None, object list_hook=None, @@ -233,14 +233,14 @@ cdef class Unpacker(object): """ cdef unpack_context ctx cdef char* buf - cdef size_t buf_size, buf_head, buf_tail + cdef Py_ssize_t buf_size, buf_head, buf_tail cdef object file_like cdef object file_like_read cdef Py_ssize_t read_size # To maintain refcnt. 
cdef object object_hook, object_pairs_hook, list_hook, ext_hook cdef object encoding, unicode_errors - cdef size_t max_buffer_size + cdef Py_ssize_t max_buffer_size def __cinit__(self): self.buf = NULL @@ -325,10 +325,10 @@ cdef class Unpacker(object): cdef: char* buf = self.buf char* new_buf - size_t head = self.buf_head - size_t tail = self.buf_tail - size_t buf_size = self.buf_size - size_t new_size + Py_ssize_t head = self.buf_head + Py_ssize_t tail = self.buf_tail + Py_ssize_t buf_size = self.buf_size + Py_ssize_t new_size if tail + _buf_len > buf_size: if ((tail - head) + _buf_len) <= buf_size: @@ -374,7 +374,7 @@ cdef class Unpacker(object): cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0): cdef int ret cdef object obj - cdef size_t prev_head + cdef Py_ssize_t prev_head if self.buf_head >= self.buf_tail and self.file_like is not None: self.read_from_file() @@ -408,7 +408,7 @@ cdef class Unpacker(object): def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" - cdef size_t nread + cdef Py_ssize_t nread nread = min(self.buf_tail - self.buf_head, nbytes) ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) self.buf_head += nread diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 5deb7cde..297bc93d 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -33,7 +33,7 @@ typedef struct unpack_user { typedef PyObject* msgpack_unpack_object; struct unpack_context; typedef struct unpack_context unpack_context; -typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); +typedef int (*execute_fn)(unpack_context *ctx, const char* data, Py_ssize_t len, Py_ssize_t* off); static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) { diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index d34eceda..5b389b81 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -24,8 +24,8 @@ typedef struct unpack_stack { PyObject* obj; - size_t size; - size_t count; + Py_ssize_t size; + Py_ssize_t count; unsigned int ct; PyObject* map_key; } unpack_stack; @@ -72,7 +72,7 @@ static inline PyObject* unpack_data(unpack_context* ctx) template -static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off) +static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { assert(len >= *off); @@ -89,7 +89,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l */ unpack_user* user = &ctx->user; - PyObject* obj; + PyObject* obj = NULL; unpack_stack* c = NULL; int ret; @@ -409,7 +409,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l #undef start_container template -static inline int unpack_container_header(unpack_context* ctx, const char* data, size_t len, size_t* off) +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { assert(len >= *off); uint32_t size; From 35a69ac9c2fbf6b68b970352791f6d98fbd74963 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 8 Nov 2015 12:38:38 +0900 Subject: [PATCH 018/349] Decrease refcnt when error happend while unpacking Fixes #152 --- msgpack/_unpacker.pyx | 5 +++-- msgpack/unpack_template.h | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 997979c1..d359e570 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -47,6 +47,7 @@ cdef 
extern from "unpack.h": execute_fn read_map_header void unpack_init(unpack_context* ctx) object unpack_data(unpack_context* ctx) + void unpack_clear(unpack_context* ctx) cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, @@ -141,8 +142,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, if off < buf_len: raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj - else: - raise UnpackValueError("Unpack failed: error = %s" % (ret,)) + unpack_clear(&ctx) + raise UnpackValueError("Unpack failed: error = %d" % (ret,)) def unpack(object stream, object object_hook=None, object list_hook=None, diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 5b389b81..6b83d3e7 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -70,6 +70,10 @@ static inline PyObject* unpack_data(unpack_context* ctx) return (ctx)->stack[0].obj; } +static inline PyObject* unpack_clear(unpack_context *ctx) +{ + Py_CLEAR(ctx->stack[0].obj); +} template static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) From 02611afd5f32aa173f9ba8a777ba49fa6b4f67c5 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 8 Nov 2015 17:29:09 +0900 Subject: [PATCH 019/349] Drpo pypip.in badge It downs so long --- README.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.rst b/README.rst index ba2c1b51..baa825ad 100644 --- a/README.rst +++ b/README.rst @@ -9,9 +9,6 @@ MessagePack for Python .. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png :target: https://travis-ci.org/#!/msgpack/msgpack-python -.. image:: https://pypip.in/version/msgpack-python/badge.svg - :target: https://pypi.python.org/pypi/msgpack-python/ - :alt: Latest Version What's this ------------ From 6f208abbc7c9567145bf8c17bc41ba7c30ade0be Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 8 Nov 2015 17:34:52 +0900 Subject: [PATCH 020/349] Update Windows compiler information --- README.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index baa825ad..456de2b9 100644 --- a/README.rst +++ b/README.rst @@ -33,11 +33,15 @@ Windows ^^^^^^^ When you can't use binary distribution, you need to install Visual Studio -or Windows SDK on Windows. (NOTE: Visual C++ Express 2010 doesn't support -amd64. Windows SDK is recommended way to build amd64 msgpack without any fee.) - +or Windows SDK on Windows. Without extension, using pure python implementation on CPython runs slowly. +For Python 2.7, [Microsoft Visual C++ Compiler for Python 2.7](https://www.microsoft.com/en-us/download/details.aspx?id=44266) +is recommended solution. + +For Python 3.5, [Microsoft Visual Studio 2015](https://www.visualstudio.com/en-us/products/vs-2015-product-editions.aspx) +Community Edition or Express Edition can be used to build extension module. 
+ Notes ----- From 53fcd9b9df1c1f84719dfc9744217a1c3a025ef5 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 8 Nov 2015 19:37:40 +0900 Subject: [PATCH 021/349] Update gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 1bd68b49..70f5746e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ dist/* msgpack/__version__.py msgpack/*.cpp *.egg-info +/venv +/tags +/docs/_build From e601ef4c23c6bfa64f86dcb91d28f370e77b17bc Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 00:43:52 +0900 Subject: [PATCH 022/349] Remove `msgpack 2.0` from README There are no versio in spec. --- README.rst | 109 +++++++++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 50 deletions(-) diff --git a/README.rst b/README.rst index 456de2b9..30453bd6 100644 --- a/README.rst +++ b/README.rst @@ -42,54 +42,6 @@ is recommended solution. For Python 3.5, [Microsoft Visual Studio 2015](https://www.visualstudio.com/en-us/products/vs-2015-product-editions.aspx) Community Edition or Express Edition can be used to build extension module. -Notes ------ - -Note for msgpack 2.0 support -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -msgpack 2.0 adds two types: *bin* and *ext*. - -*raw* was bytes or string type like Python 2's ``str``. -To distinguish string and bytes, msgpack 2.0 adds *bin*. -It is non-string binary like Python 3's ``bytes``. - -To use *bin* type for packing ``bytes``, pass ``use_bin_type=True`` to -packer argument. - -.. code-block:: pycon - - >>> import msgpack - >>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True) - >>> msgpack.unpackb(packed, encoding='utf-8') - ['spam', u'egg'] - -You shoud use it carefully. When you use ``use_bin_type=True``, packed -binary can be unpacked by unpackers supporting msgpack-2.0. - -To use *ext* type, pass ``msgpack.ExtType`` object to packer. - -.. code-block:: pycon - - >>> import msgpack - >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) - >>> msgpack.unpackb(packed) - ExtType(code=42, data='xyzzy') - -You can use it with ``default`` and ``ext_hook``. See below. - -Note for msgpack 0.2.x users -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The msgpack 0.3 have some incompatible changes. - -The default value of ``use_list`` keyword argument is ``True`` from 0.3. -You should pass the argument explicitly for backward compatibility. - -`Unpacker.unpack()` and some unpack methods now raises `OutOfData` -instead of `StopIteration`. -`StopIteration` is used for iterator protocol only. - How to use ----------- @@ -184,7 +136,7 @@ key-value pairs. Extended types ^^^^^^^^^^^^^^^ -It is also possible to pack/unpack custom data types using the msgpack 2.0 feature. +It is also possible to pack/unpack custom data types using the **ext** type. .. code-block:: pycon @@ -238,6 +190,58 @@ callback function: unpacker.skip(bytestream.write) worker.send(bytestream.getvalue()) + +Notes +----- + +string and binary type +^^^^^^^^^^^^^^^^^^^^^^ + +In old days, msgpack doesn't distinguish string and binary types like Python 1. +The type for represent string and binary types is named **raw**. + +msgpack can distinguish string and binary type for now. But it is not like Python 2. +Python 2 added unicode string. But msgpack renamed **raw** to **str** and added **bin** type. +It is because keep compatibility with data created by old libs. **raw** was used for text more than binary. 
+ +Currently, while msgpack-python supports new **bin** type, default setting doesn't use it and +decodes **raw** as `bytes` instead of `unicode` (`str` in Python 3). + +You can change this by using `use_bin_type=True` option in Packer and `encoding="utf-8"` option in Unpacker. + +.. code-block:: pycon + + >>> import msgpack + >>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True) + >>> msgpack.unpackb(packed, encoding='utf-8') + ['spam', u'egg'] + +ext type +^^^^^^^^ + +To use **ext** type, pass ``msgpack.ExtType`` object to packer. + +.. code-block:: pycon + + >>> import msgpack + >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) + >>> msgpack.unpackb(packed) + ExtType(code=42, data='xyzzy') + +You can use it with ``default`` and ``ext_hook``. See below. + +Note for msgpack-python 0.2.x users +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The msgpack-python 0.3 have some incompatible changes. + +The default value of ``use_list`` keyword argument is ``True`` from 0.3. +You should pass the argument explicitly for backward compatibility. + +`Unpacker.unpack()` and some unpack methods now raises `OutOfData` +instead of `StopIteration`. +`StopIteration` is used for iterator protocol only. + Note about performance ------------------------ @@ -259,12 +263,17 @@ Python's dict can't use list as key and MessagePack allows array for key of mapp Another way to unpacking such object is using ``object_pairs_hook``. +Development +------------ + Test ----- +^^^^ + MessagePack uses `pytest` for testing. Run test with following command: $ py.test + .. vim: filetype=rst From 8aadc5c380d0d135273729333ace91d3f689702d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 00:50:07 +0900 Subject: [PATCH 023/349] readme: Fix markup --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 30453bd6..d32ec1d4 100644 --- a/README.rst +++ b/README.rst @@ -36,10 +36,10 @@ When you can't use binary distribution, you need to install Visual Studio or Windows SDK on Windows. Without extension, using pure python implementation on CPython runs slowly. -For Python 2.7, [Microsoft Visual C++ Compiler for Python 2.7](https://www.microsoft.com/en-us/download/details.aspx?id=44266) +For Python 2.7, `Microsoft Visual C++ Compiler for Python 2.7 `_ is recommended solution. -For Python 3.5, [Microsoft Visual Studio 2015](https://www.visualstudio.com/en-us/products/vs-2015-product-editions.aspx) +For Python 3.5, `Microsoft Visual Studio 2015 `_ Community Edition or Express Edition can be used to build extension module. 
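
[Editor's note between patches 023 and 024] The README text added in the patches above mentions ``object_pairs_hook`` as the alternative for unpacking maps whose keys cannot become Python dict keys, but stops short of showing it. A minimal sketch of that approach follows; the tuple key, the sample data, and the ``object_pairs_hook=list`` / lambda choices are illustrative, not part of the patches themselves.

.. code-block:: python

    import msgpack

    # Illustrative data: a MessagePack map may use an array as its key,
    # and packing a Python tuple key produces exactly that.
    packed = msgpack.packb({(1, 2): "north-east"})

    # Default unpacking builds a dict, so the array key would come back as an
    # unhashable list and fail.  object_pairs_hook receives the (key, value)
    # pairs instead, letting the caller decide how to represent the mapping.
    pairs = msgpack.unpackb(packed, object_pairs_hook=list)
    # e.g. [([1, 2], b'north-east')] -- the value stays bytes without encoding='utf-8'

    # Or rebuild a dict, turning array keys back into hashable tuples.
    as_dict = msgpack.unpackb(
        packed,
        object_pairs_hook=lambda kvs: dict((tuple(k), v) for k, v in kvs),
    )
    # e.g. {(1, 2): b'north-east'}
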
From 3cef27b69b250f9abf126c85578a821c2d21e72e Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 00:54:06 +0900 Subject: [PATCH 024/349] Update ChangeLog --- ChangeLog.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index 34f4cd4f..31a64d98 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,20 @@ +0.4.7 +===== +:release date: TBD + +Bugs fixed +---------- + +* Memory leak when unpack is failed + +Changes +------- + +* Reduce compiler warnings while building extension module +* unpack() now accepts ext_hook argument like Unpacker and unpackb() +* Update Cython version to 0.23.4 + + 0.4.6 ===== :release date: 2015-03-13 From de3c2b99f78d134c326bc375f19f54b7c851797a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 01:50:40 +0900 Subject: [PATCH 025/349] refactor C code fixes #137 --- msgpack/pack.h | 34 ---------------------------------- msgpack/unpack.h | 4 ++-- msgpack/unpack_template.h | 2 +- 3 files changed, 3 insertions(+), 37 deletions(-) diff --git a/msgpack/pack.h b/msgpack/pack.h index 971065cc..a75bdb04 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -39,40 +39,6 @@ typedef struct msgpack_packer { typedef struct Packer Packer; -static inline int msgpack_pack_int(msgpack_packer* pk, int d); -static inline int msgpack_pack_long(msgpack_packer* pk, long d); -static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d); -static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d); -static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d); -static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d); -//static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); - -static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d); -static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d); -static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d); -static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d); -static inline int msgpack_pack_int8(msgpack_packer* pk, int8_t d); -static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d); -static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d); -static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d); - -static inline int msgpack_pack_float(msgpack_packer* pk, float d); -static inline int msgpack_pack_double(msgpack_packer* pk, double d); - -static inline int msgpack_pack_nil(msgpack_packer* pk); -static inline int msgpack_pack_true(msgpack_packer* pk); -static inline int msgpack_pack_false(msgpack_packer* pk); - -static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n); - -static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); - -static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); -static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); -static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); - -static inline int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l); - static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 297bc93d..92f4f118 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -69,7 +69,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned 
PY_LONG_LONG)d); } else { - p = PyInt_FromSize_t((size_t)d); + p = PyInt_FromLong((long)d); } if (!p) return -1; @@ -100,7 +100,7 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac { PyObject *p; if (d > LONG_MAX || d < LONG_MIN) { - p = PyLong_FromLongLong((unsigned PY_LONG_LONG)d); + p = PyLong_FromLongLong((PY_LONG_LONG)d); } else { p = PyInt_FromLong((long)d); } diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 6b83d3e7..e1e08fec 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -70,7 +70,7 @@ static inline PyObject* unpack_data(unpack_context* ctx) return (ctx)->stack[0].obj; } -static inline PyObject* unpack_clear(unpack_context *ctx) +static inline void unpack_clear(unpack_context *ctx) { Py_CLEAR(ctx->stack[0].obj); } From f7d3715f2cedb09babbcdd1950ecc50f4d673fbe Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 02:00:48 +0900 Subject: [PATCH 026/349] Add missing params to unpack() --- msgpack/_unpacker.pyx | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index d359e570..36cb78e5 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -148,7 +148,12 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", - object_pairs_hook=None, ext_hook=ExtType + object_pairs_hook=None, ext_hook=ExtType, + Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_bin_len=2147483647, + Py_ssize_t max_array_len=2147483647, + Py_ssize_t max_map_len=2147483647, + Py_ssize_t max_ext_len=2147483647): ): """ Unpack an object from `stream`. @@ -159,7 +164,12 @@ def unpack(object stream, object object_hook=None, object list_hook=None, """ return unpackb(stream.read(), use_list=use_list, object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, - encoding=encoding, unicode_errors=unicode_errors, ext_hook=ext_hook + encoding=encoding, unicode_errors=unicode_errors, ext_hook=ext_hook, + max_str_len=max_str_len, + max_bin_len=max_bin_len, + max_array_len=max_array_len, + max_map_len=max_map_len, + max_ext_len=max_ext_len, ) From 7d900371c8f13fa64f06aaf336b6ae65c705bf2c Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 02:09:39 +0900 Subject: [PATCH 027/349] Fix compile error --- msgpack/_unpacker.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 36cb78e5..aec3b7d1 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -154,7 +154,6 @@ def unpack(object stream, object object_hook=None, object list_hook=None, Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, Py_ssize_t max_ext_len=2147483647): - ): """ Unpack an object from `stream`. From a1317b604f7eb83575c4a0221db028c1929e8026 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 02:34:17 +0900 Subject: [PATCH 028/349] refactor --- msgpack/_packer.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 7129208b..0a6513c0 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -135,7 +135,7 @@ cdef class Packer(object): ret = msgpack_pack_false(&self.pk) elif PyLong_Check(o): # PyInt_Check(long) is True for Python 3. - # Sow we should test long before int. + # So we should test long before int. 
try: if o > 0: ullval = o @@ -143,7 +143,7 @@ cdef class Packer(object): else: llval = o ret = msgpack_pack_long_long(&self.pk, llval) - except OverflowError, oe: + except OverflowError as oe: if not default_used and self._default is not None: o = self._default(o) default_used = True From 29266b024ec82c281fa63bf2b73622088a8c188c Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 9 Nov 2015 02:34:28 +0900 Subject: [PATCH 029/349] Update ChangeLog --- ChangeLog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index 31a64d98..35535b4a 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -13,6 +13,7 @@ Changes * Reduce compiler warnings while building extension module * unpack() now accepts ext_hook argument like Unpacker and unpackb() * Update Cython version to 0.23.4 +* default function is called when integer overflow 0.4.6 From cbdf3c339a2c4c6b40a7e75dd96e457a024937f6 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 10 Nov 2015 03:30:11 +0900 Subject: [PATCH 030/349] s/precise_mode/strict_types/ --- msgpack/_packer.pyx | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index c015fcb2..50d19ff6 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -55,7 +55,7 @@ cdef class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') - :param bool precise_mode: + :param bool strict_types: If set to true, types will be checked to be exact. Derived classes from serializeable types will not be serialized and will be treated as unsupported type and forwarded to default. @@ -77,7 +77,7 @@ cdef class Packer(object): cdef object _berrors cdef char *encoding cdef char *unicode_errors - cdef bint precise_mode + cdef bint strict_types cdef bool use_float cdef bint autoreset @@ -91,11 +91,11 @@ cdef class Packer(object): def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, bint autoreset=1, bint use_bin_type=0, - bint precise_mode=0): + bint strict_types=0): """ """ self.use_float = use_single_float - self.precise_mode = precise_mode + self.strict_types = strict_types self.autoreset = autoreset self.pk.use_bin_type = use_bin_type if default is not None: @@ -131,7 +131,7 @@ cdef class Packer(object): cdef dict d cdef size_t L cdef int default_used = 0 - cdef bint precise = self.precise_mode + cdef bint strict_types = self.strict_types if nest_limit < 0: raise PackValueError("recursion limit exceeded.") @@ -139,12 +139,12 @@ cdef class Packer(object): while True: if o is None: ret = msgpack_pack_nil(&self.pk) - elif PyBool_Check(o) if precise else isinstance(o, bool): + elif PyBool_Check(o) if strict_types else isinstance(o, bool): if o: ret = msgpack_pack_true(&self.pk) else: ret = msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if precise else PyLong_Check(o): + elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): # PyInt_Check(long) is True for Python 3. # So we should test long before int. 
try: @@ -161,17 +161,17 @@ cdef class Packer(object): continue else: raise - elif PyInt_CheckExact(o) if precise else PyInt_Check(o): + elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): longval = o ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_CheckExact(o) if precise else PyFloat_Check(o): + elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): if self.use_float: fval = o ret = msgpack_pack_float(&self.pk, fval) else: dval = o ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_CheckExact(o) if precise else PyBytes_Check(o): + elif PyBytes_CheckExact(o) if strict_types else PyBytes_Check(o): L = len(o) if L > (2**32)-1: raise ValueError("bytes is too large") @@ -179,7 +179,7 @@ cdef class Packer(object): ret = msgpack_pack_bin(&self.pk, L) if ret == 0: ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if precise else PyUnicode_Check(o): + elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): if not self.encoding: raise TypeError("Can't encode unicode string: no encoding is specified") o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) @@ -202,7 +202,7 @@ cdef class Packer(object): if ret != 0: break ret = self._pack(v, nest_limit-1) if ret != 0: break - elif not precise and PyDict_Check(o): + elif not strict_types and PyDict_Check(o): L = len(o) if L > (2**32)-1: raise ValueError("dict is too large") @@ -222,7 +222,7 @@ cdef class Packer(object): raise ValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyList_CheckExact(o) if precise else (PyTuple_Check(o) or PyList_Check(o)): + elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): L = len(o) if L > (2**32)-1: raise ValueError("list is too large") From 1032ef9bf2baef73b04f209181e42978ab4c71fe Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 10 Nov 2015 03:33:50 +0900 Subject: [PATCH 031/349] fallback unpacker: precise => strict --- msgpack/fallback.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 0e37f74c..cffecca6 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -69,6 +69,13 @@ def getvalue(self): DEFAULT_RECURSE_LIMIT = 511 +def _check_type_strict(obj, t, type=type, tuple=tuple): + if type(t) is tuple: + return type(obj) in t + else: + return type(obj) is t + + def unpack(stream, **kwargs): """ Unpack an object from `stream`. @@ -601,7 +608,7 @@ class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') - :param bool precise_mode: + :param bool strict_types: If set to true, types will be checked to be exact. Derived classes from serializeable types will not be serialized and will be treated as unsupported type and forwarded to default. @@ -618,9 +625,9 @@ class Packer(object): It also enable str8 type for unicode. 
""" def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - precise_mode=False, use_single_float=False, autoreset=True, + strict_types=False, use_single_float=False, autoreset=True, use_bin_type=False): - self._precise_mode = precise_mode + self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type @@ -632,17 +639,11 @@ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', raise TypeError("default must be callable") self._default = default - def _check_precise(obj, t, type=type, tuple=tuple): - if type(t) is tuple: - return type(obj) in t - else: - return type(obj) is t - def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, - check=isinstance, check_precise=_check_precise): + check=isinstance, check_type_strict=_check_type_strict): default_used = False - if self._precise_mode: - check = check_precise + if self._strict_types: + check = check_type_strict list_types = list else: list_types = (list, tuple) From 9b673279d36468e3334b513b5e86d40cba4c4acc Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 10 Nov 2015 03:37:54 +0900 Subject: [PATCH 032/349] strict_types should be last argument --- msgpack/_packer.pyx | 14 +++++++------- msgpack/fallback.py | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 50d19ff6..c8d4fd1d 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -55,13 +55,6 @@ cdef class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') - :param bool strict_types: - If set to true, types will be checked to be exact. Derived classes - from serializeable types will not be serialized and will be - treated as unsupported type and forwarded to default. - Additionally tuples will not be serialized as lists. - This is useful when trying to implement accurate serialization - for python types. :param bool use_single_float: Use single precision float type for float. (default: False) :param bool autoreset: @@ -70,6 +63,13 @@ cdef class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enable str8 type for unicode. + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializeable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. """ cdef msgpack_packer pk cdef object _default diff --git a/msgpack/fallback.py b/msgpack/fallback.py index cffecca6..11280edd 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -608,13 +608,6 @@ class Packer(object): Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') - :param bool strict_types: - If set to true, types will be checked to be exact. Derived classes - from serializeable types will not be serialized and will be - treated as unsupported type and forwarded to default. - Additionally tuples will not be serialized as lists. - This is useful when trying to implement accurate serialization - for python types. :param bool use_single_float: Use single precision float type for float. 
(default: False) :param bool autoreset: @@ -623,10 +616,17 @@ class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enable str8 type for unicode. + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializeable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. """ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - strict_types=False, use_single_float=False, autoreset=True, - use_bin_type=False): + use_single_float=False, autoreset=True, use_bin_type=False, + strict_types=False): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset From 628c5191873148a8372bae4ee99454ad13b7b492 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 10 Nov 2015 03:41:09 +0900 Subject: [PATCH 033/349] strict type check for ext type --- msgpack/_packer.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index c8d4fd1d..7c1e53d2 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -213,7 +213,7 @@ cdef class Packer(object): if ret != 0: break ret = self._pack(v, nest_limit-1) if ret != 0: break - elif isinstance(o, ExtType): + elif type(o) is ExtType if strict_types else isinstance(o, ExtType): # This should be before Tuple because ExtType is namedtuple. longval = o.code rawval = o.data From 6f38bf7dd4c45523c3b0e9e8af0e69d5af35708b Mon Sep 17 00:00:00 2001 From: Omer Katz Date: Tue, 17 Nov 2015 15:31:36 +0200 Subject: [PATCH 034/349] Added python 3.5 to tox.ini. --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 15feb513..56ded491 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py26,py27,py32,py33,py34}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = {py26,py27,py32,py33,py34,py35}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 [variants:pure] setenv= From 4d9684db0a7b2187f54f7a81a4ea64e981fa11ca Mon Sep 17 00:00:00 2001 From: Omer Katz Date: Tue, 17 Nov 2015 15:32:34 +0200 Subject: [PATCH 035/349] Added Python 3.5 to the build matrix. --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 76951843..22f19dce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,9 +10,9 @@ branches: env: - TOXENV=py26-c,py27-c - - TOXENV=py32-c,py33-c,py34-c + - TOXENV=py32-c,py33-c,py34-c,py35-c - TOXENV=py26-pure,py27-pure - - TOXENV=py32-pure,py33-pure,py34-pure + - TOXENV=py32-pure,py33-pure,py34-pure,py35-pure - TOXENV=pypy-pure,pypy3-pure install: From 81177caff7ddc9b193da9fcfd27fcd6807da7fe1 Mon Sep 17 00:00:00 2001 From: Omer Katz Date: Tue, 17 Nov 2015 16:57:25 +0200 Subject: [PATCH 036/349] Run the build with 3.5 since it's still not available by default in travis. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 22f19dce..7a80cdf5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ sudo: false cache: pip language: python python: - - 2.7 + - 3.5 branches: only: From e4aa43d76935ac44aecccecd30fb51d00854ffac Mon Sep 17 00:00:00 2001 From: Omer Katz Date: Tue, 17 Nov 2015 17:08:04 +0200 Subject: [PATCH 037/349] Travis will now cache dependencies despite having a custom install step. 
--- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 76951843..bc0efba0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,7 @@ sudo: false -cache: pip +cache: + directories: + - $HOME/.cache/pip language: python python: - 2.7 From 9c6584ee101d300fdf0621cac44c3dcb9df817ba Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 9 Dec 2015 13:53:42 +0100 Subject: [PATCH 038/349] fix typos --- msgpack/_unpacker.pyx | 2 +- msgpack/fallback.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index d53f7241..b98957ef 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -199,7 +199,7 @@ cdef class Unpacker(object): :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. - You shoud set this parameter when unpacking data from untrasted source. + You shoud set this parameter when unpacking data from untrusted source. :param int max_str_len: Limits max length of str. (default: 2**31-1) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 235c2017..5b5085e3 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -138,7 +138,7 @@ class Unpacker(object): :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. - You shoud set this parameter when unpacking data from untrasted source. + You shoud set this parameter when unpacking data from untrusted source. :param int max_str_len: Limits max length of str. (default: 2**31-1) From 83424bd7b3c4a417fe1dca4523adc177cee1bef9 Mon Sep 17 00:00:00 2001 From: Sadayuki Furuhashi Date: Mon, 11 Jan 2016 13:57:33 -0800 Subject: [PATCH 039/349] Fix wrong 'dict is too large' on unicode string --- msgpack/_packer.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 0a6513c0..6392655e 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -174,11 +174,11 @@ cdef class Packer(object): o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) if L > (2**32)-1: - raise ValueError("dict is too large") + raise ValueError("unicode string is too large") rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) + ret = msgpack_pack_raw(&self.pk, L) if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) + ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyDict_CheckExact(o): d = o L = len(d) From 1f8240eaf65b28e93621a8e35f1078d4292047f1 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 25 Jan 2016 01:10:50 +0900 Subject: [PATCH 040/349] 0.4.7 --- msgpack/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index 2c1c96c0..37c172db 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 4, 6) +version = (0, 4, 7) From 3a8bb070f76af211356fef9e0395e1429e81a9b2 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 25 Jan 2016 01:12:56 +0900 Subject: [PATCH 041/349] Update ChangeLog --- ChangeLog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 35535b4a..f20bb75f 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,6 +1,6 @@ 0.4.7 ===== -:release date: TBD +:release date: 2016-01-25 Bugs fixed ---------- From 005739388d4a611979ae029fab2a1982a366a285 Mon Sep 17 
00:00:00 2001 From: INADA Naoki Date: Mon, 25 Jan 2016 01:15:51 +0900 Subject: [PATCH 042/349] Drop Python 2.6, 3.2 support --- .travis.yml | 6 ++---- tox.ini | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2ba2caa7..eced3536 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,10 +11,8 @@ branches: - master env: - - TOXENV=py26-c,py27-c - - TOXENV=py32-c,py33-c,py34-c,py35-c - - TOXENV=py26-pure,py27-pure - - TOXENV=py32-pure,py33-pure,py34-pure,py35-pure + - TOXENV=py27-c,py33-c,py34-c,py35-c + - TOXENV=py27-pure,py33-pure,py34-pure,py35-pure - TOXENV=pypy-pure,pypy3-pure install: diff --git a/tox.ini b/tox.ini index 56ded491..b6e7a7f8 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py26,py27,py32,py33,py34,py35}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = {py27,py33,py34,py35}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 [variants:pure] setenv= @@ -36,4 +36,3 @@ commands= python -c 'import sys; print(hex(sys.maxsize))' python -c 'from msgpack import _packer, _unpacker' py.test - From a779b79b47529f84cd71593f284788d939226d66 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 25 Jan 2016 02:18:25 +0900 Subject: [PATCH 043/349] Add test for strict_types option --- test/test_stricttype.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 test/test_stricttype.py diff --git a/test/test_stricttype.py b/test/test_stricttype.py new file mode 100644 index 00000000..a20b5eb7 --- /dev/null +++ b/test/test_stricttype.py @@ -0,0 +1,15 @@ +# coding: utf-8 + +from collections import namedtuple +from msgpack import packb, unpackb + + +def test_namedtuple(): + T = namedtuple('T', "foo bar") + def default(o): + if isinstance(o, T): + return dict(o._asdict()) + raise TypeError('Unsupported type %s' % (type(o),)) + packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) + unpacked = unpackb(packed, encoding='utf-8') + assert unpacked == {'foo': 1, 'bar': 42} From 31adc5a3c09a5f3506db192e1fb8b7ca4b72d974 Mon Sep 17 00:00:00 2001 From: folz Date: Thu, 12 Nov 2015 11:49:19 +0100 Subject: [PATCH 044/349] Support packing memoryview objects --- msgpack/_packer.pyx | 17 +++++++++++++++++ msgpack/fallback.py | 6 ++++-- test/test_memoryview.py | 12 ++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 test/test_memoryview.py diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index c3ef1a42..b19d462b 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -10,6 +10,11 @@ from msgpack.exceptions import PackValueError from msgpack import ExtType +cdef extern from "Python.h": + + int PyMemoryView_Check(object obj) + + cdef extern from "pack.h": struct msgpack_packer: char* buf @@ -132,6 +137,7 @@ cdef class Packer(object): cdef size_t L cdef int default_used = 0 cdef bint strict_types = self.strict_types + cdef Py_buffer view if nest_limit < 0: raise PackValueError("recursion limit exceeded.") @@ -231,6 +237,17 @@ cdef class Packer(object): for v in o: ret = self._pack(v, nest_limit-1) if ret != 0: break + elif PyMemoryView_Check(o): + if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: + raise ValueError("could not get buffer for memoryview") + L = view.len + if L > (2**32)-1: + PyBuffer_Release(&view); + raise ValueError("memoryview is too large") + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, view.buf, L) + PyBuffer_Release(&view); elif not default_used and self._default: o = self._default(o) 
default_used = 1 diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 40c54a80..348e0179 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -36,6 +36,8 @@ def __init__(self, s=b''): else: self.builder = StringBuilder() def write(self, s): + if isinstance(s, memoryview): + s = s.tobytes() self.builder.append(s) def getvalue(self): return self.builder.build() @@ -682,7 +684,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, default_used = True continue raise PackValueError("Integer value out of range") - if self._use_bin_type and check(obj, bytes): + if self._use_bin_type and check(obj, (bytes, memoryview)): n = len(obj) if n <= 0xff: self._buffer.write(struct.pack('>BB', 0xc4, n)) @@ -693,7 +695,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, else: raise PackValueError("Bytes is too large") return self._buffer.write(obj) - if check(obj, (Unicode, bytes)): + if check(obj, (Unicode, bytes, memoryview)): if check(obj, Unicode): if self._encoding is None: raise TypeError( diff --git a/test/test_memoryview.py b/test/test_memoryview.py new file mode 100644 index 00000000..aed50696 --- /dev/null +++ b/test/test_memoryview.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# coding: utf-8 + + +from msgpack import packb, unpackb + + +def test_pack_memoryview(): + data = bytearray(range(256)) + view = memoryview(data) + unpacked = unpackb(packb(view)) + assert data == unpacked From 7d2d46effce37f9fbf394fac74d380aaa7c95f02 Mon Sep 17 00:00:00 2001 From: palaviv Date: Fri, 12 Feb 2016 11:00:39 +0200 Subject: [PATCH 045/349] msgpack pack and unpack throws only exception that inherit from MsgpackBaseException. cython and fallback throws same exceptions --- msgpack/_packer.pyx | 14 ++++++------ msgpack/exceptions.py | 12 ++++++++-- msgpack/fallback.py | 51 ++++++++++++++++++++++--------------------- test/test_limits.py | 46 +++++++++++++++++++++++--------------- 4 files changed, 71 insertions(+), 52 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index b19d462b..82168130 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -6,7 +6,7 @@ from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from msgpack.exceptions import PackValueError +from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType @@ -166,7 +166,7 @@ cdef class Packer(object): default_used = True continue else: - raise + raise PackOverflowError("Integer value out of range") elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): longval = o ret = msgpack_pack_long(&self.pk, longval) @@ -180,7 +180,7 @@ cdef class Packer(object): elif PyBytes_CheckExact(o) if strict_types else PyBytes_Check(o): L = len(o) if L > (2**32)-1: - raise ValueError("bytes is too large") + raise PackValueError("bytes is too large") rawval = o ret = msgpack_pack_bin(&self.pk, L) if ret == 0: @@ -191,7 +191,7 @@ cdef class Packer(object): o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) if L > (2**32)-1: - raise ValueError("unicode string is too large") + raise PackValueError("unicode string is too large") rawval = o ret = msgpack_pack_raw(&self.pk, L) if ret == 0: @@ -211,7 +211,7 @@ cdef class Packer(object): elif not strict_types and PyDict_Check(o): L = len(o) if L > (2**32)-1: - raise ValueError("dict is too large") + raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: for k, v in o.items(): @@ -225,13 +225,13 @@ cdef class Packer(object): rawval = o.data L = 
len(o.data) if L > (2**32)-1: - raise ValueError("EXT data is too large") + raise PackValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): L = len(o) if L > (2**32)-1: - raise ValueError("list is too large") + raise PackValueError("list is too large") ret = msgpack_pack_array(&self.pk, L) if ret == 0: for v in o: diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index f7678f13..e0d5b5f5 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -1,4 +1,8 @@ -class UnpackException(Exception): +class MsgpackBaseException(Exception): + pass + + +class UnpackException(MsgpackBaseException): pass @@ -22,8 +26,12 @@ def __init__(self, unpacked, extra): def __str__(self): return "unpack(b) received extra data." -class PackException(Exception): +class PackException(MsgpackBaseException): pass class PackValueError(PackException, ValueError): pass + + +class PackOverflowError(PackValueError, OverflowError): + pass diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 348e0179..d8c5d738 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -51,6 +51,7 @@ def getvalue(self): OutOfData, UnpackValueError, PackValueError, + PackOverflowError, ExtraData) from msgpack import ExtType @@ -363,17 +364,17 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): obj = self._fb_read(n, write_bytes) typ = TYPE_RAW if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) elif b == 0xc0: obj = None elif b == 0xc2: @@ -384,37 +385,37 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): typ = TYPE_BIN n = struct.unpack("B", self._fb_read(1, write_bytes))[0] if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._fb_read(n, write_bytes) elif b == 0xc5: typ = TYPE_BIN n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._fb_read(n, write_bytes) elif b == 0xc6: typ = TYPE_BIN n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._fb_read(n, write_bytes) elif b == 0xc7: # ext 8 typ = TYPE_EXT L, n = struct.unpack('Bb', self._fb_read(2, write_bytes)) if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._fb_read(L, write_bytes) elif b == 0xc8: # ext 16 
typ = TYPE_EXT L, n = struct.unpack('>Hb', self._fb_read(3, write_bytes)) if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._fb_read(L, write_bytes) elif b == 0xc9: # ext 32 typ = TYPE_EXT L, n = struct.unpack('>Ib', self._fb_read(5, write_bytes)) if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._fb_read(L, write_bytes) elif b == 0xca: obj = struct.unpack(">f", self._fb_read(4, write_bytes))[0] @@ -439,65 +440,65 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: - raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) n, obj = struct.unpack('b1s', self._fb_read(2, write_bytes)) elif b == 0xd5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: - raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) n, obj = struct.unpack('b2s', self._fb_read(3, write_bytes)) elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: - raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) n, obj = struct.unpack('b4s', self._fb_read(5, write_bytes)) elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: - raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) n, obj = struct.unpack('b8s', self._fb_read(9, write_bytes)) elif b == 0xd8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: - raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) n, obj = struct.unpack('b16s', self._fb_read(17, write_bytes)) elif b == 0xd9: typ = TYPE_RAW n = struct.unpack("B", self._fb_read(1, write_bytes))[0] if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._fb_read(n, write_bytes) elif b == 0xda: typ = TYPE_RAW n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._fb_read(n, write_bytes) elif b == 0xdb: typ = TYPE_RAW n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._fb_read(n, write_bytes) elif b == 0xdc: n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) typ = TYPE_ARRAY elif b == 0xdd: n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise 
UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) typ = TYPE_ARRAY elif b == 0xde: n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP else: raise UnpackValueError("Unknown header: 0x%x" % b) @@ -683,7 +684,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, obj = self._default(obj) default_used = True continue - raise PackValueError("Integer value out of range") + raise PackOverflowError("Integer value out of range") if self._use_bin_type and check(obj, (bytes, memoryview)): n = len(obj) if n <= 0xff: @@ -778,7 +779,7 @@ def pack_map_pairs(self, pairs): def pack_array_header(self, n): if n >= 2**32: - raise ValueError + raise PackValueError self._fb_pack_array_header(n) ret = self._buffer.getvalue() if self._autoreset: @@ -789,7 +790,7 @@ def pack_array_header(self, n): def pack_map_header(self, n): if n >= 2**32: - raise ValueError + raise PackValueError self._fb_pack_map_header(n) ret = self._buffer.getvalue() if self._autoreset: @@ -807,7 +808,7 @@ def pack_ext_type(self, typecode, data): raise TypeError("data must have bytes type") L = len(data) if L > 0xffffffff: - raise ValueError("Too large data") + raise PackValueError("Too large data") if L == 1: self._buffer.write(b'\xd4') elif L == 2: diff --git a/test/test_limits.py b/test/test_limits.py index 3c1cf2ab..34acf7c8 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -3,36 +3,42 @@ from __future__ import absolute_import, division, print_function, unicode_literals import pytest -from msgpack import packb, unpackb, Packer, Unpacker, ExtType +from msgpack import packb, unpackb, Packer, Unpacker, ExtType, PackException, PackOverflowError, PackValueError +from msgpack import UnpackValueError, UnpackException, MsgpackBaseException -def test_integer(): +@pytest.mark.parametrize("expected_exception", [OverflowError, ValueError, PackOverflowError, + PackException, PackValueError, MsgpackBaseException]) +def test_integer(expected_exception): x = -(2 ** 63) assert unpackb(packb(x)) == x - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(expected_exception): packb(x-1) x = 2 ** 64 - 1 assert unpackb(packb(x)) == x - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(expected_exception): packb(x+1) -def test_array_header(): +@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError, MsgpackBaseException]) +def test_array_header(expected_exception): packer = Packer() packer.pack_array_header(2**32-1) - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(expected_exception): packer.pack_array_header(2**32) -def test_map_header(): +@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError, MsgpackBaseException]) +def test_map_header(expected_exception): packer = Packer() packer.pack_map_header(2**32-1) - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(expected_exception): packer.pack_array_header(2**32) -def test_max_str_len(): +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, 
UnpackException, MsgpackBaseException]) +def test_max_str_len(expected_exception): d = 'x' * 3 packed = packb(d) @@ -41,12 +47,13 @@ def test_max_str_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_str_len=2, encoding='utf-8') - with pytest.raises(ValueError): + with pytest.raises(expected_exception): unpacker.feed(packed) unpacker.unpack() -def test_max_bin_len(): +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +def test_max_bin_len(expected_exception): d = b'x' * 3 packed = packb(d, use_bin_type=True) @@ -55,12 +62,13 @@ def test_max_bin_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_bin_len=2) - with pytest.raises(ValueError): + with pytest.raises(expected_exception): unpacker.feed(packed) unpacker.unpack() -def test_max_array_len(): +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +def test_max_array_len(expected_exception): d = [1,2,3] packed = packb(d) @@ -69,12 +77,13 @@ def test_max_array_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_array_len=2) - with pytest.raises(ValueError): + with pytest.raises(expected_exception): unpacker.feed(packed) unpacker.unpack() -def test_max_map_len(): +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +def test_max_map_len(expected_exception): d = {1: 2, 3: 4, 5: 6} packed = packb(d) @@ -83,12 +92,13 @@ def test_max_map_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_map_len=2) - with pytest.raises(ValueError): + with pytest.raises(expected_exception): unpacker.feed(packed) unpacker.unpack() -def test_max_ext_len(): +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +def test_max_ext_len(expected_exception): d = ExtType(42, b"abc") packed = packb(d) @@ -97,7 +107,7 @@ def test_max_ext_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_ext_len=2) - with pytest.raises(ValueError): + with pytest.raises(expected_exception): unpacker.feed(packed) unpacker.unpack() From d44063119bf11fa5c4b559f9e246df60058bfe31 Mon Sep 17 00:00:00 2001 From: palaviv Date: Fri, 12 Feb 2016 15:36:48 +0200 Subject: [PATCH 046/349] changed more ValueErrors to PackValueError --- msgpack/_packer.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 82168130..e9238950 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -200,7 +200,7 @@ cdef class Packer(object): d = o L = len(d) if L > (2**32)-1: - raise ValueError("dict is too large") + raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: for k, v in d.iteritems(): @@ -239,11 +239,11 @@ cdef class Packer(object): if ret != 0: break elif PyMemoryView_Check(o): if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: - raise ValueError("could not get buffer for memoryview") + raise PackValueError("could not get buffer for memoryview") L = view.len if L > (2**32)-1: PyBuffer_Release(&view); - raise ValueError("memoryview is too large") + raise PackValueError("memoryview is too large") ret = msgpack_pack_bin(&self.pk, L) if ret == 0: ret = msgpack_pack_raw_body(&self.pk, view.buf, L) @@ -274,7 +274,7 @@ cdef class Packer(object): def pack_array_header(self, size_t size): if size > (2**32-1): - raise ValueError + raise PackValueError cdef int ret = msgpack_pack_array(&self.pk, 
size) if ret == -1: raise MemoryError @@ -287,7 +287,7 @@ cdef class Packer(object): def pack_map_header(self, size_t size): if size > (2**32-1): - raise ValueError + raise PackValueError cdef int ret = msgpack_pack_map(&self.pk, size) if ret == -1: raise MemoryError From 1183eff688189b0e94ea9e15c5ae13c2f757d745 Mon Sep 17 00:00:00 2001 From: palaviv Date: Fri, 12 Feb 2016 15:37:39 +0200 Subject: [PATCH 047/349] reraising ValueError from unpack.h as UnpackValueError --- msgpack/_unpacker.pyx | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 1aefc643..90ebf7d5 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -397,24 +397,27 @@ cdef class Unpacker(object): else: raise OutOfData("No more data to unpack.") - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - if write_bytes is not None: - write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) - - if ret == 1: - obj = unpack_data(&self.ctx) - unpack_init(&self.ctx) - return obj - elif ret == 0: - if self.file_like is not None: - self.read_from_file() - continue - if iter: - raise StopIteration("No more data to unpack.") + try: + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + if write_bytes is not None: + write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) + + if ret == 1: + obj = unpack_data(&self.ctx) + unpack_init(&self.ctx) + return obj + elif ret == 0: + if self.file_like is not None: + self.read_from_file() + continue + if iter: + raise StopIteration("No more data to unpack.") + else: + raise OutOfData("No more data to unpack.") else: - raise OutOfData("No more data to unpack.") - else: - raise ValueError("Unpack failed: error = %d" % (ret,)) + raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + except ValueError as e: + raise UnpackValueError(e) def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" From e15085db0362899520f714e3959c37721c839cef Mon Sep 17 00:00:00 2001 From: palaviv Date: Fri, 12 Feb 2016 15:39:50 +0200 Subject: [PATCH 048/349] removed MsgpackBaseException --- msgpack/exceptions.py | 10 ++++------ test/test_limits.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index e0d5b5f5..e9820798 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -1,8 +1,4 @@ -class MsgpackBaseException(Exception): - pass - - -class UnpackException(MsgpackBaseException): +class UnpackException(Exception): pass @@ -26,9 +22,11 @@ def __init__(self, unpacked, extra): def __str__(self): return "unpack(b) received extra data." 
-class PackException(MsgpackBaseException): + +class PackException(Exception): pass + class PackValueError(PackException, ValueError): pass diff --git a/test/test_limits.py b/test/test_limits.py index 34acf7c8..e9bc9dfe 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -4,11 +4,11 @@ import pytest from msgpack import packb, unpackb, Packer, Unpacker, ExtType, PackException, PackOverflowError, PackValueError -from msgpack import UnpackValueError, UnpackException, MsgpackBaseException +from msgpack import UnpackValueError, UnpackException @pytest.mark.parametrize("expected_exception", [OverflowError, ValueError, PackOverflowError, - PackException, PackValueError, MsgpackBaseException]) + PackException, PackValueError]) def test_integer(expected_exception): x = -(2 ** 63) assert unpackb(packb(x)) == x @@ -21,7 +21,7 @@ def test_integer(expected_exception): packb(x+1) -@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError, MsgpackBaseException]) +@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError]) def test_array_header(expected_exception): packer = Packer() packer.pack_array_header(2**32-1) @@ -29,7 +29,7 @@ def test_array_header(expected_exception): packer.pack_array_header(2**32) -@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError, MsgpackBaseException]) +@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError]) def test_map_header(expected_exception): packer = Packer() packer.pack_map_header(2**32-1) @@ -37,7 +37,7 @@ def test_map_header(expected_exception): packer.pack_array_header(2**32) -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) def test_max_str_len(expected_exception): d = 'x' * 3 packed = packb(d) @@ -52,7 +52,7 @@ def test_max_str_len(expected_exception): unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) def test_max_bin_len(expected_exception): d = b'x' * 3 packed = packb(d, use_bin_type=True) @@ -67,7 +67,7 @@ def test_max_bin_len(expected_exception): unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) def test_max_array_len(expected_exception): d = [1,2,3] packed = packb(d) @@ -82,7 +82,7 @@ def test_max_array_len(expected_exception): unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) def test_max_map_len(expected_exception): d = {1: 2, 3: 4, 5: 6} packed = packb(d) @@ -97,7 +97,7 @@ def test_max_map_len(expected_exception): unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException, MsgpackBaseException]) +@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) def test_max_ext_len(expected_exception): d = ExtType(42, b"abc") packed = packb(d) From d90008d4f57ec83a15b84cf2db2edc36e9504ac1 Mon Sep 17 00:00:00 2001 From: 
INADA Naoki Date: Sun, 14 Feb 2016 11:46:28 +0900 Subject: [PATCH 049/349] ExtraData should be UnpackValueError --- msgpack/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index e9820798..bb122643 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -14,7 +14,7 @@ class UnpackValueError(UnpackException, ValueError): pass -class ExtraData(ValueError): +class ExtraData(UnpackValueError): def __init__(self, unpacked, extra): self.unpacked = unpacked self.extra = extra From 3dad39811d93a58c5d3de874193290b935da1446 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 14 Feb 2016 11:54:01 +0900 Subject: [PATCH 050/349] Deprecate PackExceptions --- msgpack/_unpacker.pyx | 10 +++++----- msgpack/exceptions.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 90ebf7d5..04435055 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -11,11 +11,11 @@ from libc.string cimport * from libc.limits cimport * from msgpack.exceptions import ( - BufferFull, - OutOfData, - UnpackValueError, - ExtraData, - ) + BufferFull, + OutOfData, + UnpackValueError, + ExtraData, +) from msgpack import ExtType diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index bb122643..73010b7d 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -24,12 +24,18 @@ def __str__(self): class PackException(Exception): - pass + """Deprecated. Use Exception instead to catch all exception during packing.""" class PackValueError(PackException, ValueError): - pass + """PackValueError is raised when type of input data is supported but it's value is unsupported. + + Deprecated. Use ValueError instead. + """ class PackOverflowError(PackValueError, OverflowError): - pass + """PackOverflowError is raised when integer value is out of range of msgpack support [-2**31, 2**32). + + Deprecated. Use ValueError instead. 
+ """ From 6e364762394fdb06d0453411a5f020ee594c06b0 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 14 Feb 2016 11:58:56 +0900 Subject: [PATCH 051/349] remove too much parameterized tests --- test/test_limits.py | 49 +++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/test/test_limits.py b/test/test_limits.py index e9bc9dfe..197ef461 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -3,42 +3,39 @@ from __future__ import absolute_import, division, print_function, unicode_literals import pytest -from msgpack import packb, unpackb, Packer, Unpacker, ExtType, PackException, PackOverflowError, PackValueError -from msgpack import UnpackValueError, UnpackException +from msgpack import ( + packb, unpackb, Packer, Unpacker, ExtType, + PackOverflowError, PackValueError, UnpackValueError, +) -@pytest.mark.parametrize("expected_exception", [OverflowError, ValueError, PackOverflowError, - PackException, PackValueError]) -def test_integer(expected_exception): +def test_integer(): x = -(2 ** 63) assert unpackb(packb(x)) == x - with pytest.raises(expected_exception): + with pytest.raises(PackOverflowError): packb(x-1) x = 2 ** 64 - 1 assert unpackb(packb(x)) == x - with pytest.raises(expected_exception): + with pytest.raises(PackOverflowError): packb(x+1) -@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError]) -def test_array_header(expected_exception): +def test_array_header(): packer = Packer() packer.pack_array_header(2**32-1) - with pytest.raises(expected_exception): + with pytest.raises(PackValueError): packer.pack_array_header(2**32) -@pytest.mark.parametrize("expected_exception", [ValueError, PackException, PackValueError]) -def test_map_header(expected_exception): +def test_map_header(): packer = Packer() packer.pack_map_header(2**32-1) - with pytest.raises(expected_exception): + with pytest.raises(PackValueError): packer.pack_array_header(2**32) -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) -def test_max_str_len(expected_exception): +def test_max_str_len(): d = 'x' * 3 packed = packb(d) @@ -47,13 +44,12 @@ def test_max_str_len(expected_exception): assert unpacker.unpack() == d unpacker = Unpacker(max_str_len=2, encoding='utf-8') - with pytest.raises(expected_exception): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) -def test_max_bin_len(expected_exception): +def test_max_bin_len(): d = b'x' * 3 packed = packb(d, use_bin_type=True) @@ -62,13 +58,12 @@ def test_max_bin_len(expected_exception): assert unpacker.unpack() == d unpacker = Unpacker(max_bin_len=2) - with pytest.raises(expected_exception): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) -def test_max_array_len(expected_exception): +def test_max_array_len(): d = [1,2,3] packed = packb(d) @@ -77,13 +72,12 @@ def test_max_array_len(expected_exception): assert unpacker.unpack() == d unpacker = Unpacker(max_array_len=2) - with pytest.raises(expected_exception): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) -def test_max_map_len(expected_exception): +def test_max_map_len(): d = {1: 2, 3: 4, 5: 6} 
packed = packb(d) @@ -92,13 +86,12 @@ def test_max_map_len(expected_exception): assert unpacker.unpack() == d unpacker = Unpacker(max_map_len=2) - with pytest.raises(expected_exception): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() -@pytest.mark.parametrize("expected_exception", [ValueError, UnpackValueError, UnpackException]) -def test_max_ext_len(expected_exception): +def test_max_ext_len(): d = ExtType(42, b"abc") packed = packb(d) @@ -107,7 +100,7 @@ def test_max_ext_len(expected_exception): assert unpacker.unpack() == d unpacker = Unpacker(max_ext_len=2) - with pytest.raises(expected_exception): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() From b2a8ce6cbdbef80d1a89d02fa483f56862cf1efa Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 14 Feb 2016 14:32:11 +0900 Subject: [PATCH 052/349] Deprecate more useless exceptions --- msgpack/exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index 73010b7d..97668814 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -1,5 +1,5 @@ class UnpackException(Exception): - pass + """Deprecated. Use Exception instead to catch all exception during unpacking.""" class BufferFull(UnpackException): @@ -11,7 +11,7 @@ class OutOfData(UnpackException): class UnpackValueError(UnpackException, ValueError): - pass + """Deprecated. Use ValueError instead.""" class ExtraData(UnpackValueError): From 2192310bc4a7af32a628191ededd1feeec624845 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 16 Apr 2016 02:03:18 +0900 Subject: [PATCH 053/349] Use manylinux1 wheel for Cython (#179) * Use manylinux1 wheel for Cython * Use newer pip --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index eced3536..e7e9b638 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,8 +16,8 @@ env: - TOXENV=pypy-pure,pypy3-pure install: - - pip install tox - - pip install cython --install-option=--cython-with-refnanny --install-option=--no-cython-compile + - pip install -U pip + - pip install tox cython - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx script: tox From 40ee322440a018c9e09634aa2c190d1747d7f0bd Mon Sep 17 00:00:00 2001 From: Timothy Cyrus Date: Fri, 29 Apr 2016 11:18:27 -0400 Subject: [PATCH 054/349] Update README.rst (#184) Change PNG Badges to SVG --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index d32ec1d4..8136ccbb 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ MessagePack for Python :version: 0.4.6 :date: 2015-03-13 -.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png +.. 
image:: https://secure.travis-ci.org/msgpack/msgpack-python.svg :target: https://travis-ci.org/#!/msgpack/msgpack-python From 6b113a6fb37ffb969e92429b06aab9ea9b8eeb4a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 30 Apr 2016 17:07:14 +0900 Subject: [PATCH 055/349] Use Python's memory API (#185) --- msgpack/_packer.pyx | 7 +++---- msgpack/_unpacker.pyx | 23 ++++++++++++++++++----- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index e9238950..d491cc10 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -88,7 +88,7 @@ cdef class Packer(object): def __cinit__(self): cdef int buf_size = 1024*1024 - self.pk.buf = malloc(buf_size); + self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size @@ -97,8 +97,6 @@ cdef class Packer(object): def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, bint autoreset=1, bint use_bin_type=0, bint strict_types=0): - """ - """ self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -123,7 +121,8 @@ cdef class Packer(object): self.unicode_errors = PyBytes_AsString(self._berrors) def __dealloc__(self): - free(self.pk.buf); + PyMem_Free(self.pk.buf) + self.pk.buf = NULL cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: cdef long long llval diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 04435055..23f6478f 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,7 +1,20 @@ # coding: utf-8 #cython: embedsignature=True -from cpython cimport * +from cpython.bytes cimport ( + PyBytes_AsString, + PyBytes_FromStringAndSize, + PyBytes_Size, +) +from cpython.buffer cimport ( + Py_buffer, + PyBuffer_Release, + PyObject_GetBuffer, + PyBUF_SIMPLE, +) +from cpython.mem cimport PyMem_Malloc, PyMem_Free +from cpython.object cimport PyCallable_Check + cdef extern from "Python.h": ctypedef struct PyObject cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 @@ -256,7 +269,7 @@ cdef class Unpacker(object): self.buf = NULL def __dealloc__(self): - free(self.buf) + PyMem_Free(self.buf) self.buf = NULL def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, @@ -289,7 +302,7 @@ cdef class Unpacker(object): read_size = min(max_buffer_size, 1024**2) self.max_buffer_size = max_buffer_size self.read_size = read_size - self.buf = malloc(read_size) + self.buf = PyMem_Malloc(read_size) if self.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.buf_size = read_size @@ -352,13 +365,13 @@ cdef class Unpacker(object): if new_size > self.max_buffer_size: raise BufferFull new_size = min(new_size*2, self.max_buffer_size) - new_buf = malloc(new_size) + new_buf = PyMem_Malloc(new_size) if new_buf == NULL: # self.buf still holds old buffer and will be freed during # obj destruction raise MemoryError("Unable to enlarge internal buffer.") memcpy(new_buf, buf + head, tail - head) - free(buf) + PyMem_Free(buf) buf = new_buf buf_size = new_size From ceb9635a3f4d7f3dd4874b98773ca6f7db9296a7 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 3 May 2016 11:58:28 +0900 Subject: [PATCH 056/349] Use AppVeyor to build windows wheel (#188) * Add AppVeyor support to build windows wheel * Fix test_limits on 32bit environments * Ignore Python35-x64 test fail for now Should be fixed in next version. 
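The 32-bit part of this fix comes down to the size guard in pack_array_header/pack_map_header: with a size_t parameter, an argument of 2**32 presumably wraps to 0 on 32-bit builds before the size > 2**32-1 check can fire, so the diff below widens the parameter to long long. The guarded behaviour itself is the one already exercised by test_limits.py earlier in this series; a condensed restatement, using only names that appear there:

    import pytest
    from msgpack import Packer, PackValueError

    packer = Packer()
    # The largest header msgpack can represent is accepted as-is.
    packer.pack_array_header(2**32 - 1)
    # One past the limit must raise instead of wrapping; this is the case
    # that test_limits caught on 32-bit interpreters.
    with pytest.raises(PackValueError):
        packer.pack_array_header(2**32)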
--- appveyor.yml | 57 +++++++++++++++++++++++++++++++++++++++++++++ build.cmd | 21 +++++++++++++++++ msgpack/_packer.pyx | 4 ++-- 3 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 appveyor.yml create mode 100644 build.cmd diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..02b44611 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,57 @@ +environment: + + matrix: + + # For Python versions available on Appveyor, see + # http://www.appveyor.com/docs/installed-software#python + # The list here is complete (excluding Python 2.6, which + # isn't covered by this document) at the time of writing. + + - PYTHON: "C:\\Python27" + - PYTHON: "C:\\Python34" + - PYTHON: "C:\\Python35" + - PYTHON: "C:\\Python27-x64" + - PYTHON: "C:\\Python34-x64" + DISTUTILS_USE_SDK: "1" + + # Python35-x64 test fails with MemoryError + # TODO: investigate it + #- PYTHON: "C:\\Python35-x64" + +install: + # We need wheel installed to build wheels + - "%PYTHON%\\python.exe -m pip install -U pip wheel pytest cython" + +build: off + +test_script: + # Put your test command here. + # If you don't need to build C extensions on 64-bit Python 3.3 or 3.4, + # you can remove "build.cmd" from the front of the command, as it's + # only needed to support those cases. + # Note that you must use the environment variable %PYTHON% to refer to + # the interpreter you're using - Appveyor does not do anything special + # to put the Python evrsion you want to use on PATH. + - "build.cmd %PYTHON%\\python.exe setup.py build_ext -i" + - "build.cmd %PYTHON%\\python.exe setup.py install" + - "%PYTHON%\\python.exe -c \"import sys; print(hex(sys.maxsize))\"" + - "%PYTHON%\\python.exe -c \"from msgpack import _packer, _unpacker\"" + - "%PYTHON%\\Scripts\\py.test test" + - "build.cmd %PYTHON%\\python.exe setup.py bdist_wheel" + +after_test: + # This step builds your wheels. + # Again, you only need build.cmd if you're building C extensions for + # 64-bit Python 3.3/3.4. And you need to use %PYTHON% to get the correct + # interpreter + +artifacts: + # bdist_wheel puts your built wheel in the dist directory + - path: dist\* + +#on_success: +# You can use this step to upload your artifacts to a public website. +# See Appveyor's documentation for more details. Or you can simply +# access your wheels from the Appveyor "artifacts" tab for your build. 
+ +# vim: set shiftwidth=2 diff --git a/build.cmd b/build.cmd new file mode 100644 index 00000000..243dc9a1 --- /dev/null +++ b/build.cmd @@ -0,0 +1,21 @@ +@echo off +:: To build extensions for 64 bit Python 3, we need to configure environment +:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 4 +:: +:: More details at: +:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows + +IF "%DISTUTILS_USE_SDK%"=="1" ( + ECHO Configuring environment to build with MSVC on a 64bit architecture + ECHO Using Windows SDK 7.1 + "C:\Program Files\Microsoft SDKs\Windows\v7.1\Setup\WindowsSdkVer.exe" -q -version:v7.1 + CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release + SET MSSdk=1 + REM Need the following to allow tox to see the SDK compiler + SET TOX_TESTENV_PASSENV=DISTUTILS_USE_SDK MSSdk INCLUDE LIB +) ELSE ( + ECHO Using default MSVC build environment +) + +CALL %* diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index d491cc10..b1a912b9 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -271,7 +271,7 @@ cdef class Packer(object): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - def pack_array_header(self, size_t size): + def pack_array_header(self, long long size): if size > (2**32-1): raise PackValueError cdef int ret = msgpack_pack_array(&self.pk, size) @@ -284,7 +284,7 @@ cdef class Packer(object): self.pk.length = 0 return buf - def pack_map_header(self, size_t size): + def pack_map_header(self, long long size): if size > (2**32-1): raise PackValueError cdef int ret = msgpack_pack_map(&self.pk, size) From 0ec2e3534f9b7751be484bd2f1344e24c49bb24f Mon Sep 17 00:00:00 2001 From: folz Date: Thu, 28 Apr 2016 15:08:28 +0200 Subject: [PATCH 057/349] fix problems associated with packing memoryviews fix wrong length when packing multibyte memoryviews in fallback add tests for memoryviews of different types and sizes and check contents of packed data --- msgpack/_packer.pyx | 23 +++---- msgpack/fallback.py | 26 +++++++- test/test_memoryview.py | 133 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 164 insertions(+), 18 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index b1a912b9..e07b1947 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -39,6 +39,7 @@ cdef extern from "pack.h": int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 +cdef size_t ITEM_LIMIT = (2**32)-1 cdef class Packer(object): @@ -178,7 +179,7 @@ cdef class Packer(object): ret = msgpack_pack_double(&self.pk, dval) elif PyBytes_CheckExact(o) if strict_types else PyBytes_Check(o): L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("bytes is too large") rawval = o ret = msgpack_pack_bin(&self.pk, L) @@ -189,7 +190,7 @@ cdef class Packer(object): raise TypeError("Can't encode unicode string: no encoding is specified") o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("unicode string is too large") rawval = o ret = msgpack_pack_raw(&self.pk, L) @@ -198,7 +199,7 @@ cdef class Packer(object): elif PyDict_CheckExact(o): d = o L = len(d) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: @@ -209,7 +210,7 @@ cdef class Packer(object): if ret != 0: break elif not strict_types and 
PyDict_Check(o): L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: @@ -223,13 +224,13 @@ cdef class Packer(object): longval = o.code rawval = o.data L = len(o.data) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("list is too large") ret = msgpack_pack_array(&self.pk, L) if ret == 0: @@ -240,7 +241,7 @@ cdef class Packer(object): if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: raise PackValueError("could not get buffer for memoryview") L = view.len - if L > (2**32)-1: + if L > ITEM_LIMIT: PyBuffer_Release(&view); raise PackValueError("memoryview is too large") ret = msgpack_pack_bin(&self.pk, L) @@ -271,8 +272,8 @@ cdef class Packer(object): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - def pack_array_header(self, long long size): - if size > (2**32-1): + def pack_array_header(self, size_t size): + if size > ITEM_LIMIT: raise PackValueError cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: @@ -284,8 +285,8 @@ cdef class Packer(object): self.pk.length = 0 return buf - def pack_map_header(self, long long size): - if size > (2**32-1): + def pack_map_header(self, size_t size): + if size > ITEM_LIMIT: raise PackValueError cdef int ret = msgpack_pack_map(&self.pk, size) if ret == -1: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index d8c5d738..db47d5c6 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -685,7 +685,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, default_used = True continue raise PackOverflowError("Integer value out of range") - if self._use_bin_type and check(obj, (bytes, memoryview)): + if self._use_bin_type and check(obj, bytes): n = len(obj) if n <= 0xff: self._buffer.write(struct.pack('>BB', 0xc4, n)) @@ -696,7 +696,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, else: raise PackValueError("Bytes is too large") return self._buffer.write(obj) - if check(obj, (Unicode, bytes, memoryview)): + if check(obj, (Unicode, bytes)): if check(obj, Unicode): if self._encoding is None: raise TypeError( @@ -715,6 +715,28 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, else: raise PackValueError("String is too large") return self._buffer.write(obj) + if check(obj, memoryview): + n = len(obj) * obj.itemsize + if self._use_bin_type: + if n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xc6, n)) + else: + raise PackValueError("memoryview is too large") + return self._buffer.write(obj) + else: + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError("memoryview is too large") + return self._buffer.write(obj) if check(obj, float): if self._use_float: return self._buffer.write(struct.pack(">Bf", 0xca, obj)) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index aed50696..f555c5b0 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -2,11 
+2,134 @@ # coding: utf-8 +from array import array from msgpack import packb, unpackb +import sys -def test_pack_memoryview(): - data = bytearray(range(256)) - view = memoryview(data) - unpacked = unpackb(packb(view)) - assert data == unpacked +# For Python < 3: +# - array type only supports old buffer interface +# - array.frombytes is not available, must use deprecated array.fromstring +if sys.version_info[0] < 3: + def __memoryview(obj): + return memoryview(buffer(obj)) + + def __make_array(f, data): + a = array(f) + a.fromstring(data) + return a + + def __get_data(a): + return a.tostring() +else: + __memoryview = memoryview + + def __make_array(f, data): + a = array(f) + a.frombytes(data) + return a + + def __get_data(a): + return a.tobytes() + + +def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): + # create a new array + original_array = array(format) + original_array.fromlist([255] * (nbytes // original_array.itemsize)) + original_data = __get_data(original_array) + view = __memoryview(original_array) + + # pack, unpack, and reconstruct array + packed = packb(view, use_bin_type=use_bin_type) + unpacked = unpackb(packed) + reconstructed_array = __make_array(format, unpacked) + + # check that we got the right amount of data + assert len(original_data) == nbytes + # check packed header + assert packed[:1] == expected_header + # check packed length prefix, if any + assert packed[1:1+len(expected_prefix)] == expected_prefix + # check packed data + assert packed[1+len(expected_prefix):] == original_data + # check array unpacked correctly + assert original_array == reconstructed_array + + +# ----------- +# test fixstr +# ----------- + + +def test_memoryview_byte_fixstr(): + __run_test('B', 31, b'\xbf', b'', False) + + +def test_memoryview_float_fixstr(): + __run_test('f', 28, b'\xbc', b'', False) + + +# ---------- +# test str16 +# ---------- + + +def test_memoryview_byte_str16(): + __run_test('B', 2**8, b'\xda', b'\x01\x00', False) + + +def test_memoryview_float_str16(): + __run_test('f', 2**8, b'\xda', b'\x01\x00', False) + + +# ---------- +# test str32 +# ---------- + + +def test_memoryview_byte_str32(): + __run_test('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + + +def test_memoryview_float_str32(): + __run_test('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + + +# --------- +# test bin8 +# --------- + + +def test_memoryview_byte_bin8(): + __run_test('B', 1, b'\xc4', b'\x01', True) + + +def test_memoryview_float_bin8(): + __run_test('f', 4, b'\xc4', b'\x04', True) + + +# ---------- +# test bin16 +# ---------- + + +def test_memoryview_byte_bin16(): + __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) + + +def test_memoryview_float_bin16(): + __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) + + +# ---------- +# test bin32 +# ---------- + + +def test_memoryview_byte_bin32(): + __run_test('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + + +def test_memoryview_float_bin32(): + __run_test('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) From 0b55989f0b045f1a77d4230bea3b6da70eb3d840 Mon Sep 17 00:00:00 2001 From: folz Date: Wed, 4 May 2016 10:04:09 +0200 Subject: [PATCH 058/349] more descriptive test names --- test/test_memoryview.py | 72 ++++++++++++----------------------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index f555c5b0..27688673 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -11,25 +11,25 @@ # - array type only supports old buffer interface # - 
array.frombytes is not available, must use deprecated array.fromstring if sys.version_info[0] < 3: - def __memoryview(obj): + def make_memoryview(obj): return memoryview(buffer(obj)) - def __make_array(f, data): + def make_array(f, data): a = array(f) a.fromstring(data) return a - def __get_data(a): + def get_data(a): return a.tostring() else: - __memoryview = memoryview + make_memoryview = memoryview - def __make_array(f, data): + def make_array(f, data): a = array(f) a.frombytes(data) return a - def __get_data(a): + def get_data(a): return a.tobytes() @@ -37,13 +37,13 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): # create a new array original_array = array(format) original_array.fromlist([255] * (nbytes // original_array.itemsize)) - original_data = __get_data(original_array) - view = __memoryview(original_array) + original_data = get_data(original_array) + view = make_memoryview(original_array) # pack, unpack, and reconstruct array packed = packb(view, use_bin_type=use_bin_type) unpacked = unpackb(packed) - reconstructed_array = __make_array(format, unpacked) + reconstructed_array = make_array(format, unpacked) # check that we got the right amount of data assert len(original_data) == nbytes @@ -57,79 +57,49 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): assert original_array == reconstructed_array -# ----------- -# test fixstr -# ----------- - - -def test_memoryview_byte_fixstr(): +def test_fixstr_from_byte(): __run_test('B', 31, b'\xbf', b'', False) -def test_memoryview_float_fixstr(): +def test_fixstr_from_float(): __run_test('f', 28, b'\xbc', b'', False) -# ---------- -# test str16 -# ---------- - - -def test_memoryview_byte_str16(): +def test_str16_from_byte(): __run_test('B', 2**8, b'\xda', b'\x01\x00', False) -def test_memoryview_float_str16(): +def test_str16_from_float(): __run_test('f', 2**8, b'\xda', b'\x01\x00', False) -# ---------- -# test str32 -# ---------- - - -def test_memoryview_byte_str32(): +def test_str32_from_byte(): __run_test('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) -def test_memoryview_float_str32(): +def test_str32_from_float(): __run_test('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) -# --------- -# test bin8 -# --------- - - -def test_memoryview_byte_bin8(): +def test_bin8_from_byte(): __run_test('B', 1, b'\xc4', b'\x01', True) -def test_memoryview_float_bin8(): +def test_bin8_from_float(): __run_test('f', 4, b'\xc4', b'\x04', True) -# ---------- -# test bin16 -# ---------- - - -def test_memoryview_byte_bin16(): +def test_bin16_from_byte(): __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) -def test_memoryview_float_bin16(): +def test_bin16_from_float(): __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) -# ---------- -# test bin32 -# ---------- - - -def test_memoryview_byte_bin32(): +def test_bin32_from_byte(): __run_test('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) -def test_memoryview_float_bin32(): +def test_bin32_from_float(): __run_test('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) From 5860af953ae1c3f459ddc589cd815ec195db46a9 Mon Sep 17 00:00:00 2001 From: folz Date: Wed, 4 May 2016 11:01:27 +0200 Subject: [PATCH 059/349] refactor header packing for str and bin types --- msgpack/fallback.py | 83 +++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index db47d5c6..abed3d9e 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -685,58 +685,29 @@ def _pack(self, 
obj, nest_limit=DEFAULT_RECURSE_LIMIT, default_used = True continue raise PackOverflowError("Integer value out of range") - if self._use_bin_type and check(obj, bytes): + if check(obj, bytes): n = len(obj) - if n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xc6, n)) - else: + if n >= 2**32: raise PackValueError("Bytes is too large") + self._fb_pack_bin_header(n) return self._buffer.write(obj) - if check(obj, (Unicode, bytes)): - if check(obj, Unicode): - if self._encoding is None: - raise TypeError( - "Can't encode unicode string: " - "no encoding is specified") - obj = obj.encode(self._encoding, self._unicode_errors) + if check(obj, Unicode): + if self._encoding is None: + raise TypeError( + "Can't encode unicode string: " + "no encoding is specified") + obj = obj.encode(self._encoding, self._unicode_errors) n = len(obj) - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif self._use_bin_type and n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xd9, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) - else: + if n >= 2**32: raise PackValueError("String is too large") + self._fb_pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): n = len(obj) * obj.itemsize - if self._use_bin_type: - if n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xc6, n)) - else: - raise PackValueError("memoryview is too large") - return self._buffer.write(obj) - else: - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) - else: - raise PackValueError("memoryview is too large") - return self._buffer.write(obj) + if n >= 2**32: + raise PackValueError("Memoryview is too large") + self._fb_pack_bin_header(n) + return self._buffer.write(obj) if check(obj, float): if self._use_float: return self._buffer.write(struct.pack(">Bf", 0xca, obj)) @@ -874,6 +845,30 @@ def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) + def _fb_pack_raw_header(self, n): + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif self._use_bin_type and n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xd9, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError('Raw is too large') + + def _fb_pack_bin_header(self, n): + if not self._use_bin_type: + return self._fb_pack_raw_header(n) + elif n <= 0xff: + return self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xc6, n)) + else: + raise PackValueError('Bin is too large') + def bytes(self): return self._buffer.getvalue() From a91d5c538ea5bbee0f00ff180a8e72d27df6cfc1 Mon Sep 17 00:00:00 2001 From: folz Date: Wed, 4 May 2016 12:03:37 +0200 Subject: [PATCH 060/349] add lower bound tests for memoryviews --- test/test_memoryview.py | 8 ++++++++ 1 file changed, 8 
insertions(+) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 27688673..7ce6bfc2 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -58,19 +58,23 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): def test_fixstr_from_byte(): + __run_test('B', 1, b'\xa1', b'', False) __run_test('B', 31, b'\xbf', b'', False) def test_fixstr_from_float(): + __run_test('f', 4, b'\xa4', b'', False) __run_test('f', 28, b'\xbc', b'', False) def test_str16_from_byte(): __run_test('B', 2**8, b'\xda', b'\x01\x00', False) + __run_test('B', 2**16-1, b'\xda', b'\xff\xff', False) def test_str16_from_float(): __run_test('f', 2**8, b'\xda', b'\x01\x00', False) + __run_test('f', 2**16-4, b'\xda', b'\xff\xfc', False) def test_str32_from_byte(): @@ -83,18 +87,22 @@ def test_str32_from_float(): def test_bin8_from_byte(): __run_test('B', 1, b'\xc4', b'\x01', True) + __run_test('B', 2**8-1, b'\xc4', b'\xff', True) def test_bin8_from_float(): __run_test('f', 4, b'\xc4', b'\x04', True) + __run_test('f', 2**8-4, b'\xc4', b'\xfc', True) def test_bin16_from_byte(): __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) + __run_test('B', 2**16-1, b'\xc5', b'\xff\xff', True) def test_bin16_from_float(): __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) + __run_test('f', 2**16-4, b'\xc5', b'\xff\xfc', True) def test_bin32_from_byte(): From 53f47ef55d8d93e276ecf9041a9a8b43fc041aef Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 5 May 2016 00:49:48 +0900 Subject: [PATCH 061/349] Remove double underscore prefix --- test/test_memoryview.py | 43 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 7ce6bfc2..f6d74edf 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # coding: utf-8 - from array import array from msgpack import packb, unpackb import sys @@ -33,7 +32,7 @@ def get_data(a): return a.tobytes() -def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): +def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): # create a new array original_array = array(format) original_array.fromlist([255] * (nbytes // original_array.itemsize)) @@ -58,56 +57,56 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): def test_fixstr_from_byte(): - __run_test('B', 1, b'\xa1', b'', False) - __run_test('B', 31, b'\xbf', b'', False) + _runtest('B', 1, b'\xa1', b'', False) + _runtest('B', 31, b'\xbf', b'', False) def test_fixstr_from_float(): - __run_test('f', 4, b'\xa4', b'', False) - __run_test('f', 28, b'\xbc', b'', False) + _runtest('f', 4, b'\xa4', b'', False) + _runtest('f', 28, b'\xbc', b'', False) def test_str16_from_byte(): - __run_test('B', 2**8, b'\xda', b'\x01\x00', False) - __run_test('B', 2**16-1, b'\xda', b'\xff\xff', False) + _runtest('B', 2**8, b'\xda', b'\x01\x00', False) + _runtest('B', 2**16-1, b'\xda', b'\xff\xff', False) def test_str16_from_float(): - __run_test('f', 2**8, b'\xda', b'\x01\x00', False) - __run_test('f', 2**16-4, b'\xda', b'\xff\xfc', False) + _runtest('f', 2**8, b'\xda', b'\x01\x00', False) + _runtest('f', 2**16-4, b'\xda', b'\xff\xfc', False) def test_str32_from_byte(): - __run_test('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + _runtest('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) def test_str32_from_float(): - __run_test('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + _runtest('f', 2**16, b'\xdb', 
b'\x00\x01\x00\x00', False) def test_bin8_from_byte(): - __run_test('B', 1, b'\xc4', b'\x01', True) - __run_test('B', 2**8-1, b'\xc4', b'\xff', True) + _runtest('B', 1, b'\xc4', b'\x01', True) + _runtest('B', 2**8-1, b'\xc4', b'\xff', True) def test_bin8_from_float(): - __run_test('f', 4, b'\xc4', b'\x04', True) - __run_test('f', 2**8-4, b'\xc4', b'\xfc', True) + _runtest('f', 4, b'\xc4', b'\x04', True) + _runtest('f', 2**8-4, b'\xc4', b'\xfc', True) def test_bin16_from_byte(): - __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) - __run_test('B', 2**16-1, b'\xc5', b'\xff\xff', True) + _runtest('B', 2**8, b'\xc5', b'\x01\x00', True) + _runtest('B', 2**16-1, b'\xc5', b'\xff\xff', True) def test_bin16_from_float(): - __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) - __run_test('f', 2**16-4, b'\xc5', b'\xff\xfc', True) + _runtest('f', 2**8, b'\xc5', b'\x01\x00', True) + _runtest('f', 2**16-4, b'\xc5', b'\xff\xfc', True) def test_bin32_from_byte(): - __run_test('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) def test_bin32_from_float(): - __run_test('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) From 63e23d37f9f3646f0fc3b327ddf1f3e1f200baf5 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 5 May 2016 02:07:46 +0900 Subject: [PATCH 062/349] travis: Use docker to test 32bit environment (#189) * travis: testing matrix.include feature to use docker * Add test script for 32bit * Fix OverflowError in 32bit Environment --- .travis.yml | 38 +++++++++++++++++++++++++++----------- docker/runtests.sh | 14 ++++++++++++++ msgpack/_packer.pyx | 4 ++-- 3 files changed, 43 insertions(+), 13 deletions(-) create mode 100755 docker/runtests.sh diff --git a/.travis.yml b/.travis.yml index e7e9b638..b4396cb7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,23 +1,39 @@ sudo: false +language: python +python: 3.5 cache: directories: - $HOME/.cache/pip -language: python -python: - - 3.5 branches: - only: - - master + only: + - master env: - - TOXENV=py27-c,py33-c,py34-c,py35-c - - TOXENV=py27-pure,py33-pure,py34-pure,py35-pure - - TOXENV=pypy-pure,pypy3-pure + - TOXENV=py27-c,py33-c,py34-c,py35-c + - TOXENV=py27-pure,py33-pure,py34-pure,py35-pure + - TOXENV=pypy-pure,pypy3-pure + +matrix: + include: + - sudo: required + services: + - docker + env: + - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686 + install: + - pip install -U pip + - pip install cython + - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + - docker pull $DOCKER_IMAGE + script: + - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh install: - - pip install -U pip - - pip install tox cython - - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + - pip install -U pip + - pip install tox cython + - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx script: tox + +# vim: sw=2 ts=2 diff --git a/docker/runtests.sh b/docker/runtests.sh new file mode 100755 index 00000000..0d748023 --- /dev/null +++ b/docker/runtests.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e -x + +for V in cp35-cp35m cp34-cp34m cp27-cp27m cp27-cp27mu; do + PYBIN=/opt/python/$V/bin + $PYBIN/python setup.py install + rm -rf build/ # Avoid lib build by narrow Python is used by wide python + $PYBIN/pip install pytest + pushd test # prevent importing msgpack package in current directory. 
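# The next line is a 32-bit sanity check: inside the manylinux1_i686 image
# sys.maxsize prints as 0x7fffffff, versus 0x7fffffffffffffff on a 64-bit build.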
+ $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' + $PYBIN/python -c 'from msgpack import _packer, _unpacker' + $PYBIN/py.test -v + popd +done diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index e07b1947..3981f200 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -272,7 +272,7 @@ cdef class Packer(object): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - def pack_array_header(self, size_t size): + def pack_array_header(self, long long size): if size > ITEM_LIMIT: raise PackValueError cdef int ret = msgpack_pack_array(&self.pk, size) @@ -285,7 +285,7 @@ cdef class Packer(object): self.pk.length = 0 return buf - def pack_map_header(self, size_t size): + def pack_map_header(self, long long size): if size > ITEM_LIMIT: raise PackValueError cdef int ret = msgpack_pack_map(&self.pk, size) From 5c052264bc52c4bbdb5e5736f4f24834af46b8d6 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 5 May 2016 02:31:03 +0900 Subject: [PATCH 063/349] Update ChangeLog --- ChangeLog.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index f20bb75f..396ccb7f 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,29 @@ +0.5.0 +===== +:release date: TBD + +0.5 is important step toward 1.0. There are some deprecations. +Please read changes carefully. + +Changes +------- + +* Drop Python 2.6 and 3.2 support + +* Deprecate useless custom exceptions. Use ValueError instead of PackValueError, + Exception instead of PackException and UnpackException, etc... + See msgpack/exceptions.py + +* Add `strict_types` option to packer. It can be used to serialize subclass of + builtin types. For example, when packing object which type is subclass of dict, + `default()` is called. + +* Pure Python implementation supports packing memoryview object. 
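A minimal sketch of the `strict_types` behaviour described in the entry above, assuming the option is exposed through `packb()` the way other `Packer` keyword arguments are; `AttrDict` and the way `default()` encodes it are illustrative only, not part of this release:

    import msgpack

    class AttrDict(dict):
        pass

    def default(obj):
        # With strict_types=True the packer uses exact type checks, so a dict
        # subclass is not packed as a map and is handed to this callback.
        if isinstance(obj, AttrDict):
            return dict(obj)  # demote to a plain dict for packing
        raise TypeError("cannot serialize %r" % (obj,))

    packed = msgpack.packb(AttrDict(x=1), strict_types=True, default=default)

Without `strict_types`, `AttrDict(x=1)` already passes the `isinstance` check for `dict` and is packed as a map directly, so `default()` never runs.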
+ +Bugs fixed +---------- + + 0.4.7 ===== :release date: 2016-01-25 From a5c8bafad4ae29b0173f20dc1d7027219396f6aa Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 5 May 2016 02:46:10 +0900 Subject: [PATCH 064/349] Remove unused import (#190) --- msgpack/_packer.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 3981f200..5c950cef 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,9 +2,6 @@ #cython: embedsignature=True from cpython cimport * -from libc.stdlib cimport * -from libc.string cimport * -from libc.limits cimport * from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType From c6c4e59f4cb2ccb6de697de5d55a52e57f6a9a4e Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 8 May 2016 16:31:52 +0900 Subject: [PATCH 065/349] s/realloc/PyMem_Realloc/ (#193) --- msgpack/pack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/pack.h b/msgpack/pack.h index a75bdb04..d3aeff7c 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -47,7 +47,7 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ if (len + l > bs) { bs = (len + l) * 2; - buf = (char*)realloc(buf, bs); + buf = (char*)PyMem_Realloc(buf, bs); if (!buf) return -1; } memcpy(buf + len, data, l); From 318ddfc0527ef3ddf1ad3467ece10c26efa8d741 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 May 2016 09:35:02 +0900 Subject: [PATCH 066/349] Remove wrong download_url from package metadata --- Makefile | 6 ++++++ setup.py | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 2e53d08f..01d80921 100644 --- a/Makefile +++ b/Makefile @@ -20,3 +20,9 @@ python3: cython test: py.test test + +.PHONY: clean +clean: + rm -rf build + rm msgpack/*.so + rm -rf msgpack/__pycache__ diff --git a/setup.py b/setup.py index 37729bd5..d62c8f2d 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # coding: utf-8 +import io import os import sys from glob import glob @@ -97,9 +98,8 @@ def __init__(self, *args, **kwargs): desc = 'MessagePack (de)serializer.' -f = open('README.rst') -long_desc = f.read() -f.close() +with io.open('README.rst', encoding='utf-8') as f: + long_desc = f.read() del f setup(name='msgpack-python', @@ -112,7 +112,6 @@ def __init__(self, *args, **kwargs): description=desc, long_description=long_desc, url='http://msgpack.org/', - download_url='http://pypi.python.org/pypi/msgpack/', classifiers=[ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', From f421f59a287ae26b7fa83a0cac18650d0dd09c03 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 19 May 2016 22:35:42 +0900 Subject: [PATCH 067/349] fallback: Rewrite buffer from array of bytes to bytearray --- msgpack/fallback.py | 169 ++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 100 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index abed3d9e..181d7e2a 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -86,11 +86,8 @@ def unpack(stream, **kwargs): Raises `ExtraData` when `packed` contains extra bytes. See :class:`Unpacker` for options. 
""" - unpacker = Unpacker(stream, **kwargs) - ret = unpacker._fb_unpack() - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret + data = stream.read() + return unpackb(data, **kwargs) def unpackb(packed, **kwargs): @@ -121,7 +118,7 @@ class Unpacker(object): If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) + Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) :param bool use_list: If true, unpack msgpack array to Python list. @@ -199,13 +196,9 @@ def __init__(self, file_like=None, read_size=0, use_list=True, self._fb_feeding = False #: array of bytes feeded. - self._fb_buffers = [] - #: Which buffer we currently reads - self._fb_buf_i = 0 + self._buffer = b"" #: Which position we currently reads - self._fb_buf_o = 0 - #: Total size of _fb_bufferes - self._fb_buf_n = 0 + self._buff_i = 0 # When Unpacker is used as an iterable, between the calls to next(), # the buffer is not "consumed" completely, for efficiency sake. @@ -213,13 +206,13 @@ def __init__(self, file_like=None, read_size=0, use_list=True, # the correct moments, we have to keep track of how sloppy we were. # Furthermore, when the buffer is incomplete (that is: in the case # we raise an OutOfData) we need to rollback the buffer to the correct - # state, which _fb_slopiness records. - self._fb_sloppiness = 0 + # state, which _buf_checkpoint records. + self._buf_checkpoint = 0 self._max_buffer_size = max_buffer_size or 2**31-1 if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") - self._read_size = read_size or min(self._max_buffer_size, 4096) + self._read_size = read_size or min(self._max_buffer_size, 16*1024) self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list @@ -248,103 +241,75 @@ def __init__(self, file_like=None, read_size=0, use_list=True, def feed(self, next_bytes): if isinstance(next_bytes, array.array): next_bytes = next_bytes.tostring() - elif isinstance(next_bytes, bytearray): - next_bytes = bytes(next_bytes) + if not isinstance(next_bytes, (bytes, bytearray)): + raise TypeError("next_bytes should be bytes, bytearray or array.array") assert self._fb_feeding - if (self._fb_buf_n + len(next_bytes) - self._fb_sloppiness - > self._max_buffer_size): + + if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size): raise BufferFull - self._fb_buf_n += len(next_bytes) - self._fb_buffers.append(next_bytes) - - def _fb_sloppy_consume(self): - """ Gets rid of some of the used parts of the buffer. """ - if self._fb_buf_i: - for i in xrange(self._fb_buf_i): - self._fb_buf_n -= len(self._fb_buffers[i]) - self._fb_buffers = self._fb_buffers[self._fb_buf_i:] - self._fb_buf_i = 0 - if self._fb_buffers: - self._fb_sloppiness = self._fb_buf_o - else: - self._fb_sloppiness = 0 + # bytes + bytearray -> bytearray + # So cast before append + self._buffer += bytes(next_bytes) def _fb_consume(self): """ Gets rid of the used parts of the buffer. 
""" - if self._fb_buf_i: - for i in xrange(self._fb_buf_i): - self._fb_buf_n -= len(self._fb_buffers[i]) - self._fb_buffers = self._fb_buffers[self._fb_buf_i:] - self._fb_buf_i = 0 - if self._fb_buffers: - self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:] - self._fb_buf_n -= self._fb_buf_o - else: - self._fb_buf_n = 0 - self._fb_buf_o = 0 - self._fb_sloppiness = 0 + self._buf_checkpoint = self._buff_i def _fb_got_extradata(self): - if self._fb_buf_i != len(self._fb_buffers): - return True - if self._fb_feeding: - return False - if not self.file_like: - return False - if self.file_like.read(1): - return True - return False + return self._buff_i < len(self._buffer) - def __iter__(self): - return self + def _fb_get_extradata(self): + return self._buffer[self._buff_i:] def read_bytes(self, n): return self._fb_read(n) - def _fb_rollback(self): - self._fb_buf_i = 0 - self._fb_buf_o = self._fb_sloppiness + def _fb_read(self, n, write_bytes=None): + # (int, Optional[Callable]) -> bytearray + remain_bytes = len(self._buffer) - self._buff_i - n + + # Fast path: buffer has n bytes already + if remain_bytes >= 0: + ret = self._buffer[self._buff_i:self._buff_i+n] + self._buff_i += n + if write_bytes is not None: + write_bytes(ret) + return ret - def _fb_get_extradata(self): - bufs = self._fb_buffers[self._fb_buf_i:] - if bufs: - bufs[0] = bufs[0][self._fb_buf_o:] - return b''.join(bufs) + if self._fb_feeding: + self._buff_i = self._buf_checkpoint + raise OutOfData - def _fb_read(self, n, write_bytes=None): - buffs = self._fb_buffers - # We have a redundant codepath for the most common case, such that - # pypy optimizes it properly. This is the case that the read fits - # in the current buffer. - if (write_bytes is None and self._fb_buf_i < len(buffs) and - self._fb_buf_o + n < len(buffs[self._fb_buf_i])): - self._fb_buf_o += n - return buffs[self._fb_buf_i][self._fb_buf_o - n:self._fb_buf_o] - - # The remaining cases. - ret = b'' - while len(ret) != n: - sliced = n - len(ret) - if self._fb_buf_i == len(buffs): - if self._fb_feeding: - break - to_read = sliced - if self._read_size > to_read: - to_read = self._read_size - tmp = self.file_like.read(to_read) - if not tmp: - break - buffs.append(tmp) - self._fb_buf_n += len(tmp) - continue - ret += buffs[self._fb_buf_i][self._fb_buf_o:self._fb_buf_o + sliced] - self._fb_buf_o += sliced - if self._fb_buf_o >= len(buffs[self._fb_buf_i]): - self._fb_buf_o = 0 - self._fb_buf_i += 1 - if len(ret) != n: - self._fb_rollback() + # Strip buffer before checkpoint before reading file. 
+ if self._buf_checkpoint > 0: + self._buffer = self._buffer[self._buf_checkpoint:] + self._buff_i -= self._buf_checkpoint + self._buf_checkpoint = 0 + + # Read from file + remain_bytes = -remain_bytes + while remain_bytes > 0: + to_read_bytes = max(self._read_size, remain_bytes) + read_data = self.file_like.read(to_read_bytes) + if not read_data: + break + assert isinstance(read_data, bytes) + self._buffer += read_data + remain_bytes -= len(read_data) + + if len(self._buffer) < n + self._buff_i: + self._buff_i = 0 # rollback raise OutOfData + + if len(self._buffer) == n: + # checkpoint == 0 + ret = self._buffer + self._buffer = b"" + self._buff_i = 0 + else: + ret = self._buffer[self._buff_i:self._buff_i+n] + self._buff_i += n + if write_bytes is not None: write_bytes(ret) return ret @@ -562,15 +527,19 @@ def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): assert typ == TYPE_IMMEDIATE return obj - def next(self): + def __iter__(self): + return self + + def __next__(self): try: ret = self._fb_unpack(EX_CONSTRUCT, None) - self._fb_sloppy_consume() + self._fb_consume() return ret except OutOfData: self._fb_consume() raise StopIteration - __next__ = next + + next = __next__ def skip(self, write_bytes=None): self._fb_unpack(EX_SKIP, write_bytes) From 3322a769890458ec8df5a365f9303510e59c3efb Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 22 May 2016 11:08:20 +0900 Subject: [PATCH 068/349] Remove _fb_ prefix --- msgpack/fallback.py | 172 ++++++++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 181d7e2a..95be7133 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -100,11 +100,11 @@ def unpackb(packed, **kwargs): unpacker = Unpacker(None, **kwargs) unpacker.feed(packed) try: - ret = unpacker._fb_unpack() + ret = unpacker._unpack() except OutOfData: raise UnpackValueError("Data is not enough.") - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) + if unpacker._got_extradata(): + raise ExtraData(ret, unpacker._get_extradata()) return ret @@ -188,12 +188,12 @@ def __init__(self, file_like=None, read_size=0, use_list=True, max_map_len=2147483647, max_ext_len=2147483647): if file_like is None: - self._fb_feeding = True + self._feeding = True else: if not callable(file_like.read): raise TypeError("`file_like.read` must be callable") self.file_like = file_like - self._fb_feeding = False + self._feeding = False #: array of bytes feeded. self._buffer = b"" @@ -243,7 +243,7 @@ def feed(self, next_bytes): next_bytes = next_bytes.tostring() if not isinstance(next_bytes, (bytes, bytearray)): raise TypeError("next_bytes should be bytes, bytearray or array.array") - assert self._fb_feeding + assert self._feeding if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size): raise BufferFull @@ -251,20 +251,20 @@ def feed(self, next_bytes): # So cast before append self._buffer += bytes(next_bytes) - def _fb_consume(self): + def _consume(self): """ Gets rid of the used parts of the buffer. 
""" self._buf_checkpoint = self._buff_i - def _fb_got_extradata(self): + def _got_extradata(self): return self._buff_i < len(self._buffer) - def _fb_get_extradata(self): + def _get_extradata(self): return self._buffer[self._buff_i:] def read_bytes(self, n): - return self._fb_read(n) + return self._read(n) - def _fb_read(self, n, write_bytes=None): + def _read(self, n, write_bytes=None): # (int, Optional[Callable]) -> bytearray remain_bytes = len(self._buffer) - self._buff_i - n @@ -276,7 +276,7 @@ def _fb_read(self, n, write_bytes=None): write_bytes(ret) return ret - if self._fb_feeding: + if self._feeding: self._buff_i = self._buf_checkpoint raise OutOfData @@ -318,7 +318,7 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): typ = TYPE_IMMEDIATE n = 0 obj = None - c = self._fb_read(1, write_bytes) + c = self._read(1, write_bytes) b = ord(c) if b & 0b10000000 == 0: obj = b @@ -326,7 +326,7 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): obj = struct.unpack("b", c)[0] elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 - obj = self._fb_read(n, write_bytes) + obj = self._read(n, write_bytes) typ = TYPE_RAW if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) @@ -348,120 +348,120 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): obj = True elif b == 0xc4: typ = TYPE_BIN - n = struct.unpack("B", self._fb_read(1, write_bytes))[0] + n = struct.unpack("B", self._read(1, write_bytes))[0] if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._fb_read(n, write_bytes) + obj = self._read(n, write_bytes) elif b == 0xc5: typ = TYPE_BIN - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + n = struct.unpack(">H", self._read(2, write_bytes))[0] if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._fb_read(n, write_bytes) + obj = self._read(n, write_bytes) elif b == 0xc6: typ = TYPE_BIN - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4, write_bytes))[0] if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._fb_read(n, write_bytes) + obj = self._read(n, write_bytes) elif b == 0xc7: # ext 8 typ = TYPE_EXT - L, n = struct.unpack('Bb', self._fb_read(2, write_bytes)) + L, n = struct.unpack('Bb', self._read(2, write_bytes)) if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._fb_read(L, write_bytes) + obj = self._read(L, write_bytes) elif b == 0xc8: # ext 16 typ = TYPE_EXT - L, n = struct.unpack('>Hb', self._fb_read(3, write_bytes)) + L, n = struct.unpack('>Hb', self._read(3, write_bytes)) if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._fb_read(L, write_bytes) + obj = self._read(L, write_bytes) elif b == 0xc9: # ext 32 typ = TYPE_EXT - L, n = struct.unpack('>Ib', self._fb_read(5, write_bytes)) + L, n = struct.unpack('>Ib', self._read(5, write_bytes)) if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._fb_read(L, write_bytes) + obj = self._read(L, write_bytes) elif b == 0xca: - obj = struct.unpack(">f", self._fb_read(4, write_bytes))[0] + obj = struct.unpack(">f", self._read(4, write_bytes))[0] elif b == 0xcb: - obj = struct.unpack(">d", self._fb_read(8, write_bytes))[0] + 
obj = struct.unpack(">d", self._read(8, write_bytes))[0] elif b == 0xcc: - obj = struct.unpack("B", self._fb_read(1, write_bytes))[0] + obj = struct.unpack("B", self._read(1, write_bytes))[0] elif b == 0xcd: - obj = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + obj = struct.unpack(">H", self._read(2, write_bytes))[0] elif b == 0xce: - obj = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + obj = struct.unpack(">I", self._read(4, write_bytes))[0] elif b == 0xcf: - obj = struct.unpack(">Q", self._fb_read(8, write_bytes))[0] + obj = struct.unpack(">Q", self._read(8, write_bytes))[0] elif b == 0xd0: - obj = struct.unpack("b", self._fb_read(1, write_bytes))[0] + obj = struct.unpack("b", self._read(1, write_bytes))[0] elif b == 0xd1: - obj = struct.unpack(">h", self._fb_read(2, write_bytes))[0] + obj = struct.unpack(">h", self._read(2, write_bytes))[0] elif b == 0xd2: - obj = struct.unpack(">i", self._fb_read(4, write_bytes))[0] + obj = struct.unpack(">i", self._read(4, write_bytes))[0] elif b == 0xd3: - obj = struct.unpack(">q", self._fb_read(8, write_bytes))[0] + obj = struct.unpack(">q", self._read(8, write_bytes))[0] elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) - n, obj = struct.unpack('b1s', self._fb_read(2, write_bytes)) + n, obj = struct.unpack('b1s', self._read(2, write_bytes)) elif b == 0xd5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) - n, obj = struct.unpack('b2s', self._fb_read(3, write_bytes)) + n, obj = struct.unpack('b2s', self._read(3, write_bytes)) elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) - n, obj = struct.unpack('b4s', self._fb_read(5, write_bytes)) + n, obj = struct.unpack('b4s', self._read(5, write_bytes)) elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) - n, obj = struct.unpack('b8s', self._fb_read(9, write_bytes)) + n, obj = struct.unpack('b8s', self._read(9, write_bytes)) elif b == 0xd8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) - n, obj = struct.unpack('b16s', self._fb_read(17, write_bytes)) + n, obj = struct.unpack('b16s', self._read(17, write_bytes)) elif b == 0xd9: typ = TYPE_RAW - n = struct.unpack("B", self._fb_read(1, write_bytes))[0] + n = struct.unpack("B", self._read(1, write_bytes))[0] if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._fb_read(n, write_bytes) + obj = self._read(n, write_bytes) elif b == 0xda: typ = TYPE_RAW - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + n = struct.unpack(">H", self._read(2, write_bytes))[0] if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._fb_read(n, write_bytes) + obj = self._read(n, write_bytes) elif b == 0xdb: typ = TYPE_RAW - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4, write_bytes))[0] if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._fb_read(n, write_bytes) + obj = self._read(n, write_bytes) elif b == 0xdc: - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + n = 
struct.unpack(">H", self._read(2, write_bytes))[0] if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) typ = TYPE_ARRAY elif b == 0xdd: - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4, write_bytes))[0] if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) typ = TYPE_ARRAY elif b == 0xde: - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + n = struct.unpack(">H", self._read(2, write_bytes))[0] if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4, write_bytes))[0] if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP @@ -469,7 +469,7 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): raise UnpackValueError("Unknown header: 0x%x" % b) return typ, n, obj - def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): + def _unpack(self, execute=EX_CONSTRUCT, write_bytes=None): typ, n, obj = self._read_header(execute, write_bytes) if execute == EX_READ_ARRAY_HEADER: @@ -485,11 +485,11 @@ def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call `list_hook` - self._fb_unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP, write_bytes) return ret = newlist_hint(n) for i in xrange(n): - ret.append(self._fb_unpack(EX_CONSTRUCT, write_bytes)) + ret.append(self._unpack(EX_CONSTRUCT, write_bytes)) if self._list_hook is not None: ret = self._list_hook(ret) # TODO is the interaction between `list_hook` and `use_list` ok? 
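The TODO comment above questions how `list_hook` interacts with `use_list`. In this fallback implementation the hook is applied to each freshly built list, and `use_list=False` only converts the hook's result to a tuple afterwards. A small aside illustrating that behaviour against the pure-Python module (the expected outputs are what this fallback code produces, not something asserted by the patch):

    from msgpack import packb
    from msgpack.fallback import unpackb

    data = packb([1, [2, 3]])
    # list_hook runs once per msgpack array, innermost arrays first
    print(unpackb(data, list_hook=tuple))   # (1, (2, 3))
    # use_list=False performs the tuple conversion itself, after any hook
    print(unpackb(data, use_list=False))    # (1, (2, 3))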
@@ -498,19 +498,19 @@ def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call hooks - self._fb_unpack(EX_SKIP, write_bytes) - self._fb_unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP, write_bytes) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._fb_unpack(EX_CONSTRUCT, write_bytes), - self._fb_unpack(EX_CONSTRUCT, write_bytes)) + (self._unpack(EX_CONSTRUCT, write_bytes), + self._unpack(EX_CONSTRUCT, write_bytes)) for _ in xrange(n)) else: ret = {} for _ in xrange(n): - key = self._fb_unpack(EX_CONSTRUCT, write_bytes) - ret[key] = self._fb_unpack(EX_CONSTRUCT, write_bytes) + key = self._unpack(EX_CONSTRUCT, write_bytes) + ret[key] = self._unpack(EX_CONSTRUCT, write_bytes) if self._object_hook is not None: ret = self._object_hook(ret) return ret @@ -532,32 +532,32 @@ def __iter__(self): def __next__(self): try: - ret = self._fb_unpack(EX_CONSTRUCT, None) - self._fb_consume() + ret = self._unpack(EX_CONSTRUCT, None) + self._consume() return ret except OutOfData: - self._fb_consume() + self._consume() raise StopIteration next = __next__ def skip(self, write_bytes=None): - self._fb_unpack(EX_SKIP, write_bytes) - self._fb_consume() + self._unpack(EX_SKIP, write_bytes) + self._consume() def unpack(self, write_bytes=None): - ret = self._fb_unpack(EX_CONSTRUCT, write_bytes) - self._fb_consume() + ret = self._unpack(EX_CONSTRUCT, write_bytes) + self._consume() return ret def read_array_header(self, write_bytes=None): - ret = self._fb_unpack(EX_READ_ARRAY_HEADER, write_bytes) - self._fb_consume() + ret = self._unpack(EX_READ_ARRAY_HEADER, write_bytes) + self._consume() return ret def read_map_header(self, write_bytes=None): - ret = self._fb_unpack(EX_READ_MAP_HEADER, write_bytes) - self._fb_consume() + ret = self._unpack(EX_READ_MAP_HEADER, write_bytes) + self._consume() return ret @@ -658,7 +658,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, n = len(obj) if n >= 2**32: raise PackValueError("Bytes is too large") - self._fb_pack_bin_header(n) + self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, Unicode): if self._encoding is None: @@ -669,13 +669,13 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, n = len(obj) if n >= 2**32: raise PackValueError("String is too large") - self._fb_pack_raw_header(n) + self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): n = len(obj) * obj.itemsize if n >= 2**32: raise PackValueError("Memoryview is too large") - self._fb_pack_bin_header(n) + self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, float): if self._use_float: @@ -708,12 +708,12 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, return if check(obj, list_types): n = len(obj) - self._fb_pack_array_header(n) + self._pack_array_header(n) for i in xrange(n): self._pack(obj[i], nest_limit - 1) return if check(obj, dict): - return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), + return self._pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) if not default_used and self._default is not None: obj = self._default(obj) @@ -731,7 +731,7 @@ def pack(self, obj): return ret def pack_map_pairs(self, pairs): - self._fb_pack_map_pairs(len(pairs), pairs) + self._pack_map_pairs(len(pairs), pairs) ret = self._buffer.getvalue() if self._autoreset: self._buffer = StringIO() @@ -742,7 +742,7 @@ def pack_map_pairs(self, pairs): def pack_array_header(self, n): 
if n >= 2**32: raise PackValueError - self._fb_pack_array_header(n) + self._pack_array_header(n) ret = self._buffer.getvalue() if self._autoreset: self._buffer = StringIO() @@ -753,7 +753,7 @@ def pack_array_header(self, n): def pack_map_header(self, n): if n >= 2**32: raise PackValueError - self._fb_pack_map_header(n) + self._pack_map_header(n) ret = self._buffer.getvalue() if self._autoreset: self._buffer = StringIO() @@ -790,7 +790,7 @@ def pack_ext_type(self, typecode, data): self._buffer.write(struct.pack('B', typecode)) self._buffer.write(data) - def _fb_pack_array_header(self, n): + def _pack_array_header(self, n): if n <= 0x0f: return self._buffer.write(struct.pack('B', 0x90 + n)) if n <= 0xffff: @@ -799,7 +799,7 @@ def _fb_pack_array_header(self, n): return self._buffer.write(struct.pack(">BI", 0xdd, n)) raise PackValueError("Array is too large") - def _fb_pack_map_header(self, n): + def _pack_map_header(self, n): if n <= 0x0f: return self._buffer.write(struct.pack('B', 0x80 + n)) if n <= 0xffff: @@ -808,13 +808,13 @@ def _fb_pack_map_header(self, n): return self._buffer.write(struct.pack(">BI", 0xdf, n)) raise PackValueError("Dict is too large") - def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): - self._fb_pack_map_header(n) + def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): + self._pack_map_header(n) for (k, v) in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) - def _fb_pack_raw_header(self, n): + def _pack_raw_header(self, n): if n <= 0x1f: self._buffer.write(struct.pack('B', 0xa0 + n)) elif self._use_bin_type and n <= 0xff: @@ -826,9 +826,9 @@ def _fb_pack_raw_header(self, n): else: raise PackValueError('Raw is too large') - def _fb_pack_bin_header(self, n): + def _pack_bin_header(self, n): if not self._use_bin_type: - return self._fb_pack_raw_header(n) + return self._pack_raw_header(n) elif n <= 0xff: return self._buffer.write(struct.pack('>BB', 0xc4, n)) elif n <= 0xffff: From e9c42fa523b51c184b581d6eab85f0ad40dff620 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 22 May 2016 13:31:01 +0900 Subject: [PATCH 069/349] fallback: simplify write_bytes callback implementation --- msgpack/fallback.py | 124 +++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 60 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 95be7133..ecdbec4e 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -264,7 +264,7 @@ def _get_extradata(self): def read_bytes(self, n): return self._read(n) - def _read(self, n, write_bytes=None): + def _read(self, n): # (int, Optional[Callable]) -> bytearray remain_bytes = len(self._buffer) - self._buff_i - n @@ -272,8 +272,6 @@ def _read(self, n, write_bytes=None): if remain_bytes >= 0: ret = self._buffer[self._buff_i:self._buff_i+n] self._buff_i += n - if write_bytes is not None: - write_bytes(ret) return ret if self._feeding: @@ -310,15 +308,13 @@ def _read(self, n, write_bytes=None): ret = self._buffer[self._buff_i:self._buff_i+n] self._buff_i += n - if write_bytes is not None: - write_bytes(ret) return ret - def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): + def _read_header(self, execute=EX_CONSTRUCT): typ = TYPE_IMMEDIATE n = 0 obj = None - c = self._read(1, write_bytes) + c = self._read(1) b = ord(c) if b & 0b10000000 == 0: obj = b @@ -326,7 +322,7 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): obj = struct.unpack("b", c)[0] elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 - obj = self._read(n, 
write_bytes) + obj = self._read(n) typ = TYPE_RAW if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) @@ -348,120 +344,120 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): obj = True elif b == 0xc4: typ = TYPE_BIN - n = struct.unpack("B", self._read(1, write_bytes))[0] + n = struct.unpack("B", self._read(1))[0] if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n, write_bytes) + obj = self._read(n) elif b == 0xc5: typ = TYPE_BIN - n = struct.unpack(">H", self._read(2, write_bytes))[0] + n = struct.unpack(">H", self._read(2))[0] if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n, write_bytes) + obj = self._read(n) elif b == 0xc6: typ = TYPE_BIN - n = struct.unpack(">I", self._read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4))[0] if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n, write_bytes) + obj = self._read(n) elif b == 0xc7: # ext 8 typ = TYPE_EXT - L, n = struct.unpack('Bb', self._read(2, write_bytes)) + L, n = struct.unpack('Bb', self._read(2)) if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L, write_bytes) + obj = self._read(L) elif b == 0xc8: # ext 16 typ = TYPE_EXT - L, n = struct.unpack('>Hb', self._read(3, write_bytes)) + L, n = struct.unpack('>Hb', self._read(3)) if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L, write_bytes) + obj = self._read(L) elif b == 0xc9: # ext 32 typ = TYPE_EXT - L, n = struct.unpack('>Ib', self._read(5, write_bytes)) + L, n = struct.unpack('>Ib', self._read(5)) if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L, write_bytes) + obj = self._read(L) elif b == 0xca: - obj = struct.unpack(">f", self._read(4, write_bytes))[0] + obj = struct.unpack(">f", self._read(4))[0] elif b == 0xcb: - obj = struct.unpack(">d", self._read(8, write_bytes))[0] + obj = struct.unpack(">d", self._read(8))[0] elif b == 0xcc: - obj = struct.unpack("B", self._read(1, write_bytes))[0] + obj = struct.unpack("B", self._read(1))[0] elif b == 0xcd: - obj = struct.unpack(">H", self._read(2, write_bytes))[0] + obj = struct.unpack(">H", self._read(2))[0] elif b == 0xce: - obj = struct.unpack(">I", self._read(4, write_bytes))[0] + obj = struct.unpack(">I", self._read(4))[0] elif b == 0xcf: - obj = struct.unpack(">Q", self._read(8, write_bytes))[0] + obj = struct.unpack(">Q", self._read(8))[0] elif b == 0xd0: - obj = struct.unpack("b", self._read(1, write_bytes))[0] + obj = struct.unpack("b", self._read(1))[0] elif b == 0xd1: - obj = struct.unpack(">h", self._read(2, write_bytes))[0] + obj = struct.unpack(">h", self._read(2))[0] elif b == 0xd2: - obj = struct.unpack(">i", self._read(4, write_bytes))[0] + obj = struct.unpack(">i", self._read(4))[0] elif b == 0xd3: - obj = struct.unpack(">q", self._read(8, write_bytes))[0] + obj = struct.unpack(">q", self._read(8))[0] elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) - n, obj = struct.unpack('b1s', self._read(2, write_bytes)) + n, obj = struct.unpack('b1s', self._read(2)) elif b == 0xd5: # fixext 2 typ = TYPE_EXT if 
self._max_ext_len < 2: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) - n, obj = struct.unpack('b2s', self._read(3, write_bytes)) + n, obj = struct.unpack('b2s', self._read(3)) elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) - n, obj = struct.unpack('b4s', self._read(5, write_bytes)) + n, obj = struct.unpack('b4s', self._read(5)) elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) - n, obj = struct.unpack('b8s', self._read(9, write_bytes)) + n, obj = struct.unpack('b8s', self._read(9)) elif b == 0xd8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) - n, obj = struct.unpack('b16s', self._read(17, write_bytes)) + n, obj = struct.unpack('b16s', self._read(17)) elif b == 0xd9: typ = TYPE_RAW - n = struct.unpack("B", self._read(1, write_bytes))[0] + n = struct.unpack("B", self._read(1))[0] if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n, write_bytes) + obj = self._read(n) elif b == 0xda: typ = TYPE_RAW - n = struct.unpack(">H", self._read(2, write_bytes))[0] + n = struct.unpack(">H", self._read(2))[0] if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n, write_bytes) + obj = self._read(n) elif b == 0xdb: typ = TYPE_RAW - n = struct.unpack(">I", self._read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4))[0] if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n, write_bytes) + obj = self._read(n) elif b == 0xdc: - n = struct.unpack(">H", self._read(2, write_bytes))[0] + n = struct.unpack(">H", self._read(2))[0] if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) typ = TYPE_ARRAY elif b == 0xdd: - n = struct.unpack(">I", self._read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4))[0] if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) typ = TYPE_ARRAY elif b == 0xde: - n = struct.unpack(">H", self._read(2, write_bytes))[0] + n = struct.unpack(">H", self._read(2))[0] if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: - n = struct.unpack(">I", self._read(4, write_bytes))[0] + n = struct.unpack(">I", self._read(4))[0] if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP @@ -469,8 +465,8 @@ def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): raise UnpackValueError("Unknown header: 0x%x" % b) return typ, n, obj - def _unpack(self, execute=EX_CONSTRUCT, write_bytes=None): - typ, n, obj = self._read_header(execute, write_bytes) + def _unpack(self, execute=EX_CONSTRUCT): + typ, n, obj = self._read_header(execute) if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: @@ -485,11 +481,11 @@ def _unpack(self, execute=EX_CONSTRUCT, write_bytes=None): if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call `list_hook` - self._unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP) return ret = newlist_hint(n) for i in xrange(n): - ret.append(self._unpack(EX_CONSTRUCT, write_bytes)) + 
ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) # TODO is the interaction between `list_hook` and `use_list` ok? @@ -498,19 +494,19 @@ def _unpack(self, execute=EX_CONSTRUCT, write_bytes=None): if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call hooks - self._unpack(EX_SKIP, write_bytes) - self._unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP) + self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT, write_bytes), - self._unpack(EX_CONSTRUCT, write_bytes)) + (self._unpack(EX_CONSTRUCT), + self._unpack(EX_CONSTRUCT)) for _ in xrange(n)) else: ret = {} for _ in xrange(n): - key = self._unpack(EX_CONSTRUCT, write_bytes) - ret[key] = self._unpack(EX_CONSTRUCT, write_bytes) + key = self._unpack(EX_CONSTRUCT) + ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: ret = self._object_hook(ret) return ret @@ -532,7 +528,7 @@ def __iter__(self): def __next__(self): try: - ret = self._unpack(EX_CONSTRUCT, None) + ret = self._unpack(EX_CONSTRUCT) self._consume() return ret except OutOfData: @@ -542,21 +538,29 @@ def __next__(self): next = __next__ def skip(self, write_bytes=None): - self._unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() def unpack(self, write_bytes=None): - ret = self._unpack(EX_CONSTRUCT, write_bytes) + ret = self._unpack(EX_CONSTRUCT) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret def read_array_header(self, write_bytes=None): - ret = self._unpack(EX_READ_ARRAY_HEADER, write_bytes) + ret = self._unpack(EX_READ_ARRAY_HEADER) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret def read_map_header(self, write_bytes=None): - ret = self._unpack(EX_READ_MAP_HEADER, write_bytes) + ret = self._unpack(EX_READ_MAP_HEADER) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret From 6b8919355d6acdda74acc568dd348598e552f003 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 24 May 2016 02:46:29 +0900 Subject: [PATCH 070/349] fallback: Use struct.unpack_from when possible --- msgpack/fallback.py | 150 +++++++++++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 50 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index ecdbec4e..9b32610b 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -265,14 +265,18 @@ def read_bytes(self, n): return self._read(n) def _read(self, n): - # (int, Optional[Callable]) -> bytearray + # (int) -> bytearray + self._reserve(n) + i = self._buff_i + self._buff_i = i+n + return self._buffer[i:i+n] + + def _reserve(self, n): remain_bytes = len(self._buffer) - self._buff_i - n # Fast path: buffer has n bytes already if remain_bytes >= 0: - ret = self._buffer[self._buff_i:self._buff_i+n] - self._buff_i += n - return ret + return if self._feeding: self._buff_i = self._buf_checkpoint @@ -299,33 +303,23 @@ def _read(self, n): self._buff_i = 0 # rollback raise OutOfData - if len(self._buffer) == n: - # checkpoint == 0 - ret = self._buffer - self._buffer = b"" - self._buff_i = 0 - else: - ret = self._buffer[self._buff_i:self._buff_i+n] - self._buff_i += n - - return ret - def _read_header(self, execute=EX_CONSTRUCT): typ = TYPE_IMMEDIATE n = 0 
obj = None - c = self._read(1) - b = ord(c) - if b & 0b10000000 == 0: + self._reserve(1) + b = struct.unpack_from("B", self._buffer, self._buff_i)[0] + self._buff_i += 1 + if b & 0b10000000 == 0: obj = b elif b & 0b11100000 == 0b11100000: - obj = struct.unpack("b", c)[0] + obj = -1 - (b ^ 0xff) elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 - obj = self._read(n) typ = TYPE_RAW if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY @@ -344,120 +338,176 @@ def _read_header(self, execute=EX_CONSTRUCT): obj = True elif b == 0xc4: typ = TYPE_BIN - n = struct.unpack("B", self._read(1))[0] + self._reserve(1) + n = struct.unpack_from("B", self._buffer, self._buff_i)[0] + self._buff_i += 1 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) elif b == 0xc5: typ = TYPE_BIN - n = struct.unpack(">H", self._read(2))[0] + self._reserve(2) + n = struct.unpack_from(">H", self._buffer, self._buff_i)[0] + self._buff_i += 2 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) elif b == 0xc6: typ = TYPE_BIN - n = struct.unpack(">I", self._read(4))[0] + self._reserve(4) + n = struct.unpack_from(">I", self._buffer, self._buff_i)[0] + self._buff_i += 4 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) elif b == 0xc7: # ext 8 typ = TYPE_EXT - L, n = struct.unpack('Bb', self._read(2)) + self._reserve(2) + L, n = struct.unpack_from('Bb', self._buffer, self._buff_i) + self._buff_i += 2 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xc8: # ext 16 typ = TYPE_EXT - L, n = struct.unpack('>Hb', self._read(3)) + self._reserve(3) + L, n = struct.unpack_from('>Hb', self._buffer, self._buff_i) + self._buff_i += 3 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xc9: # ext 32 typ = TYPE_EXT - L, n = struct.unpack('>Ib', self._read(5)) + self._reserve(5) + L, n = struct.unpack_from('>Ib', self._buffer, self._buff_i) + self._buff_i += 5 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xca: - obj = struct.unpack(">f", self._read(4))[0] + self._reserve(4) + obj = struct.unpack_from(">f", self._buffer, self._buff_i)[0] + self._buff_i += 4 elif b == 0xcb: - obj = struct.unpack(">d", self._read(8))[0] + self._reserve(8) + obj = struct.unpack_from(">d", self._buffer, self._buff_i)[0] + self._buff_i += 8 elif b == 0xcc: - obj = struct.unpack("B", self._read(1))[0] + self._reserve(1) + obj = struct.unpack_from("B", self._buffer, self._buff_i)[0] + self._buff_i += 1 elif b == 0xcd: - obj = struct.unpack(">H", self._read(2))[0] + self._reserve(2) + obj = struct.unpack_from(">H", self._buffer, self._buff_i)[0] + self._buff_i += 2 elif b == 0xce: - obj = struct.unpack(">I", self._read(4))[0] + self._reserve(4) + obj = struct.unpack_from(">I", self._buffer, self._buff_i)[0] + self._buff_i += 4 elif b == 0xcf: - obj = struct.unpack(">Q", self._read(8))[0] + self._reserve(8) + obj = struct.unpack_from(">Q", self._buffer, self._buff_i)[0] + self._buff_i += 8 elif b == 0xd0: - obj = struct.unpack("b", self._read(1))[0] + 
self._reserve(1) + obj = struct.unpack_from("b", self._buffer, self._buff_i)[0] + self._buff_i += 1 elif b == 0xd1: - obj = struct.unpack(">h", self._read(2))[0] + self._reserve(2) + obj = struct.unpack_from(">h", self._buffer, self._buff_i)[0] + self._buff_i += 2 elif b == 0xd2: - obj = struct.unpack(">i", self._read(4))[0] + self._reserve(4) + obj = struct.unpack_from(">i", self._buffer, self._buff_i)[0] + self._buff_i += 4 elif b == 0xd3: - obj = struct.unpack(">q", self._read(8))[0] + self._reserve(8) + obj = struct.unpack_from(">q", self._buffer, self._buff_i)[0] + self._buff_i += 8 elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) - n, obj = struct.unpack('b1s', self._read(2)) + self._reserve(2) + n, obj = struct.unpack_from("b1s", self._buffer, self._buff_i) + self._buff_i += 2 elif b == 0xd5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) - n, obj = struct.unpack('b2s', self._read(3)) + self._reserve(3) + n, obj = struct.unpack_from("b2s", self._buffer, self._buff_i) + self._buff_i += 3 elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) - n, obj = struct.unpack('b4s', self._read(5)) + self._reserve(5) + n, obj = struct.unpack_from("b4s", self._buffer, self._buff_i) + self._buff_i += 5 elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) - n, obj = struct.unpack('b8s', self._read(9)) + self._reserve(9) + n, obj = struct.unpack_from("b8s", self._buffer, self._buff_i) + self._buff_i += 9 elif b == 0xd8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) - n, obj = struct.unpack('b16s', self._read(17)) + self._reserve(17) + n, obj = struct.unpack_from("b16s", self._buffer, self._buff_i) + self._buff_i += 17 elif b == 0xd9: typ = TYPE_RAW - n = struct.unpack("B", self._read(1))[0] + self._reserve(1) + n, = struct.unpack_from("B", self._buffer, self._buff_i) + self._buff_i += 1 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b == 0xda: typ = TYPE_RAW - n = struct.unpack(">H", self._read(2))[0] + self._reserve(2) + n, = struct.unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b == 0xdb: typ = TYPE_RAW - n = struct.unpack(">I", self._read(4))[0] + self._reserve(4) + n, = struct.unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b == 0xdc: - n = struct.unpack(">H", self._read(2))[0] + typ = TYPE_ARRAY + self._reserve(2) + n, = struct.unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - typ = TYPE_ARRAY elif b == 0xdd: - n = struct.unpack(">I", self._read(4))[0] + typ = TYPE_ARRAY + self._reserve(4) + n, = struct.unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, 
self._max_array_len) - typ = TYPE_ARRAY elif b == 0xde: - n = struct.unpack(">H", self._read(2))[0] + self._reserve(2) + n, = struct.unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: - n = struct.unpack(">I", self._read(4))[0] + self._reserve(4) + n, = struct.unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP From c16a1c6bdf667d5cfe314d09d5613808f1243a8f Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 24 May 2016 07:32:30 +0900 Subject: [PATCH 071/349] fallback: Use bytearray as buffer --- msgpack/fallback.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9b32610b..a23ad8ca 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -196,7 +196,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, self._feeding = False #: array of bytes feeded. - self._buffer = b"" + self._buffer = bytearray() #: Which position we currently reads self._buff_i = 0 @@ -249,7 +249,7 @@ def feed(self, next_bytes): raise BufferFull # bytes + bytearray -> bytearray # So cast before append - self._buffer += bytes(next_bytes) + self._buffer += next_bytes def _consume(self): """ Gets rid of the used parts of the buffer. """ @@ -284,7 +284,7 @@ def _reserve(self, n): # Strip buffer before checkpoint before reading file. if self._buf_checkpoint > 0: - self._buffer = self._buffer[self._buf_checkpoint:] + del self._buffer[:self._buf_checkpoint] self._buff_i -= self._buf_checkpoint self._buf_checkpoint = 0 @@ -308,7 +308,8 @@ def _read_header(self, execute=EX_CONSTRUCT): n = 0 obj = None self._reserve(1) - b = struct.unpack_from("B", self._buffer, self._buff_i)[0] + #b = struct.unpack_from("B", self._buffer, self._buff_i)[0] + b = self._buffer[self._buff_i] self._buff_i += 1 if b & 0b10000000 == 0: obj = b @@ -339,7 +340,8 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc4: typ = TYPE_BIN self._reserve(1) - n = struct.unpack_from("B", self._buffer, self._buff_i)[0] + #n = struct.unpack_from("B", self._buffer, self._buff_i)[0] + n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) @@ -394,7 +396,8 @@ def _read_header(self, execute=EX_CONSTRUCT): self._buff_i += 8 elif b == 0xcc: self._reserve(1) - obj = struct.unpack_from("B", self._buffer, self._buff_i)[0] + #obj = struct.unpack_from("B", self._buffer, self._buff_i)[0] + obj = self._buffer[self._buff_i] self._buff_i += 1 elif b == 0xcd: self._reserve(2) @@ -462,7 +465,8 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xd9: typ = TYPE_RAW self._reserve(1) - n, = struct.unpack_from("B", self._buffer, self._buff_i) + #n, = struct.unpack_from("B", self._buffer, self._buff_i) + n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) @@ -565,11 +569,13 @@ def _unpack(self, execute=EX_CONSTRUCT): if typ == TYPE_RAW: if self._encoding is not None: obj = obj.decode(self._encoding, self._unicode_errors) + else: + obj = bytes(obj) return obj if typ == TYPE_EXT: - return self._ext_hook(n, obj) + return self._ext_hook(n, bytes(obj)) if typ == TYPE_BIN: - return obj + return 
bytes(obj) assert typ == TYPE_IMMEDIATE return obj From 2b63e9fbbb3440d73d6638ec8af6315aeb8ecd97 Mon Sep 17 00:00:00 2001 From: folz Date: Sat, 7 May 2016 15:18:20 +0200 Subject: [PATCH 072/349] enable unpacking from memoryview --- msgpack/_unpacker.pyx | 92 ++++++++++++++++++++++++++++++++++--------- msgpack/fallback.py | 37 ++++++++++------- test/test_buffer.py | 9 +++++ 3 files changed, 106 insertions(+), 32 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 23f6478f..f6e06b0c 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -8,16 +8,23 @@ from cpython.bytes cimport ( ) from cpython.buffer cimport ( Py_buffer, - PyBuffer_Release, + PyObject_CheckBuffer, PyObject_GetBuffer, + PyBuffer_Release, + PyBuffer_IsContiguous, + PyBUF_READ, PyBUF_SIMPLE, + PyBUF_FULL_RO, ) from cpython.mem cimport PyMem_Malloc, PyMem_Free from cpython.object cimport PyCallable_Check +from cpython.ref cimport Py_DECREF +from cpython.exc cimport PyErr_WarnEx cdef extern from "Python.h": ctypedef struct PyObject cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 + object PyMemoryView_GetContiguous(object obj, int buffertype, char order) from libc.stdlib cimport * from libc.string cimport * @@ -110,6 +117,42 @@ cdef inline init_ctx(unpack_context *ctx, def default_read_extended_type(typecode, data): raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) +cdef inline int get_data_from_buffer(object obj, + Py_buffer *view, + char **buf, + Py_ssize_t *buffer_len, + int *new_protocol) except 0: + cdef object contiguous + cdef Py_buffer tmp + if PyObject_CheckBuffer(obj): + new_protocol[0] = 1 + if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: + raise + if view.itemsize != 1: + PyBuffer_Release(view) + raise BufferError("cannot unpack from multi-byte object") + if PyBuffer_IsContiguous(view, 'A') == 0: + PyBuffer_Release(view) + # create a contiguous copy and get buffer + contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C') + PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) + # view must hold the only reference to contiguous, + # so memory is freed when view is released + Py_DECREF(contiguous) + buffer_len[0] = view.len + buf[0] = view.buf + return 1 + else: + new_protocol[0] = 0 + if PyObject_AsReadBuffer(obj, buf, buffer_len) == -1: + raise BufferError("could not get memoryview") + PyErr_WarnEx(RuntimeWarning, + "using old buffer interface to unpack %s; " + "this leads to unpacking errors if slicing is used and " + "will be removed in a future version" % type(obj), + 1) + return 1 + def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, @@ -129,27 +172,34 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef Py_ssize_t off = 0 cdef int ret - cdef char* buf + cdef Py_buffer view + cdef char* buf = NULL cdef Py_ssize_t buf_len cdef char* cenc = NULL cdef char* cerr = NULL + cdef int new_protocol = 0 + + get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) - PyObject_AsReadBuffer(packed, &buf, &buf_len) + try: + if encoding is not None: + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + cenc = PyBytes_AsString(encoding) - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) + if unicode_errors is not None: + if 
isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + cerr = PyBytes_AsString(unicode_errors) - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, cenc, cerr, + max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + ret = unpack_construct(&ctx, buf, buf_len, &off) + finally: + if new_protocol: + PyBuffer_Release(&view); - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, cenc, cerr, - max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) if off < buf_len: @@ -335,14 +385,20 @@ cdef class Unpacker(object): def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff + cdef int new_protocol = 0 + cdef char* buf + cdef Py_ssize_t buf_len + if self.file_like is not None: raise AssertionError( "unpacker.feed() is not be able to use with `file_like`.") - PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE) + + get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol) try: - self.append_buffer(pybuff.buf, pybuff.len) + self.append_buffer(buf, buf_len) finally: - PyBuffer_Release(&pybuff) + if new_protocol: + PyBuffer_Release(&pybuff) cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): cdef: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index a23ad8ca..11087eb3 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,8 +1,8 @@ """Fallback pure Python implementation of msgpack""" import sys -import array import struct +import warnings if sys.version_info[0] == 3: PY3 = True @@ -46,6 +46,7 @@ def getvalue(self): from io import BytesIO as StringIO newlist_hint = lambda size: [] + from msgpack.exceptions import ( BufferFull, OutOfData, @@ -79,6 +80,24 @@ def _check_type_strict(obj, t, type=type, tuple=tuple): return type(obj) is t +def _get_data_from_buffer(obj): + try: + view = memoryview(obj) + except TypeError: + # try to use legacy buffer protocol if 2.7, otherwise re-raise + if not PY3: + view = memoryview(buffer(obj)) + warnings.warn("using old buffer interface to unpack %s; " + "this leads to unpacking errors if slicing is used and " + "will be removed in a future version" % type(obj), + RuntimeWarning) + else: + raise + if view.itemsize != 1: + raise ValueError("cannot unpack from multi-byte object") + return view + + def unpack(stream, **kwargs): """ Unpack an object from `stream`. @@ -239,17 +258,11 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raise TypeError("`ext_hook` is not callable") def feed(self, next_bytes): - if isinstance(next_bytes, array.array): - next_bytes = next_bytes.tostring() - if not isinstance(next_bytes, (bytes, bytearray)): - raise TypeError("next_bytes should be bytes, bytearray or array.array") assert self._feeding - - if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size): + view = _get_data_from_buffer(next_bytes) + if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size): raise BufferFull - # bytes + bytearray -> bytearray - # So cast before append - self._buffer += next_bytes + self._buffer += view def _consume(self): """ Gets rid of the used parts of the buffer. 
""" @@ -308,7 +321,6 @@ def _read_header(self, execute=EX_CONSTRUCT): n = 0 obj = None self._reserve(1) - #b = struct.unpack_from("B", self._buffer, self._buff_i)[0] b = self._buffer[self._buff_i] self._buff_i += 1 if b & 0b10000000 == 0: @@ -340,7 +352,6 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc4: typ = TYPE_BIN self._reserve(1) - #n = struct.unpack_from("B", self._buffer, self._buff_i)[0] n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_bin_len: @@ -396,7 +407,6 @@ def _read_header(self, execute=EX_CONSTRUCT): self._buff_i += 8 elif b == 0xcc: self._reserve(1) - #obj = struct.unpack_from("B", self._buffer, self._buff_i)[0] obj = self._buffer[self._buff_i] self._buff_i += 1 elif b == 0xcd: @@ -465,7 +475,6 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xd9: typ = TYPE_RAW self._reserve(1) - #n, = struct.unpack_from("B", self._buffer, self._buff_i) n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_str_len: diff --git a/test/test_buffer.py b/test/test_buffer.py index 5a71f904..87f359f9 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -18,3 +18,12 @@ def test_unpack_bytearray(): assert [b'foo', b'bar'] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) + + +def test_unpack_memoryview(): + buf = bytearray(packb(('foo', 'bar'))) + view = memoryview(buf) + obj = unpackb(view, use_list=1) + assert [b'foo', b'bar'] == obj + expected_type = bytes + assert all(type(s) == expected_type for s in obj) From d6254abc8a3ebec6a135b923951767bd97557de4 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 3 May 2016 11:58:28 +0900 Subject: [PATCH 073/349] Use AppVeyor to build windows wheel (#188) * Add AppVeyor support to build windows wheel * Fix test_limits on 32bit environments * Ignore Python35-x64 test fail for now Should be fixed in next version. --- appveyor.yml | 57 +++++++++++++++++++++++++++++++++++++++++++++ build.cmd | 21 +++++++++++++++++ msgpack/_packer.pyx | 4 ++-- 3 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 appveyor.yml create mode 100644 build.cmd diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..02b44611 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,57 @@ +environment: + + matrix: + + # For Python versions available on Appveyor, see + # http://www.appveyor.com/docs/installed-software#python + # The list here is complete (excluding Python 2.6, which + # isn't covered by this document) at the time of writing. + + - PYTHON: "C:\\Python27" + - PYTHON: "C:\\Python34" + - PYTHON: "C:\\Python35" + - PYTHON: "C:\\Python27-x64" + - PYTHON: "C:\\Python34-x64" + DISTUTILS_USE_SDK: "1" + + # Python35-x64 test fails with MemoryError + # TODO: investigate it + #- PYTHON: "C:\\Python35-x64" + +install: + # We need wheel installed to build wheels + - "%PYTHON%\\python.exe -m pip install -U pip wheel pytest cython" + +build: off + +test_script: + # Put your test command here. + # If you don't need to build C extensions on 64-bit Python 3.3 or 3.4, + # you can remove "build.cmd" from the front of the command, as it's + # only needed to support those cases. + # Note that you must use the environment variable %PYTHON% to refer to + # the interpreter you're using - Appveyor does not do anything special + # to put the Python evrsion you want to use on PATH. 
+ - "build.cmd %PYTHON%\\python.exe setup.py build_ext -i" + - "build.cmd %PYTHON%\\python.exe setup.py install" + - "%PYTHON%\\python.exe -c \"import sys; print(hex(sys.maxsize))\"" + - "%PYTHON%\\python.exe -c \"from msgpack import _packer, _unpacker\"" + - "%PYTHON%\\Scripts\\py.test test" + - "build.cmd %PYTHON%\\python.exe setup.py bdist_wheel" + +after_test: + # This step builds your wheels. + # Again, you only need build.cmd if you're building C extensions for + # 64-bit Python 3.3/3.4. And you need to use %PYTHON% to get the correct + # interpreter + +artifacts: + # bdist_wheel puts your built wheel in the dist directory + - path: dist\* + +#on_success: +# You can use this step to upload your artifacts to a public website. +# See Appveyor's documentation for more details. Or you can simply +# access your wheels from the Appveyor "artifacts" tab for your build. + +# vim: set shiftwidth=2 diff --git a/build.cmd b/build.cmd new file mode 100644 index 00000000..243dc9a1 --- /dev/null +++ b/build.cmd @@ -0,0 +1,21 @@ +@echo off +:: To build extensions for 64 bit Python 3, we need to configure environment +:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: +:: MS Windows SDK for Windows 7 and .NET Framework 4 +:: +:: More details at: +:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows + +IF "%DISTUTILS_USE_SDK%"=="1" ( + ECHO Configuring environment to build with MSVC on a 64bit architecture + ECHO Using Windows SDK 7.1 + "C:\Program Files\Microsoft SDKs\Windows\v7.1\Setup\WindowsSdkVer.exe" -q -version:v7.1 + CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release + SET MSSdk=1 + REM Need the following to allow tox to see the SDK compiler + SET TOX_TESTENV_PASSENV=DISTUTILS_USE_SDK MSSdk INCLUDE LIB +) ELSE ( + ECHO Using default MSVC build environment +) + +CALL %* diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 6392655e..872465b4 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -244,7 +244,7 @@ cdef class Packer(object): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - def pack_array_header(self, size_t size): + def pack_array_header(self, long long size): if size > (2**32-1): raise ValueError cdef int ret = msgpack_pack_array(&self.pk, size) @@ -257,7 +257,7 @@ cdef class Packer(object): self.pk.length = 0 return buf - def pack_map_header(self, size_t size): + def pack_map_header(self, long long size): if size > (2**32-1): raise ValueError cdef int ret = msgpack_pack_map(&self.pk, size) From 334dbe2a9652f43abdf27d978d9f4cdaf3f2a34d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 21 Jul 2016 19:19:32 +0900 Subject: [PATCH 074/349] Enable Python35-x64 in AppVeyor --- appveyor.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 02b44611..9e766c55 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,10 +13,7 @@ environment: - PYTHON: "C:\\Python27-x64" - PYTHON: "C:\\Python34-x64" DISTUTILS_USE_SDK: "1" - - # Python35-x64 test fails with MemoryError - # TODO: investigate it - #- PYTHON: "C:\\Python35-x64" + - PYTHON: "C:\\Python35-x64" install: # We need wheel installed to build wheels From b911b3c652e190e6942a610fb3389aaaf2ccf3cc Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 21 Jul 2016 19:32:00 +0900 Subject: [PATCH 075/349] Fix ext_hook call (#203) fixes #202 --- msgpack/unpack.h | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/msgpack/unpack.h b/msgpack/unpack.h index 92f4f118..da2cfb6a 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -265,9 +265,9 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch } // length also includes the typecode, so the actual data is length-1 #if PY_MAJOR_VERSION == 2 - py = PyObject_CallFunction(u->ext_hook, "(is#)", typecode, pos, length-1); + py = PyObject_CallFunction(u->ext_hook, "(is#)", (int)typecode, pos, (Py_ssize_t)length-1); #else - py = PyObject_CallFunction(u->ext_hook, "(iy#)", typecode, pos, length-1); + py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); #endif if (!py) return -1; diff --git a/setup.py b/setup.py index 37729bd5..1363586e 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ class NoCython(Exception): def cythonize(src): sys.stderr.write("cythonize: %r\n" % (src,)) - cython_compiler.compile([src], cplus=True, emit_linenums=True) + cython_compiler.compile([src], cplus=True) def ensure_source(src): pyx = os.path.splitext(src)[0] + '.pyx' From ff208ad7d0b288f2d94d3160bf8a4cddeebc987d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 29 Jul 2016 22:25:05 +0900 Subject: [PATCH 076/349] 0.4.8 --- ChangeLog.rst | 11 ++++++++++- msgpack/_version.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 35535b4a..2151736d 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,6 +1,15 @@ +0.4.8 +===== +:release date: 2016-07-29 + +Bugs fixed +---------- + +* Calling ext_hook with wrong length. (Only on Windows, maybe. #203) + 0.4.7 ===== -:release date: TBD +:release date: 2016-01-25 Bugs fixed ---------- diff --git a/msgpack/_version.py b/msgpack/_version.py index 37c172db..76bd8fbe 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 4, 7) +version = (0, 4, 8) From a9f4dad4dcde4db148f22720c694e5b5e0cb6f2d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 30 Jul 2016 11:35:26 +0900 Subject: [PATCH 077/349] Make manylinux1 wheels --- Makefile | 4 ++++ docker/buildwheel.sh | 11 +++++++++++ 2 files changed, 15 insertions(+) create mode 100644 docker/buildwheel.sh diff --git a/Makefile b/Makefile index 2e53d08f..6eaef536 100644 --- a/Makefile +++ b/Makefile @@ -20,3 +20,7 @@ python3: cython test: py.test test + +build-manylinux1-wheel: + docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh + docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_x86_64 bash docker/buildwheel.sh diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh new file mode 100644 index 00000000..b654e45c --- /dev/null +++ b/docker/buildwheel.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e -x + +ARCH=`uname -p` +echo "arch=$ARCH" + +for V in cp35-cp35m cp34-cp34m cp27-cp27m cp27-cp27mu; do + PYBIN=/opt/python/$V/bin + rm -rf build/ # Avoid lib build by narrow Python is used by wide python + $PYBIN/python setup.py bdist_wheel -p manylinux1_${ARCH} +done From e3fea94509767047a8ff45aa07cd58a9ba9694e7 Mon Sep 17 00:00:00 2001 From: TW Date: Wed, 11 Jan 2017 04:04:23 +0100 Subject: [PATCH 078/349] fix typos and other cosmetic issues (#214) cosmetic issues: - reST headlines' underline length needs to match the headline length (looks like somebody is / was using a proportional font) - Cython code lines do not need to be terminated with a semicolon - always use triple-double-quotes for docstrings --- ChangeLog.rst | 46 +++++++++++++++++++++--------------------- 
README.rst | 29 +++++++++++++------------- appveyor.yml | 2 +- docs/api.rst | 2 +- docs/index.rst | 2 +- msgpack/_packer.pyx | 10 ++++----- msgpack/_unpacker.pyx | 2 +- msgpack/fallback.py | 16 +++++++-------- test/test_extension.py | 2 +- test/test_pack.py | 2 +- 10 files changed, 57 insertions(+), 56 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 2151736d..30e6c5f4 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -160,7 +160,7 @@ Changes 0.2.4 -======= +===== :release date: 2012-12-22 Bugs fixed @@ -169,7 +169,7 @@ Bugs fixed * Fix SEGV when object_hook or object_pairs_hook raise Exception. (#39) 0.2.3 -======= +===== :release date: 2012-12-11 Changes @@ -177,11 +177,11 @@ Changes * Warn when use_list is not specified. It's default value will be changed in 0.3. Bugs fixed ------------ +---------- * Can't pack subclass of dict. 0.2.2 -======= +===== :release date: 2012-09-21 Changes @@ -190,7 +190,7 @@ Changes object in single precision format. Bugs fixed ------------ +---------- * ``unpack()`` didn't restores gc state when it called with gc disabled. ``unpack()`` doesn't control gc now instead of restoring gc state collectly. User can control gc state when gc cause performance issue. @@ -198,7 +198,7 @@ Bugs fixed * ``Unpacker``'s ``read_size`` option didn't used. 0.2.1 -======= +===== :release date: 2012-08-20 Changes @@ -206,8 +206,8 @@ Changes * Add ``max_buffer_size`` parameter to Unpacker. It limits internal buffer size and allows unpack data from untrusted source safely. -* Unpacker's buffer reallocation algorithm is less greedy now. It cause perforamce - derease in rare case but memory efficient and don't allocate than ``max_buffer_size``. +* Unpacker's buffer reallocation algorithm is less greedy now. It cause performance + decrease in rare case but memory efficient and don't allocate than ``max_buffer_size``. Bugs fixed ---------- @@ -217,7 +217,7 @@ Bugs fixed 0.2.0 -======= +===== :release date: 2012-06-27 Changes @@ -232,16 +232,16 @@ Bugs fixed 0.1.13 -======= +====== :release date: 2012-04-21 New ----- +--- * Don't accept subtype of list and tuple as msgpack list. (Steeve Morin) It allows customize how it serialized with ``default`` argument. Bugs fixed ------------ +---------- * Fix wrong error message. (David Wolever) * Fix memory leak while unpacking when ``object_hook`` or ``list_hook`` is used. (Steeve Morin) @@ -253,21 +253,21 @@ Other changes 0.1.12 -======= +====== :release date: 2011-12-27 Bugs fixed -------------- +---------- * Re-enable packs/unpacks removed at 0.1.11. It will be removed when 0.2 is released. 0.1.11 -======= +====== :release date: 2011-12-26 Bugs fixed -------------- +---------- * Include test code for Python3 to sdist. (Johan Bergström) * Fix compilation error on MSVC. (davidgaleano) @@ -285,7 +285,7 @@ New feature 0.1.9 -====== +===== :release date: 2011-01-29 New feature @@ -299,16 +299,16 @@ Bugs fixed * Add MemoryError check. 0.1.8 -====== +===== :release date: 2011-01-10 New feature ------------- +----------- * Support ``loads`` and ``dumps`` aliases for API compatibility with simplejson and pickle. * Add *object_hook* and *list_hook* option to unpacker. It allows you to - hook unpacing mapping type and array type. + hook unpacking mapping type and array type. * Add *default* option to packer. It allows you to pack unsupported types. @@ -320,13 +320,13 @@ Bugs fixed 0.1.7 -====== +===== :release date: 2010-11-02 New feature ------------- +----------- * Add *object_hook* and *list_hook* option to unpacker. 
It allows you to - hook unpacing mapping type and array type. + hook unpacking mapping type and array type. * Add *default* option to packer. It allows you to pack unsupported types. diff --git a/README.rst b/README.rst index d32ec1d4..e37c61d0 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ -======================= +====================== MessagePack for Python -======================= +====================== :author: INADA Naoki :version: 0.4.6 @@ -11,21 +11,22 @@ MessagePack for Python What's this ------------- +----------- -`MessagePack `_ is a fast, compact binary serialization format, suitable for -similar data to JSON. This package provides CPython bindings for reading and -writing MessagePack data. +`MessagePack `_ is an efficient binary serialization format. +It lets you exchange data among multiple languages like JSON. +But it's faster and smaller. +This package provides CPython bindings for reading and writing MessagePack data. Install ---------- +------- :: $ pip install msgpack-python PyPy -^^^^^ +^^^^ msgpack-python provides pure python implementation. PyPy can use this. @@ -44,7 +45,7 @@ Community Edition or Express Edition can be used to build extension module. How to use ------------ +---------- One-shot pack & unpack ^^^^^^^^^^^^^^^^^^^^^^ @@ -134,7 +135,7 @@ It is also possible to pack/unpack custom data types. Here is an example for key-value pairs. Extended types -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^ It is also possible to pack/unpack custom data types using the **ext** type. @@ -166,7 +167,7 @@ Advanced unpacking control As an alternative to iteration, ``Unpacker`` objects provide ``unpack``, ``skip``, ``read_array_header`` and ``read_map_header`` methods. The former two -read an entire message from the stream, respectively deserialising and returning +read an entire message from the stream, respectively de-serialising and returning the result, or ignoring it. The latter two methods return the number of elements in the upcoming container, so that each element in an array, or key-value pair in a map, can be unpacked or skipped individually. @@ -243,7 +244,7 @@ instead of `StopIteration`. `StopIteration` is used for iterator protocol only. Note about performance ------------------------- +---------------------- GC ^^ @@ -253,7 +254,7 @@ This means unpacking may cause useless GC. You can use ``gc.disable()`` when unpacking large message. use_list option -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^ List is the default sequence type of Python. But tuple is lighter than list. You can use ``use_list=False`` while unpacking when performance is important. @@ -264,7 +265,7 @@ Another way to unpacking such object is using ``object_pairs_hook``. Development ------------- +----------- Test ^^^^ diff --git a/appveyor.yml b/appveyor.yml index 9e766c55..a8a23528 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,7 +28,7 @@ test_script: # only needed to support those cases. # Note that you must use the environment variable %PYTHON% to refer to # the interpreter you're using - Appveyor does not do anything special - # to put the Python evrsion you want to use on PATH. + # to put the Python version you want to use on PATH. - "build.cmd %PYTHON%\\python.exe setup.py build_ext -i" - "build.cmd %PYTHON%\\python.exe setup.py install" - "%PYTHON%\\python.exe -c \"import sys; print(hex(sys.maxsize))\"" diff --git a/docs/api.rst b/docs/api.rst index 841c1346..6336793e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -28,7 +28,7 @@ API reference .. 
autoclass:: ExtType exceptions ------------ +---------- These exceptions are accessible via `msgpack` package. (For example, `msgpack.OutOfData` is shortcut for `msgpack.exceptions.OutOfData`) diff --git a/docs/index.rst b/docs/index.rst index 72d4499f..dcdab4f5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ msgpack document -================== +================ `MessagePack `_ is a efficient format for inter language data exchange. diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 872465b4..7a128535 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -58,11 +58,11 @@ cdef class Packer(object): :param bool use_single_float: Use single precision float type for float. (default: False) :param bool autoreset: - Reset buffer after each pack and return it's content as `bytes`. (default: True). + Reset buffer after each pack and return its content as `bytes`. (default: True). If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enable str8 type for unicode. + It also enables str8 type for unicode. """ cdef msgpack_packer pk cdef object _default @@ -75,7 +75,7 @@ cdef class Packer(object): def __cinit__(self): cdef int buf_size = 1024*1024 - self.pk.buf = malloc(buf_size); + self.pk.buf = malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size @@ -108,7 +108,7 @@ cdef class Packer(object): self.unicode_errors = PyBytes_AsString(self._berrors) def __dealloc__(self): - free(self.pk.buf); + free(self.pk.buf) cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: cdef long long llval @@ -274,7 +274,7 @@ cdef class Packer(object): """ Pack *pairs* as msgpack map type. - *pairs* should sequence of pair. + *pairs* should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) """ cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 1aefc643..2a139038 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -209,7 +209,7 @@ cdef class Unpacker(object): :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. - You shoud set this parameter when unpacking data from untrusted source. + You should set this parameter when unpacking data from untrusted source. :param int max_str_len: Limits max length of str. (default: 2**31-1) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f682611d..fefabb80 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -138,7 +138,7 @@ class Unpacker(object): :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. - You shoud set this parameter when unpacking data from untrusted source. + You should set this parameter when unpacking data from untrusted source. :param int max_str_len: Limits max length of str. (default: 2**31-1) @@ -188,13 +188,13 @@ def __init__(self, file_like=None, read_size=0, use_list=True, self.file_like = file_like self._fb_feeding = False - #: array of bytes feeded. + #: array of bytes fed. 
self._fb_buffers = [] #: Which buffer we currently reads self._fb_buf_i = 0 #: Which position we currently reads self._fb_buf_o = 0 - #: Total size of _fb_bufferes + #: Total size of _fb_buffers self._fb_buf_n = 0 # When Unpacker is used as an iterable, between the calls to next(), @@ -203,7 +203,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, # the correct moments, we have to keep track of how sloppy we were. # Furthermore, when the buffer is incomplete (that is: in the case # we raise an OutOfData) we need to rollback the buffer to the correct - # state, which _fb_slopiness records. + # state, which _fb_sloppiness records. self._fb_sloppiness = 0 self._max_buffer_size = max_buffer_size or 2**31-1 @@ -303,7 +303,7 @@ def _fb_get_extradata(self): def _fb_read(self, n, write_bytes=None): buffs = self._fb_buffers - # We have a redundant codepath for the most common case, such that + # We have a redundant code path for the most common case, such that # pypy optimizes it properly. This is the case that the read fits # in the current buffer. if (write_bytes is None and self._fb_buf_i < len(buffs) and @@ -598,17 +598,17 @@ class Packer(object): Convert user type to builtin type that Packer supports. See also simplejson's document. :param str encoding: - Convert unicode to bytes with this encoding. (default: 'utf-8') + Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: Error handler for encoding unicode. (default: 'strict') :param bool use_single_float: Use single precision float type for float. (default: False) :param bool autoreset: - Reset buffer after each pack and return it's content as `bytes`. (default: True). + Reset buffer after each pack and return its content as `bytes`. (default: True). If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enable str8 type for unicode. + It also enables str8 type for unicode. """ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, autoreset=True, use_bin_type=False): diff --git a/test/test_extension.py b/test/test_extension.py index c552498f..d05d7ab9 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -42,7 +42,7 @@ def default(obj): typecode = 123 # application specific typecode data = obj.tostring() return ExtType(typecode, data) - raise TypeError("Unknwon type object %r" % (obj,)) + raise TypeError("Unknown type object %r" % (obj,)) def ext_hook(code, data): print('ext_hook called', code, data) diff --git a/test/test_pack.py b/test/test_pack.py index 762ccf51..e9459021 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -130,7 +130,7 @@ def testMapSize(sizes=[0, 5, 50, 1000]): class odict(dict): - '''Reimplement OrderedDict to run test on Python 2.6''' + """Reimplement OrderedDict to run test on Python 2.6""" def __init__(self, seq): self._seq = seq dict.__init__(self, seq) From f985ee8a665daa298282ab87c488e81b7d9814a7 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Jan 2017 19:57:04 +0900 Subject: [PATCH 079/349] Remove version and date from README --- README.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.rst b/README.rst index 74f5fc75..da625b49 100644 --- a/README.rst +++ b/README.rst @@ -3,8 +3,6 @@ MessagePack for Python ====================== :author: INADA Naoki -:version: 0.4.6 -:date: 2015-03-13 .. 
image:: https://secure.travis-ci.org/msgpack/msgpack-python.svg :target: https://travis-ci.org/#!/msgpack/msgpack-python From 12845692b5a3703bc88e85f492143ca1a9985902 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Jan 2017 20:41:33 +0900 Subject: [PATCH 080/349] Add requirements.txt for Read the Docs --- requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..87f04dab --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +Cython==0.25.2 From b328f3ecffd22e7f0db76e81774727fd171bf303 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Jan 2017 20:48:48 +0900 Subject: [PATCH 081/349] Add badge for Read the Docs --- README.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index da625b49..0e1ddd72 100644 --- a/README.rst +++ b/README.rst @@ -2,11 +2,12 @@ MessagePack for Python ====================== -:author: INADA Naoki - .. image:: https://secure.travis-ci.org/msgpack/msgpack-python.svg :target: https://travis-ci.org/#!/msgpack/msgpack-python - + +.. image:: https://readthedocs.org/projects/msgpack-python/badge/?version=latest + :target: http://msgpack-python.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status What's this ----------- From 3388e4a6ee6adea56789d97cc05ed610a4e5b4fc Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Jan 2017 21:46:31 +0900 Subject: [PATCH 082/349] travis and appveyor update (#217) travis: * stop using tox * Add Python 3.6 and 3.7-dev * Stop pypy3 (until PyPy3.5 is released) appveyor: * Drop Python 3.4 and add 3.6 --- .travis.yml | 34 +++++++++++++++++++++++----------- appveyor.yml | 8 ++++---- docker/runtests.sh | 2 +- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index b4396cb7..0170360e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,22 +1,23 @@ sudo: false language: python -python: 3.5 -cache: - directories: - - $HOME/.cache/pip +cache: pip + +python: + - "2.7" + - "3.3" + - "3.4" + - "3.5" + - "3.6" + - "3.7-dev" branches: only: - master -env: - - TOXENV=py27-c,py33-c,py34-c,py35-c - - TOXENV=py27-pure,py33-pure,py34-pure,py35-pure - - TOXENV=pypy-pure,pypy3-pure - matrix: include: - sudo: required + language: c services: - docker env: @@ -28,12 +29,23 @@ matrix: - docker pull $DOCKER_IMAGE script: - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh + - python: "pypy" + install: + - pip install -e . + script: + - py.test -v test + install: - pip install -U pip - - pip install tox cython + - pip install cython - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + - pip install -e . -script: tox +script: + - python -c 'import sys; print(hex(sys.maxsize))' + - python -c 'from msgpack import _packer, _unpacker' + - py.test -v test + - MSGPACK_PUREPYTHON=x py.test -v test # vim: sw=2 ts=2 diff --git a/appveyor.yml b/appveyor.yml index a8a23528..e63423da 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -8,16 +8,16 @@ environment: # isn't covered by this document) at the time of writing. 
- PYTHON: "C:\\Python27" - - PYTHON: "C:\\Python34" - - PYTHON: "C:\\Python35" - PYTHON: "C:\\Python27-x64" - - PYTHON: "C:\\Python34-x64" - DISTUTILS_USE_SDK: "1" + - PYTHON: "C:\\Python35" - PYTHON: "C:\\Python35-x64" + - PYTHON: "C:\\Python36" + - PYTHON: "C:\\Python36-x64" install: # We need wheel installed to build wheels - "%PYTHON%\\python.exe -m pip install -U pip wheel pytest cython" + - "%PYTHON%\\Scripts\\cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx" build: off diff --git a/docker/runtests.sh b/docker/runtests.sh index 0d748023..0eea715a 100755 --- a/docker/runtests.sh +++ b/docker/runtests.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -x -for V in cp35-cp35m cp34-cp34m cp27-cp27m cp27-cp27mu; do +for V in cp36-cp36m cp35-cp35m cp34-cp34m cp27-cp27m cp27-cp27mu; do PYBIN=/opt/python/$V/bin $PYBIN/python setup.py install rm -rf build/ # Avoid lib build by narrow Python is used by wide python From a8d9162ca6cff6101c1f6b9547e94749c6acae96 Mon Sep 17 00:00:00 2001 From: jfolz Date: Sat, 29 Apr 2017 19:33:20 +0200 Subject: [PATCH 083/349] Unpacker: add tell() (#227) --- msgpack/_unpacker.pyx | 7 +++++++ msgpack/fallback.py | 5 +++++ test/test_sequnpack.py | 20 ++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index a9801eef..dabc5f70 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -29,6 +29,7 @@ cdef extern from "Python.h": from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * +ctypedef unsigned long long uint64_t from msgpack.exceptions import ( BufferFull, @@ -314,6 +315,7 @@ cdef class Unpacker(object): cdef object object_hook, object_pairs_hook, list_hook, ext_hook cdef object encoding, unicode_errors cdef Py_ssize_t max_buffer_size + cdef uint64_t stream_offset def __cinit__(self): self.buf = NULL @@ -358,6 +360,7 @@ cdef class Unpacker(object): self.buf_size = read_size self.buf_head = 0 self.buf_tail = 0 + self.stream_offset = 0 if encoding is not None: if isinstance(encoding, unicode): @@ -468,6 +471,7 @@ cdef class Unpacker(object): try: ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + self.stream_offset += self.buf_head - prev_head if write_bytes is not None: write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) @@ -534,6 +538,9 @@ cdef class Unpacker(object): """ return self._unpack(read_map_header, write_bytes) + def tell(self): + return self.stream_offset + def __iter__(self): return self diff --git a/msgpack/fallback.py b/msgpack/fallback.py index d2eb9f44..508fd06f 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -244,6 +244,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, self._max_array_len = max_array_len self._max_map_len = max_map_len self._max_ext_len = max_ext_len + self._stream_offset = 0 if list_hook is not None and not callable(list_hook): raise TypeError('`list_hook` is not callable') @@ -266,6 +267,7 @@ def feed(self, next_bytes): def _consume(self): """ Gets rid of the used parts of the buffer. 
""" + self._stream_offset += self._buff_i - self._buf_checkpoint self._buf_checkpoint = self._buff_i def _got_extradata(self): @@ -629,6 +631,9 @@ def read_map_header(self, write_bytes=None): self._consume() return ret + def tell(self): + return self._stream_offset + class Packer(object): """ diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 45f4cc78..59718f56 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -3,6 +3,7 @@ import io from msgpack import Unpacker, BufferFull +from msgpack import pack from msgpack.exceptions import OutOfData from pytest import raises @@ -96,3 +97,22 @@ def test_issue124(): unpacker.feed(b"!") assert tuple(unpacker) == (b'!',) assert tuple(unpacker) == () + + +def test_unpack_tell(): + stream = io.BytesIO() + messages = [2**i-1 for i in range(65)] + messages += [-(2**i) for i in range(1, 64)] + messages += [b'hello', b'hello'*1000, list(range(20)), + {i: bytes(i)*i for i in range(10)}, + {i: bytes(i)*i for i in range(32)}] + offsets = [] + for m in messages: + pack(m, stream) + offsets.append(stream.tell()) + stream.seek(0) + unpacker = Unpacker(stream) + for m, o in zip(messages, offsets): + m2 = next(unpacker) + assert m == m2 + assert o == unpacker.tell() From f0f2c0b39703e0129d2352c71ec9811a8f275cc8 Mon Sep 17 00:00:00 2001 From: jfolz Date: Thu, 18 May 2017 13:03:15 +0200 Subject: [PATCH 084/349] Packer accepts bytearray objects (#229) --- msgpack/_packer.pyx | 14 ++++++++++++-- msgpack/fallback.py | 6 ++++-- test/test_pack.py | 7 +++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index f24aa703..5a81709d 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -10,6 +10,8 @@ from msgpack import ExtType cdef extern from "Python.h": int PyMemoryView_Check(object obj) + int PyByteArray_Check(object obj) + int PyByteArray_CheckExact(object obj) cdef extern from "pack.h": @@ -39,6 +41,14 @@ cdef int DEFAULT_RECURSE_LIMIT=511 cdef size_t ITEM_LIMIT = (2**32)-1 +cdef inline int PyBytesLike_Check(object o): + return PyBytes_Check(o) or PyByteArray_Check(o) + + +cdef inline int PyBytesLike_CheckExact(object o): + return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) + + cdef class Packer(object): """ MessagePack Packer @@ -174,10 +184,10 @@ cdef class Packer(object): else: dval = o ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_CheckExact(o) if strict_types else PyBytes_Check(o): + elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): L = len(o) if L > ITEM_LIMIT: - raise PackValueError("bytes is too large") + raise PackValueError("%s is too large" % type(o).__name__) rawval = o ret = msgpack_pack_bin(&self.pk, L) if ret == 0: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 508fd06f..a02cbe16 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -38,6 +38,8 @@ def __init__(self, s=b''): def write(self, s): if isinstance(s, memoryview): s = s.tobytes() + elif isinstance(s, bytearray): + s = bytes(s) self.builder.append(s) def getvalue(self): return self.builder.build() @@ -728,10 +730,10 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, default_used = True continue raise PackOverflowError("Integer value out of range") - if check(obj, bytes): + if check(obj, (bytes, bytearray)): n = len(obj) if n >= 2**32: - raise PackValueError("Bytes is too large") + raise PackValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, Unicode): diff --git 
a/test/test_pack.py b/test/test_pack.py index e9459021..a704fdbc 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -58,6 +58,13 @@ def testPackBytes(): for td in test_data: check(td) +def testPackByteArrays(): + test_data = [ + bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),), + ] + for td in test_data: + check(td) + def testIgnoreUnicodeErrors(): re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) assert re == "abcdef" From deeda31a8840cee334f05f15bd2308af13dc9c64 Mon Sep 17 00:00:00 2001 From: Lorenzo Bolla Date: Sat, 30 Sep 2017 08:23:55 +0100 Subject: [PATCH 085/349] Add unittests to document serialisation of tuples (#246) Also, fix formatting of error message in case of tuple. See https://github.com/msgpack/msgpack-python/issues/245 --- msgpack/fallback.py | 2 +- test/test_stricttype.py | 49 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index a02cbe16..28478ca9 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -795,7 +795,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, obj = self._default(obj) default_used = 1 continue - raise TypeError("Cannot serialize %r" % obj) + raise TypeError("Cannot serialize %r" % (obj, )) def pack(self, obj): self._pack(obj) diff --git a/test/test_stricttype.py b/test/test_stricttype.py index a20b5eb7..0f865c83 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -1,7 +1,7 @@ # coding: utf-8 from collections import namedtuple -from msgpack import packb, unpackb +from msgpack import packb, unpackb, ExtType def test_namedtuple(): @@ -13,3 +13,50 @@ def default(o): packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) unpacked = unpackb(packed, encoding='utf-8') assert unpacked == {'foo': 1, 'bar': 42} + + +def test_tuple(): + t = ('one', 2, b'three', (4, )) + + def default(o): + if isinstance(o, tuple): + return { + '__type__': 'tuple', + 'value': list(o), + } + raise TypeError('Unsupported type %s' % (type(o),)) + + def convert(o): + if o.get('__type__') == 'tuple': + return tuple(o['value']) + return o + + data = packb(t, strict_types=True, use_bin_type=True, default=default) + expected = unpackb(data, encoding='utf-8', object_hook=convert) + + assert expected == t + + +def test_tuple_ext(): + t = ('one', 2, b'three', (4, )) + + MSGPACK_EXT_TYPE_TUPLE = 0 + + def default(o): + if isinstance(o, tuple): + # Convert to list and pack + payload = packb( + list(o), strict_types=True, use_bin_type=True, default=default) + return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) + raise TypeError(repr(o)) + + def convert(code, payload): + if code == MSGPACK_EXT_TYPE_TUPLE: + # Unpack and convert to tuple + return tuple(unpackb(payload, encoding='utf-8', ext_hook=convert)) + raise ValueError('Unknown Ext code {}'.format(code)) + + data = packb(t, strict_types=True, use_bin_type=True, default=default) + expected = unpackb(data, encoding='utf-8', ext_hook=convert) + + assert expected == t From b57106c246867b5beec62874a239c87d94dafba5 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 11 Oct 2017 20:49:02 +0300 Subject: [PATCH 086/349] Update badges (#247) --- README.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 0e1ddd72..ea1499b0 100644 --- a/README.rst +++ b/README.rst @@ -2,17 +2,18 @@ MessagePack for Python ====================== -.. 
image:: https://secure.travis-ci.org/msgpack/msgpack-python.svg - :target: https://travis-ci.org/#!/msgpack/msgpack-python +.. image:: https://travis-ci.org/msgpack/msgpack-python.svg?branch=master + :target: https://travis-ci.org/msgpack/msgpack-python + :alt: Build Status .. image:: https://readthedocs.org/projects/msgpack-python/badge/?version=latest - :target: http://msgpack-python.readthedocs.io/en/latest/?badge=latest + :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status What's this ----------- -`MessagePack `_ is an efficient binary serialization format. +`MessagePack `_ is an efficient binary serialization format. It lets you exchange data among multiple languages like JSON. But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. From 54aa47b2dd489297d894c0639811653fd6ff7bfa Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 12 Oct 2017 03:26:34 +0300 Subject: [PATCH 087/349] Update supported versions in classifiers (#248) --- setup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/setup.py b/setup.py index 5f0f08c3..5c88397d 100755 --- a/setup.py +++ b/setup.py @@ -114,7 +114,13 @@ def __init__(self, *args, **kwargs): url='http://msgpack.org/', classifiers=[ 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', ] From 6fd1890be4692cca1a0c53cc160ac5da83e1d272 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Jan 2017 21:47:26 +0900 Subject: [PATCH 088/349] Add py36 to tox.ini --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index b6e7a7f8..b2ac3362 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py27,py33,py34,py35}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = {py27,py33,py34,py35,py36}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 [variants:pure] setenv= From 3d7ebc47b4fed0cc06f652013c34f32107728c98 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 12 Oct 2017 15:28:40 +0900 Subject: [PATCH 089/349] travis: Remove "only: master" restriction --- .travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0170360e..f5141d60 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,10 +10,6 @@ python: - "3.6" - "3.7-dev" -branches: - only: - - master - matrix: include: - sudo: required From a70ce0c3d7bb25646302fd2649ba916178cf4a69 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 12 Oct 2017 16:26:58 +0900 Subject: [PATCH 090/349] Fix travis fail (#251) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f5141d60..54e0c62a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ python: matrix: include: - sudo: required - language: c + language: python services: - docker env: From 0fc4ee98be498f39a320eff501ba30c49c31482d Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 12 Oct 2017 10:27:39 +0300 Subject: [PATCH 091/349] Remove code and tests for unsupported Python 2.6 (#250) --- .gitignore | 1 + test/test_pack.py | 18 ++---------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff 
--git a/.gitignore b/.gitignore index 70f5746e..800f1c22 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ msgpack/*.cpp /venv /tags /docs/_build +.cache diff --git a/test/test_pack.py b/test/test_pack.py index a704fdbc..ac931038 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -7,6 +7,7 @@ from msgpack import packb, unpackb, Unpacker, Packer +from collections import OrderedDict from io import BytesIO def check(data, use_list=False): @@ -136,24 +137,9 @@ def testMapSize(sizes=[0, 5, 50, 1000]): assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) -class odict(dict): - """Reimplement OrderedDict to run test on Python 2.6""" - def __init__(self, seq): - self._seq = seq - dict.__init__(self, seq) - - def items(self): - return self._seq[:] - - def iteritems(self): - return iter(self._seq) - - def keys(self): - return [x[0] for x in self._seq] - def test_odict(): seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] - od = odict(seq) + od = OrderedDict(seq) assert unpackb(packb(od), use_list=1) == dict(seq) def pair_hook(seq): return list(seq) From 1985eb7618296e6a93c8abc4a697c7b00fda72f8 Mon Sep 17 00:00:00 2001 From: Martin Braun Date: Mon, 16 Oct 2017 20:30:55 -0700 Subject: [PATCH 092/349] Clarify README, fix grammar, update section on byte arrays (#253) --- README.rst | 57 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/README.rst b/README.rst index ea1499b0..3c160783 100644 --- a/README.rst +++ b/README.rst @@ -28,14 +28,14 @@ Install PyPy ^^^^ -msgpack-python provides pure python implementation. PyPy can use this. +msgpack-python provides a pure Python implementation. PyPy can use this. Windows ^^^^^^^ -When you can't use binary distribution, you need to install Visual Studio +When you can't use a binary distribution, you need to install Visual Studio or Windows SDK on Windows. -Without extension, using pure python implementation on CPython runs slowly. +Without extension, using pure Python implementation on CPython runs slowly. For Python 2.7, `Microsoft Visual C++ Compiler for Python 2.7 `_ is recommended solution. @@ -51,11 +51,11 @@ One-shot pack & unpack ^^^^^^^^^^^^^^^^^^^^^^ Use ``packb`` for packing and ``unpackb`` for unpacking. -msgpack provides ``dumps`` and ``loads`` as alias for compatibility with +msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with ``json`` and ``pickle``. -``pack`` and ``dump`` packs to file-like object. -``unpack`` and ``load`` unpacks from file-like object. +``pack`` and ``dump`` packs to a file-like object. +``unpack`` and ``load`` unpacks from a file-like object. .. code-block:: pycon @@ -65,14 +65,15 @@ msgpack provides ``dumps`` and ``loads`` as alias for compatibility with >>> msgpack.unpackb(_) [1, 2, 3] -``unpack`` unpacks msgpack's array to Python's list, but can unpack to tuple: +``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple: .. code-block:: pycon >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False) (1, 2, 3) -You should always pass the ``use_list`` keyword argument. See performance issues relating to `use_list option`_ below. +You should always specify the ``use_list`` keyword argument for backward compatibility. +See performance issues relating to `use_list option`_ below. Read the docstring for other options. @@ -198,29 +199,43 @@ Notes string and binary type ^^^^^^^^^^^^^^^^^^^^^^ -In old days, msgpack doesn't distinguish string and binary types like Python 1. 
-The type for represent string and binary types is named **raw**. +Early versions of msgpack didn't distinguish string and binary types (like Python 1). +The type for representing both string and binary types was named **raw**. -msgpack can distinguish string and binary type for now. But it is not like Python 2. -Python 2 added unicode string. But msgpack renamed **raw** to **str** and added **bin** type. -It is because keep compatibility with data created by old libs. **raw** was used for text more than binary. +For backward compatibility reasons, msgpack-python will still default all +strings to byte strings, unless you specify the `use_bin_type=True` option in +the packer. If you do so, it will use a non-standard type called **bin** to +serialize byte arrays, and **raw** becomes to mean **str**. If you want to +distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`. -Currently, while msgpack-python supports new **bin** type, default setting doesn't use it and -decodes **raw** as `bytes` instead of `unicode` (`str` in Python 3). +Note that Python 2 defaults to byte-arrays over Unicode strings: -You can change this by using `use_bin_type=True` option in Packer and `encoding="utf-8"` option in Unpacker. +.. code-block:: pycon + + >>> import msgpack + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) + ['spam', 'eggs'] + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), + encoding='utf-8') + ['spam', u'eggs'] + +This is the same code in Python 3 (same behaviour, but Python 3 has a +different default): .. code-block:: pycon >>> import msgpack - >>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True) - >>> msgpack.unpackb(packed, encoding='utf-8') - ['spam', u'egg'] + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) + [b'spam', b'eggs'] + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), + encoding='utf-8') + [b'spam', 'eggs'] + ext type ^^^^^^^^ -To use **ext** type, pass ``msgpack.ExtType`` object to packer. +To use the **ext** type, pass ``msgpack.ExtType`` object to packer. .. code-block:: pycon @@ -234,7 +249,7 @@ You can use it with ``default`` and ``ext_hook``. See below. Note for msgpack-python 0.2.x users ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The msgpack-python 0.3 have some incompatible changes. +The msgpack-python release 0.3 has some incompatible changes. The default value of ``use_list`` keyword argument is ``True`` from 0.3. You should pass the argument explicitly for backward compatibility. From 3a098851bea500ef1ffde856a60d80ddab230dee Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 2 Nov 2017 11:06:15 +0200 Subject: [PATCH 093/349] Remove code and tests for unsupported Python 3.3 and 3.4 (#249) --- .travis.yml | 2 -- appveyor.yml | 13 ++++--------- build.cmd | 21 --------------------- docker/runtests.sh | 2 +- tox.ini | 2 +- 5 files changed, 6 insertions(+), 34 deletions(-) delete mode 100644 build.cmd diff --git a/.travis.yml b/.travis.yml index 54e0c62a..7aac6648 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,6 @@ cache: pip python: - "2.7" - - "3.3" - - "3.4" - "3.5" - "3.6" - "3.7-dev" diff --git a/appveyor.yml b/appveyor.yml index e63423da..d581839b 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -23,24 +23,19 @@ build: off test_script: # Put your test command here. - # If you don't need to build C extensions on 64-bit Python 3.3 or 3.4, - # you can remove "build.cmd" from the front of the command, as it's - # only needed to support those cases. 
# Note that you must use the environment variable %PYTHON% to refer to # the interpreter you're using - Appveyor does not do anything special # to put the Python version you want to use on PATH. - - "build.cmd %PYTHON%\\python.exe setup.py build_ext -i" - - "build.cmd %PYTHON%\\python.exe setup.py install" + - "%PYTHON%\\python.exe setup.py build_ext -i" + - "%PYTHON%\\python.exe setup.py install" - "%PYTHON%\\python.exe -c \"import sys; print(hex(sys.maxsize))\"" - "%PYTHON%\\python.exe -c \"from msgpack import _packer, _unpacker\"" - "%PYTHON%\\Scripts\\py.test test" - - "build.cmd %PYTHON%\\python.exe setup.py bdist_wheel" + - "%PYTHON%\\python.exe setup.py bdist_wheel" after_test: # This step builds your wheels. - # Again, you only need build.cmd if you're building C extensions for - # 64-bit Python 3.3/3.4. And you need to use %PYTHON% to get the correct - # interpreter + # Again, you need to use %PYTHON% to get the correct interpreter artifacts: # bdist_wheel puts your built wheel in the dist directory diff --git a/build.cmd b/build.cmd deleted file mode 100644 index 243dc9a1..00000000 --- a/build.cmd +++ /dev/null @@ -1,21 +0,0 @@ -@echo off -:: To build extensions for 64 bit Python 3, we need to configure environment -:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 4 -:: -:: More details at: -:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows - -IF "%DISTUTILS_USE_SDK%"=="1" ( - ECHO Configuring environment to build with MSVC on a 64bit architecture - ECHO Using Windows SDK 7.1 - "C:\Program Files\Microsoft SDKs\Windows\v7.1\Setup\WindowsSdkVer.exe" -q -version:v7.1 - CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release - SET MSSdk=1 - REM Need the following to allow tox to see the SDK compiler - SET TOX_TESTENV_PASSENV=DISTUTILS_USE_SDK MSSdk INCLUDE LIB -) ELSE ( - ECHO Using default MSVC build environment -) - -CALL %* diff --git a/docker/runtests.sh b/docker/runtests.sh index 0eea715a..11ef9f46 100755 --- a/docker/runtests.sh +++ b/docker/runtests.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -x -for V in cp36-cp36m cp35-cp35m cp34-cp34m cp27-cp27m cp27-cp27mu; do +for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do PYBIN=/opt/python/$V/bin $PYBIN/python setup.py install rm -rf build/ # Avoid lib build by narrow Python is used by wide python diff --git a/tox.ini b/tox.ini index b2ac3362..68a2f53a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py27,py33,py34,py35,py36}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = {py27,py35,py36}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 [variants:pure] setenv= From 99341035f2f7f7b9d708c73f59e92277579abb0c Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 21 Dec 2017 20:46:14 +0900 Subject: [PATCH 094/349] fix zero length raw can't be decoded. 
(#236) fix #234 --- msgpack/unpack_template.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index e1e08fec..525dea24 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -82,7 +82,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize const unsigned char* p = (unsigned char*)data + *off; const unsigned char* const pe = (unsigned char*)data + len; - const void* n = NULL; + const void* n = p; unsigned int trail = ctx->trail; unsigned int cs = ctx->cs; From 2eb6e75db1d4b4e30997aa88f9e904dc462a28da Mon Sep 17 00:00:00 2001 From: aaron jheng Date: Sun, 31 Dec 2017 10:52:50 +0800 Subject: [PATCH 095/349] add license info to metadata (#260) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5c88397d..5a26241a 100755 --- a/setup.py +++ b/setup.py @@ -112,6 +112,7 @@ def __init__(self, *args, **kwargs): description=desc, long_description=long_desc, url='http://msgpack.org/', + license='Apache 2.0', classifiers=[ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', From 0e2021d3a3d1218ca191f4e802df0af3bbfaa51f Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 5 Jan 2018 19:16:14 +0900 Subject: [PATCH 096/349] Update changelog --- ChangeLog.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 73ffc30b..ed0f92e5 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,24 +1,33 @@ 0.5.0 ====== -0.5 is important step toward 1.0. There are some deprecations. -Please read changes carefully. +There are some deprecations. Please read changes carefully. Changes ------- -* Drop Python 2.6 and 3.2 support +* Drop Python 2.6 and ~3.4 support. Python 2.7 and 3.5+ are supported. * Deprecate useless custom exceptions. Use ValueError instead of PackValueError, Exception instead of PackException and UnpackException, etc... See msgpack/exceptions.py -* Add `strict_types` option to packer. It can be used to serialize subclass of +* Add *strict_types* option to packer. It can be used to serialize subclass of builtin types. For example, when packing object which type is subclass of dict, - `default()` is called. + ``default()`` is called. ``default()`` is called for tuple too. * Pure Python implementation supports packing memoryview object. +* Support packing bytearray. + +* Add ``Unpacker.tell()``. And ``write_bytes`` option is deprecated. + + +Bugs fixed +---------- + +* Fixed zero length raw can't be decoded when encoding is specified. (#236) + 0.4.8 ===== From 43137d6bd2cc841af775a9c8132e72d284b119e3 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 5 Jan 2018 20:19:04 +0900 Subject: [PATCH 097/349] Deprecate write_bytes option in Unpacker. (#262) Fixes #197 --- msgpack/_unpacker.pyx | 3 +++ msgpack/fallback.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index dabc5f70..564749e7 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -458,6 +458,9 @@ cdef class Unpacker(object): cdef object obj cdef Py_ssize_t prev_head + if write_bytes is not None: + PyErr_WarnEx(DeprecationWarning, "`write_bytes` option is deprecated. 
Use `.tell()` instead.", 1) + if self.buf_head >= self.buf_tail and self.file_like is not None: self.read_from_file() diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 28478ca9..3c9c3b84 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -609,12 +609,14 @@ def __next__(self): def skip(self, write_bytes=None): self._unpack(EX_SKIP) if write_bytes is not None: + warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() def unpack(self, write_bytes=None): ret = self._unpack(EX_CONSTRUCT) if write_bytes is not None: + warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret @@ -622,6 +624,7 @@ def unpack(self, write_bytes=None): def read_array_header(self, write_bytes=None): ret = self._unpack(EX_READ_ARRAY_HEADER) if write_bytes is not None: + warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret @@ -629,6 +632,7 @@ def read_array_header(self, write_bytes=None): def read_map_header(self, write_bytes=None): ret = self._unpack(EX_READ_MAP_HEADER) if write_bytes is not None: + warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret From 1979722ba2de84e68ae5992d33bc39461aa7b4b2 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 5 Jan 2018 20:58:14 +0900 Subject: [PATCH 098/349] Raise MemoryError when failed to grow buffer (#263) --- msgpack/pack.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/msgpack/pack.h b/msgpack/pack.h index d3aeff7c..3bc21ea5 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -48,7 +48,10 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ if (len + l > bs) { bs = (len + l) * 2; buf = (char*)PyMem_Realloc(buf, bs); - if (!buf) return -1; + if (!buf) { + PyErr_NoMemory(); + return -1; + } } memcpy(buf + len, data, l); len += l; From d0d3a403892106dfb809693f5e006a546cb55b83 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 6 Jan 2018 02:07:39 +0900 Subject: [PATCH 099/349] Warn about future use_bin_type change (#264) --- README.rst | 13 ++++++++----- msgpack/_packer.pyx | 12 ++++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 3c160783..20353f5c 100644 --- a/README.rst +++ b/README.rst @@ -60,7 +60,7 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with .. code-block:: pycon >>> import msgpack - >>> msgpack.packb([1, 2, 3]) + >>> msgpack.packb([1, 2, 3], use_bin_type=True) '\x93\x01\x02\x03' >>> msgpack.unpackb(_) [1, 2, 3] @@ -91,13 +91,13 @@ stream (or from bytes provided through its ``feed`` method). buf = BytesIO() for i in range(100): - buf.write(msgpack.packb(range(i))) + buf.write(msgpack.packb(range(i), use_bin_type=True)) buf.seek(0) unpacker = msgpack.Unpacker(buf) for unpacked in unpacker: - print unpacked + print(unpacked) Packing/unpacking of custom data type @@ -109,7 +109,6 @@ It is also possible to pack/unpack custom data types. Here is an example for .. code-block:: python import datetime - import msgpack useful_dict = { @@ -128,7 +127,7 @@ It is also possible to pack/unpack custom data types. 
Here is an example for return obj - packed_dict = msgpack.packb(useful_dict, default=encode_datetime) + packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) ``Unpacker``'s ``object_hook`` callback receives a dict; the @@ -208,6 +207,10 @@ the packer. If you do so, it will use a non-standard type called **bin** to serialize byte arrays, and **raw** becomes to mean **str**. If you want to distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`. +**In future version, default value of ``use_bin_type`` will be changed to ``False``. +To avoid this change will break your code, you must specify it explicitly +even when you want to use old format.** + Note that Python 2 defaults to byte-arrays over Unicode strings: .. code-block:: pycon diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 5a81709d..ebaeb657 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,6 +2,7 @@ #cython: embedsignature=True from cpython cimport * +from cpython.exc cimport PyErr_WarnEx from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType @@ -76,6 +77,8 @@ cdef class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enables str8 type for unicode. + Current default value is false, but it will be changed to true + in future version. You should specify it explicitly. :param bool strict_types: If set to true, types will be checked to be exact. Derived classes from serializeable types will not be serialized and will be @@ -103,12 +106,17 @@ cdef class Packer(object): self.pk.length = 0 def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, bint autoreset=1, bint use_bin_type=0, + use_single_float=False, bint autoreset=1, use_bin_type=None, bint strict_types=0): + if use_bin_type is None: + PyErr_WarnEx( + FutureWarning, + "use_bin_type option is not specified. 
Default value of the option will be changed in future version.", + 1) self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset - self.pk.use_bin_type = use_bin_type + self.pk.use_bin_type = use_bin_type if default is not None: if not PyCallable_Check(default): raise TypeError("default must be a callable.") From 89e4f8b7b3a43fc84ea0dd278a4b8b54b1fac4bd Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 7 Jan 2018 01:57:47 +0900 Subject: [PATCH 100/349] Rename package name to msgpack --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 5a26241a..419482bd 100755 --- a/setup.py +++ b/setup.py @@ -102,7 +102,7 @@ def __init__(self, *args, **kwargs): long_desc = f.read() del f -setup(name='msgpack-python', +setup(name='msgpack', author='INADA Naoki', author_email='songofacandy@gmail.com', version=version_str, @@ -124,5 +124,5 @@ def __init__(self, *args, **kwargs): 'Programming Language :: Python :: Implementation :: PyPy', 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', - ] - ) + ], +) From d720c42468c2e9cad2c04e28c23917187fa64412 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 7 Jan 2018 01:58:01 +0900 Subject: [PATCH 101/349] prepare 0.5 --- msgpack/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index 76bd8fbe..f90cdc12 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 4, 8) +version = (0, 5, 0) From 9f4c12f29ce7a6d62423311ab12a031dc79c458a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 7 Jan 2018 01:59:14 +0900 Subject: [PATCH 102/349] Add transition package --- dummy/README | 2 ++ dummy/setup.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 dummy/README create mode 100644 dummy/setup.py diff --git a/dummy/README b/dummy/README new file mode 100644 index 00000000..bf1eeb63 --- /dev/null +++ b/dummy/README @@ -0,0 +1,2 @@ +This is dummy transition package for msgpak. +Install msgpack instead of msgpack-python. diff --git a/dummy/setup.py b/dummy/setup.py new file mode 100644 index 00000000..0f1e6e72 --- /dev/null +++ b/dummy/setup.py @@ -0,0 +1,31 @@ +from setuptools import setup, Extension + +long_desc = """\ +msgpack-python is renamed to just msgpack. + +Install msgpack by ``pip install msgpack``. 
+""" + + +setup(name='msgpack-python', + author='INADA Naoki', + author_email='songofacandy@gmail.com', + version="0.5.0", + description="Transition package for msgpack", + long_description=long_desc, + install_requires=["msgpack>=0.5"], + url='http://msgpack.org/', + license='Apache 2.0', + classifiers=[ + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + ], +) From 35fc29797036b9bf8619b726f7c7b231c508439e Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 7 Jan 2018 02:01:20 +0900 Subject: [PATCH 103/349] Update README --- README.rst | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index 20353f5c..f5fd233f 100644 --- a/README.rst +++ b/README.rst @@ -23,12 +23,12 @@ Install :: - $ pip install msgpack-python + $ pip install msgpack PyPy ^^^^ -msgpack-python provides a pure Python implementation. PyPy can use this. +msgpack provides a pure Python implementation. PyPy can use this. Windows ^^^^^^^ @@ -249,17 +249,6 @@ To use the **ext** type, pass ``msgpack.ExtType`` object to packer. You can use it with ``default`` and ``ext_hook``. See below. -Note for msgpack-python 0.2.x users -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The msgpack-python release 0.3 has some incompatible changes. - -The default value of ``use_list`` keyword argument is ``True`` from 0.3. -You should pass the argument explicitly for backward compatibility. - -`Unpacker.unpack()` and some unpack methods now raises `OutOfData` -instead of `StopIteration`. -`StopIteration` is used for iterator protocol only. Note about performance ---------------------- From dbb827815aef9e26c54467436bc267c23b2eff81 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 7 Jan 2018 02:04:49 +0900 Subject: [PATCH 104/349] Update Cython version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 87f04dab..cd54e6df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -Cython==0.25.2 +Cython==0.27.3 From 7c22d983f4aad34d612e6fb4dc84676fcb2bece9 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 9 Jan 2018 13:17:47 +0900 Subject: [PATCH 105/349] Update README --- README.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.rst b/README.rst index f5fd233f..fed1dfa4 100644 --- a/README.rst +++ b/README.rst @@ -10,6 +10,20 @@ MessagePack for Python :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status +Upgrading from msgpack-0.4 +-------------------------- + +TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. +Do `pip uninstall msgpack-python; pip install msgpack` instead. + +Package name on PyPI was changed to msgpack from 0.5. +I upload transitional package (msgpack-python 0.5 which depending on msgpack) +for smooth transition from msgpack-python to msgpack. + +Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-python`, +msgpack is removed and `import msgpack` fail. 
+ + What's this ----------- From 45c1a53d5aeab5799f76cfae02f064ba89098f7f Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 9 Jan 2018 17:58:32 +0900 Subject: [PATCH 106/349] Update AppVeyor build (#267) --- appveyor.yml | 32 ++++++++++++++------------------ ci/runtests.bat | 7 +++++++ 2 files changed, 21 insertions(+), 18 deletions(-) create mode 100644 ci/runtests.bat diff --git a/appveyor.yml b/appveyor.yml index d581839b..72b334a0 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,22 +1,12 @@ environment: - matrix: - # For Python versions available on Appveyor, see # http://www.appveyor.com/docs/installed-software#python - # The list here is complete (excluding Python 2.6, which - # isn't covered by this document) at the time of writing. - - - PYTHON: "C:\\Python27" - - PYTHON: "C:\\Python27-x64" - - PYTHON: "C:\\Python35" - - PYTHON: "C:\\Python35-x64" - PYTHON: "C:\\Python36" - - PYTHON: "C:\\Python36-x64" install: # We need wheel installed to build wheels - - "%PYTHON%\\python.exe -m pip install -U pip wheel pytest cython" + - "%PYTHON%\\python.exe -m pip install -U cython" - "%PYTHON%\\Scripts\\cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx" build: off @@ -26,12 +16,18 @@ test_script: # Note that you must use the environment variable %PYTHON% to refer to # the interpreter you're using - Appveyor does not do anything special # to put the Python version you want to use on PATH. - - "%PYTHON%\\python.exe setup.py build_ext -i" - - "%PYTHON%\\python.exe setup.py install" - - "%PYTHON%\\python.exe -c \"import sys; print(hex(sys.maxsize))\"" - - "%PYTHON%\\python.exe -c \"from msgpack import _packer, _unpacker\"" - - "%PYTHON%\\Scripts\\py.test test" - - "%PYTHON%\\python.exe setup.py bdist_wheel" + - set PYTHON="C:\\Python27" + - ci\\runtests.bat + - set PYTHON="C:\\Python27-x64" + - ci\\runtests.bat + - set PYTHON="C:\\Python35" + - ci\\runtests.bat + - set PYTHON="C:\\Python35-x64" + - ci\\runtests.bat + - set PYTHON="C:\\Python36" + - ci\\runtests.bat + - set PYTHON="C:\\Python36-x64" + - ci\\runtests.bat after_test: # This step builds your wheels. @@ -39,7 +35,7 @@ after_test: artifacts: # bdist_wheel puts your built wheel in the dist directory - - path: dist\* + - path: dist\*.whl #on_success: # You can use this step to upload your artifacts to a public website. 
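[Editor's note: not part of any patch] The ``test_script`` loop above hands each interpreter to the ``ci/runtests.bat`` helper added below, and that script checks that the C extension really built by importing the private ``_packer``/``_unpacker`` modules. A minimal sketch of the same check done by hand, assuming only that those module names stay as they are in this repository, is:

.. code-block:: python

    try:
        # These modules are only importable when the Cython/C extension was compiled.
        from msgpack import _packer, _unpacker
        print("msgpack is using the C extension")
    except ImportError:
        # Without the extension, the pure-Python implementation (msgpack/fallback.py) is used.
        print("msgpack is using the pure-Python fallback")
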
diff --git a/ci/runtests.bat b/ci/runtests.bat new file mode 100644 index 00000000..9efea00c --- /dev/null +++ b/ci/runtests.bat @@ -0,0 +1,7 @@ +%PYTHON%\python.exe -m pip install -U pip wheel pytest +%PYTHON%\python.exe setup.py build_ext -i +%PYTHON%\python.exe setup.py install +%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))" +%PYTHON%\python.exe -c "from msgpack import _packer, _unpacker" +%PYTHON%\python.exe -m pytest -v test +%PYTHON%\python.exe setup.py bdist_wheel From 676bbcd0eee2a6e8aece5239e380c12ea633375a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 9 Jan 2018 19:00:42 +0900 Subject: [PATCH 107/349] manylinux1: Add 3.6 and remove 3.4 --- docker/buildwheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index b654e45c..f586a8dd 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -4,7 +4,7 @@ set -e -x ARCH=`uname -p` echo "arch=$ARCH" -for V in cp35-cp35m cp34-cp34m cp27-cp27m cp27-cp27mu; do +for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python $PYBIN/python setup.py bdist_wheel -p manylinux1_${ARCH} From e0934355c6534690d3c80ea8659d51c65a55ee0f Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 9 Jan 2018 20:48:45 +0900 Subject: [PATCH 108/349] Update Makefile --- Makefile | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index f833bbcf..84decd80 100644 --- a/Makefile +++ b/Makefile @@ -1,32 +1,27 @@ -.PHONY: test all python3 - +.PHONY: all all: cython python setup.py build_ext -i -f -doc-serve: all - cd docs && make serve - -doc: - cd docs && make zip - -upload-doc: - python setup.py upload_docs --upload-dir docs/_build/html - +.PHONY: cython cython: cython --cplus msgpack/*.pyx -python3: cython - python3 setup.py build_ext -i -f - +.PHONY: test test: - py.test test + py.test -v test + +.PHONY: serve-doc +serve-doc: all + cd docs && make serve .PHONY: clean clean: rm -rf build rm msgpack/*.so rm -rf msgpack/__pycache__ + rm -rf test/__pycache__ -build-manylinux1-wheel: +.PHONY: linux-wheel +linux-wheel: docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_x86_64 bash docker/buildwheel.sh From ab66c272b03805fd16f0346238e8b7d1233b96c4 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 9 Jan 2018 22:03:06 +0900 Subject: [PATCH 109/349] Update README --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index fed1dfa4..d863328c 100644 --- a/README.rst +++ b/README.rst @@ -148,6 +148,7 @@ It is also possible to pack/unpack custom data types. Here is an example for ``object_pairs_hook`` callback may instead be used to receive a list of key-value pairs. + Extended types ^^^^^^^^^^^^^^ @@ -170,7 +171,7 @@ It is also possible to pack/unpack custom data types using the **ext** type. ... return ExtType(code, data) ... >>> data = array.array('d', [1.2, 3.4]) - >>> packed = msgpack.packb(data, default=default) + >>> packed = msgpack.packb(data, default=default, use_bin_type=True) >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) >>> data == unpacked True @@ -294,7 +295,7 @@ Test MessagePack uses `pytest` for testing. Run test with following command: - $ py.test + $ pytest -v test .. 
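[Editor's note: not part of any patch] The README hunk above updates the extended-type example so that ``packb`` is called with ``use_bin_type=True`` explicitly. For readers skimming the patch, here is a self-contained sketch of that ``default``/``ext_hook`` round trip; it assumes Python 3 (for ``tobytes``/``frombytes``) and an arbitrary application-chosen ext code of 42:

.. code-block:: python

    import array

    import msgpack

    def default(obj):
        # Serialize an array of doubles as an application-defined ext type.
        if isinstance(obj, array.array) and obj.typecode == 'd':
            return msgpack.ExtType(42, obj.tobytes())
        raise TypeError("Unknown type: %r" % (obj,))

    def ext_hook(code, data):
        # Restore ext code 42 back into an array of doubles.
        if code == 42:
            arr = array.array('d')
            arr.frombytes(data)
            return arr
        return msgpack.ExtType(code, data)

    data = array.array('d', [1.2, 3.4])
    packed = msgpack.packb(data, default=default, use_bin_type=True)
    assert msgpack.unpackb(packed, ext_hook=ext_hook) == data

The same pattern works with ``Unpacker(ext_hook=...)`` for streaming input.
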
From 5be93786404d4e95de933d1bc64640402c3f2696 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 10 Jan 2018 02:48:08 +0900 Subject: [PATCH 110/349] Make msgpack-python deprecated clone of msgpack. --- README.rst | 4 ++-- dummy/README | 2 -- dummy/setup.py | 31 ------------------------------- setup.py | 11 ++++++++++- 4 files changed, 12 insertions(+), 36 deletions(-) delete mode 100644 dummy/README delete mode 100644 dummy/setup.py diff --git a/README.rst b/README.rst index d863328c..42758b8a 100644 --- a/README.rst +++ b/README.rst @@ -10,8 +10,8 @@ MessagePack for Python :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status -Upgrading from msgpack-0.4 --------------------------- +IMPORTANT: Upgrading from msgpack-0.4 +-------------------------------------- TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. Do `pip uninstall msgpack-python; pip install msgpack` instead. diff --git a/dummy/README b/dummy/README deleted file mode 100644 index bf1eeb63..00000000 --- a/dummy/README +++ /dev/null @@ -1,2 +0,0 @@ -This is dummy transition package for msgpak. -Install msgpack instead of msgpack-python. diff --git a/dummy/setup.py b/dummy/setup.py deleted file mode 100644 index 0f1e6e72..00000000 --- a/dummy/setup.py +++ /dev/null @@ -1,31 +0,0 @@ -from setuptools import setup, Extension - -long_desc = """\ -msgpack-python is renamed to just msgpack. - -Install msgpack by ``pip install msgpack``. -""" - - -setup(name='msgpack-python', - author='INADA Naoki', - author_email='songofacandy@gmail.com', - version="0.5.0", - description="Transition package for msgpack", - long_description=long_desc, - install_requires=["msgpack>=0.5"], - url='http://msgpack.org/', - license='Apache 2.0', - classifiers=[ - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - ], -) diff --git a/setup.py b/setup.py index 419482bd..61085874 100755 --- a/setup.py +++ b/setup.py @@ -9,6 +9,9 @@ from distutils.command.build_ext import build_ext +# for building transitional package. +TRANSITIONAL = False + class NoCython(Exception): pass @@ -102,7 +105,13 @@ def __init__(self, *args, **kwargs): long_desc = f.read() del f -setup(name='msgpack', +name = 'msgpack' + +if TRANSITIONAL: + name = 'msgpack-python' + long_desc = "This package is deprecated. Install msgpack instead." + +setup(name=name, author='INADA Naoki', author_email='songofacandy@gmail.com', version=version_str, From e0f2fd3af348e26b269d7eb90c74876d908aafca Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 10 Jan 2018 02:49:50 +0900 Subject: [PATCH 111/349] Fix README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 42758b8a..01a8b2a4 100644 --- a/README.rst +++ b/README.rst @@ -222,7 +222,7 @@ the packer. If you do so, it will use a non-standard type called **bin** to serialize byte arrays, and **raw** becomes to mean **str**. If you want to distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`. 
-**In future version, default value of ``use_bin_type`` will be changed to ``False``. +**In future version, default value of ``use_bin_type`` will be changed to ``True``. To avoid this change will break your code, you must specify it explicitly even when you want to use old format.** From 0112957bcff8e16dddd6cbc474bfe8a49f418fad Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 10 Jan 2018 02:54:59 +0900 Subject: [PATCH 112/349] Remove FutureWarning about use_bin_type option (#271) --- msgpack/_packer.pyx | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index ebaeb657..13a18f6c 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,7 +2,7 @@ #cython: embedsignature=True from cpython cimport * -from cpython.exc cimport PyErr_WarnEx +#from cpython.exc cimport PyErr_WarnEx from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType @@ -65,20 +65,20 @@ cdef class Packer(object): :param callable default: Convert user type to builtin type that Packer supports. See also simplejson's document. - :param str encoding: - Convert unicode to bytes with this encoding. (default: 'utf-8') - :param str unicode_errors: - Error handler for encoding unicode. (default: 'strict') + :param bool use_single_float: Use single precision float type for float. (default: False) + :param bool autoreset: Reset buffer after each pack and return its content as `bytes`. (default: True). If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. + :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enables str8 type for unicode. Current default value is false, but it will be changed to true in future version. You should specify it explicitly. + :param bool strict_types: If set to true, types will be checked to be exact. Derived classes from serializeable types will not be serialized and will be @@ -86,6 +86,11 @@ cdef class Packer(object): Additionally tuples will not be serialized as lists. This is useful when trying to implement accurate serialization for python types. + + :param str encoding: + (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') + :param str unicode_errors: + (deprecated) Error handler for encoding unicode. (default: 'strict') """ cdef msgpack_packer pk cdef object _default @@ -106,17 +111,12 @@ cdef class Packer(object): self.pk.length = 0 def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, bint autoreset=1, use_bin_type=None, - bint strict_types=0): - if use_bin_type is None: - PyErr_WarnEx( - FutureWarning, - "use_bin_type option is not specified. Default value of the option will be changed in future version.", - 1) + bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, + bint strict_types=False): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset - self.pk.use_bin_type = use_bin_type + self.pk.use_bin_type = use_bin_type if default is not None: if not PyCallable_Check(default): raise TypeError("default must be a callable.") From fc09da997ca323cb1a545478e9c1563d8db37ab1 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 10 Jan 2018 02:58:55 +0900 Subject: [PATCH 113/349] fallback: Update docstring. 
--- msgpack/_version.py | 2 +- msgpack/fallback.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index f90cdc12..ecba3d88 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 5, 0) +version = (0, 5, 1) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 3c9c3b84..5447b530 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -160,7 +160,7 @@ class Unpacker(object): If it is None (default), msgpack raw is deserialized to Python bytes. :param str unicode_errors: - Used for decoding msgpack raw with *encoding*. + (deprecated) Used for decoding msgpack raw with *encoding*. (default: `'strict'`) :param int max_buffer_size: @@ -656,18 +656,18 @@ class Packer(object): :param callable default: Convert user type to builtin type that Packer supports. See also simplejson's document. - :param str encoding: - Convert unicode to bytes with this encoding. (default: 'utf-8') - :param str unicode_errors: - Error handler for encoding unicode. (default: 'strict') + :param bool use_single_float: Use single precision float type for float. (default: False) + :param bool autoreset: Reset buffer after each pack and return its content as `bytes`. (default: True). If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. + :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enables str8 type for unicode. + :param bool strict_types: If set to true, types will be checked to be exact. Derived classes from serializeable types will not be serialized and will be @@ -675,6 +675,12 @@ class Packer(object): Additionally tuples will not be serialized as lists. This is useful when trying to implement accurate serialization for python types. + + :param str encoding: + (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') + + :param str unicode_errors: + (deprecated) Error handler for encoding unicode. (default: 'strict') """ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, autoreset=True, use_bin_type=False, From 50ea49c86f5aaff8bb1cd37778b50b13df83ba8f Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 10 Jan 2018 03:04:54 +0900 Subject: [PATCH 114/349] Update doc --- docs/Makefile | 2 +- docs/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 08696047..b09d8844 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -153,7 +153,7 @@ doctest: "results in $(BUILDDIR)/doctest/output.txt." serve: html - cd _build/html && python3.3 -m http.server + cd _build/html && python3 -m http.server zip: html cd _build/html && zip -r ../../../msgpack-doc.zip . diff --git a/docs/conf.py b/docs/conf.py index 0f19fcc4..47d745a8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,7 +49,7 @@ # # The short X.Y version. # The full version, including alpha/beta/rc tags. -version = release = '0.4' +version = release = '0.5' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 5534d0c7af0114db3d27f7b96c82a7fe22ce1e40 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 11 Jan 2018 17:02:41 +0900 Subject: [PATCH 115/349] Add raw_as_bytes option to Unpacker. 
(#265) --- Makefile | 3 +- README.rst | 78 ++++++++++++++++++++++++++++----------- ci/runtests.bat | 4 +- msgpack/_packer.pyx | 18 ++++++--- msgpack/_unpacker.pyx | 81 ++++++++++++++++++++++++++--------------- msgpack/fallback.py | 52 +++++++++++++++++++++++--- msgpack/unpack.h | 12 ++++-- test/test_limits.py | 4 +- test/test_pack.py | 22 +++++------ test/test_stricttype.py | 8 ++-- test/test_unpack.py | 10 ++--- 11 files changed, 199 insertions(+), 93 deletions(-) diff --git a/Makefile b/Makefile index 84decd80..6a9906c6 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,8 @@ cython: .PHONY: test test: - py.test -v test + pytest -v test + MSGPACK_PUREPYTHON=1 pytest -v test .PHONY: serve-doc serve-doc: all diff --git a/README.rst b/README.rst index 01a8b2a4..a5038db5 100644 --- a/README.rst +++ b/README.rst @@ -10,8 +10,21 @@ MessagePack for Python :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status -IMPORTANT: Upgrading from msgpack-0.4 --------------------------------------- + +What's this +----------- + +`MessagePack `_ is an efficient binary serialization format. +It lets you exchange data among multiple languages like JSON. +But it's faster and smaller. +This package provides CPython bindings for reading and writing MessagePack data. + + +Very important notes for existing users +--------------------------------------- + +PyPI package name +^^^^^^^^^^^^^^^^^ TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. Do `pip uninstall msgpack-python; pip install msgpack` instead. @@ -24,13 +37,37 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt msgpack is removed and `import msgpack` fail. -What's this ------------ +Deprecating encoding option +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +encoding and unicode_errors options are deprecated. + +In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommended. + +For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes`` +object into msgpack raw type. + +In case of unpacker, there is new ``raw_as_bytes`` option. It is ``True`` by default +for backward compatibility, but it is changed to ``False`` in near future. +You can use ``raw_as_bytes=False`` instead of ``encoding='utf-8'``. + +Planned backward incompatible changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When msgpack 1.0, I planning these breaking changes: + +* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option. +* packer: Change default of ``use_bin_type`` option from False to True. +* unpacker: Change default of ``raw_as_bytes`` option from True to False. +* unpacker: Reduce all ``max_xxx_len`` options for typical usage. +* unpacker: Remove ``write_bytes`` option from all methods. + +To avoid these breaking changes breaks your application, please: + +* Don't use deprecated options. +* Pass ``use_bin_type`` and ``raw_as_bytes`` options explicitly. +* If your application handle large (>1MB) data, specify ``max_xxx_len`` options too. -`MessagePack `_ is an efficient binary serialization format. -It lets you exchange data among multiple languages like JSON. -But it's faster and smaller. -This package provides CPython bindings for reading and writing MessagePack data. 
Install ------- @@ -76,14 +113,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with >>> import msgpack >>> msgpack.packb([1, 2, 3], use_bin_type=True) '\x93\x01\x02\x03' - >>> msgpack.unpackb(_) + >>> msgpack.unpackb(_, raw_as_bytes=False) [1, 2, 3] ``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple: .. code-block:: pycon - >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False) + >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw_as_bytes=False) (1, 2, 3) You should always specify the ``use_list`` keyword argument for backward compatibility. @@ -109,7 +146,7 @@ stream (or from bytes provided through its ``feed`` method). buf.seek(0) - unpacker = msgpack.Unpacker(buf) + unpacker = msgpack.Unpacker(buf, raw_as_bytes=False) for unpacked in unpacker: print(unpacked) @@ -142,7 +179,7 @@ It is also possible to pack/unpack custom data types. Here is an example for packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) - this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) + this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw_as_bytes=False) ``Unpacker``'s ``object_hook`` callback receives a dict; the ``object_pairs_hook`` callback may instead be used to receive a list of @@ -172,7 +209,7 @@ It is also possible to pack/unpack custom data types using the **ext** type. ... >>> data = array.array('d', [1.2, 3.4]) >>> packed = msgpack.packb(data, default=default, use_bin_type=True) - >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) + >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw_as_bytes=False) >>> data == unpacked True @@ -217,14 +254,10 @@ Early versions of msgpack didn't distinguish string and binary types (like Pytho The type for representing both string and binary types was named **raw**. For backward compatibility reasons, msgpack-python will still default all -strings to byte strings, unless you specify the `use_bin_type=True` option in +strings to byte strings, unless you specify the ``use_bin_type=True`` option in the packer. If you do so, it will use a non-standard type called **bin** to serialize byte arrays, and **raw** becomes to mean **str**. If you want to -distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`. - -**In future version, default value of ``use_bin_type`` will be changed to ``True``. -To avoid this change will break your code, you must specify it explicitly -even when you want to use old format.** +distinguish **bin** and **raw** in the unpacker, specify ``raw_as_bytes=False``. Note that Python 2 defaults to byte-arrays over Unicode strings: @@ -234,7 +267,7 @@ Note that Python 2 defaults to byte-arrays over Unicode strings: >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) ['spam', 'eggs'] >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - encoding='utf-8') + raw_as_bytes=False) ['spam', u'eggs'] This is the same code in Python 3 (same behaviour, but Python 3 has a @@ -246,7 +279,7 @@ different default): >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) [b'spam', b'eggs'] >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - encoding='utf-8') + raw_as_bytes=False) [b'spam', 'eggs'] @@ -277,6 +310,7 @@ You can use ``gc.disable()`` when unpacking large message. use_list option ^^^^^^^^^^^^^^^ + List is the default sequence type of Python. But tuple is lighter than list. 
You can use ``use_list=False`` while unpacking when performance is important. @@ -295,7 +329,7 @@ Test MessagePack uses `pytest` for testing. Run test with following command: - $ pytest -v test + $ make test .. diff --git a/ci/runtests.bat b/ci/runtests.bat index 9efea00c..02404679 100644 --- a/ci/runtests.bat +++ b/ci/runtests.bat @@ -3,5 +3,7 @@ %PYTHON%\python.exe setup.py install %PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))" %PYTHON%\python.exe -c "from msgpack import _packer, _unpacker" -%PYTHON%\python.exe -m pytest -v test %PYTHON%\python.exe setup.py bdist_wheel +%PYTHON%\python.exe -m pytest -v test +SET EL=%ERRORLEVEL% +exit /b %EL% diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 13a18f6c..39da91b5 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,7 +2,7 @@ #cython: embedsignature=True from cpython cimport * -#from cpython.exc cimport PyErr_WarnEx +from cpython.exc cimport PyErr_WarnEx from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType @@ -39,7 +39,7 @@ cdef extern from "pack.h": int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 -cdef size_t ITEM_LIMIT = (2**32)-1 +cdef long long ITEM_LIMIT = (2**32)-1 cdef inline int PyBytesLike_Check(object o): @@ -110,9 +110,13 @@ cdef class Packer(object): self.pk.buf_size = buf_size self.pk.length = 0 - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + def __init__(self, default=None, encoding=None, unicode_errors=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, bint strict_types=False): + if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1) + if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1) self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -122,7 +126,7 @@ cdef class Packer(object): raise TypeError("default must be a callable.") self._default = default if encoding is None: - self.encoding = NULL + self.encoding = 'utf_8' self.unicode_errors = NULL else: if isinstance(encoding, unicode): @@ -134,7 +138,8 @@ cdef class Packer(object): self._berrors = unicode_errors.encode('ascii') else: self._berrors = unicode_errors - self.unicode_errors = PyBytes_AsString(self._berrors) + if self._berrors is not None: + self.unicode_errors = PyBytes_AsString(self._berrors) def __dealloc__(self): PyMem_Free(self.pk.buf) @@ -149,7 +154,7 @@ cdef class Packer(object): cdef char* rawval cdef int ret cdef dict d - cdef size_t L + cdef Py_ssize_t L cdef int default_used = 0 cdef bint strict_types = self.strict_types cdef Py_buffer view @@ -203,6 +208,7 @@ cdef class Packer(object): elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): if not self.encoding: raise TypeError("Can't encode unicode string: no encoding is specified") + #TODO: Use faster API for UTF-8 o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) if L > ITEM_LIMIT: diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 564749e7..b796d045 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -43,8 +43,9 @@ from msgpack import ExtType cdef extern from "unpack.h": ctypedef struct msgpack_user: bint use_list - PyObject* object_hook + bint raw_as_bytes bint has_pairs_hook # call object_hook with k-v pairs + PyObject* object_hook PyObject* list_hook PyObject* ext_hook char *encoding @@ -73,12 
+74,14 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, char* encoding, char* unicode_errors, + bint use_list, bint raw_as_bytes, + char* encoding, char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): unpack_init(ctx) ctx.user.use_list = use_list + ctx.user.raw_as_bytes = raw_as_bytes ctx.user.object_hook = ctx.user.list_hook = NULL ctx.user.max_str_len = max_str_len ctx.user.max_bin_len = max_bin_len @@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj, return 1 def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors="strict", + bint use_list=True, bint raw_as_bytes=True, + encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, @@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef char* cerr = NULL cdef int new_protocol = 0 - get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) + if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + elif not isinstance(encoding, bytes): + raise TypeError("encoding should be bytes or unicode") + cenc = PyBytes_AsString(encoding) + + if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) + if isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + elif not isinstance(unicode_errors, bytes): + raise TypeError("unicode_errors should be bytes or unicode") + cerr = PyBytes_AsString(unicode_errors) + get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) try: - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) - - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, cenc, cerr, + use_list, raw_as_bytes, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -252,6 +261,16 @@ cdef class Unpacker(object): If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) + :param bool raw_as_bytes: + If true, unpack msgpack raw to Python bytes (default). + Otherwise, unpack to Python str (or unicode on Python 2) by decoding + with UTF-8 encoding (recommended). + Currently, the default is true, but it will be changed to false in + near future. So you must specify it explicitly for keeping backward + compatibility. + + *encoding* option which is deprecated overrides this option. + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. @@ -262,14 +281,6 @@ cdef class Unpacker(object): Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) - :param str encoding: - Encoding used for decoding msgpack raw. 
- If it is None (default), msgpack raw is deserialized to Python bytes. - - :param str unicode_errors: - Used for decoding msgpack raw with *encoding*. - (default: `'strict'`) - :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. @@ -287,16 +298,25 @@ cdef class Unpacker(object): :param int max_map_len: Limits max length of map. (default: 2**31-1) + :param str encoding: + Deprecated, use raw_as_bytes instead. + Encoding used for decoding msgpack raw. + If it is None (default), msgpack raw is deserialized to Python bytes. + + :param str unicode_errors: + Deprecated. Used for decoding msgpack raw with *encoding*. + (default: `'strict'`) + - example of streaming deserialize from file-like object:: + Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like) + unpacker = Unpacker(file_like, raw_as_bytes=False) for o in unpacker: process(o) - example of streaming deserialize from socket:: + Example of streaming deserialize from socket:: - unpacker = Unpacker() + unpacker = Unpacker(raw_as_bytes=False) while True: buf = sock.recv(1024**2) if not buf: @@ -324,7 +344,8 @@ cdef class Unpacker(object): PyMem_Free(self.buf) self.buf = NULL - def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, + def __init__(self, file_like=None, Py_ssize_t read_size=0, + bint use_list=True, bint raw_as_bytes=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, @@ -363,6 +384,7 @@ cdef class Unpacker(object): self.stream_offset = 0 if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) if isinstance(encoding, unicode): self.encoding = encoding.encode('ascii') elif isinstance(encoding, bytes): @@ -372,6 +394,7 @@ cdef class Unpacker(object): cenc = PyBytes_AsString(self.encoding) if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) if isinstance(unicode_errors, unicode): self.unicode_errors = unicode_errors.encode('ascii') elif isinstance(unicode_errors, bytes): @@ -381,7 +404,7 @@ cdef class Unpacker(object): cerr = PyBytes_AsString(self.unicode_errors) init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, cenc, cerr, + ext_hook, use_list, raw_as_bytes, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 5447b530..d95f6218 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -145,6 +145,16 @@ class Unpacker(object): If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) + :param bool raw_as_bytes: + If true, unpack msgpack raw to Python bytes (default). + Otherwise, unpack to Python str (or unicode on Python 2) by decoding + with UTF-8 encoding (recommended). + Currently, the default is true, but it will be changed to false in + near future. So you must specify it explicitly for keeping backward + compatibility. + + *encoding* option which is deprecated overrides this option. + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. 
@@ -183,13 +193,13 @@ class Unpacker(object): example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like) + unpacker = Unpacker(file_like, raw_as_bytes=False) for o in unpacker: process(o) example of streaming deserialize from socket:: - unpacker = Unpacker() + unpacker = Unpacker(raw_as_bytes=False) while True: buf = sock.recv(1024**2) if not buf: @@ -199,15 +209,28 @@ class Unpacker(object): process(o) """ - def __init__(self, file_like=None, read_size=0, use_list=True, + def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True, object_hook=None, object_pairs_hook=None, list_hook=None, - encoding=None, unicode_errors='strict', max_buffer_size=0, + encoding=None, unicode_errors=None, max_buffer_size=0, ext_hook=ExtType, max_str_len=2147483647, # 2**32-1 max_bin_len=2147483647, max_array_len=2147483647, max_map_len=2147483647, max_ext_len=2147483647): + + if encoding is not None: + warnings.warn( + "encoding is deprecated, Use raw_as_bytes=False instead.", + PendingDeprecationWarning) + + if unicode_errors is not None: + warnings.warn( + "unicode_errors is deprecated.", + PendingDeprecationWarning) + else: + unicode_errors = 'strict' + if file_like is None: self._feeding = True else: @@ -234,6 +257,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") self._read_size = read_size or min(self._max_buffer_size, 16*1024) + self._raw_as_bytes = bool(raw_as_bytes) self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list @@ -582,8 +606,10 @@ def _unpack(self, execute=EX_CONSTRUCT): if typ == TYPE_RAW: if self._encoding is not None: obj = obj.decode(self._encoding, self._unicode_errors) - else: + elif self._raw_as_bytes: obj = bytes(obj) + else: + obj = obj.decode('utf_8') return obj if typ == TYPE_EXT: return self._ext_hook(n, bytes(obj)) @@ -682,9 +708,23 @@ class Packer(object): :param str unicode_errors: (deprecated) Error handler for encoding unicode. 
(default: 'strict') """ - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + def __init__(self, default=None, encoding=None, unicode_errors=None, use_single_float=False, autoreset=True, use_bin_type=False, strict_types=False): + if encoding is None: + encoding = 'utf_8' + else: + warnings.warn( + "encoding is deprecated, Use raw_as_bytes=False instead.", + PendingDeprecationWarning) + + if unicode_errors is None: + unicode_errors = 'strict' + else: + warnings.warn( + "unicode_errors is deprecated.", + PendingDeprecationWarning) + self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset diff --git a/msgpack/unpack.h b/msgpack/unpack.h index da2cfb6a..8c2fc467 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -20,9 +20,10 @@ #include "unpack_define.h" typedef struct unpack_user { - int use_list; - PyObject *object_hook; + bool use_list; + bool raw_as_bytes; bool has_pairs_hook; + PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; const char *encoding; @@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* } PyObject *py; - if(u->encoding) { + + if (u->encoding) { py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); - } else { + } else if (u->raw_as_bytes) { py = PyBytes_FromStringAndSize(p, l); + } else { + py = PyUnicode_DecodeUTF8(p, l, NULL); } if (!py) return -1; diff --git a/test/test_limits.py b/test/test_limits.py index 197ef461..3febc30c 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -39,11 +39,11 @@ def test_max_str_len(): d = 'x' * 3 packed = packb(d) - unpacker = Unpacker(max_str_len=3, encoding='utf-8') + unpacker = Unpacker(max_str_len=3, raw_as_bytes=False) unpacker.feed(packed) assert unpacker.unpack() == d - unpacker = Unpacker(max_str_len=2, encoding='utf-8') + unpacker = Unpacker(max_str_len=2, raw_as_bytes=False) with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() diff --git a/test/test_pack.py b/test/test_pack.py index ac931038..29f5887e 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -31,14 +31,14 @@ def testPack(): def testPackUnicode(): test_data = ["", "abcd", ["defgh"], "Русский текст"] for td in test_data: - re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8') + re = unpackb(packb(td), use_list=1, raw_as_bytes=False) assert re == td - packer = Packer(encoding='utf-8') + packer = Packer() data = packer.pack(td) - re = Unpacker(BytesIO(data), encoding=str('utf-8'), use_list=1).unpack() + re = Unpacker(BytesIO(data), raw_as_bytes=False, use_list=1).unpack() assert re == td -def testPackUTF32(): +def testPackUTF32(): # deprecated try: test_data = [ "", @@ -66,26 +66,22 @@ def testPackByteArrays(): for td in test_data: check(td) -def testIgnoreUnicodeErrors(): +def testIgnoreUnicodeErrors(): # deprecated re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) assert re == "abcdef" def testStrictUnicodeUnpack(): with raises(UnicodeDecodeError): - unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1) + unpackb(packb(b'abc\xeddef'), raw_as_bytes=False, use_list=1) -def testStrictUnicodePack(): +def testStrictUnicodePack(): # deprecated with raises(UnicodeEncodeError): packb("abc\xeddef", encoding='ascii', unicode_errors='strict') -def testIgnoreErrorsPack(): - re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1) +def testIgnoreErrorsPack(): # deprecated + re = 
unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw_as_bytes=False, use_list=1) assert re == "abcdef" -def testNoEncoding(): - with raises(TypeError): - packb("abc", encoding=None) - def testDecodeBinary(): re = unpackb(packb(b"abc"), encoding=None, use_list=1) assert re == b"abc" diff --git a/test/test_stricttype.py b/test/test_stricttype.py index 0f865c83..13239f1e 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -11,7 +11,7 @@ def default(o): return dict(o._asdict()) raise TypeError('Unsupported type %s' % (type(o),)) packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) - unpacked = unpackb(packed, encoding='utf-8') + unpacked = unpackb(packed, raw_as_bytes=False) assert unpacked == {'foo': 1, 'bar': 42} @@ -32,7 +32,7 @@ def convert(o): return o data = packb(t, strict_types=True, use_bin_type=True, default=default) - expected = unpackb(data, encoding='utf-8', object_hook=convert) + expected = unpackb(data, raw_as_bytes=False, object_hook=convert) assert expected == t @@ -53,10 +53,10 @@ def default(o): def convert(code, payload): if code == MSGPACK_EXT_TYPE_TUPLE: # Unpack and convert to tuple - return tuple(unpackb(payload, encoding='utf-8', ext_hook=convert)) + return tuple(unpackb(payload, raw_as_bytes=False, ext_hook=convert)) raise ValueError('Unknown Ext code {}'.format(code)) data = packb(t, strict_types=True, use_bin_type=True, default=default) - expected = unpackb(data, encoding='utf-8', ext_hook=convert) + expected = unpackb(data, raw_as_bytes=False, ext_hook=convert) assert expected == t diff --git a/test/test_unpack.py b/test/test_unpack.py index c0d711cd..143f9993 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -47,8 +47,8 @@ def test_unpacker_ext_hook(): class MyUnpacker(Unpacker): def __init__(self): - super(MyUnpacker, self).__init__(ext_hook=self._hook, - encoding='utf-8') + super(MyUnpacker, self).__init__( + ext_hook=self._hook, raw_as_bytes=False) def _hook(self, code, data): if code == 1: @@ -57,11 +57,11 @@ def _hook(self, code, data): return ExtType(code, data) unpacker = MyUnpacker() - unpacker.feed(packb({'a': 1}, encoding='utf-8')) + unpacker.feed(packb({'a': 1})) assert unpacker.unpack() == {'a': 1} - unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8')) + unpacker.feed(packb({'a': ExtType(1, b'123')})) assert unpacker.unpack() == {'a': 123} - unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8')) + unpacker.feed(packb({'a': ExtType(2, b'321')})) assert unpacker.unpack() == {'a': ExtType(2, b'321')} From 60ef3879d792ec92480cf9d6d610951657c2e8c7 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 11 Jan 2018 19:41:05 +0900 Subject: [PATCH 116/349] packer: Use PyUnicode_AsUTF8AndSize() for utf-8 (#272) --- docker/runtests.sh | 2 +- msgpack/_packer.pyx | 36 +++++++++++++++++++++------------- msgpack/pack.h | 47 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/docker/runtests.sh b/docker/runtests.sh index 11ef9f46..113b6307 100755 --- a/docker/runtests.sh +++ b/docker/runtests.sh @@ -9,6 +9,6 @@ for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do pushd test # prevent importing msgpack package in current directory. $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' $PYBIN/python -c 'from msgpack import _packer, _unpacker' - $PYBIN/py.test -v + $PYBIN/pytest -v . 
popd done diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 39da91b5..a4913ab7 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -13,6 +13,7 @@ cdef extern from "Python.h": int PyMemoryView_Check(object obj) int PyByteArray_Check(object obj) int PyByteArray_CheckExact(object obj) + char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL cdef extern from "pack.h": @@ -37,6 +38,7 @@ cdef extern from "pack.h": int msgpack_pack_bin(msgpack_packer* pk, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) + int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -126,8 +128,12 @@ cdef class Packer(object): raise TypeError("default must be a callable.") self._default = default if encoding is None: - self.encoding = 'utf_8' - self.unicode_errors = NULL + if unicode_errors is None: + self.encoding = NULL + self.unicode_errors = NULL + else: + self.encoding = "utf_8" + self.unicode_errors = unicode_errors else: if isinstance(encoding, unicode): self._bencoding = encoding.encode('ascii') @@ -140,6 +146,8 @@ cdef class Packer(object): self._berrors = unicode_errors if self._berrors is not None: self.unicode_errors = PyBytes_AsString(self._berrors) + else: + self.unicode_errors = NULL def __dealloc__(self): PyMem_Free(self.pk.buf) @@ -206,17 +214,19 @@ cdef class Packer(object): if ret == 0: ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - if not self.encoding: - raise TypeError("Can't encode unicode string: no encoding is specified") - #TODO: Use faster API for UTF-8 - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - L = len(o) - if L > ITEM_LIMIT: - raise PackValueError("unicode string is too large") - rawval = o - ret = msgpack_pack_raw(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, L) + if self.encoding == NULL: + ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); + if ret == -2: + raise PackValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) + L = len(o) + if L > ITEM_LIMIT: + raise PackValueError("unicode string is too large") + ret = msgpack_pack_raw(&self.pk, L) + if ret == 0: + rawval = o + ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyDict_CheckExact(o): d = o L = len(d) diff --git a/msgpack/pack.h b/msgpack/pack.h index 3bc21ea5..4f3ce1d9 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -67,6 +67,53 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ #include "pack_template.h" +// return -2 when o is too long +static inline int +msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) +{ +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(o)); + + Py_ssize_t len; + const char* buf = PyUnicode_AsUTF8AndSize(o, &len); + if (buf == NULL) + return -1; + + if (len > limit) { + return -2; + } + + int ret = msgpack_pack_raw(pk, len); + if (ret) return ret; + + return msgpack_pack_raw_body(pk, buf, len); +#else + PyObject *bytes; + Py_ssize_t len; + int ret; + + // py2 + bytes = PyUnicode_AsUTF8String(o); + if (bytes == NULL) + return -1; + + len = PyString_GET_SIZE(bytes); + if (len > limit) { + Py_DECREF(bytes); + return -2; + } + + ret = msgpack_pack_raw(pk, len); + if (ret) { + Py_DECREF(bytes); + return -1; + } + ret = 
msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len); + Py_DECREF(bytes); + return ret; +#endif +} + #ifdef __cplusplus } #endif From d9ec8fc905fc9ed37c86700f794adeb883b4f5ea Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 11 Jan 2018 23:50:41 +0900 Subject: [PATCH 117/349] Packer.pack() reset buffer on exception (#274) fixes #210 --- msgpack/_packer.pyx | 12 +++++++----- msgpack/fallback.py | 6 +++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index a4913ab7..35e5a9dc 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -289,11 +289,13 @@ cdef class Packer(object): cpdef pack(self, object obj): cdef int ret - ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) - if ret == -1: - raise MemoryError - elif ret: # should not happen. - raise TypeError + try: + ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) + except: + self.pk.length = 0 + raise + if ret: # should not happen. + raise RuntimeError("internal error") if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 diff --git a/msgpack/fallback.py b/msgpack/fallback.py index d95f6218..675ee8a5 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -848,7 +848,11 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, raise TypeError("Cannot serialize %r" % (obj, )) def pack(self, obj): - self._pack(obj) + try: + self._pack(obj) + except: + self._buffer = StringIO() # force reset + raise ret = self._buffer.getvalue() if self._autoreset: self._buffer = StringIO() From 5569a4efcdc913d343eaff4e55c9b19fafde4268 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 12 Jan 2018 19:22:36 +0900 Subject: [PATCH 118/349] s/raw_as_bytes/raw/g (#276) fixes #273 --- Makefile | 3 ++- README.rst | 24 ++++++++++++------------ msgpack/_unpacker.pyx | 26 +++++++++++++------------- msgpack/fallback.py | 16 ++++++++-------- msgpack/unpack.h | 4 ++-- test/test_limits.py | 4 ++-- test/test_pack.py | 8 ++++---- test/test_stricttype.py | 8 ++++---- test/test_unpack.py | 2 +- 9 files changed, 48 insertions(+), 47 deletions(-) diff --git a/Makefile b/Makefile index 6a9906c6..4030080c 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,8 @@ serve-doc: all .PHONY: clean clean: rm -rf build - rm msgpack/*.so + rm -f msgpack/_packer.cpp + rm -f msgpack/_unpacker.cpp rm -rf msgpack/__pycache__ rm -rf test/__pycache__ diff --git a/README.rst b/README.rst index a5038db5..8925a65c 100644 --- a/README.rst +++ b/README.rst @@ -47,9 +47,9 @@ In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommende For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes`` object into msgpack raw type. -In case of unpacker, there is new ``raw_as_bytes`` option. It is ``True`` by default +In case of unpacker, there is new ``raw`` option. It is ``True`` by default for backward compatibility, but it is changed to ``False`` in near future. -You can use ``raw_as_bytes=False`` instead of ``encoding='utf-8'``. +You can use ``raw=False`` instead of ``encoding='utf-8'``. Planned backward incompatible changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -58,14 +58,14 @@ When msgpack 1.0, I planning these breaking changes: * packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option. * packer: Change default of ``use_bin_type`` option from False to True. -* unpacker: Change default of ``raw_as_bytes`` option from True to False. +* unpacker: Change default of ``raw`` option from True to False. 
* unpacker: Reduce all ``max_xxx_len`` options for typical usage. * unpacker: Remove ``write_bytes`` option from all methods. To avoid these breaking changes breaks your application, please: * Don't use deprecated options. -* Pass ``use_bin_type`` and ``raw_as_bytes`` options explicitly. +* Pass ``use_bin_type`` and ``raw`` options explicitly. * If your application handle large (>1MB) data, specify ``max_xxx_len`` options too. @@ -113,14 +113,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with >>> import msgpack >>> msgpack.packb([1, 2, 3], use_bin_type=True) '\x93\x01\x02\x03' - >>> msgpack.unpackb(_, raw_as_bytes=False) + >>> msgpack.unpackb(_, raw=False) [1, 2, 3] ``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple: .. code-block:: pycon - >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw_as_bytes=False) + >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) (1, 2, 3) You should always specify the ``use_list`` keyword argument for backward compatibility. @@ -146,7 +146,7 @@ stream (or from bytes provided through its ``feed`` method). buf.seek(0) - unpacker = msgpack.Unpacker(buf, raw_as_bytes=False) + unpacker = msgpack.Unpacker(buf, raw=False) for unpacked in unpacker: print(unpacked) @@ -179,7 +179,7 @@ It is also possible to pack/unpack custom data types. Here is an example for packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) - this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw_as_bytes=False) + this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) ``Unpacker``'s ``object_hook`` callback receives a dict; the ``object_pairs_hook`` callback may instead be used to receive a list of @@ -209,7 +209,7 @@ It is also possible to pack/unpack custom data types using the **ext** type. ... >>> data = array.array('d', [1.2, 3.4]) >>> packed = msgpack.packb(data, default=default, use_bin_type=True) - >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw_as_bytes=False) + >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) >>> data == unpacked True @@ -257,7 +257,7 @@ For backward compatibility reasons, msgpack-python will still default all strings to byte strings, unless you specify the ``use_bin_type=True`` option in the packer. If you do so, it will use a non-standard type called **bin** to serialize byte arrays, and **raw** becomes to mean **str**. If you want to -distinguish **bin** and **raw** in the unpacker, specify ``raw_as_bytes=False``. +distinguish **bin** and **raw** in the unpacker, specify ``raw=False``. 
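As a concrete sketch of the explicit style recommended above (the dict contents and the ``max_*_len`` values below are illustrative only, not required values):

.. code-block:: python

    import msgpack

    # Pack with use_bin_type=True so bytes and str stay distinct on the wire.
    packed = msgpack.packb({u"name": u"msgpack", u"blob": b"\x00\x01"},
                           use_bin_type=True)

    # Unpack with raw=False so msgpack str becomes Python str (unicode on
    # Python 2).  The limits are illustrative; pick ones that fit your data.
    obj = msgpack.unpackb(packed, raw=False,
                          max_str_len=1024 * 1024,
                          max_bin_len=1024 * 1024)
    assert obj == {u"name": u"msgpack", u"blob": b"\x00\x01"}
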
Note that Python 2 defaults to byte-arrays over Unicode strings: @@ -267,7 +267,7 @@ Note that Python 2 defaults to byte-arrays over Unicode strings: >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) ['spam', 'eggs'] >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw_as_bytes=False) + raw=False) ['spam', u'eggs'] This is the same code in Python 3 (same behaviour, but Python 3 has a @@ -279,7 +279,7 @@ different default): >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) [b'spam', b'eggs'] >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw_as_bytes=False) + raw=False) [b'spam', 'eggs'] diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index b796d045..806be4fd 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -43,7 +43,7 @@ from msgpack import ExtType cdef extern from "unpack.h": ctypedef struct msgpack_user: bint use_list - bint raw_as_bytes + bint raw bint has_pairs_hook # call object_hook with k-v pairs PyObject* object_hook PyObject* list_hook @@ -74,14 +74,14 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, bint raw_as_bytes, + bint use_list, bint raw, char* encoding, char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): unpack_init(ctx) ctx.user.use_list = use_list - ctx.user.raw_as_bytes = raw_as_bytes + ctx.user.raw = raw ctx.user.object_hook = ctx.user.list_hook = NULL ctx.user.max_str_len = max_str_len ctx.user.max_bin_len = max_bin_len @@ -158,7 +158,7 @@ cdef inline int get_data_from_buffer(object obj, return 1 def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw_as_bytes=True, + bint use_list=True, bint raw=True, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=2147483647, # 2**32-1 @@ -185,7 +185,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef int new_protocol = 0 if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) if isinstance(encoding, unicode): encoding = encoding.encode('ascii') elif not isinstance(encoding, bytes): @@ -203,7 +203,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) try: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, raw_as_bytes, cenc, cerr, + use_list, raw, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -261,7 +261,7 @@ cdef class Unpacker(object): If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) - :param bool raw_as_bytes: + :param bool raw: If true, unpack msgpack raw to Python bytes (default). Otherwise, unpack to Python str (or unicode on Python 2) by decoding with UTF-8 encoding (recommended). @@ -299,7 +299,7 @@ cdef class Unpacker(object): Limits max length of map. (default: 2**31-1) :param str encoding: - Deprecated, use raw_as_bytes instead. + Deprecated, use raw instead. Encoding used for decoding msgpack raw. 
If it is None (default), msgpack raw is deserialized to Python bytes. @@ -310,13 +310,13 @@ cdef class Unpacker(object): Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw_as_bytes=False) + unpacker = Unpacker(file_like, raw=False) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(raw_as_bytes=False) + unpacker = Unpacker(raw=False) while True: buf = sock.recv(1024**2) if not buf: @@ -345,7 +345,7 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, Py_ssize_t read_size=0, - bint use_list=True, bint raw_as_bytes=True, + bint use_list=True, bint raw=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, @@ -384,7 +384,7 @@ cdef class Unpacker(object): self.stream_offset = 0 if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) if isinstance(encoding, unicode): self.encoding = encoding.encode('ascii') elif isinstance(encoding, bytes): @@ -404,7 +404,7 @@ cdef class Unpacker(object): cerr = PyBytes_AsString(self.unicode_errors) init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, raw_as_bytes, cenc, cerr, + ext_hook, use_list, raw, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 675ee8a5..bf6c9a6f 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -145,7 +145,7 @@ class Unpacker(object): If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) - :param bool raw_as_bytes: + :param bool raw: If true, unpack msgpack raw to Python bytes (default). Otherwise, unpack to Python str (or unicode on Python 2) by decoding with UTF-8 encoding (recommended). 
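To make the renamed flag concrete, a minimal sketch of the difference it makes (assuming the 0.5.x API as changed in this patch; the C extension and this pure-Python fallback are intended to behave the same):

.. code-block:: python

    import msgpack

    packed = msgpack.packb(u"abc", use_bin_type=True)

    unpacker = msgpack.Unpacker(raw=True)    # current default: msgpack str -> bytes
    unpacker.feed(packed)
    assert unpacker.unpack() == b"abc"

    unpacker = msgpack.Unpacker(raw=False)   # recommended: msgpack str -> str
    unpacker.feed(packed)
    assert unpacker.unpack() == u"abc"
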
@@ -193,13 +193,13 @@ class Unpacker(object): example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw_as_bytes=False) + unpacker = Unpacker(file_like, raw=False) for o in unpacker: process(o) example of streaming deserialize from socket:: - unpacker = Unpacker(raw_as_bytes=False) + unpacker = Unpacker(raw=False) while True: buf = sock.recv(1024**2) if not buf: @@ -209,7 +209,7 @@ class Unpacker(object): process(o) """ - def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True, + def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, object_hook=None, object_pairs_hook=None, list_hook=None, encoding=None, unicode_errors=None, max_buffer_size=0, ext_hook=ExtType, @@ -221,7 +221,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True if encoding is not None: warnings.warn( - "encoding is deprecated, Use raw_as_bytes=False instead.", + "encoding is deprecated, Use raw=False instead.", PendingDeprecationWarning) if unicode_errors is not None: @@ -257,7 +257,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") self._read_size = read_size or min(self._max_buffer_size, 16*1024) - self._raw_as_bytes = bool(raw_as_bytes) + self._raw = bool(raw) self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list @@ -606,7 +606,7 @@ def _unpack(self, execute=EX_CONSTRUCT): if typ == TYPE_RAW: if self._encoding is not None: obj = obj.decode(self._encoding, self._unicode_errors) - elif self._raw_as_bytes: + elif self._raw: obj = bytes(obj) else: obj = obj.decode('utf_8') @@ -715,7 +715,7 @@ def __init__(self, default=None, encoding=None, unicode_errors=None, encoding = 'utf_8' else: warnings.warn( - "encoding is deprecated, Use raw_as_bytes=False instead.", + "encoding is deprecated, Use raw=False instead.", PendingDeprecationWarning) if unicode_errors is None: diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 8c2fc467..d7b5e00a 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -21,7 +21,7 @@ typedef struct unpack_user { bool use_list; - bool raw_as_bytes; + bool raw; bool has_pairs_hook; PyObject *object_hook; PyObject *list_hook; @@ -229,7 +229,7 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* if (u->encoding) { py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); - } else if (u->raw_as_bytes) { + } else if (u->raw) { py = PyBytes_FromStringAndSize(p, l); } else { py = PyUnicode_DecodeUTF8(p, l, NULL); diff --git a/test/test_limits.py b/test/test_limits.py index 3febc30c..74e48c19 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -39,11 +39,11 @@ def test_max_str_len(): d = 'x' * 3 packed = packb(d) - unpacker = Unpacker(max_str_len=3, raw_as_bytes=False) + unpacker = Unpacker(max_str_len=3, raw=False) unpacker.feed(packed) assert unpacker.unpack() == d - unpacker = Unpacker(max_str_len=2, raw_as_bytes=False) + unpacker = Unpacker(max_str_len=2, raw=False) with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() diff --git a/test/test_pack.py b/test/test_pack.py index 29f5887e..b447f9c3 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -31,11 +31,11 @@ def testPack(): def testPackUnicode(): test_data = ["", "abcd", ["defgh"], "Русский текст"] for td in test_data: - re = unpackb(packb(td), use_list=1, raw_as_bytes=False) + re = 
unpackb(packb(td), use_list=1, raw=False) assert re == td packer = Packer() data = packer.pack(td) - re = Unpacker(BytesIO(data), raw_as_bytes=False, use_list=1).unpack() + re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack() assert re == td def testPackUTF32(): # deprecated @@ -72,14 +72,14 @@ def testIgnoreUnicodeErrors(): # deprecated def testStrictUnicodeUnpack(): with raises(UnicodeDecodeError): - unpackb(packb(b'abc\xeddef'), raw_as_bytes=False, use_list=1) + unpackb(packb(b'abc\xeddef'), raw=False, use_list=1) def testStrictUnicodePack(): # deprecated with raises(UnicodeEncodeError): packb("abc\xeddef", encoding='ascii', unicode_errors='strict') def testIgnoreErrorsPack(): # deprecated - re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw_as_bytes=False, use_list=1) + re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw=False, use_list=1) assert re == "abcdef" def testDecodeBinary(): diff --git a/test/test_stricttype.py b/test/test_stricttype.py index 13239f1e..87e7c1ce 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -11,7 +11,7 @@ def default(o): return dict(o._asdict()) raise TypeError('Unsupported type %s' % (type(o),)) packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) - unpacked = unpackb(packed, raw_as_bytes=False) + unpacked = unpackb(packed, raw=False) assert unpacked == {'foo': 1, 'bar': 42} @@ -32,7 +32,7 @@ def convert(o): return o data = packb(t, strict_types=True, use_bin_type=True, default=default) - expected = unpackb(data, raw_as_bytes=False, object_hook=convert) + expected = unpackb(data, raw=False, object_hook=convert) assert expected == t @@ -53,10 +53,10 @@ def default(o): def convert(code, payload): if code == MSGPACK_EXT_TYPE_TUPLE: # Unpack and convert to tuple - return tuple(unpackb(payload, raw_as_bytes=False, ext_hook=convert)) + return tuple(unpackb(payload, raw=False, ext_hook=convert)) raise ValueError('Unknown Ext code {}'.format(code)) data = packb(t, strict_types=True, use_bin_type=True, default=default) - expected = unpackb(data, raw_as_bytes=False, ext_hook=convert) + expected = unpackb(data, raw=False, ext_hook=convert) assert expected == t diff --git a/test/test_unpack.py b/test/test_unpack.py index 143f9993..00a10612 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -48,7 +48,7 @@ class MyUnpacker(Unpacker): def __init__(self): super(MyUnpacker, self).__init__( - ext_hook=self._hook, raw_as_bytes=False) + ext_hook=self._hook, raw=False) def _hook(self, code, data): if code == 1: From 52fb85a2c5776590599df3a5839117b88bc49980 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 2 Feb 2018 19:43:42 +0900 Subject: [PATCH 119/349] 0.5.2 --- ChangeLog.rst | 13 +++++++++++++ msgpack/_version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index ed0f92e5..ffdc910b 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,16 @@ +0.5.2 +====== + +* Add ``raw`` option to Unpacker. It is preferred way than ``encoding`` option. 
+ +* Packer.pack() reset buffer on exception (#274) + + +0.5.1 +====== + +* Remove FutureWarning about use_bin_type option (#271) + 0.5.0 ====== diff --git a/msgpack/_version.py b/msgpack/_version.py index ecba3d88..4e3b29fe 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 5, 1) +version = (0, 5, 2) From a0ba076c3527bb474cd8ec820ae0fd6c8293d4af Mon Sep 17 00:00:00 2001 From: Andrew Rabert <6550543+nvllsvm@users.noreply.github.com> Date: Fri, 2 Feb 2018 20:34:42 -0500 Subject: [PATCH 120/349] Fix encoding and unicode_errors (#277) Previously, unicode_errors was either set to NULL or to the result of PyBytes_AsString. This restores that behavior while also keeping the existing NULL default behavior. Original defaults were restored to keep API compatibility until these deprecated options are finally removed. --- msgpack/_packer.pyx | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 35e5a9dc..c49e7194 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -127,27 +127,26 @@ cdef class Packer(object): if not PyCallable_Check(default): raise TypeError("default must be a callable.") self._default = default - if encoding is None: - if unicode_errors is None: - self.encoding = NULL - self.unicode_errors = NULL - else: - self.encoding = "utf_8" - self.unicode_errors = unicode_errors + if encoding is None and unicode_errors is None: + self.encoding = NULL + self.unicode_errors = NULL else: - if isinstance(encoding, unicode): - self._bencoding = encoding.encode('ascii') + if encoding is None: + self.encoding = 'utf-8' else: - self._bencoding = encoding - self.encoding = PyBytes_AsString(self._bencoding) - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') + if isinstance(encoding, unicode): + self._bencoding = encoding.encode('ascii') + else: + self._bencoding = encoding + self.encoding = PyBytes_AsString(self._bencoding) + if unicode_errors is None: + self.unicode_errors = 'strict' else: - self._berrors = unicode_errors - if self._berrors is not None: + if isinstance(unicode_errors, unicode): + self._berrors = unicode_errors.encode('ascii') + else: + self._berrors = unicode_errors self.unicode_errors = PyBytes_AsString(self._berrors) - else: - self.unicode_errors = NULL def __dealloc__(self): PyMem_Free(self.pk.buf) From 618b2cb027e326b315edf5b856ed3b2011c8dad9 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 3 Feb 2018 10:54:21 +0900 Subject: [PATCH 121/349] 0.5.3 --- ChangeLog.rst | 5 +++++ msgpack/_version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index ffdc910b..4a5c5994 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,8 @@ +0.5.3 +====== + +* Fixed regression when passing ``unicode_errors`` to Packer but not ``encoding``. 
(#277) + 0.5.2 ====== diff --git a/msgpack/_version.py b/msgpack/_version.py index 4e3b29fe..217bfcec 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 5, 2) +version = (0, 5, 3) From 9fdb83719d3544322f0687a9b1637276c1443eda Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 5 Feb 2018 02:19:48 +0900 Subject: [PATCH 122/349] Undeprecate unicode_errors (#278) --- msgpack/_packer.pyx | 7 +++---- msgpack/_unpacker.pyx | 9 +++------ msgpack/unpack.h | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index c49e7194..b987aee5 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -89,10 +89,11 @@ cdef class Packer(object): This is useful when trying to implement accurate serialization for python types. + :param str unicode_errors: + Error handler for encoding unicode. (default: 'strict') + :param str encoding: (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') - :param str unicode_errors: - (deprecated) Error handler for encoding unicode. (default: 'strict') """ cdef msgpack_packer pk cdef object _default @@ -117,8 +118,6 @@ cdef class Packer(object): bint strict_types=False): if encoding is not None: PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1) - if unicode_errors is not None: - PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1) self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 806be4fd..25fdcd96 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -159,7 +159,7 @@ cdef inline int get_data_from_buffer(object obj, def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=True, bint raw=True, - encoding=None, unicode_errors="strict", + encoding=None, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, @@ -193,7 +193,6 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cenc = PyBytes_AsString(encoding) if unicode_errors is not None: - PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) if isinstance(unicode_errors, unicode): unicode_errors = unicode_errors.encode('ascii') elif not isinstance(unicode_errors, bytes): @@ -304,8 +303,7 @@ cdef class Unpacker(object): If it is None (default), msgpack raw is deserialized to Python bytes. :param str unicode_errors: - Deprecated. Used for decoding msgpack raw with *encoding*. - (default: `'strict'`) + Error handler used for decoding str type. 
(default: `'strict'`) Example of streaming deserialize from file-like object:: @@ -347,7 +345,7 @@ cdef class Unpacker(object): def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=True, bint raw=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, - encoding=None, unicode_errors='strict', int max_buffer_size=0, + encoding=None, unicode_errors=None, int max_buffer_size=0, object ext_hook=ExtType, Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, @@ -394,7 +392,6 @@ cdef class Unpacker(object): cenc = PyBytes_AsString(self.encoding) if unicode_errors is not None: - PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) if isinstance(unicode_errors, unicode): self.unicode_errors = unicode_errors.encode('ascii') elif isinstance(unicode_errors, bytes): diff --git a/msgpack/unpack.h b/msgpack/unpack.h index d7b5e00a..63e5543b 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -232,7 +232,7 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* } else if (u->raw) { py = PyBytes_FromStringAndSize(p, l); } else { - py = PyUnicode_DecodeUTF8(p, l, NULL); + py = PyUnicode_DecodeUTF8(p, l, u->unicode_errors); } if (!py) return -1; From 351023946fa92fc78bdd5247f64d3a62fa233e95 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 5 Feb 2018 02:25:12 +0900 Subject: [PATCH 123/349] 0.5.4 --- ChangeLog.rst | 5 +++++ msgpack/_version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 4a5c5994..67ee24cf 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,8 @@ +0.5.4 +====== + +* Undeprecate ``unicode_errors`` option. (#278) + 0.5.3 ====== diff --git a/msgpack/_version.py b/msgpack/_version.py index 217bfcec..e7435d96 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 5, 3) +version = (0, 5, 4) From 2644cbdcb7e1f2a8373de5bd30d71c907827caf6 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 5 Feb 2018 11:44:17 +0900 Subject: [PATCH 124/349] Use cython's cast for converting encoding and errors (#279) It is little faster on Python 3 because we can skip temporary bytes object --- msgpack/_packer.pyx | 40 +++++++++++++++++--------------------- msgpack/_unpacker.pyx | 45 ++++++++++++++----------------------------- 2 files changed, 32 insertions(+), 53 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index b987aee5..225f24ae 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,7 +1,8 @@ # coding: utf-8 -#cython: embedsignature=True +#cython: embedsignature=True, c_string_encoding=ascii from cpython cimport * +from cpython.version cimport PY_MAJOR_VERSION from cpython.exc cimport PyErr_WarnEx from msgpack.exceptions import PackValueError, PackOverflowError @@ -99,8 +100,8 @@ cdef class Packer(object): cdef object _default cdef object _bencoding cdef object _berrors - cdef char *encoding - cdef char *unicode_errors + cdef const char *encoding + cdef const char *unicode_errors cdef bint strict_types cdef bool use_float cdef bint autoreset @@ -126,26 +127,21 @@ cdef class Packer(object): if not PyCallable_Check(default): raise TypeError("default must be a callable.") self._default = default - if encoding is None and unicode_errors is None: - self.encoding = NULL - self.unicode_errors = NULL - else: - if encoding is None: + + self._bencoding = encoding + if encoding is None: + if PY_MAJOR_VERSION < 3: self.encoding = 'utf-8' else: - if isinstance(encoding, 
unicode): - self._bencoding = encoding.encode('ascii') - else: - self._bencoding = encoding - self.encoding = PyBytes_AsString(self._bencoding) - if unicode_errors is None: - self.unicode_errors = 'strict' - else: - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') - else: - self._berrors = unicode_errors - self.unicode_errors = PyBytes_AsString(self._berrors) + self.encoding = NULL + else: + self.encoding = self._bencoding + + self._berrors = unicode_errors + if unicode_errors is None: + self.unicode_errors = NULL + else: + self.unicode_errors = self._berrors def __dealloc__(self): PyMem_Free(self.pk.buf) @@ -212,7 +208,7 @@ cdef class Packer(object): if ret == 0: ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - if self.encoding == NULL: + if self.encoding == NULL and self.unicode_errors == NULL: ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); if ret == -2: raise PackValueError("unicode string is too large") diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 25fdcd96..fb58490a 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,6 +1,7 @@ # coding: utf-8 -#cython: embedsignature=True +#cython: embedsignature=True, c_string_encoding=ascii +from cpython.version cimport PY_MAJOR_VERSION from cpython.bytes cimport ( PyBytes_AsString, PyBytes_FromStringAndSize, @@ -75,7 +76,7 @@ cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, bint use_list, bint raw, - char* encoding, char* unicode_errors, + const char* encoding, const char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): @@ -180,24 +181,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef Py_buffer view cdef char* buf = NULL cdef Py_ssize_t buf_len - cdef char* cenc = NULL - cdef char* cerr = NULL + cdef const char* cenc = NULL + cdef const char* cerr = NULL cdef int new_protocol = 0 if encoding is not None: PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - elif not isinstance(encoding, bytes): - raise TypeError("encoding should be bytes or unicode") - cenc = PyBytes_AsString(encoding) + cenc = encoding if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - elif not isinstance(unicode_errors, bytes): - raise TypeError("unicode_errors should be bytes or unicode") - cerr = PyBytes_AsString(unicode_errors) + cerr = unicode_errors get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) try: @@ -219,7 +212,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, def unpack(object stream, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors="strict", + bint use_list=1, encoding=None, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, @@ -352,8 +345,8 @@ cdef class Unpacker(object): Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, Py_ssize_t max_ext_len=2147483647): - cdef char *cenc=NULL, - cdef char *cerr=NULL + cdef const char *cenc=NULL, + cdef const char *cerr=NULL self.object_hook = object_hook self.object_pairs_hook = 
object_pairs_hook @@ -383,22 +376,12 @@ cdef class Unpacker(object): if encoding is not None: PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) - if isinstance(encoding, unicode): - self.encoding = encoding.encode('ascii') - elif isinstance(encoding, bytes): - self.encoding = encoding - else: - raise TypeError("encoding should be bytes or unicode") - cenc = PyBytes_AsString(self.encoding) + self.encoding = encoding + cenc = encoding if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - self.unicode_errors = unicode_errors.encode('ascii') - elif isinstance(unicode_errors, bytes): - self.unicode_errors = unicode_errors - else: - raise TypeError("unicode_errors should be bytes or unicode") - cerr = PyBytes_AsString(self.unicode_errors) + self.unicode_errors = unicode_errors + cerr = unicode_errors init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, ext_hook, use_list, raw, cenc, cerr, From 4b72b6177321ef24b6c7af2354fb980db69e2aec Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 5 Feb 2018 15:08:19 +0900 Subject: [PATCH 125/349] Add Makefile target for updating docker image --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 4030080c..124f2437 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,11 @@ clean: rm -rf msgpack/__pycache__ rm -rf test/__pycache__ +.PHONY: update-docker +update-docker: + docker pull quay.io/pypa/manylinux1_i686 + docker pull quay.io/pypa/manylinux1_x86_64 + .PHONY: linux-wheel linux-wheel: docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh From ae8d4694829d5b58d613c588c30e29dd29860c4f Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 16 Feb 2018 16:35:22 +0900 Subject: [PATCH 126/349] Fix memory leak in pure Python Unpacker.feed() (#284) fixes #283 --- msgpack/fallback.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index bf6c9a6f..7e406862 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -289,6 +289,8 @@ def feed(self, next_bytes): view = _get_data_from_buffer(next_bytes) if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size): raise BufferFull + del self._buffer[:self._buff_i] + self._buff_i = 0 self._buffer += view def _consume(self): From da902f9c1d996fb461f1efef6487ef40d32d365a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 22 Feb 2018 00:55:32 +0900 Subject: [PATCH 127/349] Move unpack() from each implementation to __init__. 
(#286) Fixes #285 --- msgpack/__init__.py | 17 ++++++++++++++--- msgpack/_unpacker.pyx | 26 -------------------------- msgpack/fallback.py | 10 ---------- 3 files changed, 14 insertions(+), 39 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 6c5ae532..70de97f2 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -19,13 +19,13 @@ def __new__(cls, code, data): import os if os.environ.get('MSGPACK_PUREPYTHON'): - from msgpack.fallback import Packer, unpack, unpackb, Unpacker + from msgpack.fallback import Packer, unpackb, Unpacker else: try: from msgpack._packer import Packer - from msgpack._unpacker import unpack, unpackb, Unpacker + from msgpack._unpacker import unpackb, Unpacker except ImportError: - from msgpack.fallback import Packer, unpack, unpackb, Unpacker + from msgpack.fallback import Packer, unpackb, Unpacker def pack(o, stream, **kwargs): @@ -46,6 +46,17 @@ def packb(o, **kwargs): """ return Packer(**kwargs).pack(o) + +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + return unpackb(stream.read(), **kwargs) + + # alias for compatibility to simplejson/marshal/pickle. load = unpack loads = unpackb diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index fb58490a..25a74018 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -211,32 +211,6 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise UnpackValueError("Unpack failed: error = %d" % (ret,)) -def unpack(object stream, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors=None, - object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): - """ - Unpack an object from `stream`. - - Raises `ValueError` when `stream` has extra bytes. - - See :class:`Unpacker` for options. - """ - return unpackb(stream.read(), use_list=use_list, - object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, - encoding=encoding, unicode_errors=unicode_errors, ext_hook=ext_hook, - max_str_len=max_str_len, - max_bin_len=max_bin_len, - max_array_len=max_array_len, - max_map_len=max_map_len, - max_ext_len=max_ext_len, - ) - - cdef class Unpacker(object): """Streaming unpacker. diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 7e406862..3609fd87 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -100,16 +100,6 @@ def _get_data_from_buffer(obj): return view -def unpack(stream, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. 
- """ - data = stream.read() - return unpackb(data, **kwargs) - def unpackb(packed, **kwargs): """ From 02c881c7cb402b37418a9bd9a3fa56daf673a71b Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 22 Feb 2018 17:55:39 +0900 Subject: [PATCH 128/349] 0.5.5 --- ChangeLog.rst | 6 ++++++ msgpack/_version.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 67ee24cf..263d1f34 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,9 @@ +0.5.5 +====== + +* Fix memory leak in pure Python Unpacker.feed() (#283) +* Fix unpack() didn't support `raw` option (#285) + 0.5.4 ====== diff --git a/msgpack/_version.py b/msgpack/_version.py index e7435d96..4035588c 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 5, 4) +version = (0, 5, 5) From 9bf38105f7dfd7e9885d8faee81c8bd188b4de4d Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 23 Feb 2018 11:32:26 +0900 Subject: [PATCH 129/349] Revert "0.5.5" This reverts commit 02c881c7cb402b37418a9bd9a3fa56daf673a71b. From 9455fccc5283abe59868c55ee3f4cedd5bf2d14b Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 23 Feb 2018 11:32:26 +0900 Subject: [PATCH 130/349] Revert "Move unpack() from each implementation to __init__. (#286)" This reverts commit da902f9c1d996fb461f1efef6487ef40d32d365a. --- msgpack/__init__.py | 17 +++-------------- msgpack/_unpacker.pyx | 26 ++++++++++++++++++++++++++ msgpack/fallback.py | 10 ++++++++++ 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 70de97f2..6c5ae532 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -19,13 +19,13 @@ def __new__(cls, code, data): import os if os.environ.get('MSGPACK_PUREPYTHON'): - from msgpack.fallback import Packer, unpackb, Unpacker + from msgpack.fallback import Packer, unpack, unpackb, Unpacker else: try: from msgpack._packer import Packer - from msgpack._unpacker import unpackb, Unpacker + from msgpack._unpacker import unpack, unpackb, Unpacker except ImportError: - from msgpack.fallback import Packer, unpackb, Unpacker + from msgpack.fallback import Packer, unpack, unpackb, Unpacker def pack(o, stream, **kwargs): @@ -46,17 +46,6 @@ def packb(o, **kwargs): """ return Packer(**kwargs).pack(o) - -def unpack(stream, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ - return unpackb(stream.read(), **kwargs) - - # alias for compatibility to simplejson/marshal/pickle. load = unpack loads = unpackb diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 25a74018..fb58490a 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -211,6 +211,32 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise UnpackValueError("Unpack failed: error = %d" % (ret,)) +def unpack(object stream, object object_hook=None, object list_hook=None, + bint use_list=1, encoding=None, unicode_errors=None, + object_pairs_hook=None, ext_hook=ExtType, + Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_bin_len=2147483647, + Py_ssize_t max_array_len=2147483647, + Py_ssize_t max_map_len=2147483647, + Py_ssize_t max_ext_len=2147483647): + """ + Unpack an object from `stream`. + + Raises `ValueError` when `stream` has extra bytes. + + See :class:`Unpacker` for options. 
+ """ + return unpackb(stream.read(), use_list=use_list, + object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, + encoding=encoding, unicode_errors=unicode_errors, ext_hook=ext_hook, + max_str_len=max_str_len, + max_bin_len=max_bin_len, + max_array_len=max_array_len, + max_map_len=max_map_len, + max_ext_len=max_ext_len, + ) + + cdef class Unpacker(object): """Streaming unpacker. diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 3609fd87..7e406862 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -100,6 +100,16 @@ def _get_data_from_buffer(obj): return view +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + data = stream.read() + return unpackb(data, **kwargs) + def unpackb(packed, **kwargs): """ From fbaa1360be67672f855dd6337e4137f2edf7ade6 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 23 Feb 2018 11:35:09 +0900 Subject: [PATCH 131/349] Fix #285 again --- msgpack/_unpacker.pyx | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index fb58490a..3843e925 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -211,30 +211,15 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise UnpackValueError("Unpack failed: error = %d" % (ret,)) -def unpack(object stream, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors=None, - object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): +def unpack(object stream, **kwargs): """ Unpack an object from `stream`. Raises `ValueError` when `stream` has extra bytes. - See :class:`Unpacker` for options. """ - return unpackb(stream.read(), use_list=use_list, - object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, - encoding=encoding, unicode_errors=unicode_errors, ext_hook=ext_hook, - max_str_len=max_str_len, - max_bin_len=max_bin_len, - max_array_len=max_array_len, - max_map_len=max_map_len, - max_ext_len=max_ext_len, - ) + data = stream.read() + return unpackb(data, **kwargs) cdef class Unpacker(object): From f38c1a3674b7623080cb774b56ede21383bde04a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 23 Feb 2018 11:52:48 +0900 Subject: [PATCH 132/349] Fix Unpacker.feed() drops unused data in buffer. (#289) Fixes #287 --- msgpack/fallback.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 7e406862..e9108d23 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -289,8 +289,13 @@ def feed(self, next_bytes): view = _get_data_from_buffer(next_bytes) if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size): raise BufferFull - del self._buffer[:self._buff_i] - self._buff_i = 0 + + # Strip buffer before checkpoint before reading file. 
+ if self._buf_checkpoint > 0: + del self._buffer[:self._buf_checkpoint] + self._buff_i -= self._buf_checkpoint + self._buf_checkpoint = 0 + self._buffer += view def _consume(self): From ae3a6ba0b04487d7108360d1d504332aed079556 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 23 Feb 2018 15:41:21 +0900 Subject: [PATCH 133/349] Deprecate implementation module's unpack() (#290) --- msgpack/__init__.py | 18 +++++++++++++++--- msgpack/_unpacker.pyx | 9 +++------ msgpack/fallback.py | 21 +++++---------------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 6c5ae532..3955a414 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -19,13 +19,13 @@ def __new__(cls, code, data): import os if os.environ.get('MSGPACK_PUREPYTHON'): - from msgpack.fallback import Packer, unpack, unpackb, Unpacker + from msgpack.fallback import Packer, unpackb, Unpacker else: try: from msgpack._packer import Packer - from msgpack._unpacker import unpack, unpackb, Unpacker + from msgpack._unpacker import unpackb, Unpacker except ImportError: - from msgpack.fallback import Packer, unpack, unpackb, Unpacker + from msgpack.fallback import Packer, unpackb, Unpacker def pack(o, stream, **kwargs): @@ -46,6 +46,18 @@ def packb(o, **kwargs): """ return Packer(**kwargs).pack(o) + +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. + + Raises `ExtraData` when `stream` contains extra bytes. + See :class:`Unpacker` for options. + """ + data = stream.read() + return unpackb(data, **kwargs) + + # alias for compatibility to simplejson/marshal/pickle. load = unpack loads = unpackb diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 3843e925..d7fa5bc6 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -212,12 +212,9 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, def unpack(object stream, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ValueError` when `stream` has extra bytes. - See :class:`Unpacker` for options. - """ + PyErr_WarnEx( + PendingDeprecationWarning, + "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", 1) data = stream.read() return unpackb(data, **kwargs) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index e9108d23..c0e5fd66 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -101,12 +101,9 @@ def _get_data_from_buffer(obj): def unpack(stream, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `packed` contains extra bytes. - See :class:`Unpacker` for options. - """ + warnings.warn( + "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", + PendingDeprecationWarning) data = stream.read() return unpackb(data, **kwargs) @@ -224,11 +221,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, "encoding is deprecated, Use raw=False instead.", PendingDeprecationWarning) - if unicode_errors is not None: - warnings.warn( - "unicode_errors is deprecated.", - PendingDeprecationWarning) - else: + if unicode_errors is None: unicode_errors = 'strict' if file_like is None: @@ -713,7 +706,7 @@ class Packer(object): (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') :param str unicode_errors: - (deprecated) Error handler for encoding unicode. (default: 'strict') + Error handler for encoding unicode. 
(default: 'strict') """ def __init__(self, default=None, encoding=None, unicode_errors=None, use_single_float=False, autoreset=True, use_bin_type=False, @@ -727,10 +720,6 @@ def __init__(self, default=None, encoding=None, unicode_errors=None, if unicode_errors is None: unicode_errors = 'strict' - else: - warnings.warn( - "unicode_errors is deprecated.", - PendingDeprecationWarning) self._strict_types = strict_types self._use_float = use_single_float From d4675bee6c83d42cecda9a84e6716021eb2ad679 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 23 Feb 2018 15:45:34 +0900 Subject: [PATCH 134/349] 0.5.6 --- ChangeLog.rst | 7 +++++++ msgpack/_version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 263d1f34..b6158c33 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,10 @@ +0.5.6 +====== + +* Fix fallback.Unpacker.feed() dropped unused data from buffer (#287) +* Resurrect fallback.unpack() and _unpacker.unpack(). + They were removed at 0.5.5 but it breaks backward compatibility. (#288, #290) + 0.5.5 ====== diff --git a/msgpack/_version.py b/msgpack/_version.py index 4035588c..d28f0deb 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 5, 5) +version = (0, 5, 6) From 984116bd1805c52fb4e7c07bfd9635ed902850c3 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Apr 2018 23:41:01 +0900 Subject: [PATCH 135/349] Update setup() --- setup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 61085874..c252d816 100755 --- a/setup.py +++ b/setup.py @@ -120,7 +120,13 @@ def __init__(self, *args, **kwargs): packages=['msgpack'], description=desc, long_description=long_desc, - url='http://msgpack.org/', + long_description_content_type="text/x-rst", + url='https://msgpack.org/', + project_urls = { + 'Documentation': 'https://msgpack-python.readthedocs.io/', + 'Source': 'https://github.com/msgpack/msgpack-python', + 'Tracker': 'https://github.com/msgpack/msgpack-python/issues', + }, license='Apache 2.0', classifiers=[ 'Programming Language :: Python :: 2', From b10cf78f54a5daab866b19c32e45e207d838f52b Mon Sep 17 00:00:00 2001 From: Alex Gaynor Date: Sun, 15 Apr 2018 23:18:35 -0400 Subject: [PATCH 136/349] Fix TypeError in fallback.unpack() on H", self._buffer, self._buff_i)[0] + n = struct.unpack_from(">H", self._buffer_view, self._buff_i)[0] self._buff_i += 2 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) @@ -396,7 +402,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc6: typ = TYPE_BIN self._reserve(4) - n = struct.unpack_from(">I", self._buffer, self._buff_i)[0] + n = struct.unpack_from(">I", self._buffer_view, self._buff_i)[0] self._buff_i += 4 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) @@ -404,7 +410,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc7: # ext 8 typ = TYPE_EXT self._reserve(2) - L, n = struct.unpack_from('Bb', self._buffer, self._buff_i) + L, n = struct.unpack_from('Bb', self._buffer_view, self._buff_i) self._buff_i += 2 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) @@ -412,7 +418,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc8: # ext 16 typ = TYPE_EXT self._reserve(3) - L, n = struct.unpack_from('>Hb', self._buffer, self._buff_i) + L, n = struct.unpack_from('>Hb', self._buffer_view, self._buff_i) self._buff_i += 3 if 
L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) @@ -420,18 +426,18 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc9: # ext 32 typ = TYPE_EXT self._reserve(5) - L, n = struct.unpack_from('>Ib', self._buffer, self._buff_i) + L, n = struct.unpack_from('>Ib', self._buffer_view, self._buff_i) self._buff_i += 5 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xca: self._reserve(4) - obj = struct.unpack_from(">f", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">f", self._buffer_view, self._buff_i)[0] self._buff_i += 4 elif b == 0xcb: self._reserve(8) - obj = struct.unpack_from(">d", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">d", self._buffer_view, self._buff_i)[0] self._buff_i += 8 elif b == 0xcc: self._reserve(1) @@ -439,66 +445,66 @@ def _read_header(self, execute=EX_CONSTRUCT): self._buff_i += 1 elif b == 0xcd: self._reserve(2) - obj = struct.unpack_from(">H", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">H", self._buffer_view, self._buff_i)[0] self._buff_i += 2 elif b == 0xce: self._reserve(4) - obj = struct.unpack_from(">I", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">I", self._buffer_view, self._buff_i)[0] self._buff_i += 4 elif b == 0xcf: self._reserve(8) - obj = struct.unpack_from(">Q", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">Q", self._buffer_view, self._buff_i)[0] self._buff_i += 8 elif b == 0xd0: self._reserve(1) - obj = struct.unpack_from("b", self._buffer, self._buff_i)[0] + obj = struct.unpack_from("b", self._buffer_view, self._buff_i)[0] self._buff_i += 1 elif b == 0xd1: self._reserve(2) - obj = struct.unpack_from(">h", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">h", self._buffer_view, self._buff_i)[0] self._buff_i += 2 elif b == 0xd2: self._reserve(4) - obj = struct.unpack_from(">i", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">i", self._buffer_view, self._buff_i)[0] self._buff_i += 4 elif b == 0xd3: self._reserve(8) - obj = struct.unpack_from(">q", self._buffer, self._buff_i)[0] + obj = struct.unpack_from(">q", self._buffer_view, self._buff_i)[0] self._buff_i += 8 elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) self._reserve(2) - n, obj = struct.unpack_from("b1s", self._buffer, self._buff_i) + n, obj = struct.unpack_from("b1s", self._buffer_view, self._buff_i) self._buff_i += 2 elif b == 0xd5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) self._reserve(3) - n, obj = struct.unpack_from("b2s", self._buffer, self._buff_i) + n, obj = struct.unpack_from("b2s", self._buffer_view, self._buff_i) self._buff_i += 3 elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) self._reserve(5) - n, obj = struct.unpack_from("b4s", self._buffer, self._buff_i) + n, obj = struct.unpack_from("b4s", self._buffer_view, self._buff_i) self._buff_i += 5 elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) self._reserve(9) - n, obj = struct.unpack_from("b8s", self._buffer, self._buff_i) + n, obj = struct.unpack_from("b8s", self._buffer_view, self._buff_i) self._buff_i += 9 elif b == 0xd8: # 
fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) self._reserve(17) - n, obj = struct.unpack_from("b16s", self._buffer, self._buff_i) + n, obj = struct.unpack_from("b16s", self._buffer_view, self._buff_i) self._buff_i += 17 elif b == 0xd9: typ = TYPE_RAW @@ -511,7 +517,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xda: typ = TYPE_RAW self._reserve(2) - n, = struct.unpack_from(">H", self._buffer, self._buff_i) + n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) self._buff_i += 2 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) @@ -519,7 +525,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xdb: typ = TYPE_RAW self._reserve(4) - n, = struct.unpack_from(">I", self._buffer, self._buff_i) + n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) self._buff_i += 4 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) @@ -527,27 +533,27 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xdc: typ = TYPE_ARRAY self._reserve(2) - n, = struct.unpack_from(">H", self._buffer, self._buff_i) + n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) self._buff_i += 2 if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b == 0xdd: typ = TYPE_ARRAY self._reserve(4) - n, = struct.unpack_from(">I", self._buffer, self._buff_i) + n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) self._buff_i += 4 if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b == 0xde: self._reserve(2) - n, = struct.unpack_from(">H", self._buffer, self._buff_i) + n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) self._buff_i += 2 if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: self._reserve(4) - n, = struct.unpack_from(">I", self._buffer, self._buff_i) + n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) self._buff_i += 4 if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) From 5f684aed82d0d08079b9aa74e1d41cc2a367833d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 27 Jun 2018 01:27:31 +0900 Subject: [PATCH 137/349] fallback: Fix error on Jython Fixes #303 --- msgpack/fallback.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index c8c8c78a..20ad4c90 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -100,6 +100,13 @@ def _get_data_from_buffer(obj): return view +# Jython's memoryview support is incomplete +# See https://github.com/msgpack/msgpack-python/issues/303 +_is_jython = sys.platform.startswith('java') +if _is_jython: + _get_data_from_buffer = bytes + + def unpack(stream, **kwargs): warnings.warn( "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", From d1060de29309717355b78fe9092f7995897b4f0c Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 13 Jul 2018 19:54:44 +0900 Subject: [PATCH 138/349] travis: Run test on Python 3.4 (#307) Python 3.4 is not supported officially. But keep running test for a while, to know when msgpack-python stop working on Python 3.4 actually. 
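For reference, the Jython workaround in PATCH 137 above amounts to a module-level override of the buffer helper. A condensed sketch, assuming only the lines visible in that diff (the real helper performs additional validation and legacy buffer handling):

    import sys

    def _get_data_from_buffer(obj):
        # Simplified CPython path: expose the input through the buffer protocol.
        return memoryview(obj)

    # Jython's memoryview support is incomplete (see issue #303), so the helper
    # is rebound to bytes(): every input is copied into a plain byte string.
    if sys.platform.startswith('java'):
        _get_data_from_buffer = bytes
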
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 7aac6648..823c8b14 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ cache: pip python: - "2.7" + - "3.4" - "3.5" - "3.6" - "3.7-dev" From 70b5f21b34b28cd50125ee337b5a399001759b73 Mon Sep 17 00:00:00 2001 From: Raymond E Ferguson Date: Tue, 2 Oct 2018 11:20:06 +0000 Subject: [PATCH 139/349] Alternate fixes for jython and legacy CPython (#310) Python 3.4 is not supported officially. But keep running test for a while, to know when msgpack-python stop working on Python 3.4 actually. The current patches did not work under jython-2.7.1 where implicit casting of buffer or memoryview doesn't work. It may also be the jython is a little pickier about string casting non string bytes due to the underlying strong typing of java. See issues #303 & #304. --- msgpack/fallback.py | 74 +++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 20ad4c90..0b60ba2a 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -100,13 +100,6 @@ def _get_data_from_buffer(obj): return view -# Jython's memoryview support is incomplete -# See https://github.com/msgpack/msgpack-python/issues/303 -_is_jython = sys.platform.startswith('java') -if _is_jython: - _get_data_from_buffer = bytes - - def unpack(stream, **kwargs): warnings.warn( "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", @@ -133,6 +126,14 @@ def unpackb(packed, **kwargs): return ret +if sys.version_info < (2, 7, 6): + def _unpack_from(f, b, o=0): + """Explicit typcast for legacy struct.unpack_from""" + return struct.unpack_from(f, bytes(b), o) +else: + _unpack_from = struct.unpack_from + + class Unpacker(object): """Streaming unpacker. @@ -241,12 +242,6 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, #: array of bytes fed. self._buffer = bytearray() - # Some very old pythons don't support `struct.unpack_from()` with a - # `bytearray`. So we wrap it in a `buffer()` there. - if sys.version_info < (2, 7, 6): - self._buffer_view = buffer(self._buffer) - else: - self._buffer_view = self._buffer #: Which position we currently reads self._buff_i = 0 @@ -302,7 +297,8 @@ def feed(self, next_bytes): self._buff_i -= self._buf_checkpoint self._buf_checkpoint = 0 - self._buffer += view + # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython + self._buffer.extend(view) def _consume(self): """ Gets rid of the used parts of the buffer. 
""" @@ -401,7 +397,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc5: typ = TYPE_BIN self._reserve(2) - n = struct.unpack_from(">H", self._buffer_view, self._buff_i)[0] + n = _unpack_from(">H", self._buffer, self._buff_i)[0] self._buff_i += 2 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) @@ -409,7 +405,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc6: typ = TYPE_BIN self._reserve(4) - n = struct.unpack_from(">I", self._buffer_view, self._buff_i)[0] + n = _unpack_from(">I", self._buffer, self._buff_i)[0] self._buff_i += 4 if n > self._max_bin_len: raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) @@ -417,7 +413,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc7: # ext 8 typ = TYPE_EXT self._reserve(2) - L, n = struct.unpack_from('Bb', self._buffer_view, self._buff_i) + L, n = _unpack_from('Bb', self._buffer, self._buff_i) self._buff_i += 2 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) @@ -425,7 +421,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc8: # ext 16 typ = TYPE_EXT self._reserve(3) - L, n = struct.unpack_from('>Hb', self._buffer_view, self._buff_i) + L, n = _unpack_from('>Hb', self._buffer, self._buff_i) self._buff_i += 3 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) @@ -433,18 +429,18 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xc9: # ext 32 typ = TYPE_EXT self._reserve(5) - L, n = struct.unpack_from('>Ib', self._buffer_view, self._buff_i) + L, n = _unpack_from('>Ib', self._buffer, self._buff_i) self._buff_i += 5 if L > self._max_ext_len: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xca: self._reserve(4) - obj = struct.unpack_from(">f", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">f", self._buffer, self._buff_i)[0] self._buff_i += 4 elif b == 0xcb: self._reserve(8) - obj = struct.unpack_from(">d", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">d", self._buffer, self._buff_i)[0] self._buff_i += 8 elif b == 0xcc: self._reserve(1) @@ -452,66 +448,66 @@ def _read_header(self, execute=EX_CONSTRUCT): self._buff_i += 1 elif b == 0xcd: self._reserve(2) - obj = struct.unpack_from(">H", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">H", self._buffer, self._buff_i)[0] self._buff_i += 2 elif b == 0xce: self._reserve(4) - obj = struct.unpack_from(">I", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">I", self._buffer, self._buff_i)[0] self._buff_i += 4 elif b == 0xcf: self._reserve(8) - obj = struct.unpack_from(">Q", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">Q", self._buffer, self._buff_i)[0] self._buff_i += 8 elif b == 0xd0: self._reserve(1) - obj = struct.unpack_from("b", self._buffer_view, self._buff_i)[0] + obj = _unpack_from("b", self._buffer, self._buff_i)[0] self._buff_i += 1 elif b == 0xd1: self._reserve(2) - obj = struct.unpack_from(">h", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">h", self._buffer, self._buff_i)[0] self._buff_i += 2 elif b == 0xd2: self._reserve(4) - obj = struct.unpack_from(">i", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">i", self._buffer, self._buff_i)[0] self._buff_i += 4 elif b == 0xd3: self._reserve(8) - obj = struct.unpack_from(">q", self._buffer_view, self._buff_i)[0] + obj = _unpack_from(">q", self._buffer, self._buff_i)[0] 
self._buff_i += 8 elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) self._reserve(2) - n, obj = struct.unpack_from("b1s", self._buffer_view, self._buff_i) + n, obj = _unpack_from("b1s", self._buffer, self._buff_i) self._buff_i += 2 elif b == 0xd5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) self._reserve(3) - n, obj = struct.unpack_from("b2s", self._buffer_view, self._buff_i) + n, obj = _unpack_from("b2s", self._buffer, self._buff_i) self._buff_i += 3 elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) self._reserve(5) - n, obj = struct.unpack_from("b4s", self._buffer_view, self._buff_i) + n, obj = _unpack_from("b4s", self._buffer, self._buff_i) self._buff_i += 5 elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) self._reserve(9) - n, obj = struct.unpack_from("b8s", self._buffer_view, self._buff_i) + n, obj = _unpack_from("b8s", self._buffer, self._buff_i) self._buff_i += 9 elif b == 0xd8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) self._reserve(17) - n, obj = struct.unpack_from("b16s", self._buffer_view, self._buff_i) + n, obj = _unpack_from("b16s", self._buffer, self._buff_i) self._buff_i += 17 elif b == 0xd9: typ = TYPE_RAW @@ -524,7 +520,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xda: typ = TYPE_RAW self._reserve(2) - n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) + n, = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) @@ -532,7 +528,7 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xdb: typ = TYPE_RAW self._reserve(4) - n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) + n, = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_str_len: raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) @@ -540,27 +536,27 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xdc: typ = TYPE_ARRAY self._reserve(2) - n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) + n, = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b == 0xdd: typ = TYPE_ARRAY self._reserve(4) - n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) + n, = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_array_len: raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b == 0xde: self._reserve(2) - n, = struct.unpack_from(">H", self._buffer_view, self._buff_i) + n, = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: self._reserve(4) - n, = struct.unpack_from(">I", self._buffer_view, self._buff_i) + n, = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_map_len: raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) From 
205f7d39b26d7441f3cce86e93449e50bd71a6fa Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 3 Oct 2018 21:06:20 +0900 Subject: [PATCH 140/349] Start 0.6 development --- ChangeLog.rst | 6 ++++++ msgpack/_version.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index b6158c33..18a99173 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,9 @@ +0.6.0 +====== + +Release Date: TBD + + 0.5.6 ====== diff --git a/msgpack/_version.py b/msgpack/_version.py index d28f0deb..0952ec6a 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 5, 6) +version = (0, 6, 0, 'dev') From b077a21f89881df8af56a05cc41d4bdebea19105 Mon Sep 17 00:00:00 2001 From: Marat Sharafutdinov Date: Sun, 4 Nov 2018 19:14:11 +0300 Subject: [PATCH 141/349] Fix stream unpacking example in README (#317) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8925a65c..94a4bb21 100644 --- a/README.rst +++ b/README.rst @@ -142,7 +142,7 @@ stream (or from bytes provided through its ``feed`` method). buf = BytesIO() for i in range(100): - buf.write(msgpack.packb(range(i), use_bin_type=True)) + buf.write(msgpack.packb(i, use_bin_type=True)) buf.seek(0) From 91ec9e1daf5cc915a47e2b356a7b1dd9662573a3 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 7 Nov 2018 23:02:35 +0900 Subject: [PATCH 142/349] Update travis.yml --- .travis.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 823c8b14..fd6125d1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ -sudo: false +dist: xenial language: python cache: pip @@ -7,7 +7,8 @@ python: - "3.4" - "3.5" - "3.6" - - "3.7-dev" + - "3.7" + - "nightly" matrix: include: @@ -24,7 +25,12 @@ matrix: - docker pull $DOCKER_IMAGE script: - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh - - python: "pypy" + - python: "pypy2.7-5.10.0" + install: + - pip install -e . + script: + - py.test -v test + - python: "pypy3.5" install: - pip install -e . script: From f6f95972492bcb83d8fe4c63be3b96d46e47bab7 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 3 Oct 2018 20:55:51 +0900 Subject: [PATCH 143/349] Merge extension module There were `_packer.so` and `_unpacker.so`. But single module is simpler than double module. Merge extension module into single `_msgpack.so`. 
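With the two extensions merged into a single ``msgpack._msgpack`` module, an ordinary import probe is enough to see which implementation is in use. A minimal sketch that mirrors the CI commands later in this series (the fallback behaviour follows the ``msgpack/__init__.py`` hunk below):

    try:
        from msgpack import _msgpack      # merged C extension (was _packer + _unpacker)
        print("C extension available")
    except ImportError:
        from msgpack import fallback      # pure-Python implementation is used instead
        print("pure-Python fallback in use")
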
--- Makefile | 5 ++--- msgpack/__init__.py | 3 +-- msgpack/_msgpack.pyx | 4 ++++ setup.py | 13 +++---------- 4 files changed, 10 insertions(+), 15 deletions(-) create mode 100644 msgpack/_msgpack.pyx diff --git a/Makefile b/Makefile index 124f2437..ff9a482b 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ all: cython .PHONY: cython cython: - cython --cplus msgpack/*.pyx + cython --cplus msgpack/_msgpack.pyx .PHONY: test test: @@ -18,8 +18,7 @@ serve-doc: all .PHONY: clean clean: rm -rf build - rm -f msgpack/_packer.cpp - rm -f msgpack/_unpacker.cpp + rm -f msgpack/_msgpack.cpp rm -rf msgpack/__pycache__ rm -rf test/__pycache__ diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 3955a414..7c5d4c0c 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -22,8 +22,7 @@ def __new__(cls, code, data): from msgpack.fallback import Packer, unpackb, Unpacker else: try: - from msgpack._packer import Packer - from msgpack._unpacker import unpackb, Unpacker + from msgpack._msgpack import Packer, unpackb, Unpacker except ImportError: from msgpack.fallback import Packer, unpackb, Unpacker diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx new file mode 100644 index 00000000..4381394f --- /dev/null +++ b/msgpack/_msgpack.pyx @@ -0,0 +1,4 @@ +# coding: utf-8 +#cython: embedsignature=True, c_string_encoding=ascii +include "_packer.pyx" +include "_unpacker.pyx" diff --git a/setup.py b/setup.py index c252d816..8b8f7bdf 100755 --- a/setup.py +++ b/setup.py @@ -68,8 +68,7 @@ def build_extension(self, ext): if have_cython: class Sdist(sdist): def __init__(self, *args, **kwargs): - for src in glob('msgpack/*.pyx'): - cythonize(src) + cythonize('msgpack/_msgpack.pyx') sdist.__init__(self, *args, **kwargs) else: Sdist = sdist @@ -85,14 +84,8 @@ def __init__(self, *args, **kwargs): ext_modules = [] if not hasattr(sys, 'pypy_version_info'): - ext_modules.append(Extension('msgpack._packer', - sources=['msgpack/_packer.cpp'], - libraries=libraries, - include_dirs=['.'], - define_macros=macros, - )) - ext_modules.append(Extension('msgpack._unpacker', - sources=['msgpack/_unpacker.cpp'], + ext_modules.append(Extension('msgpack._msgpack', + sources=['msgpack/_msgpack.cpp'], libraries=libraries, include_dirs=['.'], define_macros=macros, From 6c8e539eec5bfbda752337c34fb701980f061859 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 3 Oct 2018 21:09:45 +0900 Subject: [PATCH 144/349] Update travis config --- .travis.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index fd6125d1..822ca9ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ matrix: install: - pip install -U pip - pip install cython - - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + - make cython - docker pull $DOCKER_IMAGE script: - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh @@ -34,19 +34,19 @@ matrix: install: - pip install -e . script: - - py.test -v test + - pytest -v test install: - pip install -U pip - pip install cython - - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + - make cython - pip install -e . 
script: - python -c 'import sys; print(hex(sys.maxsize))' - - python -c 'from msgpack import _packer, _unpacker' - - py.test -v test - - MSGPACK_PUREPYTHON=x py.test -v test + - python -c 'from msgpack import _msgpack' + - pytest -v test + - MSGPACK_PUREPYTHON=x pytest -v test # vim: sw=2 ts=2 From 9d11249d89c67fd87acaeb16184414b5ef0b5aa4 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 3 Oct 2018 21:44:06 +0900 Subject: [PATCH 145/349] Update docker/runtests --- docker/runtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/runtests.sh b/docker/runtests.sh index 113b6307..f6c2c68e 100755 --- a/docker/runtests.sh +++ b/docker/runtests.sh @@ -8,7 +8,7 @@ for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do $PYBIN/pip install pytest pushd test # prevent importing msgpack package in current directory. $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' - $PYBIN/python -c 'from msgpack import _packer, _unpacker' + $PYBIN/python -c 'from msgpack import _msgpack' # Ensure extension is available $PYBIN/pytest -v . popd done From ae90b26c3015e090a80a880b73895daa35f048fa Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 8 Nov 2018 22:21:05 +0900 Subject: [PATCH 146/349] Update ChangeLog --- ChangeLog.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index 18a99173..b135745c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -3,6 +3,10 @@ Release Date: TBD +Extension modules are merged. There is ``msgpack._msgpack`` instead of +``msgpack._packer`` and ``msgpack._unpacker``. (#314) + + 0.5.6 ====== From 3b80233592674d18c8db7a62fa56504a5a285296 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 8 Nov 2018 22:21:44 +0900 Subject: [PATCH 147/349] unpacker: Make default size limit smaller (#319) To avoid DoS attack, make default size limit smaller. Fixes #295 --- msgpack/_unpacker.pyx | 31 +++++++++++++++++-------------- msgpack/fallback.py | 20 +++++++++++--------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index d7fa5bc6..cc9e7f0c 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -162,11 +162,11 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=True, bint raw=True, encoding=None, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): + Py_ssize_t max_str_len=1024*1024, + Py_ssize_t max_bin_len=1024*1024, + Py_ssize_t max_array_len=128*1024, + Py_ssize_t max_map_len=32*1024, + Py_ssize_t max_ext_len=1024*1024): """ Unpack packed_bytes to object. Returns an unpacked object. @@ -261,16 +261,19 @@ cdef class Unpacker(object): You should set this parameter when unpacking data from untrusted source. :param int max_str_len: - Limits max length of str. (default: 2**31-1) + Limits max length of str. (default: 1024*1024) :param int max_bin_len: - Limits max length of bin. (default: 2**31-1) + Limits max length of bin. (default: 1024*1024) :param int max_array_len: - Limits max length of array. (default: 2**31-1) + Limits max length of array. (default: 128*1024) :param int max_map_len: - Limits max length of map. (default: 2**31-1) + Limits max length of map. (default: 32*1024) + + :param int max_ext_len: + Limits max length of map. (default: 1024*1024) :param str encoding: Deprecated, use raw instead. 
@@ -322,11 +325,11 @@ cdef class Unpacker(object): object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors=None, int max_buffer_size=0, object ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): + Py_ssize_t max_str_len=1024*1024, + Py_ssize_t max_bin_len=1024*1024, + Py_ssize_t max_array_len=128*1024, + Py_ssize_t max_map_len=32*1024, + Py_ssize_t max_ext_len=1024*1024): cdef const char *cenc=NULL, cdef const char *cerr=NULL diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 0b60ba2a..895864ed 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -184,17 +184,19 @@ class Unpacker(object): You should set this parameter when unpacking data from untrusted source. :param int max_str_len: - Limits max length of str. (default: 2**31-1) + Limits max length of str. (default: 1024*1024) :param int max_bin_len: - Limits max length of bin. (default: 2**31-1) + Limits max length of bin. (default: 1024*1024) :param int max_array_len: - Limits max length of array. (default: 2**31-1) + Limits max length of array. (default: 128*1024) :param int max_map_len: - Limits max length of map. (default: 2**31-1) + Limits max length of map. (default: 32*1024) + :param int max_ext_len: + Limits max length of map. (default: 1024*1024) example of streaming deserialize from file-like object:: @@ -218,11 +220,11 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, object_hook=None, object_pairs_hook=None, list_hook=None, encoding=None, unicode_errors=None, max_buffer_size=0, ext_hook=ExtType, - max_str_len=2147483647, # 2**32-1 - max_bin_len=2147483647, - max_array_len=2147483647, - max_map_len=2147483647, - max_ext_len=2147483647): + max_str_len=1024*1024, + max_bin_len=1024*1024, + max_array_len=128*1024, + max_map_len=32*1024, + max_ext_len=1024*1024): if encoding is not None: warnings.warn( From a8b3e97fe588a2411a8e869b52be1946ed9f0f86 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 8 Nov 2018 22:25:05 +0900 Subject: [PATCH 148/349] Update changelog --- ChangeLog.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index b135745c..d71e0f5c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -3,9 +3,21 @@ Release Date: TBD + +Important changes +------------------ + Extension modules are merged. There is ``msgpack._msgpack`` instead of ``msgpack._packer`` and ``msgpack._unpacker``. (#314) +unpacker: Default size limits is smaller than before to avoid DoS attack. +If you need to handle large data, you need to specify limits manually. 
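In practice, "specify limits manually" means passing the ``max_*`` keyword arguments documented above. A minimal sketch for trusted input; the parameter names and defaults come from the diffs, the payload size is illustrative:

    import msgpack

    packed = msgpack.packb(b"x" * (2 * 1024 * 1024), use_bin_type=True)

    try:
        msgpack.unpackb(packed)            # default max_bin_len is now 1024*1024
    except ValueError as e:                # UnpackValueError is a ValueError subclass
        print("rejected:", e)

    # Trusted large payloads now need explicit limits.
    obj = msgpack.unpackb(packed, max_bin_len=4 * 1024 * 1024)
    assert len(obj) == 2 * 1024 * 1024
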
+ + + +Other changes +-------------- + 0.5.6 From 9e210bfc1a922031db67bf42e508b1b4550814c6 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 9 Nov 2018 20:55:13 +0900 Subject: [PATCH 149/349] Add Packer.buffer() (#320) --- ChangeLog.rst | 2 ++ docs/Makefile | 2 +- docs/advanced.rst | 32 ++++++++++++++++++++++++++++++++ docs/index.rst | 1 + msgpack/_packer.pyx | 14 ++++++++++++-- msgpack/buff_converter.h | 28 ++++++++++++++++++++++++++++ msgpack/fallback.py | 36 ++++++++++++++++++++---------------- test/test_pack.py | 12 +++++++++++- 8 files changed, 107 insertions(+), 20 deletions(-) create mode 100644 docs/advanced.rst create mode 100644 msgpack/buff_converter.h diff --git a/ChangeLog.rst b/ChangeLog.rst index d71e0f5c..cc6b5e4f 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -18,6 +18,8 @@ If you need to handle large data, you need to specify limits manually. Other changes -------------- +Add ``Unpacker.getbuffer()`` method. + 0.5.6 diff --git a/docs/Makefile b/docs/Makefile index b09d8844..831a6a7f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -153,7 +153,7 @@ doctest: "results in $(BUILDDIR)/doctest/output.txt." serve: html - cd _build/html && python3 -m http.server + python3 -m http.server -d _build/html zip: html cd _build/html && zip -r ../../../msgpack-doc.zip . diff --git a/docs/advanced.rst b/docs/advanced.rst new file mode 100644 index 00000000..38370088 --- /dev/null +++ b/docs/advanced.rst @@ -0,0 +1,32 @@ +Advanced usage +=============== + +Packer +------ + +autoreset +~~~~~~~~~ + +When you used ``autoreset=False`` option of :class:`~msgpack.Packer`, +``pack()`` method doesn't return packed ``bytes``. + +You can use :meth:`~msgpack.Packer.bytes` or :meth:`~msgpack.Packer.getbuffer` to +get packed data. + +``bytes()`` returns ``bytes`` object. ``getbuffer()`` returns some bytes-like +object. It's concrete type is implement detail and it will be changed in future +versions. + +You can reduce temporary bytes object by using ``Unpacker.getbuffer()``. + +.. code-block:: python + + packer = Packer(use_bin_type=True, autoreset=False) + + packer.pack([1, 2]) + packer.pack([3, 4]) + + with open('data.bin', 'wb') as f: + f.write(packer.getbuffer()) + + packer.reset() # reset internal buffer diff --git a/docs/index.rst b/docs/index.rst index dcdab4f5..e9c2ce83 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,3 +8,4 @@ language data exchange. :maxdepth: 1 api + advanced diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 225f24ae..fd05ae06 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -41,6 +41,9 @@ cdef extern from "pack.h": int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) +cdef extern from "buff_converter.h": + object buff_to_buff(char *, Py_ssize_t) + cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -349,9 +352,16 @@ cdef class Packer(object): return buf def reset(self): - """Clear internal buffer.""" + """Reset internal buffer. + + This method is usaful only when autoreset=False. 
+ """ self.pk.length = 0 def bytes(self): - """Return buffer content.""" + """Return internal buffer contents as bytes object""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + + def getbuffer(self): + """Return view of internal buffer.""" + return buff_to_buff(self.pk.buf, self.pk.length) diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h new file mode 100644 index 00000000..bc7227ae --- /dev/null +++ b/msgpack/buff_converter.h @@ -0,0 +1,28 @@ +#include "Python.h" + +/* cython does not support this preprocessor check => write it in raw C */ +#if PY_MAJOR_VERSION == 2 +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + return PyBuffer_FromMemory(buff, size); +} + +#elif (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION >= 3) +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + return PyMemoryView_FromMemory(buff, size, PyBUF_READ); +} +#else +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + Py_buffer pybuf; + if (PyBuffer_FillInfo(&pybuf, NULL, buff, size, 1, PyBUF_FULL_RO) == -1) { + return NULL; + } + + return PyMemoryView_FromBuffer(&pybuf); +} +#endif diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 895864ed..5b4d6cef 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -860,43 +860,35 @@ def pack(self, obj): except: self._buffer = StringIO() # force reset raise - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_array_header(self, n): if n >= 2**32: raise PackValueError self._pack_array_header(n) - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_map_header(self, n): if n >= 2**32: raise PackValueError self._pack_map_header(n) - ret = self._buffer.getvalue() if self._autoreset: + ret = self._buffer.getvalue() self._buffer = StringIO() - elif USING_STRINGBUILDER: - self._buffer = StringIO(ret) - return ret + return ret def pack_ext_type(self, typecode, data): if not isinstance(typecode, int): @@ -976,7 +968,19 @@ def _pack_bin_header(self, n): raise PackValueError('Bin is too large') def bytes(self): + """Return internal buffer contents as bytes object""" return self._buffer.getvalue() def reset(self): + """Reset internal buffer. + + This method is usaful only when autoreset=False. 
+ """ self._buffer = StringIO() + + def getbuffer(self): + """Return view of internal buffer.""" + if USING_STRINGBUILDER or not PY3: + return memoryview(self.bytes()) + else: + return self._buffer.getbuffer() diff --git a/test/test_pack.py b/test/test_pack.py index b447f9c3..46080832 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -5,7 +5,7 @@ import struct from pytest import raises, xfail -from msgpack import packb, unpackb, Unpacker, Packer +from msgpack import packb, unpackb, Unpacker, Packer, pack from collections import OrderedDict from io import BytesIO @@ -148,3 +148,13 @@ def test_pairlist(): packed = packer.pack_map_pairs(pairlist) unpacked = unpackb(packed, object_pairs_hook=list) assert pairlist == unpacked + +def test_get_buffer(): + packer = Packer(autoreset=0, use_bin_type=True) + packer.pack([1, 2]) + strm = BytesIO() + strm.write(packer.getbuffer()) + written = strm.getvalue() + + expected = packb([1, 2], use_bin_type=True) + assert written == expected From 1bf62ba6f8f94ab8a7dd135e0039ee3b10e0e96c Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 9 Nov 2018 21:39:25 +0900 Subject: [PATCH 150/349] PendingDeprecationWarning -> DeprecationWarning (#321) --- msgpack/_packer.pyx | 2 +- msgpack/_unpacker.pyx | 6 +++--- msgpack/fallback.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index fd05ae06..6a6d9176 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -121,7 +121,7 @@ cdef class Packer(object): bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, bint strict_types=False): if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1) + PyErr_WarnEx(DeprecationWarning, "encoding is deprecated.", 1) self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index cc9e7f0c..85c404ae 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -186,7 +186,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef int new_protocol = 0 if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) + PyErr_WarnEx(DeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) cenc = encoding if unicode_errors is not None: @@ -213,7 +213,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, def unpack(object stream, **kwargs): PyErr_WarnEx( - PendingDeprecationWarning, + DeprecationWarning, "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", 1) data = stream.read() return unpackb(data, **kwargs) @@ -360,7 +360,7 @@ cdef class Unpacker(object): self.stream_offset = 0 if encoding is not None: - PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) + PyErr_WarnEx(DeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) self.encoding = encoding cenc = encoding diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 5b4d6cef..197b6d27 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -103,7 +103,7 @@ def _get_data_from_buffer(obj): def unpack(stream, **kwargs): warnings.warn( "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", - PendingDeprecationWarning) + DeprecationWarning) data = stream.read() return unpackb(data, **kwargs) @@ -229,7 +229,7 @@ def 
__init__(self, file_like=None, read_size=0, use_list=True, raw=True, if encoding is not None: warnings.warn( "encoding is deprecated, Use raw=False instead.", - PendingDeprecationWarning) + DeprecationWarning) if unicode_errors is None: unicode_errors = 'strict' @@ -727,7 +727,7 @@ def __init__(self, default=None, encoding=None, unicode_errors=None, else: warnings.warn( "encoding is deprecated, Use raw=False instead.", - PendingDeprecationWarning) + DeprecationWarning) if unicode_errors is None: unicode_errors = 'strict' From 07f0beeabb71828377f481ff83746997b3babf23 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 12 Nov 2018 02:19:01 +0900 Subject: [PATCH 151/349] Remove deprecated exception classes (#323) --- msgpack/_packer.pyx | 27 ++++++++------- msgpack/_unpacker.pyx | 42 +++++++++++------------ msgpack/exceptions.py | 35 +++++++++----------- msgpack/fallback.py | 77 +++++++++++++++++++++---------------------- 4 files changed, 85 insertions(+), 96 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 6a6d9176..2643f858 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -5,7 +5,6 @@ from cpython cimport * from cpython.version cimport PY_MAJOR_VERSION from cpython.exc cimport PyErr_WarnEx -from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType @@ -165,7 +164,7 @@ cdef class Packer(object): cdef Py_buffer view if nest_limit < 0: - raise PackValueError("recursion limit exceeded.") + raise ValueError("recursion limit exceeded.") while True: if o is None: @@ -191,7 +190,7 @@ cdef class Packer(object): default_used = True continue else: - raise PackOverflowError("Integer value out of range") + raise OverflowError("Integer value out of range") elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): longval = o ret = msgpack_pack_long(&self.pk, longval) @@ -205,7 +204,7 @@ cdef class Packer(object): elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): L = len(o) if L > ITEM_LIMIT: - raise PackValueError("%s is too large" % type(o).__name__) + raise ValueError("%s is too large" % type(o).__name__) rawval = o ret = msgpack_pack_bin(&self.pk, L) if ret == 0: @@ -214,12 +213,12 @@ cdef class Packer(object): if self.encoding == NULL and self.unicode_errors == NULL: ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); if ret == -2: - raise PackValueError("unicode string is too large") + raise ValueError("unicode string is too large") else: o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) if L > ITEM_LIMIT: - raise PackValueError("unicode string is too large") + raise ValueError("unicode string is too large") ret = msgpack_pack_raw(&self.pk, L) if ret == 0: rawval = o @@ -228,7 +227,7 @@ cdef class Packer(object): d = o L = len(d) if L > ITEM_LIMIT: - raise PackValueError("dict is too large") + raise ValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: for k, v in d.iteritems(): @@ -239,7 +238,7 @@ cdef class Packer(object): elif not strict_types and PyDict_Check(o): L = len(o) if L > ITEM_LIMIT: - raise PackValueError("dict is too large") + raise ValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: for k, v in o.items(): @@ -253,13 +252,13 @@ cdef class Packer(object): rawval = o.data L = len(o.data) if L > ITEM_LIMIT: - raise PackValueError("EXT data is too large") + raise ValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif 
PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): L = len(o) if L > ITEM_LIMIT: - raise PackValueError("list is too large") + raise ValueError("list is too large") ret = msgpack_pack_array(&self.pk, L) if ret == 0: for v in o: @@ -267,11 +266,11 @@ cdef class Packer(object): if ret != 0: break elif PyMemoryView_Check(o): if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: - raise PackValueError("could not get buffer for memoryview") + raise ValueError("could not get buffer for memoryview") L = view.len if L > ITEM_LIMIT: PyBuffer_Release(&view); - raise PackValueError("memoryview is too large") + raise ValueError("memoryview is too large") ret = msgpack_pack_bin(&self.pk, L) if ret == 0: ret = msgpack_pack_raw_body(&self.pk, view.buf, L) @@ -304,7 +303,7 @@ cdef class Packer(object): def pack_array_header(self, long long size): if size > ITEM_LIMIT: - raise PackValueError + raise ValueError cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: raise MemoryError @@ -317,7 +316,7 @@ cdef class Packer(object): def pack_map_header(self, long long size): if size > ITEM_LIMIT: - raise PackValueError + raise ValueError cdef int ret = msgpack_pack_map(&self.pk, size) if ret == -1: raise MemoryError diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 85c404ae..2f99019b 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -35,7 +35,6 @@ ctypedef unsigned long long uint64_t from msgpack.exceptions import ( BufferFull, OutOfData, - UnpackValueError, ExtraData, ) from msgpack import ExtType @@ -208,7 +207,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj unpack_clear(&ctx) - raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + raise ValueError("Unpack failed: error = %d" % (ret,)) def unpack(object stream, **kwargs): @@ -460,28 +459,25 @@ cdef class Unpacker(object): else: raise OutOfData("No more data to unpack.") - try: - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - self.stream_offset += self.buf_head - prev_head - if write_bytes is not None: - write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) - - if ret == 1: - obj = unpack_data(&self.ctx) - unpack_init(&self.ctx) - return obj - elif ret == 0: - if self.file_like is not None: - self.read_from_file() - continue - if iter: - raise StopIteration("No more data to unpack.") - else: - raise OutOfData("No more data to unpack.") + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + self.stream_offset += self.buf_head - prev_head + if write_bytes is not None: + write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) + + if ret == 1: + obj = unpack_data(&self.ctx) + unpack_init(&self.ctx) + return obj + elif ret == 0: + if self.file_like is not None: + self.read_from_file() + continue + if iter: + raise StopIteration("No more data to unpack.") else: - raise UnpackValueError("Unpack failed: error = %d" % (ret,)) - except ValueError as e: - raise UnpackValueError(e) + raise OutOfData("No more data to unpack.") + else: + raise ValueError("Unpack failed: error = %d" % (ret,)) def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index 97668814..5bee5b2f 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -1,6 +1,10 @@ class UnpackException(Exception): - 
"""Deprecated. Use Exception instead to catch all exception during unpacking.""" + """Base class for some exceptions raised while unpacking. + NOTE: unpack may raise exception other than subclass of + UnpackException. If you want to catch all error, catch + Exception instead. + """ class BufferFull(UnpackException): pass @@ -10,11 +14,16 @@ class OutOfData(UnpackException): pass -class UnpackValueError(UnpackException, ValueError): - """Deprecated. Use ValueError instead.""" +# Deprecated. Use ValueError instead +UnpackValueError = ValueError class ExtraData(UnpackValueError): + """ExtraData is raised when there is trailing data. + + This exception is raised while only one-shot (not streaming) + unpack. + """ def __init__(self, unpacked, extra): self.unpacked = unpacked self.extra = extra @@ -23,19 +32,7 @@ def __str__(self): return "unpack(b) received extra data." -class PackException(Exception): - """Deprecated. Use Exception instead to catch all exception during packing.""" - - -class PackValueError(PackException, ValueError): - """PackValueError is raised when type of input data is supported but it's value is unsupported. - - Deprecated. Use ValueError instead. - """ - - -class PackOverflowError(PackValueError, OverflowError): - """PackOverflowError is raised when integer value is out of range of msgpack support [-2**31, 2**32). - - Deprecated. Use ValueError instead. - """ +#Deprecated. Use Exception instead to catch all exception during packing. +PackException = Exception +PackValueError = ValueError +PackOverflowError = OverflowError diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 197b6d27..9d461718 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -52,9 +52,6 @@ def getvalue(self): from msgpack.exceptions import ( BufferFull, OutOfData, - UnpackValueError, - PackValueError, - PackOverflowError, ExtraData) from msgpack import ExtType @@ -120,7 +117,7 @@ def unpackb(packed, **kwargs): try: ret = unpacker._unpack() except OutOfData: - raise UnpackValueError("Data is not enough.") + raise ValueError("Data is not enough.") if unpacker._got_extradata(): raise ExtraData(ret, unpacker._get_extradata()) return ret @@ -370,18 +367,18 @@ def _read_header(self, execute=EX_CONSTRUCT): n = b & 0b00011111 typ = TYPE_RAW if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) elif b == 0xc0: obj = None elif b == 0xc2: @@ -394,7 +391,7 @@ def _read_header(self, execute=EX_CONSTRUCT): n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_bin_len: - raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) elif b == 0xc5: typ = TYPE_BIN @@ -402,7 +399,7 @@ def _read_header(self, execute=EX_CONSTRUCT): n = _unpack_from(">H", self._buffer, self._buff_i)[0] self._buff_i += 2 if n > self._max_bin_len: - raise UnpackValueError("%s 
exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) elif b == 0xc6: typ = TYPE_BIN @@ -410,7 +407,7 @@ def _read_header(self, execute=EX_CONSTRUCT): n = _unpack_from(">I", self._buffer, self._buff_i)[0] self._buff_i += 4 if n > self._max_bin_len: - raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) elif b == 0xc7: # ext 8 typ = TYPE_EXT @@ -418,7 +415,7 @@ def _read_header(self, execute=EX_CONSTRUCT): L, n = _unpack_from('Bb', self._buffer, self._buff_i) self._buff_i += 2 if L > self._max_ext_len: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xc8: # ext 16 typ = TYPE_EXT @@ -426,7 +423,7 @@ def _read_header(self, execute=EX_CONSTRUCT): L, n = _unpack_from('>Hb', self._buffer, self._buff_i) self._buff_i += 3 if L > self._max_ext_len: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xc9: # ext 32 typ = TYPE_EXT @@ -434,7 +431,7 @@ def _read_header(self, execute=EX_CONSTRUCT): L, n = _unpack_from('>Ib', self._buffer, self._buff_i) self._buff_i += 5 if L > self._max_ext_len: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) elif b == 0xca: self._reserve(4) @@ -479,35 +476,35 @@ def _read_header(self, execute=EX_CONSTRUCT): elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) self._reserve(2) n, obj = _unpack_from("b1s", self._buffer, self._buff_i) self._buff_i += 2 elif b == 0xd5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) self._reserve(3) n, obj = _unpack_from("b2s", self._buffer, self._buff_i) self._buff_i += 3 elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) self._reserve(5) n, obj = _unpack_from("b4s", self._buffer, self._buff_i) self._buff_i += 5 elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) self._reserve(9) n, obj = _unpack_from("b8s", self._buffer, self._buff_i) self._buff_i += 9 elif b == 0xd8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: - raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) + raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) self._reserve(17) n, obj = _unpack_from("b16s", self._buffer, self._buff_i) self._buff_i += 17 @@ -517,7 +514,7 @@ def _read_header(self, execute=EX_CONSTRUCT): n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise 
ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b == 0xda: typ = TYPE_RAW @@ -525,7 +522,7 @@ def _read_header(self, execute=EX_CONSTRUCT): n, = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b == 0xdb: typ = TYPE_RAW @@ -533,7 +530,7 @@ def _read_header(self, execute=EX_CONSTRUCT): n, = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_str_len: - raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b == 0xdc: typ = TYPE_ARRAY @@ -541,30 +538,30 @@ def _read_header(self, execute=EX_CONSTRUCT): n, = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_array_len: - raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b == 0xdd: typ = TYPE_ARRAY self._reserve(4) n, = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_array_len: - raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b == 0xde: self._reserve(2) n, = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_map_len: - raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: self._reserve(4) n, = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_map_len: - raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP else: - raise UnpackValueError("Unknown header: 0x%x" % b) + raise ValueError("Unknown header: 0x%x" % b) return typ, n, obj def _unpack(self, execute=EX_CONSTRUCT): @@ -572,11 +569,11 @@ def _unpack(self, execute=EX_CONSTRUCT): if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: - raise UnpackValueError("Expected array") + raise ValueError("Expected array") return n if execute == EX_READ_MAP_HEADER: if typ != TYPE_MAP: - raise UnpackValueError("Expected map") + raise ValueError("Expected map") return n # TODO should we eliminate the recursion? 
if typ == TYPE_ARRAY: @@ -754,7 +751,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, list_types = (list, tuple) while True: if nest_limit < 0: - raise PackValueError("recursion limit exceeded") + raise ValueError("recursion limit exceeded") if obj is None: return self._buffer.write(b"\xc0") if check(obj, bool): @@ -786,11 +783,11 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, obj = self._default(obj) default_used = True continue - raise PackOverflowError("Integer value out of range") + raise OverflowError("Integer value out of range") if check(obj, (bytes, bytearray)): n = len(obj) if n >= 2**32: - raise PackValueError("%s is too large" % type(obj).__name__) + raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, Unicode): @@ -801,13 +798,13 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, obj = obj.encode(self._encoding, self._unicode_errors) n = len(obj) if n >= 2**32: - raise PackValueError("String is too large") + raise ValueError("String is too large") self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): n = len(obj) * obj.itemsize if n >= 2**32: - raise PackValueError("Memoryview is too large") + raise ValueError("Memoryview is too large") self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, float): @@ -874,7 +871,7 @@ def pack_map_pairs(self, pairs): def pack_array_header(self, n): if n >= 2**32: - raise PackValueError + raise ValueError self._pack_array_header(n) if self._autoreset: ret = self._buffer.getvalue() @@ -883,7 +880,7 @@ def pack_array_header(self, n): def pack_map_header(self, n): if n >= 2**32: - raise PackValueError + raise ValueError self._pack_map_header(n) if self._autoreset: ret = self._buffer.getvalue() @@ -899,7 +896,7 @@ def pack_ext_type(self, typecode, data): raise TypeError("data must have bytes type") L = len(data) if L > 0xffffffff: - raise PackValueError("Too large data") + raise ValueError("Too large data") if L == 1: self._buffer.write(b'\xd4') elif L == 2: @@ -926,7 +923,7 @@ def _pack_array_header(self, n): return self._buffer.write(struct.pack(">BH", 0xdc, n)) if n <= 0xffffffff: return self._buffer.write(struct.pack(">BI", 0xdd, n)) - raise PackValueError("Array is too large") + raise ValueError("Array is too large") def _pack_map_header(self, n): if n <= 0x0f: @@ -935,7 +932,7 @@ def _pack_map_header(self, n): return self._buffer.write(struct.pack(">BH", 0xde, n)) if n <= 0xffffffff: return self._buffer.write(struct.pack(">BI", 0xdf, n)) - raise PackValueError("Dict is too large") + raise ValueError("Dict is too large") def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack_map_header(n) @@ -953,7 +950,7 @@ def _pack_raw_header(self, n): elif n <= 0xffffffff: self._buffer.write(struct.pack(">BI", 0xdb, n)) else: - raise PackValueError('Raw is too large') + raise ValueError('Raw is too large') def _pack_bin_header(self, n): if not self._use_bin_type: @@ -965,7 +962,7 @@ def _pack_bin_header(self, n): elif n <= 0xffffffff: return self._buffer.write(struct.pack(">BI", 0xc6, n)) else: - raise PackValueError('Bin is too large') + raise ValueError('Bin is too large') def bytes(self): """Return internal buffer contents as bytes object""" From 39f8aa78c7bc6b6b18a1c814a0c296f55242f028 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 12 Nov 2018 02:33:31 +0900 Subject: [PATCH 152/349] Remove deprecated write_bytes option (#322) --- msgpack/_unpacker.pyx | 35 
++++++++++++----------------------- msgpack/fallback.py | 20 ++++---------------- test/test_unpack_raw.py | 29 ----------------------------- 3 files changed, 16 insertions(+), 68 deletions(-) delete mode 100644 test/test_unpack_raw.py diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 2f99019b..e168587b 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -135,10 +135,10 @@ cdef inline int get_data_from_buffer(object obj, if view.itemsize != 1: PyBuffer_Release(view) raise BufferError("cannot unpack from multi-byte object") - if PyBuffer_IsContiguous(view, 'A') == 0: + if PyBuffer_IsContiguous(view, b'A') == 0: PyBuffer_Release(view) # create a contiguous copy and get buffer - contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C') + contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C') PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) # view must hold the only reference to contiguous, # so memory is freed when view is released @@ -440,14 +440,11 @@ cdef class Unpacker(object): else: self.file_like = None - cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0): + cdef object _unpack(self, execute_fn execute, bint iter=0): cdef int ret cdef object obj cdef Py_ssize_t prev_head - if write_bytes is not None: - PyErr_WarnEx(DeprecationWarning, "`write_bytes` option is deprecated. Use `.tell()` instead.", 1) - if self.buf_head >= self.buf_tail and self.file_like is not None: self.read_from_file() @@ -461,8 +458,6 @@ cdef class Unpacker(object): ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) self.stream_offset += self.buf_head - prev_head - if write_bytes is not None: - write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) if ret == 1: obj = unpack_data(&self.ctx) @@ -489,41 +484,35 @@ cdef class Unpacker(object): ret += self.file_like.read(nbytes - len(ret)) return ret - def unpack(self, object write_bytes=None): + def unpack(self): """Unpack one object - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_construct, write_bytes) + return self._unpack(unpack_construct) - def skip(self, object write_bytes=None): + def skip(self): """Read and ignore one object, returning None - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_skip, write_bytes) + return self._unpack(unpack_skip) - def read_array_header(self, object write_bytes=None): + def read_array_header(self): """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents. Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(read_array_header, write_bytes) + return self._unpack(read_array_header) - def read_map_header(self, object write_bytes=None): + def read_map_header(self): """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs. Raises `OutOfData` when there are no more bytes to unpack. 
""" - return self._unpack(read_map_header, write_bytes) + return self._unpack(read_map_header) def tell(self): return self.stream_offset @@ -532,7 +521,7 @@ cdef class Unpacker(object): return self def __next__(self): - return self._unpack(unpack_construct, None, 1) + return self._unpack(unpack_construct, 1) # for debug. #def _buf(self): diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9d461718..b9ef2969 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -640,34 +640,22 @@ def __next__(self): next = __next__ - def skip(self, write_bytes=None): + def skip(self): self._unpack(EX_SKIP) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() - def unpack(self, write_bytes=None): + def unpack(self): ret = self._unpack(EX_CONSTRUCT) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret - def read_array_header(self, write_bytes=None): + def read_array_header(self): ret = self._unpack(EX_READ_ARRAY_HEADER) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret - def read_map_header(self, write_bytes=None): + def read_map_header(self): ret = self._unpack(EX_READ_MAP_HEADER) - if write_bytes is not None: - warnings.warn("`write_bytes` option is deprecated. Use `.tell()` instead.", DeprecationWarning) - write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) self._consume() return ret diff --git a/test/test_unpack_raw.py b/test/test_unpack_raw.py deleted file mode 100644 index 70026012..00000000 --- a/test/test_unpack_raw.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Tests for cases where the user seeks to obtain packed msgpack objects""" - -import io -from msgpack import Unpacker, packb - - -def test_write_bytes(): - unpacker = Unpacker() - unpacker.feed(b'abc') - f = io.BytesIO() - assert unpacker.unpack(f.write) == ord('a') - assert f.getvalue() == b'a' - f = io.BytesIO() - assert unpacker.skip(f.write) is None - assert f.getvalue() == b'b' - f = io.BytesIO() - assert unpacker.skip() is None - assert f.getvalue() == b'' - - -def test_write_bytes_multi_buffer(): - long_val = (5) * 100 - expected = packb(long_val) - unpacker = Unpacker(io.BytesIO(expected), read_size=3, max_buffer_size=3) - - f = io.BytesIO() - unpacked = unpacker.unpack(f.write) - assert unpacked == long_val - assert f.getvalue() == expected From 2b5f59166beeccde0ee230c8673cf50932c8daba Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 14 Nov 2018 16:34:51 +0900 Subject: [PATCH 153/349] fallback: Fix warning stacklevel (#327) --- msgpack/fallback.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b9ef2969..04fb5b92 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -89,7 +89,7 @@ def _get_data_from_buffer(obj): warnings.warn("using old buffer interface to unpack %s; " "this leads to unpacking errors if slicing is used and " "will be removed in a future version" % type(obj), - RuntimeWarning) + RuntimeWarning, stacklevel=3) else: raise if view.itemsize != 1: @@ -100,7 +100,7 @@ def _get_data_from_buffer(obj): def unpack(stream, **kwargs): warnings.warn( 
"Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", - DeprecationWarning) + DeprecationWarning, stacklevel=2) data = stream.read() return unpackb(data, **kwargs) @@ -226,7 +226,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, if encoding is not None: warnings.warn( "encoding is deprecated, Use raw=False instead.", - DeprecationWarning) + DeprecationWarning, stacklevel=2) if unicode_errors is None: unicode_errors = 'strict' @@ -712,7 +712,7 @@ def __init__(self, default=None, encoding=None, unicode_errors=None, else: warnings.warn( "encoding is deprecated, Use raw=False instead.", - DeprecationWarning) + DeprecationWarning, stacklevel=2) if unicode_errors is None: unicode_errors = 'strict' From d782464c9150e448ab3a8d81197ff335e1ac2c2b Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 14 Nov 2018 16:35:37 +0900 Subject: [PATCH 154/349] Refactor Cython code (#328) _msgpack -> _cmsgpack --- .travis.yml | 2 +- Makefile | 2 +- appveyor.yml | 3 ++- docker/runtests.sh | 2 +- msgpack/__init__.py | 2 +- msgpack/_cmsgpack.pyx | 4 ++++ msgpack/_msgpack.pyx | 4 ---- msgpack/_packer.pyx | 10 +++------- msgpack/_unpacker.pyx | 22 +--------------------- setup.py | 6 +++--- 10 files changed, 17 insertions(+), 40 deletions(-) create mode 100644 msgpack/_cmsgpack.pyx delete mode 100644 msgpack/_msgpack.pyx diff --git a/.travis.yml b/.travis.yml index 822ca9ad..1adbdc25 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,7 +45,7 @@ install: script: - python -c 'import sys; print(hex(sys.maxsize))' - - python -c 'from msgpack import _msgpack' + - python -c 'from msgpack import _cmsgpack' - pytest -v test - MSGPACK_PUREPYTHON=x pytest -v test diff --git a/Makefile b/Makefile index ff9a482b..b65aa859 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ all: cython .PHONY: cython cython: - cython --cplus msgpack/_msgpack.pyx + cython --cplus msgpack/_cmsgpack.pyx .PHONY: test test: diff --git a/appveyor.yml b/appveyor.yml index 72b334a0..f0e21fc7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,8 +6,9 @@ environment: install: # We need wheel installed to build wheels + - "%PYTHON%\\python.exe -m pip install -U pip" - "%PYTHON%\\python.exe -m pip install -U cython" - - "%PYTHON%\\Scripts\\cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx" + - "%PYTHON%\\Scripts\\cython --cplus msgpack/_cmsgpack.pyx" build: off diff --git a/docker/runtests.sh b/docker/runtests.sh index f6c2c68e..c6bbf60f 100755 --- a/docker/runtests.sh +++ b/docker/runtests.sh @@ -8,7 +8,7 @@ for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do $PYBIN/pip install pytest pushd test # prevent importing msgpack package in current directory. $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' - $PYBIN/python -c 'from msgpack import _msgpack' # Ensure extension is available + $PYBIN/python -c 'from msgpack import _cmsgpack' # Ensure extension is available $PYBIN/pytest -v . 
popd done diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 7c5d4c0c..7493c4c6 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -22,7 +22,7 @@ def __new__(cls, code, data): from msgpack.fallback import Packer, unpackb, Unpacker else: try: - from msgpack._msgpack import Packer, unpackb, Unpacker + from msgpack._cmsgpack import Packer, unpackb, Unpacker except ImportError: from msgpack.fallback import Packer, unpackb, Unpacker diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx new file mode 100644 index 00000000..a48d5b51 --- /dev/null +++ b/msgpack/_cmsgpack.pyx @@ -0,0 +1,4 @@ +# coding: utf-8 +#cython: embedsignature=True, c_string_encoding=ascii, language_level=2 +include "_packer.pyx" +include "_unpacker.pyx" diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx deleted file mode 100644 index 4381394f..00000000 --- a/msgpack/_msgpack.pyx +++ /dev/null @@ -1,4 +0,0 @@ -# coding: utf-8 -#cython: embedsignature=True, c_string_encoding=ascii -include "_packer.pyx" -include "_unpacker.pyx" diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 2643f858..3be593fb 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,9 +1,7 @@ # coding: utf-8 -#cython: embedsignature=True, c_string_encoding=ascii from cpython cimport * -from cpython.version cimport PY_MAJOR_VERSION -from cpython.exc cimport PyErr_WarnEx +from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact from msgpack import ExtType @@ -11,8 +9,6 @@ from msgpack import ExtType cdef extern from "Python.h": int PyMemoryView_Check(object obj) - int PyByteArray_Check(object obj) - int PyByteArray_CheckExact(object obj) char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL @@ -204,7 +200,7 @@ cdef class Packer(object): elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): L = len(o) if L > ITEM_LIMIT: - raise ValueError("%s is too large" % type(o).__name__) + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) rawval = o ret = msgpack_pack_bin(&self.pk, L) if ret == 0: @@ -280,7 +276,7 @@ cdef class Packer(object): default_used = 1 continue else: - raise TypeError("can't serialize %r" % (o,)) + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) return ret cpdef pack(self, object obj): diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index e168587b..aeebe2ac 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,26 +1,6 @@ # coding: utf-8 -#cython: embedsignature=True, c_string_encoding=ascii -from cpython.version cimport PY_MAJOR_VERSION -from cpython.bytes cimport ( - PyBytes_AsString, - PyBytes_FromStringAndSize, - PyBytes_Size, -) -from cpython.buffer cimport ( - Py_buffer, - PyObject_CheckBuffer, - PyObject_GetBuffer, - PyBuffer_Release, - PyBuffer_IsContiguous, - PyBUF_READ, - PyBUF_SIMPLE, - PyBUF_FULL_RO, -) -from cpython.mem cimport PyMem_Malloc, PyMem_Free -from cpython.object cimport PyCallable_Check -from cpython.ref cimport Py_DECREF -from cpython.exc cimport PyErr_WarnEx +from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject diff --git a/setup.py b/setup.py index 8b8f7bdf..eb9403f1 100755 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ def build_extension(self, ext): if have_cython: class Sdist(sdist): def __init__(self, *args, **kwargs): - cythonize('msgpack/_msgpack.pyx') + cythonize('msgpack/_cmsgpack.pyx') sdist.__init__(self, *args, **kwargs) else: Sdist = sdist @@ -84,8 +84,8 @@ def __init__(self, *args, **kwargs): 
ext_modules = [] if not hasattr(sys, 'pypy_version_info'): - ext_modules.append(Extension('msgpack._msgpack', - sources=['msgpack/_msgpack.cpp'], + ext_modules.append(Extension('msgpack._cmsgpack', + sources=['msgpack/_cmsgpack.cpp'], libraries=libraries, include_dirs=['.'], define_macros=macros, From 2f808b6e012bdc506ca83a33c2f53af1b255a069 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 14 Nov 2018 20:04:22 +0900 Subject: [PATCH 155/349] Try language_level=3 (#329) --- msgpack/_cmsgpack.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx index a48d5b51..8ebdbf58 100644 --- a/msgpack/_cmsgpack.pyx +++ b/msgpack/_cmsgpack.pyx @@ -1,4 +1,4 @@ # coding: utf-8 -#cython: embedsignature=True, c_string_encoding=ascii, language_level=2 +#cython: embedsignature=True, c_string_encoding=ascii, language_level=3 include "_packer.pyx" include "_unpacker.pyx" From 8b6ce53cce40e528af7cce89f358f7dde1a09289 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Wed, 14 Nov 2018 21:06:16 +0900 Subject: [PATCH 156/349] s/iteritems/items/g (#330) --- msgpack/_packer.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 3be593fb..bfde043e 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -226,7 +226,7 @@ cdef class Packer(object): raise ValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: - for k, v in d.iteritems(): + for k, v in d.items(): ret = self._pack(k, nest_limit-1) if ret != 0: break ret = self._pack(v, nest_limit-1) From 44254dd35e8aa3cfd6706e14effab117d7f22c25 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 20 Nov 2018 13:12:49 +0900 Subject: [PATCH 157/349] Add StackError and FormatError (#331) --- ChangeLog.rst | 20 ++++++++++-------- Makefile | 3 ++- msgpack/_unpacker.pyx | 26 +++++++++++++++++++++-- msgpack/exceptions.py | 12 ++++++++++- msgpack/fallback.py | 43 ++++++++++++++++++++++++++++++++++----- msgpack/unpack_template.h | 29 +++++--------------------- test/test_except.py | 33 ++++++++++++++++++++++++------ 7 files changed, 119 insertions(+), 47 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index cc6b5e4f..d39e76bf 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -5,21 +5,25 @@ Release Date: TBD Important changes ------------------- +----------------- -Extension modules are merged. There is ``msgpack._msgpack`` instead of -``msgpack._packer`` and ``msgpack._unpacker``. (#314) +* unpacker: Default size limits is smaller than before to avoid DoS attack. + If you need to handle large data, you need to specify limits manually. (#319) -unpacker: Default size limits is smaller than before to avoid DoS attack. -If you need to handle large data, you need to specify limits manually. +Other changes +------------- +* Extension modules are merged. There is ``msgpack._msgpack`` instead of + ``msgpack._packer`` and ``msgpack._unpacker``. (#314) -Other changes --------------- +* Add ``Unpacker.getbuffer()`` method. (#320) -Add ``Unpacker.getbuffer()`` method. +* unpacker: ``msgpack.StackError`` is raised when input data contains too + nested data. (#331) +* unpacker: ``msgpack.FormatError`` is raised when input data is not valid + msgpack format. (#331) 0.5.6 diff --git a/Makefile b/Makefile index b65aa859..5828ed4d 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,8 @@ cython: cython --cplus msgpack/_cmsgpack.pyx .PHONY: test -test: +test: cython + pip install -e . 
pytest -v test MSGPACK_PUREPYTHON=1 pytest -v test diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index aeebe2ac..69330d3b 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -16,6 +16,8 @@ from msgpack.exceptions import ( BufferFull, OutOfData, ExtraData, + FormatError, + StackError, ) from msgpack import ExtType @@ -149,7 +151,11 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, """ Unpack packed_bytes to object. Returns an unpacked object. - Raises `ValueError` when `packed` contains extra bytes. + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``ValueError`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. See :class:`Unpacker` for options. """ @@ -187,6 +193,12 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj unpack_clear(&ctx) + if ret == 0: + raise ValueError("Unpack failed: incomplete input") + elif ret == -2: + raise FormatError + elif ret == -3: + raise StackError raise ValueError("Unpack failed: error = %d" % (ret,)) @@ -201,7 +213,7 @@ def unpack(object stream, **kwargs): cdef class Unpacker(object): """Streaming unpacker. - arguments: + Arguments: :param file_like: File-like object having `.read(n)` method. @@ -279,6 +291,12 @@ cdef class Unpacker(object): unpacker.feed(buf) for o in unpacker: process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. """ cdef unpack_context ctx cdef char* buf @@ -451,6 +469,10 @@ cdef class Unpacker(object): raise StopIteration("No more data to unpack.") else: raise OutOfData("No more data to unpack.") + elif ret == -2: + raise FormatError + elif ret == -3: + raise StackError else: raise ValueError("Unpack failed: error = %d" % (ret,)) diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index 5bee5b2f..d6d2615c 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -6,6 +6,7 @@ class UnpackException(Exception): Exception instead. """ + class BufferFull(UnpackException): pass @@ -14,6 +15,14 @@ class OutOfData(UnpackException): pass +class FormatError(ValueError, UnpackException): + """Invalid msgpack format""" + + +class StackError(ValueError, UnpackException): + """Too nested""" + + # Deprecated. Use ValueError instead UnpackValueError = ValueError @@ -24,6 +33,7 @@ class ExtraData(UnpackValueError): This exception is raised while only one-shot (not streaming) unpack. """ + def __init__(self, unpacked, extra): self.unpacked = unpacked self.extra = extra @@ -32,7 +42,7 @@ def __str__(self): return "unpack(b) received extra data." -#Deprecated. Use Exception instead to catch all exception during packing. +# Deprecated. Use Exception instead to catch all exception during packing. PackException = Exception PackValueError = ValueError PackOverflowError = OverflowError diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 04fb5b92..9c767a77 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -18,6 +18,16 @@ def dict_iteritems(d): def dict_iteritems(d): return d.iteritems() +if sys.version_info < (3, 5): + # Ugly hack... 
+ RecursionError = RuntimeError + + def _is_recursionerror(e): + return len(e.args) == 1 and isinstance(e.args[0], str) and \ + e.args[0].startswith('maximum recursion depth exceeded') +else: + def _is_recursionerror(e): + return True if hasattr(sys, 'pypy_version_info'): # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own @@ -52,7 +62,10 @@ def getvalue(self): from msgpack.exceptions import ( BufferFull, OutOfData, - ExtraData) + ExtraData, + FormatError, + StackError, +) from msgpack import ExtType @@ -109,7 +122,12 @@ def unpackb(packed, **kwargs): """ Unpack an object from `packed`. - Raises `ExtraData` when `packed` contains extra bytes. + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``ValueError`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. + See :class:`Unpacker` for options. """ unpacker = Unpacker(None, **kwargs) @@ -117,7 +135,11 @@ def unpackb(packed, **kwargs): try: ret = unpacker._unpack() except OutOfData: - raise ValueError("Data is not enough.") + raise ValueError("Unpack failed: incomplete input") + except RecursionError as e: + if _is_recursionerror(e): + raise StackError + raise if unpacker._got_extradata(): raise ExtraData(ret, unpacker._get_extradata()) return ret @@ -211,6 +233,12 @@ class Unpacker(object): unpacker.feed(buf) for o in unpacker: process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. """ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, @@ -561,7 +589,7 @@ def _read_header(self, execute=EX_CONSTRUCT): raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP else: - raise ValueError("Unknown header: 0x%x" % b) + raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj def _unpack(self, execute=EX_CONSTRUCT): @@ -637,6 +665,8 @@ def __next__(self): except OutOfData: self._consume() raise StopIteration + except RecursionError: + raise StackError next = __next__ @@ -645,7 +675,10 @@ def skip(self): self._consume() def unpack(self): - ret = self._unpack(EX_CONSTRUCT) + try: + ret = self._unpack(EX_CONSTRUCT) + except RecursionError: + raise StackError self._consume() return ret diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 525dea24..a78b7fa7 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -123,7 +123,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize goto _fixed_trail_again #define start_container(func, count_, ct_) \ - if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ + if(top >= MSGPACK_EMBED_STACK_SIZE) { ret = -3; goto _end; } \ if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ if((count_) == 0) { obj = stack[top].obj; \ if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ @@ -132,27 +132,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize stack[top].size = count_; \ stack[top].count = 0; \ ++top; \ - /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ - /*printf("stack push %d\n", top);*/ \ - /* FIXME \ - if(top >= stack_size) { \ - if(stack_size == 
MSGPACK_EMBED_STACK_SIZE) { \ - size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \ - size_t nsize = csize * 2; \ - unpack_stack* tmp = (unpack_stack*)malloc(nsize); \ - if(tmp == NULL) { goto _failed; } \ - memcpy(tmp, ctx->stack, csize); \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ - } else { \ - size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \ - unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, nsize); \ - if(tmp == NULL) { goto _failed; } \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = stack_size * 2; \ - } \ - } \ - */ \ goto _header_again #define NEXT_CS(p) ((unsigned int)*p & 0x1f) @@ -229,7 +208,8 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize case 0xdf: // map 32 again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); default: - goto _failed; + ret = -2; + goto _end; } SWITCH_RANGE(0xa0, 0xbf) // FixRaw again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); @@ -239,7 +219,8 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); SWITCH_RANGE_DEFAULT - goto _failed; + ret = -2; + goto _end; SWITCH_RANGE_END // end CS_HEADER diff --git a/test/test_except.py b/test/test_except.py index 361d4ea3..626c8be0 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -2,7 +2,7 @@ # coding: utf-8 from pytest import raises -from msgpack import packb, unpackb +from msgpack import packb, unpackb, Unpacker, FormatError, StackError, OutOfData import datetime @@ -19,13 +19,34 @@ def test_raise_on_find_unsupported_value(): def test_raise_from_object_hook(): def hook(obj): raise DummyException + raises(DummyException, unpackb, packb({}), object_hook=hook) - raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_hook=hook) - raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_pairs_hook=hook) - raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) - raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook) + raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_hook=hook) + raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_pairs_hook=hook) + raises(DummyException, unpackb, packb({"fizz": {"buzz": "spam"}}), object_hook=hook) + raises( + DummyException, + unpackb, + packb({"fizz": {"buzz": "spam"}}), + object_pairs_hook=hook, + ) def test_invalidvalue(): + incomplete = b"\xd9\x97#DL_" # raw8 - length=0x97 with raises(ValueError): - unpackb(b'\xd9\x97#DL_') + unpackb(incomplete) + + with raises(OutOfData): + unpacker = Unpacker() + unpacker.feed(incomplete) + unpacker.unpack() + + with raises(FormatError): + unpackb(b"\xc1") # (undefined tag) + + with raises(FormatError): + unpackb(b"\x91\xc1") # fixarray(len=1) [ (undefined tag) ] + + with raises(StackError): + unpackb(b"\x91" * 3000) # nested fixarray(len=1) From ab2415eaa0cdbe8e5b6e248d447cf5e66e858eb2 Mon Sep 17 00:00:00 2001 From: jkorvin Date: Tue, 20 Nov 2018 09:24:35 +0300 Subject: [PATCH 158/349] Unpacker: allow to use buffer with size greater than 2 GB (#332) --- msgpack/_unpacker.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 69330d3b..a5403d83 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -320,7 +320,7 @@ cdef class Unpacker(object): def __init__(self, file_like=None, Py_ssize_t 
read_size=0, bint use_list=True, bint raw=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, - encoding=None, unicode_errors=None, int max_buffer_size=0, + encoding=None, unicode_errors=None, Py_ssize_t max_buffer_size=0, object ext_hook=ExtType, Py_ssize_t max_str_len=1024*1024, Py_ssize_t max_bin_len=1024*1024, From 3c9c6edbc88908fceb3c69ff3d6455be8b5914c8 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Tue, 20 Nov 2018 15:48:44 +0900 Subject: [PATCH 159/349] Update README --- ChangeLog.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index d39e76bf..8bc80e65 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -7,15 +7,21 @@ Release Date: TBD Important changes ----------------- -* unpacker: Default size limits is smaller than before to avoid DoS attack. +* unpacker: Default value of input limits are smaller than before to avoid DoS attack. If you need to handle large data, you need to specify limits manually. (#319) +* Unpacker doesn't wrap underlaying ``ValueError`` (including ``UnicodeError``) into + ``UnpackValueError``. If you want to catch all exception during unpack, you need + to use ``try ... except Exception`` with minimum try code block. (#323, #233) + +* ``PackValueError`` and ``PackOverflowError`` are also removed. You need to catch + normal ``ValueError`` and ``OverflowError``. (#323, #233) Other changes ------------- -* Extension modules are merged. There is ``msgpack._msgpack`` instead of - ``msgpack._packer`` and ``msgpack._unpacker``. (#314) +* Extension modules are merged. There is ``msgpack._cmsgpack`` instead of + ``msgpack._packer`` and ``msgpack._unpacker``. (#314, #328) * Add ``Unpacker.getbuffer()`` method. (#320) From e9086a34e4b3d64df78314339f152c800e79c8e1 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 29 Nov 2018 22:29:38 +0900 Subject: [PATCH 160/349] Add strict_map_key option to unpacker --- msgpack/_unpacker.pyx | 17 ++++++++++++----- msgpack/unpack.h | 5 +++++ test/test_except.py | 11 +++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index a5403d83..2163425b 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -27,6 +27,7 @@ cdef extern from "unpack.h": bint use_list bint raw bint has_pairs_hook # call object_hook with k-v pairs + bint strict_map_key PyObject* object_hook PyObject* list_hook PyObject* ext_hook @@ -56,7 +57,7 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, bint raw, + bint use_list, bint raw, bint strict_map_key, const char* encoding, const char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, @@ -64,6 +65,7 @@ cdef inline init_ctx(unpack_context *ctx, unpack_init(ctx) ctx.user.use_list = use_list ctx.user.raw = raw + ctx.user.strict_map_key = strict_map_key ctx.user.object_hook = ctx.user.list_hook = NULL ctx.user.max_str_len = max_str_len ctx.user.max_bin_len = max_bin_len @@ -140,7 +142,7 @@ cdef inline int get_data_from_buffer(object obj, return 1 def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=True, + bint use_list=True, bint raw=True, bint strict_map_key=False, encoding=None, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=1024*1024, @@ -180,7 +182,7 @@ def 
unpackb(object packed, object object_hook=None, object list_hook=None, get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) try: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, raw, cenc, cerr, + use_list, raw, strict_map_key, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -236,6 +238,11 @@ cdef class Unpacker(object): *encoding* option which is deprecated overrides this option. + :param bool strict_map_key: + If true, only str or bytes are accepted for map (dict) keys. + It's False by default for backward-compatibility. + But it will be True from msgpack 1.0. + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. @@ -318,7 +325,7 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, Py_ssize_t read_size=0, - bint use_list=True, bint raw=True, + bint use_list=True, bint raw=True, bint strict_map_key=False, object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors=None, Py_ssize_t max_buffer_size=0, object ext_hook=ExtType, @@ -366,7 +373,7 @@ cdef class Unpacker(object): cerr = unicode_errors init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, raw, cenc, cerr, + ext_hook, use_list, raw, strict_map_key, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 63e5543b..85dbbed5 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -23,6 +23,7 @@ typedef struct unpack_user { bool use_list; bool raw; bool has_pairs_hook; + bool strict_map_key; PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; @@ -188,6 +189,10 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { + if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) { + PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key", Py_TYPE(k)->tp_name); + return -1; + } if (u->has_pairs_hook) { msgpack_unpack_object item = PyTuple_Pack(2, k, v); if (!item) diff --git a/test/test_except.py b/test/test_except.py index 626c8be0..40ca3ee0 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -50,3 +50,14 @@ def test_invalidvalue(): with raises(StackError): unpackb(b"\x91" * 3000) # nested fixarray(len=1) + + +def test_strict_map_key(): + valid = {u"unicode": 1, b"bytes": 2} + packed = packb(valid, use_bin_type=True) + assert valid == unpackb(packed, raw=True) + + invalid = {42: 1} + packed = packb(invalid, use_bin_type=True) + with raises(ValueError): + unpackb(packed, raw=True) From dc1b9930793ea57aa4e2a9773a23582b5483c53a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 29 Nov 2018 22:35:12 +0900 Subject: [PATCH 161/349] Implement strict_map_key to fallback unpacker. --- msgpack/fallback.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9c767a77..be60cca8 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -179,6 +179,11 @@ class Unpacker(object): *encoding* option which is deprecated overrides this option. + :param bool strict_map_key: + If true, only str or bytes are accepted for map (dict) keys. 
+ It's False by default for backward-compatibility. + But it will be True from msgpack 1.0. + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. @@ -241,7 +246,7 @@ class Unpacker(object): Other exceptions can be raised during unpacking. """ - def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, + def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_map_key=False, object_hook=None, object_pairs_hook=None, list_hook=None, encoding=None, unicode_errors=None, max_buffer_size=0, ext_hook=ExtType, @@ -286,6 +291,7 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, raise ValueError("read_size must be smaller than max_buffer_size") self._read_size = read_size or min(self._max_buffer_size, 16*1024) self._raw = bool(raw) + self._strict_map_key = bool(strict_map_key) self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list @@ -633,6 +639,8 @@ def _unpack(self, execute=EX_CONSTRUCT): ret = {} for _ in xrange(n): key = self._unpack(EX_CONSTRUCT) + if self._strict_map_key and type(key) not in (unicode, bytes): + raise ValueError("%s is not allowed for map key" % str(type(key))) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: ret = self._object_hook(ret) From e76091a82c04d092a363b0e1f7307dc70bf784f5 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 29 Nov 2018 22:38:22 +0900 Subject: [PATCH 162/349] Fix test --- test/test_except.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_except.py b/test/test_except.py index 40ca3ee0..01961d27 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -55,9 +55,9 @@ def test_invalidvalue(): def test_strict_map_key(): valid = {u"unicode": 1, b"bytes": 2} packed = packb(valid, use_bin_type=True) - assert valid == unpackb(packed, raw=True) + assert valid == unpackb(packed, raw=True, strict_map_key=True) invalid = {42: 1} packed = packb(invalid, use_bin_type=True) with raises(ValueError): - unpackb(packed, raw=True) + unpackb(packed, raw=True, strict_map_key=True) From ab789813b8e5786f404e9132ff1c6f2647dc4afa Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 Nov 2018 11:36:15 +0900 Subject: [PATCH 163/349] Fix test --- test/test_except.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_except.py b/test/test_except.py index 01961d27..5544f2bc 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -55,9 +55,9 @@ def test_invalidvalue(): def test_strict_map_key(): valid = {u"unicode": 1, b"bytes": 2} packed = packb(valid, use_bin_type=True) - assert valid == unpackb(packed, raw=True, strict_map_key=True) + assert valid == unpackb(packed, raw=False, strict_map_key=True) invalid = {42: 1} packed = packb(invalid, use_bin_type=True) with raises(ValueError): - unpackb(packed, raw=True, strict_map_key=True) + unpackb(packed, raw=False, strict_map_key=True) From 8ae6320072e746fad29bc14a095569811e009695 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 Nov 2018 11:42:51 +0900 Subject: [PATCH 164/349] Fix fallback --- msgpack/fallback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index be60cca8..ae2fcfc3 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -639,7 +639,7 @@ def _unpack(self, execute=EX_CONSTRUCT): ret = {} for _ in xrange(n): key = self._unpack(EX_CONSTRUCT) - if self._strict_map_key and 
type(key) not in (unicode, bytes): + if self._strict_map_key and type(key) not in (Unicode, bytes): raise ValueError("%s is not allowed for map key" % str(type(key))) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: From 04cf8fc7f4b9e8dd32d809cd2c45b05b83d7f913 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 Nov 2018 14:04:18 +0900 Subject: [PATCH 165/349] Update ChangeLog --- ChangeLog.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index 8bc80e65..806007e9 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -17,6 +17,11 @@ Important changes * ``PackValueError`` and ``PackOverflowError`` are also removed. You need to catch normal ``ValueError`` and ``OverflowError``. (#323, #233) +* Unpacker has ``strict_map_key`` option now. When it is true, only bytes and str + (unicode in Python 2) are allowed for map keys. It is recommended to avoid + hashdos. Default value of this option is False for backward compatibility reason. + But it will be changed True in 1.0. (#296, #334) + Other changes ------------- From 93b5953eae11fa3a8668de16a3ccbf20d7bf0fd9 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 Nov 2018 16:05:31 +0900 Subject: [PATCH 166/349] Update tox.ini --- tox.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index 68a2f53a..0945a6d6 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py27,py35,py36}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = {py27,py35,py36,py37}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 [variants:pure] setenv= @@ -11,7 +11,7 @@ deps= changedir=test commands= - c,x86: python -c 'from msgpack import _packer, _unpacker' + c,x86: python -c 'from msgpack import _cmsgpack' c,x86: py.test pure: py.test @@ -23,7 +23,7 @@ deps= changedir=test commands= python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _packer, _unpacker' + python -c 'from msgpack import _cmsgpack' py.test [testenv:py34-x86] @@ -34,5 +34,5 @@ deps= changedir=test commands= python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _packer, _unpacker' + python -c 'from msgpack import _cmsgpack' py.test From bbdfd4d92e54e89604a2ebf6af86ced4ae5ae05d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 Nov 2018 16:28:41 +0900 Subject: [PATCH 167/349] cleanup --- msgpack/fallback.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index ae2fcfc3..57b436a7 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -4,19 +4,19 @@ import struct import warnings -if sys.version_info[0] == 3: - PY3 = True + +if sys.version_info[0] == 2: + PY2 = True + int_types = (int, long) + def dict_iteritems(d): + return d.iteritems() +else: + PY2 = False int_types = int - Unicode = str + unicode = str xrange = range def dict_iteritems(d): return d.items() -else: - PY3 = False - int_types = (int, long) - Unicode = unicode - def dict_iteritems(d): - return d.iteritems() if sys.version_info < (3, 5): # Ugly hack... 
@@ -97,7 +97,7 @@ def _get_data_from_buffer(obj): view = memoryview(obj) except TypeError: # try to use legacy buffer protocol if 2.7, otherwise re-raise - if not PY3: + if PY2: view = memoryview(buffer(obj)) warnings.warn("using old buffer interface to unpack %s; " "this leads to unpacking errors if slicing is used and " @@ -639,7 +639,7 @@ def _unpack(self, execute=EX_CONSTRUCT): ret = {} for _ in xrange(n): key = self._unpack(EX_CONSTRUCT) - if self._strict_map_key and type(key) not in (Unicode, bytes): + if self._strict_map_key and type(key) not in (unicode, bytes): raise ValueError("%s is not allowed for map key" % str(type(key))) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: @@ -819,7 +819,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) - if check(obj, Unicode): + if check(obj, unicode): if self._encoding is None: raise TypeError( "Can't encode unicode string: " @@ -1006,7 +1006,7 @@ def reset(self): def getbuffer(self): """Return view of internal buffer.""" - if USING_STRINGBUILDER or not PY3: + if USING_STRINGBUILDER or PY2: return memoryview(self.bytes()) else: return self._buffer.getbuffer() From cc7fd5722b779d438f7d226a9c7f61115764b39c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 Nov 2018 19:03:44 +0900 Subject: [PATCH 168/349] 0.6.0 --- ChangeLog.rst | 3 ++- msgpack/_version.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 806007e9..651ba62d 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,8 +1,9 @@ 0.6.0 ====== -Release Date: TBD +Release Date: 2018-11-30 +This release contains some backward incompatible changes for security reason (DoS). Important changes ----------------- diff --git a/msgpack/_version.py b/msgpack/_version.py index 0952ec6a..49a41034 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 6, 0, 'dev') +version = (0, 6, 0) From b1d658e7a0aed4e723f9b6b238cc9f2f876e54d7 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 30 Nov 2018 19:25:14 +0900 Subject: [PATCH 169/349] AppVeyor: Add Python 3.7 and remove 3.6 --- appveyor.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index f0e21fc7..bd0800ae 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -21,14 +21,14 @@ test_script: - ci\\runtests.bat - set PYTHON="C:\\Python27-x64" - ci\\runtests.bat - - set PYTHON="C:\\Python35" - - ci\\runtests.bat - - set PYTHON="C:\\Python35-x64" - - ci\\runtests.bat - set PYTHON="C:\\Python36" - ci\\runtests.bat - set PYTHON="C:\\Python36-x64" - ci\\runtests.bat + - set PYTHON="C:\\Python37" + - ci\\runtests.bat + - set PYTHON="C:\\Python37-x64" + - ci\\runtests.bat after_test: # This step builds your wheels. 
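The ``strict_map_key`` option added in the patches above can be exercised as follows; this is a minimal sketch against the 0.6.0 API, and the sample keys and values are illustrative only::

    from msgpack import packb, unpackb

    packed = packb({u"name": 1, b"raw": 2}, use_bin_type=True)
    # str and bytes map keys are always accepted
    assert unpackb(packed, raw=False, strict_map_key=True) == {u"name": 1, b"raw": 2}

    packed = packb({42: 1}, use_bin_type=True)
    unpackb(packed, raw=False)  # default strict_map_key=False still returns {42: 1}
    try:
        unpackb(packed, raw=False, strict_map_key=True)
    except ValueError as exc:
        print(exc)  # e.g. "int is not allowed for map key"
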
From b8bf3c950c4474d3af82a6b6bda6326b2e197a5e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 4 Dec 2018 17:18:34 +0900 Subject: [PATCH 170/349] Build linux wheel for Python 3.7 --- docker/buildwheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index f586a8dd..d8c74cb8 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -4,7 +4,7 @@ set -e -x ARCH=`uname -p` echo "arch=$ARCH" -for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do +for V in cp37-cp37m cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python $PYBIN/python setup.py bdist_wheel -p manylinux1_${ARCH} From 197e30723a242e3ba31f9634b3263ae4cb2937b1 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 4 Dec 2018 20:10:21 +0900 Subject: [PATCH 171/349] Fix docstring --- msgpack/_unpacker.pyx | 2 +- msgpack/fallback.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 2163425b..4ea05459 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -271,7 +271,7 @@ cdef class Unpacker(object): Limits max length of map. (default: 32*1024) :param int max_ext_len: - Limits max length of map. (default: 1024*1024) + Limits max size of ext type. (default: 1024*1024) :param str encoding: Deprecated, use raw instead. diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 57b436a7..4567e2dd 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -220,7 +220,7 @@ class Unpacker(object): Limits max length of map. (default: 32*1024) :param int max_ext_len: - Limits max length of map. (default: 1024*1024) + Limits max size of ext type. (default: 1024*1024) example of streaming deserialize from file-like object:: From f46523b1af7ff2d408da8500ea36a4f9f2abe915 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 7 Jan 2019 21:10:40 +0900 Subject: [PATCH 172/349] use _PyFloat APIs to (de)serialize (#340) --- msgpack/pack_template.h | 13 +++---------- msgpack/unpack_template.h | 14 ++++---------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 5d1088f4..69982f4d 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -566,24 +566,17 @@ if(sizeof(unsigned long long) == 2) { static inline int msgpack_pack_float(msgpack_packer* x, float d) { - union { float f; uint32_t i; } mem; - mem.f = d; unsigned char buf[5]; - buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i); + buf[0] = 0xca; + _PyFloat_Pack4(d, &buf[1], 0); msgpack_pack_append_buffer(x, buf, 5); } static inline int msgpack_pack_double(msgpack_packer* x, double d) { - union { double f; uint64_t i; } mem; - mem.f = d; unsigned char buf[9]; buf[0] = 0xcb; -#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi - // https://github.com/msgpack/msgpack-perl/pull/1 - mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); -#endif - _msgpack_store64(&buf[1], mem.i); + _PyFloat_Pack8(d, &buf[1], 0); msgpack_pack_append_buffer(x, buf, 9); } diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index a78b7fa7..9924b9c6 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -243,17 +243,11 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize _msgpack_load32(uint32_t,n)+1, _ext_zero); case CS_FLOAT: { - union { uint32_t i; float f; } mem; - mem.i = _msgpack_load32(uint32_t,n); - push_fixed_value(_float, 
mem.f); } + double f = _PyFloat_Unpack4((unsigned char*)n, 0); + push_fixed_value(_float, f); } case CS_DOUBLE: { - union { uint64_t i; double f; } mem; - mem.i = _msgpack_load64(uint64_t,n); -#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi - // https://github.com/msgpack/msgpack-perl/pull/1 - mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); -#endif - push_fixed_value(_double, mem.f); } + double f = _PyFloat_Unpack8((unsigned char*)n, 0); + push_fixed_value(_double, f); } case CS_UINT_8: push_fixed_value(_uint8, *(uint8_t*)n); case CS_UINT_16: From 28b5f46a34933cc177aca333203d1344b5e3639a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 24 Jan 2019 18:46:39 +0900 Subject: [PATCH 173/349] Auto limit configuration (#342) --- msgpack/_unpacker.pyx | 56 +++++++++++++++++++++++++++++++------------ msgpack/fallback.py | 39 ++++++++++++++++++++---------- test/test_limits.py | 25 ++++++++++++++++++- 3 files changed, 92 insertions(+), 28 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 4ea05459..38119c05 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -145,11 +145,11 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=True, bint raw=True, bint strict_map_key=False, encoding=None, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=1024*1024, - Py_ssize_t max_bin_len=1024*1024, - Py_ssize_t max_array_len=128*1024, - Py_ssize_t max_map_len=32*1024, - Py_ssize_t max_ext_len=1024*1024): + Py_ssize_t max_str_len=-1, + Py_ssize_t max_bin_len=-1, + Py_ssize_t max_array_len=-1, + Py_ssize_t max_map_len=-1, + Py_ssize_t max_ext_len=-1): """ Unpack packed_bytes to object. Returns an unpacked object. @@ -160,6 +160,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, Other exceptions can be raised during unpacking. See :class:`Unpacker` for options. + + *max_xxx_len* options are configured automatically from ``len(packed)``. """ cdef unpack_context ctx cdef Py_ssize_t off = 0 @@ -180,6 +182,18 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cerr = unicode_errors get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) + + if max_str_len == -1: + max_str_len = buf_len + if max_bin_len == -1: + max_bin_len = buf_len + if max_array_len == -1: + max_array_len = buf_len + if max_map_len == -1: + max_map_len = buf_len//2 + if max_ext_len == -1: + max_ext_len = buf_len + try: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, use_list, raw, strict_map_key, cenc, cerr, @@ -259,19 +273,19 @@ cdef class Unpacker(object): You should set this parameter when unpacking data from untrusted source. :param int max_str_len: - Limits max length of str. (default: 1024*1024) + Limits max length of str. (default: max_buffer_size or 1024*1024) :param int max_bin_len: - Limits max length of bin. (default: 1024*1024) + Limits max length of bin. (default: max_buffer_size or 1024*1024) :param int max_array_len: - Limits max length of array. (default: 128*1024) + Limits max length of array. (default: max_buffer_size or 128*1024) :param int max_map_len: - Limits max length of map. (default: 32*1024) + Limits max length of map. (default: max_buffer_size//2 or 32*1024) :param int max_ext_len: - Limits max size of ext type. (default: 1024*1024) + Limits max size of ext type. (default: max_buffer_size or 1024*1024) :param str encoding: Deprecated, use raw instead. 
@@ -329,11 +343,11 @@ cdef class Unpacker(object): object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors=None, Py_ssize_t max_buffer_size=0, object ext_hook=ExtType, - Py_ssize_t max_str_len=1024*1024, - Py_ssize_t max_bin_len=1024*1024, - Py_ssize_t max_array_len=128*1024, - Py_ssize_t max_map_len=32*1024, - Py_ssize_t max_ext_len=1024*1024): + Py_ssize_t max_str_len=-1, + Py_ssize_t max_bin_len=-1, + Py_ssize_t max_array_len=-1, + Py_ssize_t max_map_len=-1, + Py_ssize_t max_ext_len=-1): cdef const char *cenc=NULL, cdef const char *cerr=NULL @@ -347,6 +361,18 @@ cdef class Unpacker(object): self.file_like_read = file_like.read if not PyCallable_Check(self.file_like_read): raise TypeError("`file_like.read` must be a callable.") + + if max_str_len == -1: + max_str_len = max_buffer_size or 1024*1024 + if max_bin_len == -1: + max_bin_len = max_buffer_size or 1024*1024 + if max_array_len == -1: + max_array_len = max_buffer_size or 128*1024 + if max_map_len == -1: + max_map_len = max_buffer_size//2 or 32*1024 + if max_ext_len == -1: + max_ext_len = max_buffer_size or 1024*1024 + if not max_buffer_size: max_buffer_size = INT_MAX if read_size > max_buffer_size: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 4567e2dd..7524448a 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -130,7 +130,7 @@ def unpackb(packed, **kwargs): See :class:`Unpacker` for options. """ - unpacker = Unpacker(None, **kwargs) + unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs) unpacker.feed(packed) try: ret = unpacker._unpack() @@ -208,19 +208,24 @@ class Unpacker(object): You should set this parameter when unpacking data from untrusted source. :param int max_str_len: - Limits max length of str. (default: 1024*1024) + (deprecated) Limits max length of str. + (default: max_buffer_size or 1024*1024) :param int max_bin_len: - Limits max length of bin. (default: 1024*1024) + (deprecated) Limits max length of bin. + (default: max_buffer_size or 1024*1024) :param int max_array_len: - Limits max length of array. (default: 128*1024) + Limits max length of array. + (default: max_buffer_size or 128*1024) :param int max_map_len: - Limits max length of map. (default: 32*1024) + Limits max length of map. + (default: max_buffer_size//2 or 32*1024) :param int max_ext_len: - Limits max size of ext type. (default: 1024*1024) + (deprecated) Limits max size of ext type. + (default: max_buffer_size or 1024*1024) example of streaming deserialize from file-like object:: @@ -250,12 +255,11 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_ object_hook=None, object_pairs_hook=None, list_hook=None, encoding=None, unicode_errors=None, max_buffer_size=0, ext_hook=ExtType, - max_str_len=1024*1024, - max_bin_len=1024*1024, - max_array_len=128*1024, - max_map_len=32*1024, - max_ext_len=1024*1024): - + max_str_len=-1, + max_bin_len=-1, + max_array_len=-1, + max_map_len=-1, + max_ext_len=-1): if encoding is not None: warnings.warn( "encoding is deprecated, Use raw=False instead.", @@ -286,6 +290,17 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_ # state, which _buf_checkpoint records. 
self._buf_checkpoint = 0 + if max_str_len == -1: + max_str_len = max_buffer_size or 1024*1024 + if max_bin_len == -1: + max_bin_len = max_buffer_size or 1024*1024 + if max_array_len == -1: + max_array_len = max_buffer_size or 128*1024 + if max_map_len == -1: + max_map_len = max_buffer_size//2 or 32*1024 + if max_ext_len == -1: + max_ext_len = max_buffer_size or 1024*1024 + self._max_buffer_size = max_buffer_size or 2**31-1 if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") diff --git a/test/test_limits.py b/test/test_limits.py index 74e48c19..8c7606fa 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -105,7 +105,6 @@ def test_max_ext_len(): unpacker.unpack() - # PyPy fails following tests because of constant folding? # https://bugs.pypy.org/issue1721 #@pytest.mark.skipif(True, reason="Requires very large memory.") @@ -134,3 +133,27 @@ def test_max_ext_len(): # x.append(0) # with pytest.raises(ValueError): # packb(x) + + +# auto max len + +def test_auto_max_array_len(): + packed = b'\xde\x00\x06zz' + with pytest.raises(UnpackValueError): + unpackb(packed, raw=False) + + unpacker = Unpacker(max_buffer_size=5, raw=False) + unpacker.feed(packed) + with pytest.raises(UnpackValueError): + unpacker.unpack() + +def test_auto_max_map_len(): + # len(packed) == 6 -> max_map_len == 3 + packed = b'\xde\x00\x04zzz' + with pytest.raises(UnpackValueError): + unpackb(packed, raw=False) + + unpacker = Unpacker(max_buffer_size=6, raw=False) + unpacker.feed(packed) + with pytest.raises(UnpackValueError): + unpacker.unpack() From 464fe277e1165a5870d4edc040be9c9ac1c1df0c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 25 Jan 2019 20:52:57 +0900 Subject: [PATCH 174/349] Remove pytest warnings --- test/test_buffer.py | 5 ++++- test/test_extension.py | 10 ++++++++-- test/test_pack.py | 23 +++++++++++++++-------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/test/test_buffer.py b/test/test_buffer.py index 87f359f9..d723e8d6 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -7,7 +7,10 @@ def test_unpack_buffer(): from array import array buf = array('b') - buf.fromstring(packb((b'foo', b'bar'))) + try: + buf.frombytes(packb((b'foo', b'bar'))) + except AttributeError: # PY2 + buf.fromstring(packb((b'foo', b'bar'))) obj = unpackb(buf, use_list=1) assert [b'foo', b'bar'] == obj diff --git a/test/test_extension.py b/test/test_extension.py index d05d7ab9..8aa0cbb0 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -40,7 +40,10 @@ def default(obj): print('default called', obj) if isinstance(obj, array.array): typecode = 123 # application specific typecode - data = obj.tostring() + try: + data = obj.tobytes() + except AttributeError: + data = obj.tostring() return ExtType(typecode, data) raise TypeError("Unknown type object %r" % (obj,)) @@ -48,7 +51,10 @@ def ext_hook(code, data): print('ext_hook called', code, data) assert code == 123 obj = array.array('d') - obj.fromstring(data) + try: + obj.frombytes(data) + except AttributeError: # PY2 + obj.fromstring(data) return obj obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] diff --git a/test/test_pack.py b/test/test_pack.py index 46080832..3658a977 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -2,13 +2,15 @@ # coding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals +from collections import OrderedDict +from io import BytesIO import struct + +import pytest from pytest import raises, xfail from msgpack 
import packb, unpackb, Unpacker, Packer, pack -from collections import OrderedDict -from io import BytesIO def check(data, use_list=False): re = unpackb(packb(data), use_list=use_list) @@ -47,7 +49,8 @@ def testPackUTF32(): # deprecated "Русский текст", ] for td in test_data: - re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') + with pytest.deprecated_call(): + re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') assert re == td except LookupError as e: xfail(e) @@ -67,19 +70,23 @@ def testPackByteArrays(): check(td) def testIgnoreUnicodeErrors(): # deprecated - re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) + with pytest.deprecated_call(): + re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) assert re == "abcdef" def testStrictUnicodeUnpack(): - with raises(UnicodeDecodeError): - unpackb(packb(b'abc\xeddef'), raw=False, use_list=1) + packed = packb(b'abc\xeddef') + with pytest.raises(UnicodeDecodeError): + unpackb(packed, raw=False, use_list=1) def testStrictUnicodePack(): # deprecated with raises(UnicodeEncodeError): - packb("abc\xeddef", encoding='ascii', unicode_errors='strict') + with pytest.deprecated_call(): + packb("abc\xeddef", encoding='ascii', unicode_errors='strict') def testIgnoreErrorsPack(): # deprecated - re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw=False, use_list=1) + with pytest.deprecated_call(): + re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw=False, use_list=1) assert re == "abcdef" def testDecodeBinary(): From 9951b894555e4f9c7120375028e686f7420de92a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 25 Jan 2019 21:04:14 +0900 Subject: [PATCH 175/349] travis: Install new pytest --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1adbdc25..51917c50 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,7 +39,7 @@ matrix: install: - pip install -U pip - - pip install cython + - pip install -U cython pytest - make cython - pip install -e . From 280308e8ced50322414fd4f7426d56093a57dbf1 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 25 Jan 2019 21:27:46 +0900 Subject: [PATCH 176/349] Recommend max_buffer_len instead of max_(str|bin|ext)_len --- ChangeLog.rst | 18 ++++++++++++++++++ msgpack/_unpacker.pyx | 9 ++++++--- msgpack/fallback.py | 20 ++++++++++---------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 651ba62d..2c988db1 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,21 @@ +0.6.1 +====== + +Release Date: 2019-01-25 + +This release is for mitigating pain caused by v0.6.1 reduced max input limits +for security reason. + +* ``unpackb(data)`` configures ``max_*_len`` options from ``len(data)``, + instead of static default sizes. + +* ``Unpacker(max_buffer_len=N)`` configures ``max_*_len`` options from ``N``, + instead of static default sizes. + +* ``max_bin_len``, ``max_str_len``, and ``max_ext_len`` are deprecated. + Since this is minor release, it's document only deprecation. + + 0.6.0 ====== diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 38119c05..3c6d59e7 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -273,9 +273,11 @@ cdef class Unpacker(object): You should set this parameter when unpacking data from untrusted source. :param int max_str_len: + Deprecated, use *max_buffer_size* instead. 
Limits max length of str. (default: max_buffer_size or 1024*1024) :param int max_bin_len: + Deprecated, use *max_buffer_size* instead. Limits max length of bin. (default: max_buffer_size or 1024*1024) :param int max_array_len: @@ -285,10 +287,11 @@ cdef class Unpacker(object): Limits max length of map. (default: max_buffer_size//2 or 32*1024) :param int max_ext_len: + Deprecated, use *max_buffer_size* instead. Limits max size of ext type. (default: max_buffer_size or 1024*1024) :param str encoding: - Deprecated, use raw instead. + Deprecated, use ``raw=False`` instead. Encoding used for decoding msgpack raw. If it is None (default), msgpack raw is deserialized to Python bytes. @@ -298,13 +301,13 @@ cdef class Unpacker(object): Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False) + unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False) + unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024) while True: buf = sock.recv(1024**2) if not buf: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 7524448a..1aa3bdf6 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -208,12 +208,12 @@ class Unpacker(object): You should set this parameter when unpacking data from untrusted source. :param int max_str_len: - (deprecated) Limits max length of str. - (default: max_buffer_size or 1024*1024) + Deprecated, use *max_buffer_size* instead. + Limits max length of str. (default: max_buffer_size or 1024*1024) :param int max_bin_len: - (deprecated) Limits max length of bin. - (default: max_buffer_size or 1024*1024) + Deprecated, use *max_buffer_size* instead. + Limits max length of bin. (default: max_buffer_size or 1024*1024) :param int max_array_len: Limits max length of array. @@ -224,18 +224,18 @@ class Unpacker(object): (default: max_buffer_size//2 or 32*1024) :param int max_ext_len: - (deprecated) Limits max size of ext type. - (default: max_buffer_size or 1024*1024) + Deprecated, use *max_buffer_size* instead. + Limits max size of ext type. (default: max_buffer_size or 1024*1024) - example of streaming deserialize from file-like object:: + Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False) + unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024) for o in unpacker: process(o) - example of streaming deserialize from socket:: + Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False) + unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024) while True: buf = sock.recv(1024**2) if not buf: From 8f513af999d4abd39d632fcc8732225a658268ee Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 25 Jan 2019 21:43:28 +0900 Subject: [PATCH 177/349] v0.6.1 --- msgpack/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index 49a41034..926c5e7b 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 6, 0) +version = (0, 6, 1) From 381c2eff5f8ee0b8669fd6daf1fd1ecaffe7c931 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 4 Feb 2019 12:08:07 +0900 Subject: [PATCH 178/349] Fix changelog. 
Fixes #343 --- ChangeLog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 2c988db1..727ca9ab 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -3,7 +3,7 @@ Release Date: 2019-01-25 -This release is for mitigating pain caused by v0.6.1 reduced max input limits +This release is for mitigating pain caused by v0.6.0 reduced max input limits for security reason. * ``unpackb(data)`` configures ``max_*_len`` options from ``len(data)``, From 737f08a885dcff32aa1a417a45936d7f7810ee37 Mon Sep 17 00:00:00 2001 From: Hugues Date: Wed, 27 Mar 2019 06:37:26 -0700 Subject: [PATCH 179/349] Update requirements.txt (#346) bytearray.pxd is only available starting with Cython 0.29 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cd54e6df..e08dd4f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -Cython==0.27.3 +Cython~=0.29.5 From 05ff11dbcc8181cc781b121e46e76a01258a32af Mon Sep 17 00:00:00 2001 From: Felix Schwarz Date: Sun, 12 May 2019 14:44:32 +0200 Subject: [PATCH 180/349] use relative imports (#357) Some applications use msgpack to store persistent data and require a specific msgpack version (e.g. borgbackup). Bundling helps in case there is an (incompatible) version of msgpack in a system-wide install. --- msgpack/__init__.py | 10 +++++----- msgpack/_packer.pyx | 2 +- msgpack/_unpacker.pyx | 4 ++-- msgpack/fallback.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 7493c4c6..4ad9c1a5 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,6 +1,6 @@ # coding: utf-8 -from msgpack._version import version -from msgpack.exceptions import * +from ._version import version +from .exceptions import * from collections import namedtuple @@ -19,12 +19,12 @@ def __new__(cls, code, data): import os if os.environ.get('MSGPACK_PUREPYTHON'): - from msgpack.fallback import Packer, unpackb, Unpacker + from .fallback import Packer, unpackb, Unpacker else: try: - from msgpack._cmsgpack import Packer, unpackb, Unpacker + from ._cmsgpack import Packer, unpackb, Unpacker except ImportError: - from msgpack.fallback import Packer, unpackb, Unpacker + from .fallback import Packer, unpackb, Unpacker def pack(o, stream, **kwargs): diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index bfde043e..dcee2138 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -3,7 +3,7 @@ from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact -from msgpack import ExtType +from . import ExtType cdef extern from "Python.h": diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 3c6d59e7..3727f50c 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -12,14 +12,14 @@ from libc.string cimport * from libc.limits cimport * ctypedef unsigned long long uint64_t -from msgpack.exceptions import ( +from .exceptions import ( BufferFull, OutOfData, ExtraData, FormatError, StackError, ) -from msgpack import ExtType +from . import ExtType cdef extern from "unpack.h": diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 1aa3bdf6..3836e830 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -59,7 +59,7 @@ def getvalue(self): newlist_hint = lambda size: [] -from msgpack.exceptions import ( +from .exceptions import ( BufferFull, OutOfData, ExtraData, @@ -67,7 +67,7 @@ def getvalue(self): StackError, ) -from msgpack import ExtType +from . 
import ExtType EX_SKIP = 0 From b98b8cab99d7b2dbfe2b2211974564b7e22e9412 Mon Sep 17 00:00:00 2001 From: Marty B Date: Wed, 18 Sep 2019 18:15:09 +0200 Subject: [PATCH 181/349] Avoid calling __Pyx_GetModuleGlobalName for ExtType (#363) --- msgpack/_packer.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index dcee2138..e275ef20 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -3,6 +3,8 @@ from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact +cdef ExtType + from . import ExtType From 3146ebd330cbd02d0d7b4f82a94472cb395804ef Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 19 Sep 2019 13:20:57 +0900 Subject: [PATCH 182/349] Use Py_SIZE() when it is safe (#369) --- msgpack/_packer.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index e275ef20..2f4d1207 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -200,7 +200,7 @@ cdef class Packer(object): dval = o ret = msgpack_pack_double(&self.pk, dval) elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): - L = len(o) + L = Py_SIZE(o) if L > ITEM_LIMIT: PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) rawval = o @@ -214,7 +214,7 @@ cdef class Packer(object): raise ValueError("unicode string is too large") else: o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - L = len(o) + L = Py_SIZE(o) if L > ITEM_LIMIT: raise ValueError("unicode string is too large") ret = msgpack_pack_raw(&self.pk, L) @@ -254,7 +254,7 @@ cdef class Packer(object): ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): - L = len(o) + L = Py_SIZE(o) if L > ITEM_LIMIT: raise ValueError("list is too large") ret = msgpack_pack_array(&self.pk, L) From c25e2a0984ec5d092fee38eeb4b74676ada9aef4 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 19 Sep 2019 20:14:33 +0900 Subject: [PATCH 183/349] update Cython to 0.29.13 (#370) --- .travis.yml | 5 +++-- requirements.txt | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 51917c50..43c52597 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ matrix: - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686 install: - pip install -U pip - - pip install cython + - pip install -r requirements.txt - make cython - docker pull $DOCKER_IMAGE script: @@ -39,7 +39,8 @@ matrix: install: - pip install -U pip - - pip install -U cython pytest + - pip install -U pytest + - pip install -r requirements.txt - make cython - pip install -e . 
diff --git a/requirements.txt b/requirements.txt index e08dd4f0..a2cce258 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -Cython~=0.29.5 +Cython~=0.29.13 From fd3f0048633423651772526875611f125dda68f6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 19 Sep 2019 20:37:19 +0900 Subject: [PATCH 184/349] Add Python 3.8 to travis (#371) --- .travis.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 43c52597..c80bb379 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,16 +3,20 @@ language: python cache: pip python: + # Available Python (PyPy) can be listed by: + # + # $ aws s3 ls s3://travis-python-archives/binaries/ubuntu/16.04/x86_64/ - "2.7" - "3.4" - "3.5" - "3.6" - "3.7" - - "nightly" + - "3.8-dev" matrix: include: - - sudo: required + - name: 32bit build + sudo: required language: python services: - docker @@ -25,12 +29,14 @@ matrix: - docker pull $DOCKER_IMAGE script: - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh - - python: "pypy2.7-5.10.0" + - name: "pypy2.7" + python: "pypy2.7-7.1.1" install: - pip install -e . script: - py.test -v test - - python: "pypy3.5" + - name: "pypy3" + python: "pypy3.6-7.1.1" install: - pip install -e . script: From 144f276e885be867c1545226a60c99957dac04e0 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 20 Sep 2019 16:36:37 +0900 Subject: [PATCH 185/349] Update ChangeLog --- ChangeLog.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index 727ca9ab..a295e6b3 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,13 @@ +0.6.2 +===== + +Release Date: 2019-09-20 + +* Support Python 3.8. +* Update Cython to 0.29.13 for support Python 3.8. +* Some small optimizations. + + 0.6.1 ====== From 997b524f06176aaa6bd255a046a8746e99b4f87d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 20 Sep 2019 16:37:08 +0900 Subject: [PATCH 186/349] 0.6.2 --- msgpack/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index 926c5e7b..1e73a00f 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 6, 1) +version = (0, 6, 2) From b458e9a6a2cf69e483fa5994d227382c6a01f3c4 Mon Sep 17 00:00:00 2001 From: Terence Honles Date: Fri, 22 Nov 2019 19:58:55 -0800 Subject: [PATCH 187/349] update for Python 3.8 (#374) --- appveyor.yml | 4 ++++ docker/buildwheel.sh | 5 ++++- docker/runtests.sh | 5 ++++- docker/shared.env | 8 ++++++++ setup.py | 1 + tox.ini | 2 +- 6 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 docker/shared.env diff --git a/appveyor.yml b/appveyor.yml index bd0800ae..f338e177 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -29,6 +29,10 @@ test_script: - ci\\runtests.bat - set PYTHON="C:\\Python37-x64" - ci\\runtests.bat + - set PYTHON="C:\\Python38" + - ci\\runtests.bat + - set PYTHON="C:\\Python38-x64" + - ci\\runtests.bat after_test: # This step builds your wheels. 
diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index d8c74cb8..c953127d 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -1,10 +1,13 @@ #!/bin/bash +DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$DOCKER_DIR/shared.env" + set -e -x ARCH=`uname -p` echo "arch=$ARCH" -for V in cp37-cp37m cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do +for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python $PYBIN/python setup.py bdist_wheel -p manylinux1_${ARCH} diff --git a/docker/runtests.sh b/docker/runtests.sh index c6bbf60f..fa7e979b 100755 --- a/docker/runtests.sh +++ b/docker/runtests.sh @@ -1,7 +1,10 @@ #!/bin/bash +DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$DOCKER_DIR/shared.env" + set -e -x -for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do +for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin $PYBIN/python setup.py install rm -rf build/ # Avoid lib build by narrow Python is used by wide python diff --git a/docker/shared.env b/docker/shared.env new file mode 100644 index 00000000..b5772e32 --- /dev/null +++ b/docker/shared.env @@ -0,0 +1,8 @@ +PYTHON_VERSIONS=( + cp38-cp38 + cp37-cp37m + cp36-cp36m + cp35-cp35m + cp27-cp27m + cp27-cp27mu +) diff --git a/setup.py b/setup.py index eb9403f1..8b8d7a09 100755 --- a/setup.py +++ b/setup.py @@ -128,6 +128,7 @@ def __init__(self, *args, **kwargs): 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Intended Audience :: Developers', diff --git a/tox.ini b/tox.ini index 0945a6d6..4b059ffe 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = {py27,py35,py36,py37}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = {py27,py35,py36,py37,py38}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 [variants:pure] setenv= From 891f2d8743857bb75204f96b0469cb2ec90c7f79 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 28 Nov 2019 20:23:34 +0900 Subject: [PATCH 188/349] Drop Python 2 support from _cmsgpack (#376) --- .travis.yml | 10 ++++++++-- README.rst | 20 +++++++++++--------- docker/shared.env | 2 -- msgpack/__init__.py | 5 +++-- msgpack/_packer.pyx | 5 +---- msgpack/buff_converter.h | 20 -------------------- msgpack/fallback.py | 5 ++--- msgpack/unpack.h | 4 ---- setup.py | 20 +++++++++++--------- 9 files changed, 36 insertions(+), 55 deletions(-) diff --git a/.travis.yml b/.travis.yml index c80bb379..7b298af4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ python: # Available Python (PyPy) can be listed by: # # $ aws s3 ls s3://travis-python-archives/binaries/ubuntu/16.04/x86_64/ - - "2.7" - "3.4" - "3.5" - "3.6" @@ -41,7 +40,14 @@ matrix: - pip install -e . script: - pytest -v test - + - name: "Python 2 (fallback)" + python: "2.7" + install: + - pip install -U pip + - pip install -U pytest + - pip install . + script: + - pytest -v test install: - pip install -U pip diff --git a/README.rst b/README.rst index 94a4bb21..82b6c02a 100644 --- a/README.rst +++ b/README.rst @@ -76,10 +76,18 @@ Install $ pip install msgpack -PyPy -^^^^ +Pure Python implementation +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The extension module in msgpack (``msgpack._cmsgpack``) does not support +Python 2 and PyPy. 
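
A short illustrative sketch (not part of this patch) of how the pure Python implementation can be selected explicitly; it relies on the ``MSGPACK_PUREPYTHON`` switch in ``msgpack/__init__.py`` and must run before msgpack is first imported::

    import os

    os.environ["MSGPACK_PUREPYTHON"] = "1"   # force the msgpack.fallback implementation

    import msgpack

    # With the fallback selected, Packer/Unpacker come from msgpack.fallback.
    assert msgpack.Packer.__module__ == "msgpack.fallback"
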
+ +But msgpack provides a pure Python implementation (``msgpack.fallback``) +for PyPy and Python 2. + +Since the [pip](https://pip.pypa.io/) uses the pure Python implementation, +Python 2 support will not be dropped in foreseeable feature. -msgpack provides a pure Python implementation. PyPy can use this. Windows ^^^^^^^ @@ -88,12 +96,6 @@ When you can't use a binary distribution, you need to install Visual Studio or Windows SDK on Windows. Without extension, using pure Python implementation on CPython runs slowly. -For Python 2.7, `Microsoft Visual C++ Compiler for Python 2.7 `_ -is recommended solution. - -For Python 3.5, `Microsoft Visual Studio 2015 `_ -Community Edition or Express Edition can be used to build extension module. - How to use ---------- diff --git a/docker/shared.env b/docker/shared.env index b5772e32..17abdd8f 100644 --- a/docker/shared.env +++ b/docker/shared.env @@ -3,6 +3,4 @@ PYTHON_VERSIONS=( cp37-cp37m cp36-cp36m cp35-cp35m - cp27-cp27m - cp27-cp27mu ) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 4ad9c1a5..4112a164 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -2,6 +2,8 @@ from ._version import version from .exceptions import * +import os +import sys from collections import namedtuple @@ -17,8 +19,7 @@ def __new__(cls, code, data): return super(ExtType, cls).__new__(cls, code, data) -import os -if os.environ.get('MSGPACK_PUREPYTHON'): +if os.environ.get('MSGPACK_PUREPYTHON') or sys.version_info[0] == 2: from .fallback import Packer, unpackb, Unpacker else: try: diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 2f4d1207..e6209145 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -130,10 +130,7 @@ cdef class Packer(object): self._bencoding = encoding if encoding is None: - if PY_MAJOR_VERSION < 3: - self.encoding = 'utf-8' - else: - self.encoding = NULL + self.encoding = 'utf-8' else: self.encoding = self._bencoding diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h index bc7227ae..86b4196d 100644 --- a/msgpack/buff_converter.h +++ b/msgpack/buff_converter.h @@ -1,28 +1,8 @@ #include "Python.h" /* cython does not support this preprocessor check => write it in raw C */ -#if PY_MAJOR_VERSION == 2 -static PyObject * -buff_to_buff(char *buff, Py_ssize_t size) -{ - return PyBuffer_FromMemory(buff, size); -} - -#elif (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION >= 3) static PyObject * buff_to_buff(char *buff, Py_ssize_t size) { return PyMemoryView_FromMemory(buff, size, PyBUF_READ); } -#else -static PyObject * -buff_to_buff(char *buff, Py_ssize_t size) -{ - Py_buffer pybuf; - if (PyBuffer_FillInfo(&pybuf, NULL, buff, size, 1, PyBUF_FULL_RO) == -1) { - return NULL; - } - - return PyMemoryView_FromBuffer(&pybuf); -} -#endif diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 3836e830..1ed6e773 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -5,13 +5,12 @@ import warnings -if sys.version_info[0] == 2: - PY2 = True +PY2 = sys.version_info[0] == 2 +if PY2: int_types = (int, long) def dict_iteritems(d): return d.iteritems() else: - PY2 = False int_types = int unicode = str xrange = range diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 85dbbed5..bbce91c6 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -273,11 +273,7 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch return -1; } // length also includes the typecode, so the actual data is length-1 -#if PY_MAJOR_VERSION == 2 - py = PyObject_CallFunction(u->ext_hook, "(is#)", (int)typecode, 
pos, (Py_ssize_t)length-1); -#else py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); -#endif if (!py) return -1; *o = py; diff --git a/setup.py b/setup.py index 8b8d7a09..77b81c63 100755 --- a/setup.py +++ b/setup.py @@ -9,6 +9,11 @@ from distutils.command.build_ext import build_ext + +PYPY = hasattr(sys, "pypy_version_info") +PY2 = sys.version_info[0] == 2 + + # for building transitional package. TRANSITIONAL = False @@ -64,14 +69,11 @@ def build_extension(self, ext): if len(version) > 3 and version[3] != 'final': version_str += version[3] -# take care of extension modules. -if have_cython: - class Sdist(sdist): - def __init__(self, *args, **kwargs): - cythonize('msgpack/_cmsgpack.pyx') - sdist.__init__(self, *args, **kwargs) -else: - Sdist = sdist +# Cython is required for sdist +class Sdist(sdist): + def __init__(self, *args, **kwargs): + cythonize('msgpack/_cmsgpack.pyx') + sdist.__init__(self, *args, **kwargs) libraries = [] if sys.platform == 'win32': @@ -83,7 +85,7 @@ def __init__(self, *args, **kwargs): macros = [('__LITTLE_ENDIAN__', '1')] ext_modules = [] -if not hasattr(sys, 'pypy_version_info'): +if not PYPY and not PY2: ext_modules.append(Extension('msgpack._cmsgpack', sources=['msgpack/_cmsgpack.cpp'], libraries=libraries, From cc3a8665d6210e933bcfb9120bd0ceb15224f03e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 3 Dec 2019 17:46:28 +0900 Subject: [PATCH 189/349] Use Github Actions for Windows (#377) --- .github/workflows/windows.yaml | 70 ++++++++++++++++++++++++++++++++++ ci/runtests.bat | 2 +- ci/runtests.sh | 8 ++++ 3 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/windows.yaml create mode 100644 ci/runtests.sh diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml new file mode 100644 index 00000000..cecb8258 --- /dev/null +++ b/.github/workflows/windows.yaml @@ -0,0 +1,70 @@ +name: Build and test windows wheels +on: + push: + branches: + - master + - test + pull_request: + create: + +jobs: + build: + runs-on: windows-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + + - name: Cythonize + shell: bash + run: | + pip install -U Cython + make cython + #python setup.py sdist + + - name: Python 3.6 (amd64) + env: + PYTHON: "py -3.6-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.6 (x86) + env: + PYTHON: "py -3.6-32" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.7 (amd64) + env: + PYTHON: "py -3.7-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.7 (x86) + env: + PYTHON: "py -3.7-32" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.8 (amd64) + env: + PYTHON: "py -3.8-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.8 (x86) + env: + PYTHON: "py -3.8-32" + shell: bash + run: | + ci/runtests.sh + + - name: Upload Wheels + uses: actions/upload-artifact@v1 + with: + name: win-wheels + path: ./dist diff --git a/ci/runtests.bat b/ci/runtests.bat index 02404679..4ae2f708 100644 --- a/ci/runtests.bat +++ b/ci/runtests.bat @@ -2,7 +2,7 @@ %PYTHON%\python.exe setup.py build_ext -i %PYTHON%\python.exe setup.py install %PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))" -%PYTHON%\python.exe -c "from msgpack import _packer, _unpacker" +%PYTHON%\python.exe -c "from msgpack import _cmsgpack" %PYTHON%\python.exe setup.py bdist_wheel %PYTHON%\python.exe -m pytest -v test SET EL=%ERRORLEVEL% diff --git a/ci/runtests.sh b/ci/runtests.sh new file mode 100644 index 00000000..5d87f696 --- 
/dev/null +++ b/ci/runtests.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ex +${PYTHON} -VV +${PYTHON} -m pip install setuptools wheel pytest +${PYTHON} setup.py build_ext -if +${PYTHON} -c "from msgpack import _cmsgpack" +${PYTHON} setup.py bdist_wheel +${PYTHON} -m pytest -v test From e1ed0044bf31dc0d6ef951f6298de4f420170968 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 3 Dec 2019 18:54:01 +0900 Subject: [PATCH 190/349] Remove encoding/unicode_errors options from Packer (#378) --- msgpack/_packer.pyx | 46 +++++++-------------------------------------- msgpack/fallback.py | 26 ++----------------------- test/test_pack.py | 30 ----------------------------- 3 files changed, 9 insertions(+), 93 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index e6209145..2e698e16 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -89,19 +89,9 @@ cdef class Packer(object): Additionally tuples will not be serialized as lists. This is useful when trying to implement accurate serialization for python types. - - :param str unicode_errors: - Error handler for encoding unicode. (default: 'strict') - - :param str encoding: - (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') """ cdef msgpack_packer pk cdef object _default - cdef object _bencoding - cdef object _berrors - cdef const char *encoding - cdef const char *unicode_errors cdef bint strict_types cdef bool use_float cdef bint autoreset @@ -114,11 +104,11 @@ cdef class Packer(object): self.pk.buf_size = buf_size self.pk.length = 0 - def __init__(self, default=None, encoding=None, unicode_errors=None, - bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, + def __init__(self, default=None, + bint use_single_float=False, + bint autoreset=True, + bint use_bin_type=False, bint strict_types=False): - if encoding is not None: - PyErr_WarnEx(DeprecationWarning, "encoding is deprecated.", 1) self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -128,18 +118,6 @@ cdef class Packer(object): raise TypeError("default must be a callable.") self._default = default - self._bencoding = encoding - if encoding is None: - self.encoding = 'utf-8' - else: - self.encoding = self._bencoding - - self._berrors = unicode_errors - if unicode_errors is None: - self.unicode_errors = NULL - else: - self.unicode_errors = self._berrors - def __dealloc__(self): PyMem_Free(self.pk.buf) self.pk.buf = NULL @@ -205,19 +183,9 @@ cdef class Packer(object): if ret == 0: ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - if self.encoding == NULL and self.unicode_errors == NULL: - ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); - if ret == -2: - raise ValueError("unicode string is too large") - else: - o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("unicode string is too large") - ret = msgpack_pack_raw(&self.pk, L) - if ret == 0: - rawval = o - ret = msgpack_pack_raw_body(&self.pk, rawval, L) + ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); + if ret == -2: + raise ValueError("unicode string is too large") elif PyDict_CheckExact(o): d = o L = len(d) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 1ed6e773..5dab9065 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -752,32 +752,14 @@ class Packer(object): Additionally tuples will not be serialized as lists. 
This is useful when trying to implement accurate serialization for python types. - - :param str encoding: - (deprecated) Convert unicode to bytes with this encoding. (default: 'utf-8') - - :param str unicode_errors: - Error handler for encoding unicode. (default: 'strict') """ - def __init__(self, default=None, encoding=None, unicode_errors=None, + def __init__(self, default=None, use_single_float=False, autoreset=True, use_bin_type=False, strict_types=False): - if encoding is None: - encoding = 'utf_8' - else: - warnings.warn( - "encoding is deprecated, Use raw=False instead.", - DeprecationWarning, stacklevel=2) - - if unicode_errors is None: - unicode_errors = 'strict' - self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._encoding = encoding - self._unicode_errors = unicode_errors self._buffer = StringIO() if default is not None: if not callable(default): @@ -834,11 +816,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, unicode): - if self._encoding is None: - raise TypeError( - "Can't encode unicode string: " - "no encoding is specified") - obj = obj.encode(self._encoding, self._unicode_errors) + obj = obj.encode("utf-8") n = len(obj) if n >= 2**32: raise ValueError("String is too large") diff --git a/test/test_pack.py b/test/test_pack.py index 3658a977..194b2c92 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -40,21 +40,6 @@ def testPackUnicode(): re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack() assert re == td -def testPackUTF32(): # deprecated - try: - test_data = [ - "", - "abcd", - ["defgh"], - "Русский текст", - ] - for td in test_data: - with pytest.deprecated_call(): - re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') - assert re == td - except LookupError as e: - xfail(e) - def testPackBytes(): test_data = [ b"", b"abcd", (b"defgh",), @@ -69,26 +54,11 @@ def testPackByteArrays(): for td in test_data: check(td) -def testIgnoreUnicodeErrors(): # deprecated - with pytest.deprecated_call(): - re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) - assert re == "abcdef" - def testStrictUnicodeUnpack(): packed = packb(b'abc\xeddef') with pytest.raises(UnicodeDecodeError): unpackb(packed, raw=False, use_list=1) -def testStrictUnicodePack(): # deprecated - with raises(UnicodeEncodeError): - with pytest.deprecated_call(): - packb("abc\xeddef", encoding='ascii', unicode_errors='strict') - -def testIgnoreErrorsPack(): # deprecated - with pytest.deprecated_call(): - re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw=False, use_list=1) - assert re == "abcdef" - def testDecodeBinary(): re = unpackb(packb(b"abc"), encoding=None, use_list=1) assert re == b"abc" From a0480c760256b4afc18beaebd5e3c79de1d4ce56 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 3 Dec 2019 18:54:18 +0900 Subject: [PATCH 191/349] Update ChangeLog --- ChangeLog.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index a295e6b3..1352af83 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,13 @@ +1.0.0 +===== + +Release Date: TBD + +* Remove Python 2 support from the ``msgpack/_cmsgpack``. + ``msgpack/fallback`` still supports Python 2. +* Remove encoding and unicode_errors options from the Packer. 
+ + 0.6.2 ===== From 83ebb63c447a99c81d043eb6808bbfb50697a751 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 3 Dec 2019 20:53:11 +0900 Subject: [PATCH 192/349] Ressurect unicode_errors of the Packer. (#379) --- ChangeLog.rst | 2 +- msgpack/_packer.pyx | 34 +++++++++++++++++++++++++++------- msgpack/fallback.py | 11 ++++++++--- test/test_pack.py | 16 ++++++++++++++-- 4 files changed, 50 insertions(+), 13 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 1352af83..1d784af7 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -5,7 +5,7 @@ Release Date: TBD * Remove Python 2 support from the ``msgpack/_cmsgpack``. ``msgpack/fallback`` still supports Python 2. -* Remove encoding and unicode_errors options from the Packer. +* Remove ``encoding`` option from the Packer. 0.6.2 diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 2e698e16..8b1a392c 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -89,9 +89,15 @@ cdef class Packer(object): Additionally tuples will not be serialized as lists. This is useful when trying to implement accurate serialization for python types. + + :param str unicode_errors: + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. """ cdef msgpack_packer pk cdef object _default + cdef object _berrors + cdef const char *unicode_errors cdef bint strict_types cdef bool use_float cdef bint autoreset @@ -104,10 +110,8 @@ cdef class Packer(object): self.pk.buf_size = buf_size self.pk.length = 0 - def __init__(self, default=None, - bint use_single_float=False, - bint autoreset=True, - bint use_bin_type=False, + def __init__(self, *, default=None, unicode_errors=None, + bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, bint strict_types=False): self.use_float = use_single_float self.strict_types = strict_types @@ -118,6 +122,12 @@ cdef class Packer(object): raise TypeError("default must be a callable.") self._default = default + self._berrors = unicode_errors + if unicode_errors is None: + self.unicode_errors = NULL + else: + self.unicode_errors = self._berrors + def __dealloc__(self): PyMem_Free(self.pk.buf) self.pk.buf = NULL @@ -183,9 +193,19 @@ cdef class Packer(object): if ret == 0: ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); - if ret == -2: - raise ValueError("unicode string is too large") + if self.unicode_errors == NULL: + ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); + if ret == -2: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("unicode string is too large") + ret = msgpack_pack_raw(&self.pk, L) + if ret == 0: + rawval = o + ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyDict_CheckExact(o): d = o L = len(d) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 5dab9065..0c0c101c 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -667,7 +667,7 @@ def _unpack(self, execute=EX_CONSTRUCT): elif self._raw: obj = bytes(obj) else: - obj = obj.decode('utf_8') + obj = obj.decode('utf_8', self._unicode_errors) return obj if typ == TYPE_EXT: return self._ext_hook(n, bytes(obj)) @@ -752,14 +752,19 @@ class Packer(object): Additionally tuples will not be serialized as lists. This is useful when trying to implement accurate serialization for python types. 
+ + :param str unicode_errors: + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. """ - def __init__(self, default=None, + def __init__(self, default=None, unicode_errors=None, use_single_float=False, autoreset=True, use_bin_type=False, strict_types=False): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type + self._unicode_errors = unicode_errors or "strict" self._buffer = StringIO() if default is not None: if not callable(default): @@ -816,7 +821,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, unicode): - obj = obj.encode("utf-8") + obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) if n >= 2**32: raise ValueError("String is too large") diff --git a/test/test_pack.py b/test/test_pack.py index 194b2c92..b6752e5a 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -5,6 +5,7 @@ from collections import OrderedDict from io import BytesIO import struct +import sys import pytest from pytest import raises, xfail @@ -54,13 +55,24 @@ def testPackByteArrays(): for td in test_data: check(td) +@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates") +def testIgnoreUnicodeErrors(): + re = unpackb(packb(b'abc\xeddef', use_bin_type=False), + raw=False, unicode_errors='ignore') + assert re == "abcdef" + def testStrictUnicodeUnpack(): - packed = packb(b'abc\xeddef') + packed = packb(b'abc\xeddef', use_bin_type=False) with pytest.raises(UnicodeDecodeError): unpackb(packed, raw=False, use_list=1) +@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates") +def testIgnoreErrorsPack(): + re = unpackb(packb(u"abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors='ignore'), raw=False, use_list=1) + assert re == "abcdef" + def testDecodeBinary(): - re = unpackb(packb(b"abc"), encoding=None, use_list=1) + re = unpackb(packb(b"abc"), use_list=1) assert re == b"abc" def testPackFloat(): From e419cd8e2db6b8226bd681b52b6acfe70d8e6a86 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 3 Dec 2019 21:13:05 +0900 Subject: [PATCH 193/349] Remove encoding option from Unpacker. (#380) --- ChangeLog.rst | 2 +- msgpack/_unpacker.pyx | 36 ++++++++---------------------------- msgpack/fallback.py | 23 +++++------------------ msgpack/unpack.h | 5 +---- 4 files changed, 15 insertions(+), 51 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 1d784af7..d44b36a9 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -5,7 +5,7 @@ Release Date: TBD * Remove Python 2 support from the ``msgpack/_cmsgpack``. ``msgpack/fallback`` still supports Python 2. -* Remove ``encoding`` option from the Packer. +* Remove ``encoding`` option from the Packer and Unpacker. 
0.6.2 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 3727f50c..b2586860 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -31,7 +31,6 @@ cdef extern from "unpack.h": PyObject* object_hook PyObject* list_hook PyObject* ext_hook - char *encoding char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len @@ -58,7 +57,7 @@ cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, bint use_list, bint raw, bint strict_map_key, - const char* encoding, const char* unicode_errors, + const char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): @@ -99,7 +98,6 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("ext_hook must be a callable.") ctx.user.ext_hook = ext_hook - ctx.user.encoding = encoding ctx.user.unicode_errors = unicode_errors def default_read_extended_type(typecode, data): @@ -141,9 +139,9 @@ cdef inline int get_data_from_buffer(object obj, 1) return 1 -def unpackb(object packed, object object_hook=None, object list_hook=None, +def unpackb(object packed, *, object object_hook=None, object list_hook=None, bint use_list=True, bint raw=True, bint strict_map_key=False, - encoding=None, unicode_errors=None, + unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=-1, Py_ssize_t max_bin_len=-1, @@ -170,14 +168,9 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef Py_buffer view cdef char* buf = NULL cdef Py_ssize_t buf_len - cdef const char* cenc = NULL cdef const char* cerr = NULL cdef int new_protocol = 0 - if encoding is not None: - PyErr_WarnEx(DeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) - cenc = encoding - if unicode_errors is not None: cerr = unicode_errors @@ -196,7 +189,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, try: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, raw, strict_map_key, cenc, cerr, + use_list, raw, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -250,8 +243,6 @@ cdef class Unpacker(object): near future. So you must specify it explicitly for keeping backward compatibility. - *encoding* option which is deprecated overrides this option. - :param bool strict_map_key: If true, only str or bytes are accepted for map (dict) keys. It's False by default for backward-compatibility. @@ -290,11 +281,6 @@ cdef class Unpacker(object): Deprecated, use *max_buffer_size* instead. Limits max size of ext type. (default: max_buffer_size or 1024*1024) - :param str encoding: - Deprecated, use ``raw=False`` instead. - Encoding used for decoding msgpack raw. - If it is None (default), msgpack raw is deserialized to Python bytes. - :param str unicode_errors: Error handler used for decoding str type. (default: `'strict'`) @@ -330,7 +316,7 @@ cdef class Unpacker(object): cdef Py_ssize_t read_size # To maintain refcnt. 
cdef object object_hook, object_pairs_hook, list_hook, ext_hook - cdef object encoding, unicode_errors + cdef object unicode_errors cdef Py_ssize_t max_buffer_size cdef uint64_t stream_offset @@ -341,17 +327,16 @@ cdef class Unpacker(object): PyMem_Free(self.buf) self.buf = NULL - def __init__(self, file_like=None, Py_ssize_t read_size=0, + def __init__(self, file_like=None, *, Py_ssize_t read_size=0, bint use_list=True, bint raw=True, bint strict_map_key=False, object object_hook=None, object object_pairs_hook=None, object list_hook=None, - encoding=None, unicode_errors=None, Py_ssize_t max_buffer_size=0, + unicode_errors=None, Py_ssize_t max_buffer_size=0, object ext_hook=ExtType, Py_ssize_t max_str_len=-1, Py_ssize_t max_bin_len=-1, Py_ssize_t max_array_len=-1, Py_ssize_t max_map_len=-1, Py_ssize_t max_ext_len=-1): - cdef const char *cenc=NULL, cdef const char *cerr=NULL self.object_hook = object_hook @@ -392,17 +377,12 @@ cdef class Unpacker(object): self.buf_tail = 0 self.stream_offset = 0 - if encoding is not None: - PyErr_WarnEx(DeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1) - self.encoding = encoding - cenc = encoding - if unicode_errors is not None: self.unicode_errors = unicode_errors cerr = unicode_errors init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, raw, strict_map_key, cenc, cerr, + ext_hook, use_list, raw, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 0c0c101c..9e312138 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -176,8 +176,6 @@ class Unpacker(object): near future. So you must specify it explicitly for keeping backward compatibility. - *encoding* option which is deprecated overrides this option. - :param bool strict_map_key: If true, only str or bytes are accepted for map (dict) keys. It's False by default for backward-compatibility. @@ -193,13 +191,10 @@ class Unpacker(object): Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) - :param str encoding: - Encoding used for decoding msgpack raw. - If it is None (default), msgpack raw is deserialized to Python bytes. - :param str unicode_errors: - (deprecated) Used for decoding msgpack raw with *encoding*. - (default: `'strict'`) + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). 
@@ -252,18 +247,13 @@ class Unpacker(object): def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_map_key=False, object_hook=None, object_pairs_hook=None, list_hook=None, - encoding=None, unicode_errors=None, max_buffer_size=0, + unicode_errors=None, max_buffer_size=0, ext_hook=ExtType, max_str_len=-1, max_bin_len=-1, max_array_len=-1, max_map_len=-1, max_ext_len=-1): - if encoding is not None: - warnings.warn( - "encoding is deprecated, Use raw=False instead.", - DeprecationWarning, stacklevel=2) - if unicode_errors is None: unicode_errors = 'strict' @@ -306,7 +296,6 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_ self._read_size = read_size or min(self._max_buffer_size, 16*1024) self._raw = bool(raw) self._strict_map_key = bool(strict_map_key) - self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list self._list_hook = list_hook @@ -662,9 +651,7 @@ def _unpack(self, execute=EX_CONSTRUCT): if execute == EX_SKIP: return if typ == TYPE_RAW: - if self._encoding is not None: - obj = obj.decode(self._encoding, self._unicode_errors) - elif self._raw: + if self._raw: obj = bytes(obj) else: obj = obj.decode('utf_8', self._unicode_errors) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index bbce91c6..539a9916 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -27,7 +27,6 @@ typedef struct unpack_user { PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; - const char *encoding; const char *unicode_errors; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; @@ -232,9 +231,7 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* PyObject *py; - if (u->encoding) { - py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); - } else if (u->raw) { + if (u->raw) { py = PyBytes_FromStringAndSize(p, l); } else { py = PyUnicode_DecodeUTF8(p, l, u->unicode_errors); From 2c6668941f72e3bcb797d096437683eca4e3caf5 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 3 Dec 2019 21:18:17 +0900 Subject: [PATCH 194/349] Intern map keys (#381) Fixes #372. 
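
A small sketch of the intended effect (illustrative only, not part of the patch; it assumes Python 3 and decodes keys as str via ``raw=False``): once map keys are interned, every unpacked dict in a stream shares a single key object instead of one copy per map::

    import msgpack

    # One stream containing many maps that repeat the same key.
    packed = msgpack.packb([{"name": i} for i in range(3)], use_bin_type=True)
    rows = msgpack.unpackb(packed, raw=False)
    keys = [next(iter(row)) for row in rows]
    # With interned map keys, all dicts share one "name" str object.
    assert all(k is keys[0] for k in keys)
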
--- msgpack/fallback.py | 2 ++ msgpack/unpack.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9e312138..9a48b71e 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -644,6 +644,8 @@ def _unpack(self, execute=EX_CONSTRUCT): key = self._unpack(EX_CONSTRUCT) if self._strict_map_key and type(key) not in (unicode, bytes): raise ValueError("%s is not allowed for map key" % str(type(key))) + if not PY2 and type(key) is str: + key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: ret = self._object_hook(ret) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 539a9916..ead50958 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -192,6 +192,9 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key", Py_TYPE(k)->tp_name); return -1; } + if (PyUnicode_CheckExact(k)) { + PyUnicode_InternInPlace(&k); + } if (u->has_pairs_hook) { msgpack_unpack_object item = PyTuple_Pack(2, k, v); if (!item) From 641406902efaa8f22f4a7973d04c921a2a35a6be Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 18:29:15 +0900 Subject: [PATCH 195/349] Add Timestamp support (#382) --- docs/api.rst | 4 ++ msgpack/__init__.py | 14 +---- msgpack/_packer.pyx | 9 ++- msgpack/_unpacker.pyx | 5 +- msgpack/ext.py | 136 ++++++++++++++++++++++++++++++++++++++++ msgpack/fallback.py | 12 ++-- msgpack/pack_template.h | 33 ++++++++++ msgpack/unpack.h | 44 ++++++++++++- test/test_timestamp.py | 46 ++++++++++++++ 9 files changed, 283 insertions(+), 20 deletions(-) create mode 100644 msgpack/ext.py create mode 100644 test/test_timestamp.py diff --git a/docs/api.rst b/docs/api.rst index 6336793e..93827e19 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -27,6 +27,10 @@ API reference .. autoclass:: ExtType +.. autoclass:: Timestamp + :members: + :special-members: __init__ + exceptions ---------- diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 4112a164..ff66f461 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,22 +1,10 @@ # coding: utf-8 from ._version import version from .exceptions import * +from .ext import ExtType, Timestamp import os import sys -from collections import namedtuple - - -class ExtType(namedtuple('ExtType', 'code data')): - """ExtType represents ext type in msgpack.""" - def __new__(cls, code, data): - if not isinstance(code, int): - raise TypeError("code must be int") - if not isinstance(data, bytes): - raise TypeError("data must be bytes") - if not 0 <= code <= 127: - raise ValueError("code must be 0~127") - return super(ExtType, cls).__new__(cls, code, data) if os.environ.get('MSGPACK_PUREPYTHON') or sys.version_info[0] == 2: diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 8b1a392c..f3bde3f5 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -4,8 +4,9 @@ from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact cdef ExtType +cdef Timestamp -from . 
import ExtType +from .ext import ExtType, Timestamp cdef extern from "Python.h": @@ -36,6 +37,7 @@ cdef extern from "pack.h": int msgpack_pack_bin(msgpack_packer* pk, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) cdef extern from "buff_converter.h": @@ -135,6 +137,7 @@ cdef class Packer(object): cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: cdef long long llval cdef unsigned long long ullval + cdef unsigned long ulval cdef long longval cdef float fval cdef double dval @@ -238,6 +241,10 @@ cdef class Packer(object): raise ValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + ret = msgpack_pack_timestamp(&self.pk, llval, ulval) elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): L = Py_SIZE(o) if L > ITEM_LIMIT: diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index b2586860..6dedd30a 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -19,7 +19,7 @@ from .exceptions import ( FormatError, StackError, ) -from . import ExtType +from .ext import ExtType, Timestamp cdef extern from "unpack.h": @@ -31,6 +31,7 @@ cdef extern from "unpack.h": PyObject* object_hook PyObject* list_hook PyObject* ext_hook + PyObject* timestamp_t char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len @@ -98,6 +99,8 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("ext_hook must be a callable.") ctx.user.ext_hook = ext_hook + # Add Timestamp type to the user object so it may be used in unpack.h + ctx.user.timestamp_t = Timestamp ctx.user.unicode_errors = unicode_errors def default_read_extended_type(typecode, data): diff --git a/msgpack/ext.py b/msgpack/ext.py new file mode 100644 index 00000000..1a0f8fe7 --- /dev/null +++ b/msgpack/ext.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from collections import namedtuple +import sys +import struct + + +PY2 = sys.version_info[0] == 2 +if not PY2: + long = int + + +class ExtType(namedtuple('ExtType', 'code data')): + """ExtType represents ext type in msgpack.""" + def __new__(cls, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if code == -1: + return Timestamp.from_bytes(data) + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + return super(ExtType, cls).__new__(cls, code, data) + + +class Timestamp(object): + """Timestamp represents the Timestamp extension type in msgpack. + + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python + msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`. + """ + __slots__ = ["seconds", "nanoseconds"] + + def __init__(self, seconds, nanoseconds=0): + """Initialize a Timestamp object. + + :param seconds: Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). May be + negative. If :code:`seconds` includes a fractional part, :code:`nanoseconds` must be 0. + :type seconds: int or float + + :param nanoseconds: Number of nanoseconds to add to `seconds` to get fractional time. 
Maximum is 999_999_999. + Default is 0. + :type nanoseconds: int + + Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. + """ + if not isinstance(seconds, (int, long, float)): + raise TypeError("seconds must be numeric") + if not isinstance(nanoseconds, (int, long)): + raise TypeError("nanoseconds must be an integer") + if nanoseconds: + if nanoseconds < 0 or nanoseconds % 1 != 0 or nanoseconds > (1e9 - 1): + raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") + if not isinstance(seconds, (int, long)): + raise ValueError("seconds must be an integer if also providing nanoseconds.") + self.nanoseconds = nanoseconds + else: + # round helps with floating point issues + self.nanoseconds = int(round(seconds % 1 * 1e9, 0)) + self.seconds = int(seconds // 1) + + def __repr__(self): + """String representation of Timestamp.""" + return "Timestamp(seconds={0}, nanoseconds={1})".format(self.seconds, self.nanoseconds) + + def __eq__(self, other): + """Check for equality with another Timestamp object""" + if type(other) is self.__class__: + return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + return False + + def __ne__(self, other): + """not-equals method (see :func:`__eq__()`)""" + return not self.__eq__(other) + + @staticmethod + def from_bytes(b): + """Unpack bytes into a `Timestamp` object. + + Used for pure-Python msgpack unpacking. + + :param b: Payload from msgpack ext message with code -1 + :type b: bytes + + :returns: Timestamp object unpacked from msgpack ext payload + :rtype: Timestamp + """ + if len(b) == 4: + seconds = struct.unpack("!L", b)[0] + nanoseconds = 0 + elif len(b) == 8: + data64 = struct.unpack("!Q", b)[0] + seconds = data64 & 0x00000003ffffffff + nanoseconds = data64 >> 34 + elif len(b) == 12: + nanoseconds, seconds = struct.unpack("!Iq", b) + else: + raise ValueError("Timestamp type can only be created from 32, 64, or 96-bit byte objects") + return Timestamp(seconds, nanoseconds) + + def to_bytes(self): + """Pack this Timestamp object into bytes. + + Used for pure-Python msgpack packing. + + :returns data: Payload for EXT message with code -1 (timestamp type) + :rtype: bytes + """ + if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits + data64 = self.nanoseconds << 34 | self.seconds + if data64 & 0xffffffff00000000 == 0: + # nanoseconds is zero and seconds < 2**32, so timestamp 32 + data = struct.pack("!L", data64) + else: + # timestamp 64 + data = struct.pack("!Q", data64) + else: + # timestamp 96 + data = struct.pack("!Iq", self.nanoseconds, self.seconds) + return data + + def to_float_s(self): + """Get the timestamp as a floating-point value. + + :returns: posix timestamp + :rtype: float + """ + return self.seconds + self.nanoseconds/1e9 + + def to_unix_ns(self): + """Get the timestamp as a unixtime in nanoseconds. + + :returns: posix timestamp in nanoseconds + :rtype: int + """ + return int(self.seconds * 1e9 + self.nanoseconds) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9a48b71e..55e66f51 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -66,7 +66,7 @@ def getvalue(self): StackError, ) -from . 
import ExtType +from .ext import ExtType, Timestamp EX_SKIP = 0 @@ -826,9 +826,13 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, if self._use_float: return self._buffer.write(struct.pack(">Bf", 0xca, obj)) return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) - if check(obj, ExtType): - code = obj.code - data = obj.data + if check(obj, (ExtType, Timestamp)): + if check(obj, Timestamp): + code = -1 + data = obj.to_bytes() + else: + code = obj.code + data = obj.data assert isinstance(code, int) assert isinstance(data, bytes) L = len(data) diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 69982f4d..0e940b84 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -759,6 +759,39 @@ static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) } +/* + * Pack Timestamp extension type. Follows msgpack-c pack_template.h. + */ +static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds) +{ + if ((seconds >> 34) == 0) { + /* seconds is unsigned and fits in 34 bits */ + uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds; + if ((data64 & 0xffffffff00000000L) == 0) { + /* no nanoseconds and seconds is 32bits or smaller. timestamp32. */ + unsigned char buf[4]; + uint32_t data32 = (uint32_t)data64; + msgpack_pack_ext(x, -1, 4); + _msgpack_store32(buf, data32); + msgpack_pack_raw_body(x, buf, 4); + } else { + /* timestamp64 */ + unsigned char buf[8]; + msgpack_pack_ext(x, -1, 8); + _msgpack_store64(buf, data64); + msgpack_pack_raw_body(x, buf, 8); + + } + } else { + /* seconds is signed or >34bits */ + unsigned char buf[12]; + _msgpack_store32(&buf[0], nanoseconds); + _msgpack_store64(&buf[4], seconds); + msgpack_pack_ext(x, -1, 12); + msgpack_pack_raw_body(x, buf, 12); + } + return 0; +} #undef msgpack_pack_append_buffer diff --git a/msgpack/unpack.h b/msgpack/unpack.h index ead50958..4380ec55 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -27,6 +27,7 @@ typedef struct unpack_user { PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; + PyObject *timestamp_t; const char *unicode_errors; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; @@ -259,6 +260,38 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* return 0; } +typedef struct msgpack_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; +} msgpack_timestamp; + +/* + * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h. 
+ */ +static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { + switch (buflen) { + case 4: + ts->tv_nsec = 0; + { + uint32_t v = _msgpack_load32(uint32_t, buf); + ts->tv_sec = (int64_t)v; + } + return 0; + case 8: { + uint64_t value =_msgpack_load64(uint64_t, buf); + ts->tv_nsec = (uint32_t)(value >> 34); + ts->tv_sec = value & 0x00000003ffffffffLL; + return 0; + } + case 12: + ts->tv_nsec = _msgpack_load32(uint32_t, buf); + ts->tv_sec = _msgpack_load64(int64_t, buf + 4); + return 0; + default: + return -1; + } +} + static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, unsigned int length, msgpack_unpack_object* o) { @@ -273,7 +306,16 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch return -1; } // length also includes the typecode, so the actual data is length-1 - py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); + if (typecode == -1) { + msgpack_timestamp ts; + if (unpack_timestamp(pos, length-1, &ts) == 0) { + py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); + } else { + py = NULL; + } + } else { + py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); + } if (!py) return -1; *o = py; diff --git a/test/test_timestamp.py b/test/test_timestamp.py new file mode 100644 index 00000000..55c2f6dd --- /dev/null +++ b/test/test_timestamp.py @@ -0,0 +1,46 @@ +import msgpack +from msgpack import Timestamp + + +def test_timestamp(): + # timestamp32 + ts = Timestamp(2**32 - 1) + assert ts.to_bytes() == b"\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xd6\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0 + + # timestamp64 + ts = Timestamp(2**34 - 1, 999999999) + assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xd7\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999 + + # timestamp96 + ts = Timestamp(2**63 - 1, 999999999) + assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff" + packed = msgpack.packb(ts) + assert packed == b"\xc7\x0c\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999 + + # negative fractional + ts = Timestamp(-2.3) #s: -3, ns: 700000000 + assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd" + packed = msgpack.packb(ts) + assert packed == b"\xc7\x0c\xff" + ts.to_bytes() + unpacked = msgpack.unpackb(packed) + assert ts == unpacked + assert ts.seconds == -3 and ts.nanoseconds == 700000000 + + +def test_timestamp_to(): + t = Timestamp(42, 14) + assert t.to_float_s() == 42.000000014 + assert t.to_unix_ns() == 42000000014 From e557e17cbd4e88622e48547ac52834e9ab95f946 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 18:50:13 +0900 Subject: [PATCH 196/349] blacken --- msgpack/__init__.py | 2 +- msgpack/ext.py | 30 +++- msgpack/fallback.py | 393 +++++++++++++++++++++++++------------------- 3 files changed, 245 insertions(+), 180 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index ff66f461..d6705e22 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -7,7 +7,7 @@ import sys -if os.environ.get('MSGPACK_PUREPYTHON') or 
sys.version_info[0] == 2: +if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: from .fallback import Packer, unpackb, Unpacker else: try: diff --git a/msgpack/ext.py b/msgpack/ext.py index 1a0f8fe7..c7efff61 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -9,8 +9,9 @@ long = int -class ExtType(namedtuple('ExtType', 'code data')): +class ExtType(namedtuple("ExtType", "code data")): """ExtType represents ext type in msgpack.""" + def __new__(cls, code, data): if not isinstance(code, int): raise TypeError("code must be int") @@ -29,6 +30,7 @@ class Timestamp(object): When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`. """ + __slots__ = ["seconds", "nanoseconds"] def __init__(self, seconds, nanoseconds=0): @@ -50,9 +52,13 @@ def __init__(self, seconds, nanoseconds=0): raise TypeError("nanoseconds must be an integer") if nanoseconds: if nanoseconds < 0 or nanoseconds % 1 != 0 or nanoseconds > (1e9 - 1): - raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") + raise ValueError( + "nanoseconds must be a non-negative integer less than 999999999." + ) if not isinstance(seconds, (int, long)): - raise ValueError("seconds must be an integer if also providing nanoseconds.") + raise ValueError( + "seconds must be an integer if also providing nanoseconds." + ) self.nanoseconds = nanoseconds else: # round helps with floating point issues @@ -61,12 +67,16 @@ def __init__(self, seconds, nanoseconds=0): def __repr__(self): """String representation of Timestamp.""" - return "Timestamp(seconds={0}, nanoseconds={1})".format(self.seconds, self.nanoseconds) + return "Timestamp(seconds={0}, nanoseconds={1})".format( + self.seconds, self.nanoseconds + ) def __eq__(self, other): """Check for equality with another Timestamp object""" if type(other) is self.__class__: - return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + return ( + self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + ) return False def __ne__(self, other): @@ -90,12 +100,14 @@ def from_bytes(b): nanoseconds = 0 elif len(b) == 8: data64 = struct.unpack("!Q", b)[0] - seconds = data64 & 0x00000003ffffffff + seconds = data64 & 0x00000003FFFFFFFF nanoseconds = data64 >> 34 elif len(b) == 12: nanoseconds, seconds = struct.unpack("!Iq", b) else: - raise ValueError("Timestamp type can only be created from 32, 64, or 96-bit byte objects") + raise ValueError( + "Timestamp type can only be created from 32, 64, or 96-bit byte objects" + ) return Timestamp(seconds, nanoseconds) def to_bytes(self): @@ -108,7 +120,7 @@ def to_bytes(self): """ if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits data64 = self.nanoseconds << 34 | self.seconds - if data64 & 0xffffffff00000000 == 0: + if data64 & 0xFFFFFFFF00000000 == 0: # nanoseconds is zero and seconds < 2**32, so timestamp 32 data = struct.pack("!L", data64) else: @@ -125,7 +137,7 @@ def to_float_s(self): :returns: posix timestamp :rtype: float """ - return self.seconds + self.nanoseconds/1e9 + return self.seconds + self.nanoseconds / 1e9 def to_unix_ns(self): """Get the timestamp as a unixtime in nanoseconds. 
diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 55e66f51..577e571a 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -8,53 +8,72 @@ PY2 = sys.version_info[0] == 2 if PY2: int_types = (int, long) + def dict_iteritems(d): return d.iteritems() + + else: int_types = int unicode = str xrange = range + def dict_iteritems(d): return d.items() + if sys.version_info < (3, 5): # Ugly hack... RecursionError = RuntimeError def _is_recursionerror(e): - return len(e.args) == 1 and isinstance(e.args[0], str) and \ - e.args[0].startswith('maximum recursion depth exceeded') + return ( + len(e.args) == 1 + and isinstance(e.args[0], str) + and e.args[0].startswith("maximum recursion depth exceeded") + ) + + else: + def _is_recursionerror(e): return True -if hasattr(sys, 'pypy_version_info'): + +if hasattr(sys, "pypy_version_info"): # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own # StringBuilder is fastest. from __pypy__ import newlist_hint + try: from __pypy__.builders import BytesBuilder as StringBuilder except ImportError: from __pypy__.builders import StringBuilder USING_STRINGBUILDER = True + class StringIO(object): - def __init__(self, s=b''): + def __init__(self, s=b""): if s: self.builder = StringBuilder(len(s)) self.builder.append(s) else: self.builder = StringBuilder() + def write(self, s): if isinstance(s, memoryview): s = s.tobytes() elif isinstance(s, bytearray): s = bytes(s) self.builder.append(s) + def getvalue(self): return self.builder.build() + + else: USING_STRINGBUILDER = False from io import BytesIO as StringIO + newlist_hint = lambda size: [] @@ -69,17 +88,17 @@ def getvalue(self): from .ext import ExtType, Timestamp -EX_SKIP = 0 -EX_CONSTRUCT = 1 -EX_READ_ARRAY_HEADER = 2 -EX_READ_MAP_HEADER = 3 +EX_SKIP = 0 +EX_CONSTRUCT = 1 +EX_READ_ARRAY_HEADER = 2 +EX_READ_MAP_HEADER = 3 -TYPE_IMMEDIATE = 0 -TYPE_ARRAY = 1 -TYPE_MAP = 2 -TYPE_RAW = 3 -TYPE_BIN = 4 -TYPE_EXT = 5 +TYPE_IMMEDIATE = 0 +TYPE_ARRAY = 1 +TYPE_MAP = 2 +TYPE_RAW = 3 +TYPE_BIN = 4 +TYPE_EXT = 5 DEFAULT_RECURSE_LIMIT = 511 @@ -98,10 +117,13 @@ def _get_data_from_buffer(obj): # try to use legacy buffer protocol if 2.7, otherwise re-raise if PY2: view = memoryview(buffer(obj)) - warnings.warn("using old buffer interface to unpack %s; " - "this leads to unpacking errors if slicing is used and " - "will be removed in a future version" % type(obj), - RuntimeWarning, stacklevel=3) + warnings.warn( + "using old buffer interface to unpack %s; " + "this leads to unpacking errors if slicing is used and " + "will be removed in a future version" % type(obj), + RuntimeWarning, + stacklevel=3, + ) else: raise if view.itemsize != 1: @@ -112,7 +134,9 @@ def _get_data_from_buffer(obj): def unpack(stream, **kwargs): warnings.warn( "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", - DeprecationWarning, stacklevel=2) + DeprecationWarning, + stacklevel=2, + ) data = stream.read() return unpackb(data, **kwargs) @@ -145,9 +169,12 @@ def unpackb(packed, **kwargs): if sys.version_info < (2, 7, 6): + def _unpack_from(f, b, o=0): """Explicit typcast for legacy struct.unpack_from""" return struct.unpack_from(f, bytes(b), o) + + else: _unpack_from = struct.unpack_from @@ -245,17 +272,27 @@ class Unpacker(object): Other exceptions can be raised during unpacking. 
""" - def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_map_key=False, - object_hook=None, object_pairs_hook=None, list_hook=None, - unicode_errors=None, max_buffer_size=0, - ext_hook=ExtType, - max_str_len=-1, - max_bin_len=-1, - max_array_len=-1, - max_map_len=-1, - max_ext_len=-1): + def __init__( + self, + file_like=None, + read_size=0, + use_list=True, + raw=True, + strict_map_key=False, + object_hook=None, + object_pairs_hook=None, + list_hook=None, + unicode_errors=None, + max_buffer_size=0, + ext_hook=ExtType, + max_str_len=-1, + max_bin_len=-1, + max_array_len=-1, + max_map_len=-1, + max_ext_len=-1, + ): if unicode_errors is None: - unicode_errors = 'strict' + unicode_errors = "strict" if file_like is None: self._feeding = True @@ -280,20 +317,20 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_ self._buf_checkpoint = 0 if max_str_len == -1: - max_str_len = max_buffer_size or 1024*1024 + max_str_len = max_buffer_size or 1024 * 1024 if max_bin_len == -1: - max_bin_len = max_buffer_size or 1024*1024 + max_bin_len = max_buffer_size or 1024 * 1024 if max_array_len == -1: - max_array_len = max_buffer_size or 128*1024 + max_array_len = max_buffer_size or 128 * 1024 if max_map_len == -1: - max_map_len = max_buffer_size//2 or 32*1024 + max_map_len = max_buffer_size // 2 or 32 * 1024 if max_ext_len == -1: - max_ext_len = max_buffer_size or 1024*1024 + max_ext_len = max_buffer_size or 1024 * 1024 - self._max_buffer_size = max_buffer_size or 2**31-1 + self._max_buffer_size = max_buffer_size or 2 ** 31 - 1 if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") - self._read_size = read_size or min(self._max_buffer_size, 16*1024) + self._read_size = read_size or min(self._max_buffer_size, 16 * 1024) self._raw = bool(raw) self._strict_map_key = bool(strict_map_key) self._unicode_errors = unicode_errors @@ -310,26 +347,27 @@ def __init__(self, file_like=None, read_size=0, use_list=True, raw=True, strict_ self._stream_offset = 0 if list_hook is not None and not callable(list_hook): - raise TypeError('`list_hook` is not callable') + raise TypeError("`list_hook` is not callable") if object_hook is not None and not callable(object_hook): - raise TypeError('`object_hook` is not callable') + raise TypeError("`object_hook` is not callable") if object_pairs_hook is not None and not callable(object_pairs_hook): - raise TypeError('`object_pairs_hook` is not callable') + raise TypeError("`object_pairs_hook` is not callable") if object_hook is not None and object_pairs_hook is not None: - raise TypeError("object_pairs_hook and object_hook are mutually " - "exclusive") + raise TypeError( + "object_pairs_hook and object_hook are mutually " "exclusive" + ) if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") def feed(self, next_bytes): assert self._feeding view = _get_data_from_buffer(next_bytes) - if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size): + if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size: raise BufferFull # Strip buffer before checkpoint before reading file. 
if self._buf_checkpoint > 0: - del self._buffer[:self._buf_checkpoint] + del self._buffer[: self._buf_checkpoint] self._buff_i -= self._buf_checkpoint self._buf_checkpoint = 0 @@ -345,7 +383,7 @@ def _got_extradata(self): return self._buff_i < len(self._buffer) def _get_extradata(self): - return self._buffer[self._buff_i:] + return self._buffer[self._buff_i :] def read_bytes(self, n): return self._read(n) @@ -354,8 +392,8 @@ def _read(self, n): # (int) -> bytearray self._reserve(n) i = self._buff_i - self._buff_i = i+n - return self._buffer[i:i+n] + self._buff_i = i + n + return self._buffer[i : i + n] def _reserve(self, n): remain_bytes = len(self._buffer) - self._buff_i - n @@ -370,7 +408,7 @@ def _reserve(self, n): # Strip buffer before checkpoint before reading file. if self._buf_checkpoint > 0: - del self._buffer[:self._buf_checkpoint] + del self._buffer[: self._buf_checkpoint] self._buff_i -= self._buf_checkpoint self._buf_checkpoint = 0 @@ -399,7 +437,7 @@ def _read_header(self, execute=EX_CONSTRUCT): if b & 0b10000000 == 0: obj = b elif b & 0b11100000 == 0b11100000: - obj = -1 - (b ^ 0xff) + obj = -1 - (b ^ 0xFF) elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 typ = TYPE_RAW @@ -416,13 +454,13 @@ def _read_header(self, execute=EX_CONSTRUCT): typ = TYPE_MAP if n > self._max_map_len: raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - elif b == 0xc0: + elif b == 0xC0: obj = None - elif b == 0xc2: + elif b == 0xC2: obj = False - elif b == 0xc3: + elif b == 0xC3: obj = True - elif b == 0xc4: + elif b == 0xC4: typ = TYPE_BIN self._reserve(1) n = self._buffer[self._buff_i] @@ -430,7 +468,7 @@ def _read_header(self, execute=EX_CONSTRUCT): if n > self._max_bin_len: raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) - elif b == 0xc5: + elif b == 0xC5: typ = TYPE_BIN self._reserve(2) n = _unpack_from(">H", self._buffer, self._buff_i)[0] @@ -438,7 +476,7 @@ def _read_header(self, execute=EX_CONSTRUCT): if n > self._max_bin_len: raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) - elif b == 0xc6: + elif b == 0xC6: typ = TYPE_BIN self._reserve(4) n = _unpack_from(">I", self._buffer, self._buff_i)[0] @@ -446,106 +484,106 @@ def _read_header(self, execute=EX_CONSTRUCT): if n > self._max_bin_len: raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) - elif b == 0xc7: # ext 8 + elif b == 0xC7: # ext 8 typ = TYPE_EXT self._reserve(2) - L, n = _unpack_from('Bb', self._buffer, self._buff_i) + L, n = _unpack_from("Bb", self._buffer, self._buff_i) self._buff_i += 2 if L > self._max_ext_len: raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) - elif b == 0xc8: # ext 16 + elif b == 0xC8: # ext 16 typ = TYPE_EXT self._reserve(3) - L, n = _unpack_from('>Hb', self._buffer, self._buff_i) + L, n = _unpack_from(">Hb", self._buffer, self._buff_i) self._buff_i += 3 if L > self._max_ext_len: raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) - elif b == 0xc9: # ext 32 + elif b == 0xC9: # ext 32 typ = TYPE_EXT self._reserve(5) - L, n = _unpack_from('>Ib', self._buffer, self._buff_i) + L, n = _unpack_from(">Ib", self._buffer, self._buff_i) self._buff_i += 5 if L > self._max_ext_len: raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) - elif b == 0xca: + elif b == 0xCA: self._reserve(4) obj = _unpack_from(">f", self._buffer, self._buff_i)[0] self._buff_i 
+= 4 - elif b == 0xcb: + elif b == 0xCB: self._reserve(8) obj = _unpack_from(">d", self._buffer, self._buff_i)[0] self._buff_i += 8 - elif b == 0xcc: + elif b == 0xCC: self._reserve(1) obj = self._buffer[self._buff_i] self._buff_i += 1 - elif b == 0xcd: + elif b == 0xCD: self._reserve(2) obj = _unpack_from(">H", self._buffer, self._buff_i)[0] self._buff_i += 2 - elif b == 0xce: + elif b == 0xCE: self._reserve(4) obj = _unpack_from(">I", self._buffer, self._buff_i)[0] self._buff_i += 4 - elif b == 0xcf: + elif b == 0xCF: self._reserve(8) obj = _unpack_from(">Q", self._buffer, self._buff_i)[0] self._buff_i += 8 - elif b == 0xd0: + elif b == 0xD0: self._reserve(1) obj = _unpack_from("b", self._buffer, self._buff_i)[0] self._buff_i += 1 - elif b == 0xd1: + elif b == 0xD1: self._reserve(2) obj = _unpack_from(">h", self._buffer, self._buff_i)[0] self._buff_i += 2 - elif b == 0xd2: + elif b == 0xD2: self._reserve(4) obj = _unpack_from(">i", self._buffer, self._buff_i)[0] self._buff_i += 4 - elif b == 0xd3: + elif b == 0xD3: self._reserve(8) obj = _unpack_from(">q", self._buffer, self._buff_i)[0] self._buff_i += 8 - elif b == 0xd4: # fixext 1 + elif b == 0xD4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) self._reserve(2) n, obj = _unpack_from("b1s", self._buffer, self._buff_i) self._buff_i += 2 - elif b == 0xd5: # fixext 2 + elif b == 0xD5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) self._reserve(3) n, obj = _unpack_from("b2s", self._buffer, self._buff_i) self._buff_i += 3 - elif b == 0xd6: # fixext 4 + elif b == 0xD6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) self._reserve(5) n, obj = _unpack_from("b4s", self._buffer, self._buff_i) self._buff_i += 5 - elif b == 0xd7: # fixext 8 + elif b == 0xD7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) self._reserve(9) n, obj = _unpack_from("b8s", self._buffer, self._buff_i) self._buff_i += 9 - elif b == 0xd8: # fixext 16 + elif b == 0xD8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) self._reserve(17) n, obj = _unpack_from("b16s", self._buffer, self._buff_i) self._buff_i += 17 - elif b == 0xd9: + elif b == 0xD9: typ = TYPE_RAW self._reserve(1) n = self._buffer[self._buff_i] @@ -553,46 +591,46 @@ def _read_header(self, execute=EX_CONSTRUCT): if n > self._max_str_len: raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) - elif b == 0xda: + elif b == 0xDA: typ = TYPE_RAW self._reserve(2) - n, = _unpack_from(">H", self._buffer, self._buff_i) + (n,) = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_str_len: raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) - elif b == 0xdb: + elif b == 0xDB: typ = TYPE_RAW self._reserve(4) - n, = _unpack_from(">I", self._buffer, self._buff_i) + (n,) = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_str_len: raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) - elif b == 0xdc: + elif b == 0xDC: typ = TYPE_ARRAY self._reserve(2) - n, = _unpack_from(">H", self._buffer, self._buff_i) + (n,) = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > 
self._max_array_len: raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xdd: + elif b == 0xDD: typ = TYPE_ARRAY self._reserve(4) - n, = _unpack_from(">I", self._buffer, self._buff_i) + (n,) = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_array_len: raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xde: + elif b == 0xDE: self._reserve(2) - n, = _unpack_from(">H", self._buffer, self._buff_i) + (n,) = _unpack_from(">H", self._buffer, self._buff_i) self._buff_i += 2 if n > self._max_map_len: raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP - elif b == 0xdf: + elif b == 0xDF: self._reserve(4) - n, = _unpack_from(">I", self._buffer, self._buff_i) + (n,) = _unpack_from(">I", self._buffer, self._buff_i) self._buff_i += 4 if n > self._max_map_len: raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) @@ -635,15 +673,17 @@ def _unpack(self, execute=EX_CONSTRUCT): return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), - self._unpack(EX_CONSTRUCT)) - for _ in xrange(n)) + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) + for _ in xrange(n) + ) else: ret = {} for _ in xrange(n): key = self._unpack(EX_CONSTRUCT) if self._strict_map_key and type(key) not in (unicode, bytes): - raise ValueError("%s is not allowed for map key" % str(type(key))) + raise ValueError( + "%s is not allowed for map key" % str(type(key)) + ) if not PY2 and type(key) is str: key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) @@ -656,7 +696,7 @@ def _unpack(self, execute=EX_CONSTRUCT): if self._raw: obj = bytes(obj) else: - obj = obj.decode('utf_8', self._unicode_errors) + obj = obj.decode("utf_8", self._unicode_errors) return obj if typ == TYPE_EXT: return self._ext_hook(n, bytes(obj)) @@ -746,9 +786,16 @@ class Packer(object): The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. 
""" - def __init__(self, default=None, unicode_errors=None, - use_single_float=False, autoreset=True, use_bin_type=False, - strict_types=False): + + def __init__( + self, + default=None, + unicode_errors=None, + use_single_float=False, + autoreset=True, + use_bin_type=False, + strict_types=False, + ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset @@ -760,8 +807,13 @@ def __init__(self, default=None, unicode_errors=None, raise TypeError("default must be callable") self._default = default - def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, - check=isinstance, check_type_strict=_check_type_strict): + def _pack( + self, + obj, + nest_limit=DEFAULT_RECURSE_LIMIT, + check=isinstance, + check_type_strict=_check_type_strict, + ): default_used = False if self._strict_types: check = check_type_strict @@ -782,22 +834,22 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: return self._buffer.write(struct.pack("b", obj)) - if 0x80 <= obj <= 0xff: - return self._buffer.write(struct.pack("BB", 0xcc, obj)) + if 0x80 <= obj <= 0xFF: + return self._buffer.write(struct.pack("BB", 0xCC, obj)) if -0x80 <= obj < 0: - return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) - if 0xff < obj <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xcd, obj)) + return self._buffer.write(struct.pack(">Bb", 0xD0, obj)) + if 0xFF < obj <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xCD, obj)) if -0x8000 <= obj < -0x80: - return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) - if 0xffff < obj <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xce, obj)) + return self._buffer.write(struct.pack(">Bh", 0xD1, obj)) + if 0xFFFF < obj <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xCE, obj)) if -0x80000000 <= obj < -0x8000: - return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) - if 0xffffffff < obj <= 0xffffffffffffffff: - return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) + return self._buffer.write(struct.pack(">Bi", 0xD2, obj)) + if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF: + return self._buffer.write(struct.pack(">BQ", 0xCF, obj)) if -0x8000000000000000 <= obj < -0x80000000: - return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) + return self._buffer.write(struct.pack(">Bq", 0xD3, obj)) if not default_used and self._default is not None: obj = self._default(obj) default_used = True @@ -805,27 +857,27 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, raise OverflowError("Integer value out of range") if check(obj, (bytes, bytearray)): n = len(obj) - if n >= 2**32: + if n >= 2 ** 32: raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, unicode): obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) - if n >= 2**32: + if n >= 2 ** 32: raise ValueError("String is too large") self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): n = len(obj) * obj.itemsize - if n >= 2**32: + if n >= 2 ** 32: raise ValueError("Memoryview is too large") self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, float): if self._use_float: - return self._buffer.write(struct.pack(">Bf", 0xca, obj)) - return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) + return self._buffer.write(struct.pack(">Bf", 0xCA, obj)) + return self._buffer.write(struct.pack(">Bd", 0xCB, obj)) if check(obj, (ExtType, Timestamp)): if check(obj, Timestamp): 
code = -1 @@ -837,21 +889,21 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, assert isinstance(data, bytes) L = len(data) if L == 1: - self._buffer.write(b'\xd4') + self._buffer.write(b"\xd4") elif L == 2: - self._buffer.write(b'\xd5') + self._buffer.write(b"\xd5") elif L == 4: - self._buffer.write(b'\xd6') + self._buffer.write(b"\xd6") elif L == 8: - self._buffer.write(b'\xd7') + self._buffer.write(b"\xd7") elif L == 16: - self._buffer.write(b'\xd8') - elif L <= 0xff: - self._buffer.write(struct.pack(">BB", 0xc7, L)) - elif L <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc8, L)) + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xC7, L)) + elif L <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xC8, L)) else: - self._buffer.write(struct.pack(">BI", 0xc9, L)) + self._buffer.write(struct.pack(">BI", 0xC9, L)) self._buffer.write(struct.pack("b", code)) self._buffer.write(data) return @@ -862,13 +914,14 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, self._pack(obj[i], nest_limit - 1) return if check(obj, dict): - return self._pack_map_pairs(len(obj), dict_iteritems(obj), - nest_limit - 1) + return self._pack_map_pairs( + len(obj), dict_iteritems(obj), nest_limit - 1 + ) if not default_used and self._default is not None: obj = self._default(obj) default_used = 1 continue - raise TypeError("Cannot serialize %r" % (obj, )) + raise TypeError("Cannot serialize %r" % (obj,)) def pack(self, obj): try: @@ -889,7 +942,7 @@ def pack_map_pairs(self, pairs): return ret def pack_array_header(self, n): - if n >= 2**32: + if n >= 2 ** 32: raise ValueError self._pack_array_header(n) if self._autoreset: @@ -898,7 +951,7 @@ def pack_array_header(self, n): return ret def pack_map_header(self, n): - if n >= 2**32: + if n >= 2 ** 32: raise ValueError self._pack_map_header(n) if self._autoreset: @@ -914,43 +967,43 @@ def pack_ext_type(self, typecode, data): if not isinstance(data, bytes): raise TypeError("data must have bytes type") L = len(data) - if L > 0xffffffff: + if L > 0xFFFFFFFF: raise ValueError("Too large data") if L == 1: - self._buffer.write(b'\xd4') + self._buffer.write(b"\xd4") elif L == 2: - self._buffer.write(b'\xd5') + self._buffer.write(b"\xd5") elif L == 4: - self._buffer.write(b'\xd6') + self._buffer.write(b"\xd6") elif L == 8: - self._buffer.write(b'\xd7') + self._buffer.write(b"\xd7") elif L == 16: - self._buffer.write(b'\xd8') - elif L <= 0xff: - self._buffer.write(b'\xc7' + struct.pack('B', L)) - elif L <= 0xffff: - self._buffer.write(b'\xc8' + struct.pack('>H', L)) + self._buffer.write(b"\xd8") + elif L <= 0xFF: + self._buffer.write(b"\xc7" + struct.pack("B", L)) + elif L <= 0xFFFF: + self._buffer.write(b"\xc8" + struct.pack(">H", L)) else: - self._buffer.write(b'\xc9' + struct.pack('>I', L)) - self._buffer.write(struct.pack('B', typecode)) + self._buffer.write(b"\xc9" + struct.pack(">I", L)) + self._buffer.write(struct.pack("B", typecode)) self._buffer.write(data) def _pack_array_header(self, n): - if n <= 0x0f: - return self._buffer.write(struct.pack('B', 0x90 + n)) - if n <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xdc, n)) - if n <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xdd, n)) + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x90 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDC, n)) + if n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xDD, n)) raise ValueError("Array is too large") def _pack_map_header(self, n): - if n <= 
0x0f: - return self._buffer.write(struct.pack('B', 0x80 + n)) - if n <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xde, n)) - if n <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xdf, n)) + if n <= 0x0F: + return self._buffer.write(struct.pack("B", 0x80 + n)) + if n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xDE, n)) + if n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xDF, n)) raise ValueError("Dict is too large") def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): @@ -960,28 +1013,28 @@ def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack(v, nest_limit - 1) def _pack_raw_header(self, n): - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif self._use_bin_type and n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xd9, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) + if n <= 0x1F: + self._buffer.write(struct.pack("B", 0xA0 + n)) + elif self._use_bin_type and n <= 0xFF: + self._buffer.write(struct.pack(">BB", 0xD9, n)) + elif n <= 0xFFFF: + self._buffer.write(struct.pack(">BH", 0xDA, n)) + elif n <= 0xFFFFFFFF: + self._buffer.write(struct.pack(">BI", 0xDB, n)) else: - raise ValueError('Raw is too large') + raise ValueError("Raw is too large") def _pack_bin_header(self, n): if not self._use_bin_type: return self._pack_raw_header(n) - elif n <= 0xff: - return self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - return self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - return self._buffer.write(struct.pack(">BI", 0xc6, n)) + elif n <= 0xFF: + return self._buffer.write(struct.pack(">BB", 0xC4, n)) + elif n <= 0xFFFF: + return self._buffer.write(struct.pack(">BH", 0xC5, n)) + elif n <= 0xFFFFFFFF: + return self._buffer.write(struct.pack(">BI", 0xC6, n)) else: - raise ValueError('Bin is too large') + raise ValueError("Bin is too large") def bytes(self): """Return internal buffer contents as bytes object""" From 10e5e39ff9739fa3ce589ad9d451260be0f3842c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 18:51:45 +0900 Subject: [PATCH 197/349] blacken test --- test/test_buffer.py | 17 +++--- test/test_case.py | 116 +++++++++++++++++++++++++------------- test/test_extension.py | 55 ++++++++++-------- test/test_format.py | 92 ++++++++++++++++++------------ test/test_limits.py | 46 ++++++++------- test/test_memoryview.py | 47 ++++++++-------- test/test_newspec.py | 60 ++++++++++---------- test/test_obj.py | 42 +++++++++----- test/test_pack.py | 102 ++++++++++++++++++++++++--------- test/test_read_size.py | 49 ++++++++-------- test/test_seq.py | 11 ++-- test/test_sequnpack.py | 121 ++++++++++++++++++++++------------------ test/test_stricttype.py | 29 +++++----- test/test_subtype.py | 7 ++- test/test_timestamp.py | 14 ++--- test/test_unpack.py | 27 +++++---- 16 files changed, 501 insertions(+), 334 deletions(-) diff --git a/test/test_buffer.py b/test/test_buffer.py index d723e8d6..64fbdefb 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -6,27 +6,28 @@ def test_unpack_buffer(): from array import array - buf = array('b') + + buf = array("b") try: - buf.frombytes(packb((b'foo', b'bar'))) + buf.frombytes(packb((b"foo", b"bar"))) except AttributeError: # PY2 - buf.fromstring(packb((b'foo', b'bar'))) + buf.fromstring(packb((b"foo", b"bar"))) obj = unpackb(buf, use_list=1) - assert [b'foo', b'bar'] == obj + assert 
[b"foo", b"bar"] == obj def test_unpack_bytearray(): - buf = bytearray(packb(('foo', 'bar'))) + buf = bytearray(packb(("foo", "bar"))) obj = unpackb(buf, use_list=1) - assert [b'foo', b'bar'] == obj + assert [b"foo", b"bar"] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) def test_unpack_memoryview(): - buf = bytearray(packb(('foo', 'bar'))) + buf = bytearray(packb(("foo", "bar"))) view = memoryview(buf) obj = unpackb(view, use_list=1) - assert [b'foo', b'bar'] == obj + assert [b"foo", b"bar"] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) diff --git a/test/test_case.py b/test/test_case.py index 5a4bb6c4..3bc1b26d 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -6,97 +6,133 @@ def check(length, obj): v = packb(obj) - assert len(v) == length, \ - "%r length should be %r but get %r" % (obj, length, len(v)) + assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v)) assert unpackb(v, use_list=0) == obj + def test_1(): - for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1, - -((1<<5)-1), -(1<<5)]: + for o in [ + None, + True, + False, + 0, + 1, + (1 << 6), + (1 << 7) - 1, + -1, + -((1 << 5) - 1), + -(1 << 5), + ]: check(1, o) + def test_2(): - for o in [1 << 7, (1 << 8) - 1, - -((1<<5)+1), -(1<<7) - ]: + for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]: check(2, o) + def test_3(): - for o in [1 << 8, (1 << 16) - 1, - -((1<<7)+1), -(1<<15)]: + for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]: check(3, o) + def test_5(): - for o in [1 << 16, (1 << 32) - 1, - -((1<<15)+1), -(1<<31)]: + for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]: check(5, o) + def test_9(): - for o in [1 << 32, (1 << 64) - 1, - -((1<<31)+1), -(1<<63), - 1.0, 0.1, -0.1, -1.0]: + for o in [ + 1 << 32, + (1 << 64) - 1, + -((1 << 31) + 1), + -(1 << 63), + 1.0, + 0.1, + -0.1, + -1.0, + ]: check(9, o) def check_raw(overhead, num): check(num + overhead, b" " * num) + def test_fixraw(): check_raw(1, 0) - check_raw(1, (1<<5) - 1) + check_raw(1, (1 << 5) - 1) + def test_raw16(): - check_raw(3, 1<<5) - check_raw(3, (1<<16) - 1) + check_raw(3, 1 << 5) + check_raw(3, (1 << 16) - 1) + def test_raw32(): - check_raw(5, 1<<16) + check_raw(5, 1 << 16) def check_array(overhead, num): check(num + overhead, (None,) * num) + def test_fixarray(): check_array(1, 0) check_array(1, (1 << 4) - 1) + def test_array16(): check_array(3, 1 << 4) - check_array(3, (1<<16)-1) + check_array(3, (1 << 16) - 1) + def test_array32(): - check_array(5, (1<<16)) + check_array(5, (1 << 16)) def match(obj, buf): assert packb(obj) == buf assert unpackb(buf, use_list=0) == obj + def test_match(): cases = [ - (None, b'\xc0'), - (False, b'\xc2'), - (True, b'\xc3'), - (0, b'\x00'), - (127, b'\x7f'), - (128, b'\xcc\x80'), - (256, b'\xcd\x01\x00'), - (-1, b'\xff'), - (-33, b'\xd0\xdf'), - (-129, b'\xd1\xff\x7f'), - ({1:1}, b'\x81\x01\x01'), + (None, b"\xc0"), + (False, b"\xc2"), + (True, b"\xc3"), + (0, b"\x00"), + (127, b"\x7f"), + (128, b"\xcc\x80"), + (256, b"\xcd\x01\x00"), + (-1, b"\xff"), + (-33, b"\xd0\xdf"), + (-129, b"\xd1\xff\x7f"), + ({1: 1}, b"\x81\x01\x01"), (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), - ((), b'\x90'), - (tuple(range(15)),b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"), - (tuple(range(16)),b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - ({}, b'\x80'), - (dict([(x,x) for x in range(15)]), 
b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'), - (dict([(x,x) for x in range(16)]), b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'), - ] + ((), b"\x90"), + ( + tuple(range(15)), + b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e", + ), + ( + tuple(range(16)), + b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + ), + ({}, b"\x80"), + ( + dict([(x, x) for x in range(15)]), + b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", + ), + ( + dict([(x, x) for x in range(16)]), + b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f", + ), + ] for v, p in cases: match(v, p) -def test_unicode(): - assert unpackb(packb('foobar'), use_list=1) == b'foobar' +def test_unicode(): + assert unpackb(packb("foobar"), use_list=1) == b"foobar" diff --git a/test/test_extension.py b/test/test_extension.py index 8aa0cbb0..6b365751 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -9,37 +9,41 @@ def p(s): packer = msgpack.Packer() packer.pack_ext_type(0x42, s) return packer.bytes() - assert p(b'A') == b'\xd4\x42A' # fixext 1 - assert p(b'AB') == b'\xd5\x42AB' # fixext 2 - assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 - assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 - assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16 - assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 - assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 - assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 + + assert p(b"A") == b"\xd4\x42A" # fixext 1 + assert p(b"AB") == b"\xd5\x42AB" # fixext 2 + assert p(b"ABCD") == b"\xd6\x42ABCD" # fixext 4 + assert p(b"ABCDEFGH") == b"\xd7\x42ABCDEFGH" # fixext 8 + assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 + assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 + assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 + assert ( + p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 + ) # ext 32 def test_unpack_ext_type(): def check(b, expected): assert msgpack.unpackb(b) == expected - check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 - check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 - check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 - check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 - check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16 - check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 - check(b'\xc8\x01\x23\x42' + b'A'*0x0123, - ExtType(0x42, b'A'*0x0123)) # ext 16 - check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345, - ExtType(0x42, b'A'*0x00012345)) # ext 32 + check(b"\xd4\x42A", ExtType(0x42, b"A")) # fixext 1 + check(b"\xd5\x42AB", ExtType(0x42, b"AB")) # fixext 2 + check(b"\xd6\x42ABCD", ExtType(0x42, b"ABCD")) # fixext 4 + check(b"\xd7\x42ABCDEFGH", ExtType(0x42, b"ABCDEFGH")) # fixext 8 + check(b"\xd8\x42" + b"A" * 16, ExtType(0x42, b"A" * 16)) # fixext 16 + check(b"\xc7\x03\x42ABC", ExtType(0x42, b"ABC")) # ext 8 + check(b"\xc8\x01\x23\x42" + b"A" * 0x0123, ExtType(0x42, b"A" * 0x0123)) # ext 16 + check( + b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345, + ExtType(0x42, b"A" * 0x00012345), + ) # ext 32 def test_extension_type(): def default(obj): - print('default called', obj) + 
print("default called", obj) if isinstance(obj, array.array): - typecode = 123 # application specific typecode + typecode = 123 # application specific typecode try: data = obj.tobytes() except AttributeError: @@ -48,24 +52,27 @@ def default(obj): raise TypeError("Unknown type object %r" % (obj,)) def ext_hook(code, data): - print('ext_hook called', code, data) + print("ext_hook called", code, data) assert code == 123 - obj = array.array('d') + obj = array.array("d") try: obj.frombytes(data) except AttributeError: # PY2 obj.fromstring(data) return obj - obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] + obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] s = msgpack.packb(obj, default=default) obj2 = msgpack.unpackb(s, ext_hook=ext_hook) assert obj == obj2 + import sys -if sys.version > '3': + +if sys.version > "3": long = int + def test_overriding_hooks(): def default(obj): if isinstance(obj, long): diff --git a/test/test_format.py b/test/test_format.py index 5fec0c3a..c2cdfbd6 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -3,68 +3,88 @@ from msgpack import unpackb + def check(src, should, use_list=0): assert unpackb(src, use_list=use_list) == should + def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", - (None, False, True,)) + check(b"\x93\xc0\xc2\xc3", (None, False, True,)) + def testFixnum(): - check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", - ((0,64,127,), (-32,-16,-1,),) - ) + check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127,), (-32, -16, -1,),)) + def testFixArray(): - check(b"\x92\x90\x91\x91\xc0", - ((),((None,),),), - ) + check( + b"\x92\x90\x91\x91\xc0", ((), ((None,),),), + ) + def testFixRaw(): - check(b"\x94\xa0\xa1a\xa2bc\xa3def", - (b"", b"a", b"bc", b"def",), - ) + check( + b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def",), + ) + def testFixMap(): check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", - {False: {None: None}, True:{None:{}}}, - ) + b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", + {False: {None: None}, True: {None: {}}}, + ) + def testUnsignedInt(): check( - b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" - b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" - b"\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), - ) + b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" + b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" + b"\xce\xff\xff\xff\xff", + (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), + ) + def testSignedInt(): - check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" - b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" - b"\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,)) + check( + b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" + b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" + b"\xd2\xff\xff\xff\xff", + (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,), + ) + def testRaw(): - check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + check( + b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - (b"", b"a", b"ab", b"", b"a", b"ab")) + (b"", b"a", b"ab", b"", b"a", b"ab"), + ) + def testArray(): - check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" + check( + b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" b"\xc2\xc3", - ((), (None,), (False,True), (), (None,), (False,True)) - ) + ((), (None,), (False, True), (), (None,), 
(False, True)), + ) + def testMap(): check( b"\x96" - b"\xde\x00\x00" - b"\xde\x00\x01\xc0\xc2" - b"\xde\x00\x02\xc0\xc2\xc3\xc2" - b"\xdf\x00\x00\x00\x00" - b"\xdf\x00\x00\x00\x01\xc0\xc2" - b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", - ({}, {None: False}, {True: False, None: False}, {}, - {None: False}, {True: False, None: False})) + b"\xde\x00\x00" + b"\xde\x00\x01\xc0\xc2" + b"\xde\x00\x02\xc0\xc2\xc3\xc2" + b"\xdf\x00\x00\x00\x00" + b"\xdf\x00\x00\x00\x01\xc0\xc2" + b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", + ( + {}, + {None: False}, + {True: False, None: False}, + {}, + {None: False}, + {True: False, None: False}, + ), + ) diff --git a/test/test_limits.py b/test/test_limits.py index 8c7606fa..6e850302 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -4,8 +4,14 @@ import pytest from msgpack import ( - packb, unpackb, Packer, Unpacker, ExtType, - PackOverflowError, PackValueError, UnpackValueError, + packb, + unpackb, + Packer, + Unpacker, + ExtType, + PackOverflowError, + PackValueError, + UnpackValueError, ) @@ -13,30 +19,30 @@ def test_integer(): x = -(2 ** 63) assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): - packb(x-1) + packb(x - 1) x = 2 ** 64 - 1 assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): - packb(x+1) + packb(x + 1) def test_array_header(): packer = Packer() - packer.pack_array_header(2**32-1) + packer.pack_array_header(2 ** 32 - 1) with pytest.raises(PackValueError): - packer.pack_array_header(2**32) + packer.pack_array_header(2 ** 32) def test_map_header(): packer = Packer() - packer.pack_map_header(2**32-1) + packer.pack_map_header(2 ** 32 - 1) with pytest.raises(PackValueError): - packer.pack_array_header(2**32) + packer.pack_array_header(2 ** 32) def test_max_str_len(): - d = 'x' * 3 + d = "x" * 3 packed = packb(d) unpacker = Unpacker(max_str_len=3, raw=False) @@ -50,7 +56,7 @@ def test_max_str_len(): def test_max_bin_len(): - d = b'x' * 3 + d = b"x" * 3 packed = packb(d, use_bin_type=True) unpacker = Unpacker(max_bin_len=3) @@ -64,7 +70,7 @@ def test_max_bin_len(): def test_max_array_len(): - d = [1,2,3] + d = [1, 2, 3] packed = packb(d) unpacker = Unpacker(max_array_len=3) @@ -107,8 +113,8 @@ def test_max_ext_len(): # PyPy fails following tests because of constant folding? 
# https://bugs.pypy.org/issue1721 -#@pytest.mark.skipif(True, reason="Requires very large memory.") -#def test_binary(): +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_binary(): # x = b'x' * (2**32 - 1) # assert unpackb(packb(x)) == x # del x @@ -117,8 +123,8 @@ def test_max_ext_len(): # packb(x) # # -#@pytest.mark.skipif(True, reason="Requires very large memory.") -#def test_string(): +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_string(): # x = 'x' * (2**32 - 1) # assert unpackb(packb(x)) == x # x += 'y' @@ -126,8 +132,8 @@ def test_max_ext_len(): # packb(x) # # -#@pytest.mark.skipif(True, reason="Requires very large memory.") -#def test_array(): +# @pytest.mark.skipif(True, reason="Requires very large memory.") +# def test_array(): # x = [0] * (2**32 - 1) # assert unpackb(packb(x)) == x # x.append(0) @@ -137,8 +143,9 @@ def test_max_ext_len(): # auto max len + def test_auto_max_array_len(): - packed = b'\xde\x00\x06zz' + packed = b"\xde\x00\x06zz" with pytest.raises(UnpackValueError): unpackb(packed, raw=False) @@ -147,9 +154,10 @@ def test_auto_max_array_len(): with pytest.raises(UnpackValueError): unpacker.unpack() + def test_auto_max_map_len(): # len(packed) == 6 -> max_map_len == 3 - packed = b'\xde\x00\x04zzz' + packed = b"\xde\x00\x04zzz" with pytest.raises(UnpackValueError): unpackb(packed, raw=False) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index f6d74edf..e1b63b8f 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -10,6 +10,7 @@ # - array type only supports old buffer interface # - array.frombytes is not available, must use deprecated array.fromstring if sys.version_info[0] < 3: + def make_memoryview(obj): return memoryview(buffer(obj)) @@ -20,6 +21,8 @@ def make_array(f, data): def get_data(a): return a.tostring() + + else: make_memoryview = memoryview @@ -49,64 +52,64 @@ def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): # check packed header assert packed[:1] == expected_header # check packed length prefix, if any - assert packed[1:1+len(expected_prefix)] == expected_prefix + assert packed[1 : 1 + len(expected_prefix)] == expected_prefix # check packed data - assert packed[1+len(expected_prefix):] == original_data + assert packed[1 + len(expected_prefix) :] == original_data # check array unpacked correctly assert original_array == reconstructed_array def test_fixstr_from_byte(): - _runtest('B', 1, b'\xa1', b'', False) - _runtest('B', 31, b'\xbf', b'', False) + _runtest("B", 1, b"\xa1", b"", False) + _runtest("B", 31, b"\xbf", b"", False) def test_fixstr_from_float(): - _runtest('f', 4, b'\xa4', b'', False) - _runtest('f', 28, b'\xbc', b'', False) + _runtest("f", 4, b"\xa4", b"", False) + _runtest("f", 28, b"\xbc", b"", False) def test_str16_from_byte(): - _runtest('B', 2**8, b'\xda', b'\x01\x00', False) - _runtest('B', 2**16-1, b'\xda', b'\xff\xff', False) + _runtest("B", 2 ** 8, b"\xda", b"\x01\x00", False) + _runtest("B", 2 ** 16 - 1, b"\xda", b"\xff\xff", False) def test_str16_from_float(): - _runtest('f', 2**8, b'\xda', b'\x01\x00', False) - _runtest('f', 2**16-4, b'\xda', b'\xff\xfc', False) + _runtest("f", 2 ** 8, b"\xda", b"\x01\x00", False) + _runtest("f", 2 ** 16 - 4, b"\xda", b"\xff\xfc", False) def test_str32_from_byte(): - _runtest('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + _runtest("B", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False) def test_str32_from_float(): - _runtest('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + 
_runtest("f", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False) def test_bin8_from_byte(): - _runtest('B', 1, b'\xc4', b'\x01', True) - _runtest('B', 2**8-1, b'\xc4', b'\xff', True) + _runtest("B", 1, b"\xc4", b"\x01", True) + _runtest("B", 2 ** 8 - 1, b"\xc4", b"\xff", True) def test_bin8_from_float(): - _runtest('f', 4, b'\xc4', b'\x04', True) - _runtest('f', 2**8-4, b'\xc4', b'\xfc', True) + _runtest("f", 4, b"\xc4", b"\x04", True) + _runtest("f", 2 ** 8 - 4, b"\xc4", b"\xfc", True) def test_bin16_from_byte(): - _runtest('B', 2**8, b'\xc5', b'\x01\x00', True) - _runtest('B', 2**16-1, b'\xc5', b'\xff\xff', True) + _runtest("B", 2 ** 8, b"\xc5", b"\x01\x00", True) + _runtest("B", 2 ** 16 - 1, b"\xc5", b"\xff\xff", True) def test_bin16_from_float(): - _runtest('f', 2**8, b'\xc5', b'\x01\x00', True) - _runtest('f', 2**16-4, b'\xc5', b'\xff\xfc', True) + _runtest("f", 2 ** 8, b"\xc5", b"\x01\x00", True) + _runtest("f", 2 ** 16 - 4, b"\xc5", b"\xff\xfc", True) def test_bin32_from_byte(): - _runtest('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest("B", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True) def test_bin32_from_float(): - _runtest('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest("f", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True) diff --git a/test/test_newspec.py b/test/test_newspec.py index ab05029d..f4f2a238 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -4,85 +4,87 @@ def test_str8(): - header = b'\xd9' - data = b'x' * 32 + header = b"\xd9" + data = b"x" * 32 b = packb(data.decode(), use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\x20' + assert b[0:2] == header + b"\x20" assert b[2:] == data assert unpackb(b) == data - data = b'x' * 255 + data = b"x" * 255 b = packb(data.decode(), use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\xff' + assert b[0:2] == header + b"\xff" assert b[2:] == data assert unpackb(b) == data def test_bin8(): - header = b'\xc4' - data = b'' + header = b"\xc4" + data = b"" b = packb(data, use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\x00' + assert b[0:2] == header + b"\x00" assert b[2:] == data assert unpackb(b) == data - data = b'x' * 255 + data = b"x" * 255 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b'\xff' + assert b[0:2] == header + b"\xff" assert b[2:] == data assert unpackb(b) == data def test_bin16(): - header = b'\xc5' - data = b'x' * 256 + header = b"\xc5" + data = b"x" * 256 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 3 assert b[0:1] == header - assert b[1:3] == b'\x01\x00' + assert b[1:3] == b"\x01\x00" assert b[3:] == data assert unpackb(b) == data - data = b'x' * 65535 + data = b"x" * 65535 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 3 assert b[0:1] == header - assert b[1:3] == b'\xff\xff' + assert b[1:3] == b"\xff\xff" assert b[3:] == data assert unpackb(b) == data def test_bin32(): - header = b'\xc6' - data = b'x' * 65536 + header = b"\xc6" + data = b"x" * 65536 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 5 assert b[0:1] == header - assert b[1:5] == b'\x00\x01\x00\x00' + assert b[1:5] == b"\x00\x01\x00\x00" assert b[5:] == data assert unpackb(b) == data + def test_ext(): def check(ext, packed): assert packb(ext) == packed assert unpackb(packed) == ext - check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 - check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 - check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) 
# fixext 4 - check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8 - check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16 + + check(ExtType(0x42, b"Z"), b"\xd4\x42Z") # fixext 1 + check(ExtType(0x42, b"ZZ"), b"\xd5\x42ZZ") # fixext 2 + check(ExtType(0x42, b"Z" * 4), b"\xd6\x42" + b"Z" * 4) # fixext 4 + check(ExtType(0x42, b"Z" * 8), b"\xd7\x42" + b"Z" * 8) # fixext 8 + check(ExtType(0x42, b"Z" * 16), b"\xd8\x42" + b"Z" * 16) # fixext 16 # ext 8 - check(ExtType(0x42, b''), b'\xc7\x00\x42') - check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255) + check(ExtType(0x42, b""), b"\xc7\x00\x42") + check(ExtType(0x42, b"Z" * 255), b"\xc7\xff\x42" + b"Z" * 255) # ext 16 - check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256) - check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff) + check(ExtType(0x42, b"Z" * 256), b"\xc8\x01\x00\x42" + b"Z" * 256) + check(ExtType(0x42, b"Z" * 0xFFFF), b"\xc8\xff\xff\x42" + b"Z" * 0xFFFF) # ext 32 - check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000) + check(ExtType(0x42, b"Z" * 0x10000), b"\xc9\x00\x01\x00\x00\x42" + b"Z" * 0x10000) # needs large memory - #check(ExtType(0x42, b'Z'*0xffffffff), + # check(ExtType(0x42, b'Z'*0xffffffff), # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/test/test_obj.py b/test/test_obj.py index 390c1b62..0b99ceab 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -4,64 +4,76 @@ from pytest import raises from msgpack import packb, unpackb + def _decode_complex(obj): - if b'__complex__' in obj: - return complex(obj[b'real'], obj[b'imag']) + if b"__complex__" in obj: + return complex(obj[b"real"], obj[b"imag"]) return obj + def _encode_complex(obj): if isinstance(obj, complex): - return {b'__complex__': True, b'real': 1, b'imag': 2} + return {b"__complex__": True, b"real": 1, b"imag": 2} return obj + def test_encode_hook(): - packed = packb([3, 1+2j], default=_encode_complex) + packed = packb([3, 1 + 2j], default=_encode_complex) unpacked = unpackb(packed, use_list=1) - assert unpacked[1] == {b'__complex__': True, b'real': 1, b'imag': 2} + assert unpacked[1] == {b"__complex__": True, b"real": 1, b"imag": 2} + def test_decode_hook(): - packed = packb([3, {b'__complex__': True, b'real': 1, b'imag': 2}]) + packed = packb([3, {b"__complex__": True, b"real": 1, b"imag": 2}]) unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1) - assert unpacked[1] == 1+2j + assert unpacked[1] == 1 + 2j + def test_decode_pairs_hook(): packed = packb([3, {1: 2, 3: 4}]) prod_sum = 1 * 2 + 3 * 4 - unpacked = unpackb(packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1) + unpacked = unpackb( + packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1 + ) assert unpacked[1] == prod_sum + def test_only_one_obj_hook(): with raises(TypeError): - unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) + unpackb(b"", object_hook=lambda x: x, object_pairs_hook=lambda x: x) + def test_bad_hook(): with raises(TypeError): - packed = packb([3, 1+2j], default=lambda o: o) + packed = packb([3, 1 + 2j], default=lambda o: o) unpacked = unpackb(packed, use_list=1) + def _arr_to_str(arr): - return ''.join(str(c) for c in arr) + return "".join(str(c) for c in arr) + def test_array_hook(): - packed = packb([1,2,3]) + packed = packb([1, 2, 3]) unpacked = unpackb(packed, list_hook=_arr_to_str, use_list=1) - assert unpacked == '123' + assert unpacked == "123" class DecodeError(Exception): pass + def bad_complex_decoder(o): raise 
DecodeError("Ooops!") def test_an_exception_in_objecthook1(): with raises(DecodeError): - packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}}) + packed = packb({1: {"__complex__": True, "real": 1, "imag": 2}}) unpackb(packed, object_hook=bad_complex_decoder) def test_an_exception_in_objecthook2(): with raises(DecodeError): - packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]}) + packed = packb({1: [{"__complex__": True, "real": 1, "imag": 2}]}) unpackb(packed, list_hook=bad_complex_decoder, use_list=1) diff --git a/test/test_pack.py b/test/test_pack.py index b6752e5a..de212efa 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -17,20 +17,46 @@ def check(data, use_list=False): re = unpackb(packb(data), use_list=use_list) assert re == data + def testPack(): test_data = [ - 0, 1, 127, 128, 255, 256, 65535, 65536, 4294967295, 4294967296, - -1, -32, -33, -128, -129, -32768, -32769, -4294967296, -4294967297, - 1.0, - b"", b"a", b"a"*31, b"a"*32, - None, True, False, - (), ((),), ((), None,), + 0, + 1, + 127, + 128, + 255, + 256, + 65535, + 65536, + 4294967295, + 4294967296, + -1, + -32, + -33, + -128, + -129, + -32768, + -32769, + -4294967296, + -4294967297, + 1.0, + b"", + b"a", + b"a" * 31, + b"a" * 32, + None, + True, + False, + (), + ((),), + ((), None,), {None: 0}, - (1<<23), - ] + (1 << 23), + ] for td in test_data: check(td) + def testPackUnicode(): test_data = ["", "abcd", ["defgh"], "Русский текст"] for td in test_data: @@ -41,43 +67,64 @@ def testPackUnicode(): re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack() assert re == td + def testPackBytes(): test_data = [ - b"", b"abcd", (b"defgh",), - ] + b"", + b"abcd", + (b"defgh",), + ] for td in test_data: check(td) + def testPackByteArrays(): test_data = [ - bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),), - ] + bytearray(b""), + bytearray(b"abcd"), + (bytearray(b"defgh"),), + ] for td in test_data: check(td) -@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates") + +@pytest.mark.skipif( + sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" +) def testIgnoreUnicodeErrors(): - re = unpackb(packb(b'abc\xeddef', use_bin_type=False), - raw=False, unicode_errors='ignore') + re = unpackb( + packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore" + ) assert re == "abcdef" + def testStrictUnicodeUnpack(): - packed = packb(b'abc\xeddef', use_bin_type=False) + packed = packb(b"abc\xeddef", use_bin_type=False) with pytest.raises(UnicodeDecodeError): unpackb(packed, raw=False, use_list=1) -@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates") + +@pytest.mark.skipif( + sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" +) def testIgnoreErrorsPack(): - re = unpackb(packb(u"abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors='ignore'), raw=False, use_list=1) + re = unpackb( + packb("abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors="ignore"), + raw=False, + use_list=1, + ) assert re == "abcdef" + def testDecodeBinary(): re = unpackb(packb(b"abc"), use_list=1) assert re == b"abc" + def testPackFloat(): - assert packb(1.0, use_single_float=True) == b'\xca' + struct.pack(str('>f'), 1.0) - assert packb(1.0, use_single_float=False) == b'\xcb' + struct.pack(str('>d'), 1.0) + assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(str(">f"), 1.0) + assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(str(">d"), 1.0) + def testArraySize(sizes=[0, 
5, 50, 1000]): bio = BytesIO() @@ -92,6 +139,7 @@ def testArraySize(sizes=[0, 5, 50, 1000]): for size in sizes: assert unpacker.unpack() == list(range(size)) + def test_manualreset(sizes=[0, 5, 50, 1000]): packer = Packer(autoreset=False) for size in sizes: @@ -105,7 +153,8 @@ def test_manualreset(sizes=[0, 5, 50, 1000]): assert unpacker.unpack() == list(range(size)) packer.reset() - assert packer.bytes() == b'' + assert packer.bytes() == b"" + def testMapSize(sizes=[0, 5, 50, 1000]): bio = BytesIO() @@ -113,8 +162,8 @@ def testMapSize(sizes=[0, 5, 50, 1000]): for size in sizes: bio.write(packer.pack_map_header(size)) for i in range(size): - bio.write(packer.pack(i)) # key - bio.write(packer.pack(i * 2)) # value + bio.write(packer.pack(i)) # key + bio.write(packer.pack(i * 2)) # value bio.seek(0) unpacker = Unpacker(bio) @@ -123,21 +172,24 @@ def testMapSize(sizes=[0, 5, 50, 1000]): def test_odict(): - seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] + seq = [(b"one", 1), (b"two", 2), (b"three", 3), (b"four", 4)] od = OrderedDict(seq) assert unpackb(packb(od), use_list=1) == dict(seq) + def pair_hook(seq): return list(seq) + assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq def test_pairlist(): - pairlist = [(b'a', 1), (2, b'b'), (b'foo', b'bar')] + pairlist = [(b"a", 1), (2, b"b"), (b"foo", b"bar")] packer = Packer() packed = packer.pack_map_pairs(pairlist) unpacked = unpackb(packed, object_pairs_hook=list) assert pairlist == unpacked + def test_get_buffer(): packer = Packer(autoreset=0, use_bin_type=True) packer.pack([1, 2]) diff --git a/test/test_read_size.py b/test/test_read_size.py index 4e6c2b93..8d8df642 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,66 +1,71 @@ """Test Unpacker's read_array_header and read_map_header methods""" from msgpack import packb, Unpacker, OutOfData + UnexpectedTypeException = ValueError + def test_read_array_header(): unpacker = Unpacker() - unpacker.feed(packb(['a', 'b', 'c'])) + unpacker.feed(packb(["a", "b", "c"])) assert unpacker.read_array_header() == 3 - assert unpacker.unpack() == b'a' - assert unpacker.unpack() == b'b' - assert unpacker.unpack() == b'c' + assert unpacker.unpack() == b"a" + assert unpacker.unpack() == b"b" + assert unpacker.unpack() == b"c" try: unpacker.unpack() - assert 0, 'should raise exception' + assert 0, "should raise exception" except OutOfData: - assert 1, 'okay' + assert 1, "okay" def test_read_map_header(): unpacker = Unpacker() - unpacker.feed(packb({'a': 'A'})) + unpacker.feed(packb({"a": "A"})) assert unpacker.read_map_header() == 1 - assert unpacker.unpack() == B'a' - assert unpacker.unpack() == B'A' + assert unpacker.unpack() == b"a" + assert unpacker.unpack() == b"A" try: unpacker.unpack() - assert 0, 'should raise exception' + assert 0, "should raise exception" except OutOfData: - assert 1, 'okay' + assert 1, "okay" + def test_incorrect_type_array(): unpacker = Unpacker() unpacker.feed(packb(1)) try: unpacker.read_array_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' + assert 1, "okay" + def test_incorrect_type_map(): unpacker = Unpacker() unpacker.feed(packb(1)) try: unpacker.read_map_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' + assert 1, "okay" + def test_correct_type_nested_array(): unpacker = Unpacker() - unpacker.feed(packb({'a': ['b', 'c', 'd']})) + unpacker.feed(packb({"a": 
["b", "c", "d"]})) try: unpacker.read_array_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' + assert 1, "okay" + def test_incorrect_type_nested_map(): unpacker = Unpacker() - unpacker.feed(packb([{'a': 'b'}])) + unpacker.feed(packb([{"a": "b"}])) try: unpacker.read_map_header() - assert 0, 'should raise exception' + assert 0, "should raise exception" except UnexpectedTypeException: - assert 1, 'okay' - + assert 1, "okay" diff --git a/test/test_seq.py b/test/test_seq.py index fed9ff42..0d5d8065 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -7,8 +7,9 @@ binarydata = bytes(bytearray(range(256))) + def gen_binary_data(idx): - return binarydata[:idx % 300] + return binarydata[: idx % 300] def test_exceeding_unpacker_read_size(): @@ -18,10 +19,10 @@ def test_exceeding_unpacker_read_size(): NUMBER_OF_STRINGS = 6 read_size = 16 - # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): - # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) - # 40 ok for read_size=1024, while 50 introduces errors - # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): + # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): + # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) + # 40 ok for read_size=1024, while 50 introduces errors + # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): for idx in range(NUMBER_OF_STRINGS): data = gen_binary_data(idx) diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 59718f56..e5765716 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -10,102 +10,115 @@ def test_partialdata(): unpacker = Unpacker() - unpacker.feed(b'\xa5') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'h') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'a') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'l') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'l') - with raises(StopIteration): next(iter(unpacker)) - unpacker.feed(b'o') - assert next(iter(unpacker)) == b'hallo' + unpacker.feed(b"\xa5") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"h") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"a") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"l") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"l") + with raises(StopIteration): + next(iter(unpacker)) + unpacker.feed(b"o") + assert next(iter(unpacker)) == b"hallo" + def test_foobar(): unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b'foobar') - assert unpacker.unpack() == ord(b'f') - assert unpacker.unpack() == ord(b'o') - assert unpacker.unpack() == ord(b'o') - assert unpacker.unpack() == ord(b'b') - assert unpacker.unpack() == ord(b'a') - assert unpacker.unpack() == ord(b'r') + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") + assert unpacker.unpack() == ord(b"o") + assert unpacker.unpack() == ord(b"o") + assert unpacker.unpack() == ord(b"b") + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") with raises(OutOfData): unpacker.unpack() - unpacker.feed(b'foo') - unpacker.feed(b'bar') + 
unpacker.feed(b"foo") + unpacker.feed(b"bar") k = 0 - for o, e in zip(unpacker, 'foobarbaz'): + for o, e in zip(unpacker, "foobarbaz"): assert o == ord(e) k += 1 - assert k == len(b'foobar') + assert k == len(b"foobar") + def test_foobar_skip(): unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b'foobar') - assert unpacker.unpack() == ord(b'f') + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") unpacker.skip() - assert unpacker.unpack() == ord(b'o') + assert unpacker.unpack() == ord(b"o") unpacker.skip() - assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b"a") unpacker.skip() with raises(OutOfData): unpacker.unpack() + def test_maxbuffersize(): with raises(ValueError): Unpacker(read_size=5, max_buffer_size=3) unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) - unpacker.feed(b'fo') + unpacker.feed(b"fo") with raises(BufferFull): - unpacker.feed(b'ob') - unpacker.feed(b'o') - assert ord('f') == next(unpacker) - unpacker.feed(b'b') - assert ord('o') == next(unpacker) - assert ord('o') == next(unpacker) - assert ord('b') == next(unpacker) + unpacker.feed(b"ob") + unpacker.feed(b"o") + assert ord("f") == next(unpacker) + unpacker.feed(b"b") + assert ord("o") == next(unpacker) + assert ord("o") == next(unpacker) + assert ord("b") == next(unpacker) def test_readbytes(): unpacker = Unpacker(read_size=3) - unpacker.feed(b'foobar') - assert unpacker.unpack() == ord(b'f') - assert unpacker.read_bytes(3) == b'oob' - assert unpacker.unpack() == ord(b'a') - assert unpacker.unpack() == ord(b'r') + unpacker.feed(b"foobar") + assert unpacker.unpack() == ord(b"f") + assert unpacker.read_bytes(3) == b"oob" + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") # Test buffer refill - unpacker = Unpacker(io.BytesIO(b'foobar'), read_size=3) - assert unpacker.unpack() == ord(b'f') - assert unpacker.read_bytes(3) == b'oob' - assert unpacker.unpack() == ord(b'a') - assert unpacker.unpack() == ord(b'r') + unpacker = Unpacker(io.BytesIO(b"foobar"), read_size=3) + assert unpacker.unpack() == ord(b"f") + assert unpacker.read_bytes(3) == b"oob" + assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b"r") + def test_issue124(): unpacker = Unpacker() - unpacker.feed(b'\xa1?\xa1!') - assert tuple(unpacker) == (b'?', b'!') + unpacker.feed(b"\xa1?\xa1!") + assert tuple(unpacker) == (b"?", b"!") assert tuple(unpacker) == () unpacker.feed(b"\xa1?\xa1") - assert tuple(unpacker) == (b'?',) + assert tuple(unpacker) == (b"?",) assert tuple(unpacker) == () unpacker.feed(b"!") - assert tuple(unpacker) == (b'!',) + assert tuple(unpacker) == (b"!",) assert tuple(unpacker) == () def test_unpack_tell(): stream = io.BytesIO() - messages = [2**i-1 for i in range(65)] - messages += [-(2**i) for i in range(1, 64)] - messages += [b'hello', b'hello'*1000, list(range(20)), - {i: bytes(i)*i for i in range(10)}, - {i: bytes(i)*i for i in range(32)}] + messages = [2 ** i - 1 for i in range(65)] + messages += [-(2 ** i) for i in range(1, 64)] + messages += [ + b"hello", + b"hello" * 1000, + list(range(20)), + {i: bytes(i) * i for i in range(10)}, + {i: bytes(i) * i for i in range(32)}, + ] offsets = [] for m in messages: pack(m, stream) diff --git a/test/test_stricttype.py b/test/test_stricttype.py index 87e7c1ce..78e1723f 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -5,30 +5,32 @@ def test_namedtuple(): - T = namedtuple('T', "foo bar") + T = namedtuple("T", "foo bar") + def default(o): if isinstance(o, T): return 
dict(o._asdict()) - raise TypeError('Unsupported type %s' % (type(o),)) + raise TypeError("Unsupported type %s" % (type(o),)) + packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) unpacked = unpackb(packed, raw=False) - assert unpacked == {'foo': 1, 'bar': 42} + assert unpacked == {"foo": 1, "bar": 42} def test_tuple(): - t = ('one', 2, b'three', (4, )) + t = ("one", 2, b"three", (4,)) def default(o): if isinstance(o, tuple): return { - '__type__': 'tuple', - 'value': list(o), - } - raise TypeError('Unsupported type %s' % (type(o),)) + "__type__": "tuple", + "value": list(o), + } + raise TypeError("Unsupported type %s" % (type(o),)) def convert(o): - if o.get('__type__') == 'tuple': - return tuple(o['value']) + if o.get("__type__") == "tuple": + return tuple(o["value"]) return o data = packb(t, strict_types=True, use_bin_type=True, default=default) @@ -38,7 +40,7 @@ def convert(o): def test_tuple_ext(): - t = ('one', 2, b'three', (4, )) + t = ("one", 2, b"three", (4,)) MSGPACK_EXT_TYPE_TUPLE = 0 @@ -46,7 +48,8 @@ def default(o): if isinstance(o, tuple): # Convert to list and pack payload = packb( - list(o), strict_types=True, use_bin_type=True, default=default) + list(o), strict_types=True, use_bin_type=True, default=default + ) return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) raise TypeError(repr(o)) @@ -54,7 +57,7 @@ def convert(code, payload): if code == MSGPACK_EXT_TYPE_TUPLE: # Unpack and convert to tuple return tuple(unpackb(payload, raw=False, ext_hook=convert)) - raise ValueError('Unknown Ext code {}'.format(code)) + raise ValueError("Unknown Ext code {}".format(code)) data = packb(t, strict_types=True, use_bin_type=True, default=default) expected = unpackb(data, raw=False, ext_hook=convert) diff --git a/test/test_subtype.py b/test/test_subtype.py index 6807508e..d91d4553 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -4,16 +4,21 @@ from msgpack import packb, unpackb from collections import namedtuple + class MyList(list): pass + class MyDict(dict): pass + class MyTuple(tuple): pass -MyNamedTuple = namedtuple('MyNamedTuple', 'x y') + +MyNamedTuple = namedtuple("MyNamedTuple", "x y") + def test_types(): assert packb(MyDict()) == packb(dict()) diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 55c2f6dd..1348e694 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -4,34 +4,34 @@ def test_timestamp(): # timestamp32 - ts = Timestamp(2**32 - 1) + ts = Timestamp(2 ** 32 - 1) assert ts.to_bytes() == b"\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xd6\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0 + assert ts.seconds == 2 ** 32 - 1 and ts.nanoseconds == 0 # timestamp64 - ts = Timestamp(2**34 - 1, 999999999) + ts = Timestamp(2 ** 34 - 1, 999999999) assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xd7\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999 + assert ts.seconds == 2 ** 34 - 1 and ts.nanoseconds == 999999999 # timestamp96 - ts = Timestamp(2**63 - 1, 999999999) + ts = Timestamp(2 ** 63 - 1, 999999999) assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xc7\x0c\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2**63 - 1 and ts.nanoseconds 
== 999999999 + assert ts.seconds == 2 ** 63 - 1 and ts.nanoseconds == 999999999 # negative fractional - ts = Timestamp(-2.3) #s: -3, ns: 700000000 + ts = Timestamp(-2.3) # s: -3, ns: 700000000 assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd" packed = msgpack.packb(ts) assert packed == b"\xc7\x0c\xff" + ts.to_bytes() diff --git a/test/test_unpack.py b/test/test_unpack.py index 00a10612..bc74c4dd 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -5,7 +5,7 @@ def test_unpack_array_header_from_file(): - f = BytesIO(packb([1,2,3,4])) + f = BytesIO(packb([1, 2, 3, 4])) unpacker = Unpacker(f) assert unpacker.read_array_header() == 4 assert unpacker.unpack() == 1 @@ -16,8 +16,10 @@ def test_unpack_array_header_from_file(): unpacker.unpack() -@mark.skipif("not hasattr(sys, 'getrefcount') == True", - reason='sys.getrefcount() is needed to pass this test') +@mark.skipif( + "not hasattr(sys, 'getrefcount') == True", + reason="sys.getrefcount() is needed to pass this test", +) def test_unpacker_hook_refcnt(): result = [] @@ -43,12 +45,9 @@ def hook(x): def test_unpacker_ext_hook(): - class MyUnpacker(Unpacker): - def __init__(self): - super(MyUnpacker, self).__init__( - ext_hook=self._hook, raw=False) + super(MyUnpacker, self).__init__(ext_hook=self._hook, raw=False) def _hook(self, code, data): if code == 1: @@ -57,15 +56,15 @@ def _hook(self, code, data): return ExtType(code, data) unpacker = MyUnpacker() - unpacker.feed(packb({'a': 1})) - assert unpacker.unpack() == {'a': 1} - unpacker.feed(packb({'a': ExtType(1, b'123')})) - assert unpacker.unpack() == {'a': 123} - unpacker.feed(packb({'a': ExtType(2, b'321')})) - assert unpacker.unpack() == {'a': ExtType(2, b'321')} + unpacker.feed(packb({"a": 1})) + assert unpacker.unpack() == {"a": 1} + unpacker.feed(packb({"a": ExtType(1, b"123")})) + assert unpacker.unpack() == {"a": 123} + unpacker.feed(packb({"a": ExtType(2, b"321")})) + assert unpacker.unpack() == {"a": ExtType(2, b"321")} -if __name__ == '__main__': +if __name__ == "__main__": test_unpack_array_header_from_file() test_unpacker_hook_refcnt() test_unpacker_ext_hook() From bc8c86203af8d36152c9c72ea22e895db2ed3fe0 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 18:53:49 +0900 Subject: [PATCH 198/349] blacken all files. 
--- benchmark/benchmark.py | 20 +++-- docs/conf.py | 164 ++++++++++++++++++++--------------------- setup.py | 117 ++++++++++++++++------------- 3 files changed, 158 insertions(+), 143 deletions(-) diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 80819c62..82d0ddbf 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,6 +1,8 @@ from msgpack import fallback + try: from msgpack import _unpacker, _packer + has_ext = True except ImportError: has_ext = False @@ -9,7 +11,7 @@ def profile(name, func): times = timeit.repeat(func, number=1000, repeat=4) - times = ', '.join(["%8f" % t for t in times]) + times = ", ".join(["%8f" % t for t in times]) print("%-30s %40s" % (name, times)) @@ -18,17 +20,19 @@ def simple(name, data): packer = _packer.Packer() profile("packing %s (ext)" % name, lambda: packer.pack(data)) packer = fallback.Packer() - profile('packing %s (fallback)' % name, lambda: packer.pack(data)) + profile("packing %s (fallback)" % name, lambda: packer.pack(data)) data = packer.pack(data) if has_ext: - profile('unpacking %s (ext)' % name, lambda: _unpacker.unpackb(data)) - profile('unpacking %s (fallback)' % name, lambda: fallback.unpackb(data)) + profile("unpacking %s (ext)" % name, lambda: _unpacker.unpackb(data)) + profile("unpacking %s (fallback)" % name, lambda: fallback.unpackb(data)) + def main(): - simple("integers", [7]*10000) - simple("bytes", [b'x'*n for n in range(100)]*10) - simple("lists", [[]]*10000) - simple("dicts", [{}]*10000) + simple("integers", [7] * 10000) + simple("bytes", [b"x" * n for n in range(100)] * 10) + simple("lists", [[]] * 10000) + simple("dicts", [{}] * 10000) + main() diff --git a/docs/conf.py b/docs/conf.py index 47d745a8..36fa76e3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,32 +16,32 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'msgpack' -copyright = u'2013, INADA Naoki' +project = u"msgpack" +copyright = u"2013, INADA Naoki" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -49,176 +49,170 @@ # # The short X.Y version. # The full version, including alpha/beta/rc tags. -version = release = '0.5' +version = release = "0.5" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
-#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' today_fmt = "%Y-%m-%d" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinxdoc' +html_theme = "sphinxdoc" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. 
-#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'msgpackdoc' +htmlhelp_basename = "msgpackdoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'msgpack.tex', u'msgpack Documentation', - u'Author', 'manual'), + ("index", "msgpack.tex", u"msgpack Documentation", u"Author", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'msgpack', u'msgpack Documentation', - [u'Author'], 1) -] +man_pages = [("index", "msgpack", u"msgpack Documentation", [u"Author"], 1)] # If true, show URL addresses after external links. 
-#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -227,59 +221,65 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'msgpack', u'msgpack Documentation', - u'Author', 'msgpack', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "msgpack", + u"msgpack Documentation", + u"Author", + "msgpack", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = u'msgpack' -epub_author = u'Author' -epub_publisher = u'Author' -epub_copyright = u'2013, Author' +epub_title = u"msgpack" +epub_author = u"Author" +epub_publisher = u"Author" +epub_copyright = u"2013, Author" # The language of the text. It defaults to the language option # or en if the language is not set. -#epub_language = '' +# epub_language = '' # The scheme of the identifier. Typical schemes are ISBN or URL. -#epub_scheme = '' +# epub_scheme = '' # The unique identifier of the text. This can be a ISBN number # or the project homepage. -#epub_identifier = '' +# epub_identifier = '' # A unique identification for the text. -#epub_uid = '' +# epub_uid = '' # A tuple containing the cover image and cover page html template filenames. -#epub_cover = () +# epub_cover = () # HTML files that should be inserted before the pages created by sphinx. # The format is a list of tuples containing the path and title. -#epub_pre_files = [] +# epub_pre_files = [] # HTML files shat should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. -#epub_post_files = [] +# epub_post_files = [] # A list of files that should not be packed into the epub file. -#epub_exclude_files = [] +# epub_exclude_files = [] # The depth of the table of contents in toc.ncx. -#epub_tocdepth = 3 +# epub_tocdepth = 3 # Allow duplicate toc entries. -#epub_tocdup = True +# epub_tocdup = True diff --git a/setup.py b/setup.py index 77b81c63..a8c2306d 100755 --- a/setup.py +++ b/setup.py @@ -17,11 +17,14 @@ # for building transitional package. 
TRANSITIONAL = False + class NoCython(Exception): pass + try: import Cython.Compiler.Main as cython_compiler + have_cython = True except ImportError: have_cython = False @@ -31,16 +34,19 @@ def cythonize(src): sys.stderr.write("cythonize: %r\n" % (src,)) cython_compiler.compile([src], cplus=True) + def ensure_source(src): - pyx = os.path.splitext(src)[0] + '.pyx' + pyx = os.path.splitext(src)[0] + ".pyx" if not os.path.exists(src): if not have_cython: raise NoCython cythonize(pyx) - elif (os.path.exists(pyx) and - os.stat(src).st_mtime < os.stat(pyx).st_mtime and - have_cython): + elif ( + os.path.exists(pyx) + and os.stat(src).st_mtime < os.stat(pyx).st_mtime + and have_cython + ): cythonize(pyx) return src @@ -63,77 +69,82 @@ def build_extension(self, ext): print(e) -exec(open('msgpack/_version.py').read()) +exec(open("msgpack/_version.py").read()) -version_str = '.'.join(str(x) for x in version[:3]) -if len(version) > 3 and version[3] != 'final': +version_str = ".".join(str(x) for x in version[:3]) +if len(version) > 3 and version[3] != "final": version_str += version[3] # Cython is required for sdist class Sdist(sdist): def __init__(self, *args, **kwargs): - cythonize('msgpack/_cmsgpack.pyx') + cythonize("msgpack/_cmsgpack.pyx") sdist.__init__(self, *args, **kwargs) + libraries = [] -if sys.platform == 'win32': - libraries.append('ws2_32') +if sys.platform == "win32": + libraries.append("ws2_32") -if sys.byteorder == 'big': - macros = [('__BIG_ENDIAN__', '1')] +if sys.byteorder == "big": + macros = [("__BIG_ENDIAN__", "1")] else: - macros = [('__LITTLE_ENDIAN__', '1')] + macros = [("__LITTLE_ENDIAN__", "1")] ext_modules = [] if not PYPY and not PY2: - ext_modules.append(Extension('msgpack._cmsgpack', - sources=['msgpack/_cmsgpack.cpp'], - libraries=libraries, - include_dirs=['.'], - define_macros=macros, - )) + ext_modules.append( + Extension( + "msgpack._cmsgpack", + sources=["msgpack/_cmsgpack.cpp"], + libraries=libraries, + include_dirs=["."], + define_macros=macros, + ) + ) del libraries, macros -desc = 'MessagePack (de)serializer.' -with io.open('README.rst', encoding='utf-8') as f: +desc = "MessagePack (de)serializer." +with io.open("README.rst", encoding="utf-8") as f: long_desc = f.read() del f -name = 'msgpack' +name = "msgpack" if TRANSITIONAL: - name = 'msgpack-python' + name = "msgpack-python" long_desc = "This package is deprecated. Install msgpack instead." 
-setup(name=name, - author='INADA Naoki', - author_email='songofacandy@gmail.com', - version=version_str, - cmdclass={'build_ext': BuildExt, 'sdist': Sdist}, - ext_modules=ext_modules, - packages=['msgpack'], - description=desc, - long_description=long_desc, - long_description_content_type="text/x-rst", - url='https://msgpack.org/', - project_urls = { - 'Documentation': 'https://msgpack-python.readthedocs.io/', - 'Source': 'https://github.com/msgpack/msgpack-python', - 'Tracker': 'https://github.com/msgpack/msgpack-python/issues', - }, - license='Apache 2.0', - classifiers=[ - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - ], +setup( + name=name, + author="INADA Naoki", + author_email="songofacandy@gmail.com", + version=version_str, + cmdclass={"build_ext": BuildExt, "sdist": Sdist}, + ext_modules=ext_modules, + packages=["msgpack"], + description=desc, + long_description=long_desc, + long_description_content_type="text/x-rst", + url="https://msgpack.org/", + project_urls={ + "Documentation": "https://msgpack-python.readthedocs.io/", + "Source": "https://github.com/msgpack/msgpack-python", + "Tracker": "https://github.com/msgpack/msgpack-python/issues", + }, + license="Apache 2.0", + classifiers=[ + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + ], ) From af4eea430e2f176f17fff5abe781dd83f55d4657 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 18:54:14 +0900 Subject: [PATCH 199/349] travis: Add Black --- .travis.yml | 9 ++++++++- Makefile | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7b298af4..9d3ae749 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,8 +14,15 @@ python: matrix: include: + - name: Black + language: python + python: 3.8 + install: + - pip install black + script: + - black --check --diff . + - name: 32bit build - sudo: required language: python services: - docker diff --git a/Makefile b/Makefile index 5828ed4d..a1edc883 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,10 @@ all: cython python setup.py build_ext -i -f +.PHONY: black +black: + black . 
+ .PHONY: cython cython: cython --cplus msgpack/_cmsgpack.pyx From 9ae43709e42092c7f6a4e990d696d9005fa1623d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 20:20:53 +0900 Subject: [PATCH 200/349] Drop old buffer protocol support (#383) --- msgpack/_unpacker.pyx | 60 ++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 6dedd30a..3c9b7b37 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -109,38 +109,26 @@ def default_read_extended_type(typecode, data): cdef inline int get_data_from_buffer(object obj, Py_buffer *view, char **buf, - Py_ssize_t *buffer_len, - int *new_protocol) except 0: + Py_ssize_t *buffer_len) except 0: cdef object contiguous cdef Py_buffer tmp - if PyObject_CheckBuffer(obj): - new_protocol[0] = 1 - if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: - raise - if view.itemsize != 1: - PyBuffer_Release(view) - raise BufferError("cannot unpack from multi-byte object") - if PyBuffer_IsContiguous(view, b'A') == 0: - PyBuffer_Release(view) - # create a contiguous copy and get buffer - contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C') - PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) - # view must hold the only reference to contiguous, - # so memory is freed when view is released - Py_DECREF(contiguous) - buffer_len[0] = view.len - buf[0] = view.buf - return 1 - else: - new_protocol[0] = 0 - if PyObject_AsReadBuffer(obj, buf, buffer_len) == -1: - raise BufferError("could not get memoryview") - PyErr_WarnEx(RuntimeWarning, - "using old buffer interface to unpack %s; " - "this leads to unpacking errors if slicing is used and " - "will be removed in a future version" % type(obj), - 1) - return 1 + if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: + raise + if view.itemsize != 1: + PyBuffer_Release(view) + raise BufferError("cannot unpack from multi-byte object") + if PyBuffer_IsContiguous(view, b'A') == 0: + PyBuffer_Release(view) + # create a contiguous copy and get buffer + contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C') + PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) + # view must hold the only reference to contiguous, + # so memory is freed when view is released + Py_DECREF(contiguous) + buffer_len[0] = view.len + buf[0] = view.buf + return 1 + def unpackb(object packed, *, object object_hook=None, object list_hook=None, bint use_list=True, bint raw=True, bint strict_map_key=False, @@ -172,12 +160,11 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, cdef char* buf = NULL cdef Py_ssize_t buf_len cdef const char* cerr = NULL - cdef int new_protocol = 0 if unicode_errors is not None: cerr = unicode_errors - get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) + get_data_from_buffer(packed, &view, &buf, &buf_len) if max_str_len == -1: max_str_len = buf_len @@ -196,8 +183,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: - if new_protocol: - PyBuffer_Release(&view); + PyBuffer_Release(&view); if ret == 1: obj = unpack_data(&ctx) @@ -392,7 +378,6 @@ cdef class Unpacker(object): def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff - cdef int new_protocol = 0 cdef char* buf cdef Py_ssize_t buf_len @@ -400,12 +385,11 @@ cdef class Unpacker(object): raise 
AssertionError( "unpacker.feed() is not be able to use with `file_like`.") - get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol) + get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len) try: self.append_buffer(buf, buf_len) finally: - if new_protocol: - PyBuffer_Release(&pybuff) + PyBuffer_Release(&pybuff) cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): cdef: From 9f4b2d53b77c5ccd96e3ceb359747960cbf03bd4 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 20:47:01 +0900 Subject: [PATCH 201/349] Remove deprecated submodule unpack (#385) --- msgpack/_unpacker.pyx | 8 -------- msgpack/fallback.py | 10 ---------- 2 files changed, 18 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 3c9b7b37..3a9d494a 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -200,14 +200,6 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise ValueError("Unpack failed: error = %d" % (ret,)) -def unpack(object stream, **kwargs): - PyErr_WarnEx( - DeprecationWarning, - "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", 1) - data = stream.read() - return unpackb(data, **kwargs) - - cdef class Unpacker(object): """Streaming unpacker. diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 577e571a..3faacbfc 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -131,16 +131,6 @@ def _get_data_from_buffer(obj): return view -def unpack(stream, **kwargs): - warnings.warn( - "Direct calling implementation's unpack() is deprecated, Use msgpack.unpack() or unpackb() instead.", - DeprecationWarning, - stacklevel=2, - ) - data = stream.read() - return unpackb(data, **kwargs) - - def unpackb(packed, **kwargs): """ Unpack an object from `packed`. 
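
The removed wrapper's deprecation message already pointed callers at the package-level helpers. A minimal sketch of that replacement usage (assuming only the public ``msgpack.unpack`` / ``msgpack.unpackb`` entry points, with data that round-trips identically under both old and new packer defaults)::

    import io

    import msgpack

    payload = msgpack.packb([1, 2, 3])
    assert msgpack.unpackb(payload) == [1, 2, 3]             # unpack from bytes
    assert msgpack.unpack(io.BytesIO(payload)) == [1, 2, 3]  # unpack from a stream
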
From de320488ae494b85a03b60dd33f91b650033d775 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 20:47:20 +0900 Subject: [PATCH 202/349] fallback: Remove old buffer protocol support (#384) --- msgpack/fallback.py | 16 +--------------- test/test_buffer.py | 8 ++++---- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 3faacbfc..9de35535 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -111,21 +111,7 @@ def _check_type_strict(obj, t, type=type, tuple=tuple): def _get_data_from_buffer(obj): - try: - view = memoryview(obj) - except TypeError: - # try to use legacy buffer protocol if 2.7, otherwise re-raise - if PY2: - view = memoryview(buffer(obj)) - warnings.warn( - "using old buffer interface to unpack %s; " - "this leads to unpacking errors if slicing is used and " - "will be removed in a future version" % type(obj), - RuntimeWarning, - stacklevel=3, - ) - else: - raise + view = memoryview(obj) if view.itemsize != 1: raise ValueError("cannot unpack from multi-byte object") return view diff --git a/test/test_buffer.py b/test/test_buffer.py index 64fbdefb..da68b27e 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,17 +1,17 @@ #!/usr/bin/env python # coding: utf-8 +import sys +import pytest from msgpack import packb, unpackb +@pytest.mark.skipif(sys.version_info[0] == 2, reason="Python 2 is not supported") def test_unpack_buffer(): from array import array buf = array("b") - try: - buf.frombytes(packb((b"foo", b"bar"))) - except AttributeError: # PY2 - buf.fromstring(packb((b"foo", b"bar"))) + buf.frombytes(packb((b"foo", b"bar"))) obj = unpackb(buf, use_list=1) assert [b"foo", b"bar"] == obj From 7e9905bdfaecde83ddb1a4575e734a10b055fde9 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 5 Dec 2019 21:34:10 +0900 Subject: [PATCH 203/349] Use new msgpack spec by default. (#386) --- README.rst | 70 ++++++++++++----------------------------- msgpack/_packer.pyx | 6 ++-- msgpack/_unpacker.pyx | 16 ++++------ msgpack/fallback.py | 20 +++++------- test/test_buffer.py | 4 +-- test/test_case.py | 11 +++---- test/test_format.py | 10 ++++-- test/test_memoryview.py | 39 +++++++---------------- test/test_newspec.py | 6 ++-- test/test_read_size.py | 10 +++--- test/test_sequnpack.py | 9 +++--- 11 files changed, 75 insertions(+), 126 deletions(-) diff --git a/README.rst b/README.rst index 82b6c02a..f9f074fe 100644 --- a/README.rst +++ b/README.rst @@ -37,36 +37,16 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt msgpack is removed and `import msgpack` fail. -Deprecating encoding option -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Compatibility with old format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -encoding and unicode_errors options are deprecated. +You can use ``use_bin_type=False`` option to pack ``bytes`` +object into raw type in old msgpack spec, instead of bin type in new msgpack spec. -In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommended. +You can unpack old msgpack formatk using ``raw=True`` option. +It unpacks str (raw) type in msgpack into Python bytes. -For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes`` -object into msgpack raw type. - -In case of unpacker, there is new ``raw`` option. It is ``True`` by default -for backward compatibility, but it is changed to ``False`` in near future. -You can use ``raw=False`` instead of ``encoding='utf-8'``. 
- -Planned backward incompatible changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When msgpack 1.0, I planning these breaking changes: - -* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option. -* packer: Change default of ``use_bin_type`` option from False to True. -* unpacker: Change default of ``raw`` option from True to False. -* unpacker: Reduce all ``max_xxx_len`` options for typical usage. -* unpacker: Remove ``write_bytes`` option from all methods. - -To avoid these breaking changes breaks your application, please: - -* Don't use deprecated options. -* Pass ``use_bin_type`` and ``raw`` options explicitly. -* If your application handle large (>1MB) data, specify ``max_xxx_len`` options too. +See note in below for detail. Install @@ -76,6 +56,7 @@ Install $ pip install msgpack + Pure Python implementation ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -100,6 +81,13 @@ Without extension, using pure Python implementation on CPython runs slowly. How to use ---------- +.. note:: + + In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users + using msgpack < 1.0. + These options are default from msgpack 1.0 so you can omit them. + + One-shot pack & unpack ^^^^^^^^^^^^^^^^^^^^^^ @@ -252,36 +240,18 @@ Notes string and binary type ^^^^^^^^^^^^^^^^^^^^^^ -Early versions of msgpack didn't distinguish string and binary types (like Python 1). +Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. -For backward compatibility reasons, msgpack-python will still default all -strings to byte strings, unless you specify the ``use_bin_type=True`` option in -the packer. If you do so, it will use a non-standard type called **bin** to -serialize byte arrays, and **raw** becomes to mean **str**. If you want to -distinguish **bin** and **raw** in the unpacker, specify ``raw=False``. - -Note that Python 2 defaults to byte-arrays over Unicode strings: - -.. code-block:: pycon - - >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) - ['spam', 'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw=False) - ['spam', u'eggs'] - -This is the same code in Python 3 (same behaviour, but Python 3 has a -different default): +You can pack into and unpack from this old spec using ``use_bin_type=False`` +and ``raw=True`` options. .. code-block:: pycon >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) [b'spam', b'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw=False) + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) [b'spam', 'eggs'] diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index f3bde3f5..8cf3c05e 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -80,9 +80,7 @@ cdef class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. - Current default value is false, but it will be changed to true - in future version. You should specify it explicitly. + It also enables str8 type for unicode. (default: True) :param bool strict_types: If set to true, types will be checked to be exact. 
Derived classes @@ -113,7 +111,7 @@ cdef class Packer(object): self.pk.length = 0 def __init__(self, *, default=None, unicode_errors=None, - bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, + bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, bint strict_types=False): self.use_float = use_single_float self.strict_types = strict_types diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 3a9d494a..f10e99d7 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj, def unpackb(object packed, *, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=True, bint strict_map_key=False, + bint use_list=True, bint raw=False, bint strict_map_key=False, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=-1, @@ -217,12 +217,8 @@ cdef class Unpacker(object): Otherwise, unpack to Python tuple. (default: True) :param bool raw: - If true, unpack msgpack raw to Python bytes (default). - Otherwise, unpack to Python str (or unicode on Python 2) by decoding - with UTF-8 encoding (recommended). - Currently, the default is true, but it will be changed to false in - near future. So you must specify it explicitly for keeping backward - compatibility. + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). :param bool strict_map_key: If true, only str or bytes are accepted for map (dict) keys. @@ -268,13 +264,13 @@ cdef class Unpacker(object): Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(max_buffer_size=10*1024*1024) while True: buf = sock.recv(1024**2) if not buf: @@ -309,7 +305,7 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, *, Py_ssize_t read_size=0, - bint use_list=True, bint raw=True, bint strict_map_key=False, + bint use_list=True, bint raw=False, bint strict_map_key=False, object object_hook=None, object object_pairs_hook=None, object list_hook=None, unicode_errors=None, Py_ssize_t max_buffer_size=0, object ext_hook=ExtType, diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9de35535..fa2f3a8c 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -158,7 +158,7 @@ def _unpack_from(f, b, o=0): class Unpacker(object): """Streaming unpacker. - arguments: + Arguments: :param file_like: File-like object having `.read(n)` method. @@ -172,12 +172,8 @@ class Unpacker(object): Otherwise, unpack to Python tuple. (default: True) :param bool raw: - If true, unpack msgpack raw to Python bytes (default). - Otherwise, unpack to Python str (or unicode on Python 2) by decoding - with UTF-8 encoding (recommended). - Currently, the default is true, but it will be changed to false in - near future. So you must specify it explicitly for keeping backward - compatibility. + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). :param bool strict_map_key: If true, only str or bytes are accepted for map (dict) keys. 
@@ -226,13 +222,13 @@ class Unpacker(object): Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(max_buffer_size=10*1024*1024) while True: buf = sock.recv(1024**2) if not buf: @@ -253,7 +249,7 @@ def __init__( file_like=None, read_size=0, use_list=True, - raw=True, + raw=False, strict_map_key=False, object_hook=None, object_pairs_hook=None, @@ -748,7 +744,7 @@ class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. + It also enables str8 type for unicode. (default: True) :param bool strict_types: If set to true, types will be checked to be exact. Derived classes @@ -769,7 +765,7 @@ def __init__( unicode_errors=None, use_single_float=False, autoreset=True, - use_bin_type=False, + use_bin_type=True, strict_types=False, ): self._strict_types = strict_types diff --git a/test/test_buffer.py b/test/test_buffer.py index da68b27e..62507cf4 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -17,7 +17,7 @@ def test_unpack_buffer(): def test_unpack_bytearray(): - buf = bytearray(packb(("foo", "bar"))) + buf = bytearray(packb((b"foo", b"bar"))) obj = unpackb(buf, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes @@ -25,7 +25,7 @@ def test_unpack_bytearray(): def test_unpack_memoryview(): - buf = bytearray(packb(("foo", "bar"))) + buf = bytearray(packb((b"foo", b"bar"))) view = memoryview(buf) obj = unpackb(view, use_list=1) assert [b"foo", b"bar"] == obj diff --git a/test/test_case.py b/test/test_case.py index 3bc1b26d..3e60e59e 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,13 +1,12 @@ #!/usr/bin/env python # coding: utf-8 - from msgpack import packb, unpackb -def check(length, obj): - v = packb(obj) +def check(length, obj, use_bin_type=True): + v = packb(obj, use_bin_type=use_bin_type) assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v)) - assert unpackb(v, use_list=0) == obj + assert unpackb(v, use_list=0, raw=not use_bin_type) == obj def test_1(): @@ -56,7 +55,7 @@ def test_9(): def check_raw(overhead, num): - check(num + overhead, b" " * num) + check(num + overhead, b" " * num, use_bin_type=False) def test_fixraw(): @@ -135,4 +134,4 @@ def test_match(): def test_unicode(): - assert unpackb(packb("foobar"), use_list=1) == b"foobar" + assert unpackb(packb(u"foobar"), use_list=1) == u"foobar" diff --git a/test/test_format.py b/test/test_format.py index c2cdfbd6..8c2f03ff 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -4,8 +4,8 @@ from msgpack import unpackb -def check(src, should, use_list=0): - assert unpackb(src, use_list=use_list) == should +def check(src, should, use_list=0, raw=True): + assert unpackb(src, use_list=use_list, raw=raw) == should def testSimpleValue(): @@ -59,6 +59,12 @@ def testRaw(): b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", (b"", b"a", b"ab", b"", b"a", b"ab"), ) + check( + b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", + ("", "a", "ab", "", "a", "ab"), + raw=False, + ) def testArray(): diff --git a/test/test_memoryview.py b/test/test_memoryview.py index e1b63b8f..86b2c1f7 100644 --- a/test/test_memoryview.py +++ 
b/test/test_memoryview.py @@ -1,50 +1,33 @@ #!/usr/bin/env python # coding: utf-8 +import pytest from array import array from msgpack import packb, unpackb import sys -# For Python < 3: -# - array type only supports old buffer interface -# - array.frombytes is not available, must use deprecated array.fromstring -if sys.version_info[0] < 3: +pytestmark = pytest.mark.skipif( + sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol" +) - def make_memoryview(obj): - return memoryview(buffer(obj)) - def make_array(f, data): - a = array(f) - a.fromstring(data) - return a - - def get_data(a): - return a.tostring() - - -else: - make_memoryview = memoryview - - def make_array(f, data): - a = array(f) - a.frombytes(data) - return a - - def get_data(a): - return a.tobytes() +def make_array(f, data): + a = array(f) + a.frombytes(data) + return a def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): # create a new array original_array = array(format) original_array.fromlist([255] * (nbytes // original_array.itemsize)) - original_data = get_data(original_array) - view = make_memoryview(original_array) + original_data = original_array.tobytes() + view = memoryview(original_array) # pack, unpack, and reconstruct array packed = packb(view, use_bin_type=use_bin_type) - unpacked = unpackb(packed) + unpacked = unpackb(packed, raw=(not use_bin_type)) reconstructed_array = make_array(format, unpacked) # check that we got the right amount of data diff --git a/test/test_newspec.py b/test/test_newspec.py index f4f2a238..b7da486e 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -10,14 +10,16 @@ def test_str8(): assert len(b) == len(data) + 2 assert b[0:2] == header + b"\x20" assert b[2:] == data - assert unpackb(b) == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() data = b"x" * 255 b = packb(data.decode(), use_bin_type=True) assert len(b) == len(data) + 2 assert b[0:2] == header + b"\xff" assert b[2:] == data - assert unpackb(b) == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() def test_bin8(): diff --git a/test/test_read_size.py b/test/test_read_size.py index 8d8df642..33a7e7dd 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -8,9 +8,9 @@ def test_read_array_header(): unpacker = Unpacker() unpacker.feed(packb(["a", "b", "c"])) assert unpacker.read_array_header() == 3 - assert unpacker.unpack() == b"a" - assert unpacker.unpack() == b"b" - assert unpacker.unpack() == b"c" + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "b" + assert unpacker.unpack() == "c" try: unpacker.unpack() assert 0, "should raise exception" @@ -22,8 +22,8 @@ def test_read_map_header(): unpacker = Unpacker() unpacker.feed(packb({"a": "A"})) assert unpacker.read_map_header() == 1 - assert unpacker.unpack() == b"a" - assert unpacker.unpack() == b"A" + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "A" try: unpacker.unpack() assert 0, "should raise exception" diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index e5765716..9b694790 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,6 +1,5 @@ #!/usr/bin/env python # coding: utf-8 - import io from msgpack import Unpacker, BufferFull from msgpack import pack @@ -26,7 +25,7 @@ def test_partialdata(): with raises(StopIteration): next(iter(unpacker)) unpacker.feed(b"o") - assert next(iter(unpacker)) == b"hallo" + assert next(iter(unpacker)) == "hallo" def test_foobar(): @@ -98,13 
+97,13 @@ def test_readbytes(): def test_issue124(): unpacker = Unpacker() unpacker.feed(b"\xa1?\xa1!") - assert tuple(unpacker) == (b"?", b"!") + assert tuple(unpacker) == ("?", "!") assert tuple(unpacker) == () unpacker.feed(b"\xa1?\xa1") - assert tuple(unpacker) == (b"?",) + assert tuple(unpacker) == ("?",) assert tuple(unpacker) == () unpacker.feed(b"!") - assert tuple(unpacker) == (b"!",) + assert tuple(unpacker) == ("!",) assert tuple(unpacker) == () From 235c6036eabd89d5a28244091ac1e4f091b49679 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 6 Dec 2019 19:28:23 +0900 Subject: [PATCH 204/349] travis: Use codecov (#387) --- .travis.yml | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9d3ae749..378bc809 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,11 +12,20 @@ python: - "3.7" - "3.8-dev" + +_pure: &pure + install: + - pip install -U pip + - pip install -U pytest pytest-cov codecov + - pip install . + script: + - pytest --cov=msgpack -v test + matrix: include: - name: Black language: python - python: 3.8 + python: 3.7 install: - pip install black script: @@ -35,38 +44,35 @@ matrix: - docker pull $DOCKER_IMAGE script: - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh + + - name: "Python 2 (fallback)" + python: "2.7" + <<: *pure + - name: "pypy2.7" python: "pypy2.7-7.1.1" - install: - - pip install -e . - script: - - py.test -v test + <<: *pure + - name: "pypy3" python: "pypy3.6-7.1.1" - install: - - pip install -e . - script: - - pytest -v test - - name: "Python 2 (fallback)" - python: "2.7" - install: - - pip install -U pip - - pip install -U pytest - - pip install . - script: - - pytest -v test + <<: *pure install: - pip install -U pip - - pip install -U pytest - - pip install -r requirements.txt + - pip install -U pytest pytest-cov codecov + - pip install -r requirements.txt # Cython - make cython - pip install -e . script: - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _cmsgpack' - - pytest -v test - - MSGPACK_PUREPYTHON=x pytest -v test + - pytest --cov=msgpack -v test + - MSGPACK_PUREPYTHON=x pytest --cov=msgpack -v test + +after_success: + - if [ -f .coverage ]; then + codecov; + fi # vim: sw=2 ts=2 From 7a8ce0f9ca910a851b6835d26b1d6970a188fa4e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 6 Dec 2019 20:34:18 +0900 Subject: [PATCH 205/349] Remove unused import --- Makefile | 2 +- msgpack/fallback.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a1edc883..f8971cc3 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ all: cython .PHONY: black black: - black . + black msgpack/ test/ .PHONY: cython cython: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index fa2f3a8c..388a5ab4 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -2,7 +2,6 @@ import sys import struct -import warnings PY2 = sys.version_info[0] == 2 From f6f6f328eb2d7b7f1272fa7addb31d2ac5bef207 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 6 Dec 2019 21:16:27 +0900 Subject: [PATCH 206/349] Fix fallback Unpacker.read() (#388) Fixes #352. 
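
The fallback ``Unpacker.read_bytes()`` returned data without calling ``_consume()``, which could leave the internal buffer out of sync when more data was fed afterwards. A minimal sketch of the sequence covered by the regression test added below (assuming the pure-Python ``msgpack.fallback.Unpacker``)::

    from msgpack.fallback import Unpacker

    u = Unpacker()
    u.feed(b"x")
    assert bytes(u.read_bytes(1)) == b"x"  # raw read of one byte
    u.feed(b"\x01")                        # feeding more data afterwards must still work
    assert next(u) == 1                    # and the new byte unpacks cleanly
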
--- msgpack/fallback.py | 4 +++- test/test_sequnpack.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 388a5ab4..85a711b6 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -357,7 +357,9 @@ def _get_extradata(self): return self._buffer[self._buff_i :] def read_bytes(self, n): - return self._read(n) + ret = self._read(n) + self._consume() + return ret def _read(self, n): # (int) -> bytearray diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 9b694790..ad29de84 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -93,6 +93,15 @@ def test_readbytes(): assert unpacker.unpack() == ord(b"a") assert unpacker.unpack() == ord(b"r") + # Issue 352 + u = Unpacker() + u.feed(b"x") + assert bytes(u.read_bytes(1)) == b"x" + with raises(StopIteration): + next(u) + u.feed(b"\1") + assert next(u) == 1 + def test_issue124(): unpacker = Unpacker() From 5ba496c79a45c6938f3e850718349cfa51cc38ae Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 6 Dec 2019 21:23:54 +0900 Subject: [PATCH 207/349] Move Black from Travis to Github Actions (#390) --- .github/workflows/black.yaml | 21 +++++++++++++++++++++ .travis.yml | 8 -------- 2 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/black.yaml diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml new file mode 100644 index 00000000..cabd0ccf --- /dev/null +++ b/.github/workflows/black.yaml @@ -0,0 +1,21 @@ +name: Black + +on: ["push", "pull_request"] + +jobs: + black: + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: '3.x' + architecture: 'x64' + + - name: Checkout + uses: actions/checkout@v1 + + - name: Black Code Formatter + run: | + pip install black + black --diff --check msgpack/ test/ diff --git a/.travis.yml b/.travis.yml index 378bc809..39db1d74 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,14 +23,6 @@ _pure: &pure matrix: include: - - name: Black - language: python - python: 3.7 - install: - - pip install black - script: - - black --check --diff . - - name: 32bit build language: python services: From 0fc0eb2f16fcc7d0271792f93a90af389f66dafb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 6 Dec 2019 21:26:28 +0900 Subject: [PATCH 208/349] Update README --- README.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index f9f074fe..c13267b6 100644 --- a/README.rst +++ b/README.rst @@ -34,19 +34,19 @@ I upload transitional package (msgpack-python 0.5 which depending on msgpack) for smooth transition from msgpack-python to msgpack. Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-python`, -msgpack is removed and `import msgpack` fail. +msgpack is removed, and `import msgpack` fail. -Compatibility with old format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Compatibility with the old format +^^^^^^^^^^^^^^^^^^^^^^----^^^^^^^ You can use ``use_bin_type=False`` option to pack ``bytes`` -object into raw type in old msgpack spec, instead of bin type in new msgpack spec. +object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. -You can unpack old msgpack formatk using ``raw=True`` option. +You can unpack old msgpack format using ``raw=True`` option. It unpacks str (raw) type in msgpack into Python bytes. -See note in below for detail. +See note below for detail. 
Install @@ -67,7 +67,7 @@ But msgpack provides a pure Python implementation (``msgpack.fallback``) for PyPy and Python 2. Since the [pip](https://pip.pypa.io/) uses the pure Python implementation, -Python 2 support will not be dropped in foreseeable feature. +Python 2 support will not be dropped in the foreseeable future. Windows From d8e3cf0563989a660398318a7c788645124e1d8b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 6 Dec 2019 22:23:15 +0900 Subject: [PATCH 209/349] Make strict_map_key default to True (#392) --- msgpack/_unpacker.pyx | 8 +++----- msgpack/fallback.py | 6 ++---- test/test_case.py | 2 +- test/test_format.py | 2 +- test/test_limits.py | 4 ++-- test/test_obj.py | 9 ++++++--- test/test_pack.py | 6 +++--- test/test_sequnpack.py | 2 +- 8 files changed, 19 insertions(+), 20 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index f10e99d7..53ecf860 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj, def unpackb(object packed, *, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=False, bint strict_map_key=False, + bint use_list=True, bint raw=False, bint strict_map_key=True, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=-1, @@ -221,9 +221,7 @@ cdef class Unpacker(object): Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). :param bool strict_map_key: - If true, only str or bytes are accepted for map (dict) keys. - It's False by default for backward-compatibility. - But it will be True from msgpack 1.0. + If true (default), only str or bytes are accepted for map (dict) keys. :param callable object_hook: When specified, it should be callable. @@ -305,7 +303,7 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, *, Py_ssize_t read_size=0, - bint use_list=True, bint raw=False, bint strict_map_key=False, + bint use_list=True, bint raw=False, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, unicode_errors=None, Py_ssize_t max_buffer_size=0, object ext_hook=ExtType, diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 85a711b6..7df92f57 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -175,9 +175,7 @@ class Unpacker(object): Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). :param bool strict_map_key: - If true, only str or bytes are accepted for map (dict) keys. - It's False by default for backward-compatibility. - But it will be True from msgpack 1.0. + If true (default), only str or bytes are accepted for map (dict) keys. :param callable object_hook: When specified, it should be callable. 
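With ``strict_map_key=True`` as the new default, maps whose keys are not ``str`` or ``bytes`` are rejected unless the check is relaxed, which is why the test updates later in this patch add ``strict_map_key=False``. A short sketch of the behaviour change, assuming msgpack 1.0 defaults (the values are illustrative):

```py
import msgpack

packed = msgpack.packb({1: 2, 3: 4})

try:
    msgpack.unpackb(packed)  # strict_map_key=True by default
except ValueError:
    pass  # integer map keys are rejected under the new default

assert msgpack.unpackb(packed, strict_map_key=False) == {1: 2, 3: 4}
```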
@@ -249,7 +247,7 @@ def __init__( read_size=0, use_list=True, raw=False, - strict_map_key=False, + strict_map_key=True, object_hook=None, object_pairs_hook=None, list_hook=None, diff --git a/test/test_case.py b/test/test_case.py index 3e60e59e..a0a3c5ad 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -92,7 +92,7 @@ def test_array32(): def match(obj, buf): assert packb(obj) == buf - assert unpackb(buf, use_list=0) == obj + assert unpackb(buf, use_list=0, strict_map_key=False) == obj def test_match(): diff --git a/test/test_format.py b/test/test_format.py index 8c2f03ff..d455f7ce 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -5,7 +5,7 @@ def check(src, should, use_list=0, raw=True): - assert unpackb(src, use_list=use_list, raw=raw) == should + assert unpackb(src, use_list=use_list, raw=raw, strict_map_key=False) == should def testSimpleValue(): diff --git a/test/test_limits.py b/test/test_limits.py index 6e850302..65e6bcc7 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -87,11 +87,11 @@ def test_max_map_len(): d = {1: 2, 3: 4, 5: 6} packed = packb(d) - unpacker = Unpacker(max_map_len=3) + unpacker = Unpacker(max_map_len=3, strict_map_key=False) unpacker.feed(packed) assert unpacker.unpack() == d - unpacker = Unpacker(max_map_len=2) + unpacker = Unpacker(max_map_len=2, strict_map_key=False) with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() diff --git a/test/test_obj.py b/test/test_obj.py index 0b99ceab..86c557cd 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -33,7 +33,10 @@ def test_decode_pairs_hook(): packed = packb([3, {1: 2, 3: 4}]) prod_sum = 1 * 2 + 3 * 4 unpacked = unpackb( - packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1 + packed, + object_pairs_hook=lambda l: sum(k * v for k, v in l), + use_list=1, + strict_map_key=False, ) assert unpacked[1] == prod_sum @@ -70,10 +73,10 @@ def bad_complex_decoder(o): def test_an_exception_in_objecthook1(): with raises(DecodeError): packed = packb({1: {"__complex__": True, "real": 1, "imag": 2}}) - unpackb(packed, object_hook=bad_complex_decoder) + unpackb(packed, object_hook=bad_complex_decoder, strict_map_key=False) def test_an_exception_in_objecthook2(): with raises(DecodeError): packed = packb({1: [{"__complex__": True, "real": 1, "imag": 2}]}) - unpackb(packed, list_hook=bad_complex_decoder, use_list=1) + unpackb(packed, list_hook=bad_complex_decoder, use_list=1, strict_map_key=False) diff --git a/test/test_pack.py b/test/test_pack.py index de212efa..932f7608 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -14,7 +14,7 @@ def check(data, use_list=False): - re = unpackb(packb(data), use_list=use_list) + re = unpackb(packb(data), use_list=use_list, strict_map_key=False) assert re == data @@ -166,7 +166,7 @@ def testMapSize(sizes=[0, 5, 50, 1000]): bio.write(packer.pack(i * 2)) # value bio.seek(0) - unpacker = Unpacker(bio) + unpacker = Unpacker(bio, strict_map_key=False) for size in sizes: assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) @@ -186,7 +186,7 @@ def test_pairlist(): pairlist = [(b"a", 1), (2, b"b"), (b"foo", b"bar")] packer = Packer() packed = packer.pack_map_pairs(pairlist) - unpacked = unpackb(packed, object_pairs_hook=list) + unpacked = unpackb(packed, object_pairs_hook=list, strict_map_key=False) assert pairlist == unpacked diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index ad29de84..6293a453 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -132,7 +132,7 @@ def 
test_unpack_tell(): pack(m, stream) offsets.append(stream.tell()) stream.seek(0) - unpacker = Unpacker(stream) + unpacker = Unpacker(stream, strict_map_key=False) for m, o in zip(messages, offsets): m2 = next(unpacker) assert m == m2 From 5399f8180d23c147b1243d7c39aa19f9a8ba840a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 9 Dec 2019 17:02:35 +0900 Subject: [PATCH 210/349] Update README (#393) --- README.rst | 55 +++++++++++++++++++++++++++++++++++---------- msgpack/fallback.py | 4 ++-- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index c13267b6..d01e963e 100644 --- a/README.rst +++ b/README.rst @@ -38,7 +38,7 @@ msgpack is removed, and `import msgpack` fail. Compatibility with the old format -^^^^^^^^^^^^^^^^^^^^^^----^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can use ``use_bin_type=False`` option to pack ``bytes`` object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. @@ -49,6 +49,32 @@ It unpacks str (raw) type in msgpack into Python bytes. See note below for detail. +Major breaking changes in msgpack 1.0 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Python 2 + + * The extension module does not support Python 2 anymore. + The pure Python implementation (``msgpack.fallback``) is used for Python 2. + +* Packer + + * ``use_bin_type=True`` by default. bytes are encoded in bin type in msgpack. + **If you are still sing Python 2, you must use unicode for all string types.** + You can use ``use_bin_type=False`` to encode into old msgpack format. + * ``encoding`` option is removed. UTF-8 is used always. + +* Unpacker + + * ``raw=False`` by default. It assumes str types are valid UTF-8 string + and decode them to Python str (unicode) object. + * ``encdoding`` option is rmeoved. You can use ``raw=True`` to support old format. + * Default value of ``max_buffer_size`` is changed from 0 to 100 MiB. + * Default value of ``strict_map_key`` is changed to True to avoid hashdos. + You need to pass ``strict_map_key=False`` if you have data which contain map keys + which type is not bytes or str. + + Install ------- @@ -270,27 +296,32 @@ To use the **ext** type, pass ``msgpack.ExtType`` object to packer. You can use it with ``default`` and ``ext_hook``. See below. -Note about performance ----------------------- +Security +^^^^^^^^ + +To unpacking data received from unreliable source, msgpack provides +two security options. + +``max_buffer_size`` (default: 100*1024*1024) limits the internal buffer size. +It is used to limit the preallocated list size too. -GC -^^ +``strict_map_key`` (default: ``True``) limits the type of map keys to bytes and str. +While msgpack spec doesn't limit the types of the map keys, +there is a risk of the hashdos. +If you need to support other types for map keys, use ``strict_map_key=False``. + + +Performance tips +^^^^^^^^^^^^^^^^ CPython's GC starts when growing allocated object. This means unpacking may cause useless GC. You can use ``gc.disable()`` when unpacking large message. -use_list option -^^^^^^^^^^^^^^^ - List is the default sequence type of Python. But tuple is lighter than list. You can use ``use_list=False`` while unpacking when performance is important. -Python's dict can't use list as key and MessagePack allows array for key of mapping. -``use_list=False`` allows unpacking such message. -Another way to unpacking such object is using ``object_pairs_hook``. 
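The README lines removed above still describe a real situation: MessagePack allows an array as a map key, which cannot become a Python ``dict`` key. A hedged sketch of the two workarounds they mention, combined with the ``strict_map_key`` default introduced in this series (values are illustrative):

```py
import msgpack

packed = msgpack.packb({(1, 2): "point"})  # the tuple key is packed as an array

# Unpack arrays as tuples so the key is hashable again.
assert msgpack.unpackb(packed, use_list=False, strict_map_key=False) == {(1, 2): "point"}

# Or skip dict construction entirely and receive key/value pairs.
pairs = msgpack.unpackb(packed, object_pairs_hook=list, strict_map_key=False)
assert pairs == [([1, 2], "point")]
```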
- Development ----------- diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 7df92f57..3704f9d5 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -747,7 +747,7 @@ class Packer(object): :param bool strict_types: If set to true, types will be checked to be exact. Derived classes - from serializeable types will not be serialized and will be + from serializable types will not be serialized and will be treated as unsupported type and forwarded to default. Additionally tuples will not be serialized as lists. This is useful when trying to implement accurate serialization @@ -1014,7 +1014,7 @@ def bytes(self): def reset(self): """Reset internal buffer. - This method is usaful only when autoreset=False. + This method is useful only when autoreset=False. """ self._buffer = StringIO() From c356035a576c38db5ca232ede07b291087f1b8b2 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 9 Dec 2019 17:03:12 +0900 Subject: [PATCH 211/349] Unpacker: Change max_buffer_size to 100MiB (#391) --- msgpack/_unpacker.pyx | 34 ++++++++++++++++++---------------- msgpack/fallback.py | 33 ++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 53ecf860..0ff633b9 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -234,27 +234,28 @@ cdef class Unpacker(object): (See also simplejson) :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means system's INT_MAX (default). + Limits size of data waiting unpacked. 0 means system's INT_MAX. + The default value is 100*1024*1024 (100MiB). Raises `BufferFull` exception when it is insufficient. You should set this parameter when unpacking data from untrusted source. :param int max_str_len: Deprecated, use *max_buffer_size* instead. - Limits max length of str. (default: max_buffer_size or 1024*1024) + Limits max length of str. (default: max_buffer_size) :param int max_bin_len: Deprecated, use *max_buffer_size* instead. - Limits max length of bin. (default: max_buffer_size or 1024*1024) + Limits max length of bin. (default: max_buffer_size) :param int max_array_len: - Limits max length of array. (default: max_buffer_size or 128*1024) + Limits max length of array. (default: max_buffer_size) :param int max_map_len: - Limits max length of map. (default: max_buffer_size//2 or 32*1024) + Limits max length of map. (default: max_buffer_size//2) :param int max_ext_len: Deprecated, use *max_buffer_size* instead. - Limits max size of ext type. (default: max_buffer_size or 1024*1024) + Limits max size of ext type. (default: max_buffer_size) :param str unicode_errors: Error handler used for decoding str type. 
(default: `'strict'`) @@ -262,13 +263,13 @@ cdef class Unpacker(object): Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024) + unpacker = Unpacker(file_like) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(max_buffer_size=10*1024*1024) + unpacker = Unpacker() while True: buf = sock.recv(1024**2) if not buf: @@ -305,7 +306,7 @@ cdef class Unpacker(object): def __init__(self, file_like=None, *, Py_ssize_t read_size=0, bint use_list=True, bint raw=False, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, - unicode_errors=None, Py_ssize_t max_buffer_size=0, + unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024, object ext_hook=ExtType, Py_ssize_t max_str_len=-1, Py_ssize_t max_bin_len=-1, @@ -325,23 +326,24 @@ cdef class Unpacker(object): if not PyCallable_Check(self.file_like_read): raise TypeError("`file_like.read` must be a callable.") + if not max_buffer_size: + max_buffer_size = INT_MAX if max_str_len == -1: - max_str_len = max_buffer_size or 1024*1024 + max_str_len = max_buffer_size if max_bin_len == -1: - max_bin_len = max_buffer_size or 1024*1024 + max_bin_len = max_buffer_size if max_array_len == -1: - max_array_len = max_buffer_size or 128*1024 + max_array_len = max_buffer_size if max_map_len == -1: - max_map_len = max_buffer_size//2 or 32*1024 + max_map_len = max_buffer_size//2 if max_ext_len == -1: - max_ext_len = max_buffer_size or 1024*1024 + max_ext_len = max_buffer_size - if not max_buffer_size: - max_buffer_size = INT_MAX if read_size > max_buffer_size: raise ValueError("read_size should be less or equal to max_buffer_size") if not read_size: read_size = min(max_buffer_size, 1024**2) + self.max_buffer_size = max_buffer_size self.read_size = read_size self.buf = PyMem_Malloc(read_size) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 3704f9d5..f6ba4242 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -193,39 +193,40 @@ class Unpacker(object): contains invalid UTF-8 string. :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means system's INT_MAX (default). + Limits size of data waiting unpacked. 0 means 2**32-1. + The default value is 100*1024*1024 (100MiB). Raises `BufferFull` exception when it is insufficient. You should set this parameter when unpacking data from untrusted source. :param int max_str_len: Deprecated, use *max_buffer_size* instead. - Limits max length of str. (default: max_buffer_size or 1024*1024) + Limits max length of str. (default: max_buffer_size) :param int max_bin_len: Deprecated, use *max_buffer_size* instead. - Limits max length of bin. (default: max_buffer_size or 1024*1024) + Limits max length of bin. (default: max_buffer_size) :param int max_array_len: Limits max length of array. - (default: max_buffer_size or 128*1024) + (default: max_buffer_size) :param int max_map_len: Limits max length of map. - (default: max_buffer_size//2 or 32*1024) + (default: max_buffer_size//2) :param int max_ext_len: Deprecated, use *max_buffer_size* instead. - Limits max size of ext type. (default: max_buffer_size or 1024*1024) + Limits max size of ext type. 
(default: max_buffer_size) Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024) + unpacker = Unpacker(file_like) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(max_buffer_size=10*1024*1024) + unpacker = Unpacker(max_buffer_size) while True: buf = sock.recv(1024**2) if not buf: @@ -252,7 +253,7 @@ def __init__( object_pairs_hook=None, list_hook=None, unicode_errors=None, - max_buffer_size=0, + max_buffer_size=100 * 1024 * 1024, ext_hook=ExtType, max_str_len=-1, max_bin_len=-1, @@ -285,18 +286,20 @@ def __init__( # state, which _buf_checkpoint records. self._buf_checkpoint = 0 + if not max_buffer_size: + max_buffer_size = 2 ** 31 - 1 if max_str_len == -1: - max_str_len = max_buffer_size or 1024 * 1024 + max_str_len = max_buffer_size if max_bin_len == -1: - max_bin_len = max_buffer_size or 1024 * 1024 + max_bin_len = max_buffer_size if max_array_len == -1: - max_array_len = max_buffer_size or 128 * 1024 + max_array_len = max_buffer_size if max_map_len == -1: - max_map_len = max_buffer_size // 2 or 32 * 1024 + max_map_len = max_buffer_size // 2 if max_ext_len == -1: - max_ext_len = max_buffer_size or 1024 * 1024 + max_ext_len = max_buffer_size - self._max_buffer_size = max_buffer_size or 2 ** 31 - 1 + self._max_buffer_size = max_buffer_size if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") self._read_size = read_size or min(self._max_buffer_size, 16 * 1024) From d10f12db8f328130a13df759bc9cb3fa064cc8b8 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 9 Dec 2019 18:12:51 +0900 Subject: [PATCH 212/349] typo --- msgpack/_packer.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 8cf3c05e..2a768b06 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -341,7 +341,7 @@ cdef class Packer(object): def reset(self): """Reset internal buffer. - This method is usaful only when autoreset=False. + This method is useful only when autoreset=False. """ self.pk.length = 0 From 5fd611909319d03200774ea3c7a6ae16dbd26c12 Mon Sep 17 00:00:00 2001 From: Marty B Date: Mon, 9 Dec 2019 11:29:47 +0100 Subject: [PATCH 213/349] Simplify check for bool type (#362) --- msgpack/_packer.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 2a768b06..14264396 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -153,11 +153,10 @@ cdef class Packer(object): while True: if o is None: ret = msgpack_pack_nil(&self.pk) - elif PyBool_Check(o) if strict_types else isinstance(o, bool): - if o: - ret = msgpack_pack_true(&self.pk) - else: - ret = msgpack_pack_false(&self.pk) + elif o is True: + ret = msgpack_pack_true(&self.pk) + elif o is False: + ret = msgpack_pack_false(&self.pk) elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): # PyInt_Check(long) is True for Python 3. # So we should test long before int. From 2186455d1579affc33253484d9445f7bdf3f7c29 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 11 Dec 2019 23:48:16 +0900 Subject: [PATCH 214/349] Support datetime. 
(#394) --- Makefile | 2 +- msgpack/_cmsgpack.pyx | 7 ++++ msgpack/_packer.pyx | 19 +++++++++-- msgpack/_unpacker.pyx | 23 ++++++++++---- msgpack/ext.py | 27 +++++++++++++++- msgpack/fallback.py | 48 +++++++++++++++++++++++++--- msgpack/unpack.h | 72 ++++++++++++++++++++++++++++++++++++++---- test/test_timestamp.py | 48 ++++++++++++++++++++++++++-- 8 files changed, 222 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index f8971cc3..e2f25cfd 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ all: cython .PHONY: black black: - black msgpack/ test/ + black msgpack/ test/ setup.py .PHONY: cython cython: diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx index 8ebdbf58..1faaac3a 100644 --- a/msgpack/_cmsgpack.pyx +++ b/msgpack/_cmsgpack.pyx @@ -1,4 +1,11 @@ # coding: utf-8 #cython: embedsignature=True, c_string_encoding=ascii, language_level=3 +from cpython.datetime cimport import_datetime, datetime_new +import_datetime() + +import datetime +cdef object utc = datetime.timezone.utc +cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc) + include "_packer.pyx" include "_unpacker.pyx" diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 14264396..b4706463 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,6 +2,10 @@ from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact +from cpython.datetime cimport ( + PyDateTime_CheckExact, PyDelta_CheckExact, + datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds, +) cdef ExtType cdef Timestamp @@ -99,8 +103,9 @@ cdef class Packer(object): cdef object _berrors cdef const char *unicode_errors cdef bint strict_types - cdef bool use_float + cdef bint use_float cdef bint autoreset + cdef bint datetime def __cinit__(self): cdef int buf_size = 1024*1024 @@ -110,12 +115,13 @@ cdef class Packer(object): self.pk.buf_size = buf_size self.pk.length = 0 - def __init__(self, *, default=None, unicode_errors=None, + def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False): + bint strict_types=False, bint datetime=False, unicode_errors=None): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset + self.datetime = datetime self.pk.use_bin_type = use_bin_type if default is not None: if not PyCallable_Check(default): @@ -262,6 +268,13 @@ cdef class Packer(object): if ret == 0: ret = msgpack_pack_raw_body(&self.pk, view.buf, L) PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + ret = msgpack_pack_timestamp(&self.pk, llval, ulval) elif not default_used and self._default: o = self._default(o) default_used = 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 0ff633b9..43c93a2c 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,7 +1,6 @@ # coding: utf-8 from cpython cimport * - cdef extern from "Python.h": ctypedef struct PyObject cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 @@ -21,6 +20,8 @@ from .exceptions import ( ) from .ext import ExtType, Timestamp +cdef object giga = 1_000_000_000 + cdef extern from "unpack.h": ctypedef struct msgpack_user: @@ -28,10 +29,13 @@ cdef extern from 
"unpack.h": bint raw bint has_pairs_hook # call object_hook with k-v pairs bint strict_map_key + int timestamp PyObject* object_hook PyObject* list_hook PyObject* ext_hook PyObject* timestamp_t + PyObject *giga; + PyObject *utc; char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len @@ -57,7 +61,8 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, bint raw, bint strict_map_key, + bint use_list, bint raw, int timestamp, + bint strict_map_key, const char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, @@ -99,8 +104,14 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("ext_hook must be a callable.") ctx.user.ext_hook = ext_hook + if timestamp < 0 or 3 < timestamp: + raise ValueError("timestamp must be 0..3") + # Add Timestamp type to the user object so it may be used in unpack.h + ctx.user.timestamp = timestamp ctx.user.timestamp_t = Timestamp + ctx.user.giga = giga + ctx.user.utc = utc ctx.user.unicode_errors = unicode_errors def default_read_extended_type(typecode, data): @@ -131,7 +142,7 @@ cdef inline int get_data_from_buffer(object obj, def unpackb(object packed, *, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=False, bint strict_map_key=True, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=-1, @@ -179,7 +190,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, try: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, raw, strict_map_key, cerr, + use_list, raw, timestamp, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -304,7 +315,7 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, *, Py_ssize_t read_size=0, - bint use_list=True, bint raw=False, bint strict_map_key=True, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024, object ext_hook=ExtType, @@ -359,7 +370,7 @@ cdef class Unpacker(object): cerr = unicode_errors init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, raw, strict_map_key, cerr, + ext_hook, use_list, raw, timestamp, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) diff --git a/msgpack/ext.py b/msgpack/ext.py index c7efff61..09adb341 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,12 +1,18 @@ # coding: utf-8 from collections import namedtuple +import datetime import sys import struct PY2 = sys.version_info[0] == 2 + if not PY2: long = int + try: + _utc = datetime.timezone.utc + except AttributeError: + _utc = datetime.timezone(datetime.timedelta(0)) class ExtType(namedtuple("ExtType", "code data")): @@ -131,7 +137,7 @@ def to_bytes(self): data = struct.pack("!Iq", self.nanoseconds, self.seconds) return data - def to_float_s(self): + def to_float(self): """Get the timestamp as a floating-point value. 
:returns: posix timestamp @@ -139,6 +145,12 @@ def to_float_s(self): """ return self.seconds + self.nanoseconds / 1e9 + @staticmethod + def from_float(unix_float): + seconds = int(unix_float) + nanoseconds = int((unix_float % 1) * 1000000000) + return Timestamp(seconds, nanoseconds) + def to_unix_ns(self): """Get the timestamp as a unixtime in nanoseconds. @@ -146,3 +158,16 @@ def to_unix_ns(self): :rtype: int """ return int(self.seconds * 1e9 + self.nanoseconds) + + if not PY2: + + def to_datetime(self): + """Get the timestamp as a UTC datetime. + + :rtype: datetime. + """ + return datetime.datetime.fromtimestamp(self.to_float(), _utc) + + @staticmethod + def from_datetime(dt): + return Timestamp.from_float(dt.timestamp()) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f6ba4242..9ba98bfc 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,5 +1,6 @@ """Fallback pure Python implementation of msgpack""" +from datetime import datetime as _DateTime import sys import struct @@ -174,6 +175,14 @@ class Unpacker(object): If true, unpack msgpack raw to Python bytes. Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + :param int timestamp: + Control how timestamp type is unpacked: + + 0 - Tiemstamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). Python 2 is not supported. + :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. @@ -248,6 +257,7 @@ def __init__( read_size=0, use_list=True, raw=False, + timestamp=0, strict_map_key=True, object_hook=None, object_pairs_hook=None, @@ -307,6 +317,9 @@ def __init__( self._strict_map_key = bool(strict_map_key) self._unicode_errors = unicode_errors self._use_list = use_list + if not (0 <= timestamp <= 3): + raise ValueError("timestamp must be 0..3") + self._timestamp = timestamp self._list_hook = list_hook self._object_hook = object_hook self._object_pairs_hook = object_pairs_hook @@ -672,10 +685,21 @@ def _unpack(self, execute=EX_CONSTRUCT): else: obj = obj.decode("utf_8", self._unicode_errors) return obj - if typ == TYPE_EXT: - return self._ext_hook(n, bytes(obj)) if typ == TYPE_BIN: return bytes(obj) + if typ == TYPE_EXT: + if n == -1: # timestamp + ts = Timestamp.from_bytes(bytes(obj)) + if self._timestamp == 1: + return ts.to_float() + elif self._timestamp == 2: + return ts.to_unix_ns() + elif self._timestamp == 3: + return ts.to_datetime() + else: + return ts + else: + return self._ext_hook(n, bytes(obj)) assert typ == TYPE_IMMEDIATE return obj @@ -756,6 +780,12 @@ class Packer(object): This is useful when trying to implement accurate serialization for python types. + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unapcker. + (Python 2 is not supported). + :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. 
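The ``datetime`` option documented above pairs with the Unpacker's ``timestamp=3`` mode: an aware ``datetime`` is packed as the timestamp extension and comes back as a UTC ``datetime``. A minimal Python 3 sketch of the round trip (the example value is illustrative):

```py
import datetime
import msgpack

dt = datetime.datetime(2019, 12, 11, tzinfo=datetime.timezone.utc)

packed = msgpack.packb(dt, datetime=True)
assert msgpack.unpackb(packed, timestamp=3) == dt  # back as an aware UTC datetime
```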
@@ -764,18 +794,22 @@ class Packer(object): def __init__( self, default=None, - unicode_errors=None, use_single_float=False, autoreset=True, use_bin_type=True, strict_types=False, + datetime=False, + unicode_errors=None, ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._unicode_errors = unicode_errors or "strict" self._buffer = StringIO() + if PY2 and datetime: + raise ValueError("datetime is not supported in Python 2") + self._datetime = bool(datetime) + self._unicode_errors = unicode_errors or "strict" if default is not None: if not callable(default): raise TypeError("default must be callable") @@ -891,6 +925,12 @@ def _pack( return self._pack_map_pairs( len(obj), dict_iteritems(obj), nest_limit - 1 ) + + if self._datetime and check(obj, _DateTime): + obj = Timestamp.from_datetime(obj) + default_used = 1 + continue + if not default_used and self._default is not None: obj = self._default(obj) default_used = 1 diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 4380ec55..debdf715 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -24,10 +24,13 @@ typedef struct unpack_user { bool raw; bool has_pairs_hook; bool strict_map_key; + int timestamp; PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; PyObject *timestamp_t; + PyObject *giga; + PyObject *utc; const char *unicode_errors; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; @@ -268,7 +271,7 @@ typedef struct msgpack_timestamp { /* * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h. */ -static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { +static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { switch (buflen) { case 4: ts->tv_nsec = 0; @@ -292,10 +295,11 @@ static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack } } -static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, - unsigned int length, msgpack_unpack_object* o) +#include "datetime.h" + +static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int length, msgpack_unpack_object* o) { - PyObject *py; int8_t typecode = (int8_t)*pos++; if (!u->ext_hook) { PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); @@ -305,13 +309,67 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); return -1; } + + PyObject *py = NULL; // length also includes the typecode, so the actual data is length-1 if (typecode == -1) { msgpack_timestamp ts; - if (unpack_timestamp(pos, length-1, &ts) == 0) { + if (unpack_timestamp(pos, length-1, &ts) < 0) { + return -1; + } + + if (u->timestamp == 2) { // int + PyObject *a = PyLong_FromLongLong(ts.tv_sec); + if (a == NULL) return -1; + + PyObject *c = PyNumber_Multiply(a, u->giga); + Py_DECREF(a); + if (c == NULL) { + return -1; + } + + PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec); + if (b == NULL) { + Py_DECREF(c); + return -1; + } + + py = PyNumber_Add(c, b); + Py_DECREF(c); + Py_DECREF(b); + } + else if (u->timestamp == 0) { // Timestamp py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); - } else { - py = NULL; + } + else { // float or datetime + PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec); + if (a == NULL) return -1; + + PyObject *b = 
PyNumber_TrueDivide(a, u->giga); + Py_DECREF(a); + if (b == NULL) return -1; + + PyObject *c = PyLong_FromLongLong(ts.tv_sec); + if (c == NULL) { + Py_DECREF(b); + return -1; + } + + a = PyNumber_Add(b, c); + Py_DECREF(b); + Py_DECREF(c); + + if (u->timestamp == 3) { // datetime + PyObject *t = PyTuple_Pack(2, a, u->utc); + Py_DECREF(a); + if (t == NULL) { + return -1; + } + py = PyDateTime_FromTimestamp(t); + Py_DECREF(t); + } else { // float + py = a; + } } } else { py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 1348e694..822994c8 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -1,5 +1,11 @@ +import pytest +import sys +import datetime import msgpack -from msgpack import Timestamp +from msgpack.ext import Timestamp + +if sys.version_info[0] > 2: + from msgpack.ext import _utc def test_timestamp(): @@ -42,5 +48,43 @@ def test_timestamp(): def test_timestamp_to(): t = Timestamp(42, 14) - assert t.to_float_s() == 42.000000014 + assert t.to_float() == 42.000000014 assert t.to_unix_ns() == 42000000014 + + +@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") +def test_timestamp_datetime(): + t = Timestamp(42, 14) + assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + + +@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") +def test_unpack_datetime(): + t = Timestamp(42, 14) + packed = msgpack.packb(t) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + + +@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") +def test_pack_datetime(): + t = Timestamp(42, 14000) + dt = t.to_datetime() + assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc) + + packed = msgpack.packb(dt, datetime=True) + packed2 = msgpack.packb(t) + assert packed == packed2 + + unpacked = msgpack.unpackb(packed) + print(packed, unpacked) + assert unpacked == t + + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == dt + + x = [] + packed = msgpack.packb(dt, datetime=False, default=x.append) + assert x + assert x[0] == dt + assert msgpack.unpackb(packed) is None From c60e6c7a6ff1815083bf6803ec70f3ac34aaf3bb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 Dec 2019 18:09:07 +0900 Subject: [PATCH 215/349] Update README --- ChangeLog.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog.rst b/ChangeLog.rst index d44b36a9..c70b966c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -6,6 +6,14 @@ Release Date: TBD * Remove Python 2 support from the ``msgpack/_cmsgpack``. ``msgpack/fallback`` still supports Python 2. * Remove ``encoding`` option from the Packer and Unpacker. +* Unpacker: The default value of ``max_buffer_type`` is changed to 100MiB. +* Unpacker: ``strict_map_key`` is True by default now. +* Unpacker: String map keys are interned. +* Drop old buffer protocol support. +* Support Timestamp type. +* Support serializing and decerializing ``datetime`` object + with tzinfo. +* Unpacker: ``Fix Unpacker.read_bytes()`` in fallback implementation. 
(#352) 0.6.2 From 3df431cafd82354e61b39afd6094003e9c313c43 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 Dec 2019 18:25:38 +0900 Subject: [PATCH 216/349] Prepare 1.0rc1 --- MANIFEST.in | 2 +- README.rst => README.md | 125 ++++++++++++++-------------------------- msgpack/_version.py | 2 +- setup.py | 6 +- 4 files changed, 49 insertions(+), 86 deletions(-) rename README.rst => README.md (83%) diff --git a/MANIFEST.in b/MANIFEST.in index e1912cac..57d84a4c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING -include README.rst +include README.md recursive-include msgpack *.h *.c *.pyx *.cpp recursive-include test *.py diff --git a/README.rst b/README.md similarity index 83% rename from README.rst rename to README.md index d01e963e..897a9324 100644 --- a/README.rst +++ b/README.md @@ -1,18 +1,9 @@ -====================== -MessagePack for Python -====================== +# MessagePack for Python -.. image:: https://travis-ci.org/msgpack/msgpack-python.svg?branch=master - :target: https://travis-ci.org/msgpack/msgpack-python - :alt: Build Status +[![Build Status](https://travis-ci.org/msgpack/msgpack-python.svg?branch=master)](https://travis-ci.org/msgpack/msgpack-python) +[![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) -.. image:: https://readthedocs.org/projects/msgpack-python/badge/?version=latest - :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status - - -What's this ------------ +## What's this `MessagePack `_ is an efficient binary serialization format. It lets you exchange data among multiple languages like JSON. @@ -20,11 +11,9 @@ But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. -Very important notes for existing users ---------------------------------------- +## Very important notes for existing users -PyPI package name -^^^^^^^^^^^^^^^^^ +### PyPI package name TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. Do `pip uninstall msgpack-python; pip install msgpack` instead. @@ -37,8 +26,7 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt msgpack is removed, and `import msgpack` fail. -Compatibility with the old format -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +### Compatibility with the old format You can use ``use_bin_type=False`` option to pack ``bytes`` object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. @@ -49,8 +37,7 @@ It unpacks str (raw) type in msgpack into Python bytes. See note below for detail. -Major breaking changes in msgpack 1.0 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +### Major breaking changes in msgpack 1.0 * Python 2 @@ -75,16 +62,13 @@ Major breaking changes in msgpack 1.0 which type is not bytes or str. -Install -------- +## Install -:: $ pip install msgpack -Pure Python implementation -^^^^^^^^^^^^^^^^^^^^^^^^^^ +### Pure Python implementation The extension module in msgpack (``msgpack._cmsgpack``) does not support Python 2 and PyPy. @@ -96,26 +80,20 @@ Since the [pip](https://pip.pypa.io/) uses the pure Python implementation, Python 2 support will not be dropped in the foreseeable future. -Windows -^^^^^^^ +### Windows When you can't use a binary distribution, you need to install Visual Studio or Windows SDK on Windows. Without extension, using pure Python implementation on CPython runs slowly. 
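Which implementation an installation actually uses can be checked with a plain import. This is only a sketch; it assumes nothing beyond the extension module name ``msgpack._cmsgpack`` mentioned above:

```py
try:
    from msgpack import _cmsgpack  # present only when the C extension was built
    print("msgpack is using the C extension")
except ImportError:
    print("msgpack is using the pure-Python fallback")
```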
-How to use ----------- +## How to use -.. note:: +NOTE: In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users +using msgpack < 1.0. These options are default from msgpack 1.0 so you can omit them. - In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users - using msgpack < 1.0. - These options are default from msgpack 1.0 so you can omit them. - -One-shot pack & unpack -^^^^^^^^^^^^^^^^^^^^^^ +### One-shot pack & unpack Use ``packb`` for packing and ``unpackb`` for unpacking. msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with @@ -124,20 +102,20 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with ``pack`` and ``dump`` packs to a file-like object. ``unpack`` and ``load`` unpacks from a file-like object. -.. code-block:: pycon - +```pycon >>> import msgpack >>> msgpack.packb([1, 2, 3], use_bin_type=True) '\x93\x01\x02\x03' >>> msgpack.unpackb(_, raw=False) [1, 2, 3] +``` ``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple: -.. code-block:: pycon - +```pycon >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) (1, 2, 3) +``` You should always specify the ``use_list`` keyword argument for backward compatibility. See performance issues relating to `use_list option`_ below. @@ -145,14 +123,12 @@ See performance issues relating to `use_list option`_ below. Read the docstring for other options. -Streaming unpacking -^^^^^^^^^^^^^^^^^^^ +### Streaming unpacking ``Unpacker`` is a "streaming unpacker". It unpacks multiple objects from one stream (or from bytes provided through its ``feed`` method). -.. code-block:: python - +```py import msgpack from io import BytesIO @@ -165,16 +141,15 @@ stream (or from bytes provided through its ``feed`` method). unpacker = msgpack.Unpacker(buf, raw=False) for unpacked in unpacker: print(unpacked) +``` -Packing/unpacking of custom data type -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +### Packing/unpacking of custom data type It is also possible to pack/unpack custom data types. Here is an example for ``datetime.datetime``. -.. code-block:: python - +```py import datetime import msgpack @@ -196,19 +171,18 @@ It is also possible to pack/unpack custom data types. Here is an example for packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) +``` ``Unpacker``'s ``object_hook`` callback receives a dict; the ``object_pairs_hook`` callback may instead be used to receive a list of key-value pairs. -Extended types -^^^^^^^^^^^^^^ +### Extended types It is also possible to pack/unpack custom data types using the **ext** type. -.. code-block:: pycon - +```pycon >>> import msgpack >>> import array >>> def default(obj): @@ -228,10 +202,10 @@ It is also possible to pack/unpack custom data types using the **ext** type. >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) >>> data == unpacked True +``` -Advanced unpacking control -^^^^^^^^^^^^^^^^^^^^^^^^^^ +### Advanced unpacking control As an alternative to iteration, ``Unpacker`` objects provide ``unpack``, ``skip``, ``read_array_header`` and ``read_map_header`` methods. The former two @@ -243,8 +217,7 @@ in a map, can be unpacked or skipped individually. Each of these methods may optionally write the packed data it reads to a callback function: -.. 
code-block:: python - +```py from io import BytesIO def distribute(unpacker, get_worker): @@ -258,13 +231,11 @@ callback function: bytestream = BytesIO() unpacker.skip(bytestream.write) worker.send(bytestream.getvalue()) +``` +## Notes -Notes ------ - -string and binary type -^^^^^^^^^^^^^^^^^^^^^^ +### string and binary type Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. @@ -272,32 +243,29 @@ The type for representing both string and binary types was named **raw**. You can pack into and unpack from this old spec using ``use_bin_type=False`` and ``raw=True`` options. -.. code-block:: pycon - +```pycon >>> import msgpack >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) [b'spam', b'eggs'] >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) [b'spam', 'eggs'] +``` - -ext type -^^^^^^^^ +### ext type To use the **ext** type, pass ``msgpack.ExtType`` object to packer. -.. code-block:: pycon - +```pycon >>> import msgpack >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) >>> msgpack.unpackb(packed) ExtType(code=42, data='xyzzy') +``` You can use it with ``default`` and ``ext_hook``. See below. -Security -^^^^^^^^ +### Security To unpacking data received from unreliable source, msgpack provides two security options. @@ -311,8 +279,7 @@ there is a risk of the hashdos. If you need to support other types for map keys, use ``strict_map_key=False``. -Performance tips -^^^^^^^^^^^^^^^^ +### Performance tips CPython's GC starts when growing allocated object. This means unpacking may cause useless GC. @@ -323,17 +290,13 @@ But tuple is lighter than list. You can use ``use_list=False`` while unpacking when performance is important. -Development ------------ +## Development -Test -^^^^ +### Test MessagePack uses `pytest` for testing. Run test with following command: +``` $ make test - - -.. - vim: filetype=rst +``` diff --git a/msgpack/_version.py b/msgpack/_version.py index 1e73a00f..5762e8cd 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (0, 6, 2) +version = (1, 0, 0, 'rc1') diff --git a/setup.py b/setup.py index a8c2306d..ac0dc30c 100755 --- a/setup.py +++ b/setup.py @@ -106,7 +106,7 @@ def __init__(self, *args, **kwargs): desc = "MessagePack (de)serializer." 
-with io.open("README.rst", encoding="utf-8") as f: +with io.open("README.md", encoding="utf-8") as f: long_desc = f.read() del f @@ -118,7 +118,7 @@ def __init__(self, *args, **kwargs): setup( name=name, - author="INADA Naoki", + author="Inada Naoki", author_email="songofacandy@gmail.com", version=version_str, cmdclass={"build_ext": BuildExt, "sdist": Sdist}, @@ -126,7 +126,7 @@ def __init__(self, *args, **kwargs): packages=["msgpack"], description=desc, long_description=long_desc, - long_description_content_type="text/x-rst", + long_description_content_type="text/markdown", url="https://msgpack.org/", project_urls={ "Documentation": "https://msgpack-python.readthedocs.io/", From a05fc5e7c543d4d925802b6bba1a5542ee8ee3c3 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 Dec 2019 18:46:55 +0900 Subject: [PATCH 217/349] black --- msgpack/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index 5762e8cd..56f67e4e 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (1, 0, 0, 'rc1') +version = (1, 0, 0, "rc1") From aab29ff277cf88ff85e7ea5e603607a24d8c38a4 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 Dec 2019 18:48:16 +0900 Subject: [PATCH 218/349] Remove TRANSITIONAL package support --- setup.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/setup.py b/setup.py index ac0dc30c..2ec9ca70 100755 --- a/setup.py +++ b/setup.py @@ -14,10 +14,6 @@ PY2 = sys.version_info[0] == 2 -# for building transitional package. -TRANSITIONAL = False - - class NoCython(Exception): pass @@ -110,14 +106,8 @@ def __init__(self, *args, **kwargs): long_desc = f.read() del f -name = "msgpack" - -if TRANSITIONAL: - name = "msgpack-python" - long_desc = "This package is deprecated. Install msgpack instead." - setup( - name=name, + name="msgpack", author="Inada Naoki", author_email="songofacandy@gmail.com", version=version_str, From 887d3a7d22865d36d68fdcb5e653ea61d66f0b61 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 Dec 2019 19:43:59 +0900 Subject: [PATCH 219/349] Refine Timestamp APIs (#395) --- msgpack/ext.py | 99 ++++++++++++++++++++++++------------------ msgpack/fallback.py | 4 +- test/test_timestamp.py | 16 ++++--- 3 files changed, 69 insertions(+), 50 deletions(-) diff --git a/msgpack/ext.py b/msgpack/ext.py index 09adb341..00b759da 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -7,8 +7,11 @@ PY2 = sys.version_info[0] == 2 -if not PY2: - long = int +if PY2: + int_types = (int, long) + _utc = None +else: + int_types = int try: _utc = datetime.timezone.utc except AttributeError: @@ -23,8 +26,6 @@ def __new__(cls, code, data): raise TypeError("code must be int") if not isinstance(data, bytes): raise TypeError("data must be bytes") - if code == -1: - return Timestamp.from_bytes(data) if not 0 <= code <= 127: raise ValueError("code must be 0~127") return super(ExtType, cls).__new__(cls, code, data) @@ -42,34 +43,26 @@ class Timestamp(object): def __init__(self, seconds, nanoseconds=0): """Initialize a Timestamp object. - :param seconds: Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). May be - negative. If :code:`seconds` includes a fractional part, :code:`nanoseconds` must be 0. - :type seconds: int or float + :param int seconds: + Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). + May be negative. - :param nanoseconds: Number of nanoseconds to add to `seconds` to get fractional time. 
Maximum is 999_999_999. - Default is 0. - :type nanoseconds: int + :param int nanoseconds: + Number of nanoseconds to add to `seconds` to get fractional time. + Maximum is 999_999_999. Default is 0. Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. """ - if not isinstance(seconds, (int, long, float)): - raise TypeError("seconds must be numeric") - if not isinstance(nanoseconds, (int, long)): + if not isinstance(seconds, int_types): + raise TypeError("seconds must be an interger") + if not isinstance(nanoseconds, int_types): raise TypeError("nanoseconds must be an integer") - if nanoseconds: - if nanoseconds < 0 or nanoseconds % 1 != 0 or nanoseconds > (1e9 - 1): - raise ValueError( - "nanoseconds must be a non-negative integer less than 999999999." - ) - if not isinstance(seconds, (int, long)): - raise ValueError( - "seconds must be an integer if also providing nanoseconds." - ) - self.nanoseconds = nanoseconds - else: - # round helps with floating point issues - self.nanoseconds = int(round(seconds % 1 * 1e9, 0)) - self.seconds = int(seconds // 1) + if not (0 <= nanoseconds < 10 ** 9): + raise ValueError( + "nanoseconds must be a non-negative integer less than 999999999." + ) + self.seconds = seconds + self.nanoseconds = nanoseconds def __repr__(self): """String representation of Timestamp.""" @@ -137,7 +130,18 @@ def to_bytes(self): data = struct.pack("!Iq", self.nanoseconds, self.seconds) return data - def to_float(self): + @staticmethod + def from_unix(unix_sec): + """Create a Timestamp from posix timestamp in seconds. + + :param unix_float: Posix timestamp in seconds. + :type unix_float: int or float. + """ + seconds = int(unix_sec // 1) + nanoseconds = int((unix_sec % 1) * 10 ** 9) + return Timestamp(seconds, nanoseconds) + + def to_unix(self): """Get the timestamp as a floating-point value. :returns: posix timestamp @@ -146,28 +150,37 @@ def to_float(self): return self.seconds + self.nanoseconds / 1e9 @staticmethod - def from_float(unix_float): - seconds = int(unix_float) - nanoseconds = int((unix_float % 1) * 1000000000) - return Timestamp(seconds, nanoseconds) + def from_unix_nano(unix_ns): + """Create a Timestamp from posix timestamp in nanoseconds. - def to_unix_ns(self): + :param int unix_ns: Posix timestamp in nanoseconds. + :rtype: Timestamp + """ + return Timestamp(*divmod(unix_ns, 10 ** 9)) + + def to_unix_nano(self): """Get the timestamp as a unixtime in nanoseconds. :returns: posix timestamp in nanoseconds :rtype: int """ - return int(self.seconds * 1e9 + self.nanoseconds) + return self.seconds * 10 ** 9 + self.nanoseconds - if not PY2: + def to_datetime(self): + """Get the timestamp as a UTC datetime. - def to_datetime(self): - """Get the timestamp as a UTC datetime. + Python 2 is not supported. - :rtype: datetime. - """ - return datetime.datetime.fromtimestamp(self.to_float(), _utc) + :rtype: datetime. + """ + return datetime.datetime.fromtimestamp(self.to_unix(), _utc) - @staticmethod - def from_datetime(dt): - return Timestamp.from_float(dt.timestamp()) + @staticmethod + def from_datetime(dt): + """Create a Timestamp from datetime with tzinfo. + + Python 2 is not supported. 
+ + :rtype: Timestamp + """ + return Timestamp.from_unix(dt.timestamp()) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9ba98bfc..08e8d467 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -691,9 +691,9 @@ def _unpack(self, execute=EX_CONSTRUCT): if n == -1: # timestamp ts = Timestamp.from_bytes(bytes(obj)) if self._timestamp == 1: - return ts.to_float() + return ts.to_unix() elif self._timestamp == 2: - return ts.to_unix_ns() + return ts.to_unix_nano() elif self._timestamp == 3: return ts.to_datetime() else: diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 822994c8..ba5611ca 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -37,19 +37,25 @@ def test_timestamp(): assert ts.seconds == 2 ** 63 - 1 and ts.nanoseconds == 999999999 # negative fractional - ts = Timestamp(-2.3) # s: -3, ns: 700000000 + ts = Timestamp.from_unix(-2.3) # s: -3, ns: 700000000 + assert ts.seconds == -3 and ts.nanoseconds == 700000000 assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd" packed = msgpack.packb(ts) assert packed == b"\xc7\x0c\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == -3 and ts.nanoseconds == 700000000 + + +def test_timestamp_from(): + t = Timestamp(42, 14000) + assert Timestamp.from_unix(42.000014) == t + assert Timestamp.from_unix_nano(42000014000) == t def test_timestamp_to(): - t = Timestamp(42, 14) - assert t.to_float() == 42.000000014 - assert t.to_unix_ns() == 42000000014 + t = Timestamp(42, 14000) + assert t.to_unix() == 42.000014 + assert t.to_unix_nano() == 42000014000 @pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") From 9e5ec95e0292dce8485575310f6b69a618fdbefe Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 Dec 2019 19:59:06 +0900 Subject: [PATCH 220/349] Make Timestamp hashable (#396) When overriding __eq__, __hash__ should be overridden too. --- msgpack/ext.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/msgpack/ext.py b/msgpack/ext.py index 00b759da..cc34fb2e 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -36,6 +36,8 @@ class Timestamp(object): When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`. + + This class is immutable: Do not override seconds and nanoseconds. 
""" __slots__ = ["seconds", "nanoseconds"] @@ -78,9 +80,8 @@ def __eq__(self, other): ) return False - def __ne__(self, other): - """not-equals method (see :func:`__eq__()`)""" - return not self.__eq__(other) + def __hash__(self): + return hash((self.seconds, self.nanoseconds)) @staticmethod def from_bytes(b): From 5e1fe818e3839c85a38419859bcec6d38979c620 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 12 Dec 2019 20:05:25 +0900 Subject: [PATCH 221/349] Reintroduce __ne__ --- msgpack/ext.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/msgpack/ext.py b/msgpack/ext.py index cc34fb2e..8341c68b 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -80,6 +80,10 @@ def __eq__(self, other): ) return False + def __ne__(self, other): + """not-equals method (see :func:`__eq__()`)""" + return not self.__eq__(other) + def __hash__(self): return hash((self.seconds, self.nanoseconds)) From 42f5ecfd514cc5797385df0c72258a16fe645c72 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 13 Dec 2019 15:10:32 +0900 Subject: [PATCH 222/349] Fix some typo --- msgpack/fallback.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 08e8d467..263e74ef 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -41,7 +41,7 @@ def _is_recursionerror(e): if hasattr(sys, "pypy_version_info"): - # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own + # StringIO is slow on PyPy, StringIO is faster. However: PyPy's own # StringBuilder is fastest. from __pypy__ import newlist_hint @@ -147,7 +147,7 @@ def unpackb(packed, **kwargs): if sys.version_info < (2, 7, 6): def _unpack_from(f, b, o=0): - """Explicit typcast for legacy struct.unpack_from""" + """Explicit type cast for legacy struct.unpack_from""" return struct.unpack_from(f, bytes(b), o) @@ -178,7 +178,7 @@ class Unpacker(object): :param int timestamp: Control how timestamp type is unpacked: - 0 - Tiemstamp + 0 - Timestamp 1 - float (Seconds from the EPOCH) 2 - int (Nanoseconds from the EPOCH) 3 - datetime.datetime (UTC). Python 2 is not supported. @@ -749,7 +749,7 @@ class Packer(object): """ MessagePack Packer - usage: + Usage: packer = Packer() astream.write(packer.pack(a)) @@ -783,7 +783,7 @@ class Packer(object): :param bool datetime: If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. - You can get UTC datetime with `timestamp=3` option of the Unapcker. + You can get UTC datetime with `timestamp=3` option of the Unpacker. (Python 2 is not supported). :param str unicode_errors: From ebfe55e63738fa41e55ac986ccae5e5f9bc3afbd Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 16 Dec 2019 15:14:34 +0900 Subject: [PATCH 223/349] travis: Use build config validation. 
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 39db1d74..7af7beb5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,4 @@ +version: ~> 1.0 dist: xenial language: python cache: pip From 030bb2f1f7ebd12faed937d8ef9846601361fbce Mon Sep 17 00:00:00 2001 From: ossdev07 <39188636+ossdev07@users.noreply.github.com> Date: Tue, 31 Dec 2019 06:42:21 +0530 Subject: [PATCH 224/349] travis: Add test for arm64 (#399) Signed-off-by: ossdev07 --- .travis.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7af7beb5..852674de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,9 @@ version: ~> 1.0 dist: xenial language: python cache: pip - +arch: + - amd64 + - arm64 python: # Available Python (PyPy) can be listed by: # @@ -38,6 +40,21 @@ matrix: script: - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh + - arch: arm64 + name: arm64 32bit build + language: python + services: + - docker + env: + - DOCKER_IMAGE=quay.io/pypa/manylinux2014_aarch64 + install: + - pip install -U pip + - pip install -r requirements.txt + - make cython + - docker pull $DOCKER_IMAGE + script: + - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh + - name: "Python 2 (fallback)" python: "2.7" <<: *pure From 1bd6fc36d09cea273a5b47d4f54ed5e3b718582c Mon Sep 17 00:00:00 2001 From: Emilio Tagua <53828441+eb-emilio@users.noreply.github.com> Date: Wed, 5 Feb 2020 09:20:17 -0300 Subject: [PATCH 225/349] Update README.md (#402) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 897a9324..48401bed 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ See note below for detail. * ``raw=False`` by default. It assumes str types are valid UTF-8 string and decode them to Python str (unicode) object. - * ``encdoding`` option is rmeoved. You can use ``raw=True`` to support old format. + * ``encoding`` option is removed. You can use ``raw=True`` to support old format. * Default value of ``max_buffer_size`` is changed from 0 to 100 MiB. * Default value of ``strict_map_key`` is changed to True to avoid hashdos. You need to pass ``strict_map_key=False`` if you have data which contain map keys From 24950990f4ebeffbf98acd188b171cc60a27095e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 6 Feb 2020 20:29:33 +0900 Subject: [PATCH 226/349] Remove broken example --- README.md | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/README.md b/README.md index 48401bed..a4d69c14 100644 --- a/README.md +++ b/README.md @@ -214,24 +214,6 @@ the result, or ignoring it. The latter two methods return the number of elements in the upcoming container, so that each element in an array, or key-value pair in a map, can be unpacked or skipped individually. 
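A minimal sketch of that header-based pattern (illustration only; the map contents are made up): walk a map one key/value pair at a time, unpacking the values you want and skipping the rest.

```py
from io import BytesIO
import msgpack

packed = msgpack.packb({"keep": 1, "drop": 2, "also": 3})
unpacker = msgpack.Unpacker(BytesIO(packed))

wanted = {}
for _ in range(unpacker.read_map_header()):   # number of key/value pairs
    key = unpacker.unpack()
    if key == "keep":
        wanted[key] = unpacker.unpack()       # deserialize this value
    else:
        unpacker.skip()                       # ignore the rest

assert wanted == {"keep": 1}
```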
-Each of these methods may optionally write the packed data it reads to a -callback function: - -```py - from io import BytesIO - - def distribute(unpacker, get_worker): - nelems = unpacker.read_map_header() - for i in range(nelems): - # Select a worker for the given key - key = unpacker.unpack() - worker = get_worker(key) - - # Send the value as a packed message to worker - bytestream = BytesIO() - unpacker.skip(bytestream.write) - worker.send(bytestream.getvalue()) -``` ## Notes From 0dad82116912878dfb172de3f5affe128c7475ce Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 6 Feb 2020 20:35:41 +0900 Subject: [PATCH 227/349] Fix markdown --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a4d69c14..46af8106 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ## What's this -`MessagePack `_ is an efficient binary serialization format. +[MessagePack](https://msgpack.org/) is an efficient binary serialization format. It lets you exchange data among multiple languages like JSON. But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. From ff1f5f89d997b40e60472c2820ea55cab752c779 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 6 Feb 2020 21:06:04 +0900 Subject: [PATCH 228/349] README: `` -> ` --- README.md | 74 +++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 46af8106..03552866 100644 --- a/README.md +++ b/README.md @@ -28,10 +28,10 @@ msgpack is removed, and `import msgpack` fail. ### Compatibility with the old format -You can use ``use_bin_type=False`` option to pack ``bytes`` +You can use `use_bin_type=False` option to pack `bytes` object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. -You can unpack old msgpack format using ``raw=True`` option. +You can unpack old msgpack format using `raw=True` option. It unpacks str (raw) type in msgpack into Python bytes. See note below for detail. @@ -42,23 +42,23 @@ See note below for detail. * Python 2 * The extension module does not support Python 2 anymore. - The pure Python implementation (``msgpack.fallback``) is used for Python 2. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. * Packer - * ``use_bin_type=True`` by default. bytes are encoded in bin type in msgpack. + * `use_bin_type=True` by default. bytes are encoded in bin type in msgpack. **If you are still sing Python 2, you must use unicode for all string types.** - You can use ``use_bin_type=False`` to encode into old msgpack format. - * ``encoding`` option is removed. UTF-8 is used always. + You can use `use_bin_type=False` to encode into old msgpack format. + * `encoding` option is removed. UTF-8 is used always. * Unpacker - * ``raw=False`` by default. It assumes str types are valid UTF-8 string + * `raw=False` by default. It assumes str types are valid UTF-8 string and decode them to Python str (unicode) object. - * ``encoding`` option is removed. You can use ``raw=True`` to support old format. - * Default value of ``max_buffer_size`` is changed from 0 to 100 MiB. - * Default value of ``strict_map_key`` is changed to True to avoid hashdos. - You need to pass ``strict_map_key=False`` if you have data which contain map keys + * `encoding` option is removed. You can use `raw=True` to support old format. + * Default value of `max_buffer_size` is changed from 0 to 100 MiB. 
+ * Default value of `strict_map_key` is changed to True to avoid hashdos. + You need to pass `strict_map_key=False` if you have data which contain map keys which type is not bytes or str. @@ -70,10 +70,10 @@ See note below for detail. ### Pure Python implementation -The extension module in msgpack (``msgpack._cmsgpack``) does not support +The extension module in msgpack (`msgpack._cmsgpack`) does not support Python 2 and PyPy. -But msgpack provides a pure Python implementation (``msgpack.fallback``) +But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy and Python 2. Since the [pip](https://pip.pypa.io/) uses the pure Python implementation, @@ -89,18 +89,18 @@ Without extension, using pure Python implementation on CPython runs slowly. ## How to use -NOTE: In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users +NOTE: In examples below, I use `raw=False` and `use_bin_type=True` for users using msgpack < 1.0. These options are default from msgpack 1.0 so you can omit them. ### One-shot pack & unpack -Use ``packb`` for packing and ``unpackb`` for unpacking. -msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with -``json`` and ``pickle``. +Use `packb` for packing and `unpackb` for unpacking. +msgpack provides `dumps` and `loads` as an alias for compatibility with +`json` and `pickle`. -``pack`` and ``dump`` packs to a file-like object. -``unpack`` and ``load`` unpacks from a file-like object. +`pack` and `dump` packs to a file-like object. +`unpack` and `load` unpacks from a file-like object. ```pycon >>> import msgpack @@ -110,14 +110,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with [1, 2, 3] ``` -``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple: +`unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: ```pycon >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) (1, 2, 3) ``` -You should always specify the ``use_list`` keyword argument for backward compatibility. +You should always specify the `use_list` keyword argument for backward compatibility. See performance issues relating to `use_list option`_ below. Read the docstring for other options. @@ -125,8 +125,8 @@ Read the docstring for other options. ### Streaming unpacking -``Unpacker`` is a "streaming unpacker". It unpacks multiple objects from one -stream (or from bytes provided through its ``feed`` method). +`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one +stream (or from bytes provided through its `feed` method). ```py import msgpack @@ -147,7 +147,7 @@ stream (or from bytes provided through its ``feed`` method). ### Packing/unpacking of custom data type It is also possible to pack/unpack custom data types. Here is an example for -``datetime.datetime``. +`datetime.datetime`. ```py import datetime @@ -173,8 +173,8 @@ It is also possible to pack/unpack custom data types. Here is an example for this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) ``` -``Unpacker``'s ``object_hook`` callback receives a dict; the -``object_pairs_hook`` callback may instead be used to receive a list of +`Unpacker`'s `object_hook` callback receives a dict; the +`object_pairs_hook` callback may instead be used to receive a list of key-value pairs. @@ -207,8 +207,8 @@ It is also possible to pack/unpack custom data types using the **ext** type. 
### Advanced unpacking control -As an alternative to iteration, ``Unpacker`` objects provide ``unpack``, -``skip``, ``read_array_header`` and ``read_map_header`` methods. The former two +As an alternative to iteration, `Unpacker` objects provide `unpack`, +`skip`, `read_array_header` and `read_map_header` methods. The former two read an entire message from the stream, respectively de-serialising and returning the result, or ignoring it. The latter two methods return the number of elements in the upcoming container, so that each element in an array, or key-value pair @@ -222,8 +222,8 @@ in a map, can be unpacked or skipped individually. Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. -You can pack into and unpack from this old spec using ``use_bin_type=False`` -and ``raw=True`` options. +You can pack into and unpack from this old spec using `use_bin_type=False` +and `raw=True` options. ```pycon >>> import msgpack @@ -235,7 +235,7 @@ and ``raw=True`` options. ### ext type -To use the **ext** type, pass ``msgpack.ExtType`` object to packer. +To use the **ext** type, pass `msgpack.ExtType` object to packer. ```pycon >>> import msgpack @@ -244,7 +244,7 @@ To use the **ext** type, pass ``msgpack.ExtType`` object to packer. ExtType(code=42, data='xyzzy') ``` -You can use it with ``default`` and ``ext_hook``. See below. +You can use it with `default` and `ext_hook`. See below. ### Security @@ -252,24 +252,24 @@ You can use it with ``default`` and ``ext_hook``. See below. To unpacking data received from unreliable source, msgpack provides two security options. -``max_buffer_size`` (default: 100*1024*1024) limits the internal buffer size. +`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. It is used to limit the preallocated list size too. -``strict_map_key`` (default: ``True``) limits the type of map keys to bytes and str. +`strict_map_key` (default: `True`) limits the type of map keys to bytes and str. While msgpack spec doesn't limit the types of the map keys, there is a risk of the hashdos. -If you need to support other types for map keys, use ``strict_map_key=False``. +If you need to support other types for map keys, use `strict_map_key=False`. ### Performance tips CPython's GC starts when growing allocated object. This means unpacking may cause useless GC. -You can use ``gc.disable()`` when unpacking large message. +You can use `gc.disable()` when unpacking large message. List is the default sequence type of Python. But tuple is lighter than list. -You can use ``use_list=False`` while unpacking when performance is important. +You can use `use_list=False` while unpacking when performance is important. 
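Both tips combine into a small sketch like the following (illustrative only; the payload is made up):

```py
import gc
import msgpack

big = msgpack.packb([list(range(100)) for _ in range(10_000)])

# Disabling the GC around a large unpack avoids useless collections
# triggered by the many temporary containers being allocated.
gc.disable()
try:
    data = msgpack.unpackb(big, use_list=False)  # tuples are lighter than lists
finally:
    gc.enable()

assert data[0][:3] == (0, 1, 2)
```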
## Development From 9d79351e99e435b8ca749d57a313441783f67133 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 6 Feb 2020 22:11:04 +0900 Subject: [PATCH 229/349] Add some test for timestamp (#403) --- .github/workflows/black.yaml | 2 +- msgpack/fallback.py | 8 +------- test/test_format.py | 19 +++++++------------ test/test_pack.py | 14 +++----------- test/test_stricttype.py | 5 +---- test/test_timestamp.py | 27 +++++++++++++++++++++++++++ 6 files changed, 40 insertions(+), 35 deletions(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index cabd0ccf..eda8d076 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -18,4 +18,4 @@ jobs: - name: Black Code Formatter run: | pip install black - black --diff --check msgpack/ test/ + black --diff --check msgpack/ test/ setup.py diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 263e74ef..9f6665b3 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -77,13 +77,7 @@ def getvalue(self): newlist_hint = lambda size: [] -from .exceptions import ( - BufferFull, - OutOfData, - ExtraData, - FormatError, - StackError, -) +from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError from .ext import ExtType, Timestamp diff --git a/test/test_format.py b/test/test_format.py index d455f7ce..fbbc3f98 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -9,29 +9,24 @@ def check(src, should, use_list=0, raw=True): def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", (None, False, True,)) + check(b"\x93\xc0\xc2\xc3", (None, False, True)) def testFixnum(): - check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127,), (-32, -16, -1,),)) + check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127), (-32, -16, -1))) def testFixArray(): - check( - b"\x92\x90\x91\x91\xc0", ((), ((None,),),), - ) + check(b"\x92\x90\x91\x91\xc0", ((), ((None,),))) def testFixRaw(): - check( - b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def",), - ) + check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def")) def testFixMap(): check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", - {False: {None: None}, True: {None: {}}}, + b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}} ) @@ -40,7 +35,7 @@ def testUnsignedInt(): b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" b"\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), + (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295), ) @@ -49,7 +44,7 @@ def testSignedInt(): b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" b"\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,), + (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1), ) diff --git a/test/test_pack.py b/test/test_pack.py index 932f7608..a51d84c9 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -49,7 +49,7 @@ def testPack(): False, (), ((),), - ((), None,), + ((), None), {None: 0}, (1 << 23), ] @@ -69,21 +69,13 @@ def testPackUnicode(): def testPackBytes(): - test_data = [ - b"", - b"abcd", - (b"defgh",), - ] + test_data = [b"", b"abcd", (b"defgh",)] for td in test_data: check(td) def testPackByteArrays(): - test_data = [ - bytearray(b""), - bytearray(b"abcd"), - (bytearray(b"defgh"),), - ] + test_data = [bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),)] for td in test_data: check(td) diff --git a/test/test_stricttype.py b/test/test_stricttype.py 
index 78e1723f..fe9ec6cd 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -22,10 +22,7 @@ def test_tuple(): def default(o): if isinstance(o, tuple): - return { - "__type__": "tuple", - "value": list(o), - } + return {"__type__": "tuple", "value": list(o)} raise TypeError("Unsupported type %s" % (type(o),)) def convert(o): diff --git a/test/test_timestamp.py b/test/test_timestamp.py index ba5611ca..823fe04e 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -46,6 +46,33 @@ def test_timestamp(): assert ts == unpacked +def test_unpack_timestamp(): + # timestamp 32 + assert msgpack.unpackb(b"\xd6\xff\x00\x00\x00\x00") == Timestamp(0) + + # timestamp 64 + assert msgpack.unpackb(b"\xd7\xff" + b"\x00" * 8) == Timestamp(0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd7\xff" + b"\xff" * 8) + + # timestamp 96 + assert msgpack.unpackb(b"\xc7\x0c\xff" + b"\x00" * 12) == Timestamp(0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x0c\xff" + b"\xff" * 12) == Timestamp(0) + + # Undefined + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd4\xff\x00") # fixext 1 + with pytest.raises(ValueError): + msgpack.unpackb(b"\xd5\xff\x00\x00") # fixext 2 + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x00\xff") # ext8 (len=0) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x03\xff\0\0\0") # ext8 (len=3) + with pytest.raises(ValueError): + msgpack.unpackb(b"\xc7\x05\xff\0\0\0\0\0") # ext8 (len=5) + + def test_timestamp_from(): t = Timestamp(42, 14000) assert Timestamp.from_unix(42.000014) == t From f0952f1dd657e3f8437907bfe13885e61d5367fe Mon Sep 17 00:00:00 2001 From: Erik Cederstrand Date: Fri, 14 Feb 2020 04:31:09 +0100 Subject: [PATCH 230/349] travis: Python 3.9 is the new dev version. (#405) --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 852674de..5132b4c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,8 @@ python: - "3.5" - "3.6" - "3.7" - - "3.8-dev" + - "3.8" + - "3.9-dev" _pure: &pure From cd6561db520de20c681bb50022cec621c0deda82 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Fri, 14 Feb 2020 06:45:17 +0000 Subject: [PATCH 231/349] build: Don't test C extension on CPython 2.7 under Tox (#406) As the Changelog notes, release 1.0 will drop support for the native extension on CPython 2.x. So there seems little benefit of testing it. 
--- tox.ini | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 4b059ffe..607b182e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,10 @@ [tox] -envlist = {py27,py35,py36,py37,py38}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = + py27-pure, + {py35,py36,py37,py38}-{c,pure}, + {pypy,pypy3}-pure, + py27-x86, + py34-x86, [variants:pure] setenv= From fcb19a0e1a86d80c28447b0008504c3f4e2faf59 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Fri, 14 Feb 2020 06:51:19 +0000 Subject: [PATCH 232/349] Clean msgpack/_cmsgpack.cpp and msgpack/_cmsgpack.*.so (#407) --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e2f25cfd..b4749b39 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,8 @@ serve-doc: all .PHONY: clean clean: rm -rf build - rm -f msgpack/_msgpack.cpp + rm -f msgpack/_cmsgpack.cpp + rm -f msgpack/_cmsgpack.*.so rm -rf msgpack/__pycache__ rm -rf test/__pycache__ From 64f59884a1a56a882ae888af354de12d5eb052a8 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 17 Feb 2020 16:58:25 +0900 Subject: [PATCH 233/349] Add note --- DEVELOP.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 DEVELOP.md diff --git a/DEVELOP.md b/DEVELOP.md new file mode 100644 index 00000000..0e6e0e5e --- /dev/null +++ b/DEVELOP.md @@ -0,0 +1,9 @@ +# Developer's note + +## Wheels + +Wheels for macOS and Linux are built on Travis and AppVeyr, in +[methane/msgpack-wheels](https://github.com/methane/msgpack-wheels) repository. + +Wheels for Windows are built on Github Actions in this repository. + From fa7d7447fc2cc7cc1a8b388618549c98d1712b9c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 17 Feb 2020 17:07:18 +0900 Subject: [PATCH 234/349] 1.0.0 --- ChangeLog.rst | 2 +- msgpack/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index c70b966c..55cfd5d2 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,7 +1,7 @@ 1.0.0 ===== -Release Date: TBD +Release Date: 2020-02-17 * Remove Python 2 support from the ``msgpack/_cmsgpack``. ``msgpack/fallback`` still supports Python 2. diff --git a/msgpack/_version.py b/msgpack/_version.py index 56f67e4e..9f55cf50 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (1, 0, 0, "rc1") +version = (1, 0, 0) From 12506d8d91ce5a7b76c2d8babe7e2d1e2851d3a2 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 17 Feb 2020 17:12:47 +0900 Subject: [PATCH 235/349] update README --- DEVELOP.md | 16 ++++++++++++++++ README.md | 12 ------------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/DEVELOP.md b/DEVELOP.md index 0e6e0e5e..9c823c34 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -7,3 +7,19 @@ Wheels for macOS and Linux are built on Travis and AppVeyr, in Wheels for Windows are built on Github Actions in this repository. + +### Build + +``` +$ make cython +``` + + +### Test + +MessagePack uses `pytest` for testing. +Run test with following command: + +``` +$ make test +``` diff --git a/README.md b/README.md index 03552866..78bc0cf5 100644 --- a/README.md +++ b/README.md @@ -270,15 +270,3 @@ You can use `gc.disable()` when unpacking large message. List is the default sequence type of Python. But tuple is lighter than list. You can use `use_list=False` while unpacking when performance is important. - - -## Development - -### Test - -MessagePack uses `pytest` for testing. 
-Run test with following command: - -``` - $ make test -``` From 2849f5582ab154ade7fcd8c23109b1c7bd8d1530 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 19 Feb 2020 00:53:01 +0900 Subject: [PATCH 236/349] Build linux and macOS wheels on GitHub Actions. (#409) --- .github/workflows/linux.yml | 67 ++++++++++++++++++++++++++++++++ .github/workflows/mac.yml | 76 +++++++++++++++++++++++++++++++++++++ Makefile | 8 ++-- docker/buildwheel.sh | 8 +++- 4 files changed, 154 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/linux.yml create mode 100644 .github/workflows/mac.yml diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml new file mode 100644 index 00000000..89bdb4e4 --- /dev/null +++ b/.github/workflows/linux.yml @@ -0,0 +1,67 @@ +name: Build Linux Wheels +on: + push: + pull_request: + create: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: Cythonize + shell: bash + run: | + pip install -U pip + pip -V + pip install -r requirements.txt + make cython + #python setup.py sdist + + - name: Build wheels + shell: bash + run: | + make linux-wheel + + - name: Run test (3.8) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + + + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + + - name: Run test (3.7) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + + - name: Set up Python 3.6 + uses: actions/setup-python@v1 + with: + python-version: 3.6 + + - name: Run test (3.6) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + + + - name: Upload Wheels + uses: actions/upload-artifact@v1 + with: + name: linux-wheels + path: ./dist/wheelhouse/ diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml new file mode 100644 index 00000000..fb2c67f4 --- /dev/null +++ b/.github/workflows/mac.yml @@ -0,0 +1,76 @@ +name: Build macOS Wheels +on: + push: + pull_request: + create: + +jobs: + build: + runs-on: macos-latest + + steps: + - name: Checkout + uses: actions/checkout@v1 + + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: "3.8" + + - name: Cythonize + run: | + pip install -U pip + pip install -r requirements.txt + make cython + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: "3.7" + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + + - name: Set up Python 3.6 + uses: actions/setup-python@v1 + with: + python-version: "3.6" + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + + - name: Upload Wheels + uses: actions/upload-artifact@v1 + with: + name: macos-wheels + path: ./dist/ diff --git a/Makefile b/Makefile 
index b4749b39..2a4c0af8 100644 --- a/Makefile +++ b/Makefile @@ -30,10 +30,10 @@ clean: .PHONY: update-docker update-docker: - docker pull quay.io/pypa/manylinux1_i686 - docker pull quay.io/pypa/manylinux1_x86_64 + docker pull quay.io/pypa/manylinux2010_i686 + docker pull quay.io/pypa/manylinux2010_x86_64 .PHONY: linux-wheel linux-wheel: - docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh - docker run --rm -ti -v `pwd`:/project -w /project quay.io/pypa/manylinux1_x86_64 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_i686 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_x86_64 bash docker/buildwheel.sh diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index c953127d..89a25706 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -10,5 +10,11 @@ echo "arch=$ARCH" for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python - $PYBIN/python setup.py bdist_wheel -p manylinux1_${ARCH} + $PYBIN/python setup.py bdist_wheel +done + +cd dist +for whl in *.whl; do + auditwheel repair "$whl" + rm "$whl" done From 2bfc2d0566e24594460078680ad3bd0dc71892ad Mon Sep 17 00:00:00 2001 From: Charles-Axel Dein Date: Mon, 24 Feb 2020 09:51:56 +0100 Subject: [PATCH 237/349] Upgrade msgpack if already installed (#414) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 78bc0cf5..921f7f85 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ This package provides CPython bindings for reading and writing MessagePack data. ### PyPI package name TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. -Do `pip uninstall msgpack-python; pip install msgpack` instead. +Do `pip uninstall msgpack-python; pip install -U msgpack` instead. Package name on PyPI was changed to msgpack from 0.5. I upload transitional package (msgpack-python 0.5 which depending on msgpack) From 692e0ee8ff66686dd423aae69b248b67c3bf9ed4 Mon Sep 17 00:00:00 2001 From: Dan Salmon Date: Wed, 18 Mar 2020 00:29:51 +0000 Subject: [PATCH 238/349] Fix typo (#416) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 921f7f85..aeeee3a6 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ See note below for detail. * Packer * `use_bin_type=True` by default. bytes are encoded in bin type in msgpack. - **If you are still sing Python 2, you must use unicode for all string types.** + **If you are still using Python 2, you must use unicode for all string types.** You can use `use_bin_type=False` to encode into old msgpack format. * `encoding` option is removed. UTF-8 is used always. From 4e10222b5116806864a91fd9f79a70869e0a43c1 Mon Sep 17 00:00:00 2001 From: Kevin Tewouda Date: Wed, 13 May 2020 06:41:15 +0200 Subject: [PATCH 239/349] Fix an example in README.md (#423) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index aeeee3a6..ac52d94b 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,7 @@ It is also possible to pack/unpack custom data types. 
Here is an example for } def decode_datetime(obj): - if b'__datetime__' in obj: + if '__datetime__' in obj: obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") return obj From b04690012d5d77cfe5074893686c4d55ec780300 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 24 May 2020 02:15:04 +0900 Subject: [PATCH 240/349] Update doc version Fixes #425 --- docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 36fa76e3..6b432be0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -41,7 +41,7 @@ # General information about the project. project = u"msgpack" -copyright = u"2013, INADA Naoki" +copyright = u"Inada Naoki" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -49,7 +49,7 @@ # # The short X.Y version. # The full version, including alpha/beta/rc tags. -version = release = "0.5" +version = release = "1.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From c1b1a23f62d5e0ec39a1910d2e9580ce1c13a1cf Mon Sep 17 00:00:00 2001 From: jfolz Date: Mon, 8 Jun 2020 05:14:50 +0200 Subject: [PATCH 241/349] Fix Unpacker.tell() (#427) Fixes #426. Co-authored-by: folz --- msgpack/_unpacker.pyx | 10 ++++++++-- msgpack/fallback.py | 15 ++++++++------- test/test_unpack.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 43c93a2c..4340e044 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -484,8 +484,10 @@ cdef class Unpacker(object): nread = min(self.buf_tail - self.buf_head, nbytes) ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) self.buf_head += nread - if len(ret) < nbytes and self.file_like is not None: - ret += self.file_like.read(nbytes - len(ret)) + if nread < nbytes and self.file_like is not None: + ret += self.file_like.read(nbytes - nread) + nread = len(ret) + self.stream_offset += nread return ret def unpack(self): @@ -519,6 +521,10 @@ cdef class Unpacker(object): return self._unpack(read_map_header) def tell(self): + """Returns the current position of the Unpacker in bytes, i.e., the + number of bytes that were read from the input, also the starting + position of the next object. 
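The corrected `tell()` behaviour, including its interaction with `read_bytes()`, comes down to the following sketch (offsets follow the small stream used in the tests below):

```py
from io import BytesIO
import msgpack

unpacker = msgpack.Unpacker(BytesIO(b"\x01\x02\xa3abc\xa3def\xa3ghi"))

assert unpacker.unpack() == 1
assert unpacker.tell() == 1        # one byte consumed
assert unpacker.unpack() == 2
assert unpacker.tell() == 2

assert unpacker.read_bytes(4) == b"\xa3abc"
assert unpacker.tell() == 6        # read_bytes() now advances the offset too

assert unpacker.unpack() == "def"
assert unpacker.tell() == 10
```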
+ """ return self.stream_offset def __iter__(self): diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9f6665b3..1e0bbe91 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -365,18 +365,19 @@ def _get_extradata(self): return self._buffer[self._buff_i :] def read_bytes(self, n): - ret = self._read(n) + ret = self._read(n, raise_outofdata=False) self._consume() return ret - def _read(self, n): + def _read(self, n, raise_outofdata=True): # (int) -> bytearray - self._reserve(n) + self._reserve(n, raise_outofdata=raise_outofdata) i = self._buff_i - self._buff_i = i + n - return self._buffer[i : i + n] + ret = self._buffer[i : i + n] + self._buff_i = i + len(ret) + return ret - def _reserve(self, n): + def _reserve(self, n, raise_outofdata=True): remain_bytes = len(self._buffer) - self._buff_i - n # Fast path: buffer has n bytes already @@ -404,7 +405,7 @@ def _reserve(self, n): self._buffer += read_data remain_bytes -= len(read_data) - if len(self._buffer) < n + self._buff_i: + if len(self._buffer) < n + self._buff_i and raise_outofdata: self._buff_i = 0 # rollback raise OutOfData diff --git a/test/test_unpack.py b/test/test_unpack.py index bc74c4dd..057b7bf4 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -3,6 +3,11 @@ from msgpack import Unpacker, packb, OutOfData, ExtType from pytest import raises, mark +try: + from itertools import izip as zip +except ImportError: + pass + def test_unpack_array_header_from_file(): f = BytesIO(packb([1, 2, 3, 4])) @@ -64,7 +69,31 @@ def _hook(self, code, data): assert unpacker.unpack() == {"a": ExtType(2, b"321")} +def test_unpacker_tell(): + objects = 1, 2, u"abc", u"def", u"ghi" + packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" + positions = 1, 2, 6, 10, 14 + unpacker = Unpacker(BytesIO(packed)) + for obj, unp, pos in zip(objects, unpacker, positions): + assert obj == unp + assert pos == unpacker.tell() + + +def test_unpacker_tell_read_bytes(): + objects = 1, u"abc", u"ghi" + packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" + raw_data = b"\x02", b"\xa3def", b"" + lenghts = 1, 4, 999 + positions = 1, 6, 14 + unpacker = Unpacker(BytesIO(packed)) + for obj, unp, pos, n, raw in zip(objects, unpacker, positions, lenghts, raw_data): + assert obj == unp + assert pos == unpacker.tell() + assert unpacker.read_bytes(n) == raw + + if __name__ == "__main__": test_unpack_array_header_from_file() test_unpacker_hook_refcnt() test_unpacker_ext_hook() + test_unpacker_tell() From 3508ca524ebb83b0117a0cc9a08986d933ddb022 Mon Sep 17 00:00:00 2001 From: Contextualist Date: Sun, 21 Jun 2020 22:27:52 -0400 Subject: [PATCH 242/349] Fix benchmark extension module import (#428) --- benchmark/benchmark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 82d0ddbf..2e778dd2 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,7 +1,7 @@ from msgpack import fallback try: - from msgpack import _unpacker, _packer + from msgpack import _cmsgpack has_ext = True except ImportError: @@ -17,14 +17,14 @@ def profile(name, func): def simple(name, data): if has_ext: - packer = _packer.Packer() + packer = _cmsgpack.Packer() profile("packing %s (ext)" % name, lambda: packer.pack(data)) packer = fallback.Packer() profile("packing %s (fallback)" % name, lambda: packer.pack(data)) data = packer.pack(data) if has_ext: - profile("unpacking %s (ext)" % name, lambda: _unpacker.unpackb(data)) + profile("unpacking %s (ext)" % name, lambda: _cmsgpack.unpackb(data)) profile("unpacking %s 
(fallback)" % name, lambda: fallback.unpackb(data)) From d9ead81021c7b5f034a0475bf9a88e9612cc8e84 Mon Sep 17 00:00:00 2001 From: Markus Gerstel <2102431+Anthchirp@users.noreply.github.com> Date: Fri, 26 Jun 2020 10:15:46 +0100 Subject: [PATCH 243/349] Fix a typo in the changelog (#429) --- ChangeLog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 55cfd5d2..d922e847 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -6,7 +6,7 @@ Release Date: 2020-02-17 * Remove Python 2 support from the ``msgpack/_cmsgpack``. ``msgpack/fallback`` still supports Python 2. * Remove ``encoding`` option from the Packer and Unpacker. -* Unpacker: The default value of ``max_buffer_type`` is changed to 100MiB. +* Unpacker: The default value of ``max_buffer_size`` is changed to 100MiB. * Unpacker: ``strict_map_key`` is True by default now. * Unpacker: String map keys are interned. * Drop old buffer protocol support. From 5614dd5a898772faa2dfe89bd9f6a5c90c5fafe5 Mon Sep 17 00:00:00 2001 From: Tom Pohl Date: Thu, 23 Jul 2020 10:53:55 +0200 Subject: [PATCH 244/349] Allow for timestamps before UNIX epoch (#433) --- msgpack/ext.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/msgpack/ext.py b/msgpack/ext.py index 8341c68b..4eb9dd65 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -178,7 +178,9 @@ def to_datetime(self): :rtype: datetime. """ - return datetime.datetime.fromtimestamp(self.to_unix(), _utc) + return datetime.datetime.fromtimestamp(0, _utc) + datetime.timedelta( + seconds=self.to_unix() + ) @staticmethod def from_datetime(dt): From 772c830841a276adb392dd449809764b2826b1f8 Mon Sep 17 00:00:00 2001 From: Peter Fischer Date: Fri, 24 Jul 2020 09:29:15 +0200 Subject: [PATCH 245/349] Synchronize handling of datetime in Packer implementations (#434) The handling of datetime is different in the cython and Python implementations. In contrast to the docs, timezone is not required in the Python implementation. --- msgpack/fallback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 1e0bbe91..9739d53b 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -921,7 +921,7 @@ def _pack( len(obj), dict_iteritems(obj), nest_limit - 1 ) - if self._datetime and check(obj, _DateTime): + if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: obj = Timestamp.from_datetime(obj) default_used = 1 continue From 8fb709f2e0438862020d8810fa70a81fb5dac7d4 Mon Sep 17 00:00:00 2001 From: Peter Fischer Date: Thu, 30 Jul 2020 16:48:51 +0200 Subject: [PATCH 246/349] Fix datetime before epoch on windows in cython implementation (#436) Cython implementation still used datetime.from_timestamp method, which does not work on windows. Update the cython implementation to use utc time and delta and add a regression test to highlight the issue. 
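Behaviourally, the fix comes down to this round trip (sketch; Python 3 only, and the input must be timezone-aware):

```py
import datetime
import msgpack

utc = datetime.timezone.utc
t_in = datetime.datetime(1960, 1, 1, tzinfo=utc)  # well before the UNIX epoch

packed = msgpack.packb(t_in, datetime=True)
assert msgpack.unpackb(packed, timestamp=3) == t_in
```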
--- Makefile | 1 + msgpack/unpack.h | 34 +++++++++++++++++++++------------- test/test_timestamp.py | 8 ++++++++ 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 2a4c0af8..0110ddfa 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,7 @@ clean: rm -rf build rm -f msgpack/_cmsgpack.cpp rm -f msgpack/_cmsgpack.*.so + rm -f msgpack/_cmsgpack.*.pyd rm -rf msgpack/__pycache__ rm -rf test/__pycache__ diff --git a/msgpack/unpack.h b/msgpack/unpack.h index debdf715..868b96e7 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -341,7 +341,26 @@ static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos else if (u->timestamp == 0) { // Timestamp py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); } - else { // float or datetime + else if (u->timestamp == 3) { // datetime + // Calculate datetime using epoch + delta + // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps + PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType); + if (epoch == NULL) { + return -1; + } + + PyObject* d = PyDelta_FromDSU(0, ts.tv_sec, ts.tv_nsec / 1000); + if (d == NULL) { + Py_DECREF(epoch); + return -1; + } + + py = PyNumber_Add(epoch, d); + + Py_DECREF(epoch); + Py_DECREF(d); + } + else { // float PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec); if (a == NULL) return -1; @@ -358,18 +377,7 @@ static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos a = PyNumber_Add(b, c); Py_DECREF(b); Py_DECREF(c); - - if (u->timestamp == 3) { // datetime - PyObject *t = PyTuple_Pack(2, a, u->utc); - Py_DECREF(a); - if (t == NULL) { - return -1; - } - py = PyDateTime_FromTimestamp(t); - Py_DECREF(t); - } else { // float - py = a; - } + py = a; } } else { py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 823fe04e..edc488af 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -99,6 +99,14 @@ def test_unpack_datetime(): assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) +@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") +def test_pack_unpack_before_epoch(): + t_in = datetime.datetime(1960, 1, 1, tzinfo=_utc) + packed = msgpack.packb(t_in, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == t_in + + @pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_datetime(): t = Timestamp(42, 14000) From 44bc2bd439808ad7563ef8a558ad6ccfe175a66a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 4 Dec 2020 17:23:09 +0900 Subject: [PATCH 247/349] Update docstring --- msgpack/_packer.pyx | 8 ++++- msgpack/_unpacker.pyx | 80 +++++++++++++++++-------------------------- msgpack/fallback.py | 25 +++++++++++++- 3 files changed, 63 insertions(+), 50 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index b4706463..e6cd2c7d 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -63,7 +63,7 @@ cdef class Packer(object): """ MessagePack Packer - usage:: + Usage:: packer = Packer() astream.write(packer.pack(a)) @@ -94,6 +94,12 @@ cdef class Packer(object): This is useful when trying to implement accurate serialization for python types. + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. 
+ Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + (Python 2 is not supported). + :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 4340e044..e4f3f1e4 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -212,65 +212,49 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, cdef class Unpacker(object): - """Streaming unpacker. - - Arguments: - - :param file_like: - File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. - - :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) - - :param bool use_list: - If true, unpack msgpack array to Python list. - Otherwise, unpack to Python tuple. (default: True) - - :param bool raw: - If true, unpack msgpack raw to Python bytes. - Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + """ + MessagePack Packer - :param bool strict_map_key: - If true (default), only str or bytes are accepted for map (dict) keys. + Usage:: - :param callable object_hook: - When specified, it should be callable. - Unpacker calls it with a dict argument after unpacking msgpack map. - (See also simplejson) + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) - :param callable object_pairs_hook: - When specified, it should be callable. - Unpacker calls it with a list of key-value pairs after unpacking msgpack map. - (See also simplejson) + Packer's constructor has some keyword arguments: - :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means system's INT_MAX. - The default value is 100*1024*1024 (100MiB). - Raises `BufferFull` exception when it is insufficient. - You should set this parameter when unpacking data from untrusted source. + :param callable default: + Convert user type to builtin type that Packer supports. + See also simplejson's document. - :param int max_str_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of str. (default: max_buffer_size) + :param bool use_single_float: + Use single precision float type for float. (default: False) - :param int max_bin_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of bin. (default: max_buffer_size) + :param bool autoreset: + Reset buffer after each pack and return its content as `bytes`. (default: True). + If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. - :param int max_array_len: - Limits max length of array. (default: max_buffer_size) + :param bool use_bin_type: + Use bin type introduced in msgpack spec 2.0 for bytes. + It also enables str8 type for unicode. (default: True) - :param int max_map_len: - Limits max length of map. (default: max_buffer_size//2) + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. - :param int max_ext_len: - Deprecated, use *max_buffer_size* instead. - Limits max size of ext type. 
(default: max_buffer_size) + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + (Python 2 is not supported). :param str unicode_errors: - Error handler used for decoding str type. (default: `'strict'`) - + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. Example of streaming deserialize from file-like object:: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9739d53b..0bfa94ea 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -744,7 +744,7 @@ class Packer(object): """ MessagePack Packer - Usage: + Usage:: packer = Packer() astream.write(packer.pack(a)) @@ -784,6 +784,29 @@ class Packer(object): :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. + + Example of streaming deserialize from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + process(o) + + Example of streaming deserialize from socket:: + + unpacker = Unpacker() + while True: + buf = sock.recv(1024**2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. """ def __init__( From 2df517999b012737b1cf36f12bac13541a8eb6e1 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 11 Dec 2020 13:39:24 +0900 Subject: [PATCH 248/349] Travis: Reduce build Save credits. 
--- .travis.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5132b4c4..5980d630 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,20 +3,17 @@ dist: xenial language: python cache: pip arch: - - amd64 - arm64 + python: # Available Python (PyPy) can be listed by: # # $ aws s3 ls s3://travis-python-archives/binaries/ubuntu/16.04/x86_64/ - - "3.4" - - "3.5" - "3.6" - "3.7" - "3.8" - "3.9-dev" - _pure: &pure install: - pip install -U pip From 7d6b4dfb516451735150d3f796f0b35ed5f004a4 Mon Sep 17 00:00:00 2001 From: Tsahi Zidenberg <65945052+tsahee@users.noreply.github.com> Date: Fri, 11 Dec 2020 07:30:49 +0200 Subject: [PATCH 249/349] Build arm64 wheels (#439) --- .github/workflows/linux.yml | 10 ++++++++++ Makefile | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 89bdb4e4..50a0abe7 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -30,6 +30,16 @@ jobs: run: | make linux-wheel + - name: Install qemu-user-static for docker + shell: bash + run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + + - name: Build arm64 wheels + shell: bash + run: | + make linux-arm64-wheel + - name: Run test (3.8) run: | pip install pytest diff --git a/Makefile b/Makefile index 0110ddfa..05cca554 100644 --- a/Makefile +++ b/Makefile @@ -38,3 +38,7 @@ update-docker: linux-wheel: docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_i686 bash docker/buildwheel.sh docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_x86_64 bash docker/buildwheel.sh + +.PHONY: linux-arm64-wheel +linux-arm64-wheel: + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_aarch64 bash docker/buildwheel.sh From d893697eab07a2cf2a02f5115d4e7bae99d07e3f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 11 Dec 2020 19:16:14 +0900 Subject: [PATCH 250/349] v1.0.1 --- ChangeLog.rst | 8 ++++++++ msgpack/_version.py | 2 +- setup.py | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index d922e847..bb3a6332 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,11 @@ +1.0.1 +===== + +* Add Python 3.9 and linux/arm64 wheels. 
(#439) +* Fixed Unpacker.tell() after read_bytes() (#426) +* Fixed unpacking datetime before epoch on Windows (#433) +* Fixed fallback Packer didn't check DateTime.tzinfo (#434) + 1.0.0 ===== diff --git a/msgpack/_version.py b/msgpack/_version.py index 9f55cf50..95e81294 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (1, 0, 0) +version = (1, 0, 1) diff --git a/setup.py b/setup.py index 2ec9ca70..8e88750a 100755 --- a/setup.py +++ b/setup.py @@ -132,6 +132,7 @@ def __init__(self, *args, **kwargs): "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Intended Audience :: Developers", From edd56036616e5f7211d5cd840a4d00bb43a7f9ee Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 11 Dec 2020 19:31:24 +0900 Subject: [PATCH 251/349] Actions: Add Python 3.9 --- .github/workflows/linux.yml | 11 +++++++++++ .github/workflows/mac.yml | 17 +++++++++++++++++ .github/workflows/windows.yaml | 14 ++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 50a0abe7..f9707b1c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -47,6 +47,17 @@ jobs: pytest -v test + - name: Set up Python 3.9 + uses: actions/setup-python@v1 + with: + python-version: 3.9 + + - name: Run test (3.9) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + - name: Set up Python 3.7 uses: actions/setup-python@v1 with: diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index fb2c67f4..78d944cc 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -35,6 +35,23 @@ jobs: pytest -v test + - name: Set up Python 3.9 + uses: actions/setup-python@v1 + with: + python-version: "3.9" + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + - name: Set up Python 3.7 uses: actions/setup-python@v1 with: diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml index cecb8258..139a5a64 100644 --- a/.github/workflows/windows.yaml +++ b/.github/workflows/windows.yaml @@ -63,6 +63,20 @@ jobs: run: | ci/runtests.sh + - name: Python 3.9 (amd64) + env: + PYTHON: "py -3.9-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.9 (x86) + env: + PYTHON: "py -3.9-32" + shell: bash + run: | + ci/runtests.sh + - name: Upload Wheels uses: actions/upload-artifact@v1 with: From 8029f95516dbfddf2fea61efb06dc08ded84aab7 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 11 Dec 2020 19:33:20 +0900 Subject: [PATCH 252/349] Add Python 3.9 wheels --- docker/shared.env | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/shared.env b/docker/shared.env index 17abdd8f..3601a075 100644 --- a/docker/shared.env +++ b/docker/shared.env @@ -1,4 +1,5 @@ PYTHON_VERSIONS=( + cp39-cp39 cp38-cp38 cp37-cp37m cp36-cp36m From 753b3706d80a7bc5a29147730804e867b97eee57 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 18 Dec 2020 14:21:27 +0900 Subject: [PATCH 253/349] Fix overflow in unpacking timestamp to datetime (#452) --- msgpack/unpack.h | 6 +++--- test/test_timestamp.py | 11 +++++++++++ 2 files changed, 14 
insertions(+), 3 deletions(-) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 868b96e7..34212bcd 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -342,21 +342,21 @@ static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); } else if (u->timestamp == 3) { // datetime - // Calculate datetime using epoch + delta + // Calculate datetime using epoch + delta // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType); if (epoch == NULL) { return -1; } - PyObject* d = PyDelta_FromDSU(0, ts.tv_sec, ts.tv_nsec / 1000); + PyObject* d = PyDelta_FromDSU(ts.tv_sec/(24*3600), ts.tv_sec%(24*3600), ts.tv_nsec / 1000); if (d == NULL) { Py_DECREF(epoch); return -1; } py = PyNumber_Add(epoch, d); - + Py_DECREF(epoch); Py_DECREF(d); } diff --git a/test/test_timestamp.py b/test/test_timestamp.py index edc488af..6a29be77 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -129,3 +129,14 @@ def test_pack_datetime(): assert x assert x[0] == dt assert msgpack.unpackb(packed) is None + + +@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") +def test_issue451(): + # https://github.com/msgpack/msgpack-python/issues/451 + dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=_utc) + packed = msgpack.packb(dt, datetime=True) + assert packed == b"\xd6\xff\xf4\x86eL" + + unpacked = msgpack.unpackb(packed, timestamp=3) + assert dt == unpacked From 94336cf914c202718e37f27d664517c9a8c79d50 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 18 Dec 2020 16:03:05 +0900 Subject: [PATCH 254/349] Fix some travis builds. (#453) --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5980d630..4974d26c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,7 @@ _pure: &pure matrix: include: - name: 32bit build + arch: amd64 language: python services: - docker @@ -58,10 +59,12 @@ matrix: <<: *pure - name: "pypy2.7" + arch: amd64 python: "pypy2.7-7.1.1" <<: *pure - name: "pypy3" + arch: amd64 python: "pypy3.6-7.1.1" <<: *pure From 051f9ded1fe2343ffdf55e7a5ec2da1dab7638b9 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 18 Dec 2020 16:13:35 +0900 Subject: [PATCH 255/349] format markdown --- README.md | 126 +++++++++++++++++++++++++++--------------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index ac52d94b..2d5dd889 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,9 @@ See note below for detail. ## Install - - $ pip install msgpack - +``` +$ pip install msgpack +``` ### Pure Python implementation @@ -103,18 +103,18 @@ msgpack provides `dumps` and `loads` as an alias for compatibility with `unpack` and `load` unpacks from a file-like object. 
```pycon - >>> import msgpack - >>> msgpack.packb([1, 2, 3], use_bin_type=True) - '\x93\x01\x02\x03' - >>> msgpack.unpackb(_, raw=False) - [1, 2, 3] +>>> import msgpack +>>> msgpack.packb([1, 2, 3], use_bin_type=True) +'\x93\x01\x02\x03' +>>> msgpack.unpackb(_, raw=False) +[1, 2, 3] ``` `unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: ```pycon - >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) - (1, 2, 3) +>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) +(1, 2, 3) ``` You should always specify the `use_list` keyword argument for backward compatibility. @@ -129,18 +129,18 @@ Read the docstring for other options. stream (or from bytes provided through its `feed` method). ```py - import msgpack - from io import BytesIO +import msgpack +from io import BytesIO - buf = BytesIO() - for i in range(100): - buf.write(msgpack.packb(i, use_bin_type=True)) +buf = BytesIO() +for i in range(100): + buf.write(msgpack.packb(i, use_bin_type=True)) - buf.seek(0) +buf.seek(0) - unpacker = msgpack.Unpacker(buf, raw=False) - for unpacked in unpacker: - print(unpacked) +unpacker = msgpack.Unpacker(buf, raw=False) +for unpacked in unpacker: + print(unpacked) ``` @@ -150,27 +150,27 @@ It is also possible to pack/unpack custom data types. Here is an example for `datetime.datetime`. ```py - import datetime - import msgpack +import datetime +import msgpack - useful_dict = { - "id": 1, - "created": datetime.datetime.now(), - } +useful_dict = { + "id": 1, + "created": datetime.datetime.now(), +} - def decode_datetime(obj): - if '__datetime__' in obj: - obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") - return obj +def decode_datetime(obj): + if '__datetime__' in obj: + obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") + return obj - def encode_datetime(obj): - if isinstance(obj, datetime.datetime): - return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} - return obj +def encode_datetime(obj): + if isinstance(obj, datetime.datetime): + return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} + return obj - packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) - this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) +packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) ``` `Unpacker`'s `object_hook` callback receives a dict; the @@ -183,25 +183,25 @@ key-value pairs. It is also possible to pack/unpack custom data types using the **ext** type. ```pycon - >>> import msgpack - >>> import array - >>> def default(obj): - ... if isinstance(obj, array.array) and obj.typecode == 'd': - ... return msgpack.ExtType(42, obj.tostring()) - ... raise TypeError("Unknown type: %r" % (obj,)) - ... - >>> def ext_hook(code, data): - ... if code == 42: - ... a = array.array('d') - ... a.fromstring(data) - ... return a - ... return ExtType(code, data) - ... - >>> data = array.array('d', [1.2, 3.4]) - >>> packed = msgpack.packb(data, default=default, use_bin_type=True) - >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) - >>> data == unpacked - True +>>> import msgpack +>>> import array +>>> def default(obj): +... if isinstance(obj, array.array) and obj.typecode == 'd': +... return msgpack.ExtType(42, obj.tostring()) +... raise TypeError("Unknown type: %r" % (obj,)) +... 
+>>> def ext_hook(code, data): +... if code == 42: +... a = array.array('d') +... a.fromstring(data) +... return a +... return ExtType(code, data) +... +>>> data = array.array('d', [1.2, 3.4]) +>>> packed = msgpack.packb(data, default=default, use_bin_type=True) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) +>>> data == unpacked +True ``` @@ -226,11 +226,11 @@ You can pack into and unpack from this old spec using `use_bin_type=False` and `raw=True` options. ```pycon - >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) - [b'spam', b'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) - [b'spam', 'eggs'] +>>> import msgpack +>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) +[b'spam', b'eggs'] +>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) +[b'spam', 'eggs'] ``` ### ext type @@ -238,10 +238,10 @@ and `raw=True` options. To use the **ext** type, pass `msgpack.ExtType` object to packer. ```pycon - >>> import msgpack - >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) - >>> msgpack.unpackb(packed) - ExtType(code=42, data='xyzzy') +>>> import msgpack +>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) +>>> msgpack.unpackb(packed) +ExtType(code=42, data='xyzzy') ``` You can use it with `default` and `ext_hook`. See below. From f34fca7fb55e2c8adde39c34ac48542649a24d11 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 18 Dec 2020 16:21:41 +0900 Subject: [PATCH 256/349] Update readme --- README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2d5dd889..d8ce9baa 100644 --- a/README.md +++ b/README.md @@ -15,15 +15,10 @@ This package provides CPython bindings for reading and writing MessagePack data. ### PyPI package name -TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`. -Do `pip uninstall msgpack-python; pip install -U msgpack` instead. +Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. -Package name on PyPI was changed to msgpack from 0.5. -I upload transitional package (msgpack-python 0.5 which depending on msgpack) -for smooth transition from msgpack-python to msgpack. - -Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-python`, -msgpack is removed, and `import msgpack` fail. +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. ### Compatibility with the old format From c0516c603f0eb6555117e312f5cdfb383853bc8e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 18 Dec 2020 16:43:04 +0900 Subject: [PATCH 257/349] v1.0.2 --- ChangeLog.rst | 5 +++++ msgpack/_version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index bb3a6332..230cc301 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,8 @@ +1.0.2 +===== + +* Fix year 2038 problem regression in 1.0.1. 
(#451) + 1.0.1 ===== diff --git a/msgpack/_version.py b/msgpack/_version.py index 95e81294..1c83c8ed 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (1, 0, 1) +version = (1, 0, 2) From 431ef45c8ebff54c2b182dda46e7edb78a4e271b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 18 Dec 2020 17:43:37 +0900 Subject: [PATCH 258/349] Use manylinux1 instead of manylinux2010 --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 05cca554..6f29aede 100644 --- a/Makefile +++ b/Makefile @@ -31,13 +31,13 @@ clean: .PHONY: update-docker update-docker: - docker pull quay.io/pypa/manylinux2010_i686 - docker pull quay.io/pypa/manylinux2010_x86_64 + docker pull quay.io/pypa/manylinux1_i686 + docker pull quay.io/pypa/manylinux1_x86_64 .PHONY: linux-wheel linux-wheel: - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_i686 bash docker/buildwheel.sh - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_x86_64 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux1_x86_64 bash docker/buildwheel.sh .PHONY: linux-arm64-wheel linux-arm64-wheel: From 3b71818bb0e4cbafdb58895fa47704563448b6ac Mon Sep 17 00:00:00 2001 From: Guy Tuval Date: Sat, 2 Jan 2021 08:39:37 +0200 Subject: [PATCH 259/349] Refactor fallback read header (#441) --- msgpack/fallback.py | 247 +++++++++++++++----------------------------- 1 file changed, 82 insertions(+), 165 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 0bfa94ea..7dbc67ad 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,5 +1,4 @@ """Fallback pure Python implementation of msgpack""" - from datetime import datetime as _DateTime import sys import struct @@ -148,6 +147,38 @@ def _unpack_from(f, b, o=0): else: _unpack_from = struct.unpack_from +_NO_FORMAT_USED = "" +_MSGPACK_HEADERS = { + 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), + 0xC5: (2, ">H", TYPE_BIN), + 0xC6: (4, ">I", TYPE_BIN), + 0xC7: (2, "Bb", TYPE_EXT), + 0xC8: (3, ">Hb", TYPE_EXT), + 0xC9: (5, ">Ib", TYPE_EXT), + 0xCA: (4, ">f"), + 0xCB: (8, ">d"), + 0xCC: (1, _NO_FORMAT_USED), + 0xCD: (2, ">H"), + 0xCE: (4, ">I"), + 0xCF: (8, ">Q"), + 0xD0: (1, "b"), + 0xD1: (2, ">h"), + 0xD2: (4, ">i"), + 0xD3: (8, ">q"), + 0xD4: (1, "b1s", TYPE_EXT), + 0xD5: (2, "b2s", TYPE_EXT), + 0xD6: (4, "b4s", TYPE_EXT), + 0xD7: (8, "b8s", TYPE_EXT), + 0xD8: (16, "b16s", TYPE_EXT), + 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), + 0xDA: (2, ">H", TYPE_RAW), + 0xDB: (4, ">I", TYPE_RAW), + 0xDC: (2, ">H", TYPE_ARRAY), + 0xDD: (4, ">I", TYPE_ARRAY), + 0xDE: (2, ">H", TYPE_MAP), + 0xDF: (4, ">I", TYPE_MAP), +} + class Unpacker(object): """Streaming unpacker. 
@@ -409,7 +440,7 @@ def _reserve(self, n, raise_outofdata=True): self._buff_i = 0 # rollback raise OutOfData - def _read_header(self, execute=EX_CONSTRUCT): + def _read_header(self): typ = TYPE_IMMEDIATE n = 0 obj = None @@ -442,187 +473,73 @@ def _read_header(self, execute=EX_CONSTRUCT): obj = False elif b == 0xC3: obj = True - elif b == 0xC4: - typ = TYPE_BIN - self._reserve(1) - n = self._buffer[self._buff_i] - self._buff_i += 1 - if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n) - elif b == 0xC5: - typ = TYPE_BIN - self._reserve(2) - n = _unpack_from(">H", self._buffer, self._buff_i)[0] - self._buff_i += 2 - if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._read(n) - elif b == 0xC6: - typ = TYPE_BIN - self._reserve(4) - n = _unpack_from(">I", self._buffer, self._buff_i)[0] - self._buff_i += 4 + elif 0xC4 <= b <= 0xC6: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + n = _unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + n = self._buffer[self._buff_i] + self._buff_i += size if n > self._max_bin_len: raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) - elif b == 0xC7: # ext 8 - typ = TYPE_EXT - self._reserve(2) - L, n = _unpack_from("Bb", self._buffer, self._buff_i) - self._buff_i += 2 + elif 0xC7 <= b <= 0xC9: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + L, n = _unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if L > self._max_ext_len: raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) - elif b == 0xC8: # ext 16 - typ = TYPE_EXT - self._reserve(3) - L, n = _unpack_from(">Hb", self._buffer, self._buff_i) - self._buff_i += 3 - if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L) - elif b == 0xC9: # ext 32 - typ = TYPE_EXT - self._reserve(5) - L, n = _unpack_from(">Ib", self._buffer, self._buff_i) - self._buff_i += 5 - if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._read(L) - elif b == 0xCA: - self._reserve(4) - obj = _unpack_from(">f", self._buffer, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xCB: - self._reserve(8) - obj = _unpack_from(">d", self._buffer, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xCC: - self._reserve(1) - obj = self._buffer[self._buff_i] - self._buff_i += 1 - elif b == 0xCD: - self._reserve(2) - obj = _unpack_from(">H", self._buffer, self._buff_i)[0] - self._buff_i += 2 - elif b == 0xCE: - self._reserve(4) - obj = _unpack_from(">I", self._buffer, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xCF: - self._reserve(8) - obj = _unpack_from(">Q", self._buffer, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xD0: - self._reserve(1) - obj = _unpack_from("b", self._buffer, self._buff_i)[0] - self._buff_i += 1 - elif b == 0xD1: - self._reserve(2) - obj = _unpack_from(">h", self._buffer, self._buff_i)[0] - self._buff_i += 2 - elif b == 0xD2: - self._reserve(4) - obj = _unpack_from(">i", self._buffer, self._buff_i)[0] - self._buff_i += 4 - elif b == 0xD3: - self._reserve(8) - obj = _unpack_from(">q", self._buffer, self._buff_i)[0] - self._buff_i += 8 - elif b == 0xD4: # fixext 1 - typ = TYPE_EXT - if self._max_ext_len < 1: - raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) - self._reserve(2) - n, obj = 
_unpack_from("b1s", self._buffer, self._buff_i) - self._buff_i += 2 - elif b == 0xD5: # fixext 2 - typ = TYPE_EXT - if self._max_ext_len < 2: - raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) - self._reserve(3) - n, obj = _unpack_from("b2s", self._buffer, self._buff_i) - self._buff_i += 3 - elif b == 0xD6: # fixext 4 - typ = TYPE_EXT - if self._max_ext_len < 4: - raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) - self._reserve(5) - n, obj = _unpack_from("b4s", self._buffer, self._buff_i) - self._buff_i += 5 - elif b == 0xD7: # fixext 8 - typ = TYPE_EXT - if self._max_ext_len < 8: - raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) - self._reserve(9) - n, obj = _unpack_from("b8s", self._buffer, self._buff_i) - self._buff_i += 9 - elif b == 0xD8: # fixext 16 - typ = TYPE_EXT - if self._max_ext_len < 16: - raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) - self._reserve(17) - n, obj = _unpack_from("b16s", self._buffer, self._buff_i) - self._buff_i += 17 - elif b == 0xD9: - typ = TYPE_RAW - self._reserve(1) - n = self._buffer[self._buff_i] - self._buff_i += 1 - if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n) - elif b == 0xDA: - typ = TYPE_RAW - self._reserve(2) - (n,) = _unpack_from(">H", self._buffer, self._buff_i) - self._buff_i += 2 - if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._read(n) - elif b == 0xDB: - typ = TYPE_RAW - self._reserve(4) - (n,) = _unpack_from(">I", self._buffer, self._buff_i) - self._buff_i += 4 + elif 0xCA <= b <= 0xD3: + size, fmt = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + obj = _unpack_from(fmt, self._buffer, self._buff_i)[0] + else: + obj = self._buffer[self._buff_i] + self._buff_i += size + elif 0xD4 <= b <= 0xD8: + size, fmt, typ = _MSGPACK_HEADERS[b] + if self._max_ext_len < size: + raise ValueError( + "%s exceeds max_ext_len(%s)" % (size, self._max_ext_len) + ) + self._reserve(size + 1) + n, obj = _unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size + 1 + elif 0xD9 <= b <= 0xDB: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + if len(fmt) > 0: + (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + else: + n = self._buffer[self._buff_i] + self._buff_i += size if n > self._max_str_len: raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) - elif b == 0xDC: - typ = TYPE_ARRAY - self._reserve(2) - (n,) = _unpack_from(">H", self._buffer, self._buff_i) - self._buff_i += 2 - if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xDD: - typ = TYPE_ARRAY - self._reserve(4) - (n,) = _unpack_from(">I", self._buffer, self._buff_i) - self._buff_i += 4 + elif 0xDC <= b <= 0xDD: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if n > self._max_array_len: raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) - elif b == 0xDE: - self._reserve(2) - (n,) = _unpack_from(">H", self._buffer, self._buff_i) - self._buff_i += 2 + elif 0xDE <= b <= 0xDF: + size, fmt, typ = _MSGPACK_HEADERS[b] + self._reserve(size) + (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + self._buff_i += size if n > self._max_map_len: raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - typ = TYPE_MAP - elif b 
== 0xDF: - self._reserve(4) - (n,) = _unpack_from(">I", self._buffer, self._buff_i) - self._buff_i += 4 - if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) - typ = TYPE_MAP else: raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj def _unpack(self, execute=EX_CONSTRUCT): - typ, n, obj = self._read_header(execute) + typ, n, obj = self._read_header() if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: From 02e1f7623cd8d0fcd4763d542fc60e2957ee2046 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Wed, 27 Jan 2021 01:11:32 +0000 Subject: [PATCH 260/349] build: Create tox environments using a known Cython version (#408) This change causes Tox to run the project's setup.py in a virtualenv (default path is .tox/.package). The required version of Cython is installed, rather than whatever version is installed system wide. --- pyproject.toml | 8 ++++++++ requirements.txt | 1 + tox.ini | 1 + 3 files changed, 10 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..a9eb8aa9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = [ + # Also declared in requirements.txt, if updating here please also update + # there + "Cython~=0.29.13", + "setuptools >= 35.0.2", +] +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index a2cce258..180fe851 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ +# Also declared in pyproject.toml, if updating here please also update there Cython~=0.29.13 diff --git a/tox.ini b/tox.ini index 607b182e..ace1ba90 100644 --- a/tox.ini +++ b/tox.ini @@ -5,6 +5,7 @@ envlist = {pypy,pypy3}-pure, py27-x86, py34-x86, +isolated_build = true [variants:pure] setenv= From cfae52437b0d146f74a14d69cb706712c1fa2c95 Mon Sep 17 00:00:00 2001 From: laike9m Date: Wed, 27 Jan 2021 15:33:14 -0800 Subject: [PATCH 261/349] Updated readme about Python 2 support (#456) --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index d8ce9baa..cb816485 100644 --- a/README.md +++ b/README.md @@ -71,8 +71,6 @@ Python 2 and PyPy. But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy and Python 2. -Since the [pip](https://pip.pypa.io/) uses the pure Python implementation, -Python 2 support will not be dropped in the foreseeable future. ### Windows From 1e728a2e0b7f263a4c77d6cdb6ec7c7e2a91872f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 12 Feb 2021 16:20:14 +0900 Subject: [PATCH 262/349] fix docstring (#459) --- msgpack/_unpacker.pyx | 91 ++++++++++++++++++++++++++++--------------- msgpack/fallback.py | 2 +- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index e4f3f1e4..7bfc3af5 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -212,49 +212,76 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, cdef class Unpacker(object): - """ - MessagePack Packer + """Streaming unpacker. + + Arguments: - Usage:: + :param file_like: + File-like object having `.read(n)` method. + If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. - packer = Packer() - astream.write(packer.pack(a)) - astream.write(packer.pack(b)) + :param int read_size: + Used as `file_like.read(read_size)`. 
(default: `min(16*1024, max_buffer_size)`) - Packer's constructor has some keyword arguments: + :param bool use_list: + If true, unpack msgpack array to Python list. + Otherwise, unpack to Python tuple. (default: True) - :param callable default: - Convert user type to builtin type that Packer supports. - See also simplejson's document. + :param bool raw: + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). - :param bool use_single_float: - Use single precision float type for float. (default: False) + :param int timestamp: + Control how timestamp type is unpacked: - :param bool autoreset: - Reset buffer after each pack and return its content as `bytes`. (default: True). - If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. + 0 - Timestamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). Python 2 is not supported. - :param bool use_bin_type: - Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. (default: True) + :param bool strict_map_key: + If true (default), only str or bytes are accepted for map (dict) keys. - :param bool strict_types: - If set to true, types will be checked to be exact. Derived classes - from serializable types will not be serialized and will be - treated as unsupported type and forwarded to default. - Additionally tuples will not be serialized as lists. - This is useful when trying to implement accurate serialization - for python types. + :param callable object_hook: + When specified, it should be callable. + Unpacker calls it with a dict argument after unpacking msgpack map. + (See also simplejson) - :param bool datetime: - If set to true, datetime with tzinfo is packed into Timestamp type. - Note that the tzinfo is stripped in the timestamp. - You can get UTC datetime with `timestamp=3` option of the Unpacker. - (Python 2 is not supported). + :param callable object_pairs_hook: + When specified, it should be callable. + Unpacker calls it with a list of key-value pairs after unpacking msgpack map. + (See also simplejson) :param str unicode_errors: - The error handler for encoding unicode. (default: 'strict') - DO NOT USE THIS!! This option is kept for very specific usage. + The error handler for decoding unicode. (default: 'strict') + This option should be used only when you have msgpack data which + contains invalid UTF-8 string. + + :param int max_buffer_size: + Limits size of data waiting unpacked. 0 means 2**32-1. + The default value is 100*1024*1024 (100MiB). + Raises `BufferFull` exception when it is insufficient. + You should set this parameter when unpacking data from untrusted source. + + :param int max_str_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of str. (default: max_buffer_size) + + :param int max_bin_len: + Deprecated, use *max_buffer_size* instead. + Limits max length of bin. (default: max_buffer_size) + + :param int max_array_len: + Limits max length of array. + (default: max_buffer_size) + + :param int max_map_len: + Limits max length of map. + (default: max_buffer_size//2) + + :param int max_ext_len: + Deprecated, use *max_buffer_size* instead. + Limits max size of ext type. 
(default: max_buffer_size) Example of streaming deserialize from file-like object:: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 7dbc67ad..4bcc05d5 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -260,7 +260,7 @@ class Unpacker(object): Example of streaming deserialize from socket:: - unpacker = Unpacker(max_buffer_size) + unpacker = Unpacker() while True: buf = sock.recv(1024**2) if not buf: From 4b0819dca941ba3fd27ea127e835698c33705365 Mon Sep 17 00:00:00 2001 From: Andrey Bienkowski Date: Tue, 16 Feb 2021 13:38:06 +0000 Subject: [PATCH 263/349] Remove redundant code (#460) --- test/test_unpack.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/test_unpack.py b/test/test_unpack.py index 057b7bf4..aa4c01f8 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -90,10 +90,3 @@ def test_unpacker_tell_read_bytes(): assert obj == unp assert pos == unpacker.tell() assert unpacker.read_bytes(n) == raw - - -if __name__ == "__main__": - test_unpack_array_header_from_file() - test_unpacker_hook_refcnt() - test_unpacker_ext_hook() - test_unpacker_tell() From 38357b928a2452d0889d80f0a2a721fa66f94c9a Mon Sep 17 00:00:00 2001 From: Andrey Bienkowski Date: Fri, 26 Feb 2021 02:39:36 +0000 Subject: [PATCH 264/349] Fix error formatting (#463) --- msgpack/fallback.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 4bcc05d5..898fe146 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -455,18 +455,20 @@ def _read_header(self): n = b & 0b00011111 typ = TYPE_RAW if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise ValueError( + "%s exceeds max_array_len(%s)" % (n, self._max_array_len) + ) elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) elif b == 0xC0: obj = None elif b == 0xC2: @@ -518,7 +520,7 @@ def _read_header(self): n = self._buffer[self._buff_i] self._buff_i += size if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) obj = self._read(n) elif 0xDC <= b <= 0xDD: size, fmt, typ = _MSGPACK_HEADERS[b] @@ -526,14 +528,16 @@ def _read_header(self): (n,) = _unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise ValueError( + "%s exceeds max_array_len(%s)" % (n, self._max_array_len) + ) elif 0xDE <= b <= 0xDF: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) (n,) = _unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) else: raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj From 4ace82f1087ffa1ca3c44a27c7dd3338739efd0a Mon Sep 17 00:00:00 2001 From: Andrey Bienkowski Date: Fri, 26 Feb 2021 12:08:06 +0000 Subject: [PATCH 265/349] 
Fix tox.ini (#465) There is no such thing as [variants] in the tox syntax. This resulted in MSGPACK_PUREPYTHON being unset in the "pure" test environments --- tox.ini | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index ace1ba90..29c256d1 100644 --- a/tox.ini +++ b/tox.ini @@ -7,10 +7,6 @@ envlist = py34-x86, isolated_build = true -[variants:pure] -setenv= - MSGPACK_PUREPYTHON=x - [testenv] deps= pytest @@ -20,6 +16,8 @@ commands= c,x86: python -c 'from msgpack import _cmsgpack' c,x86: py.test pure: py.test +setenv= + pure: MSGPACK_PUREPYTHON=x [testenv:py27-x86] basepython=python2.7-x86 From 44fd5777050c6583791609d3f77e05427bf878a3 Mon Sep 17 00:00:00 2001 From: Alexander Shadchin Date: Sat, 27 Feb 2021 03:30:46 +0300 Subject: [PATCH 266/349] Remove unused PyObject_AsReadBuffer definition (#468) Also "old" buffer API was removed in Python 3.10 --- msgpack/_unpacker.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 7bfc3af5..9ecfdfbb 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -3,7 +3,6 @@ from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject - cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 object PyMemoryView_GetContiguous(object obj, int buffertype, char order) from libc.stdlib cimport * From 010de11bedde8d61ced5a382b44a8344d571c305 Mon Sep 17 00:00:00 2001 From: Andrey Bienkowski Date: Sat, 27 Feb 2021 01:50:24 +0000 Subject: [PATCH 267/349] Make pure-python wheels and eggs possible (#467) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8e88750a..751abff5 100755 --- a/setup.py +++ b/setup.py @@ -88,7 +88,7 @@ def __init__(self, *args, **kwargs): macros = [("__LITTLE_ENDIAN__", "1")] ext_modules = [] -if not PYPY and not PY2: +if not PYPY and not PY2 and not os.environ.get("MSGPACK_PUREPYTHON"): ext_modules.append( Extension( "msgpack._cmsgpack", From 38dba9634e4efa7886a777b9e7c739dc148da457 Mon Sep 17 00:00:00 2001 From: Vladimir Matveev Date: Thu, 18 Mar 2021 14:35:54 -0700 Subject: [PATCH 268/349] cimport uint64_t instead of using ctypedef (#473) --- msgpack/_unpacker.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 9ecfdfbb..27facc0a 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -8,7 +8,7 @@ cdef extern from "Python.h": from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -ctypedef unsigned long long uint64_t +from libc.stdint cimport uint64_t from .exceptions import ( BufferFull, From 09187421eb0ac7061de83c9c112d738aa1edd2cf Mon Sep 17 00:00:00 2001 From: Paul Melis Date: Tue, 16 Nov 2021 06:47:40 +0100 Subject: [PATCH 269/349] Improve exception message relating to strict_map_key (#485) --- msgpack/unpack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 34212bcd..23aa6220 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -193,7 +193,7 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) { - PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key", Py_TYPE(k)->tp_name); + 
PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key when strict_map_key=True", Py_TYPE(k)->tp_name); return -1; } if (PyUnicode_CheckExact(k)) { From 9b84e490e7c78ac9bbd76dcf9ce71c1d0c978d81 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Nov 2021 14:53:08 +0900 Subject: [PATCH 270/349] Fix black formatting --- msgpack/fallback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 898fe146..45408750 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -385,7 +385,7 @@ def feed(self, next_bytes): self._buffer.extend(view) def _consume(self): - """ Gets rid of the used parts of the buffer. """ + """Gets rid of the used parts of the buffer.""" self._stream_offset += self._buff_i - self._buf_checkpoint self._buf_checkpoint = self._buff_i From b3f72541925cda3b018942db45a59936184727e3 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Nov 2021 16:19:47 +0900 Subject: [PATCH 271/349] Support Python 3.10 and Drop Python 3.5 (#487) * linux: Use manylinux2014 * mac: Drop Python 3.6 too --- .github/workflows/linux.yml | 11 +++++++++++ .github/workflows/mac.yml | 12 ++++++------ .github/workflows/windows.yaml | 14 ++++++++++++++ Makefile | 4 ++-- docker/buildwheel.sh | 4 +++- docker/shared.env | 2 +- setup.py | 4 +--- 7 files changed, 38 insertions(+), 13 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index f9707b1c..811bc13d 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -47,6 +47,17 @@ jobs: pytest -v test + - name: Set up Python 3.10 + uses: actions/setup-python@v1 + with: + python-version: "3.10" + + - name: Run test (3.10) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + - name: Set up Python 3.9 uses: actions/setup-python@v1 with: diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 78d944cc..4efe2cad 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -35,10 +35,10 @@ jobs: pytest -v test - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v1 with: - python-version: "3.9" + python-version: "3.10" - name: Build wheel run: | @@ -52,10 +52,10 @@ jobs: pytest -v test - - name: Set up Python 3.7 + - name: Set up Python 3.9 uses: actions/setup-python@v1 with: - python-version: "3.7" + python-version: "3.9" - name: Build wheel run: | @@ -69,10 +69,10 @@ jobs: pytest -v test - - name: Set up Python 3.6 + - name: Set up Python 3.7 uses: actions/setup-python@v1 with: - python-version: "3.6" + python-version: "3.7" - name: Build wheel run: | diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml index 139a5a64..debe0747 100644 --- a/.github/workflows/windows.yaml +++ b/.github/workflows/windows.yaml @@ -77,6 +77,20 @@ jobs: run: | ci/runtests.sh + - name: Python 3.10 (amd64) + env: + PYTHON: "py -3.10-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.10 (x86) + env: + PYTHON: "py -3.10-32" + shell: bash + run: | + ci/runtests.sh + - name: Upload Wheels uses: actions/upload-artifact@v1 with: diff --git a/Makefile b/Makefile index 6f29aede..b50fa80c 100644 --- a/Makefile +++ b/Makefile @@ -36,8 +36,8 @@ update-docker: .PHONY: linux-wheel linux-wheel: - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux1_i686 bash docker/buildwheel.sh - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux1_x86_64 bash docker/buildwheel.sh + 
docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_i686 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_x86_64 bash docker/buildwheel.sh .PHONY: linux-arm64-wheel linux-arm64-wheel: diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index 89a25706..ff34139d 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -7,10 +7,12 @@ set -e -x ARCH=`uname -p` echo "arch=$ARCH" +ls /opt/python + for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python - $PYBIN/python setup.py bdist_wheel + $PYBIN/python -m build -w done cd dist diff --git a/docker/shared.env b/docker/shared.env index 3601a075..80274ac6 100644 --- a/docker/shared.env +++ b/docker/shared.env @@ -1,7 +1,7 @@ PYTHON_VERSIONS=( + cp310-cp310 cp39-cp39 cp38-cp38 cp37-cp37m cp36-cp36m - cp35-cp35m ) diff --git a/setup.py b/setup.py index 751abff5..01f125fb 100755 --- a/setup.py +++ b/setup.py @@ -125,14 +125,12 @@ def __init__(self, *args, **kwargs): }, license="Apache 2.0", classifiers=[ - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Intended Audience :: Developers", From 8e358617e77d63a391f0c0f91b3e552214f2a49a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Nov 2021 17:42:42 +0900 Subject: [PATCH 272/349] mac: Provide Universal2 wheel (#488) * mac: Use cibuildwheel * Do not build wheel for PyPy. 
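A minimal sketch (not part of the workflow change itself, and assuming one of the built
wheels is already installed) for checking which architecture the interpreter runs under
and whether the C extension or the pure-Python fallback was picked up:

    import platform
    import msgpack

    print(platform.machine())          # e.g. 'arm64' or 'x86_64' on macOS
    print(msgpack.version)             # version tuple of the installed package
    # Packer comes from msgpack._cmsgpack when the C extension is in use,
    # and from msgpack.fallback otherwise.
    print(msgpack.Packer.__module__)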
--- .github/workflows/mac.yml | 51 +++++++++++++++------------------------ 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 4efe2cad..85844e7a 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -1,7 +1,6 @@ name: Build macOS Wheels on: push: - pull_request: create: jobs: @@ -12,10 +11,11 @@ jobs: - name: Checkout uses: actions/checkout@v1 - - name: Set up Python 3.8 + # Python 3.9 + - name: Set up Python 3.9 uses: actions/setup-python@v1 with: - python-version: "3.8" + python-version: "3.9" - name: Cythonize run: | @@ -23,66 +23,53 @@ jobs: pip install -r requirements.txt make cython - - name: Build wheel - run: | - pip install setuptools wheel - python setup.py bdist_wheel + - name: Build wheels + uses: pypa/cibuildwheel@v2.2.2 + env: + CIBW_ARCHS_MACOS: x86_64 universal2 + CIBW_SKIP: pp* - name: Run test run: | + ls wheelhouse/ pip install pytest - pip install -v msgpack --only-binary :all: -f dist/ --no-index + pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index pytest -v test - + # Python 3.10 - name: Set up Python 3.10 uses: actions/setup-python@v1 with: python-version: "3.10" - - name: Build wheel - run: | - pip install setuptools wheel - python setup.py bdist_wheel - - name: Run test run: | pip install pytest - pip install -v msgpack --only-binary :all: -f dist/ --no-index + pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index pytest -v test - - - name: Set up Python 3.9 + # Python 3.8 + - name: Set up Python 3.8 uses: actions/setup-python@v1 with: - python-version: "3.9" - - - name: Build wheel - run: | - pip install setuptools wheel - python setup.py bdist_wheel + python-version: "3.8" - name: Run test run: | pip install pytest - pip install -v msgpack --only-binary :all: -f dist/ --no-index + pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index pytest -v test - + # Python 3.7 - name: Set up Python 3.7 uses: actions/setup-python@v1 with: python-version: "3.7" - - name: Build wheel - run: | - pip install setuptools wheel - python setup.py bdist_wheel - - name: Run test run: | pip install pytest - pip install -v msgpack --only-binary :all: -f dist/ --no-index + pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index pytest -v test @@ -90,4 +77,4 @@ jobs: uses: actions/upload-artifact@v1 with: name: macos-wheels - path: ./dist/ + path: ./wheelhouse/ From cfa05d3fdc6290b4847e4781a06ac0668ea9dc18 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Nov 2021 17:47:16 +0900 Subject: [PATCH 273/349] Actions: Run CI only for PRs from forks. (#489) --- .github/workflows/black.yaml | 4 ++++ .github/workflows/linux.yml | 4 ++++ .github/workflows/windows.yaml | 7 ++++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index eda8d076..c5459539 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -4,6 +4,10 @@ on: ["push", "pull_request"] jobs: black: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. 
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ubuntu-latest steps: - name: Setup Python diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 811bc13d..95c672b2 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -6,6 +6,10 @@ on: jobs: build: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ubuntu-latest steps: - name: Checkout diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml index debe0747..0ce50f58 100644 --- a/.github/workflows/windows.yaml +++ b/.github/workflows/windows.yaml @@ -1,14 +1,15 @@ name: Build and test windows wheels on: push: - branches: - - master - - test pull_request: create: jobs: build: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: windows-latest steps: - name: Checkout From e464cb44fa3af5ad3ecd83f9c045b16981d01bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Tue, 16 Nov 2021 09:49:47 +0100 Subject: [PATCH 274/349] Nicer error when packing a datetime without tzinfo (#466) --- msgpack/_packer.pyx | 2 ++ msgpack/fallback.py | 4 ++++ test/test_timestamp.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index e6cd2c7d..396da0c2 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -285,6 +285,8 @@ cdef class Packer(object): o = self._default(o) default_used = 1 continue + elif self.datetime and PyDateTime_CheckExact(o): + PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) else: PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) return ret diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 45408750..b27acb29 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -874,6 +874,10 @@ def _pack( obj = self._default(obj) default_used = 1 continue + + if self._datetime and check(obj, _DateTime): + raise ValueError("Cannot serialize %r where tzinfo=None" % (obj,)) + raise TypeError("Cannot serialize %r" % (obj,)) def pack(self, obj): diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 6a29be77..4e26489b 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -140,3 +140,19 @@ def test_issue451(): unpacked = msgpack.unpackb(packed, timestamp=3) assert dt == unpacked + + +@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") +def test_pack_datetime_without_tzinfo(): + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + with pytest.raises(ValueError, match="where tzinfo=None"): + packed = msgpack.packb(dt, datetime=True) + + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) + packed = msgpack.packb(dt, datetime=True, default=lambda x: None) + assert packed == msgpack.packb(None) + + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc) + packed = msgpack.packb(dt, datetime=True) + unpacked = msgpack.unpackb(packed, timestamp=3) + assert unpacked == dt From 724e6200fd6b28b3562e48c73f7827a9c19dd11f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Nov 2021 17:52:01 +0900 Subject: [PATCH 275/349] 1.0.3rc1 --- 
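As a quick reference for the datetime handling tightened in the previous patch, a minimal
usage sketch (assuming this release or later is installed): timezone-aware datetimes
round-trip through the timestamp extension, while naive ones are rejected unless a
`default` hook takes over.

    import datetime
    import msgpack

    utc = datetime.timezone.utc
    aware = datetime.datetime(2021, 11, 16, tzinfo=utc)
    packed = msgpack.packb(aware, datetime=True)
    print(msgpack.unpackb(packed, timestamp=3) == aware)   # True

    naive = datetime.datetime(2021, 11, 16)
    try:
        msgpack.packb(naive, datetime=True)                # no tzinfo -> error
    except ValueError as exc:
        print(exc)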
msgpack/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index 1c83c8ed..3b462fa5 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (1, 0, 2) +version = (1, 0, 3, 'rc1') From e29b423de71cb6da323bce8742b1328603a7f9be Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 17 Nov 2021 11:03:06 +0900 Subject: [PATCH 276/349] black --- msgpack/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/_version.py b/msgpack/_version.py index 3b462fa5..b7165245 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (1, 0, 3, 'rc1') +version = (1, 0, 3, "rc1") From 6129789e9f6ebccdb19b23c2cd1dc9551e57fc53 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 24 Nov 2021 16:18:17 +0900 Subject: [PATCH 277/349] Release v1.0.3 (#491) --- ChangeLog.rst | 9 +++++++++ msgpack/_version.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 230cc301..fc6df680 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,12 @@ +1.0.3 +===== + +Release Date: 2021-11-24 JST + +* Fix Docstring (#459) +* Fix error formatting (#463) +* Improve error message about strict_map_key (#485) + 1.0.2 ===== diff --git a/msgpack/_version.py b/msgpack/_version.py index b7165245..fb878b35 100644 --- a/msgpack/_version.py +++ b/msgpack/_version.py @@ -1 +1 @@ -version = (1, 0, 3, "rc1") +version = (1, 0, 3) From bdf0511e29b02427437b03e7d8454bd5076e837f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 25 Nov 2021 14:43:55 +0900 Subject: [PATCH 278/349] Refactor CI (#492) * Use cibuildwheel to build wheels. * Use matrix --- .github/workflows/black.yaml | 2 +- .github/workflows/linux.yml | 103 --------------------------------- .github/workflows/mac.yml | 80 ------------------------- .github/workflows/test.yml | 45 ++++++++++++++ .github/workflows/wheel.yml | 51 ++++++++++++++++ .github/workflows/windows.yaml | 99 ------------------------------- .travis.yml | 89 ---------------------------- Makefile | 5 +- appveyor.yml | 50 ---------------- ci/runtests.bat | 9 --- ci/runtests.sh | 8 --- 11 files changed, 100 insertions(+), 441 deletions(-) delete mode 100644 .github/workflows/linux.yml delete mode 100644 .github/workflows/mac.yml create mode 100644 .github/workflows/test.yml create mode 100644 .github/workflows/wheel.yml delete mode 100644 .github/workflows/windows.yaml delete mode 100644 .travis.yml delete mode 100644 appveyor.yml delete mode 100644 ci/runtests.bat delete mode 100644 ci/runtests.sh diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index c5459539..2961ed35 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with: python-version: '3.x' architecture: 'x64' diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml deleted file mode 100644 index 95c672b2..00000000 --- a/.github/workflows/linux.yml +++ /dev/null @@ -1,103 +0,0 @@ -name: Build Linux Wheels -on: - push: - pull_request: - create: - -jobs: - build: - # We want to run on external PRs, but not on our own internal PRs as they'll be run - # by the push to the branch. 
- if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository - - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v1 - - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - - name: Cythonize - shell: bash - run: | - pip install -U pip - pip -V - pip install -r requirements.txt - make cython - #python setup.py sdist - - - name: Build wheels - shell: bash - run: | - make linux-wheel - - - name: Install qemu-user-static for docker - shell: bash - run: | - docker run --rm --privileged multiarch/qemu-user-static --reset -p yes - - - name: Build arm64 wheels - shell: bash - run: | - make linux-arm64-wheel - - - name: Run test (3.8) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - - name: Set up Python 3.10 - uses: actions/setup-python@v1 - with: - python-version: "3.10" - - - name: Run test (3.10) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - name: Set up Python 3.9 - uses: actions/setup-python@v1 - with: - python-version: 3.9 - - - name: Run test (3.9) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - name: Set up Python 3.7 - uses: actions/setup-python@v1 - with: - python-version: 3.7 - - - name: Run test (3.7) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - name: Set up Python 3.6 - uses: actions/setup-python@v1 - with: - python-version: 3.6 - - - name: Run test (3.6) - run: | - pip install pytest - pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse - pytest -v test - - - - name: Upload Wheels - uses: actions/upload-artifact@v1 - with: - name: linux-wheels - path: ./dist/wheelhouse/ diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml deleted file mode 100644 index 85844e7a..00000000 --- a/.github/workflows/mac.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: Build macOS Wheels -on: - push: - create: - -jobs: - build: - runs-on: macos-latest - - steps: - - name: Checkout - uses: actions/checkout@v1 - - # Python 3.9 - - name: Set up Python 3.9 - uses: actions/setup-python@v1 - with: - python-version: "3.9" - - - name: Cythonize - run: | - pip install -U pip - pip install -r requirements.txt - make cython - - - name: Build wheels - uses: pypa/cibuildwheel@v2.2.2 - env: - CIBW_ARCHS_MACOS: x86_64 universal2 - CIBW_SKIP: pp* - - - name: Run test - run: | - ls wheelhouse/ - pip install pytest - pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index - pytest -v test - - # Python 3.10 - - name: Set up Python 3.10 - uses: actions/setup-python@v1 - with: - python-version: "3.10" - - - name: Run test - run: | - pip install pytest - pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index - pytest -v test - - # Python 3.8 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: "3.8" - - - name: Run test - run: | - pip install pytest - pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index - pytest -v test - - # Python 3.7 - - name: Set up Python 3.7 - uses: actions/setup-python@v1 - with: - python-version: "3.7" - - - name: Run test - run: | - pip install pytest - pip install -v msgpack --only-binary :all: -f wheelhouse/ --no-index - pytest -v test - - - - name: Upload Wheels - uses: 
actions/upload-artifact@v1 - with: - name: macos-wheels - path: ./wheelhouse/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..6e497e00 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,45 @@ +name: Run tests +on: + push: + branches: [main] + pull_request: + create: + +jobs: + test: + strategy: + matrix: + os: [ubuntu-20.04, windows-2022, macos-10.15] + py: ["3.10", "3.9", "3.8", "3.7", "3.6"] + + runs-on: ${{ matrix.os }} + name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.py }} + cache: "pip" + + - name: Build + shell: bash + run: | + pip install -U pip + pip install -r requirements.txt pytest + make cython + pip install . + + - name: Test (C extension) + shell: bash + run: | + pytest -v test + + - name: Test (pure Python fallback) + shell: bash + run: | + MSGPACK_PUREPYTHON=1 pytest -v test + diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml new file mode 100644 index 00000000..5627b5fc --- /dev/null +++ b/.github/workflows/wheel.yml @@ -0,0 +1,51 @@ +name: Build Wheels +on: + push: + branches: [main] + create: + +jobs: + build_wheels: + strategy: + matrix: + os: [ubuntu-20.04, windows-2022, macos-10.15] + runs-on: ${{ matrix.os }} + name: Build wheels on ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v1 + with: + platforms: arm64 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + cache: "pip" + + - name: Prepare + shell: bash + run: | + pip install -r requirements.txt + make cython + + - name: Build + uses: pypa/cibuildwheel@v2.2.2 + env: + CIBW_TEST_REQUIRES: "pytest" + CIBW_TEST_COMMAND: "pytest {package}/test" + CIBW_ARCHS_LINUX: auto aarch64 + CIBW_ARCHS_MACOS: x86_64 universal2 arm64 + CIBW_SKIP: pp* + + - name: Upload Wheels + uses: actions/upload-artifact@v1 + with: + name: Wheels + path: wheelhouse + diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml deleted file mode 100644 index 0ce50f58..00000000 --- a/.github/workflows/windows.yaml +++ /dev/null @@ -1,99 +0,0 @@ -name: Build and test windows wheels -on: - push: - pull_request: - create: - -jobs: - build: - # We want to run on external PRs, but not on our own internal PRs as they'll be run - # by the push to the branch. 
- if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository - - runs-on: windows-latest - steps: - - name: Checkout - uses: actions/checkout@v1 - - - name: Cythonize - shell: bash - run: | - pip install -U Cython - make cython - #python setup.py sdist - - - name: Python 3.6 (amd64) - env: - PYTHON: "py -3.6-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.6 (x86) - env: - PYTHON: "py -3.6-32" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.7 (amd64) - env: - PYTHON: "py -3.7-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.7 (x86) - env: - PYTHON: "py -3.7-32" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.8 (amd64) - env: - PYTHON: "py -3.8-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.8 (x86) - env: - PYTHON: "py -3.8-32" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.9 (amd64) - env: - PYTHON: "py -3.9-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.9 (x86) - env: - PYTHON: "py -3.9-32" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.10 (amd64) - env: - PYTHON: "py -3.10-64" - shell: bash - run: | - ci/runtests.sh - - - name: Python 3.10 (x86) - env: - PYTHON: "py -3.10-32" - shell: bash - run: | - ci/runtests.sh - - - name: Upload Wheels - uses: actions/upload-artifact@v1 - with: - name: win-wheels - path: ./dist diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4974d26c..00000000 --- a/.travis.yml +++ /dev/null @@ -1,89 +0,0 @@ -version: ~> 1.0 -dist: xenial -language: python -cache: pip -arch: - - arm64 - -python: - # Available Python (PyPy) can be listed by: - # - # $ aws s3 ls s3://travis-python-archives/binaries/ubuntu/16.04/x86_64/ - - "3.6" - - "3.7" - - "3.8" - - "3.9-dev" - -_pure: &pure - install: - - pip install -U pip - - pip install -U pytest pytest-cov codecov - - pip install . - script: - - pytest --cov=msgpack -v test - -matrix: - include: - - name: 32bit build - arch: amd64 - language: python - services: - - docker - env: - - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686 - install: - - pip install -U pip - - pip install -r requirements.txt - - make cython - - docker pull $DOCKER_IMAGE - script: - - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh - - - arch: arm64 - name: arm64 32bit build - language: python - services: - - docker - env: - - DOCKER_IMAGE=quay.io/pypa/manylinux2014_aarch64 - install: - - pip install -U pip - - pip install -r requirements.txt - - make cython - - docker pull $DOCKER_IMAGE - script: - - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh - - - name: "Python 2 (fallback)" - python: "2.7" - <<: *pure - - - name: "pypy2.7" - arch: amd64 - python: "pypy2.7-7.1.1" - <<: *pure - - - name: "pypy3" - arch: amd64 - python: "pypy3.6-7.1.1" - <<: *pure - -install: - - pip install -U pip - - pip install -U pytest pytest-cov codecov - - pip install -r requirements.txt # Cython - - make cython - - pip install -e . 
- -script: - - python -c 'import sys; print(hex(sys.maxsize))' - - python -c 'from msgpack import _cmsgpack' - - pytest --cov=msgpack -v test - - MSGPACK_PUREPYTHON=x pytest --cov=msgpack -v test - -after_success: - - if [ -f .coverage ]; then - codecov; - fi - -# vim: sw=2 ts=2 diff --git a/Makefile b/Makefile index b50fa80c..02a4a170 100644 --- a/Makefile +++ b/Makefile @@ -31,8 +31,9 @@ clean: .PHONY: update-docker update-docker: - docker pull quay.io/pypa/manylinux1_i686 - docker pull quay.io/pypa/manylinux1_x86_64 + docker pull quay.io/pypa/manylinux2014_i686 + docker pull quay.io/pypa/manylinux2014_x86_64 + docker pull quay.io/pypa/manylinux2014_aarch64 .PHONY: linux-wheel linux-wheel: diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index f338e177..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,50 +0,0 @@ -environment: - matrix: - # For Python versions available on Appveyor, see - # http://www.appveyor.com/docs/installed-software#python - - PYTHON: "C:\\Python36" - -install: - # We need wheel installed to build wheels - - "%PYTHON%\\python.exe -m pip install -U pip" - - "%PYTHON%\\python.exe -m pip install -U cython" - - "%PYTHON%\\Scripts\\cython --cplus msgpack/_cmsgpack.pyx" - -build: off - -test_script: - # Put your test command here. - # Note that you must use the environment variable %PYTHON% to refer to - # the interpreter you're using - Appveyor does not do anything special - # to put the Python version you want to use on PATH. - - set PYTHON="C:\\Python27" - - ci\\runtests.bat - - set PYTHON="C:\\Python27-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python36" - - ci\\runtests.bat - - set PYTHON="C:\\Python36-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python37" - - ci\\runtests.bat - - set PYTHON="C:\\Python37-x64" - - ci\\runtests.bat - - set PYTHON="C:\\Python38" - - ci\\runtests.bat - - set PYTHON="C:\\Python38-x64" - - ci\\runtests.bat - -after_test: - # This step builds your wheels. - # Again, you need to use %PYTHON% to get the correct interpreter - -artifacts: - # bdist_wheel puts your built wheel in the dist directory - - path: dist\*.whl - -#on_success: -# You can use this step to upload your artifacts to a public website. -# See Appveyor's documentation for more details. Or you can simply -# access your wheels from the Appveyor "artifacts" tab for your build. - -# vim: set shiftwidth=2 diff --git a/ci/runtests.bat b/ci/runtests.bat deleted file mode 100644 index 4ae2f708..00000000 --- a/ci/runtests.bat +++ /dev/null @@ -1,9 +0,0 @@ -%PYTHON%\python.exe -m pip install -U pip wheel pytest -%PYTHON%\python.exe setup.py build_ext -i -%PYTHON%\python.exe setup.py install -%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))" -%PYTHON%\python.exe -c "from msgpack import _cmsgpack" -%PYTHON%\python.exe setup.py bdist_wheel -%PYTHON%\python.exe -m pytest -v test -SET EL=%ERRORLEVEL% -exit /b %EL% diff --git a/ci/runtests.sh b/ci/runtests.sh deleted file mode 100644 index 5d87f696..00000000 --- a/ci/runtests.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -ex -${PYTHON} -VV -${PYTHON} -m pip install setuptools wheel pytest -${PYTHON} setup.py build_ext -if -${PYTHON} -c "from msgpack import _cmsgpack" -${PYTHON} setup.py bdist_wheel -${PYTHON} -m pytest -v test From 89ea57747ebcb0fad004a92ab00ebf13c10b2d51 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 19 Jan 2022 14:42:28 +0900 Subject: [PATCH 279/349] Don't define __*_ENDIAN__ macro on Unix. 
(#495) --- msgpack/fallback.py | 4 ---- setup.py | 6 ++---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b27acb29..9731a22f 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -11,7 +11,6 @@ def dict_iteritems(d): return d.iteritems() - else: int_types = int unicode = str @@ -32,7 +31,6 @@ def _is_recursionerror(e): and e.args[0].startswith("maximum recursion depth exceeded") ) - else: def _is_recursionerror(e): @@ -68,7 +66,6 @@ def write(self, s): def getvalue(self): return self.builder.build() - else: USING_STRINGBUILDER = False from io import BytesIO as StringIO @@ -143,7 +140,6 @@ def _unpack_from(f, b, o=0): """Explicit type cast for legacy struct.unpack_from""" return struct.unpack_from(f, bytes(b), o) - else: _unpack_from = struct.unpack_from diff --git a/setup.py b/setup.py index 01f125fb..502ed33e 100755 --- a/setup.py +++ b/setup.py @@ -79,12 +79,10 @@ def __init__(self, *args, **kwargs): libraries = [] +macros = [] + if sys.platform == "win32": libraries.append("ws2_32") - -if sys.byteorder == "big": - macros = [("__BIG_ENDIAN__", "1")] -else: macros = [("__LITTLE_ENDIAN__", "1")] ext_modules = [] From cb50b2081b21e5cb4a364d292f55092c98aa1a6f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 3 Mar 2022 12:29:55 +0900 Subject: [PATCH 280/349] Update setuptools and black (#498) * Use setuptools * Use black==22.1.0 --- .github/workflows/black.yaml | 6 +++--- Makefile | 2 +- msgpack/__init__.py | 5 ++++- msgpack/_version.py | 1 - msgpack/ext.py | 8 +++---- msgpack/fallback.py | 12 +++++------ requirements.txt | 3 +++ setup.cfg | 32 +++++++++++++++++++++++++++ setup.py | 42 ++---------------------------------- test/test_limits.py | 12 +++++------ test/test_memoryview.py | 28 ++++++++++++------------ test/test_sequnpack.py | 4 ++-- test/test_timestamp.py | 12 +++++------ 13 files changed, 83 insertions(+), 84 deletions(-) delete mode 100644 msgpack/_version.py create mode 100644 setup.cfg diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index 2961ed35..be137ae6 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -17,9 +17,9 @@ jobs: architecture: 'x64' - name: Checkout - uses: actions/checkout@v1 + uses: actions/checkout@v2 - name: Black Code Formatter run: | - pip install black - black --diff --check msgpack/ test/ setup.py + pip install black==22.1.0 + black -S --diff --check msgpack/ test/ setup.py diff --git a/Makefile b/Makefile index 02a4a170..415dcfdd 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ all: cython .PHONY: black black: - black msgpack/ test/ setup.py + black -S msgpack/ test/ setup.py .PHONY: cython cython: diff --git a/msgpack/__init__.py b/msgpack/__init__.py index d6705e22..1929df31 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,5 +1,4 @@ # coding: utf-8 -from ._version import version from .exceptions import * from .ext import ExtType, Timestamp @@ -7,6 +6,10 @@ import sys +version = (1, 0, 4, 'dev') +__version__ = "1.0.4dev" + + if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: from .fallback import Packer, unpackb, Unpacker else: diff --git a/msgpack/_version.py b/msgpack/_version.py deleted file mode 100644 index fb878b35..00000000 --- a/msgpack/_version.py +++ /dev/null @@ -1 +0,0 @@ -version = (1, 0, 3) diff --git a/msgpack/ext.py b/msgpack/ext.py index 4eb9dd65..25544c55 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -59,7 +59,7 @@ def __init__(self, seconds, nanoseconds=0): raise 
TypeError("seconds must be an interger") if not isinstance(nanoseconds, int_types): raise TypeError("nanoseconds must be an integer") - if not (0 <= nanoseconds < 10 ** 9): + if not (0 <= nanoseconds < 10**9): raise ValueError( "nanoseconds must be a non-negative integer less than 999999999." ) @@ -143,7 +143,7 @@ def from_unix(unix_sec): :type unix_float: int or float. """ seconds = int(unix_sec // 1) - nanoseconds = int((unix_sec % 1) * 10 ** 9) + nanoseconds = int((unix_sec % 1) * 10**9) return Timestamp(seconds, nanoseconds) def to_unix(self): @@ -161,7 +161,7 @@ def from_unix_nano(unix_ns): :param int unix_ns: Posix timestamp in nanoseconds. :rtype: Timestamp """ - return Timestamp(*divmod(unix_ns, 10 ** 9)) + return Timestamp(*divmod(unix_ns, 10**9)) def to_unix_nano(self): """Get the timestamp as a unixtime in nanoseconds. @@ -169,7 +169,7 @@ def to_unix_nano(self): :returns: posix timestamp in nanoseconds :rtype: int """ - return self.seconds * 10 ** 9 + self.nanoseconds + return self.seconds * 10**9 + self.nanoseconds def to_datetime(self): """Get the timestamp as a UTC datetime. diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9731a22f..5f215e95 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -318,7 +318,7 @@ def __init__( self._buf_checkpoint = 0 if not max_buffer_size: - max_buffer_size = 2 ** 31 - 1 + max_buffer_size = 2**31 - 1 if max_str_len == -1: max_str_len = max_buffer_size if max_bin_len == -1: @@ -800,20 +800,20 @@ def _pack( raise OverflowError("Integer value out of range") if check(obj, (bytes, bytearray)): n = len(obj) - if n >= 2 ** 32: + if n >= 2**32: raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) if check(obj, unicode): obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) - if n >= 2 ** 32: + if n >= 2**32: raise ValueError("String is too large") self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): n = len(obj) * obj.itemsize - if n >= 2 ** 32: + if n >= 2**32: raise ValueError("Memoryview is too large") self._pack_bin_header(n) return self._buffer.write(obj) @@ -895,7 +895,7 @@ def pack_map_pairs(self, pairs): return ret def pack_array_header(self, n): - if n >= 2 ** 32: + if n >= 2**32: raise ValueError self._pack_array_header(n) if self._autoreset: @@ -904,7 +904,7 @@ def pack_array_header(self, n): return ret def pack_map_header(self, n): - if n >= 2 ** 32: + if n >= 2**32: raise ValueError self._pack_map_header(n) if self._autoreset: diff --git a/requirements.txt b/requirements.txt index 180fe851..9bdb4789 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ # Also declared in pyproject.toml, if updating here please also update there Cython~=0.29.13 + +# dev only tools. 
no need to add pyproject +black==22.1.0 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..1cb6ce36 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,32 @@ +[metadata] +name = msgpack +#version = attr: msgpack.__version__ +version = attr: msgpack.version +license = Apache 2.0 +author = Inada Naoki +author_email = songofacandy@gmail.com +description = MessagePack serializer +long_description = file: README.md +long_description_content_type = text/markdown +url = https://msgpack.org/ + +project_urls = + Documentation = https://msgpack-python.readthedocs.io/ + Source = https://github.com/msgpack/msgpack-python + Tracker = https://github.com/msgpack/msgpack-python/issues + +classifiers = + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: Implementation :: CPython + Programming Language :: Python :: Implementation :: PyPy + Intended Audience :: Developers + License :: OSI Approved :: Apache Software License + +[flake8] +max_line_length = 100 + diff --git a/setup.py b/setup.py index 502ed33e..9630cda0 100755 --- a/setup.py +++ b/setup.py @@ -4,10 +4,9 @@ import os import sys from glob import glob -from distutils.command.sdist import sdist from setuptools import setup, Extension - -from distutils.command.build_ext import build_ext +from setuptools.command.build_ext import build_ext +from setuptools.command.sdist import sdist PYPY = hasattr(sys, "pypy_version_info") @@ -65,12 +64,6 @@ def build_extension(self, ext): print(e) -exec(open("msgpack/_version.py").read()) - -version_str = ".".join(str(x) for x in version[:3]) -if len(version) > 3 and version[3] != "final": - version_str += version[3] - # Cython is required for sdist class Sdist(sdist): def __init__(self, *args, **kwargs): @@ -99,39 +92,8 @@ def __init__(self, *args, **kwargs): del libraries, macros -desc = "MessagePack (de)serializer." 
-with io.open("README.md", encoding="utf-8") as f: - long_desc = f.read() -del f - setup( - name="msgpack", - author="Inada Naoki", - author_email="songofacandy@gmail.com", - version=version_str, cmdclass={"build_ext": BuildExt, "sdist": Sdist}, ext_modules=ext_modules, packages=["msgpack"], - description=desc, - long_description=long_desc, - long_description_content_type="text/markdown", - url="https://msgpack.org/", - project_urls={ - "Documentation": "https://msgpack-python.readthedocs.io/", - "Source": "https://github.com/msgpack/msgpack-python", - "Tracker": "https://github.com/msgpack/msgpack-python/issues", - }, - license="Apache 2.0", - classifiers=[ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - ], ) diff --git a/test/test_limits.py b/test/test_limits.py index 65e6bcc7..4314c2c0 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -16,12 +16,12 @@ def test_integer(): - x = -(2 ** 63) + x = -(2**63) assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): packb(x - 1) - x = 2 ** 64 - 1 + x = 2**64 - 1 assert unpackb(packb(x)) == x with pytest.raises(PackOverflowError): packb(x + 1) @@ -29,16 +29,16 @@ def test_integer(): def test_array_header(): packer = Packer() - packer.pack_array_header(2 ** 32 - 1) + packer.pack_array_header(2**32 - 1) with pytest.raises(PackValueError): - packer.pack_array_header(2 ** 32) + packer.pack_array_header(2**32) def test_map_header(): packer = Packer() - packer.pack_map_header(2 ** 32 - 1) + packer.pack_map_header(2**32 - 1) with pytest.raises(PackValueError): - packer.pack_array_header(2 ** 32) + packer.pack_array_header(2**32) def test_max_str_len(): diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 86b2c1f7..84941db8 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -53,46 +53,46 @@ def test_fixstr_from_float(): def test_str16_from_byte(): - _runtest("B", 2 ** 8, b"\xda", b"\x01\x00", False) - _runtest("B", 2 ** 16 - 1, b"\xda", b"\xff\xff", False) + _runtest("B", 2**8, b"\xda", b"\x01\x00", False) + _runtest("B", 2**16 - 1, b"\xda", b"\xff\xff", False) def test_str16_from_float(): - _runtest("f", 2 ** 8, b"\xda", b"\x01\x00", False) - _runtest("f", 2 ** 16 - 4, b"\xda", b"\xff\xfc", False) + _runtest("f", 2**8, b"\xda", b"\x01\x00", False) + _runtest("f", 2**16 - 4, b"\xda", b"\xff\xfc", False) def test_str32_from_byte(): - _runtest("B", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False) + _runtest("B", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) def test_str32_from_float(): - _runtest("f", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False) + _runtest("f", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) def test_bin8_from_byte(): _runtest("B", 1, b"\xc4", b"\x01", True) - _runtest("B", 2 ** 8 - 1, b"\xc4", b"\xff", True) + _runtest("B", 2**8 - 1, b"\xc4", b"\xff", True) def test_bin8_from_float(): _runtest("f", 4, b"\xc4", b"\x04", True) - _runtest("f", 2 ** 8 - 4, b"\xc4", b"\xfc", True) + _runtest("f", 2**8 - 4, b"\xc4", b"\xfc", True) def test_bin16_from_byte(): - _runtest("B", 2 ** 8, b"\xc5", b"\x01\x00", True) - _runtest("B", 2 ** 16 - 1, b"\xc5", b"\xff\xff", True) + 
_runtest("B", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("B", 2**16 - 1, b"\xc5", b"\xff\xff", True) def test_bin16_from_float(): - _runtest("f", 2 ** 8, b"\xc5", b"\x01\x00", True) - _runtest("f", 2 ** 16 - 4, b"\xc5", b"\xff\xfc", True) + _runtest("f", 2**8, b"\xc5", b"\x01\x00", True) + _runtest("f", 2**16 - 4, b"\xc5", b"\xff\xfc", True) def test_bin32_from_byte(): - _runtest("B", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True) + _runtest("B", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) def test_bin32_from_float(): - _runtest("f", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True) + _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 6293a453..9f20c07e 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -118,8 +118,8 @@ def test_issue124(): def test_unpack_tell(): stream = io.BytesIO() - messages = [2 ** i - 1 for i in range(65)] - messages += [-(2 ** i) for i in range(1, 64)] + messages = [2**i - 1 for i in range(65)] + messages += [-(2**i) for i in range(1, 64)] messages += [ b"hello", b"hello" * 1000, diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 4e26489b..253228e7 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -10,31 +10,31 @@ def test_timestamp(): # timestamp32 - ts = Timestamp(2 ** 32 - 1) + ts = Timestamp(2**32 - 1) assert ts.to_bytes() == b"\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xd6\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2 ** 32 - 1 and ts.nanoseconds == 0 + assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0 # timestamp64 - ts = Timestamp(2 ** 34 - 1, 999999999) + ts = Timestamp(2**34 - 1, 999999999) assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xd7\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2 ** 34 - 1 and ts.nanoseconds == 999999999 + assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999 # timestamp96 - ts = Timestamp(2 ** 63 - 1, 999999999) + ts = Timestamp(2**63 - 1, 999999999) assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff" packed = msgpack.packb(ts) assert packed == b"\xc7\x0c\xff" + ts.to_bytes() unpacked = msgpack.unpackb(packed) assert ts == unpacked - assert ts.seconds == 2 ** 63 - 1 and ts.nanoseconds == 999999999 + assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999 # negative fractional ts = Timestamp.from_unix(-2.3) # s: -3, ns: 700000000 From 849c8063817894f7dba166a19fbdbb9ffd8c2b80 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 14 Mar 2022 03:23:11 +0100 Subject: [PATCH 281/349] Use PyFloat_Pack8() on Python 3.11a7 (#499) Python 3.11a7 adds public functions: * PyFloat_Pack4(), PyFloat_Pack8() * PyFloat_Unpack4(), PyFloat_Unpack8() https://bugs.python.org/issue46906 --- msgpack/pack_template.h | 9 +++++++++ msgpack/unpack_template.h | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 0e940b84..7d479b6d 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -568,7 +568,12 @@ static inline int msgpack_pack_float(msgpack_packer* x, float d) { unsigned char buf[5]; buf[0] = 0xca; + +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack4(d, (char *)&buf[1], 0); +#else _PyFloat_Pack4(d, &buf[1], 0); +#endif msgpack_pack_append_buffer(x, buf, 5); } @@ -576,7 +581,11 @@ static inline int 
msgpack_pack_double(msgpack_packer* x, double d) { unsigned char buf[9]; buf[0] = 0xcb; +#if PY_VERSION_HEX >= 0x030B00A7 + PyFloat_Pack8(d, (char *)&buf[1], 0); +#else _PyFloat_Pack8(d, &buf[1], 0); +#endif msgpack_pack_append_buffer(x, buf, 9); } diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index 9924b9c6..8b9fcc19 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -243,10 +243,20 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize _msgpack_load32(uint32_t,n)+1, _ext_zero); case CS_FLOAT: { - double f = _PyFloat_Unpack4((unsigned char*)n, 0); + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack4((const char*)n, 0); +#else + f = _PyFloat_Unpack4((unsigned char*)n, 0); +#endif push_fixed_value(_float, f); } case CS_DOUBLE: { - double f = _PyFloat_Unpack8((unsigned char*)n, 0); + double f; +#if PY_VERSION_HEX >= 0x030B00A7 + f = PyFloat_Unpack8((const char*)n, 0); +#else + f = _PyFloat_Unpack8((unsigned char*)n, 0); +#endif push_fixed_value(_double, f); } case CS_UINT_8: push_fixed_value(_uint8, *(uint8_t*)n); From 6a721faa778e4db23f76bc8a0f9adf85f59f69c7 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 2 May 2022 02:26:53 -0600 Subject: [PATCH 282/349] Upgrade black (#505) --- .github/workflows/black.yaml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index be137ae6..0a0a737d 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -21,5 +21,5 @@ jobs: - name: Black Code Formatter run: | - pip install black==22.1.0 + pip install black==22.3.0 black -S --diff --check msgpack/ test/ setup.py diff --git a/requirements.txt b/requirements.txt index 9bdb4789..f557888d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ Cython~=0.29.13 # dev only tools. no need to add pyproject -black==22.1.0 +black==22.3.0 From b901b179d1976e09eb074803dcbb17d8d150c69f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 23 May 2022 04:52:09 +0000 Subject: [PATCH 283/349] Update Cython to 0.29.30 --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a9eb8aa9..195795f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = [ # Also declared in requirements.txt, if updating here please also update # there - "Cython~=0.29.13", + "Cython~=0.29.30", "setuptools >= 35.0.2", ] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index f557888d..9f3c1a0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # Also declared in pyproject.toml, if updating here please also update there -Cython~=0.29.13 +Cython~=0.29.30 # dev only tools. no need to add pyproject black==22.3.0 From b75e3412fb8a2b6d6cd1da1b7063e14f6bfc0337 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 23 May 2022 05:01:08 +0000 Subject: [PATCH 284/349] Fix pip upgrade --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6e497e00..2a019262 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,7 @@ jobs: - name: Build shell: bash run: | - pip install -U pip + python -m pip install -U pip pip install -r requirements.txt pytest make cython pip install . 
From 500a238028bdebe123b502b07769578b5f0e8a3a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 24 May 2022 19:46:51 +0900 Subject: [PATCH 285/349] Fix Unpacker max_buffer_length handling (#506) --- msgpack/_unpacker.pyx | 28 ++++++++++++---------------- msgpack/fallback.py | 2 ++ test/test_sequnpack.py | 11 ++++++++++- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 27facc0a..8b06661e 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -440,34 +440,30 @@ cdef class Unpacker(object): self.buf_size = buf_size self.buf_tail = tail + _buf_len - cdef read_from_file(self): - next_bytes = self.file_like_read( - min(self.read_size, - self.max_buffer_size - (self.buf_tail - self.buf_head) - )) + cdef int read_from_file(self) except -1: + cdef Py_ssize_t remains = self.max_buffer_size - (self.buf_tail - self.buf_head) + if remains <= 0: + raise BufferFull + + next_bytes = self.file_like_read(min(self.read_size, remains)) if next_bytes: self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) else: self.file_like = None + return 0 cdef object _unpack(self, execute_fn execute, bint iter=0): cdef int ret cdef object obj cdef Py_ssize_t prev_head - if self.buf_head >= self.buf_tail and self.file_like is not None: - self.read_from_file() - while 1: prev_head = self.buf_head - if prev_head >= self.buf_tail: - if iter: - raise StopIteration("No more data to unpack.") - else: - raise OutOfData("No more data to unpack.") - - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - self.stream_offset += self.buf_head - prev_head + if prev_head < self.buf_tail: + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + self.stream_offset += self.buf_head - prev_head + else: + ret = 0 if ret == 1: obj = unpack_data(&self.ctx) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 5f215e95..f560c7b5 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -423,6 +423,8 @@ def _reserve(self, n, raise_outofdata=True): # Read from file remain_bytes = -remain_bytes + if remain_bytes + len(self._buffer) > self._max_buffer_size: + raise BufferFull while remain_bytes > 0: to_read_bytes = max(self._read_size, remain_bytes) read_data = self.file_like.read(to_read_bytes) diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 9f20c07e..c091076b 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -2,7 +2,7 @@ # coding: utf-8 import io from msgpack import Unpacker, BufferFull -from msgpack import pack +from msgpack import pack, packb from msgpack.exceptions import OutOfData from pytest import raises @@ -78,6 +78,15 @@ def test_maxbuffersize(): assert ord("b") == next(unpacker) +def test_maxbuffersize_file(): + buff = io.BytesIO(packb(b"a" * 10) + packb([b"a" * 20] * 2)) + unpacker = Unpacker(buff, read_size=1, max_buffer_size=19, max_bin_len=20) + assert unpacker.unpack() == b"a" * 10 + # assert unpacker.unpack() == [b"a" * 20]*2 + with raises(BufferFull): + print(unpacker.unpack()) + + def test_readbytes(): unpacker = Unpacker(read_size=3) unpacker.feed(b"foobar") From 63837a44d855017f1b2f667afa5ac684fd65591d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 24 May 2022 20:13:07 +0900 Subject: [PATCH 286/349] ci: Update action versions. 
(#507) --- .github/workflows/test.yml | 7 +++---- .github/workflows/wheel.yml | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2a019262..2a796aa2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,17 +10,17 @@ jobs: strategy: matrix: os: [ubuntu-20.04, windows-2022, macos-10.15] - py: ["3.10", "3.9", "3.8", "3.7", "3.6"] + py: ["3.11-dev", "3.10", "3.9", "3.8", "3.7", "3.6"] runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.py }} cache: "pip" @@ -28,7 +28,6 @@ jobs: - name: Build shell: bash run: | - python -m pip install -U pip pip install -r requirements.txt pytest make cython pip install . diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 5627b5fc..541654db 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up QEMU if: runner.os == 'Linux' @@ -23,7 +23,7 @@ jobs: platforms: arm64 - name: Set up Python 3.9 - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: 3.9 cache: "pip" @@ -35,7 +35,7 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.2.2 + uses: pypa/cibuildwheel@v2.5.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" From a34dc945bfe39c1f2f91af2403d906069613ea41 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 25 May 2022 10:00:57 +0900 Subject: [PATCH 287/349] 1.0.4rc1 --- msgpack/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 1929df31..81b2e67b 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -6,8 +6,8 @@ import sys -version = (1, 0, 4, 'dev') -__version__ = "1.0.4dev" +version = (1, 0, 4, 'rc1') +__version__ = "1.0.4rc1" if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: From caadbf2df5a87039a52a5dcf4fc3f151bba70eed Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 25 May 2022 12:10:47 +0900 Subject: [PATCH 288/349] Use Actions to publish to PyPI --- .github/workflows/test.yml | 5 +++++ .github/workflows/wheel.yml | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2a796aa2..d01d74cb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,3 +42,8 @@ jobs: run: | MSGPACK_PUREPYTHON=1 pytest -v test + - name: Publish Wheels to TestPyPI + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 541654db..9e8dce86 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -43,9 +43,15 @@ jobs: CIBW_ARCHS_MACOS: x86_64 universal2 arm64 CIBW_SKIP: pp* - - name: Upload Wheels + - name: Upload Wheels to artifact uses: actions/upload-artifact@v1 with: name: Wheels path: wheelhouse + - name: Publish Wheels to TestPyPI + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages_dir: wheelhouse + 
password: ${{ secrets.PYPI_API_TOKEN }} From b5acfd53833c3dbd379e539cc6e540cec83d0a99 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 3 Jun 2022 13:46:51 +0900 Subject: [PATCH 289/349] Release v1.0.4 (#509) --- .github/workflows/wheel.yml | 2 +- ChangeLog.rst | 10 ++++++++++ msgpack/__init__.py | 4 ++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 9e8dce86..d73898c2 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -35,7 +35,7 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.5.0 + uses: pypa/cibuildwheel@v2.6.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" diff --git a/ChangeLog.rst b/ChangeLog.rst index fc6df680..a11c8144 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,13 @@ +1.0.4 +===== + +Release Date: 2022-06-03 + +* Support Python 3.11 (beta). +* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495 +* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499 +* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506 + 1.0.3 ===== diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 81b2e67b..50710218 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -6,8 +6,8 @@ import sys -version = (1, 0, 4, 'rc1') -__version__ = "1.0.4rc1" +version = (1, 0, 4) +__version__ = "1.0.4" if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: From 9d45926a596028e39ec59dd909a56eb5e9e8fee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Kul=C3=ADk?= Date: Tue, 2 Aug 2022 06:19:56 +0200 Subject: [PATCH 290/349] Usef `__BYTE_ORDER__` instead of `__BYTE_ORDER` (#513) __BYTE_ORDER__ is common predefined macro available on at least gcc and clang. __BYTE_ORDER is macro defined in platform specific headers. 
--- msgpack/sysdep.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index ed9c1bc0..ae28f0c5 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -61,14 +61,12 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif #endif -#else -#include /* __BYTE_ORDER */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define __BIG_ENDIAN__ #elif _WIN32 #define __LITTLE_ENDIAN__ From edca770071fc702e0b4c33f87fb0fa3682b486b4 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 8 Aug 2022 15:08:40 +0900 Subject: [PATCH 291/349] Fix build error caused by ntohs, ntohl (#514) --- msgpack/sysdep.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index ae28f0c5..70673004 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -61,6 +61,8 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif #endif +#else /* _WIN32 */ +#include /* ntohs, ntohl */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) @@ -93,7 +95,7 @@ typedef unsigned int _msgpack_atomic_counter_t; #ifdef _WIN32 # if defined(ntohl) # define _msgpack_be32(x) ntohl(x) -# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# elif defined(_byteswap_ulong) || defined(_MSC_VER) # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) # else # define _msgpack_be32(x) \ @@ -106,7 +108,7 @@ typedef unsigned int _msgpack_atomic_counter_t; # define _msgpack_be32(x) ntohl(x) #endif -#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) +#if defined(_byteswap_uint64) || defined(_MSC_VER) # define _msgpack_be64(x) (_byteswap_uint64(x)) #elif defined(bswap_64) # define _msgpack_be64(x) bswap_64(x) From 44a80603838ea480e66c9235036ff742f4013200 Mon Sep 17 00:00:00 2001 From: Matthieu Darbois Date: Fri, 9 Sep 2022 09:16:12 +0200 Subject: [PATCH 292/349] Add python 3.11 wheels (#517) --- .github/workflows/wheel.yml | 4 ++-- setup.cfg | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index d73898c2..770b565a 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -8,7 +8,7 @@ jobs: build_wheels: strategy: matrix: - os: [ubuntu-20.04, windows-2022, macos-10.15] + os: [ubuntu-22.04, windows-2022, macos-10.15] runs-on: ${{ matrix.os }} name: Build wheels on ${{ matrix.os }} @@ -35,7 +35,7 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.6.0 + uses: pypa/cibuildwheel@v2.9.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" diff --git a/setup.cfg b/setup.cfg index 1cb6ce36..e3782842 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,6 +22,7 @@ classifiers = Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy Intended Audience :: Developers From c3995669f1f821596714240c2cd07943810f8658 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 18 Jan 2023 08:08:58 +0000 Subject: [PATCH 293/349] Remove unused code --- msgpack/pack.h | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/msgpack/pack.h 
b/msgpack/pack.h index 4f3ce1d9..1e849acc 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -71,7 +71,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ static inline int msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) { -#if PY_MAJOR_VERSION >= 3 assert(PyUnicode_Check(o)); Py_ssize_t len; @@ -87,31 +86,6 @@ msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) if (ret) return ret; return msgpack_pack_raw_body(pk, buf, len); -#else - PyObject *bytes; - Py_ssize_t len; - int ret; - - // py2 - bytes = PyUnicode_AsUTF8String(o); - if (bytes == NULL) - return -1; - - len = PyString_GET_SIZE(bytes); - if (len > limit) { - Py_DECREF(bytes); - return -2; - } - - ret = msgpack_pack_raw(pk, len); - if (ret) { - Py_DECREF(bytes); - return -1; - } - ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len); - Py_DECREF(bytes); - return ret; -#endif } #ifdef __cplusplus From b82d0b62f187552b8108602d7b0451ac362a29cc Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 18 Jan 2023 19:13:44 +0900 Subject: [PATCH 294/349] fallback: Fix packing multidim memoryview (#527) Fix #526 --- msgpack/fallback.py | 2 +- test/test_memoryview.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f560c7b5..e8cebc1b 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -814,7 +814,7 @@ def _pack( self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): - n = len(obj) * obj.itemsize + n = obj.nbytes if n >= 2**32: raise ValueError("Memoryview is too large") self._pack_bin_header(n) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 84941db8..a0939a69 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -96,3 +96,11 @@ def test_bin32_from_byte(): def test_bin32_from_float(): _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) + + +def test_multidim_memoryview(): + # See https://github.com/msgpack/msgpack-python/issues/526 + view = memoryview(b"\00" * 6) + data = view.cast(view.format, (3, 2)) + packed = packb(data) + assert packed == b'\xc4\x06\x00\x00\x00\x00\x00\x00' From 10082295536098d90681da5d7199ca384e8b8ff8 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 18 Jan 2023 19:47:15 +0900 Subject: [PATCH 295/349] Release v1.0.5rc1 (#528) --- .github/workflows/black.yaml | 4 ++-- .github/workflows/test.yml | 6 +++--- .github/workflows/wheel.yml | 2 +- ChangeLog.rst | 9 +++++++++ msgpack/__init__.py | 4 ++-- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index 0a0a737d..1e28b7b5 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -11,13 +11,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: '3.x' architecture: 'x64' - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Black Code Formatter run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d01d74cb..5e41167f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,8 +9,8 @@ jobs: test: strategy: matrix: - os: [ubuntu-20.04, windows-2022, macos-10.15] - py: ["3.11-dev", "3.10", "3.9", "3.8", "3.7", "3.6"] + os: [ubuntu-22.04, windows-2022, macos-10.15] + py: ["3.11", "3.10", "3.9", "3.8", "3.7"] runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on 
${{ matrix.os }} @@ -20,7 +20,7 @@ jobs: uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.py }} cache: "pip" diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 770b565a..b2879fe5 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -35,7 +35,7 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.9.0 + uses: pypa/cibuildwheel@v2.12.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" diff --git a/ChangeLog.rst b/ChangeLog.rst index a11c8144..1b7ae2c3 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,12 @@ +1.0.5rc1 +======== + +Release Date: 2023-01-18 + +* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) +* Add Python 3.11 wheels (#517) +* fallback: Fix packing multidimensional memoryview (#527) + 1.0.4 ===== diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 50710218..501b291b 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -6,8 +6,8 @@ import sys -version = (1, 0, 4) -__version__ = "1.0.4" +version = (1, 0, 5, 'rc', 1) +__version__ = "1.0.5rc1" if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: From e3ef909c47e5a245fc9d711e9d974a5f9df99303 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 18 Jan 2023 13:07:24 +0000 Subject: [PATCH 296/349] Action: Use setup-python@v4 --- .github/workflows/wheel.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index b2879fe5..23050088 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -22,10 +22,10 @@ jobs: with: platforms: arm64 - - name: Set up Python 3.9 - uses: actions/setup-python@v3 + - name: Set up Python 3.x + uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: "3.x" cache: "pip" - name: Prepare From dcb775031c0b1d575b90e822e81e845ebfda4a2e Mon Sep 17 00:00:00 2001 From: Anthon van der Neut Date: Sun, 5 Mar 2023 15:45:38 +0100 Subject: [PATCH 297/349] minor type in exception message (#533) interger -> integer --- msgpack/ext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msgpack/ext.py b/msgpack/ext.py index 25544c55..23e0d6b4 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -56,7 +56,7 @@ def __init__(self, seconds, nanoseconds=0): Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. """ if not isinstance(seconds, int_types): - raise TypeError("seconds must be an interger") + raise TypeError("seconds must be an integer") if not isinstance(nanoseconds, int_types): raise TypeError("nanoseconds must be an integer") if not (0 <= nanoseconds < 10**9): From aa9ce3e2bbc1d3d0476396892c46e704292455ab Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 3 Jun 2022 14:37:21 +0900 Subject: [PATCH 298/349] Action: Run publish on tag creation. 
--- .github/workflows/wheel.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 23050088..5f103a37 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -49,8 +49,8 @@ jobs: name: Wheels path: wheelhouse - - name: Publish Wheels to TestPyPI - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + - name: Publish Wheels to PyPI + if: github.event_name == 'create' && github.event.ref_type == 'tag' uses: pypa/gh-action-pypi-publish@release/v1 with: packages_dir: wheelhouse From 4c55f809fe2231130cf99b20538b26b92f1bea31 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 9 Mar 2023 00:43:28 +0900 Subject: [PATCH 299/349] Release v1.0.5 (#534) --- ChangeLog.rst | 6 +++--- msgpack/__init__.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 1b7ae2c3..53547996 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,7 +1,7 @@ -1.0.5rc1 -======== +1.0.5 +===== -Release Date: 2023-01-18 +Release Date: 2023-03-08 * Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) * Add Python 3.11 wheels (#517) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 501b291b..1300b866 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -6,8 +6,8 @@ import sys -version = (1, 0, 5, 'rc', 1) -__version__ = "1.0.5rc1" +version = (1, 0, 5) +__version__ = "1.0.5" if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: From 35b2d246cfdb19484caa5789512bf71ee378caec Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 9 Mar 2023 00:47:52 +0900 Subject: [PATCH 300/349] Action: Update wheel workflow --- .github/workflows/wheel.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 5f103a37..6cf2fe94 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -8,7 +8,7 @@ jobs: build_wheels: strategy: matrix: - os: [ubuntu-22.04, windows-2022, macos-10.15] + os: ["ubuntu-latest", "windows-latest", "macos-latest"] runs-on: ${{ matrix.os }} name: Build wheels on ${{ matrix.os }} @@ -48,10 +48,3 @@ jobs: with: name: Wheels path: wheelhouse - - - name: Publish Wheels to PyPI - if: github.event_name == 'create' && github.event.ref_type == 'tag' - uses: pypa/gh-action-pypi-publish@release/v1 - with: - packages_dir: wheelhouse - password: ${{ secrets.PYPI_API_TOKEN }} From 0516c2c2a97ef48a2becf30bc8b2365ca16199f1 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 9 Mar 2023 01:22:38 +0900 Subject: [PATCH 301/349] Action: Update test workflow --- .github/workflows/test.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5e41167f..88781025 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,7 +9,7 @@ jobs: test: strategy: matrix: - os: [ubuntu-22.04, windows-2022, macos-10.15] + os: ["ubuntu-latest", "windows-latest", "macos-latest"] py: ["3.11", "3.10", "3.9", "3.8", "3.7"] runs-on: ${{ matrix.os }} @@ -41,9 +41,3 @@ jobs: shell: bash run: | MSGPACK_PUREPYTHON=1 pytest -v test - - - name: Publish Wheels to TestPyPI - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} From 802cbc9495ed059b62b46f057cf3d71f756e2480 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: 
Sat, 1 Apr 2023 00:02:25 +0900 Subject: [PATCH 302/349] Add security policy --- SECURITY.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..75f0c541 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +## Security contact information + +To report a security vulnerability, please use the +[Tidelift security contact](https://tidelift.com/security). +Tidelift will coordinate the fix and disclosure. \ No newline at end of file From 45f848695c855966d8a46656d8de1a2734d934ae Mon Sep 17 00:00:00 2001 From: Laerte Pereira <5853172+Laerte@users.noreply.github.com> Date: Sat, 8 Apr 2023 02:18:25 -0300 Subject: [PATCH 303/349] fix: build status badge (#538) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cb816485..7f7c4234 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # MessagePack for Python -[![Build Status](https://travis-ci.org/msgpack/msgpack-python.svg?branch=master)](https://travis-ci.org/msgpack/msgpack-python) +[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) [![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) ## What's this From feec06206c7f8dd8efeac6177badbf5b256b36e4 Mon Sep 17 00:00:00 2001 From: sblondon Date: Sun, 21 May 2023 09:26:39 +0200 Subject: [PATCH 304/349] Drop python2 support (#519) The PR removes python2 references and cases. Close #518 Co-authored-by: Inada Naoki --- README.md | 4 +-- docs/conf.py | 20 ++++++------ msgpack/__init__.py | 2 +- msgpack/_packer.pyx | 1 - msgpack/_unpacker.pyx | 2 +- msgpack/ext.py | 24 +++----------- msgpack/fallback.py | 70 ++++++++++++----------------------------- setup.py | 3 +- test/test_buffer.py | 1 - test/test_case.py | 2 +- test/test_except.py | 2 +- test/test_extension.py | 15 ++------- test/test_memoryview.py | 5 --- test/test_pack.py | 6 ---- test/test_timestamp.py | 27 +++++++--------- test/test_unpack.py | 4 +-- tox.ini | 13 -------- 17 files changed, 58 insertions(+), 143 deletions(-) diff --git a/README.md b/README.md index 7f7c4234..61f99e1f 100644 --- a/README.md +++ b/README.md @@ -220,9 +220,9 @@ and `raw=True` options. ```pycon >>> import msgpack ->>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) [b'spam', b'eggs'] ->>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) +>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) [b'spam', 'eggs'] ``` diff --git a/docs/conf.py b/docs/conf.py index 6b432be0..91ce77f0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,8 +40,8 @@ master_doc = "index" # General information about the project. -project = u"msgpack" -copyright = u"Inada Naoki" +project = "msgpack" +copyright = "Inada Naoki" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -181,7 +181,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). 
latex_documents = [ - ("index", "msgpack.tex", u"msgpack Documentation", u"Author", "manual"), + ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -209,7 +209,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "msgpack", u"msgpack Documentation", [u"Author"], 1)] +man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -224,8 +224,8 @@ ( "index", "msgpack", - u"msgpack Documentation", - u"Author", + "msgpack Documentation", + "Author", "msgpack", "One line description of project.", "Miscellaneous", @@ -245,10 +245,10 @@ # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = u"msgpack" -epub_author = u"Author" -epub_publisher = u"Author" -epub_copyright = u"2013, Author" +epub_title = "msgpack" +epub_author = "Author" +epub_publisher = "Author" +epub_copyright = "2013, Author" # The language of the text. It defaults to the language option # or en if the language is not set. diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 1300b866..6c10dc23 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -10,7 +10,7 @@ __version__ = "1.0.5" -if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: +if os.environ.get("MSGPACK_PUREPYTHON"): from .fallback import Packer, unpackb, Unpacker else: try: diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 396da0c2..074b39fc 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -98,7 +98,6 @@ cdef class Packer(object): If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. You can get UTC datetime with `timestamp=3` option of the Unpacker. - (Python 2 is not supported). :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 8b06661e..d5dc5ea5 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -236,7 +236,7 @@ cdef class Unpacker(object): 0 - Timestamp 1 - float (Seconds from the EPOCH) 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). Python 2 is not supported. + 3 - datetime.datetime (UTC). :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. diff --git a/msgpack/ext.py b/msgpack/ext.py index 23e0d6b4..07f96a58 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -5,19 +5,6 @@ import struct -PY2 = sys.version_info[0] == 2 - -if PY2: - int_types = (int, long) - _utc = None -else: - int_types = int - try: - _utc = datetime.timezone.utc - except AttributeError: - _utc = datetime.timezone(datetime.timedelta(0)) - - class ExtType(namedtuple("ExtType", "code data")): """ExtType represents ext type in msgpack.""" @@ -55,9 +42,9 @@ def __init__(self, seconds, nanoseconds=0): Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. 
""" - if not isinstance(seconds, int_types): + if not isinstance(seconds, int): raise TypeError("seconds must be an integer") - if not isinstance(nanoseconds, int_types): + if not isinstance(nanoseconds, int): raise TypeError("nanoseconds must be an integer") if not (0 <= nanoseconds < 10**9): raise ValueError( @@ -174,11 +161,10 @@ def to_unix_nano(self): def to_datetime(self): """Get the timestamp as a UTC datetime. - Python 2 is not supported. - :rtype: datetime. """ - return datetime.datetime.fromtimestamp(0, _utc) + datetime.timedelta( + utc = datetime.timezone.utc + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( seconds=self.to_unix() ) @@ -186,8 +172,6 @@ def to_datetime(self): def from_datetime(dt): """Create a Timestamp from datetime with tzinfo. - Python 2 is not supported. - :rtype: Timestamp """ return Timestamp.from_unix(dt.timestamp()) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index e8cebc1b..618c3622 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -4,22 +4,6 @@ import struct -PY2 = sys.version_info[0] == 2 -if PY2: - int_types = (int, long) - - def dict_iteritems(d): - return d.iteritems() - -else: - int_types = int - unicode = str - xrange = range - - def dict_iteritems(d): - return d.items() - - if sys.version_info < (3, 5): # Ugly hack... RecursionError = RuntimeError @@ -134,15 +118,6 @@ def unpackb(packed, **kwargs): return ret -if sys.version_info < (2, 7, 6): - - def _unpack_from(f, b, o=0): - """Explicit type cast for legacy struct.unpack_from""" - return struct.unpack_from(f, bytes(b), o) - -else: - _unpack_from = struct.unpack_from - _NO_FORMAT_USED = "" _MSGPACK_HEADERS = { 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), @@ -202,7 +177,7 @@ class Unpacker(object): 0 - Timestamp 1 - float (Seconds from the EPOCH) 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). Python 2 is not supported. + 3 - datetime.datetime (UTC). :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. 
@@ -477,7 +452,7 @@ def _read_header(self): size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) if len(fmt) > 0: - n = _unpack_from(fmt, self._buffer, self._buff_i)[0] + n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] else: n = self._buffer[self._buff_i] self._buff_i += size @@ -487,7 +462,7 @@ def _read_header(self): elif 0xC7 <= b <= 0xC9: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) - L, n = _unpack_from(fmt, self._buffer, self._buff_i) + L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if L > self._max_ext_len: raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) @@ -496,7 +471,7 @@ def _read_header(self): size, fmt = _MSGPACK_HEADERS[b] self._reserve(size) if len(fmt) > 0: - obj = _unpack_from(fmt, self._buffer, self._buff_i)[0] + obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] else: obj = self._buffer[self._buff_i] self._buff_i += size @@ -507,13 +482,13 @@ def _read_header(self): "%s exceeds max_ext_len(%s)" % (size, self._max_ext_len) ) self._reserve(size + 1) - n, obj = _unpack_from(fmt, self._buffer, self._buff_i) + n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size + 1 elif 0xD9 <= b <= 0xDB: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) if len(fmt) > 0: - (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) else: n = self._buffer[self._buff_i] self._buff_i += size @@ -523,7 +498,7 @@ def _read_header(self): elif 0xDC <= b <= 0xDD: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) - (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_array_len: raise ValueError( @@ -532,7 +507,7 @@ def _read_header(self): elif 0xDE <= b <= 0xDF: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) - (n,) = _unpack_from(fmt, self._buffer, self._buff_i) + (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_map_len: raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) @@ -554,12 +529,12 @@ def _unpack(self, execute=EX_CONSTRUCT): # TODO should we eliminate the recursion? 
if typ == TYPE_ARRAY: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call `list_hook` self._unpack(EX_SKIP) return ret = newlist_hint(n) - for i in xrange(n): + for i in range(n): ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) @@ -567,7 +542,7 @@ def _unpack(self, execute=EX_CONSTRUCT): return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: - for i in xrange(n): + for i in range(n): # TODO check whether we need to call hooks self._unpack(EX_SKIP) self._unpack(EX_SKIP) @@ -575,17 +550,17 @@ def _unpack(self, execute=EX_CONSTRUCT): if self._object_pairs_hook is not None: ret = self._object_pairs_hook( (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) - for _ in xrange(n) + for _ in range(n) ) else: ret = {} - for _ in xrange(n): + for _ in range(n): key = self._unpack(EX_CONSTRUCT) - if self._strict_map_key and type(key) not in (unicode, bytes): + if self._strict_map_key and type(key) not in (str, bytes): raise ValueError( "%s is not allowed for map key" % str(type(key)) ) - if not PY2 and type(key) is str: + if type(key) is str: key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: @@ -698,7 +673,6 @@ class Packer(object): If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. You can get UTC datetime with `timestamp=3` option of the Unpacker. - (Python 2 is not supported). :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') @@ -743,8 +717,6 @@ def __init__( self._autoreset = autoreset self._use_bin_type = use_bin_type self._buffer = StringIO() - if PY2 and datetime: - raise ValueError("datetime is not supported in Python 2") self._datetime = bool(datetime) self._unicode_errors = unicode_errors or "strict" if default is not None: @@ -774,7 +746,7 @@ def _pack( if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if check(obj, int_types): + if check(obj, int): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: @@ -806,7 +778,7 @@ def _pack( raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) - if check(obj, unicode): + if check(obj, str): obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) if n >= 2**32: @@ -855,13 +827,11 @@ def _pack( if check(obj, list_types): n = len(obj) self._pack_array_header(n) - for i in xrange(n): + for i in range(n): self._pack(obj[i], nest_limit - 1) return if check(obj, dict): - return self._pack_map_pairs( - len(obj), dict_iteritems(obj), nest_limit - 1 - ) + return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: obj = Timestamp.from_datetime(obj) @@ -1004,7 +974,7 @@ def reset(self): def getbuffer(self): """Return view of internal buffer.""" - if USING_STRINGBUILDER or PY2: + if USING_STRINGBUILDER: return memoryview(self.bytes()) else: return self._buffer.getbuffer() diff --git a/setup.py b/setup.py index 9630cda0..15ba7741 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,6 @@ PYPY = hasattr(sys, "pypy_version_info") -PY2 = sys.version_info[0] == 2 class NoCython(Exception): @@ -79,7 +78,7 @@ def __init__(self, *args, **kwargs): macros = [("__LITTLE_ENDIAN__", "1")] ext_modules = [] -if not PYPY and not PY2 and not os.environ.get("MSGPACK_PUREPYTHON"): +if not PYPY 
and not os.environ.get("MSGPACK_PUREPYTHON"): ext_modules.append( Extension( "msgpack._cmsgpack", diff --git a/test/test_buffer.py b/test/test_buffer.py index 62507cf4..04470584 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -6,7 +6,6 @@ from msgpack import packb, unpackb -@pytest.mark.skipif(sys.version_info[0] == 2, reason="Python 2 is not supported") def test_unpack_buffer(): from array import array diff --git a/test/test_case.py b/test/test_case.py index a0a3c5ad..1c4e3229 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -134,4 +134,4 @@ def test_match(): def test_unicode(): - assert unpackb(packb(u"foobar"), use_list=1) == u"foobar" + assert unpackb(packb("foobar"), use_list=1) == "foobar" diff --git a/test/test_except.py b/test/test_except.py index 5544f2bc..745ebecb 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -53,7 +53,7 @@ def test_invalidvalue(): def test_strict_map_key(): - valid = {u"unicode": 1, b"bytes": 2} + valid = {"unicode": 1, b"bytes": 2} packed = packb(valid, use_bin_type=True) assert valid == unpackb(packed, raw=False, strict_map_key=True) diff --git a/test/test_extension.py b/test/test_extension.py index 6b365751..dfbe4350 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -55,10 +55,7 @@ def ext_hook(code, data): print("ext_hook called", code, data) assert code == 123 obj = array.array("d") - try: - obj.frombytes(data) - except AttributeError: # PY2 - obj.fromstring(data) + obj.frombytes(data) return obj obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] @@ -67,20 +64,14 @@ def ext_hook(code, data): assert obj == obj2 -import sys - -if sys.version > "3": - long = int - - def test_overriding_hooks(): def default(obj): - if isinstance(obj, long): + if isinstance(obj, int): return {"__type__": "long", "__data__": str(obj)} else: return obj - obj = {"testval": long(1823746192837461928374619)} + obj = {"testval": 1823746192837461928374619} refobj = {"testval": default(obj["testval"])} refout = msgpack.packb(refobj) assert isinstance(refout, (str, bytes)) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index a0939a69..63beab10 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -7,11 +7,6 @@ import sys -pytestmark = pytest.mark.skipif( - sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol" -) - - def make_array(f, data): a = array(f) a.frombytes(data) diff --git a/test/test_pack.py b/test/test_pack.py index a51d84c9..65c9cb17 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -80,9 +80,6 @@ def testPackByteArrays(): check(td) -@pytest.mark.skipif( - sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" -) def testIgnoreUnicodeErrors(): re = unpackb( packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore" @@ -96,9 +93,6 @@ def testStrictUnicodeUnpack(): unpackb(packed, raw=False, use_list=1) -@pytest.mark.skipif( - sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" -) def testIgnoreErrorsPack(): re = unpackb( packb("abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors="ignore"), diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 253228e7..af84a2ff 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -4,9 +4,6 @@ import msgpack from msgpack.ext import Timestamp -if sys.version_info[0] > 2: - from msgpack.ext import _utc - def test_timestamp(): # timestamp32 @@ -85,33 +82,33 @@ def test_timestamp_to(): assert t.to_unix_nano() == 42000014000 
-@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_timestamp_datetime(): t = Timestamp(42, 14) - assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + utc = datetime.timezone.utc + assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_unpack_datetime(): t = Timestamp(42, 14) + utc = datetime.timezone.utc packed = msgpack.packb(t) unpacked = msgpack.unpackb(packed, timestamp=3) - assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc) + assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_unpack_before_epoch(): - t_in = datetime.datetime(1960, 1, 1, tzinfo=_utc) + utc = datetime.timezone.utc + t_in = datetime.datetime(1960, 1, 1, tzinfo=utc) packed = msgpack.packb(t_in, datetime=True) unpacked = msgpack.unpackb(packed, timestamp=3) assert unpacked == t_in -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_datetime(): t = Timestamp(42, 14000) dt = t.to_datetime() - assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc) + utc = datetime.timezone.utc + assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) packed = msgpack.packb(dt, datetime=True) packed2 = msgpack.packb(t) @@ -131,10 +128,10 @@ def test_pack_datetime(): assert msgpack.unpackb(packed) is None -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_issue451(): # https://github.com/msgpack/msgpack-python/issues/451 - dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=_utc) + utc = datetime.timezone.utc + dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc) packed = msgpack.packb(dt, datetime=True) assert packed == b"\xd6\xff\xf4\x86eL" @@ -142,7 +139,6 @@ def test_issue451(): assert dt == unpacked -@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only") def test_pack_datetime_without_tzinfo(): dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) with pytest.raises(ValueError, match="where tzinfo=None"): @@ -152,7 +148,8 @@ def test_pack_datetime_without_tzinfo(): packed = msgpack.packb(dt, datetime=True, default=lambda x: None) assert packed == msgpack.packb(None) - dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc) + utc = datetime.timezone.utc + dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) packed = msgpack.packb(dt, datetime=True) unpacked = msgpack.unpackb(packed, timestamp=3) assert unpacked == dt diff --git a/test/test_unpack.py b/test/test_unpack.py index aa4c01f8..c7141024 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -70,7 +70,7 @@ def _hook(self, code, data): def test_unpacker_tell(): - objects = 1, 2, u"abc", u"def", u"ghi" + objects = 1, 2, "abc", "def", "ghi" packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" positions = 1, 2, 6, 10, 14 unpacker = Unpacker(BytesIO(packed)) @@ -80,7 +80,7 @@ def test_unpacker_tell(): def test_unpacker_tell_read_bytes(): - objects = 1, u"abc", u"ghi" + objects = 1, "abc", "ghi" packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" raw_data = b"\x02", b"\xa3def", b"" lenghts = 1, 4, 999 diff --git a/tox.ini b/tox.ini index 29c256d1..1ef2d189 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,7 @@ [tox] envlist = - py27-pure, {py35,py36,py37,py38}-{c,pure}, {pypy,pypy3}-pure, - py27-x86, py34-x86, 
isolated_build = true @@ -19,17 +17,6 @@ commands= setenv= pure: MSGPACK_PUREPYTHON=x -[testenv:py27-x86] -basepython=python2.7-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _cmsgpack' - py.test - [testenv:py34-x86] basepython=python3.4-x86 deps= From c8d0751fe3375a5e8005b5edf955cd5904aaec2f Mon Sep 17 00:00:00 2001 From: Evgeny Markov Date: Tue, 23 May 2023 18:41:08 +0200 Subject: [PATCH 305/349] Drop Python 3.6 support (#543) The following steps have been taken: 1. Black was updated to latest version. The code has been formatted with the new version. 2. The pyupgrade utility is installed. This helped to remove all the code that was needed to support Python < 3.7. Fix #541. Co-authored-by: Inada Naoki --- Makefile | 8 ++++- msgpack/__init__.py | 1 - msgpack/ext.py | 21 ++++--------- msgpack/fallback.py | 70 ++++++++++++----------------------------- pyproject.toml | 5 +++ requirements.txt | 8 +++-- setup.cfg | 1 - setup.py | 9 ++---- test/test_buffer.py | 1 - test/test_case.py | 7 ++--- test/test_except.py | 1 - test/test_extension.py | 7 ++--- test/test_format.py | 5 +-- test/test_limits.py | 2 -- test/test_memoryview.py | 1 - test/test_newspec.py | 2 -- test/test_obj.py | 1 - test/test_pack.py | 12 +++---- test/test_seq.py | 1 - test/test_sequnpack.py | 1 - test/test_stricttype.py | 12 +++---- test/test_subtype.py | 1 - test/test_unpack.py | 2 +- 23 files changed, 60 insertions(+), 119 deletions(-) diff --git a/Makefile b/Makefile index 415dcfdd..e4f22da4 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,16 @@ +PYTHON_SOURCES = msgpack test setup.py + .PHONY: all all: cython python setup.py build_ext -i -f .PHONY: black black: - black -S msgpack/ test/ setup.py + black $(PYTHON_SOURCES) + +.PHONY: pyupgrade +pyupgrade: + @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \; .PHONY: cython cython: diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 6c10dc23..638236fe 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .exceptions import * from .ext import ExtType, Timestamp diff --git a/msgpack/ext.py b/msgpack/ext.py index 07f96a58..97942949 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,4 +1,3 @@ -# coding: utf-8 from collections import namedtuple import datetime import sys @@ -15,10 +14,10 @@ def __new__(cls, code, data): raise TypeError("data must be bytes") if not 0 <= code <= 127: raise ValueError("code must be 0~127") - return super(ExtType, cls).__new__(cls, code, data) + return super().__new__(cls, code, data) -class Timestamp(object): +class Timestamp: """Timestamp represents the Timestamp extension type in msgpack. When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python @@ -47,24 +46,18 @@ def __init__(self, seconds, nanoseconds=0): if not isinstance(nanoseconds, int): raise TypeError("nanoseconds must be an integer") if not (0 <= nanoseconds < 10**9): - raise ValueError( - "nanoseconds must be a non-negative integer less than 999999999." 
- ) + raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") self.seconds = seconds self.nanoseconds = nanoseconds def __repr__(self): """String representation of Timestamp.""" - return "Timestamp(seconds={0}, nanoseconds={1})".format( - self.seconds, self.nanoseconds - ) + return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" def __eq__(self, other): """Check for equality with another Timestamp object""" if type(other) is self.__class__: - return ( - self.seconds == other.seconds and self.nanoseconds == other.nanoseconds - ) + return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds return False def __ne__(self, other): @@ -164,9 +157,7 @@ def to_datetime(self): :rtype: datetime. """ utc = datetime.timezone.utc - return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( - seconds=self.to_unix() - ) + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(seconds=self.to_unix()) @staticmethod def from_datetime(dt): diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 618c3622..ac1eaf4c 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -4,23 +4,6 @@ import struct -if sys.version_info < (3, 5): - # Ugly hack... - RecursionError = RuntimeError - - def _is_recursionerror(e): - return ( - len(e.args) == 1 - and isinstance(e.args[0], str) - and e.args[0].startswith("maximum recursion depth exceeded") - ) - -else: - - def _is_recursionerror(e): - return True - - if hasattr(sys, "pypy_version_info"): # StringIO is slow on PyPy, StringIO is faster. However: PyPy's own # StringBuilder is fastest. @@ -32,7 +15,7 @@ def _is_recursionerror(e): from __pypy__.builders import StringBuilder USING_STRINGBUILDER = True - class StringIO(object): + class StringIO: def __init__(self, s=b""): if s: self.builder = StringBuilder(len(s)) @@ -109,10 +92,8 @@ def unpackb(packed, **kwargs): ret = unpacker._unpack() except OutOfData: raise ValueError("Unpack failed: incomplete input") - except RecursionError as e: - if _is_recursionerror(e): - raise StackError - raise + except RecursionError: + raise StackError if unpacker._got_extradata(): raise ExtraData(ret, unpacker._get_extradata()) return ret @@ -151,7 +132,7 @@ def unpackb(packed, **kwargs): } -class Unpacker(object): +class Unpacker: """Streaming unpacker. 
Arguments: @@ -334,9 +315,7 @@ def __init__( if object_pairs_hook is not None and not callable(object_pairs_hook): raise TypeError("`object_pairs_hook` is not callable") if object_hook is not None and object_pairs_hook is not None: - raise TypeError( - "object_pairs_hook and object_hook are mutually " "exclusive" - ) + raise TypeError("object_pairs_hook and object_hook are mutually exclusive") if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") @@ -428,20 +407,18 @@ def _read_header(self): n = b & 0b00011111 typ = TYPE_RAW if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError( - "%s exceeds max_array_len(%s)" % (n, self._max_array_len) - ) + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") elif b == 0xC0: obj = None elif b == 0xC2: @@ -457,7 +434,7 @@ def _read_header(self): n = self._buffer[self._buff_i] self._buff_i += size if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") obj = self._read(n) elif 0xC7 <= b <= 0xC9: size, fmt, typ = _MSGPACK_HEADERS[b] @@ -465,7 +442,7 @@ def _read_header(self): L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") obj = self._read(L) elif 0xCA <= b <= 0xD3: size, fmt = _MSGPACK_HEADERS[b] @@ -478,9 +455,7 @@ def _read_header(self): elif 0xD4 <= b <= 0xD8: size, fmt, typ = _MSGPACK_HEADERS[b] if self._max_ext_len < size: - raise ValueError( - "%s exceeds max_ext_len(%s)" % (size, self._max_ext_len) - ) + raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") self._reserve(size + 1) n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size + 1 @@ -493,7 +468,7 @@ def _read_header(self): n = self._buffer[self._buff_i] self._buff_i += size if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len)) + raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") obj = self._read(n) elif 0xDC <= b <= 0xDD: size, fmt, typ = _MSGPACK_HEADERS[b] @@ -501,16 +476,14 @@ def _read_header(self): (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_array_len: - raise ValueError( - "%s exceeds max_array_len(%s)" % (n, self._max_array_len) - ) + raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") elif 0xDE <= b <= 0xDF: size, fmt, typ = _MSGPACK_HEADERS[b] self._reserve(size) (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) self._buff_i += size if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len)) + raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") else: raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj @@ -549,17 +522,14 @@ def _unpack(self, execute=EX_CONSTRUCT): return if self._object_pairs_hook is not None: ret = 
self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) - for _ in range(n) + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) ) else: ret = {} for _ in range(n): key = self._unpack(EX_CONSTRUCT) if self._strict_map_key and type(key) not in (str, bytes): - raise ValueError( - "%s is not allowed for map key" % str(type(key)) - ) + raise ValueError("%s is not allowed for map key" % str(type(key))) if type(key) is str: key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) @@ -634,7 +604,7 @@ def tell(self): return self._stream_offset -class Packer(object): +class Packer: """ MessagePack Packer @@ -844,9 +814,9 @@ def _pack( continue if self._datetime and check(obj, _DateTime): - raise ValueError("Cannot serialize %r where tzinfo=None" % (obj,)) + raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") - raise TypeError("Cannot serialize %r" % (obj,)) + raise TypeError(f"Cannot serialize {obj!r}") def pack(self, obj): try: @@ -933,7 +903,7 @@ def _pack_map_header(self, n): def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): self._pack_map_header(n) - for (k, v) in pairs: + for k, v in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) diff --git a/pyproject.toml b/pyproject.toml index 195795f0..86fae1c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,3 +6,8 @@ requires = [ "setuptools >= 35.0.2", ] build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 100 +target-version = ["py37"] +skip_string_normalization = true diff --git a/requirements.txt b/requirements.txt index 9f3c1a0d..88b5eb97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ -# Also declared in pyproject.toml, if updating here please also update there +# Also declared in pyproject.toml, if updating here please also update there. Cython~=0.29.30 -# dev only tools. no need to add pyproject -black==22.3.0 +# Tools required only for development. No need to add it to pyproject.toml file. 
+black==23.3.0 +pytest==7.3.1 +pyupgrade==3.3.2 diff --git a/setup.cfg b/setup.cfg index e3782842..d6888fcf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,7 +17,6 @@ project_urls = classifiers = Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 diff --git a/setup.py b/setup.py index 15ba7741..1cd1e8eb 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 import io import os import sys @@ -25,7 +24,7 @@ class NoCython(Exception): def cythonize(src): - sys.stderr.write("cythonize: %r\n" % (src,)) + sys.stderr.write(f"cythonize: {src!r}\n") cython_compiler.compile([src], cplus=True) @@ -36,11 +35,7 @@ def ensure_source(src): if not have_cython: raise NoCython cythonize(pyx) - elif ( - os.path.exists(pyx) - and os.stat(src).st_mtime < os.stat(pyx).st_mtime - and have_cython - ): + elif os.path.exists(pyx) and os.stat(src).st_mtime < os.stat(pyx).st_mtime and have_cython: cythonize(pyx) return src diff --git a/test/test_buffer.py b/test/test_buffer.py index 04470584..7ee674ae 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 import sys import pytest diff --git a/test/test_case.py b/test/test_case.py index 1c4e3229..c4c615e3 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,11 +1,10 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import packb, unpackb def check(length, obj, use_bin_type=True): v = packb(obj, use_bin_type=use_bin_type) - assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v)) + assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}" assert unpackb(v, use_list=0, raw=not use_bin_type) == obj @@ -120,11 +119,11 @@ def test_match(): ), ({}, b"\x80"), ( - dict([(x, x) for x in range(15)]), + {x: x for x in range(15)}, b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", ), ( - dict([(x, x) for x in range(16)]), + {x: x for x in range(16)}, b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f", ), ] diff --git a/test/test_except.py b/test/test_except.py index 745ebecb..8c0a9766 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 from pytest import raises from msgpack import packb, unpackb, Unpacker, FormatError, StackError, OutOfData diff --git a/test/test_extension.py b/test/test_extension.py index dfbe4350..9e5e6aad 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,4 +1,3 @@ -from __future__ import print_function import array import msgpack from msgpack import ExtType @@ -17,9 +16,7 @@ def p(s): assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 - assert ( - p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 - ) # ext 32 + assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32 def test_unpack_ext_type(): @@ -49,7 +46,7 @@ def default(obj): except AttributeError: data = obj.tostring() return ExtType(typecode, data) - raise TypeError("Unknown type object %r" % (obj,)) + raise TypeError(f"Unknown type object {obj!r}") def ext_hook(code, data): print("ext_hook called", code, 
data) diff --git a/test/test_format.py b/test/test_format.py index fbbc3f98..c06c87dc 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import unpackb @@ -25,9 +24,7 @@ def testFixRaw(): def testFixMap(): - check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}} - ) + check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}}) def testUnsignedInt(): diff --git a/test/test_limits.py b/test/test_limits.py index 4314c2c0..533bc112 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -1,6 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals import pytest from msgpack import ( diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 63beab10..eaadef73 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 import pytest from array import array diff --git a/test/test_newspec.py b/test/test_newspec.py index b7da486e..a6f4251b 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from msgpack import packb, unpackb, ExtType diff --git a/test/test_obj.py b/test/test_obj.py index 86c557cd..d3f870d9 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 from pytest import raises from msgpack import packb, unpackb diff --git a/test/test_pack.py b/test/test_pack.py index 65c9cb17..2753e469 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,6 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals from collections import OrderedDict from io import BytesIO @@ -81,9 +79,7 @@ def testPackByteArrays(): def testIgnoreUnicodeErrors(): - re = unpackb( - packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore" - ) + re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore") assert re == "abcdef" @@ -108,8 +104,8 @@ def testDecodeBinary(): def testPackFloat(): - assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(str(">f"), 1.0) - assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(str(">d"), 1.0) + assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) + assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) def testArraySize(sizes=[0, 5, 50, 1000]): @@ -154,7 +150,7 @@ def testMapSize(sizes=[0, 5, 50, 1000]): bio.seek(0) unpacker = Unpacker(bio, strict_map_key=False) for size in sizes: - assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) + assert unpacker.unpack() == {i: i * 2 for i in range(size)} def test_odict(): diff --git a/test/test_seq.py b/test/test_seq.py index 0d5d8065..def6630d 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 import io import msgpack diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index c091076b..6b138aad 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 import io from msgpack import Unpacker, BufferFull from msgpack import pack, packb diff --git a/test/test_stricttype.py b/test/test_stricttype.py index fe9ec6cd..9ffaff25 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from 
collections import namedtuple from msgpack import packb, unpackb, ExtType @@ -10,7 +8,7 @@ def test_namedtuple(): def default(o): if isinstance(o, T): return dict(o._asdict()) - raise TypeError("Unsupported type %s" % (type(o),)) + raise TypeError(f"Unsupported type {type(o)}") packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) unpacked = unpackb(packed, raw=False) @@ -23,7 +21,7 @@ def test_tuple(): def default(o): if isinstance(o, tuple): return {"__type__": "tuple", "value": list(o)} - raise TypeError("Unsupported type %s" % (type(o),)) + raise TypeError(f"Unsupported type {type(o)}") def convert(o): if o.get("__type__") == "tuple": @@ -44,9 +42,7 @@ def test_tuple_ext(): def default(o): if isinstance(o, tuple): # Convert to list and pack - payload = packb( - list(o), strict_types=True, use_bin_type=True, default=default - ) + payload = packb(list(o), strict_types=True, use_bin_type=True, default=default) return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) raise TypeError(repr(o)) @@ -54,7 +50,7 @@ def convert(code, payload): if code == MSGPACK_EXT_TYPE_TUPLE: # Unpack and convert to tuple return tuple(unpackb(payload, raw=False, ext_hook=convert)) - raise ValueError("Unknown Ext code {}".format(code)) + raise ValueError(f"Unknown Ext code {code}") data = packb(t, strict_types=True, use_bin_type=True, default=default) expected = unpackb(data, raw=False, ext_hook=convert) diff --git a/test/test_subtype.py b/test/test_subtype.py index d91d4553..d5a9adb3 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 from msgpack import packb, unpackb from collections import namedtuple diff --git a/test/test_unpack.py b/test/test_unpack.py index c7141024..bf3f960d 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -52,7 +52,7 @@ def hook(x): def test_unpacker_ext_hook(): class MyUnpacker(Unpacker): def __init__(self): - super(MyUnpacker, self).__init__(ext_hook=self._hook, raw=False) + super().__init__(ext_hook=self._hook, raw=False) def _hook(self, code, data): if code == 1: From e5249f877c18c88a0009f21097d7e48819579e60 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 21 Jul 2023 02:53:58 +0900 Subject: [PATCH 306/349] ci: add Python 3.12 and drop 3.7 --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 88781025..76fcf27e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: os: ["ubuntu-latest", "windows-latest", "macos-latest"] - py: ["3.11", "3.10", "3.9", "3.8", "3.7"] + py: ["3.12", "3.11", "3.10", "3.9", "3.8"] runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} @@ -23,6 +23,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.py }} + allow-prereleases: true cache: "pip" - name: Build From 427736bbcc5553b0754616b58154ed26733103b6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 21 Jul 2023 11:11:04 +0900 Subject: [PATCH 307/349] try Cython 3.0 (#548) --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86fae1c7..dc8bbee3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = [ # Also declared in requirements.txt, if updating here please also update # there - "Cython~=0.29.30", + "Cython~=3.0.0", "setuptools >= 35.0.2", ] build-backend = 
"setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index 88b5eb97..e27df0f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # Also declared in pyproject.toml, if updating here please also update there. -Cython~=0.29.30 +Cython~=3.0.0 # Tools required only for development. No need to add it to pyproject.toml file. black==23.3.0 From 7cfced51501b8e0786da5d6b499331ef2f492b29 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 9 Aug 2023 18:09:42 +0900 Subject: [PATCH 308/349] start v1.0.6 development --- msgpack/__init__.py | 2 +- setup.cfg | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 638236fe..2540120b 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -6,7 +6,7 @@ version = (1, 0, 5) -__version__ = "1.0.5" +__version__ = "1.0.6dev1" if os.environ.get("MSGPACK_PUREPYTHON"): diff --git a/setup.cfg b/setup.cfg index d6888fcf..c2e16721 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [metadata] name = msgpack -#version = attr: msgpack.__version__ -version = attr: msgpack.version +version = attr: msgpack.__version__ +#version = attr: msgpack.version license = Apache 2.0 author = Inada Naoki author_email = songofacandy@gmail.com @@ -17,11 +17,11 @@ project_urls = classifiers = Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy Intended Audience :: Developers From 715126c67b2339381f5ad02f45d8fe367400c749 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 9 Aug 2023 18:20:05 +0900 Subject: [PATCH 309/349] CI: update cibuildwheel to v2.15.0 (#551) --- .github/workflows/wheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 6cf2fe94..c1eb2ed6 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -35,7 +35,7 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.12.0 + uses: pypa/cibuildwheel@v2.15.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" From 7b75b4f36819c77a12518929fecc09d94e82f5bd Mon Sep 17 00:00:00 2001 From: TW Date: Thu, 31 Aug 2023 05:56:24 +0200 Subject: [PATCH 310/349] sphinx-related work (#554) fixes #510 --- .github/workflows/docs.yaml | 32 ++++++++++++++++++++++++++++++++ docs/_static/README.txt | 1 + docs/api.rst | 8 ++++---- docs/conf.py | 2 +- msgpack/_packer.pyx | 3 ++- msgpack/_unpacker.pyx | 6 +++--- msgpack/ext.py | 4 ++-- msgpack/fallback.py | 9 +++++---- tox.ini | 9 +++++++++ 9 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/docs.yaml create mode 100644 docs/_static/README.txt diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..a393c6b8 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,32 @@ +name: docs + +on: ["push", "pull_request"] + +jobs: + docs: + # We want to run on external PRs, but not on our own internal PRs as they'll be run + # by the push to the branch. 
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + architecture: 'x64' + + - name: Checkout + uses: actions/checkout@v3 + + - name: Build + shell: bash + run: | + pip install -r requirements.txt + make cython + pip install . + + - name: Sphinx Documentation Generator + run: | + pip install tox + tox -e sphinx diff --git a/docs/_static/README.txt b/docs/_static/README.txt new file mode 100644 index 00000000..1c70594f --- /dev/null +++ b/docs/_static/README.txt @@ -0,0 +1 @@ +Sphinx will copy the contents of docs/_static/ directory to the build location. diff --git a/docs/api.rst b/docs/api.rst index 93827e19..f5dfbbd2 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,19 +5,19 @@ API reference .. autofunction:: pack -:func:`dump` is alias for :func:`pack` +``dump()`` is an alias for :func:`pack` .. autofunction:: packb -:func:`dumps` is alias for :func:`packb` +``dumps()`` is an alias for :func:`packb` .. autofunction:: unpack -:func:`load` is alias for :func:`unpack` +``load()`` is an alias for :func:`unpack` .. autofunction:: unpackb -:func:`loads` is alias for :func:`unpackb` +``loads()`` is an alias for :func:`unpackb` .. autoclass:: Packer :members: diff --git a/docs/conf.py b/docs/conf.py index 91ce77f0..1c1895c7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +#sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 074b39fc..3c398672 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -71,7 +71,8 @@ cdef class Packer(object): Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index d5dc5ea5..56126f43 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -217,7 +217,7 @@ cdef class Unpacker(object): :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) @@ -241,12 +241,12 @@ cdef class Unpacker(object): :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. - :param callable object_hook: + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. 
(See also simplejson) diff --git a/msgpack/ext.py b/msgpack/ext.py index 97942949..f7f2d77d 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -120,7 +120,7 @@ def from_unix(unix_sec): """Create a Timestamp from posix timestamp in seconds. :param unix_float: Posix timestamp in seconds. - :type unix_float: int or float. + :type unix_float: int or float """ seconds = int(unix_sec // 1) nanoseconds = int((unix_sec % 1) * 10**9) @@ -154,7 +154,7 @@ def to_unix_nano(self): def to_datetime(self): """Get the timestamp as a UTC datetime. - :rtype: datetime. + :rtype: `datetime.datetime` """ utc = datetime.timezone.utc return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(seconds=self.to_unix()) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index ac1eaf4c..84b2617a 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -139,7 +139,7 @@ class Unpacker: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. + If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) @@ -163,12 +163,12 @@ class Unpacker: :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. - :param callable object_hook: + :param object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param callable object_pairs_hook: + :param object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) @@ -616,7 +616,8 @@ class Packer: Packer's constructor has some keyword arguments: - :param callable default: + :param default: + When specified, it should be callable. Convert user type to builtin type that Packer supports. See also simplejson's document. diff --git a/tox.ini b/tox.ini index 1ef2d189..369eddcf 100644 --- a/tox.ini +++ b/tox.ini @@ -3,6 +3,7 @@ envlist = {py35,py36,py37,py38}-{c,pure}, {pypy,pypy3}-pure, py34-x86, + sphinx, isolated_build = true [testenv] @@ -27,3 +28,11 @@ commands= python -c 'import sys; print(hex(sys.maxsize))' python -c 'from msgpack import _cmsgpack' py.test + + +[testenv:sphinx] +changedir = docs +deps = + sphinx +commands = + sphinx-build -n -v -W --keep-going -b html -d {envtmpdir}/doctrees . {envtmpdir}/html From 423c6df265d0f964733b31a7e835fe91e4b8ea89 Mon Sep 17 00:00:00 2001 From: TW Date: Tue, 5 Sep 2023 03:51:04 +0200 Subject: [PATCH 311/349] move project metadata to pyproject.toml (#555) also: replace flake8 by ruff. --- docs/conf.py | 4 ++-- msgpack/__init__.py | 1 - msgpack/ext.py | 8 ++++---- msgpack/fallback.py | 2 +- pyproject.toml | 45 +++++++++++++++++++++++++++++++++++++++++ setup.cfg | 32 ----------------------------- setup.py | 2 -- test/test_buffer.py | 2 -- test/test_memoryview.py | 2 -- test/test_obj.py | 4 ++-- test/test_pack.py | 4 +--- test/test_seq.py | 2 +- test/test_subtype.py | 2 +- test/test_timestamp.py | 1 - 14 files changed, 57 insertions(+), 54 deletions(-) delete mode 100644 setup.cfg diff --git a/docs/conf.py b/docs/conf.py index 1c1895c7..6eb472af 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,11 +11,11 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import sys, os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. +#import os +#import sys #sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 2540120b..9a96c982 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -2,7 +2,6 @@ from .ext import ExtType, Timestamp import os -import sys version = (1, 0, 5) diff --git a/msgpack/ext.py b/msgpack/ext.py index f7f2d77d..02c2c430 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,6 +1,5 @@ from collections import namedtuple import datetime -import sys import struct @@ -20,8 +19,9 @@ def __new__(cls, code, data): class Timestamp: """Timestamp represents the Timestamp extension type in msgpack. - When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python - msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`. + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. + When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and + unpack `Timestamp`. This class is immutable: Do not override seconds and nanoseconds. """ @@ -39,7 +39,7 @@ def __init__(self, seconds, nanoseconds=0): Number of nanoseconds to add to `seconds` to get fractional time. Maximum is 999_999_999. Default is 0. - Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. + Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. 
""" if not isinstance(seconds, int): raise TypeError("seconds must be an integer") diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 84b2617a..a174162a 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -530,7 +530,7 @@ def _unpack(self, execute=EX_CONSTRUCT): key = self._unpack(EX_CONSTRUCT) if self._strict_map_key and type(key) not in (str, bytes): raise ValueError("%s is not allowed for map key" % str(type(key))) - if type(key) is str: + if isinstance(key, str): key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: diff --git a/pyproject.toml b/pyproject.toml index dc8bbee3..f37d213c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,52 @@ requires = [ ] build-backend = "setuptools.build_meta" +[project] +name = "msgpack" +dynamic = ["version"] +license = {text="Apache 2.0"} +authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] +description = "MessagePack serializer" +readme = "README.md" +#keywords = ["python", "msgpack", "messagepack", "serializer", "serialization", "binary"] +#requires-python = ">=3.8" +classifiers = [ +# "Development Status :: 5 - Production/Stable", +# "Operating System :: OS Independent", +# "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", +] + +[project.urls] +Homepage = "https://msgpack.org/" +Documentation = "https://msgpack-python.readthedocs.io/" +Repository = "https://github.com/msgpack/msgpack-python/" +Tracker = "https://github.com/msgpack/msgpack-python/issues" +#Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" + +[tool.setuptools.dynamic] +version = {attr = "msgpack.__version__"} + [tool.black] line-length = 100 target-version = ["py37"] skip_string_normalization = true + +[tool.ruff] +line-length = 100 +target-version = "py38" +ignore = [] + +[tool.ruff.per-file-ignores] +"msgpack/__init__.py" = ["F401", "F403"] +"msgpack/fallback.py" = ["E731"] +"test/test_seq.py" = ["E501"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index c2e16721..00000000 --- a/setup.cfg +++ /dev/null @@ -1,32 +0,0 @@ -[metadata] -name = msgpack -version = attr: msgpack.__version__ -#version = attr: msgpack.version -license = Apache 2.0 -author = Inada Naoki -author_email = songofacandy@gmail.com -description = MessagePack serializer -long_description = file: README.md -long_description_content_type = text/markdown -url = https://msgpack.org/ - -project_urls = - Documentation = https://msgpack-python.readthedocs.io/ - Source = https://github.com/msgpack/msgpack-python - Tracker = https://github.com/msgpack/msgpack-python/issues - -classifiers = - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Programming Language :: Python :: 3.12 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - Intended Audience :: Developers - License :: OSI Approved :: Apache Software License - -[flake8] 
-max_line_length = 100 - diff --git a/setup.py b/setup.py index 1cd1e8eb..7a34c8cf 100755 --- a/setup.py +++ b/setup.py @@ -1,8 +1,6 @@ #!/usr/bin/env python -import io import os import sys -from glob import glob from setuptools import setup, Extension from setuptools.command.build_ext import build_ext from setuptools.command.sdist import sdist diff --git a/test/test_buffer.py b/test/test_buffer.py index 7ee674ae..a3db339c 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -import sys -import pytest from msgpack import packb, unpackb diff --git a/test/test_memoryview.py b/test/test_memoryview.py index eaadef73..dc319a63 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,9 +1,7 @@ #!/usr/bin/env python -import pytest from array import array from msgpack import packb, unpackb -import sys def make_array(f, data): diff --git a/test/test_obj.py b/test/test_obj.py index d3f870d9..f78bf426 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -33,7 +33,7 @@ def test_decode_pairs_hook(): prod_sum = 1 * 2 + 3 * 4 unpacked = unpackb( packed, - object_pairs_hook=lambda l: sum(k * v for k, v in l), + object_pairs_hook=lambda lst: sum(k * v for k, v in lst), use_list=1, strict_map_key=False, ) @@ -48,7 +48,7 @@ def test_only_one_obj_hook(): def test_bad_hook(): with raises(TypeError): packed = packb([3, 1 + 2j], default=lambda o: o) - unpacked = unpackb(packed, use_list=1) + unpackb(packed, use_list=1) def _arr_to_str(arr): diff --git a/test/test_pack.py b/test/test_pack.py index 2753e469..42325378 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -3,12 +3,10 @@ from collections import OrderedDict from io import BytesIO import struct -import sys import pytest -from pytest import raises, xfail -from msgpack import packb, unpackb, Unpacker, Packer, pack +from msgpack import packb, unpackb, Unpacker, Packer def check(data, use_list=False): diff --git a/test/test_seq.py b/test/test_seq.py index def6630d..16d9dde4 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -34,7 +34,7 @@ def test_exceeding_unpacker_read_size(): read_count = 0 for idx, o in enumerate(unpacker): - assert type(o) == bytes + assert isinstance(o, bytes) assert o == gen_binary_data(idx) read_count += 1 diff --git a/test/test_subtype.py b/test/test_subtype.py index d5a9adb3..0d1c41af 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from msgpack import packb, unpackb +from msgpack import packb from collections import namedtuple diff --git a/test/test_timestamp.py b/test/test_timestamp.py index af84a2ff..db5cc57a 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -1,5 +1,4 @@ import pytest -import sys import datetime import msgpack from msgpack.ext import Timestamp From ef15f4a62c25114bec1db91aa4006ae2d3a9fb53 Mon Sep 17 00:00:00 2001 From: TW Date: Thu, 7 Sep 2023 14:25:07 +0200 Subject: [PATCH 312/349] add a basic .readthedocs.yaml file (#558) --- .readthedocs.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..8b5aaf16 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,12 @@ +# Read the Docs configuration file for Sphinx projects. +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details. 
+ +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +sphinx: + configuration: docs/conf.py From 41d6239c0a3bfb1daabe1a45ffdbecf4e9aa5469 Mon Sep 17 00:00:00 2001 From: TW Date: Tue, 12 Sep 2023 19:51:12 +0200 Subject: [PATCH 313/349] fix .readthedocs.yaml, fixes #559 (#560) --- .readthedocs.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 8b5aaf16..7447895d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,6 +7,17 @@ build: os: ubuntu-22.04 tools: python: "3.11" + apt_packages: + - build-essential + jobs: + pre_install: + - pip install -r requirements.txt + - make cython + +python: + install: + - method: pip + path: . sphinx: configuration: docs/conf.py From 4e10c10aaa8350f23e4b85d27ff131f7b4fd13e2 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 13 Sep 2023 18:40:04 +0900 Subject: [PATCH 314/349] prepare for 1.0.6rc1 (#557) --- ChangeLog.rst | 9 +++++++++ msgpack/__init__.py | 4 ++-- pyproject.toml | 12 ++++++------ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 53547996..bf345ddb 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,12 @@ +1.0.6rc1 +======== + +Release Date: 2023-09-13 + +* Add Python 3.12 wheels (#517) +* Remove Python 2.7, 3.6, and 3.7 support + + 1.0.5 ===== diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 9a96c982..2e201330 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,8 +4,8 @@ import os -version = (1, 0, 5) -__version__ = "1.0.6dev1" +version = (1, 0, 6, "rc", 1) +__version__ = "1.0.6rc1" if os.environ.get("MSGPACK_PUREPYTHON"): diff --git a/pyproject.toml b/pyproject.toml index f37d213c..a63009a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,12 +14,12 @@ license = {text="Apache 2.0"} authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] description = "MessagePack serializer" readme = "README.md" -#keywords = ["python", "msgpack", "messagepack", "serializer", "serialization", "binary"] -#requires-python = ">=3.8" +keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] +requires-python = ">=3.8" classifiers = [ -# "Development Status :: 5 - Production/Stable", -# "Operating System :: OS Independent", -# "Programming Language :: Python", + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", @@ -37,7 +37,7 @@ Homepage = "https://msgpack.org/" Documentation = "https://msgpack-python.readthedocs.io/" Repository = "https://github.com/msgpack/msgpack-python/" Tracker = "https://github.com/msgpack/msgpack-python/issues" -#Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" +Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" [tool.setuptools.dynamic] version = {attr = "msgpack.__version__"} From e1d3d5d5c386b8b2fa99c812b4648f6532cab032 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 15 Sep 2023 12:02:06 +0900 Subject: [PATCH 315/349] update actions (#563) --- .github/workflows/black.yaml | 2 +- .github/workflows/docs.yaml | 2 +- .github/workflows/test.yml | 2 +- .github/workflows/wheel.yml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index 1e28b7b5..e0917926 100644 --- a/.github/workflows/black.yaml +++ 
b/.github/workflows/black.yaml @@ -17,7 +17,7 @@ jobs: architecture: 'x64' - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Black Code Formatter run: | diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index a393c6b8..80bbba7d 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -17,7 +17,7 @@ jobs: architecture: 'x64' - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Build shell: bash diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 76fcf27e..4eb8849d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index c1eb2ed6..0412a385 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -14,11 +14,11 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3 with: platforms: arm64 From b1b0edaeedd073f21023ec01a60bfa9da077ad2b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 21 Sep 2023 14:58:37 +0900 Subject: [PATCH 316/349] release v1.0.6 (#564) --- ChangeLog.rst | 6 +++--- msgpack/__init__.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index bf345ddb..bad51aaf 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,7 +1,7 @@ -1.0.6rc1 -======== +1.0.6 +===== -Release Date: 2023-09-13 +Release Date: 2023-09-21 * Add Python 3.12 wheels (#517) * Remove Python 2.7, 3.6, and 3.7 support diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 2e201330..781bcdf4 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,8 +4,8 @@ import os -version = (1, 0, 6, "rc", 1) -__version__ = "1.0.6rc1" +version = (1, 0, 6) +__version__ = "1.0.6" if os.environ.get("MSGPACK_PUREPYTHON"): From ecf03748c7241a0fb6bef733c7e5d2d68179b670 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 28 Sep 2023 15:03:16 +0900 Subject: [PATCH 317/349] remove inline macro for msvc (#567) --- msgpack/pack.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/msgpack/pack.h b/msgpack/pack.h index 1e849acc..2453428c 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -26,10 +26,6 @@ extern "C" { #endif -#ifdef _MSC_VER -#define inline __inline -#endif - typedef struct msgpack_packer { char *buf; size_t length; From acd068439233b8f04543c4ee81c18c8dbb681aba Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 28 Sep 2023 15:25:10 +0900 Subject: [PATCH 318/349] do not fallback on build error (#568) --- setup.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) mode change 100755 => 100644 setup.py diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index 7a34c8cf..a13bd81d --- a/setup.py +++ b/setup.py @@ -22,6 +22,8 @@ class NoCython(Exception): def cythonize(src): + if not have_cython: + raise Exception("Cython is required for building from checkout") sys.stderr.write(f"cythonize: {src!r}\n") cython_compiler.compile([src], cplus=True) @@ -29,31 +31,15 @@ def cythonize(src): def ensure_source(src): pyx = os.path.splitext(src)[0] + ".pyx" - if not os.path.exists(src): - if not have_cython: - raise NoCython + if not os.path.exists(src) or have_cython and os.stat(src).st_mtime 
< os.stat(pyx).st_mtime: cythonize(pyx) - elif os.path.exists(pyx) and os.stat(src).st_mtime < os.stat(pyx).st_mtime and have_cython: - cythonize(pyx) - return src class BuildExt(build_ext): def build_extension(self, ext): - try: - ext.sources = list(map(ensure_source, ext.sources)) - except NoCython: - print("WARNING") - print("Cython is required for building extension from checkout.") - print("Install Cython >= 0.16 or install msgpack from PyPI.") - print("Falling back to pure Python implementation.") - return - try: - return build_ext.build_extension(self, ext) - except Exception as e: - print("WARNING: Failed to compile extension modules.") - print("msgpack uses fallback pure python implementation.") - print(e) + for src in ext.sources: + ensure_source(src) + return build_ext.build_extension(self, ext) # Cython is required for sdist From 2982e9ff729eae150d67ee608fdf1d01d93d8e3f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 28 Sep 2023 17:31:52 +0900 Subject: [PATCH 319/349] release v1.0.7 (#569) --- .gitignore | 1 + ChangeLog.rst | 13 +++++++++++++ msgpack/__init__.py | 4 ++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 800f1c22..8a06e267 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ MANIFEST build/* dist/* .tox +.python-version *.pyc *.pyo *.so diff --git a/ChangeLog.rst b/ChangeLog.rst index bad51aaf..ca74ebe3 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,8 +1,21 @@ +1.0.7 +===== + +Release Date: 2023-09-28 + +* Fix build error of extension module on Windows. (#567) +* ``setup.py`` doesn't skip build error of extension module. (#568) + + 1.0.6 ===== Release Date: 2023-09-21 +.. note:: + v1.0.6 Wheels for Windows don't contain extension module. + Please upgrade to v1.0.7 or newer. + * Add Python 3.12 wheels (#517) * Remove Python 2.7, 3.6, and 3.7 support diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 781bcdf4..60a088cc 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,8 +4,8 @@ import os -version = (1, 0, 6) -__version__ = "1.0.6" +version = (1, 0, 7) +__version__ = "1.0.7" if os.environ.get("MSGPACK_PUREPYTHON"): From c78026102c981d734c6d8fec6b2790ee31212f27 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 15 Nov 2023 23:34:32 +0900 Subject: [PATCH 320/349] doc: use sphinx-rtd-theme (#575) --- .github/workflows/docs.yaml | 1 - .readthedocs.yaml | 1 + docs/conf.py | 4 +--- docs/requirements.txt | 2 ++ tox.ini | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 docs/requirements.txt diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 80bbba7d..8c8298a0 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -24,7 +24,6 @@ jobs: run: | pip install -r requirements.txt make cython - pip install . - name: Sphinx Documentation Generator run: | diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7447895d..88d87182 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,6 +18,7 @@ python: install: - method: pip path: . + - requirements: docs/requirements.txt sphinx: configuration: docs/conf.py diff --git a/docs/conf.py b/docs/conf.py index 6eb472af..ab0ad3c8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # msgpack documentation build configuration file, created by # sphinx-quickstart on Sun Feb 24 14:20:50 2013. # @@ -91,7 +89,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
-html_theme = "sphinxdoc" +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..8d45d0b6 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx~=7.2 +sphinx-rtd-theme~=1.3.0 diff --git a/tox.ini b/tox.ini index 369eddcf..49364be9 100644 --- a/tox.ini +++ b/tox.ini @@ -33,6 +33,6 @@ commands= [testenv:sphinx] changedir = docs deps = - sphinx + -r docs/requirements.txt commands = sphinx-build -n -v -W --keep-going -b html -d {envtmpdir}/doctrees . {envtmpdir}/html From 140864249fd0f67dffaeceeb168ffe9cdf6f1964 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Dec 2023 20:46:04 +0900 Subject: [PATCH 321/349] exclude C/Cython files from wheel (#577) --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index a63009a7..121b1fd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,10 @@ Repository = "https://github.com/msgpack/msgpack-python/" Tracker = "https://github.com/msgpack/msgpack-python/issues" Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" +[tool.setuptools] +# Do not install C/C++/Cython source files +include-package-data = false + [tool.setuptools.dynamic] version = {attr = "msgpack.__version__"} From 039022cecb04b62a29afb8260b81f57a937aaaaa Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 1 Mar 2024 19:24:06 +0900 Subject: [PATCH 322/349] update Cython (#581) --- .github/workflows/wheel.yml | 10 +++++++++- pyproject.toml | 2 +- requirements.txt | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 0412a385..4a8847d0 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -3,6 +3,7 @@ on: push: branches: [main] create: + workflow_dispatch: jobs: build_wheels: @@ -23,7 +24,7 @@ jobs: platforms: arm64 - name: Set up Python 3.x - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.x" cache: "pip" @@ -43,6 +44,13 @@ jobs: CIBW_ARCHS_MACOS: x86_64 universal2 arm64 CIBW_SKIP: pp* + - name: Build pure Python wheel + env: + MSGPACK_PUREPYTHON: "1" + run: | + pip install build + python -m build -w -o wheelhouse + - name: Upload Wheels to artifact uses: actions/upload-artifact@v1 with: diff --git a/pyproject.toml b/pyproject.toml index 121b1fd9..f9af967b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = [ # Also declared in requirements.txt, if updating here please also update # there - "Cython~=3.0.0", + "Cython~=3.0.8", "setuptools >= 35.0.2", ] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index e27df0f4..839dc5f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # Also declared in pyproject.toml, if updating here please also update there. -Cython~=3.0.0 +Cython~=3.0.8 # Tools required only for development. No need to add it to pyproject.toml file. 
black==23.3.0 From bf7bf88ad0b3cd7a8cd74e8251521fde743e9af9 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 1 Mar 2024 20:09:55 +0900 Subject: [PATCH 323/349] ci: update workflows (#582) --- .github/workflows/test.yml | 14 +++++++++++++- .github/workflows/wheel.yml | 6 +++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4eb8849d..1faeb0ca 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.py }} allow-prereleases: true @@ -42,3 +42,15 @@ jobs: shell: bash run: | MSGPACK_PUREPYTHON=1 pytest -v test + + - name: build packages + shell: bash + run: | + pip install build + python -m build + + - name: upload packages + uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.os }}-${{ matrix.py }} + path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 4a8847d0..e91325be 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -36,7 +36,7 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.15.0 + uses: pypa/cibuildwheel@v2.16.5 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" @@ -52,7 +52,7 @@ jobs: python -m build -w -o wheelhouse - name: Upload Wheels to artifact - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: - name: Wheels + name: wheels-${{ matrix.os }} path: wheelhouse From 9aedf8ed7f632044d42984e9710fefbd97023f71 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 1 Mar 2024 20:35:28 +0900 Subject: [PATCH 324/349] Release v1.0.8 (#583) --- ChangeLog.rst | 10 ++++++++++ msgpack/__init__.py | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index ca74ebe3..2408bc9f 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,13 @@ +1.0.8 +===== + +Release Date: 2024-03-01 + +* Update Cython to 3.0.8. This fixes memory leak when iterating + ``Unpacker`` object on Python 3.12. +* Do not include C/Cython files in binary wheels. 
+ + 1.0.7 ===== diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 60a088cc..919b86f1 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,8 +4,8 @@ import os -version = (1, 0, 7) -__version__ = "1.0.7" +version = (1, 0, 8) +__version__ = "1.0.8" if os.environ.get("MSGPACK_PUREPYTHON"): From e77672200bf89f2093b96a4abd0e4eeb253975b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?hakan=20aky=C3=BCrek?= Date: Sat, 20 Apr 2024 00:46:30 +0200 Subject: [PATCH 325/349] Avoid using floating points during timestamp-datetime conversions (#591) --- msgpack/ext.py | 6 ++++-- test/test_timestamp.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/msgpack/ext.py b/msgpack/ext.py index 02c2c430..3940fe0f 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -157,7 +157,9 @@ def to_datetime(self): :rtype: `datetime.datetime` """ utc = datetime.timezone.utc - return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(seconds=self.to_unix()) + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( + seconds=self.seconds, microseconds=self.nanoseconds // 1000 + ) @staticmethod def from_datetime(dt): @@ -165,4 +167,4 @@ def from_datetime(dt): :rtype: Timestamp """ - return Timestamp.from_unix(dt.timestamp()) + return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) diff --git a/test/test_timestamp.py b/test/test_timestamp.py index db5cc57a..f9bc8353 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -86,6 +86,21 @@ def test_timestamp_datetime(): utc = datetime.timezone.utc assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) + ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc) + ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc) + + assert ( + Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000 + ) + + ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256) + ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257) + assert ( + Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000 + ) + + assert Timestamp.from_datetime(ts).to_datetime() == ts + def test_unpack_datetime(): t = Timestamp(42, 14) From 2eca765533106cc4bbb44ecc565b1034ac50f91b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 3 May 2024 15:17:54 +0900 Subject: [PATCH 326/349] use ruff instead of black (#598) --- .github/workflows/docs.yaml | 17 +++++---- .github/workflows/{black.yaml => lint.yaml} | 19 +++++------ Makefile | 14 ++++++-- docs/requirements.txt | 4 +-- msgpack/fallback.py | 1 + pyproject.toml | 4 +-- requirements.txt | 7 +--- test/test_memoryview.py | 2 +- test/test_pack.py | 2 +- test/test_read_size.py | 1 + tox.ini | 38 --------------------- 11 files changed, 38 insertions(+), 71 deletions(-) rename .github/workflows/{black.yaml => lint.yaml} (56%) delete mode 100644 tox.ini diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 8c8298a0..08fc2f46 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -10,22 +10,25 @@ jobs: runs-on: ubuntu-latest steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' architecture: 'x64' - - - name: Checkout - uses: actions/checkout@v4 + cache: "pip" + cache-dependency-path: | + requirements.txt + docs/requirements.txt - name: Build - shell: bash run: | pip install -r requirements.txt make cython - 
name: Sphinx Documentation Generator run: | - pip install tox - tox -e sphinx + pip install -r docs/requirements.txt + make docs diff --git a/.github/workflows/black.yaml b/.github/workflows/lint.yaml similarity index 56% rename from .github/workflows/black.yaml rename to .github/workflows/lint.yaml index e0917926..198cf7b5 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/lint.yaml @@ -1,25 +1,22 @@ -name: Black +name: lint on: ["push", "pull_request"] jobs: - black: + lint: # We want to run on external PRs, but not on our own internal PRs as they'll be run # by the push to the branch. if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest steps: - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - architecture: 'x64' - - name: Checkout uses: actions/checkout@v4 - - name: Black Code Formatter + - name: ruff check + run: | + pipx run ruff check --diff msgpack/ test/ setup.py + + - name: ruff format run: | - pip install black==22.3.0 - black -S --diff --check msgpack/ test/ setup.py + pipx run ruff format --diff msgpack/ test/ setup.py diff --git a/Makefile b/Makefile index e4f22da4..5c1863c7 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,17 @@ PYTHON_SOURCES = msgpack test setup.py all: cython python setup.py build_ext -i -f -.PHONY: black -black: - black $(PYTHON_SOURCES) +.PHONY: format +format: + pipx run ruff format $(PYTHON_SOURCES) + +.PHONY: lint +lint: + pipx run ruff check $(PYTHON_SOURCES) + +.PHONY: doc +doc: + cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . html .PHONY: pyupgrade pyupgrade: diff --git a/docs/requirements.txt b/docs/requirements.txt index 8d45d0b6..26002de4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,2 @@ -sphinx~=7.2 -sphinx-rtd-theme~=1.3.0 +sphinx~=7.3.7 +sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/fallback.py b/msgpack/fallback.py index a174162a..ea4c4ced 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,4 +1,5 @@ """Fallback pure Python implementation of msgpack""" + from datetime import datetime as _DateTime import sys import struct diff --git a/pyproject.toml b/pyproject.toml index f9af967b..d99375ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,9 +54,9 @@ skip_string_normalization = true [tool.ruff] line-length = 100 target-version = "py38" -ignore = [] +lint.ignore = [] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "msgpack/__init__.py" = ["F401", "F403"] "msgpack/fallback.py" = ["E731"] "test/test_seq.py" = ["E501"] diff --git a/requirements.txt b/requirements.txt index 839dc5f1..1164a941 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,2 @@ # Also declared in pyproject.toml, if updating here please also update there. -Cython~=3.0.8 - -# Tools required only for development. No need to add it to pyproject.toml file. 
-black==23.3.0 -pytest==7.3.1 -pyupgrade==3.3.2 +Cython~=3.0.10 diff --git a/test/test_memoryview.py b/test/test_memoryview.py index dc319a63..eff4bca0 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -95,4 +95,4 @@ def test_multidim_memoryview(): view = memoryview(b"\00" * 6) data = view.cast(view.format, (3, 2)) packed = packb(data) - assert packed == b'\xc4\x06\x00\x00\x00\x00\x00\x00' + assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00" diff --git a/test/test_pack.py b/test/test_pack.py index 42325378..4a0ef403 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -89,7 +89,7 @@ def testStrictUnicodeUnpack(): def testIgnoreErrorsPack(): re = unpackb( - packb("abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors="ignore"), + packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"), raw=False, use_list=1, ) diff --git a/test/test_read_size.py b/test/test_read_size.py index 33a7e7dd..a7d61fd9 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,4 +1,5 @@ """Test Unpacker's read_array_header and read_map_header methods""" + from msgpack import packb, Unpacker, OutOfData UnexpectedTypeException = ValueError diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 49364be9..00000000 --- a/tox.ini +++ /dev/null @@ -1,38 +0,0 @@ -[tox] -envlist = - {py35,py36,py37,py38}-{c,pure}, - {pypy,pypy3}-pure, - py34-x86, - sphinx, -isolated_build = true - -[testenv] -deps= - pytest - -changedir=test -commands= - c,x86: python -c 'from msgpack import _cmsgpack' - c,x86: py.test - pure: py.test -setenv= - pure: MSGPACK_PUREPYTHON=x - -[testenv:py34-x86] -basepython=python3.4-x86 -deps= - pytest - -changedir=test -commands= - python -c 'import sys; print(hex(sys.maxsize))' - python -c 'from msgpack import _cmsgpack' - py.test - - -[testenv:sphinx] -changedir = docs -deps = - -r docs/requirements.txt -commands = - sphinx-build -n -v -W --keep-going -b html -d {envtmpdir}/doctrees . {envtmpdir}/html From 0602baf3ea7fb597d1c78a90980071e03a536836 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 3 May 2024 18:20:09 +0900 Subject: [PATCH 327/349] update Cython and setuptools (#599) --- DEVELOP.md | 8 -------- pyproject.toml | 7 +++---- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/DEVELOP.md b/DEVELOP.md index 9c823c34..27adf8c0 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -1,13 +1,5 @@ # Developer's note -## Wheels - -Wheels for macOS and Linux are built on Travis and AppVeyr, in -[methane/msgpack-wheels](https://github.com/methane/msgpack-wheels) repository. - -Wheels for Windows are built on Github Actions in this repository. - - ### Build ``` diff --git a/pyproject.toml b/pyproject.toml index d99375ac..6254f06f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,8 @@ [build-system] requires = [ - # Also declared in requirements.txt, if updating here please also update - # there - "Cython~=3.0.8", - "setuptools >= 35.0.2", + # Also declared in requirements.txt, if updating here please also update there + "Cython~=3.0.10", + "setuptools >= 69.5.1", ] build-backend = "setuptools.build_meta" From 3e9a2a7419714c294be0590aab24f2dc040581f5 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 4 May 2024 16:01:48 +0900 Subject: [PATCH 328/349] Stop using c++ (#600) Python 3.13a6+ & C++ & Cython cause compile error on some compilers. 
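In C, the former template parameters become ordinary parameters: the compile-time
``construct`` flag is now a runtime ``bool`` argument, and the two container-header
variants are produced by defining ``fixed_offset`` / ``var_offset`` as macros around
an ``#include`` of a shared body (see the diff below). The following is only a
minimal, simplified sketch of the runtime-flag half of that idea; the entry-point
names are borrowed from the diff, but the bodies are a toy illustration rather than
code from this repository:

    /* Illustration only: a C++ "template <bool construct>" function rewritten
     * as a plain C function that receives the flag at run time. */
    #include <stdbool.h>
    #include <stdio.h>

    static int unpack_execute(bool construct, int value)
    {
        /* construct == false means "skip": walk the input but build nothing */
        return construct ? value : 0;
    }

    /* thin wrappers keep the old entry-point names */
    static int unpack_construct(int value) { return unpack_execute(true, value); }
    static int unpack_skip(int value)      { return unpack_execute(false, value); }

    int main(void)
    {
        printf("%d %d\n", unpack_construct(42), unpack_skip(42)); /* prints: 42 0 */
        return 0;
    }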
--- .github/workflows/test.yml | 2 +- Makefile | 2 +- msgpack/_unpacker.pyx | 2 +- msgpack/pack.h | 2 + msgpack/unpack_container_header.h | 51 +++++++++++++++++++ msgpack/unpack_template.h | 85 ++++++++----------------------- setup.py | 6 +-- 7 files changed, 81 insertions(+), 69 deletions(-) create mode 100644 msgpack/unpack_container_header.h diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1faeb0ca..530238c9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: os: ["ubuntu-latest", "windows-latest", "macos-latest"] - py: ["3.12", "3.11", "3.10", "3.9", "3.8"] + py: ["3.13-dev", "3.12", "3.11", "3.10", "3.9", "3.8"] runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} diff --git a/Makefile b/Makefile index 5c1863c7..3ce178f2 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ pyupgrade: .PHONY: cython cython: - cython --cplus msgpack/_cmsgpack.pyx + cython msgpack/_cmsgpack.pyx .PHONY: test test: cython diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 56126f43..2771e7bf 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -35,7 +35,7 @@ cdef extern from "unpack.h": PyObject* timestamp_t PyObject *giga; PyObject *utc; - char *unicode_errors + const char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len Py_ssize_t max_array_len diff --git a/msgpack/pack.h b/msgpack/pack.h index 2453428c..901fad7b 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -24,6 +24,8 @@ #ifdef __cplusplus extern "C" { +#else +#define bool char #endif typedef struct msgpack_packer { diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h new file mode 100644 index 00000000..c14a3c2b --- /dev/null +++ b/msgpack/unpack_container_header.h @@ -0,0 +1,51 @@ +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... 
fixed_offset + 0xf:
+#else
+    case fixed_offset + 0x0:
+    case fixed_offset + 0x1:
+    case fixed_offset + 0x2:
+    case fixed_offset + 0x3:
+    case fixed_offset + 0x4:
+    case fixed_offset + 0x5:
+    case fixed_offset + 0x6:
+    case fixed_offset + 0x7:
+    case fixed_offset + 0x8:
+    case fixed_offset + 0x9:
+    case fixed_offset + 0xa:
+    case fixed_offset + 0xb:
+    case fixed_offset + 0xc:
+    case fixed_offset + 0xd:
+    case fixed_offset + 0xe:
+    case fixed_offset + 0xf:
+#endif
+        ++*off;
+        size = ((unsigned int)*p) & 0x0f;
+        break;
+    default:
+        PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream");
+        return -1;
+    }
+    unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj);
+    return 1;
+}
+
diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h
index 8b9fcc19..cce29e7a 100644
--- a/msgpack/unpack_template.h
+++ b/msgpack/unpack_template.h
@@ -75,8 +75,7 @@ static inline void unpack_clear(unpack_context *ctx)
     Py_CLEAR(ctx->stack[0].obj);
 }
 
-template <bool construct>
-static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off)
+static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off)
 {
     assert(len >= *off);
 
@@ -386,6 +385,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize
 #undef construct_cb
 }
 
+#undef NEXT_CS
 #undef SWITCH_RANGE_BEGIN
 #undef SWITCH_RANGE
 #undef SWITCH_RANGE_DEFAULT
@@ -397,68 +397,27 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize
 #undef again_fixed_trail_if_zero
 #undef start_container
 
-template
-static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off)
-{
-    assert(len >= *off);
-    uint32_t size;
-    const unsigned char *const p = (unsigned char*)data + *off;
-
-#define inc_offset(inc) \
-    if (len - *off < inc) \
-        return 0; \
-    *off += inc;
-
-    switch (*p) {
-    case var_offset:
-        inc_offset(3);
-        size = _msgpack_load16(uint16_t, p + 1);
-        break;
-    case var_offset + 1:
-        inc_offset(5);
-        size = _msgpack_load32(uint32_t, p + 1);
-        break;
-#ifdef USE_CASE_RANGE
-    case fixed_offset + 0x0 ... 
fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; +static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(1, ctx, data, len, off); +} +static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { + return unpack_execute(0, ctx, data, len, off); } -#undef SWITCH_RANGE_BEGIN -#undef SWITCH_RANGE -#undef SWITCH_RANGE_DEFAULT -#undef SWITCH_RANGE_END - -static const execute_fn unpack_construct = &unpack_execute; -static const execute_fn unpack_skip = &unpack_execute; -static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; -static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; - -#undef NEXT_CS +#define unpack_container_header read_array_header +#define fixed_offset 0x90 +#define var_offset 0xdc +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset + +#define unpack_container_header read_map_header +#define fixed_offset 0x80 +#define var_offset 0xde +#include "unpack_container_header.h" +#undef unpack_container_header +#undef fixed_offset +#undef var_offset /* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/setup.py b/setup.py index a13bd81d..dc14a26e 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def cythonize(src): if not have_cython: raise Exception("Cython is required for building from checkout") sys.stderr.write(f"cythonize: {src!r}\n") - cython_compiler.compile([src], cplus=True) + cython_compiler.compile([src]) def ensure_source(src): @@ -51,17 +51,17 @@ def __init__(self, *args, **kwargs): libraries = [] macros = [] +ext_modules = [] if sys.platform == "win32": libraries.append("ws2_32") macros = [("__LITTLE_ENDIAN__", "1")] -ext_modules = [] if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): ext_modules.append( Extension( "msgpack._cmsgpack", - sources=["msgpack/_cmsgpack.cpp"], + sources=["msgpack/_cmsgpack.c"], libraries=libraries, include_dirs=["."], define_macros=macros, From b389ccf2f72355e23836ced193b555401508ef81 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 4 May 2024 16:10:37 +0900 Subject: [PATCH 329/349] update README (#561) --- README.md | 116 +++++++++++++++++++++++------------------------------- 1 file changed, 50 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 61f99e1f..d3247bdf 100644 --- a/README.md +++ b/README.md @@ -10,53 +10,6 @@ It lets you exchange data among multiple languages like JSON. But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. - -## Very important notes for existing users - -### PyPI package name - -Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. - -When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before -`pip install -U msgpack`. 
- - -### Compatibility with the old format - -You can use `use_bin_type=False` option to pack `bytes` -object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. - -You can unpack old msgpack format using `raw=True` option. -It unpacks str (raw) type in msgpack into Python bytes. - -See note below for detail. - - -### Major breaking changes in msgpack 1.0 - -* Python 2 - - * The extension module does not support Python 2 anymore. - The pure Python implementation (`msgpack.fallback`) is used for Python 2. - -* Packer - - * `use_bin_type=True` by default. bytes are encoded in bin type in msgpack. - **If you are still using Python 2, you must use unicode for all string types.** - You can use `use_bin_type=False` to encode into old msgpack format. - * `encoding` option is removed. UTF-8 is used always. - -* Unpacker - - * `raw=False` by default. It assumes str types are valid UTF-8 string - and decode them to Python str (unicode) object. - * `encoding` option is removed. You can use `raw=True` to support old format. - * Default value of `max_buffer_size` is changed from 0 to 100 MiB. - * Default value of `strict_map_key` is changed to True to avoid hashdos. - You need to pass `strict_map_key=False` if you have data which contain map keys - which type is not bytes or str. - - ## Install ``` @@ -65,12 +18,9 @@ $ pip install msgpack ### Pure Python implementation -The extension module in msgpack (`msgpack._cmsgpack`) does not support -Python 2 and PyPy. - -But msgpack provides a pure Python implementation (`msgpack.fallback`) -for PyPy and Python 2. +The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. +But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. ### Windows @@ -82,10 +32,6 @@ Without extension, using pure Python implementation on CPython runs slowly. ## How to use -NOTE: In examples below, I use `raw=False` and `use_bin_type=True` for users -using msgpack < 1.0. These options are default from msgpack 1.0 so you can omit them. - - ### One-shot pack & unpack Use `packb` for packing and `unpackb` for unpacking. @@ -97,16 +43,16 @@ msgpack provides `dumps` and `loads` as an alias for compatibility with ```pycon >>> import msgpack ->>> msgpack.packb([1, 2, 3], use_bin_type=True) +>>> msgpack.packb([1, 2, 3]) '\x93\x01\x02\x03' ->>> msgpack.unpackb(_, raw=False) +>>> msgpack.unpackb(_) [1, 2, 3] ``` `unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: ```pycon ->>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) +>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False) (1, 2, 3) ``` @@ -127,11 +73,11 @@ from io import BytesIO buf = BytesIO() for i in range(100): - buf.write(msgpack.packb(i, use_bin_type=True)) + buf.write(msgpack.packb(i)) buf.seek(0) -unpacker = msgpack.Unpacker(buf, raw=False) +unpacker = msgpack.Unpacker(buf) for unpacked in unpacker: print(unpacked) ``` @@ -162,8 +108,8 @@ def encode_datetime(obj): return obj -packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) -this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) +packed_dict = msgpack.packb(useful_dict, default=encode_datetime) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) ``` `Unpacker`'s `object_hook` callback receives a dict; the @@ -191,8 +137,8 @@ It is also possible to pack/unpack custom data types using the **ext** type. ... return ExtType(code, data) ... 
>>> data = array.array('d', [1.2, 3.4]) ->>> packed = msgpack.packb(data, default=default, use_bin_type=True) ->>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) +>>> packed = msgpack.packb(data, default=default) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) >>> data == unpacked True ``` @@ -210,7 +156,7 @@ in a map, can be unpacked or skipped individually. ## Notes -### string and binary type +### string and binary type in old msgpack spec Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. @@ -263,3 +209,41 @@ You can use `gc.disable()` when unpacking large message. List is the default sequence type of Python. But tuple is lighter than list. You can use `use_list=False` while unpacking when performance is important. + + +## Major breaking changes in the history + +### msgpack 0.5 + +Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. + + +### msgpack 1.0 + +* Python 2 support + + * The extension module does not support Python 2 anymore. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + + * msgpack 1.0.6 drops official support of Python 2.7, as pip and + GitHub Action (setup-python) no longer support Python 2.7. + +* Packer + + * Packer uses `use_bin_type=True` by default. + Bytes are encoded in bin type in msgpack. + * The `encoding` option is removed. UTF-8 is used always. + +* Unpacker + + * Unpacker uses `raw=False` by default. It assumes str types are valid UTF-8 string + and decode them to Python str (unicode) object. + * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). + * Default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attack. + You need to pass `max_buffer_size=0` if you have large but safe data. + * Default value of `strict_map_key` is changed to True to avoid hashdos. + You need to pass `strict_map_key=False` if you have data which contain map keys + which type is not bytes or str. 
From 526ec9c923c4867c96537c1f09783fe59432f737 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 4 May 2024 16:49:22 +0900 Subject: [PATCH 330/349] update cibuildwheel to 2.17 (#601) --- .github/workflows/docs.yaml | 1 - .github/workflows/wheel.yml | 15 ++++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 08fc2f46..b696b926 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -17,7 +17,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: '3.x' - architecture: 'x64' cache: "pip" cache-dependency-path: | requirements.txt diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index e91325be..d57e0586 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -14,29 +14,25 @@ jobs: name: Build wheels on ${{ matrix.os }} steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Set up QEMU if: runner.os == 'Linux' uses: docker/setup-qemu-action@v3 with: - platforms: arm64 + platforms: all - - name: Set up Python 3.x - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.x" cache: "pip" - - - name: Prepare + - name: Cythonize shell: bash run: | pip install -r requirements.txt make cython - name: Build - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.17.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" @@ -45,6 +41,7 @@ jobs: CIBW_SKIP: pp* - name: Build pure Python wheel + if: runner.os == 'Linux' env: MSGPACK_PUREPYTHON: "1" run: | From 52f8bc2e557ca6684f2f73d129da8317a88bc431 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 5 May 2024 23:14:27 +0900 Subject: [PATCH 331/349] implement buffer protocol (#602) Fix #479 --- msgpack/_packer.pyx | 10 +++++++--- msgpack/buff_converter.h | 8 -------- 2 files changed, 7 insertions(+), 11 deletions(-) delete mode 100644 msgpack/buff_converter.h diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 3c398672..c2018806 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -44,8 +44,6 @@ cdef extern from "pack.h": int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) -cdef extern from "buff_converter.h": - object buff_to_buff(char *, Py_ssize_t) cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -371,4 +369,10 @@ cdef class Packer(object): def getbuffer(self): """Return view of internal buffer.""" - return buff_to_buff(self.pk.buf, self.pk.length) + return memoryview(self) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) + + def __releasebuffer__(self, Py_buffer *buffer): + pass diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h deleted file mode 100644 index 86b4196d..00000000 --- a/msgpack/buff_converter.h +++ /dev/null @@ -1,8 +0,0 @@ -#include "Python.h" - -/* cython does not support this preprocessor check => write it in raw C */ -static PyObject * -buff_to_buff(char *buff, Py_ssize_t size) -{ - return PyMemoryView_FromMemory(buff, size, PyBUF_READ); -} From a97b31437d3301fa4c29b7813dabee0e690756e8 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 00:13:59 +0900 Subject: [PATCH 332/349] Remove unused code (#603) --- msgpack/_packer.pyx | 7 -- msgpack/pack.h | 3 +- msgpack/pack_template.h | 238 
++-------------------------------------- 3 files changed, 8 insertions(+), 240 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index c2018806..c7eeda11 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -16,8 +16,6 @@ from .ext import ExtType, Timestamp cdef extern from "Python.h": int PyMemoryView_Check(object obj) - char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL - cdef extern from "pack.h": struct msgpack_packer: @@ -26,11 +24,9 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_int(msgpack_packer* pk, int d) int msgpack_pack_nil(msgpack_packer* pk) int msgpack_pack_true(msgpack_packer* pk) int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) int msgpack_pack_long_long(msgpack_packer* pk, long long d) int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) int msgpack_pack_float(msgpack_packer* pk, float d) @@ -184,9 +180,6 @@ cdef class Packer(object): continue else: raise OverflowError("Integer value out of range") - elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): if self.use_float: fval = o diff --git a/msgpack/pack.h b/msgpack/pack.h index 901fad7b..688eab86 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -21,11 +21,10 @@ #include "sysdep.h" #include #include +#include #ifdef __cplusplus extern "C" { -#else -#define bool char #endif typedef struct msgpack_packer { diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index 7d479b6d..b8959f02 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -37,18 +37,6 @@ * Integer */ -#define msgpack_pack_real_uint8(x, d) \ -do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ -} while(0) - #define msgpack_pack_real_uint16(x, d) \ do { \ if(d < (1<<7)) { \ @@ -123,18 +111,6 @@ do { \ } \ } while(0) -#define msgpack_pack_real_int8(x, d) \ -do { \ - if(d < -(1<<5)) { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } \ -} while(0) - #define msgpack_pack_real_int16(x, d) \ do { \ if(d < -(1<<5)) { \ @@ -264,49 +240,6 @@ do { \ } while(0) -static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) -{ - msgpack_pack_real_uint8(x, d); -} - -static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) -{ - msgpack_pack_real_uint16(x, d); -} - -static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) -{ - msgpack_pack_real_uint32(x, d); -} - -static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) -{ - msgpack_pack_real_uint64(x, d); -} - -static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) -{ - msgpack_pack_real_int8(x, d); -} - -static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) -{ - msgpack_pack_real_int16(x, d); -} - -static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) -{ - msgpack_pack_real_int32(x, d); -} - -static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) -{ - msgpack_pack_real_int64(x, d); -} - - -//#ifdef msgpack_pack_inline_func_cint - static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if 
defined(SIZEOF_SHORT) @@ -372,192 +305,37 @@ if(sizeof(int) == 2) { static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG == 4 +#if SIZEOF_LONG == 4 msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LONG_MAX == 0x7fffffffL +#if LONG_MAX == 0x7fffffffL msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #else -if(sizeof(long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} + if (sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); + } else { + msgpack_pack_real_int64(x, d); + } #endif } static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(LLONG_MAX) -#if LLONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LLONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(long long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long long) == 4) { - msgpack_pack_real_int32(x, d); -} else { msgpack_pack_real_int64(x, d); } -#endif -} - -static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) -{ -#if defined(SIZEOF_SHORT) -#if SIZEOF_SHORT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_SHORT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(USHRT_MAX) -#if USHRT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif USHRT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned short) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned short) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) -{ -#if defined(SIZEOF_INT) -#if SIZEOF_INT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_INT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(UINT_MAX) -#if UINT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif UINT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) -{ -#if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULONG_MAX) -#if ULONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - 
msgpack_pack_real_uint64(x, d); -} -#endif -} static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULLONG_MAX) -#if ULLONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULLONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { msgpack_pack_real_uint64(x, d); } -#endif -} - -//#undef msgpack_pack_inline_func_cint -//#endif - /* @@ -810,11 +588,9 @@ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uin #undef TAKE8_32 #undef TAKE8_64 -#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 -#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 From bf2413f915474841aafee2c98321dc465e5f0a3e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 00:30:07 +0900 Subject: [PATCH 333/349] ignore msgpack/*.c --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8a06e267..341be631 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ dist/* *.so *~ msgpack/__version__.py +msgpack/*.c msgpack/*.cpp *.egg-info /venv From 72e65feb0e02449fa191346f26b54d3842ab7e69 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 00:49:12 +0900 Subject: [PATCH 334/349] packer: add `buf_size` option (#604) And change the default buffer size to 256KiB. Signed-off-by: Rodrigo Tobar Co-authored-by: Rodrigo Tobar --- msgpack/_packer.pyx | 13 +++++++++---- msgpack/_unpacker.pyx | 2 +- msgpack/fallback.py | 27 +++++---------------------- 3 files changed, 15 insertions(+), 27 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index c7eeda11..99557d37 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -53,7 +53,7 @@ cdef inline int PyBytesLike_CheckExact(object o): return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) -cdef class Packer(object): +cdef class Packer: """ MessagePack Packer @@ -97,6 +97,11 @@ cdef class Packer(object): :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + The size of the internal buffer. (default: 256*1024) + Useful if serialisation size can be correctly estimated, + avoid unnecessary reallocations. 
""" cdef msgpack_packer pk cdef object _default @@ -107,8 +112,7 @@ cdef class Packer(object): cdef bint autoreset cdef bint datetime - def __cinit__(self): - cdef int buf_size = 1024*1024 + def __cinit__(self, buf_size=256*1024, **_kwargs): self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") @@ -117,7 +121,8 @@ cdef class Packer(object): def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None): + bint strict_types=False, bint datetime=False, unicode_errors=None, + buf_size=256*1024): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 2771e7bf..34ff3304 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -210,7 +210,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise ValueError("Unpack failed: error = %d" % (ret,)) -cdef class Unpacker(object): +cdef class Unpacker: """Streaming unpacker. Arguments: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index ea4c4ced..cbf0d30e 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -232,6 +232,7 @@ class Unpacker: def __init__( self, file_like=None, + *, read_size=0, use_list=True, raw=False, @@ -650,32 +651,13 @@ class Packer: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. - Example of streaming deserialize from file-like object:: - - unpacker = Unpacker(file_like) - for o in unpacker: - process(o) - - Example of streaming deserialize from socket:: - - unpacker = Unpacker() - while True: - buf = sock.recv(1024**2) - if not buf: - break - unpacker.feed(buf) - for o in unpacker: - process(o) - - Raises ``ExtraData`` when *packed* contains extra bytes. - Raises ``OutOfData`` when *packed* is incomplete. - Raises ``FormatError`` when *packed* is not valid msgpack. - Raises ``StackError`` when *packed* contains too nested. - Other exceptions can be raised during unpacking. + :param int buf_size: + Internal buffer size. This option is used only for C implementation. """ def __init__( self, + *, default=None, use_single_float=False, autoreset=True, @@ -683,6 +665,7 @@ def __init__( strict_types=False, datetime=False, unicode_errors=None, + buf_size=None, ): self._strict_types = strict_types self._use_float = use_single_float From 3da5818a3a96a4bd39f25a483ee3c22ecb626e9d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 02:12:46 +0900 Subject: [PATCH 335/349] update readme (#605) --- README.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d3247bdf..61a03c60 100644 --- a/README.md +++ b/README.md @@ -49,17 +49,7 @@ msgpack provides `dumps` and `loads` as an alias for compatibility with [1, 2, 3] ``` -`unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: - -```pycon ->>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False) -(1, 2, 3) -``` - -You should always specify the `use_list` keyword argument for backward compatibility. -See performance issues relating to `use_list option`_ below. - -Read the docstring for other options. +Read the docstring for options. 
### Streaming unpacking @@ -116,6 +106,9 @@ this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) `object_pairs_hook` callback may instead be used to receive a list of key-value pairs. +NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. +See `datetime` option in `Packer` docstring. + ### Extended types From e1068087e0bb3aebd77c4f2407bf9c429f631787 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 02:13:12 +0900 Subject: [PATCH 336/349] cython: better exception handling (#606) - use `except -1` instead of manual error handling - use `PyUnicode_AsUTF8AndSize()` - use `_pack()` and `_pack_inner()` instead of `while True:` --- msgpack/_packer.pyx | 302 +++++++++++++++++++------------------------- msgpack/pack.h | 21 --- 2 files changed, 131 insertions(+), 192 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 99557d37..ad532215 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -24,21 +24,20 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) - int msgpack_pack_float(msgpack_packer* pk, float d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_bin(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) - int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); - int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) + int msgpack_pack_nil(msgpack_packer* pk) except -1 + int msgpack_pack_true(msgpack_packer* pk) except -1 + int msgpack_pack_false(msgpack_packer* pk) except -1 + int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 + int msgpack_pack_float(msgpack_packer* pk, float d) except -1 + int msgpack_pack_double(msgpack_packer* pk, double d) except -1 + int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 cdef int DEFAULT_RECURSE_LIMIT=511 @@ -119,6 +118,10 @@ cdef class Packer: self.pk.buf_size = buf_size self.pk.length = 0 + def __dealloc__(self): + PyMem_Free(self.pk.buf) + self.pk.buf = NULL + def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, bint strict_types=False, bint datetime=False, unicode_errors=None, @@ -139,155 +142,127 @@ cdef class Packer: else: self.unicode_errors = self._berrors - def __dealloc__(self): - PyMem_Free(self.pk.buf) - self.pk.buf = NULL - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + 
# returns -2 when default should(o) be called + cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: cdef long long llval cdef unsigned long long ullval cdef unsigned long ulval - cdef long longval - cdef float fval - cdef double dval - cdef char* rawval - cdef int ret - cdef dict d + cdef const char* rawval cdef Py_ssize_t L - cdef int default_used = 0 - cdef bint strict_types = self.strict_types + cdef bool strict_types = self.strict_types cdef Py_buffer view - if nest_limit < 0: - raise ValueError("recursion limit exceeded.") - - while True: - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif o is True: - ret = msgpack_pack_true(&self.pk) - elif o is False: - ret = msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): - # PyInt_Check(long) is True for Python 3. - # So we should test long before int. - try: - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - except OverflowError as oe: - if not default_used and self._default is not None: - o = self._default(o) - default_used = True - continue - else: - raise OverflowError("Integer value out of range") - elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) + if o is None: + msgpack_pack_nil(&self.pk) + elif o is True: + msgpack_pack_true(&self.pk) + elif o is False: + msgpack_pack_false(&self.pk) + elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): + try: + if o > 0: + ullval = o + msgpack_pack_unsigned_long_long(&self.pk, ullval) else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) - rawval = o - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): - if self.unicode_errors == NULL: - ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); - if ret == -2: - raise ValueError("unicode string is too large") + llval = o + msgpack_pack_long_long(&self.pk, llval) + except OverflowError as oe: + if will_default: + return -2 else: - o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("unicode string is too large") - ret = msgpack_pack_raw(&self.pk, L) - if ret == 0: - rawval = o - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o): - d = o - L = len(d) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in d.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif not strict_types and PyDict_Check(o): - L = len(o) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif type(o) is ExtType if strict_types else isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. 
- longval = o.code - rawval = o.data - L = len(o.data) - if L > ITEM_LIMIT: - raise ValueError("EXT data is too large") - ret = msgpack_pack_ext(&self.pk, longval, L) - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif type(o) is Timestamp: - llval = o.seconds - ulval = o.nanoseconds - ret = msgpack_pack_timestamp(&self.pk, llval, ulval) - elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): + raise OverflowError("Integer value out of range") + elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): + if self.use_float: + msgpack_pack_float(&self.pk, o) + else: + msgpack_pack_double(&self.pk, o) + elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) + rawval = o + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): + if self.unicode_errors == NULL: + rawval = PyUnicode_AsUTF8AndSize(o, &L) + if L >ITEM_LIMIT: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) L = Py_SIZE(o) if L > ITEM_LIMIT: - raise ValueError("list is too large") - ret = msgpack_pack_array(&self.pk, L) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit-1) - if ret != 0: break - elif PyMemoryView_Check(o): - if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: - raise ValueError("could not get buffer for memoryview") - L = view.len - if L > ITEM_LIMIT: - PyBuffer_Release(&view); - raise ValueError("memoryview is too large") - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, view.buf, L) + raise ValueError("unicode string is too large") + rawval = o + msgpack_pack_raw(&self.pk, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyDict_CheckExact(o) if strict_types else PyDict_Check(o): + L = len(o) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + msgpack_pack_map(&self.pk, L) + for k, v in o.items(): + self._pack(k, nest_limit) + self._pack(v, nest_limit) + elif type(o) is ExtType if strict_types else isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. 
+ rawval = o.data + L = len(o.data) + if L > ITEM_LIMIT: + raise ValueError("EXT data is too large") + msgpack_pack_ext(&self.pk, o.code, L) + msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("list is too large") + msgpack_pack_array(&self.pk, L) + for v in o: + self._pack(v, nest_limit) + elif PyMemoryView_Check(o): + PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) + L = view.len + if L > ITEM_LIMIT: PyBuffer_Release(&view); - elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: - delta = o - epoch - if not PyDelta_CheckExact(delta): - raise ValueError("failed to calculate delta") - llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) - ulval = timedelta_microseconds(delta) * 1000 - ret = msgpack_pack_timestamp(&self.pk, llval, ulval) - elif not default_used and self._default: + raise ValueError("memoryview is too large") + try: + msgpack_pack_bin(&self.pk, L) + msgpack_pack_raw_body(&self.pk, view.buf, L) + finally: + PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + msgpack_pack_timestamp(&self.pk, llval, ulval) + elif will_default: + return -2 + elif self.datetime and PyDateTime_CheckExact(o): + # this should be later than will_default + PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) + else: + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef int ret + if nest_limit < 0: + raise ValueError("recursion limit exceeded.") + nest_limit -= 1 + if self._default is not None: + ret = self._pack_inner(o, 1, nest_limit) + if ret == -2: o = self._default(o) - default_used = 1 - continue - elif self.datetime and PyDateTime_CheckExact(o): - PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) else: - PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) - return ret + return ret + return self._pack_inner(o, 0, nest_limit) - cpdef pack(self, object obj): + def pack(self, object obj): cdef int ret try: ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) @@ -308,11 +283,7 @@ cdef class Packer: def pack_array_header(self, long long size): if size > ITEM_LIMIT: raise ValueError - cdef int ret = msgpack_pack_array(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + msgpack_pack_array(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 @@ -321,11 +292,7 @@ cdef class Packer: def pack_map_header(self, long long size): if size > ITEM_LIMIT: raise ValueError - cdef int ret = msgpack_pack_map(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + msgpack_pack_map(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 @@ -338,17 +305,10 @@ cdef class Packer: *pairs* 
should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) """ - cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) - if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError + msgpack_pack_map(&self.pk, len(pairs)) + for k, v in pairs: + self._pack(k) + self._pack(v) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 diff --git a/msgpack/pack.h b/msgpack/pack.h index 688eab86..edf3a3fe 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -64,27 +64,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ #include "pack_template.h" -// return -2 when o is too long -static inline int -msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) -{ - assert(PyUnicode_Check(o)); - - Py_ssize_t len; - const char* buf = PyUnicode_AsUTF8AndSize(o, &len); - if (buf == NULL) - return -1; - - if (len > limit) { - return -2; - } - - int ret = msgpack_pack_raw(pk, len); - if (ret) return ret; - - return msgpack_pack_raw_body(pk, buf, len); -} - #ifdef __cplusplus } #endif From e0f0e145f15364819bac80bd0808834a9df0065e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 03:33:48 +0900 Subject: [PATCH 337/349] better error checks (#607) * check buffer exports * add error messages --- msgpack/_packer.pyx | 48 +++++++++++++++++++++++++++++++++------------ test/test_buffer.py | 24 +++++++++++++++++++++-- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index ad532215..402b6946 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -106,6 +106,7 @@ cdef class Packer: cdef object _default cdef object _berrors cdef const char *unicode_errors + cdef size_t exports # number of exported buffers cdef bint strict_types cdef bint use_float cdef bint autoreset @@ -117,10 +118,16 @@ cdef class Packer: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 + self.exports = 0 def __dealloc__(self): PyMem_Free(self.pk.buf) self.pk.buf = NULL + assert self.exports == 0 + + cdef _check_exports(self): + if self.exports > 0: + raise BufferError("Existing exports of data: Packer cannot be changed") def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, @@ -149,8 +156,8 @@ cdef class Packer: cdef unsigned long ulval cdef const char* rawval cdef Py_ssize_t L - cdef bool strict_types = self.strict_types cdef Py_buffer view + cdef bint strict = self.strict_types if o is None: msgpack_pack_nil(&self.pk) @@ -158,7 +165,7 @@ cdef class Packer: msgpack_pack_true(&self.pk) elif o is False: msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): + elif PyLong_CheckExact(o) if strict else PyLong_Check(o): try: if o > 0: ullval = o @@ -171,19 +178,19 @@ cdef class Packer: return -2 else: raise OverflowError("Integer value out of range") - elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): + elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): if self.use_float: msgpack_pack_float(&self.pk, o) else: msgpack_pack_double(&self.pk, o) - elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): + elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): L = Py_SIZE(o) if L > ITEM_LIMIT: PyErr_Format(ValueError, 
b"%.200s object is too large", Py_TYPE(o).tp_name) rawval = o msgpack_pack_bin(&self.pk, L) msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): + elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): if self.unicode_errors == NULL: rawval = PyUnicode_AsUTF8AndSize(o, &L) if L >ITEM_LIMIT: @@ -196,7 +203,7 @@ cdef class Packer: rawval = o msgpack_pack_raw(&self.pk, L) msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o) if strict_types else PyDict_Check(o): + elif PyDict_CheckExact(o) if strict else PyDict_Check(o): L = len(o) if L > ITEM_LIMIT: raise ValueError("dict is too large") @@ -204,7 +211,7 @@ cdef class Packer: for k, v in o.items(): self._pack(k, nest_limit) self._pack(v, nest_limit) - elif type(o) is ExtType if strict_types else isinstance(o, ExtType): + elif type(o) is ExtType if strict else isinstance(o, ExtType): # This should be before Tuple because ExtType is namedtuple. rawval = o.data L = len(o.data) @@ -216,7 +223,7 @@ cdef class Packer: llval = o.seconds ulval = o.nanoseconds msgpack_pack_timestamp(&self.pk, llval, ulval) - elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): + elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): L = Py_SIZE(o) if L > ITEM_LIMIT: raise ValueError("list is too large") @@ -264,6 +271,7 @@ cdef class Packer: def pack(self, object obj): cdef int ret + self._check_exports() try: ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) except: @@ -277,12 +285,16 @@ cdef class Packer: return buf def pack_ext_type(self, typecode, data): + self._check_exports() + if len(data) > ITEM_LIMIT: + raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) def pack_array_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise ValueError + raise ValueError("array too large") msgpack_pack_array(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) @@ -290,8 +302,9 @@ cdef class Packer: return buf def pack_map_header(self, long long size): + self._check_exports() if size > ITEM_LIMIT: - raise ValueError + raise ValueError("map too learge") msgpack_pack_map(&self.pk, size) if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) @@ -305,7 +318,11 @@ cdef class Packer: *pairs* should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) """ - msgpack_pack_map(&self.pk, len(pairs)) + self._check_exports() + size = len(pairs) + if size > ITEM_LIMIT: + raise ValueError("map too large") + msgpack_pack_map(&self.pk, size) for k, v in pairs: self._pack(k) self._pack(v) @@ -319,6 +336,7 @@ cdef class Packer: This method is useful only when autoreset=False. """ + self._check_exports() self.pk.length = 0 def bytes(self): @@ -326,11 +344,15 @@ cdef class Packer: return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) def getbuffer(self): - """Return view of internal buffer.""" + """Return memoryview of internal buffer. + + Note: Packer now supports buffer protocol. You can use memoryview(packer). 
+ """ return memoryview(self) def __getbuffer__(self, Py_buffer *buffer, int flags): PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) + self.exports += 1 def __releasebuffer__(self, Py_buffer *buffer): - pass + self.exports -= 1 diff --git a/test/test_buffer.py b/test/test_buffer.py index a3db339c..2165eb50 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python +from pytest import raises -from msgpack import packb, unpackb +from msgpack import packb, unpackb, Packer def test_unpack_buffer(): @@ -27,3 +27,23 @@ def test_unpack_memoryview(): assert [b"foo", b"bar"] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) + + +def test_packer_getbuffer(): + packer = Packer(autoreset=False) + packer.pack_array_header(2) + packer.pack(42) + packer.pack("hello") + buffer = packer.getbuffer() + assert isinstance(buffer, memoryview) + assert bytes(buffer) == b"\x92*\xa5hello" + + if Packer.__module__ == "msgpack._cmsgpack": # only for Cython + # cython Packer supports buffer protocol directly + assert bytes(packer) == b"\x92*\xa5hello" + + with raises(BufferError): + packer.pack(42) + buffer.release() + packer.pack(42) + assert bytes(packer) == b"\x92*\xa5hello*" From 33e0e86f4e66e6bb029f118e8f5e166b55e828f7 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 11:46:31 +0900 Subject: [PATCH 338/349] Cleanup code and pyproject (#608) * use isort * fallback: use BytesIO instead of StringIO. We had dropped Python 2 already. --- Makefile | 4 ++-- msgpack/__init__.py | 12 +++++----- msgpack/ext.py | 2 +- msgpack/fallback.py | 52 ++++++++++++++++++----------------------- pyproject.toml | 17 +++++--------- setup.py | 4 ++-- test/test_buffer.py | 2 +- test/test_except.py | 5 ++-- test/test_extension.py | 1 + test/test_limits.py | 8 +++---- test/test_memoryview.py | 1 + test/test_newspec.py | 2 +- test/test_obj.py | 1 + test/test_pack.py | 4 ++-- test/test_read_size.py | 2 +- test/test_seq.py | 6 ++--- test/test_sequnpack.py | 7 +++--- test/test_stricttype.py | 3 ++- test/test_subtype.py | 3 ++- test/test_timestamp.py | 4 +++- test/test_unpack.py | 11 ++++----- 21 files changed, 73 insertions(+), 78 deletions(-) diff --git a/Makefile b/Makefile index 3ce178f2..51f3e0ef 100644 --- a/Makefile +++ b/Makefile @@ -6,11 +6,11 @@ all: cython .PHONY: format format: - pipx run ruff format $(PYTHON_SOURCES) + ruff format $(PYTHON_SOURCES) .PHONY: lint lint: - pipx run ruff check $(PYTHON_SOURCES) + ruff check $(PYTHON_SOURCES) .PHONY: doc doc: diff --git a/msgpack/__init__.py b/msgpack/__init__.py index 919b86f1..e796efb0 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,20 +1,20 @@ -from .exceptions import * -from .ext import ExtType, Timestamp - +# ruff: noqa: F401 import os +from .exceptions import * # noqa: F403 +from .ext import ExtType, Timestamp version = (1, 0, 8) __version__ = "1.0.8" if os.environ.get("MSGPACK_PUREPYTHON"): - from .fallback import Packer, unpackb, Unpacker + from .fallback import Packer, Unpacker, unpackb else: try: - from ._cmsgpack import Packer, unpackb, Unpacker + from ._cmsgpack import Packer, Unpacker, unpackb except ImportError: - from .fallback import Packer, unpackb, Unpacker + from .fallback import Packer, Unpacker, unpackb def pack(o, stream, **kwargs): diff --git a/msgpack/ext.py b/msgpack/ext.py index 3940fe0f..9694819a 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,6 +1,6 @@ -from collections import namedtuple import datetime import struct +from 
collections import namedtuple class ExtType(namedtuple("ExtType", "code data")): diff --git a/msgpack/fallback.py b/msgpack/fallback.py index cbf0d30e..b02e47cf 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,28 +1,22 @@ """Fallback pure Python implementation of msgpack""" -from datetime import datetime as _DateTime -import sys import struct - +import sys +from datetime import datetime as _DateTime if hasattr(sys, "pypy_version_info"): - # StringIO is slow on PyPy, StringIO is faster. However: PyPy's own - # StringBuilder is fastest. from __pypy__ import newlist_hint + from __pypy__.builders import BytesBuilder - try: - from __pypy__.builders import BytesBuilder as StringBuilder - except ImportError: - from __pypy__.builders import StringBuilder - USING_STRINGBUILDER = True + _USING_STRINGBUILDER = True - class StringIO: + class BytesIO: def __init__(self, s=b""): if s: - self.builder = StringBuilder(len(s)) + self.builder = BytesBuilder(len(s)) self.builder.append(s) else: - self.builder = StringBuilder() + self.builder = BytesBuilder() def write(self, s): if isinstance(s, memoryview): @@ -35,17 +29,17 @@ def getvalue(self): return self.builder.build() else: - USING_STRINGBUILDER = False - from io import BytesIO as StringIO + from io import BytesIO - newlist_hint = lambda size: [] + _USING_STRINGBUILDER = False + def newlist_hint(size): + return [] -from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError +from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError from .ext import ExtType, Timestamp - EX_SKIP = 0 EX_CONSTRUCT = 1 EX_READ_ARRAY_HEADER = 2 @@ -335,6 +329,7 @@ def feed(self, next_bytes): # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython self._buffer.extend(view) + view.release() def _consume(self): """Gets rid of the used parts of the buffer.""" @@ -671,12 +666,11 @@ def __init__( self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._buffer = StringIO() + self._buffer = BytesIO() self._datetime = bool(datetime) self._unicode_errors = unicode_errors or "strict" - if default is not None: - if not callable(default): - raise TypeError("default must be callable") + if default is not None and not callable(default): + raise TypeError("default must be callable") self._default = default def _pack( @@ -807,18 +801,18 @@ def pack(self, obj): try: self._pack(obj) except: - self._buffer = StringIO() # force reset + self._buffer = BytesIO() # force reset raise if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_array_header(self, n): @@ -827,7 +821,7 @@ def pack_array_header(self, n): self._pack_array_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_map_header(self, n): @@ -836,7 +830,7 @@ def pack_map_header(self, n): self._pack_map_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = StringIO() + self._buffer = BytesIO() return ret def pack_ext_type(self, typecode, data): @@ -925,11 +919,11 @@ def reset(self): This method is useful only when autoreset=False. 
""" - self._buffer = StringIO() + self._buffer = BytesIO() def getbuffer(self): """Return view of internal buffer.""" - if USING_STRINGBUILDER: + if _USING_STRINGBUILDER: return memoryview(self.bytes()) else: return self._buffer.getbuffer() diff --git a/pyproject.toml b/pyproject.toml index 6254f06f..f36c7f40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,17 +45,12 @@ include-package-data = false [tool.setuptools.dynamic] version = {attr = "msgpack.__version__"} -[tool.black] -line-length = 100 -target-version = ["py37"] -skip_string_normalization = true - [tool.ruff] line-length = 100 target-version = "py38" -lint.ignore = [] - -[tool.ruff.lint.per-file-ignores] -"msgpack/__init__.py" = ["F401", "F403"] -"msgpack/fallback.py" = ["E731"] -"test/test_seq.py" = ["E501"] +lint.select = [ + "E", # pycodestyle + "F", # Pyflakes + "I", # isort + #"UP", pyupgrade +] diff --git a/setup.py b/setup.py index dc14a26e..eaca7460 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,11 @@ #!/usr/bin/env python import os import sys -from setuptools import setup, Extension + +from setuptools import Extension, setup from setuptools.command.build_ext import build_ext from setuptools.command.sdist import sdist - PYPY = hasattr(sys, "pypy_version_info") diff --git a/test/test_buffer.py b/test/test_buffer.py index 2165eb50..2c5a14c5 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,6 +1,6 @@ from pytest import raises -from msgpack import packb, unpackb, Packer +from msgpack import Packer, packb, unpackb def test_unpack_buffer(): diff --git a/test/test_except.py b/test/test_except.py index 8c0a9766..b77ac800 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,9 +1,10 @@ #!/usr/bin/env python +import datetime + from pytest import raises -from msgpack import packb, unpackb, Unpacker, FormatError, StackError, OutOfData -import datetime +from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb class DummyException(Exception): diff --git a/test/test_extension.py b/test/test_extension.py index 9e5e6aad..aaf0fd92 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,4 +1,5 @@ import array + import msgpack from msgpack import ExtType diff --git a/test/test_limits.py b/test/test_limits.py index 533bc112..9b92b4d9 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -2,14 +2,14 @@ import pytest from msgpack import ( - packb, - unpackb, - Packer, - Unpacker, ExtType, + Packer, PackOverflowError, PackValueError, + Unpacker, UnpackValueError, + packb, + unpackb, ) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index eff4bca0..0a2a6f53 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from array import array + from msgpack import packb, unpackb diff --git a/test/test_newspec.py b/test/test_newspec.py index a6f4251b..9e2f9be5 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,4 +1,4 @@ -from msgpack import packb, unpackb, ExtType +from msgpack import ExtType, packb, unpackb def test_str8(): diff --git a/test/test_obj.py b/test/test_obj.py index f78bf426..23be06d5 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from pytest import raises + from msgpack import packb, unpackb diff --git a/test/test_pack.py b/test/test_pack.py index 4a0ef403..374d1549 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,12 +1,12 @@ #!/usr/bin/env python +import struct from collections import OrderedDict from io import BytesIO 
-import struct import pytest -from msgpack import packb, unpackb, Unpacker, Packer +from msgpack import Packer, Unpacker, packb, unpackb def check(data, use_list=False): diff --git a/test/test_read_size.py b/test/test_read_size.py index a7d61fd9..0f6c1b50 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,6 +1,6 @@ """Test Unpacker's read_array_header and read_map_header methods""" -from msgpack import packb, Unpacker, OutOfData +from msgpack import OutOfData, Unpacker, packb UnexpectedTypeException = ValueError diff --git a/test/test_seq.py b/test/test_seq.py index 16d9dde4..8dee4620 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -1,8 +1,8 @@ -#!/usr/bin/env python - +# ruff: noqa: E501 +# ignore line length limit for long comments import io -import msgpack +import msgpack binarydata = bytes(bytearray(range(256))) diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 6b138aad..0f895d7d 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,10 +1,11 @@ #!/usr/bin/env python import io -from msgpack import Unpacker, BufferFull -from msgpack import pack, packb -from msgpack.exceptions import OutOfData + from pytest import raises +from msgpack import BufferFull, Unpacker, pack, packb +from msgpack.exceptions import OutOfData + def test_partialdata(): unpacker = Unpacker() diff --git a/test/test_stricttype.py b/test/test_stricttype.py index 9ffaff25..72776a2c 100644 --- a/test/test_stricttype.py +++ b/test/test_stricttype.py @@ -1,5 +1,6 @@ from collections import namedtuple -from msgpack import packb, unpackb, ExtType + +from msgpack import ExtType, packb, unpackb def test_namedtuple(): diff --git a/test/test_subtype.py b/test/test_subtype.py index 0d1c41af..a911578c 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -1,8 +1,9 @@ #!/usr/bin/env python -from msgpack import packb from collections import namedtuple +from msgpack import packb + class MyList(list): pass diff --git a/test/test_timestamp.py b/test/test_timestamp.py index f9bc8353..831141a1 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -1,5 +1,7 @@ -import pytest import datetime + +import pytest + import msgpack from msgpack.ext import Timestamp diff --git a/test/test_unpack.py b/test/test_unpack.py index bf3f960d..b17c3c53 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -1,12 +1,9 @@ -from io import BytesIO import sys -from msgpack import Unpacker, packb, OutOfData, ExtType -from pytest import raises, mark +from io import BytesIO + +from pytest import mark, raises -try: - from itertools import izip as zip -except ImportError: - pass +from msgpack import ExtType, OutOfData, Unpacker, packb def test_unpack_array_header_from_file(): From 9cea8b6da23ce66f0e78f017c96adcc447deb09a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 7 May 2024 20:49:23 +0900 Subject: [PATCH 339/349] Release v1.1.0rc1 (#609) --- ChangeLog.rst | 20 +++++++++++++++++++- msgpack/__init__.py | 4 ++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 2408bc9f..9b16e41f 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,21 @@ +1.1.0rc1 +======== + +Release Date: 2024-05-07 + +* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. +* Stop using C++ mode in Cython to reduce compile error on some compilers. +* ``Packer()`` has ``buf_size`` option to specify initial size of + internal buffer to reduce reallocation. 
+* The default internal buffer size of ``Packer()`` is reduced from
+  1MiB to 256KiB to optimize for common use cases. Use ``buf_size``
+  if you are packing large data.
+* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become
+  more accurate by avoiding floating point calculations. (#591)
+* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability.
+* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only
+  arguments to improve compatibility with the Cython implementation.
+
 1.0.8
 =====
 
@@ -130,7 +148,7 @@ Important changes
 
 * unpacker: Default value of input limits are smaller than before to avoid DoS
   attack. If you need to handle large data, you need to specify limits manually. (#319)
-* Unpacker doesn't wrap underlaying ``ValueError`` (including ``UnicodeError``) into
+* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into
   ``UnpackValueError``. If you want to catch all exception during unpack, you need to
   use ``try ... except Exception`` with minimum try code block. (#323, #233)
 
diff --git a/msgpack/__init__.py b/msgpack/__init__.py
index e796efb0..c2525444 100644
--- a/msgpack/__init__.py
+++ b/msgpack/__init__.py
@@ -4,8 +4,8 @@
 from .exceptions import *  # noqa: F403
 from .ext import ExtType, Timestamp
 
-version = (1, 0, 8)
-__version__ = "1.0.8"
+version = (1, 1, 0, "rc1")
+__version__ = "1.1.0rc1"
 
 
 if os.environ.get("MSGPACK_PUREPYTHON"):

From 0b1c47b06b55d91c00c9f7153c4a9440ea878886 Mon Sep 17 00:00:00 2001
From: Inada Naoki
Date: Tue, 7 May 2024 22:01:54 +0900
Subject: [PATCH 340/349] do not install cython as build dependency (#610)

Users cannot cythonize during `pip install msgpack`, so remove Cython
from the build dependencies.

Users who need a different Cython version should download the sdist,
unpack it, cythonize manually, and then run `pip install .`.
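
As a quick illustration of the 1.1.0rc1 changelog entries quoted above, the sketch below exercises the ``buf_size`` option and the integer-based ``Timestamp`` round-trip. It is only a sketch: the payload contents and the 1 MiB buffer size are illustrative values chosen here, not anything taken from these patches.

    from datetime import datetime, timezone

    from msgpack import Packer, Timestamp, unpackb

    # Larger-than-default initial buffer (the buf_size option from the
    # changelog above); 1 MiB is an arbitrary example, not a recommendation.
    packer = Packer(buf_size=1024 * 1024)

    # Timestamp round-trip; from_datetime()/to_datetime() now avoid floating
    # point calculations, so no sub-microsecond drift is introduced (#591).
    ts = Timestamp.from_datetime(datetime(2024, 5, 7, tzinfo=timezone.utc))
    payload = packer.pack({"created_at": ts})
    assert unpackb(payload)["created_at"] == ts
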
--- pyproject.toml | 6 +----- setup.py | 45 --------------------------------------------- 2 files changed, 1 insertion(+), 50 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f36c7f40..d041d4c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,5 @@ [build-system] -requires = [ - # Also declared in requirements.txt, if updating here please also update there - "Cython~=3.0.10", - "setuptools >= 69.5.1", -] +requires = ["setuptools >= 69.5.1"] build-backend = "setuptools.build_meta" [project] diff --git a/setup.py b/setup.py index eaca7460..4029e9ed 100644 --- a/setup.py +++ b/setup.py @@ -3,52 +3,9 @@ import sys from setuptools import Extension, setup -from setuptools.command.build_ext import build_ext -from setuptools.command.sdist import sdist PYPY = hasattr(sys, "pypy_version_info") - -class NoCython(Exception): - pass - - -try: - import Cython.Compiler.Main as cython_compiler - - have_cython = True -except ImportError: - have_cython = False - - -def cythonize(src): - if not have_cython: - raise Exception("Cython is required for building from checkout") - sys.stderr.write(f"cythonize: {src!r}\n") - cython_compiler.compile([src]) - - -def ensure_source(src): - pyx = os.path.splitext(src)[0] + ".pyx" - - if not os.path.exists(src) or have_cython and os.stat(src).st_mtime < os.stat(pyx).st_mtime: - cythonize(pyx) - - -class BuildExt(build_ext): - def build_extension(self, ext): - for src in ext.sources: - ensure_source(src) - return build_ext.build_extension(self, ext) - - -# Cython is required for sdist -class Sdist(sdist): - def __init__(self, *args, **kwargs): - cythonize("msgpack/_cmsgpack.pyx") - sdist.__init__(self, *args, **kwargs) - - libraries = [] macros = [] ext_modules = [] @@ -69,9 +26,7 @@ def __init__(self, *args, **kwargs): ) del libraries, macros - setup( - cmdclass={"build_ext": BuildExt, "sdist": Sdist}, ext_modules=ext_modules, packages=["msgpack"], ) From 6e11368f5d54f7d4878dc209717495c37be03c68 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 19 Aug 2024 17:35:16 +0900 Subject: [PATCH 341/349] update Cython to 3.0.11 (#617) --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1164a941..b677f068 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -# Also declared in pyproject.toml, if updating here please also update there. 
-Cython~=3.0.10 +Cython~=3.0.11 From 9e26d80ab2a02221d3ca36cc0b5ca2268f391204 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 19 Aug 2024 17:56:01 +0900 Subject: [PATCH 342/349] update cibuildwheel to 2.20.0 (#618) --- .github/workflows/wheel.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index d57e0586..01d0bbd1 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -32,7 +32,7 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.17.0 + uses: pypa/cibuildwheel@v2.20.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" @@ -40,13 +40,11 @@ jobs: CIBW_ARCHS_MACOS: x86_64 universal2 arm64 CIBW_SKIP: pp* - - name: Build pure Python wheel + - name: Build sdist if: runner.os == 'Linux' - env: - MSGPACK_PUREPYTHON: "1" run: | pip install build - python -m build -w -o wheelhouse + python -m build -s -o wheelhouse - name: Upload Wheels to artifact uses: actions/upload-artifact@v4 From 9d0c7f2f9cab21c3e39d23001cd9d2034824ee61 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 19 Aug 2024 20:36:26 +0900 Subject: [PATCH 343/349] Release v1.1.0rc2 (#619) --- .github/workflows/wheel.yml | 2 +- ChangeLog.rst | 8 ++++++++ msgpack/__init__.py | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 01d0bbd1..50157223 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -38,7 +38,7 @@ jobs: CIBW_TEST_COMMAND: "pytest {package}/test" CIBW_ARCHS_LINUX: auto aarch64 CIBW_ARCHS_MACOS: x86_64 universal2 arm64 - CIBW_SKIP: pp* + CIBW_SKIP: "pp* cp38-macosx_*" - name: Build sdist if: runner.os == 'Linux' diff --git a/ChangeLog.rst b/ChangeLog.rst index 9b16e41f..47328ffb 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,11 @@ +1.1.0rc2 +======== + +Release Date: 2024-08-19 + +* Update Cython to 3.0.11 for better Python 3.13 support. +* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. + 1.1.0rc1 ======== diff --git a/msgpack/__init__.py b/msgpack/__init__.py index c2525444..a72e974c 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,8 +4,8 @@ from .exceptions import * # noqa: F403 from .ext import ExtType, Timestamp -version = (1, 1, 0, "rc1") -__version__ = "1.1.0rc1" +version = (1, 1, 0, "rc2") +__version__ = "1.1.0rc2" if os.environ.get("MSGPACK_PUREPYTHON"): From 20a2b8eaa26d76169049ce150e10d1d2aa37d3ab Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 21 Aug 2024 01:56:00 -0400 Subject: [PATCH 344/349] use `PyLong_*` instead of `PyInt_*` (#620) 9af421163cb8081414be347038dee7a82b29e8dd in Cython removed back-compatibility `#define`. 
--- msgpack/unpack.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 23aa6220..58a2f4f5 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -47,7 +47,7 @@ static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong((long)d); + PyObject *p = PyLong_FromLong((long)d); if (!p) return -1; *o = p; @@ -61,7 +61,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromSize_t((size_t)d); + PyObject *p = PyLong_FromSize_t((size_t)d); if (!p) return -1; *o = p; @@ -74,7 +74,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } if (!p) return -1; @@ -84,7 +84,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyInt_FromLong(d); + PyObject *p = PyLong_FromLong(d); if (!p) return -1; *o = p; @@ -107,7 +107,7 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac if (d > LONG_MAX || d < LONG_MIN) { p = PyLong_FromLongLong((PY_LONG_LONG)d); } else { - p = PyInt_FromLong((long)d); + p = PyLong_FromLong((long)d); } *o = p; return 0; From 4587393b1ae2c9ebbd5bc93005b8aea2c8050b27 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 10 Sep 2024 01:58:00 +0900 Subject: [PATCH 345/349] release v1.1.0 (#622) --- ChangeLog.rst | 8 ++++++++ msgpack/__init__.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ChangeLog.rst b/ChangeLog.rst index 47328ffb..863c6b2c 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,3 +1,11 @@ +1.1.0 +===== + +Release Date: 2024-09-10 + +* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with + future Cython. 
(#620) + 1.1.0rc2 ======== diff --git a/msgpack/__init__.py b/msgpack/__init__.py index a72e974c..b6151054 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,8 +4,8 @@ from .exceptions import * # noqa: F403 from .ext import ExtType, Timestamp -version = (1, 1, 0, "rc2") -__version__ = "1.1.0rc2" +version = (1, 1, 0) +__version__ = "1.1.0" if os.environ.get("MSGPACK_PUREPYTHON"): From 0eeabfb453844b441a4a77097b3d5aa0cb6645b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 8 Oct 2024 03:04:56 -0600 Subject: [PATCH 346/349] Add Python 3.13 trove classifier (#626) --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index d041d4c0..e24f2b88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Intended Audience :: Developers", From 868aa2cd83f39237deb957c68ce7232422a5950b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 31 May 2025 12:45:06 +0900 Subject: [PATCH 347/349] update Cython to 3.1.1 (#637) --- .github/workflows/test.yml | 9 +++++++-- pyproject.toml | 12 +++++++----- requirements.txt | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 530238c9..23d221c8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: os: ["ubuntu-latest", "windows-latest", "macos-latest"] - py: ["3.13-dev", "3.12", "3.11", "3.10", "3.9", "3.8"] + py: ["3.14-dev", "3.13", "3.12", "3.11", "3.10", "3.9", "3.8"] runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} @@ -26,10 +26,15 @@ jobs: allow-prereleases: true cache: "pip" - - name: Build + - name: Prepare shell: bash run: | + pip install -U pip pip install -r requirements.txt pytest + + - name: Build + shell: bash + run: | make cython pip install . diff --git a/pyproject.toml b/pyproject.toml index e24f2b88..b1628322 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,13 @@ [build-system] -requires = ["setuptools >= 69.5.1"] +# 75.3.0 is the latest version supporting Python 3.8 +requires = ["setuptools >= 75.3.0"] build-backend = "setuptools.build_meta" [project] name = "msgpack" dynamic = ["version"] +# `license = "Apache-2.0"` is preferred. But keep old syntax for Python 3.8 compatibility. 
+# https://github.com/msgpack/msgpack-python/pull/637 license = {text="Apache 2.0"} authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] description = "MessagePack serializer" @@ -14,18 +17,17 @@ requires-python = ">=3.8" classifiers = [ "Development Status :: 5 - Production/Stable", "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", + "Topic :: File Formats", + "Intended Audience :: Developers", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", ] [project.urls] diff --git a/requirements.txt b/requirements.txt index b677f068..78a2f38f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -Cython~=3.0.11 +Cython~=3.1.1 From cdc764450370ff80e7c83edbe8d015f08f6fb9b3 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 1 Jun 2025 16:56:44 +0900 Subject: [PATCH 348/349] update cibuildwheel to v2.23.3 (#638) --- .github/workflows/wheel.yml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 50157223..d97de1dd 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -9,17 +9,12 @@ jobs: build_wheels: strategy: matrix: - os: ["ubuntu-latest", "windows-latest", "macos-latest"] + # macos-13 is for intel + os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "macos-13", "macos-latest"] runs-on: ${{ matrix.os }} name: Build wheels on ${{ matrix.os }} steps: - - name: Set up QEMU - if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v3 - with: - platforms: all - - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: @@ -32,12 +27,10 @@ jobs: make cython - name: Build - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@v2.23.3 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" - CIBW_ARCHS_LINUX: auto aarch64 - CIBW_ARCHS_MACOS: x86_64 universal2 arm64 CIBW_SKIP: "pp* cp38-macosx_*" - name: Build sdist From fe9e620a607702b31476f092ad01a387cff4cfbd Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 2 Jun 2025 14:46:53 +0900 Subject: [PATCH 349/349] upload to PyPI on create a release (#639) --- .github/workflows/wheel.yml | 48 ++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index d97de1dd..686d7dd0 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -1,8 +1,10 @@ -name: Build Wheels +name: Build sdist and Wheels on: push: branches: [main] - create: + release: + types: + - published workflow_dispatch: jobs: @@ -34,7 +36,7 @@ jobs: CIBW_SKIP: "pp* cp38-macosx_*" - name: Build sdist - if: runner.os == 'Linux' + if: runner.os == 'Linux' && runner.arch == 'X64' run: | pip install build python -m build -s -o wheelhouse @@ -44,3 +46,43 @@ jobs: with: name: wheels-${{ matrix.os }} path: wheelhouse + + # combine all wheels into one artifact + combine_wheels: + needs: [build_wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks 
all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@v4 + with: + name: wheels-all + path: dist + + # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml + upload_pypi: + needs: [build_wheels] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + if: github.event_name == 'release' && github.event.action == 'published' + # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) + # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 + #with: + # To test: repository-url: https://test.pypi.org/legacy/
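
The wheels built by the workflow above bundle the Cython extension, while an install from the sdist may end up on the pure-Python fallback if the extension cannot be compiled. A reader who wants to confirm which implementation they got can use a check along the lines of this sketch, which relies only on the import logic in msgpack/__init__.py and the module-name test already used in test/test_buffer.py:

    import msgpack

    # msgpack/__init__.py prefers the compiled msgpack._cmsgpack module and
    # silently falls back to msgpack.fallback (or honors MSGPACK_PUREPYTHON),
    # so Packer's module name tells you which implementation is active.
    if msgpack.Packer.__module__ == "msgpack._cmsgpack":
        print("using the Cython extension")
    else:
        print("using the pure-Python fallback")

    # A round-trip behaves the same with either implementation.
    assert msgpack.unpackb(msgpack.packb({"hello": "world"})) == {"hello": "world"}
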