Merge pull request #158 from methane/feature/strict-typecheck · sugarguo/msgpack-python@8036cb4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8036cb4

Browse files
committed
Merge pull request msgpack#158 from methane/feature/strict-typecheck
Packer: check type strictly
2 parents c851389 + a779b79 commit 8036cb4

File tree

3 files changed

+69
-21
lines changed

3 files changed

+69
-21
lines changed

msgpack/_packer.pyx

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,21 @@ cdef class Packer(object):
6363
:param bool use_bin_type:
6464
Use bin type introduced in msgpack spec 2.0 for bytes.
6565
It also enables str8 type for unicode.
66+
:param bool strict_types:
67+
If set to true, types will be checked to be exact. Derived classes
68+
from serializable types will not be serialized and will be
69+
treated as unsupported type and forwarded to default.
70+
Additionally tuples will not be serialized as lists.
71+
This is useful when trying to implement accurate serialization
72+
for python types.
6673
"""
6774
cdef msgpack_packer pk
6875
cdef object _default
6976
cdef object _bencoding
7077
cdef object _berrors
7178
cdef char *encoding
7279
cdef char *unicode_errors
80+
cdef bint strict_types
7381
cdef bool use_float
7482
cdef bint autoreset
7583

@@ -82,10 +90,12 @@ cdef class Packer(object):
8290
self.pk.length = 0
8391

8492
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
85-
use_single_float=False, bint autoreset=1, bint use_bin_type=0):
93+
use_single_float=False, bint autoreset=1, bint use_bin_type=0,
94+
bint strict_types=0):
8695
"""
8796
"""
8897
self.use_float = use_single_float
98+
self.strict_types = strict_types
8999
self.autoreset = autoreset
90100
self.pk.use_bin_type = use_bin_type
91101
if default is not None:
@@ -121,19 +131,20 @@ cdef class Packer(object):
121131
cdef dict d
122132
cdef size_t L
123133
cdef int default_used = 0
134+
cdef bint strict_types = self.strict_types
124135

125136
if nest_limit < 0:
126137
raise PackValueError("recursion limit exceeded.")
127138

128139
while True:
129140
if o is None:
130141
ret = msgpack_pack_nil(&self.pk)
131-
elif isinstance(o, bool):
142+
elif PyBool_Check(o) if strict_types else isinstance(o, bool):
132143
if o:
133144
ret = msgpack_pack_true(&self.pk)
134145
else:
135146
ret = msgpack_pack_false(&self.pk)
136-
elif PyLong_Check(o):
147+
elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o):
137148
# PyInt_Check(long) is True for Python 3.
138149
# So we should test long before int.
139150
try:
@@ -150,25 +161,25 @@ cdef class Packer(object):
150161
continue
151162
else:
152163
raise
153-
elif PyInt_Check(o):
164+
elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o):
154165
longval = o
155166
ret = msgpack_pack_long(&self.pk, longval)
156-
elif PyFloat_Check(o):
167+
elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o):
157168
if self.use_float:
158169
fval = o
159170
ret = msgpack_pack_float(&self.pk, fval)
160171
else:
161172
dval = o
162173
ret = msgpack_pack_double(&self.pk, dval)
163-
elif PyBytes_Check(o):
174+
elif PyBytes_CheckExact(o) if strict_types else PyBytes_Check(o):
164175
L = len(o)
165176
if L > (2**32)-1:
166177
raise ValueError("bytes is too large")
167178
rawval = o
168179
ret = msgpack_pack_bin(&self.pk, L)
169180
if ret == 0:
170181
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
171-
elif PyUnicode_Check(o):
182+
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
172183
if not self.encoding:
173184
raise TypeError("Can't encode unicode string: no encoding is specified")
174185
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
@@ -191,7 +202,7 @@ cdef class Packer(object):
191202
if ret != 0: break
192203
ret = self._pack(v, nest_limit-1)
193204
if ret != 0: break
194-
elif PyDict_Check(o):
205+
elif not strict_types and PyDict_Check(o):
195206
L = len(o)
196207
if L > (2**32)-1:
197208
raise ValueError("dict is too large")
@@ -202,7 +213,7 @@ cdef class Packer(object):
202213
if ret != 0: break
203214
ret = self._pack(v, nest_limit-1)
204215
if ret != 0: break
205-
elif isinstance(o, ExtType):
216+
elif type(o) is ExtType if strict_types else isinstance(o, ExtType):
206217
# This should be before Tuple because ExtType is namedtuple.
207218
longval = o.code
208219
rawval = o.data
@@ -211,7 +222,7 @@ cdef class Packer(object):
211222
raise ValueError("EXT data is too large")
212223
ret = msgpack_pack_ext(&self.pk, longval, L)
213224
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
214-
elif PyTuple_Check(o) or PyList_Check(o):
225+
elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)):
215226
L = len(o)
216227
if L > (2**32)-1:
217228
raise ValueError("list is too large")

msgpack/fallback.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ def getvalue(self):
6969
DEFAULT_RECURSE_LIMIT = 511
7070

7171

72+
def _check_type_strict(obj, t, type=type, tuple=tuple):
73+
if type(t) is tuple:
74+
return type(obj) in t
75+
else:
76+
return type(obj) is t
77+
78+
7279
def unpack(stream, **kwargs):
7380
"""
7481
Unpack an object from `stream`.
@@ -609,9 +616,18 @@ class Packer(object):
609616
:param bool use_bin_type:
610617
Use bin type introduced in msgpack spec 2.0 for bytes.
611618
It also enables str8 type for unicode.
619+
:param bool strict_types:
620+
If set to true, types will be checked to be exact. Derived classes
621+
from serializable types will not be serialized and will be
622+
treated as unsupported type and forwarded to default.
623+
Additionally tuples will not be serialized as lists.
624+
This is useful when trying to implement accurate serialization
625+
for python types.
612626
"""
613627
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
614-
use_single_float=False, autoreset=True, use_bin_type=False):
628+
use_single_float=False, autoreset=True, use_bin_type=False,
629+
strict_types=False):
630+
self._strict_types = strict_types
615631
self._use_float = use_single_float
616632
self._autoreset = autoreset
617633
self._use_bin_type = use_bin_type
@@ -623,18 +639,24 @@ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
623639
raise TypeError("default must be callable")
624640
self._default = default
625641

626-
def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
642+
def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT,
643+
check=isinstance, check_type_strict=_check_type_strict):
627644
default_used = False
645+
if self._strict_types:
646+
check = check_type_strict
647+
list_types = list
648+
else:
649+
list_types = (list, tuple)
628650
while True:
629651
if nest_limit < 0:
630652
raise PackValueError("recursion limit exceeded")
631653
if obj is None:
632654
return self._buffer.write(b"\xc0")
633-
if isinstance(obj, bool):
655+
if check(obj, bool):
634656
if obj:
635657
return self._buffer.write(b"\xc3")
636658
return self._buffer.write(b"\xc2")
637-
if isinstance(obj, int_types):
659+
if check(obj, int_types):
638660
if 0 <= obj < 0x80:
639661
return self._buffer.write(struct.pack("B", obj))
640662
if -0x20 <= obj < 0:
@@ -660,7 +682,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
660682
default_used = True
661683
continue
662684
raise PackValueError("Integer value out of range")
663-
if self._use_bin_type and isinstance(obj, bytes):
685+
if self._use_bin_type and check(obj, bytes):
664686
n = len(obj)
665687
if n <= 0xff:
666688
self._buffer.write(struct.pack('>BB', 0xc4, n))
@@ -671,8 +693,8 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
671693
else:
672694
raise PackValueError("Bytes is too large")
673695
return self._buffer.write(obj)
674-
if isinstance(obj, (Unicode, bytes)):
675-
if isinstance(obj, Unicode):
696+
if check(obj, (Unicode, bytes)):
697+
if check(obj, Unicode):
676698
if self._encoding is None:
677699
raise TypeError(
678700
"Can't encode unicode string: "
@@ -690,11 +712,11 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
690712
else:
691713
raise PackValueError("String is too large")
692714
return self._buffer.write(obj)
693-
if isinstance(obj, float):
715+
if check(obj, float):
694716
if self._use_float:
695717
return self._buffer.write(struct.pack(">Bf", 0xca, obj))
696718
return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
697-
if isinstance(obj, ExtType):
719+
if check(obj, ExtType):
698720
code = obj.code
699721
data = obj.data
700722
assert isinstance(code, int)
@@ -719,13 +741,13 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
719741
self._buffer.write(struct.pack("b", code))
720742
self._buffer.write(data)
721743
return
722-
if isinstance(obj, (list, tuple)):
744+
if check(obj, list_types):
723745
n = len(obj)
724746
self._fb_pack_array_header(n)
725747
for i in xrange(n):
726748
self._pack(obj[i], nest_limit - 1)
727749
return
728-
if isinstance(obj, dict):
750+
if check(obj, dict):
729751
return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj),
730752
nest_limit - 1)
731753
if not default_used and self._default is not None:

test/test_stricttype.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# coding: utf-8
2+
3+
from collections import namedtuple
4+
from msgpack import packb, unpackb
5+
6+
7+
def test_namedtuple():
8+
T = namedtuple('T', "foo bar")
9+
def default(o):
10+
if isinstance(o, T):
11+
return dict(o._asdict())
12+
raise TypeError('Unsupported type %s' % (type(o),))
13+
packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
14+
unpacked = unpackb(packed, encoding='utf-8')
15+
assert unpacked == {'foo': 1, 'bar': 42}

0 commit comments

Comments
 (0)
0