Merge pull request #79 from msgpack/newspec · liufeigit/msgpack-python@e802abe · GitHub
[go: up one dir, main page]

Skip to content

Commit e802abe

Browse files
committed
Merge pull request msgpack#79 from msgpack/newspec
[WIP] Newspec stage 2.
2 parents ec0691f + d84a403 commit e802abe

14 files changed

+530
-207
lines changed

README.rst

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ MessagePack for Python
33
=======================
44

55
:author: INADA Naoki
6-
:version: 0.3.0
7-
:date: 2012-12-07
6+
:version: 0.4.0
7+
:date: 2013-10-21
88

99
.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png
1010
:target: https://travis-ci.org/#!/msgpack/msgpack-python
@@ -39,8 +39,40 @@ amd64. Windows SDK is the recommended way to build amd64 msgpack without any fee.)
3939

4040
Without extension, using pure python implementation on CPython runs slowly.
4141

42+
Notes
43+
-----
44+
45+
Note for msgpack 2.0 support
46+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
47+
48+
msgpack 2.0 adds two types: *bin* and *ext*.
49+
50+
*raw* was bytes or string type like Python 2's ``str``.
51+
To distinguish string and bytes, msgpack 2.0 adds *bin*.
52+
It is non-string binary like Python 3's ``bytes``.
53+
54+
To use the *bin* type for packing ``bytes``, pass ``use_bin_type=True`` as an
55+
argument to the packer.
56+
57+
>>> import msgpack
58+
>>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True)
59+
>>> msgpack.unpackb(packed, encoding='utf-8')
60+
['spam', u'egg']
61+
62+
You should use it carefully. When you use ``use_bin_type=True``, the packed
63+
binary can be unpacked only by unpackers supporting msgpack-2.0.
64+
65+
To use *ext* type, pass ``msgpack.ExtType`` object to packer.
66+
67+
>>> import msgpack
68+
>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy'))
69+
>>> msgpack.unpackb(packed)
70+
ExtType(code=42, data='xyzzy')
71+
72+
You can use it with ``default`` and ``ext_hook``. See below.
73+
4274
Note for msgpack 0.2.x users
43-
----------------------------
75+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4476

4577
msgpack 0.3 has some incompatible changes.
4678

@@ -140,6 +172,31 @@ It is also possible to pack/unpack custom data types. Here is an example for
140172
``object_pairs_hook`` callback may instead be used to receive a list of
141173
key-value pairs.
142174

175+
Extended types
176+
^^^^^^^^^^^^^^^
177+
178+
It is also possible to pack/unpack custom data types using the *ext* type added in msgpack 2.0.
179+
180+
>>> import msgpack
181+
>>> import array
182+
>>> def default(obj):
183+
... if isinstance(obj, array.array) and obj.typecode == 'd':
184+
... return msgpack.ExtType(42, obj.tostring())
185+
... raise TypeError("Unknown type: %r" % (obj,))
186+
...
187+
>>> def ext_hook(code, data):
188+
... if code == 42:
189+
... a = array.array('d')
190+
... a.fromstring(data)
191+
... return a
192+
... return msgpack.ExtType(code, data)
193+
...
194+
>>> data = array.array('d', [1.2, 3.4])
195+
>>> packed = msgpack.packb(data, default=default)
196+
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
197+
>>> data == unpacked
198+
True
199+
143200

144201
Advanced unpacking control
145202
^^^^^^^^^^^^^^^^^^^^^^^^^^

msgpack/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,17 @@
44

55
from collections import namedtuple
66

7-
ExtType = namedtuple('ExtType', 'code data')
7+
8+
class ExtType(namedtuple('ExtType', 'code data')):
    """Application-defined extension value: a type code plus its raw payload.

    ``code`` must be an ``int`` in the range 0..127 and ``data`` must be
    ``bytes``; both are validated when the instance is created.
    """

    # Keep instances as light as a plain namedtuple: without this, every
    # instance of the subclass would carry its own ``__dict__``.
    __slots__ = ()

    def __new__(cls, code, data):
        """Validate ``code`` and ``data`` and build the tuple.

        Raises:
            TypeError: if ``code`` is not an int or ``data`` is not bytes.
            ValueError: if ``code`` is outside 0..127.
        """
        if not isinstance(code, int):
            raise TypeError("code must be int")
        if not isinstance(data, bytes):
            raise TypeError("data must be bytes")
        if not 0 <= code <= 127:
            raise ValueError("code must be 0~127")
        return super(ExtType, cls).__new__(cls, code, data)
17+
818

919
import os
1020
if os.environ.get('MSGPACK_PUREPYTHON'):
@@ -26,6 +36,7 @@ def pack(o, stream, **kwargs):
2636
packer = Packer(**kwargs)
2737
stream.write(packer.pack(o))
2838

39+
2940
def packb(o, **kwargs):
3041
"""
3142
Pack object `o` and return packed bytes
@@ -40,4 +51,3 @@ def packb(o, **kwargs):
4051

4152
dump = pack
4253
dumps = packb
43-

msgpack/_packer.pyx

Lines changed: 84 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@ from cpython cimport *
55
from libc.stdlib cimport *
66
from libc.string cimport *
77
from libc.limits cimport *
8+
from libc.stdint cimport int8_t
89

910
from msgpack.exceptions import PackValueError
11+
from msgpack import ExtType
12+
1013

1114
cdef extern from "pack.h":
1215
struct msgpack_packer:
@@ -29,11 +32,11 @@ cdef extern from "pack.h":
2932
int msgpack_pack_raw(msgpack_packer* pk, size_t l)
3033
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
3134
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
35+
int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l)
3236

3337
cdef int DEFAULT_RECURSE_LIMIT=511
3438

3539

36-
3740
cdef class Packer(object):
3841
"""
3942
MessagePack Packer
@@ -118,77 +121,87 @@ cdef class Packer(object):
118121
cdef int ret
119122
cdef dict d
120123
cdef size_t L
124+
cdef int default_used = 0
121125

122126
if nest_limit < 0:
123127
raise PackValueError("recursion limit exceeded.")
124128

125-
if o is None:
126-
ret = msgpack_pack_nil(&self.pk)
127-
elif isinstance(o, bool):
128-
if o:
129-
ret = msgpack_pack_true(&self.pk)
130-
else:
131-
ret = msgpack_pack_false(&self.pk)
132-
elif PyLong_Check(o):
133-
if o > 0:
134-
ullval = o
135-
ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
136-
else:
137-
llval = o
138-
ret = msgpack_pack_long_long(&self.pk, llval)
139-
elif PyInt_Check(o):
140-
longval = o
141-
ret = msgpack_pack_long(&self.pk, longval)
142-
elif PyFloat_Check(o):
143-
if self.use_float:
144-
fval = o
145-
ret = msgpack_pack_float(&self.pk, fval)
146-
else:
147-
dval = o
148-
ret = msgpack_pack_double(&self.pk, dval)
149-
elif PyBytes_Check(o):
150-
rawval = o
151-
L = len(o)
152-
ret = msgpack_pack_bin(&self.pk, L)
153-
if ret == 0:
129+
while True:
130+
if o is None:
131+
ret = msgpack_pack_nil(&self.pk)
132+
elif isinstance(o, bool):
133+
if o:
134+
ret = msgpack_pack_true(&self.pk)
135+
else:
136+
ret = msgpack_pack_false(&self.pk)
137+
elif PyLong_Check(o):
138+
if o > 0:
139+
ullval = o
140+
ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
141+
else:
142+
llval = o
143+
ret = msgpack_pack_long_long(&self.pk, llval)
144+
elif PyInt_Check(o):
145+
longval = o
146+
ret = msgpack_pack_long(&self.pk, longval)
147+
elif PyFloat_Check(o):
148+
if self.use_float:
149+
fval = o
150+
ret = msgpack_pack_float(&self.pk, fval)
151+
else:
152+
dval = o
153+
ret = msgpack_pack_double(&self.pk, dval)
154+
elif PyBytes_Check(o):
155+
rawval = o
156+
L = len(o)
157+
ret = msgpack_pack_bin(&self.pk, L)
158+
if ret == 0:
159+
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
160+
elif PyUnicode_Check(o):
161+
if not self.encoding:
162+
raise TypeError("Can't encode unicode string: no encoding is specified")
163+
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
164+
rawval = o
165+
ret = msgpack_pack_raw(&self.pk, len(o))
166+
if ret == 0:
167+
ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
168+
elif PyDict_CheckExact(o):
169+
d = <dict>o
170+
ret = msgpack_pack_map(&self.pk, len(d))
171+
if ret == 0:
172+
for k, v in d.iteritems():
173+
ret = self._pack(k, nest_limit-1)
174+
if ret != 0: break
175+
ret = self._pack(v, nest_limit-1)
176+
if ret != 0: break
177+
elif PyDict_Check(o):
178+
ret = msgpack_pack_map(&self.pk, len(o))
179+
if ret == 0:
180+
for k, v in o.items():
181+
ret = self._pack(k, nest_limit-1)
182+
if ret != 0: break
183+
ret = self._pack(v, nest_limit-1)
184+
if ret != 0: break
185+
elif isinstance(o, ExtType):
186+
# This should be before Tuple because ExtType is namedtuple.
187+
longval = o.code
188+
rawval = o.data
189+
L = len(o.data)
190+
ret = msgpack_pack_ext(&self.pk, longval, L)
154191
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
155-
elif PyUnicode_Check(o):
156-
if not self.encoding:
157-
raise TypeError("Can't encode unicode string: no encoding is specified")
158-
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
159-
rawval = o
160-
ret = msgpack_pack_raw(&self.pk, len(o))
161-
if ret == 0:
162-
ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
163-
elif PyDict_CheckExact(o):
164-
d = <dict>o
165-
ret = msgpack_pack_map(&self.pk, len(d))
166-
if ret == 0:
167-
for k, v in d.iteritems():
168-
ret = self._pack(k, nest_limit-1)
169-
if ret != 0: break
170-
ret = self._pack(v, nest_limit-1)
171-
if ret != 0: break
172-
elif PyDict_Check(o):
173-
ret = msgpack_pack_map(&self.pk, len(o))
174-
if ret == 0:
175-
for k, v in o.items():
176-
ret = self._pack(k, nest_limit-1)
177-
if ret != 0: break
178-
ret = self._pack(v, nest_limit-1)
179-
if ret != 0: break
180-
elif PyTuple_Check(o) or PyList_Check(o):
181-
ret = msgpack_pack_array(&self.pk, len(o))
182-
if ret == 0:
183-
for v in o:
184-
ret = self._pack(v, nest_limit-1)
185-
if ret != 0: break
186-
elif self._default:
187-
o = self._default(o)
188-
ret = self._pack(o, nest_limit-1)
189-
else:
190-
raise TypeError("can't serialize %r" % (o,))
191-
return ret
192+
elif PyTuple_Check(o) or PyList_Check(o):
193+
ret = msgpack_pack_array(&self.pk, len(o))
194+
if ret == 0:
195+
for v in o:
196+
ret = self._pack(v, nest_limit-1)
197+
if ret != 0: break
198+
elif not default_used and self._default:
199+
o = self._default(o)
200+
default_used = 1
201+
continue
202+
else:
203+
raise TypeError("can't serialize %r" % (o,))
204+
return ret
192205

193206
cpdef pack(self, object obj):
194207
cdef int ret
@@ -202,6 +215,10 @@ cdef class Packer(object):
202215
self.pk.length = 0
203216
return buf
204217

218+
def pack_ext_type(self, typecode, data):
    """Append an ext header for `typecode` followed by the bytes of `data`.

    The result codes of both C calls are checked so that a failed header
    write is not silently followed by a body write.
    """
    cdef int ret = msgpack_pack_ext(&self.pk, typecode, len(data))
    if ret == 0:
        # Only emit the payload once the header has been written successfully.
        ret = msgpack_pack_raw_body(&self.pk, data, len(data))
    if ret == -1:
        # NOTE(review): -1 is assumed to mean the output buffer could not
        # grow, mirroring the `ret == -1` check in pack_array_header — confirm.
        raise MemoryError
    elif ret:  # any other non-zero code is unexpected
        raise TypeError
221+
205222
def pack_array_header(self, size_t size):
206223
cdef int ret = msgpack_pack_array(&self.pk, size)
207224
if ret == -1:

msgpack/_unpacker.pyx

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ from msgpack.exceptions import (
1616
UnpackValueError,
1717
ExtraData,
1818
)
19+
from msgpack import ExtType
1920

2021

2122
cdef extern from "unpack.h":
@@ -24,15 +25,14 @@ cdef extern from "unpack.h":
2425
PyObject* object_hook
2526
bint has_pairs_hook # call object_hook with k-v pairs
2627
PyObject* list_hook
28+
PyObject* ext_hook
2729
char *encoding
2830
char *unicode_errors
2931

3032
ctypedef struct unpack_context:
3133
msgpack_user user
3234
PyObject* obj
3335
size_t count
34-
unsigned int ct
35-
PyObject* key
3636

3737
ctypedef int (*execute_fn)(unpack_context* ctx, const char* data,
3838
size_t len, size_t* off) except? -1
@@ -44,7 +44,8 @@ cdef extern from "unpack.h":
4444
object unpack_data(unpack_context* ctx)
4545

4646
cdef inline init_ctx(unpack_context *ctx,
47-
object object_hook, object object_pairs_hook, object list_hook,
47+
object object_hook, object object_pairs_hook,
48+
object list_hook, object ext_hook,
4849
bint use_list, char* encoding, char* unicode_errors):
4950
unpack_init(ctx)
5051
ctx.user.use_list = use_list
@@ -71,13 +72,20 @@ cdef inline init_ctx(unpack_context *ctx,
7172
raise TypeError("list_hook must be a callable.")
7273
ctx.user.list_hook = <PyObject*>list_hook
7374

75+
if ext_hook is not None:
76+
if not PyCallable_Check(ext_hook):
77+
raise TypeError("ext_hook must be a callable.")
78+
ctx.user.ext_hook = <PyObject*>ext_hook
79+
7480
ctx.user.encoding = encoding
7581
ctx.user.unicode_errors = unicode_errors
7682

83+
def default_read_extended_type(typecode, data):
    """Fallback ext handler that always refuses to decode.

    Raises NotImplementedError naming the offending `typecode`; `data` is
    not inspected.
    """
    message = "Cannot decode extended type with typecode=%d" % typecode
    raise NotImplementedError(message)
85+
7786
def unpackb(object packed, object object_hook=None, object list_hook=None,
7887
bint use_list=1, encoding=None, unicode_errors="strict",
79-
object_pairs_hook=None,
80-
):
88+
object_pairs_hook=None, ext_hook=ExtType):
8189
"""
8290
Unpack packed_bytes to object. Returns an unpacked object.
8391
@@ -106,7 +114,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
106114
unicode_errors = unicode_errors.encode('ascii')
107115
cerr = PyBytes_AsString(unicode_errors)
108116

109-
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
117+
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
118+
use_list, cenc, cerr)
110119
ret = unpack_construct(&ctx, buf, buf_len, &off)
111120
if ret == 1:
112121
obj = unpack_data(&ctx)
@@ -211,7 +220,7 @@ cdef class Unpacker(object):
211220
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
212221
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
213222
str encoding=None, str unicode_errors='strict', int max_buffer_size=0,
214-
):
223+
object ext_hook=ExtType):
215224
cdef char *cenc=NULL, *cerr=NULL
216225

217226
self.file_like = file_like
@@ -248,7 +257,8 @@ cdef class Unpacker(object):
248257
self.unicode_errors = unicode_errors
249258
cerr = PyBytes_AsString(self.unicode_errors)
250259

251-
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
260+
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
261+
ext_hook, use_list, cenc, cerr)
252262

253263
def feed(self, object next_bytes):
254264
"""Append `next_bytes` to internal buffer."""

0 commit comments

Comments
 (0)
0