Use cython's cast for converting encoding and errors (#279) · loude/msgpack-python@2644cbd · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 2644cbd

Browse files
authored
Use cython's cast for converting encoding and errors (msgpack#279)
It is a little faster on Python 3 because we can skip the temporary bytes object
1 parent 3510239 commit 2644cbd

File tree

2 files changed

+32
-53
lines changed

2 files changed

+32
-53
lines changed

msgpack/_packer.pyx

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# coding: utf-8
2-
#cython: embedsignature=True
2+
#cython: embedsignature=True, c_string_encoding=ascii
33

44
from cpython cimport *
5+
from cpython.version cimport PY_MAJOR_VERSION
56
from cpython.exc cimport PyErr_WarnEx
67

78
from msgpack.exceptions import PackValueError, PackOverflowError
@@ -99,8 +100,8 @@ cdef class Packer(object):
99100
cdef object _default
100101
cdef object _bencoding
101102
cdef object _berrors
102-
cdef char *encoding
103-
cdef char *unicode_errors
103+
cdef const char *encoding
104+
cdef const char *unicode_errors
104105
cdef bint strict_types
105106
cdef bool use_float
106107
cdef bint autoreset
@@ -126,26 +127,21 @@ cdef class Packer(object):
126127
if not PyCallable_Check(default):
127128
raise TypeError("default must be a callable.")
128129
self._default = default
129-
if encoding is None and unicode_errors is None:
130-
self.encoding = NULL
131-
self.unicode_errors = NULL
132-
else:
133-
if encoding is None:
130+
131+
self._bencoding = encoding
132+
if encoding is None:
133+
if PY_MAJOR_VERSION < 3:
134134
self.encoding = 'utf-8'
135135
else:
136-
if isinstance(encoding, unicode):
137-
self._bencoding = encoding.encode('ascii')
138-
else:
139-
self._bencoding = encoding
140-
self.encoding = PyBytes_AsString(self._bencoding)
141-
if unicode_errors is None:
142-
self.unicode_errors = 'strict'
143-
else:
144-
if isinstance(unicode_errors, unicode):
145-
self._berrors = unicode_errors.encode('ascii')
146-
else:
147-
self._berrors = unicode_errors
148-
self.unicode_errors = PyBytes_AsString(self._berrors)
136+
self.encoding = NULL
137+
else:
138+
self.encoding = self._bencoding
139+
140+
self._berrors = unicode_errors
141+
if unicode_errors is None:
142+
self.unicode_errors = NULL
143+
else:
144+
self.unicode_errors = self._berrors
149145

150146
def __dealloc__(self):
151147
PyMem_Free(self.pk.buf)
@@ -212,7 +208,7 @@ cdef class Packer(object):
212208
if ret == 0:
213209
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
214210
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
215-
if self.encoding == NULL:
211+
if self.encoding == NULL and self.unicode_errors == NULL:
216212
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
217213
if ret == -2:
218214
raise PackValueError("unicode string is too large")

msgpack/_unpacker.pyx

Lines changed: 14 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# coding: utf-8
2-
#cython: embedsignature=True
2+
#cython: embedsignature=True, c_string_encoding=ascii
33

4+
from cpython.version cimport PY_MAJOR_VERSION
45
from cpython.bytes cimport (
56
PyBytes_AsString,
67
PyBytes_FromStringAndSize,
@@ -75,7 +76,7 @@ cdef inline init_ctx(unpack_context *ctx,
7576
object object_hook, object object_pairs_hook,
7677
object list_hook, object ext_hook,
7778
bint use_list, bint raw,
78-
char* encoding, char* unicode_errors,
79+
const char* encoding, const char* unicode_errors,
7980
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
8081
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
8182
Py_ssize_t max_ext_len):
@@ -180,24 +181,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
180181
cdef Py_buffer view
181182
cdef char* buf = NULL
182183
cdef Py_ssize_t buf_len
183-
cdef char* cenc = NULL
184-
cdef char* cerr = NULL
184+
cdef const char* cenc = NULL
185+
cdef const char* cerr = NULL
185186
cdef int new_protocol = 0
186187

187188
if encoding is not None:
188189
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
189-
if isinstance(encoding, unicode):
190-
encoding = encoding.encode('ascii')
191-
elif not isinstance(encoding, bytes):
192-
raise TypeError("encoding should be bytes or unicode")
193-
cenc = PyBytes_AsString(encoding)
190+
cenc = encoding
194191

195192
if unicode_errors is not None:
196-
if isinstance(unicode_errors, unicode):
197-
unicode_errors = unicode_errors.encode('ascii')
198-
elif not isinstance(unicode_errors, bytes):
199-
raise TypeError("unicode_errors should be bytes or unicode")
200-
cerr = PyBytes_AsString(unicode_errors)
193+
cerr = unicode_errors
201194

202195
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
203196
try:
@@ -219,7 +212,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
219212

220213

221214
def unpack(object stream, object object_hook=None, object list_hook=None,
222-
bint use_list=1, encoding=None, unicode_errors="strict",
215+
bint use_list=1, encoding=None, unicode_errors=None,
223216
object_pairs_hook=None, ext_hook=ExtType,
224217
Py_ssize_t max_str_len=2147483647, # 2**31-1
225218
Py_ssize_t max_bin_len=2147483647,
@@ -352,8 +345,8 @@ cdef class Unpacker(object):
352345
Py_ssize_t max_array_len=2147483647,
353346
Py_ssize_t max_map_len=2147483647,
354347
Py_ssize_t max_ext_len=2147483647):
355-
cdef char *cenc=NULL,
356-
cdef char *cerr=NULL
348+
cdef const char *cenc=NULL,
349+
cdef const char *cerr=NULL
357350

358351
self.object_hook = object_hook
359352
self.object_pairs_hook = object_pairs_hook
@@ -383,22 +376,12 @@ cdef class Unpacker(object):
383376

384377
if encoding is not None:
385378
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
386-
if isinstance(encoding, unicode):
387-
self.encoding = encoding.encode('ascii')
388-
elif isinstance(encoding, bytes):
389-
self.encoding = encoding
390-
else:
391-
raise TypeError("encoding should be bytes or unicode")
392-
cenc = PyBytes_AsString(self.encoding)
379+
self.encoding = encoding
380+
cenc = encoding
393381

394382
if unicode_errors is not None:
395-
if isinstance(unicode_errors, unicode):
396-
self.unicode_errors = unicode_errors.encode('ascii')
397-
elif isinstance(unicode_errors, bytes):
398-
self.unicode_errors = unicode_errors
399-
else:
400-
raise TypeError("unicode_errors should be bytes or unicode")
401-
cerr = PyBytes_AsString(self.unicode_errors)
383+
self.unicode_errors = unicode_errors
384+
cerr = unicode_errors
402385

403386
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
404387
ext_hook, use_list, raw, cenc, cerr,

0 commit comments

Comments
 (0)
0