Resurrect unicode_errors of the Packer. (#379) · eb-emilio/msgpack-python@83ebb63 · GitHub
[go: up one dir, main page]

Skip to content

Commit 83ebb63

Browse files
authored
Resurrect unicode_errors of the Packer. (msgpack#379)
1 parent a0480c7 commit 83ebb63

File tree

4 files changed

+50
-13
lines changed

4 files changed

+50
-13
lines changed

ChangeLog.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Release Date: TBD
55

66
* Remove Python 2 support from the ``msgpack/_cmsgpack``.
77
``msgpack/fallback`` still supports Python 2.
8-
* Remove encoding and unicode_errors options from the Packer.
8+
* Remove ``encoding`` option from the Packer.
99

1010

1111
0.6.2

msgpack/_packer.pyx

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,15 @@ cdef class Packer(object):
8989
Additionally tuples will not be serialized as lists.
9090
This is useful when trying to implement accurate serialization
9191
for python types.
92+
93+
:param str unicode_errors:
94+
The error handler for encoding unicode. (default: 'strict')
95+
DO NOT USE THIS!! This option is kept for very specific usage.
9296
"""
9397
cdef msgpack_packer pk
9498
cdef object _default
99+
cdef object _berrors
100+
cdef const char *unicode_errors
95101
cdef bint strict_types
96102
cdef bool use_float
97103
cdef bint autoreset
@@ -104,10 +110,8 @@ cdef class Packer(object):
104110
self.pk.buf_size = buf_size
105111
self.pk.length = 0
106112

107-
def __init__(self, default=None,
108-
bint use_single_float=False,
109-
bint autoreset=True,
110-
bint use_bin_type=False,
113+
def __init__(self, *, default=None, unicode_errors=None,
114+
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
111115
bint strict_types=False):
112116
self.use_float = use_single_float
113117
self.strict_types = strict_types
@@ -118,6 +122,12 @@ cdef class Packer(object):
118122
raise TypeError("default must be a callable.")
119123
self._default = default
120124

125+
self._berrors = unicode_errors
126+
if unicode_errors is None:
127+
self.unicode_errors = NULL
128+
else:
129+
self.unicode_errors = self._berrors
130+
121131
def __dealloc__(self):
122132
PyMem_Free(self.pk.buf)
123133
self.pk.buf = NULL
@@ -183,9 +193,19 @@ cdef class Packer(object):
183193
if ret == 0:
184194
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
185195
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
186-
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
187-
if ret == -2:
188-
raise ValueError("unicode string is too large")
196+
if self.unicode_errors == NULL:
197+
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
198+
if ret == -2:
199+
raise ValueError("unicode string is too large")
200+
else:
201+
o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
202+
L = Py_SIZE(o)
203+
if L > ITEM_LIMIT:
204+
raise ValueError("unicode string is too large")
205+
ret = msgpack_pack_raw(&self.pk, L)
206+
if ret == 0:
207+
rawval = o
208+
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
189209
elif PyDict_CheckExact(o):
190210
d = <dict>o
191211
L = len(d)

msgpack/fallback.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,7 @@ def _unpack(self, execute=EX_CONSTRUCT):
667667
elif self._raw:
668668
obj = bytes(obj)
669669
else:
670-
obj = obj.decode('utf_8')
670+
obj = obj.decode('utf_8', self._unicode_errors)
671671
return obj
672672
if typ == TYPE_EXT:
673673
return self._ext_hook(n, bytes(obj))
@@ -752,14 +752,19 @@ class Packer(object):
752752
Additionally tuples will not be serialized as lists.
753753
This is useful when trying to implement accurate serialization
754754
for python types.
755+
756+
:param str unicode_errors:
757+
The error handler for encoding unicode. (default: 'strict')
758+
DO NOT USE THIS!! This option is kept for very specific usage.
755759
"""
756-
def __init__(self, default=None,
760+
def __init__(self, default=None, unicode_errors=None,
757761
use_single_float=False, autoreset=True, use_bin_type=False,
758762
strict_types=False):
759763
self._strict_types = strict_types
760764
self._use_float = use_single_float
761765
self._autoreset = autoreset
762766
self._use_bin_type = use_bin_type
767+
self._unicode_errors = unicode_errors or "strict"
763768
self._buffer = StringIO()
764769
if default is not None:
765770
if not callable(default):
@@ -816,7 +821,7 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT,
816821
self._pack_bin_header(n)
817822
return self._buffer.write(obj)
818823
if check(obj, unicode):
819-
obj = obj.encode("utf-8")
824+
obj = obj.encode("utf-8", self._unicode_errors)
820825
n = len(obj)
821826
if n >= 2**32:
822827
raise ValueError("String is too large")

test/test_pack.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from collections import OrderedDict
66
from io import BytesIO
77
import struct
8+
import sys
89

910
import pytest
1011
from pytest import raises, xfail
@@ -54,13 +55,24 @@ def testPackByteArrays():
5455
for td in test_data:
5556
check(td)
5657

58+
@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates")
59+
def testIgnoreUnicodeErrors():
60+
re = unpackb(packb(b'abc\xeddef', use_bin_type=False),
61+
raw=False, unicode_errors='ignore')
62+
assert re == "abcdef"
63+
5764
def testStrictUnicodeUnpack():
58-
packed = packb(b'abc\xeddef')
65+
packed = packb(b'abc\xeddef', use_bin_type=False)
5966
with pytest.raises(UnicodeDecodeError):
6067
unpackb(packed, raw=False, use_list=1)
6168

69+
@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates")
70+
def testIgnoreErrorsPack():
71+
re = unpackb(packb(u"abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors='ignore'), raw=False, use_list=1)
72+
assert re == "abcdef"
73+
6274
def testDecodeBinary():
63-
re = unpackb(packb(b"abc"), encoding=None, use_list=1)
75+
re = unpackb(packb(b"abc"), use_list=1)
6476
assert re == b"abc"
6577

6678
def testPackFloat():

0 commit comments

Comments
 (0)
0