Merge pull request #28282 from charris/backport-28276 · numpy/numpy@2cc5acf · GitHub
[go: up one dir, main page]

Skip to content

Commit 2cc5acf

Browse files
authored
Merge pull request #28282 from charris/backport-28276
BUG: fix incorrect bytes to stringdtype coercion
2 parents c7ea504 + c455112 commit 2cc5acf

File tree

2 files changed: 22 additions and 1 deletion.

numpy/_core/src/multiarray/stringdtype/dtype.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,15 @@ as_pystring(PyObject *scalar, int coerce)
270270
"string coercion is disabled.");
271271
return NULL;
272272
}
273+
else if (scalar_type == &PyBytes_Type) {
274+
// assume UTF-8 encoding
275+
char *buffer;
276+
Py_ssize_t length;
277+
if (PyBytes_AsStringAndSize(scalar, &buffer, &length) < 0) {
278+
return NULL;
279+
}
280+
return PyUnicode_FromStringAndSize(buffer, length);
281+
}
273282
else {
274283
// attempt to coerce to str
275284
scalar = PyObject_Str(scalar);

numpy/_core/tests/test_stringdtype.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,14 @@ def test_array_creation_utf8(dtype, data):
190190
],
191191
)
192192
def test_scalars_string_conversion(data, dtype):
193+
try:
194+
str_vals = [str(d.decode('utf-8')) for d in data]
195+
except AttributeError:
196+
str_vals = [str(d) for d in data]
193197
if dtype.coerce:
194198
assert_array_equal(
195199
np.array(data, dtype=dtype),
196-
np.array([str(d) for d in data], dtype=dtype),
200+
np.array(str_vals, dtype=dtype),
197201
)
198202
else:
199203
with pytest.raises(ValueError):
@@ -284,6 +288,14 @@ def test_bytes_casts(self, dtype, strings):
284288
barr = np.array(utf8_bytes, dtype=bytes_dtype)
285289
assert_array_equal(barr, sarr.astype(bytes_dtype))
286290
assert_array_equal(barr.astype(dtype), sarr)
291+
if dtype.coerce:
292+
barr = np.array(utf8_bytes, dtype=dtype)
293+
assert_array_equal(barr, sarr)
294+
barr = np.array(utf8_bytes, dtype="O")
295+
assert_array_equal(barr.astype(dtype), sarr)
296+
else:
297+
with pytest.raises(ValueError):
298+
np.array(utf8_bytes, dtype=dtype)
287299
except UnicodeEncodeError:
288300
with pytest.raises(UnicodeEncodeError):
289301
sarr.astype("S20")

0 commit comments

Comments
 (0)
0