bpo-40130: _PyUnicode_AsKind() should not be exported. (GH-19265) · python/cpython@17b4733 · GitHub
[go: up one dir, main page]

Skip to content

Commit 17b4733

Browse files
bpo-40130: _PyUnicode_AsKind() should not be exported. (GH-19265)
Make it a static function, and pass known attributes (kind, data, length) instead of the PyUnicode object.
1 parent 3ef4a7e commit 17b4733

File tree

2 files changed

+46
-55
lines changed

2 files changed

+46
-55
lines changed

Include/cpython/unicodeobject.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -726,12 +726,6 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
726726
Py_ssize_t start,
727727
Py_ssize_t end);
728728

729-
/* --- wchar_t support for platforms which support it --------------------- */
730-
731-
#ifdef HAVE_WCHAR_H
732-
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
733-
#endif
734-
735729
/* --- Manage the default encoding ---------------------------------------- */
736730

737731
/* Returns a pointer to the default encoding (UTF-8) of the

Objects/unicodeobject.c

Lines changed: 46 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2043,9 +2043,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
20432043
void *data = PyUnicode_DATA(unicode);
20442044
const char *end = str + len;
20452045

2046+
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
20462047
switch (kind) {
20472048
case PyUnicode_1BYTE_KIND: {
2048-
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
20492049
#ifdef Py_DEBUG
20502050
if (PyUnicode_IS_ASCII(unicode)) {
20512051
Py_UCS4 maxchar = ucs1lib_find_max_char(
@@ -2060,25 +2060,25 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
20602060
case PyUnicode_2BYTE_KIND: {
20612061
Py_UCS2 *start = (Py_UCS2 *)data + index;
20622062
Py_UCS2 *ucs2 = start;
2063-
assert(index <= PyUnicode_GET_LENGTH(unicode));
20642063

20652064
for (; str < end; ++ucs2, ++str)
20662065
*ucs2 = (Py_UCS2)*str;
20672066

20682067
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
20692068
break;
20702069
}
2071-
default: {
2070+
case PyUnicode_4BYTE_KIND: {
20722071
Py_UCS4 *start = (Py_UCS4 *)data + index;
20732072
Py_UCS4 *ucs4 = start;
2074-
assert(kind == PyUnicode_4BYTE_KIND);
2075-
assert(index <= PyUnicode_GET_LENGTH(unicode));
20762073

20772074
for (; str < end; ++ucs4, ++str)
20782075
*ucs4 = (Py_UCS4)*str;
20792076

20802077
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
2078+
break;
20812079
}
2080+
default:
2081+
Py_UNREACHABLE();
20822082
}
20832083
}
20842084

@@ -2458,13 +2458,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
24582458
if (max_char >= 256)
24592459
return;
24602460
}
2461-
else {
2461+
else if (kind == PyUnicode_4BYTE_KIND) {
24622462
const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
2463-
assert(kind == PyUnicode_4BYTE_KIND);
24642463
max_char = ucs4lib_find_max_char(u, u + len);
24652464
if (max_char >= 0x10000)
24662465
return;
24672466
}
2467+
else
2468+
Py_UNREACHABLE();
2469+
24682470
copy = PyUnicode_New(len, max_char);
24692471
if (copy != NULL)
24702472
_PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
@@ -2501,22 +2503,12 @@ _PyUnicode_Copy(PyObject *unicode)
25012503
/* Widen Unicode objects to larger buffers. Don't write terminating null
25022504
character. Return NULL on error. */
25032505

2504-
void*
2505-
_PyUnicode_AsKind(PyObject *s, unsigned int kind)
2506+
static void*
2507+
unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
25062508
{
2507-
Py_ssize_t len;
25082509
void *result;
2509-
unsigned int skind;
25102510

2511-
if (PyUnicode_READY(s) == -1)
2512-
return NULL;
2513-
2514-
len = PyUnicode_GET_LENGTH(s);
2515-
skind = PyUnicode_KIND(s);
2516-
if (skind >= kind) {
2517-
PyErr_SetString(PyExc_SystemError, "invalid widening attempt");
2518-
return NULL;
2519-
}
2511+
assert(skind < kind);
25202512
switch (kind) {
25212513
case PyUnicode_2BYTE_KIND:
25222514
result = PyMem_New(Py_UCS2, len);
@@ -2525,8 +2517,8 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
25252517
assert(skind == PyUnicode_1BYTE_KIND);
25262518
_PyUnicode_CONVERT_BYTES(
25272519
Py_UCS1, Py_UCS2,
2528-
PyUnicode_1BYTE_DATA(s),
2529-
PyUnicode_1BYTE_DATA(s) + len,
2520+
(const Py_UCS1 *)data,
2521+
((const Py_UCS1 *)data) + len,
25302522
result);
25312523
return result;
25322524
case PyUnicode_4BYTE_KIND:
@@ -2536,24 +2528,23 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
25362528
if (skind == PyUnicode_2BYTE_KIND) {
25372529
_PyUnicode_CONVERT_BYTES(
25382530
Py_UCS2, Py_UCS4,
2539-
PyUnicode_2BYTE_DATA(s),
2540-
PyUnicode_2BYTE_DATA(s) + len,
2531+
(const Py_UCS2 *)data,
2532+
((const Py_UCS2 *)data) + len,
25412533
result);
25422534
}
25432535
else {
25442536
assert(skind == PyUnicode_1BYTE_KIND);
25452537
_PyUnicode_CONVERT_BYTES(
25462538
Py_UCS1, Py_UCS4,
2547-
PyUnicode_1BYTE_DATA(s),
2548-
PyUnicode_1BYTE_DATA(s) + len,
2539+
(const Py_UCS1 *)data,
2540+
((const Py_UCS1 *)data) + len,
25492541
result);
25502542
}
25512543
return result;
25522544
default:
2553-
break;
2545+
Py_UNREACHABLE();
2546+
return NULL;
25542547
}
2555-
PyErr_SetString(PyExc_SystemError, "invalid kind");
2556-
return NULL;
25572548
}
25582549

25592550
static Py_UCS4*
@@ -9420,7 +9411,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
94209411
}
94219412

94229413
if (kind2 != kind1) {
9423-
buf2 = _PyUnicode_AsKind(s2, kind1);
9414+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
94249415
if (!buf2)
94259416
return -2;
94269417
}
@@ -9642,7 +9633,7 @@ PyUnicode_Count(PyObject *str,
96429633
buf1 = PyUnicode_DATA(str);
96439634
buf2 = PyUnicode_DATA(substr);
96449635
if (kind2 != kind1) {
9645-
buf2 = _PyUnicode_AsKind(substr, kind1);
9636+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
96469637
if (!buf2)
96479638
goto onError;
96489639
}
@@ -10415,7 +10406,7 @@ split(PyObject *self,
1041510406
buf1 = PyUnicode_DATA(self);
1041610407
buf2 = PyUnicode_DATA(substring);
1041710408
if (kind2 != kind1) {
10418-
buf2 = _PyUnicode_AsKind(substring, kind1);
10409+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
1041910410
if (!buf2)
1042010411
return NULL;
1042110412
}
@@ -10506,7 +10497,7 @@ rsplit(PyObject *self,
1050610497
buf1 = PyUnicode_DATA(self);
1050710498
buf2 = PyUnicode_DATA(substring);
1050810499
if (kind2 != kind1) {
10509-
buf2 = _PyUnicode_AsKind(substring, kind1);
10500+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
1051010501
if (!buf2)
1051110502
return NULL;
1051210503
}
@@ -10665,7 +10656,7 @@ replace(PyObject *self, PyObject *str1,
1066510656

1066610657
if (kind1 < rkind) {
1066710658
/* widen substring */
10668-
buf1 = _PyUnicode_AsKind(str1, rkind);
10659+
buf1 = unicode_askind(kind1, buf1, len1, rkind);
1066910660
if (!buf1) goto error;
1067010661
release1 = 1;
1067110662
}
@@ -10674,19 +10665,22 @@ replace(PyObject *self, PyObject *str1,
1067410665
goto nothing;
1067510666
if (rkind > kind2) {
1067610667
/* widen replacement */
10677-
buf2 = _PyUnicode_AsKind(str2, rkind);
10668+
buf2 = unicode_askind(kind2, buf2, len2, rkind);
1067810669
if (!buf2) goto error;
1067910670
release2 = 1;
1068010671
}
1068110672
else if (rkind < kind2) {
1068210673
/* widen self and buf1 */
1068310674
rkind = kind2;
10684-
if (release1) PyMem_Free(buf1);
10685-
release1 = 0;
10686-
sbuf = _PyUnicode_AsKind(self, rkind);
10675+
if (release1) {
10676+
PyMem_Free(buf1);
10677+
buf1 = PyUnicode_DATA(str1);
10678+
release1 = 0;
10679+
}
10680+
sbuf = unicode_askind(skind, sbuf, slen, rkind);
1068710681
if (!sbuf) goto error;
1068810682
srelease = 1;
10689-
buf1 = _PyUnicode_AsKind(str1, rkind);
10683+
buf1 = unicode_askind(kind1, buf1, len1, rkind);
1069010684
if (!buf1) goto error;
1069110685
release1 = 1;
1069210686
}
@@ -10724,7 +10718,7 @@ replace(PyObject *self, PyObject *str1,
1072410718

1072510719
if (kind1 < rkind) {
1072610720
/* widen substring */
10727-
buf1 = _PyUnicode_AsKind(str1, rkind);
10721+
buf1 = unicode_askind(kind1, buf1, len1, rkind);
1072810722
if (!buf1) goto error;
1072910723
release1 = 1;
1073010724
}
@@ -10733,19 +10727,22 @@ replace(PyObject *self, PyObject *str1,
1073310727
goto nothing;
1073410728
if (kind2 < rkind) {
1073510729
/* widen replacement */
10736-
buf2 = _PyUnicode_AsKind(str2, rkind);
10730+
buf2 = unicode_askind(kind2, buf2, len2, rkind);
1073710731
if (!buf2) goto error;
1073810732
release2 = 1;
1073910733
}
1074010734
else if (kind2 > rkind) {
1074110735
/* widen self and buf1 */
1074210736
rkind = kind2;
10743-
sbuf = _PyUnicode_AsKind(self, rkind);
10737+
sbuf = unicode_askind(skind, sbuf, slen, rkind);
1074410738
if (!sbuf) goto error;
1074510739
srelease = 1;
10746-
if (release1) PyMem_Free(buf1);
10747-
release1 = 0;
10748-
buf1 = _PyUnicode_AsKind(str1, rkind);
10740+
if (release1) {
10741+
PyMem_Free(buf1);
10742+
buf1 = PyUnicode_DATA(str1);
10743+
release1 = 0;
10744+
}
10745+
buf1 = unicode_askind(kind1, buf1, len1, rkind);
1074910746
if (!buf1) goto error;
1075010747
release1 = 1;
1075110748
}
@@ -11361,7 +11358,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
1136111358
return result;
1136211359
}
1136311360
if (kind2 != kind1) {
11364-
buf2 = _PyUnicode_AsKind(substr, kind1);
11361+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
1136511362
if (!buf2)
1136611363
return -1;
1136711364
}
@@ -11578,7 +11575,7 @@ unicode_count(PyObject *self, PyObject *args)
1157811575
buf1 = PyUnicode_DATA(self);
1157911576
buf2 = PyUnicode_DATA(substring);
1158011577
if (kind2 != kind1) {
11581-
buf2 = _PyUnicode_AsKind(substring, kind1);
11578+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
1158211579
if (!buf2)
1158311580
return NULL;
1158411581
}
@@ -13081,7 +13078,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
1308113078
buf1 = PyUnicode_DATA(str_obj);
1308213079
buf2 = PyUnicode_DATA(sep_obj);
1308313080
if (kind2 != kind1) {
13084-
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
13081+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
1308513082
if (!buf2)
1308613083
return NULL;
1308713084
}
@@ -13138,7 +13135,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
1313813135
buf1 = PyUnicode_DATA(str_obj);
1313913136
buf2 = PyUnicode_DATA(sep_obj);
1314013137
if (kind2 != kind1) {
13141-
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
13138+
buf2 = unicode_askind(kind2, buf2, len2, kind1);
1314213139
if (!buf2)
1314313140
return NULL;
1314413141
}

0 commit comments

Comments
 (0)
0