@@ -2043,9 +2043,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
2043
2043
void * data = PyUnicode_DATA (unicode );
2044
2044
const char * end = str + len ;
2045
2045
2046
+ assert (index + len <= PyUnicode_GET_LENGTH (unicode ));
2046
2047
switch (kind ) {
2047
2048
case PyUnicode_1BYTE_KIND : {
2048
- assert (index + len <= PyUnicode_GET_LENGTH (unicode ));
2049
2049
#ifdef Py_DEBUG
2050
2050
if (PyUnicode_IS_ASCII (unicode )) {
2051
2051
Py_UCS4 maxchar = ucs1lib_find_max_char (
@@ -2060,25 +2060,25 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
2060
2060
case PyUnicode_2BYTE_KIND : {
2061
2061
Py_UCS2 * start = (Py_UCS2 * )data + index ;
2062
2062
Py_UCS2 * ucs2 = start ;
2063
- assert (index <= PyUnicode_GET_LENGTH (unicode ));
2064
2063
2065
2064
for (; str < end ; ++ ucs2 , ++ str )
2066
2065
* ucs2 = (Py_UCS2 )* str ;
2067
2066
2068
2067
assert ((ucs2 - start ) <= PyUnicode_GET_LENGTH (unicode ));
2069
2068
break ;
2070
2069
}
2071
- default : {
2070
+ case PyUnicode_4BYTE_KIND : {
2072
2071
Py_UCS4 * start = (Py_UCS4 * )data + index ;
2073
2072
Py_UCS4 * ucs4 = start ;
2074
- assert (kind == PyUnicode_4BYTE_KIND );
2075
- assert (index <= PyUnicode_GET_LENGTH (unicode ));
2076
2073
2077
2074
for (; str < end ; ++ ucs4 , ++ str )
2078
2075
* ucs4 = (Py_UCS4 )* str ;
2079
2076
2080
2077
assert ((ucs4 - start ) <= PyUnicode_GET_LENGTH (unicode ));
2078
+ break ;
2081
2079
}
2080
+ default :
2081
+ Py_UNREACHABLE ();
2082
2082
}
2083
2083
}
2084
2084
@@ -2458,13 +2458,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
2458
2458
if (max_char >= 256 )
2459
2459
return ;
2460
2460
}
2461
- else {
2461
+ else if ( kind == PyUnicode_4BYTE_KIND ) {
2462
2462
const Py_UCS4 * u = PyUnicode_4BYTE_DATA (unicode );
2463
- assert (kind == PyUnicode_4BYTE_KIND );
2464
2463
max_char = ucs4lib_find_max_char (u , u + len );
2465
2464
if (max_char >= 0x10000 )
2466
2465
return ;
2467
2466
}
2467
+ else
2468
+ Py_UNREACHABLE ();
2469
+
2468
2470
copy = PyUnicode_New (len , max_char );
2469
2471
if (copy != NULL )
2470
2472
_PyUnicode_FastCopyCharacters (copy , 0 , unicode , 0 , len );
@@ -2501,22 +2503,12 @@ _PyUnicode_Copy(PyObject *unicode)
2501
2503
/* Widen Unicode objects to larger buffers. Don't write terminating null
2502
2504
character. Return NULL on error. */
2503
2505
2504
- void *
2505
- _PyUnicode_AsKind ( PyObject * s , unsigned int kind )
2506
+ static void *
2507
+ unicode_askind ( unsigned int skind , void const * data , Py_ssize_t len , unsigned int kind )
2506
2508
{
2507
- Py_ssize_t len ;
2508
2509
void * result ;
2509
- unsigned int skind ;
2510
2510
2511
- if (PyUnicode_READY (s ) == -1 )
2512
- return NULL ;
2513
-
2514
- len = PyUnicode_GET_LENGTH (s );
2515
- skind = PyUnicode_KIND (s );
2516
- if (skind >= kind ) {
2517
- PyErr_SetString (PyExc_SystemError , "invalid widening attempt" );
2518
- return NULL ;
2519
- }
2511
+ assert (skind < kind );
2520
2512
switch (kind ) {
2521
2513
case PyUnicode_2BYTE_KIND :
2522
2514
result = PyMem_New (Py_UCS2 , len );
@@ -2525,8 +2517,8 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
2525
2517
assert (skind == PyUnicode_1BYTE_KIND );
2526
2518
_PyUnicode_CONVERT_BYTES (
2527
2519
Py_UCS1 , Py_UCS2 ,
2528
- PyUnicode_1BYTE_DATA ( s ) ,
2529
- PyUnicode_1BYTE_DATA ( s ) + len ,
2520
+ ( const Py_UCS1 * ) data ,
2521
+ (( const Py_UCS1 * ) data ) + len ,
2530
2522
result );
2531
2523
return result ;
2532
2524
case PyUnicode_4BYTE_KIND :
@@ -2536,24 +2528,23 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
2536
2528
if (skind == PyUnicode_2BYTE_KIND ) {
2537
2529
_PyUnicode_CONVERT_BYTES (
2538
2530
Py_UCS2 , Py_UCS4 ,
2539
- PyUnicode_2BYTE_DATA ( s ) ,
2540
- PyUnicode_2BYTE_DATA ( s ) + len ,
2531
+ ( const Py_UCS2 * ) data ,
2532
+ (( const Py_UCS2 * ) data ) + len ,
2541
2533
result );
2542
2534
}
2543
2535
else {
2544
2536
assert (skind == PyUnicode_1BYTE_KIND );
2545
2537
_PyUnicode_CONVERT_BYTES (
2546
2538
Py_UCS1 , Py_UCS4 ,
2547
- PyUnicode_1BYTE_DATA ( s ) ,
2548
- PyUnicode_1BYTE_DATA ( s ) + len ,
2539
+ ( const Py_UCS1 * ) data ,
2540
+ (( const Py_UCS1 * ) data ) + len ,
2549
2541
result );
2550
2542
}
2551
2543
return result ;
2552
2544
default :
2553
- break ;
2545
+ Py_UNREACHABLE ();
2546
+ return NULL ;
2554
2547
}
2555
- PyErr_SetString (PyExc_SystemError , "invalid kind" );
2556
- return NULL ;
2557
2548
}
2558
2549
2559
2550
static Py_UCS4 *
@@ -9420,7 +9411,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
9420
9411
}
9421
9412
9422
9413
if (kind2 != kind1 ) {
9423
- buf2 = _PyUnicode_AsKind ( s2 , kind1 );
9414
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
9424
9415
if (!buf2 )
9425
9416
return -2 ;
9426
9417
}
@@ -9642,7 +9633,7 @@ PyUnicode_Count(PyObject *str,
9642
9633
buf1 = PyUnicode_DATA (str );
9643
9634
buf2 = PyUnicode_DATA (substr );
9644
9635
if (kind2 != kind1 ) {
9645
- buf2 = _PyUnicode_AsKind ( substr , kind1 );
9636
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
9646
9637
if (!buf2 )
9647
9638
goto onError ;
9648
9639
}
@@ -10415,7 +10406,7 @@ split(PyObject *self,
10415
10406
buf1 = PyUnicode_DATA (self );
10416
10407
buf2 = PyUnicode_DATA (substring );
10417
10408
if (kind2 != kind1 ) {
10418
- buf2 = _PyUnicode_AsKind ( substring , kind1 );
10409
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
10419
10410
if (!buf2 )
10420
10411
return NULL ;
10421
10412
}
@@ -10506,7 +10497,7 @@ rsplit(PyObject *self,
10506
10497
buf1 = PyUnicode_DATA (self );
10507
10498
buf2 = PyUnicode_DATA (substring );
10508
10499
if (kind2 != kind1 ) {
10509
- buf2 = _PyUnicode_AsKind ( substring , kind1 );
10500
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
10510
10501
if (!buf2 )
10511
10502
return NULL ;
10512
10503
}
@@ -10665,7 +10656,7 @@ replace(PyObject *self, PyObject *str1,
10665
10656
10666
10657
if (kind1 < rkind ) {
10667
10658
/* widen substring */
10668
- buf1 = _PyUnicode_AsKind ( str1 , rkind );
10659
+ buf1 = unicode_askind ( kind1 , buf1 , len1 , rkind );
10669
10660
if (!buf1 ) goto error ;
10670
10661
release1 = 1 ;
10671
10662
}
@@ -10674,19 +10665,22 @@ replace(PyObject *self, PyObject *str1,
10674
10665
goto nothing ;
10675
10666
if (rkind > kind2 ) {
10676
10667
/* widen replacement */
10677
- buf2 = _PyUnicode_AsKind ( str2 , rkind );
10668
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , rkind );
10678
10669
if (!buf2 ) goto error ;
10679
10670
release2 = 1 ;
10680
10671
}
10681
10672
else if (rkind < kind2 ) {
10682
10673
/* widen self and buf1 */
10683
10674
rkind = kind2 ;
10684
- if (release1 ) PyMem_Free (buf1 );
10685
- release1 = 0 ;
10686
- sbuf = _PyUnicode_AsKind (self , rkind );
10675
+ if (release1 ) {
10676
+ PyMem_Free (buf1 );
10677
+ buf1 = PyUnicode_DATA (str1 );
10678
+ release1 = 0 ;
10679
+ }
10680
+ sbuf = unicode_askind (skind , sbuf , slen , rkind );
10687
10681
if (!sbuf ) goto error ;
10688
10682
srelease = 1 ;
10689
- buf1 = _PyUnicode_AsKind ( str1 , rkind );
10683
+ buf1 = unicode_askind ( kind1 , buf1 , len1 , rkind );
10690
10684
if (!buf1 ) goto error ;
10691
10685
release1 = 1 ;
10692
10686
}
@@ -10724,7 +10718,7 @@ replace(PyObject *self, PyObject *str1,
10724
10718
10725
10719
if (kind1 < rkind ) {
10726
10720
/* widen substring */
10727
- buf1 = _PyUnicode_AsKind ( str1 , rkind );
10721
+ buf1 = unicode_askind ( kind1 , buf1 , len1 , rkind );
10728
10722
if (!buf1 ) goto error ;
10729
10723
release1 = 1 ;
10730
10724
}
@@ -10733,19 +10727,22 @@ replace(PyObject *self, PyObject *str1,
10733
10727
goto nothing ;
10734
10728
if (kind2 < rkind ) {
10735
10729
/* widen replacement */
10736
- buf2 = _PyUnicode_AsKind ( str2 , rkind );
10730
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , rkind );
10737
10731
if (!buf2 ) goto error ;
10738
10732
release2 = 1 ;
10739
10733
}
10740
10734
else if (kind2 > rkind ) {
10741
10735
/* widen self and buf1 */
10742
10736
rkind = kind2 ;
10743
- sbuf = _PyUnicode_AsKind ( self , rkind );
10737
+ sbuf = unicode_askind ( skind , sbuf , slen , rkind );
10744
10738
if (!sbuf ) goto error ;
10745
10739
srelease = 1 ;
10746
- if (release1 ) PyMem_Free (buf1 );
10747
- release1 = 0 ;
10748
- buf1 = _PyUnicode_AsKind (str1 , rkind );
10740
+ if (release1 ) {
10741
+ PyMem_Free (buf1 );
10742
+ buf1 = PyUnicode_DATA (str1 );
10743
+ release1 = 0 ;
10744
+ }
10745
+ buf1 = unicode_askind (kind1 , buf1 , len1 , rkind );
10749
10746
if (!buf1 ) goto error ;
10750
10747
release1 = 1 ;
10751
10748
}
@@ -11361,7 +11358,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
11361
11358
return result ;
11362
11359
}
11363
11360
if (kind2 != kind1 ) {
11364
- buf2 = _PyUnicode_AsKind ( substr , kind1 );
11361
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
11365
11362
if (!buf2 )
11366
11363
return -1 ;
11367
11364
}
@@ -11578,7 +11575,7 @@ unicode_count(PyObject *self, PyObject *args)
11578
11575
buf1 = PyUnicode_DATA (self );
11579
11576
buf2 = PyUnicode_DATA (substring );
11580
11577
if (kind2 != kind1 ) {
11581
- buf2 = _PyUnicode_AsKind ( substring , kind1 );
11578
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
11582
11579
if (!buf2 )
11583
11580
return NULL ;
11584
11581
}
@@ -13081,7 +13078,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
13081
13078
buf1 = PyUnicode_DATA (str_obj );
13082
13079
buf2 = PyUnicode_DATA (sep_obj );
13083
13080
if (kind2 != kind1 ) {
13084
- buf2 = _PyUnicode_AsKind ( sep_obj , kind1 );
13081
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
13085
13082
if (!buf2 )
13086
13083
return NULL ;
13087
13084
}
@@ -13138,7 +13135,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
13138
13135
buf1 = PyUnicode_DATA (str_obj );
13139
13136
buf2 = PyUnicode_DATA (sep_obj );
13140
13137
if (kind2 != kind1 ) {
13141
- buf2 = _PyUnicode_AsKind ( sep_obj , kind1 );
13138
+ buf2 = unicode_askind ( kind2 , buf2 , len2 , kind1 );
13142
13139
if (!buf2 )
13143
13140
return NULL ;
13144
13141
}
0 commit comments