63
63
64
64
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
65
65
structure. state.ascii and state.compact are set, and the data
66
- immediately follow the structure. utf8_length and wstr_length can be found
66
+ immediately follow the structure. utf8_length can be found
67
67
in the length field; the utf8 pointer is equal to the data pointer. */
68
68
typedef struct {
69
69
/* There are 4 forms of Unicode strings:
@@ -76,7 +76,7 @@ typedef struct {
76
76
* compact = 1
77
77
* ascii = 1
78
78
* ready = 1
79
- * (length is the length of the utf8 and wstr strings )
79
+ * (length is the length of the utf8 string)
80
80
* (data starts just after the structure)
81
81
* (since ASCII is decoded from UTF-8, the utf8 string is the data)
82
82
@@ -91,51 +91,25 @@ typedef struct {
91
91
* ascii = 0
92
92
* utf8 is not shared with data
93
93
* utf8_length = 0 if utf8 is NULL
94
- * wstr is shared with data and wstr_length=length
95
- if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
96
- or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
97
- * wstr_length = 0 if wstr is NULL
98
94
* (data starts just after the structure)
99
95
100
- - legacy string, not ready:
101
-
102
10000
- * structure = PyUnicodeObject
103
- * test: kind == PyUnicode_WCHAR_KIND
104
- * length = 0 (use wstr_length)
105
- * hash = -1
106
- * kind = PyUnicode_WCHAR_KIND
107
- * compact = 0
108
- * ascii = 0
109
- * ready = 0
110
- * interned = SSTATE_NOT_INTERNED
111
- * wstr is not NULL
112
- * data.any is NULL
113
- * utf8 is NULL
114
- * utf8_length = 0
115
-
116
96
- legacy string, ready:
117
97
118
98
* structure = PyUnicodeObject structure
119
- * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
99
+ * test: !PyUnicode_IS_COMPACT(op)
120
100
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
121
101
PyUnicode_4BYTE_KIND
122
102
* compact = 0
123
103
* ready = 1
124
104
* data.any is not NULL
125
105
* utf8 is shared and utf8_length = length with data.any if ascii = 1
126
106
* utf8_length = 0 if utf8 is NULL
127
- * wstr is shared with data.any and wstr_length = length
128
- if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
129
- or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
130
- * wstr_length = 0 if wstr is NULL
131
107
132
108
Compact strings use only one memory block (structure + characters),
133
109
whereas legacy strings use one block for the structure and one block
134
110
for characters.
135
111
136
- Legacy strings are created by PyUnicode_FromUnicode() and
137
- PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
138
- when PyUnicode_READY() is called.
112
+ Legacy strings are created by subclasses of Unicode.
139
113
140
114
See also _PyUnicode_CheckConsistency().
141
115
*/
@@ -154,11 +128,6 @@ typedef struct {
154
128
unsigned int interned :2 ;
155
129
/* Character size:
156
130
157
- - PyUnicode_WCHAR_KIND (0):
158
-
159
- * character type = wchar_t (16 or 32 bits, depending on the
160
- platform)
161
-
162
131
- PyUnicode_1BYTE_KIND (1):
163
132
164
133
* character type = Py_UCS1 (8 bits, unsigned)
@@ -198,7 +167,6 @@ typedef struct {
198
167
4 bytes (see issue #19537 on m68k). */
199
168
unsigned int :24 ;
200
169
} state ;
201
- wchar_t * wstr ; /* wchar_t representation (null-terminated) */
202
170
} PyASCIIObject ;
203
171
204
172
/* Non-ASCII strings allocated through PyUnicode_New use the
@@ -209,13 +177,9 @@ typedef struct {
209
177
Py_ssize_t utf8_length ; /* Number of bytes in utf8, excluding the
210
178
* terminating \0. */
211
179
char * utf8 ; /* UTF-8 representation (null-terminated) */
212
- Py_ssize_t wstr_length ; /* Number of code points in wstr, possible
213
- * surrogates count as two code points. */
214
180
} PyCompactUnicodeObject ;
215
181
216
- /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
217
- PyUnicodeObject structure. The actual string data is initially in the wstr
218
- block, and copied into the data block using _PyUnicode_Ready. */
182
+ /* Object format for Unicode subclasses. */
219
183
typedef struct {
220
184
PyCompactUnicodeObject _base ;
221
185
union {
@@ -298,10 +262,6 @@ static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
298
262
#endif
299
263
300
264
enum PyUnicode_Kind {
301
- /* String contains only wstr byte characters. This is only possible
302
- when the string was created with a legacy API and _PyUnicode_Ready()
303
- has not been called yet. */
304
- PyUnicode_WCHAR_KIND = 0 ,
305
265
/* Return values of the PyUnicode_KIND() function: */
306
266
PyUnicode_1BYTE_KIND = 1 ,
307
267
PyUnicode_2BYTE_KIND = 2 ,
@@ -459,27 +419,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
459
419
Py_UCS4 maxchar /* maximum code point value in the string */
460
420
);
461
421
462
- /* Initializes the canonical string representation from the deprecated
463
- wstr/Py_UNICODE representation. This function is used to convert Unicode
464
- objects which were created using the old API to the new flexible format
465
- introduced with PEP 393.
466
-
467
- Don't call this function directly, use the public PyUnicode_READY() function
468
- instead. */
469
- PyAPI_FUNC (int ) _PyUnicode_Ready (
470
- PyObject * unicode /* Unicode object */
471
- );
472
-
473
422
/* PyUnicode_READY() no longer converts anything: it does not call
474
423
_PyUnicode_Ready(), whose declaration has been removed. It only asserts
475
424
that the string is already ready (PyUnicode_IS_READY()).
476
425
Always returns 0. */
477
426
static inline int PyUnicode_READY (PyObject * op )
478
427
{
479
- if (PyUnicode_IS_READY (op )) {
480
- return 0 ;
481
- }
482
- return _PyUnicode_Ready (op );
428
+ assert (PyUnicode_IS_READY (op ));
429
+ return 0 ;
483
430
}
484
431
#if !defined(Py_LIMITED_API ) || Py_LIMITED_API + 0 < 0x030b0000
485
432
# define PyUnicode_READY (op ) PyUnicode_READY(_PyObject_CAST(op))
0 commit comments