-
-
Notifications
You must be signed in to change notification settings - Fork 32.1k
bpo-43667: Fix broken Unicode encoding in non-UTF locales on Solaris #25096
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
59548ca
cb6452c
c01b792
0fc7f54
5fdd1a2
bad1eba
e8dd8d1
4aebe1d
afaeaa2
627e460
02a37ee
00956fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -922,6 +922,86 @@ _Py_GetLocaleEncodingObject(void) | |
return str; | ||
} | ||
|
||
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION | ||
|
||
/* Convert a wide character string to the UTF32 encoded char32_t string. This | ||
is necessary on systems where internal form of wchar_t is not already | ||
Unicode (e.g. Oracle Solaris). | ||
kulikjak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Return a pointer to a newly allocated char32_t string, use PyMem_Free() to | ||
free the memory. Return NULL and raise exception on conversion or memory | ||
allocation error. */ | ||
char32_t* | ||
_Py_convert_wchar_t_to_UTF32(const wchar_t* u, Py_ssize_t size) | ||
{ | ||
/* Ensure we won't overflow the size. */ | ||
if (size > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t)) - 1)) { | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
|
||
/* Given 'u' might not be NULL terminated (size smaller than its | ||
length); copy and terminate part we are interested in. */ | ||
wchar_t* substr = PyMem_Malloc((size + 1) * sizeof(wchar_t)); | ||
if (substr == NULL) { | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
|
||
memcpy(substr, u, size * sizeof(wchar_t)); | ||
substr[size] = 0; | ||
|
||
/* Convert given wide-character string to a character string */ | ||
size_t buffsize = wcstombs(NULL, substr, 0) + 1; | ||
kulikjak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (buffsize == (size_t)-1) { | ||
PyMem_Free(substr); | ||
PyErr_Format(PyExc_ValueError, "wcstombs() conversion failed"); | ||
return NULL; | ||
} | ||
|
||
/* Ensure we won't overflow the size. */ | ||
if (buffsize > (PY_SSIZE_T_MAX - 1)) { | ||
PyMem_Free(substr); | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
char* buffer = PyMem_Malloc(buffsize * sizeof(char)); | ||
if (buffer == NULL) { | ||
PyMem_Free(substr); | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add an assertion: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this always the case? You told me that |
||
|
||
size_t res = wcstombs(buffer, substr, buffsize); | ||
assert(res == buffsize - 1); | ||
kulikjak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/* Convert character string to UTF32 encoded char32_t string. | ||
Since wchar_t and char32_t have the same size on Solaris and one | ||
wchar_t symbol corresponds to one UTF32 value, we can safely | ||
reuse this buffer and skip additional allocation. */ | ||
char32_t* c32 = (char32_t*) substr; | ||
mbstate_t state = {0}; | ||
|
||
Py_ssize_t i = 0; | ||
char* ptr = buffer; | ||
char* end = ptr + res + 1; | ||
|
||
while (res = mbrtoc32(&(c32[i]), ptr, end - ptr, &state)) { | ||
kulikjak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (res == (size_t)-1 || res == (size_t)-2 || res == (size_t)-3) { | ||
kulikjak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
PyMem_Free(c32); | ||
PyMem_Free(buffer); | ||
PyErr_Format(PyExc_ValueError, | ||
"mbrtoc32() conversion failed with error code: %zd", | ||
(Py_ssize_t)res); | ||
return NULL; | ||
} | ||
ptr += res; | ||
i ++; | ||
} | ||
PyMem_Free(buffer); | ||
return c32; | ||
} | ||
#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */ | ||
|
||
#ifdef MS_WINDOWS | ||
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */ | ||
|
Uh oh!
There was an error while loading. Please reload this page.