@@ -3615,39 +3615,32 @@ PyObject *
3615
3615
/* Review summary of the post-change function ('+' and unprefixed lines):
   encode `unicode` using the interpreter's filesystem codec state.
   1. If interp->fs_codec.utf8 is set, call unicode_encode_utf8() with
      fs_codec.error_handler and fs_codec.errors.
   2. Otherwise, unless _Py_FORCE_UTF8_FS_ENCODING is defined, if
      fs_codec.encoding is set, call PyUnicode_AsEncodedString() with
      fs_codec.encoding and fs_codec.errors.
   3. Otherwise derive the error handler from config.filesystem_errors and
      call unicode_encode_utf8() (when _Py_FORCE_UTF8_FS_ENCODING is
      defined) or unicode_encode_locale().
   The result of the selected call is returned unchanged. */
PyUnicode_EncodeFSDefault (PyObject * unicode )
3616
3616
{
3617
3617
PyInterpreterState * interp = _PyInterpreterState_GET_UNSAFE ();
3618
- #ifdef _Py_FORCE_UTF8_FS_ENCODING
3619
- if (interp -> fs_codec .encoding ) {
3618
+ if (interp -> fs_codec .utf8 ) {
3620
3619
return unicode_encode_utf8 (unicode ,
3621
3620
interp -> fs_codec .error_handler ,
3622
3621
interp -> fs_codec .errors );
3623
3622
}
3624
- else {
3625
- const wchar_t * filesystem_errors = interp -> config .filesystem_errors ;
3626
- _Py_error_handler errors ;
3627
- errors = get_error_handler_wide (filesystem_errors );
3628
- assert (errors != _Py_ERROR_UNKNOWN );
3629
- return unicode_encode_utf8 (unicode , errors , NULL );
3630
- }
3631
- #else
3632
- /* Bootstrap check: if the filesystem codec is implemented in Python, we
3633
- cannot use it to encode and decode filenames before it is loaded. Load
3634
- the Python codec requires to encode at least its own filename. Use the C
3635
- implementation of the locale codec until the codec registry is
3636
- initialized and the Python codec is loaded.
3637
- See _PyUnicode_InitEncodings(). */
3638
- if (interp -> fs_codec .encoding ) {
3623
+ #ifndef _Py_FORCE_UTF8_FS_ENCODING
3624
+ else if (interp -> fs_codec .encoding ) {
3639
3625
return PyUnicode_AsEncodedString (unicode ,
3640
3626
interp -> fs_codec .encoding ,
3641
3627
interp -> fs_codec .errors );
3642
3628
}
3629
+ #endif
3643
3630
else {
3631
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
3632
+ machinery is not ready and so cannot be used: fall back to
3633
+ unicode_encode_utf8() or unicode_encode_locale() in this case. */
3644
3634
const wchar_t * filesystem_errors = interp -> config .filesystem_errors ;
3645
- _Py_error_handler errors ;
3646
- errors = get_error_handler_wide (filesystem_errors );
3635
+ assert ( filesystem_errors != NULL ) ;
3636
+ _Py_error_handler errors = get_error_handler_wide (filesystem_errors );
3647
3637
assert (errors != _Py_ERROR_UNKNOWN );
3638
+ #ifdef _Py_FORCE_UTF8_FS_ENCODING
3639
+ return unicode_encode_utf8 (unicode , errors , NULL );
3640
+ #else
3648
3641
return unicode_encode_locale (unicode , errors , 0 );
3649
- }
3650
3642
#endif
3643
+ }
3651
3644
}
3652
3645
3653
3646
PyObject *
@@ -3857,39 +3850,33 @@ PyObject*
3857
3850
/* Review summary of the post-change function ('+' and unprefixed lines):
   decode the `size` bytes at `s` using the interpreter's filesystem codec
   state.
   1. If interp->fs_codec.utf8 is set, call unicode_decode_utf8() with
      fs_codec.error_handler and fs_codec.errors.
   2. Otherwise, unless _Py_FORCE_UTF8_FS_ENCODING is defined, if
      fs_codec.encoding is set, call PyUnicode_Decode() with
      fs_codec.encoding and fs_codec.errors.
   3. Otherwise derive the error handler from config.filesystem_errors and
      call unicode_decode_utf8() (when _Py_FORCE_UTF8_FS_ENCODING is
      defined) or unicode_decode_locale().
   The result of the selected call is returned unchanged. */
PyUnicode_DecodeFSDefaultAndSize (const char * s , Py_ssize_t size )
3858
3851
{
3859
3852
PyInterpreterState * interp = _PyInterpreterState_GET_UNSAFE ();
3860
- #ifdef _Py_FORCE_UTF8_FS_ENCODING
3861
- if (interp -> fs_codec .encoding ) {
3853
+ if (interp -> fs_codec .utf8 ) {
3862
3854
return unicode_decode_utf8 (s , size ,
3863
3855
interp -> fs_codec .error_handler ,
3864
3856
interp -> fs_codec .errors ,
3865
3857
NULL );
3866
3858
}
3867
- else {
3868
- const wchar_t * filesystem_errors = interp -> config .filesystem_errors ;
3869
- _Py_error_handler errors ;
3870
- errors = get_error_handler_wide (filesystem_errors );
3871
- assert (errors != _Py_ERROR_UNKNOWN );
3872
- return unicode_decode_utf8 (s , size , errors , NULL , NULL );
3873
- }
3874
- #else
3875
- /* Bootstrap check: if the filesystem codec is implemented in Python, we
3876
- cannot use it to encode and decode filenames before it is loaded. Load
3877
- the Python codec requires to encode at least its own filename. Use the C
3878
- implementation of the locale codec until the codec registry is
3879
- initialized and the Python codec is loaded.
3880
- See _PyUnicode_InitEncodings(). */
3881
- if (interp -> fs_codec .encoding ) {
3859
+ #ifndef _Py_FORCE_UTF8_FS_ENCODING
3860
+ else if (interp -> fs_codec .encoding ) {
3882
3861
return PyUnicode_Decode (s , size ,
3883
3862
interp -> fs_codec .encoding ,
3884
3863
interp -> fs_codec .errors );
3885
3864
}
3865
+ #endif
3886
3866
else {
3867
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
3868
+ machinery is not ready and so cannot be used: fall back to
3869
+ unicode_decode_utf8() or unicode_decode_locale() in this case. */
3887
3870
const wchar_t * filesystem_errors = interp -> config .filesystem_errors ;
3888
- _Py_error_handler errors ;
3889
- errors = get_error_handler_wide (filesystem_errors );
3871
+ assert (filesystem_errors != NULL );
3872
+ _Py_error_handler errors = get_error_handler_wide (filesystem_errors );
3873
+ assert (errors != _Py_ERROR_UNKNOWN );
3874
+ #ifdef _Py_FORCE_UTF8_FS_ENCODING
3875
+ return unicode_decode_utf8 (s , size , errors , NULL , NULL );
3876
+ #else
3890
3877
return unicode_decode_locale (s , size , errors , 0 );
3891
- }
3892
3878
#endif
3879
+ }
3893
3880
}
3894
3881
3895
3882
@@ -15849,10 +15836,16 @@ init_fs_codec(PyInterpreterState *interp)
15849
15836
15850
15837
PyMem_RawFree (interp -> fs_codec .encoding );
15851
15838
interp -> fs_codec .encoding = encoding ;
15839
+ /* encoding has been normalized by init_fs_encoding() */
15840
+ interp -> fs_codec .utf8 = (strcmp (encoding , "utf-8" ) == 0 );
15852
15841
PyMem_RawFree (interp -> fs_codec .errors );
15853
15842
interp -> fs_codec .errors = errors ;
15854
15843
interp -> fs_codec .error_handler = error_handler ;
15855
15844
15845
+ #ifdef _Py_FORCE_UTF8_FS_ENCODING
15846
+ assert (interp -> fs_codec .utf8 == 1 );
15847
+ #endif
15848
+
15856
15849
/* At this point, PyUnicode_EncodeFSDefault() and
15857
15850
PyUnicode_DecodeFSDefault() can now use the Python codec rather than
15858
15851
the C implementation of the filesystem encoding. */
@@ -15902,6 +15895,19 @@ _PyUnicode_InitEncodings(PyThreadState *tstate)
15902
15895
}
15903
15896
15904
15897
15898
/* Review summary of the added helper: release the filesystem codec state of
   the interpreter reached through tstate->interp.
   - Frees fs_codec.encoding and fs_codec.errors with PyMem_RawFree() and
     resets both pointers to NULL.
   - Clears fs_codec.utf8 and sets fs_codec.error_handler to
     _Py_ERROR_UNKNOWN.
   With utf8 == 0 and encoding == NULL, PyUnicode_EncodeFSDefault() and
   PyUnicode_DecodeFSDefaultAndSize() take their final `else` branch.
   Unlike the code removed from _PyUnicode_Fini(), this helper does not write
   to config.filesystem_errors (the removed code stored a cast
   _Py_ERROR_UNKNOWN there), and it takes the interpreter from tstate rather
   than from _PyInterpreterState_GET_UNSAFE(). */
+ static void
15899
+ _PyUnicode_FiniEncodings (PyThreadState * tstate )
15900
+ {
15901
+ PyInterpreterState * interp = tstate -> interp ;
15902
+ PyMem_RawFree (interp -> fs_codec .encoding );
15903
+ interp -> fs_codec .encoding = NULL ;
15904
+ interp -> fs_codec .utf8 = 0 ;
15905
+ PyMem_RawFree (interp -> fs_codec .errors );
15906
+ interp -> fs_codec .errors = NULL ;
15907
+ interp -> fs_codec .error_handler = _Py_ERROR_UNKNOWN ;
15908
+ }
15909
+
15910
+
15905
15911
#ifdef MS_WINDOWS
15906
15912
int
15907
15913
_PyUnicode_EnableLegacyWindowsFSEncoding (void )
@@ -15954,12 +15960,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
15954
15960
_PyUnicode_ClearStaticStrings ();
15955
15961
}
15956
15962
15957
- PyInterpreterState * interp = _PyInterpreterState_GET_UNSAFE ();
15958
- PyMem_RawFree (interp -> fs_codec .encoding );
15959
- interp -> fs_codec .encoding = NULL ;
15960
- PyMem_RawFree (interp -> fs_codec .errors );
15961
- interp -> fs_codec .errors = NULL ;
15962
- interp -> config .filesystem_errors = (wchar_t * )_Py_ERROR_UNKNOWN ;
15963
+ _PyUnicode_FiniEncodings (tstate );
15963
15964
}
15964
15965
15965
15966
0 commit comments