From 788f9fa8b05d4c06cc7b8c267fba3612c0de2e9b Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 13 May 2025 11:55:24 +0300
Subject: [PATCH] gh-133886: Fix sys.remote_exec() for non-UTF-8 paths
 (GH-133887)

It now supports non-ASCII paths in non-UTF-8 locales and
non-UTF-8 paths in UTF-8 locales.
(cherry picked from commit c09cec5d69f2ef6ab5e64c7e0579fbd9dcb2ca45)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
 Lib/test/test_sys.py                          |  35 ++++--
 ...-05-11-13-40-42.gh-issue-133886.ryBAyo.rst |   2 +
 Python/ceval_gil.c                            |  25 +++--
 Python/sysmodule.c                            | 106 +++++++++---------
 4 files changed, 97 insertions(+), 71 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-11-13-40-42.gh-issue-133886.ryBAyo.rst

diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 59ef5c993099f0..8af2e3488b48d9 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -1976,12 +1976,13 @@ class TestRemoteExec(unittest.TestCase):
     def tearDown(self):
         test.support.reap_children()
 
-    def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologue=''):
+    def _run_remote_exec_test(self, script_code, python_args=None, env=None,
+                              prologue='',
+                              script_path=os_helper.TESTFN + '_remote.py'):
         # Create the script that will be remotely executed
-        script = os_helper.TESTFN + '_remote.py'
-        self.addCleanup(os_helper.unlink, script)
+        self.addCleanup(os_helper.unlink, script_path)
 
-        with open(script, 'w') as f:
+        with open(script_path, 'w') as f:
             f.write(script_code)
 
         # Create and run the target process
@@ -2050,7 +2051,7 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu
                 self.assertEqual(response, b"ready")
 
                 # Try remote exec on the target process
-                sys.remote_exec(proc.pid, script)
+                sys.remote_exec(proc.pid, script_path)
 
                 # Signal script to continue
                 client_socket.sendall(b"continue")
@@ -2073,14 +2074,32 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu
 
     def test_remote_exec(self):
         """Test basic remote exec functionality"""
-        script = '''
-print("Remote script executed successfully!")
-'''
+        script = 'print("Remote script executed successfully!")'
         returncode, stdout, stderr = self._run_remote_exec_test(script)
         # self.assertEqual(returncode, 0)
         self.assertIn(b"Remote script executed successfully!", stdout)
         self.assertEqual(stderr, b"")
 
+    def test_remote_exec_bytes(self):
+        script = 'print("Remote script executed successfully!")'
+        script_path = os.fsencode(os_helper.TESTFN) + b'_bytes_remote.py'
+        returncode, stdout, stderr = self._run_remote_exec_test(script,
+                                                    script_path=script_path)
+        self.assertIn(b"Remote script executed successfully!", stdout)
+        self.assertEqual(stderr, b"")
+
+    @unittest.skipUnless(os_helper.TESTFN_UNDECODABLE, 'requires undecodable path')
+    @unittest.skipIf(sys.platform == 'darwin',
+                     'undecodable paths are not supported on macOS')
+    def test_remote_exec_undecodable(self):
+        script = 'print("Remote script executed successfully!")'
+        script_path = os_helper.TESTFN_UNDECODABLE + b'_undecodable_remote.py'
+        for script_path in [script_path, os.fsdecode(script_path)]:
+            returncode, stdout, stderr = self._run_remote_exec_test(script,
+                                                        script_path=script_path)
+            self.assertIn(b"Remote script executed successfully!", stdout)
+            self.assertEqual(stderr, b"")
+
     def test_remote_exec_with_self_process(self):
         """Test remote exec with the target process being the same as the test process"""
 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-11-13-40-42.gh-issue-133886.ryBAyo.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-11-13-40-42.gh-issue-133886.ryBAyo.rst
new file mode 100644
index 00000000000000..fd1020f05d6b0d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-11-13-40-42.gh-issue-133886.ryBAyo.rst
@@ -0,0 +1,2 @@
+Fix :func:`sys.remote_exec` for non-ASCII paths in non-UTF-8 locales and
+non-UTF-8 paths in UTF-8 locales.
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c
index 5b5018a63731ab..6d2383ac7c1c65 100644
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@@ -1218,30 +1218,30 @@ static inline int run_remote_debugger_source(PyObject *source)
 
 // Note that this function is inline to avoid creating a PLT entry
 // that would be an easy target for a ROP gadget.
-static inline void run_remote_debugger_script(const char *path)
+static inline void run_remote_debugger_script(PyObject *path)
 {
-    if (0 != PySys_Audit("remote_debugger_script", "s", path)) {
+    if (0 != PySys_Audit("remote_debugger_script", "O", path)) {
         PyErr_FormatUnraisable(
-            "Audit hook failed for remote debugger script %s", path);
+            "Audit hook failed for remote debugger script %U", path);
         return;
     }
 
     // Open the debugger script with the open code hook, and reopen the
     // resulting file object to get a C FILE* object.
-    PyObject* fileobj = PyFile_OpenCode(path);
+    PyObject* fileobj = PyFile_OpenCodeObject(path);
     if (!fileobj) {
-        PyErr_FormatUnraisable("Can't open debugger script %s", path);
+        PyErr_FormatUnraisable("Can't open debugger script %U", path);
         return;
     }
 
     PyObject* source = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(read));
     if (!source) {
-        PyErr_FormatUnraisable("Error reading debugger script %s", path);
+        PyErr_FormatUnraisable("Error reading debugger script %U", path);
     }
 
     PyObject* res = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(close));
     if (!res) {
-        PyErr_FormatUnraisable("Error closing debugger script %s", path);
+        PyErr_FormatUnraisable("Error closing debugger script %U", path);
     } else {
         Py_DECREF(res);
     }
@@ -1249,7 +1249,7 @@ static inline void run_remote_debugger_script(const char *path)
 
     if (source) {
         if (0 != run_remote_debugger_source(source)) {
-            PyErr_FormatUnraisable("Error executing debugger script %s", path);
+            PyErr_FormatUnraisable("Error executing debugger script %U", path);
         }
         Py_DECREF(source);
     }
@@ -1278,7 +1278,14 @@ int _PyRunRemoteDebugger(PyThreadState *tstate)
                 pathsz);
             path[pathsz - 1] = '\0';
             if (*path) {
-                run_remote_debugger_script(path);
+                PyObject *path_obj = PyUnicode_DecodeFSDefault(path);
+                if (path_obj == NULL) {
+                    PyErr_FormatUnraisable("Can't decode debugger script");
+                }
+                else {
+                    run_remote_debugger_script(path_obj);
+                    Py_DECREF(path_obj);
+                }
             }
             PyMem_Free(path);
         }
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 41b9a6b276a3b1..b46c2ab7c4483d 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2451,26 +2451,58 @@ sys_is_remote_debug_enabled_impl(PyObject *module)
 #endif
 }
 
+/*[clinic input]
+sys.remote_exec
+
+    pid: int
+    script: object
+
+Executes a file containing Python code in a given remote Python process.
+
+This function returns immediately, and the code will be executed by the
+target process's main thread at the next available opportunity, similarly
+to how signals are handled. There is no interface to determine when the
+code has been executed. The caller is responsible for making sure that
+the file still exists whenever the remote process tries to read it and that
+it hasn't been overwritten.
+
+The remote process must be running a CPython interpreter of the same major
+and minor version as the local process. If either the local or remote
+interpreter is pre-release (alpha, beta, or release candidate) then the
+local and remote interpreters must be the same exact version.
+
+Args:
+     pid (int): The process ID of the target Python process.
+     script (str|bytes): The path to a file containing
+         the Python code to be executed.
+[clinic start generated code]*/
+
 static PyObject *
-sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script)
+sys_remote_exec_impl(PyObject *module, int pid, PyObject *script)
+/*[clinic end generated code: output=7d94c56afe4a52c0 input=39908ca2c5fe1eb0]*/
 {
-    const char *debugger_script_path = PyUnicode_AsUTF8(script);
-    if (debugger_script_path == NULL) {
+    PyObject *path;
+    const char *debugger_script_path;
+
+    if (PyUnicode_FSConverter(script, &path) == 0) {  // 0 means failure
         return NULL;
     }
-
+    debugger_script_path = PyBytes_AS_STRING(path);
 #ifdef MS_WINDOWS
+    PyObject *unicode_path;
+    if (PyUnicode_FSDecoder(path, &unicode_path) == 0) {  // 0 means failure
+        goto error;
+    }
     // Use UTF-16 (wide char) version of the path for permission checks
-    wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(script, NULL);
+    wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(unicode_path, NULL);
+    Py_DECREF(unicode_path);
     if (debugger_script_path_w == NULL) {
-        return NULL;
+        goto error;
     }
-
-    // Check file attributes using wide character version (W) instead of ANSI (A)
     DWORD attr = GetFileAttributesW(debugger_script_path_w);
-    PyMem_Free(debugger_script_path_w);
     if (attr == INVALID_FILE_ATTRIBUTES) {
         DWORD err = GetLastError();
+        PyMem_Free(debugger_script_path_w);
         if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) {
             PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist");
         }
@@ -2478,11 +2510,12 @@ sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script)
             PyErr_SetString(PyExc_PermissionError, "Script file cannot be read");
         }
         else {
-            PyErr_SetFromWindowsErr(0);
+            PyErr_SetFromWindowsErr(err);
         }
-        return NULL;
+        goto error;
     }
-#else
+    PyMem_Free(debugger_script_path_w);
+#else // MS_WINDOWS
     if (access(debugger_script_path, F_OK | R_OK) != 0) {
         switch (errno) {
             case ENOENT:
@@ -2494,54 +2527,19 @@ sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script)
             default:
                 PyErr_SetFromErrno(PyExc_OSError);
         }
-        return NULL;
+        goto error;
     }
-#endif
-
+#endif // MS_WINDOWS
     if (_PySysRemoteDebug_SendExec(pid, 0, debugger_script_path) < 0) {
-        return NULL;
+        goto error;
     }
 
+    Py_DECREF(path);
     Py_RETURN_NONE;
-}
-
-/*[clinic input]
-sys.remote_exec
-
-    pid: int
-    script: object
-
-Executes a file containing Python code in a given remote Python process.
-
-This function returns immediately, and the code will be executed by the
-target process's main thread at the next available opportunity, similarly
-to how signals are handled. There is no interface to determine when the
-code has been executed. The caller is responsible for making sure that
-the file still exists whenever the remote process tries to read it and that
-it hasn't been overwritten.
 
-The remote process must be running a CPython interpreter of the same major
-and minor version as the local process. If either the local or remote
-interpreter is pre-release (alpha, beta, or release candidate) then the
-local and remote interpreters must be the same exact version.
-
-Args:
-     pid (int): The process ID of the target Python process.
-     script (str|bytes): The path to a file containing
-         the Python code to be executed.
-[clinic start generated code]*/
-
-static PyObject *
-sys_remote_exec_impl(PyObject *module, int pid, PyObject *script)
-/*[clinic end generated code: output=7d94c56afe4a52c0 input=39908ca2c5fe1eb0]*/
-{
-    PyObject *ret = NULL;
-    PyObject *path;
-    if (PyUnicode_FSDecoder(script, &path)) {
-        ret = sys_remote_exec_unicode_path(module, pid, path);
-        Py_DECREF(path);
-    }
-    return ret;
+error:
+    Py_DECREF(path);
+    return NULL;
 }