gh-131020: py.exe launcher does not correctly detect a BOM when searching for the shebang (GH-131021) · python/cpython@22ac71d · GitHub
[go: up one dir, main page]

Skip to content

Commit 22ac71d

Browse files
gh-131020: py.exe launcher does not correctly detect a BOM when searching for the shebang (GH-131021)
(cherry picked from commit 36ef3bf) Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
1 parent c82795a commit 22ac71d

File tree

3 files changed

+29
-5
lines changed

3 files changed

+29
-5
lines changed

Lib/test/test_launcher.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,10 @@ def py_ini(self, content):
263263
@contextlib.contextmanager
264264
def script(self, content, encoding="utf-8"):
265265
file = Path(tempfile.mktemp(dir=os.getcwd()) + ".py")
266-
file.write_text(content, encoding=encoding)
266+
if isinstance(content, bytes):
267+
file.write_bytes(content)
268+
else:
269+
file.write_text(content, encoding=encoding)
267270
try:
268271
yield file
269272
finally:
@@ -608,6 +611,25 @@ def test_py_shebang_short_argv0(self):
608611
self.assertEqual("3.100", data["SearchInfo.tag"])
609612
self.assertEqual(f'X.Y.exe -prearg "{script}" -postarg', data["stdout"].strip())
610613

614+
def test_py_shebang_valid_bom(self):
615+
with self.py_ini(TEST_PY_DEFAULTS):
616+
content = "#! /usr/bin/python -prearg".encode("utf-8")
617+
with self.script(b"\xEF\xBB\xBF" + content) as script:
618+
data = self.run_py([script, "-postarg"])
619+
self.assertEqual("PythonTestSuite", data["SearchInfo.company"])
620+
self.assertEqual("3.100", data["SearchInfo.tag"])
621+
self.assertEqual(f"X.Y.exe -prearg {script} -postarg", data["stdout"].strip())
622+
623+
def test_py_shebang_invalid_bom(self):
624+
with self.py_ini(TEST_PY_DEFAULTS):
625+
content = "#! /usr/bin/python3 -prearg".encode("utf-8")
626+
with self.script(b"\xEF\xAA\xBF" + content) as script:
627+
data = self.run_py([script, "-postarg"])
628+
self.assertIn("Invalid BOM", data["stderr"])
629+
self.assertEqual("PythonTestSuite", data["SearchInfo.company"])
630+
self.assertEqual("3.100", data["SearchInfo.tag"])
631+
self.assertEqual(f"X.Y.exe {script} -postarg", data["stdout"].strip())
632+
611633
def test_py_handle_64_in_ini(self):
612634
with self.py_ini("\n".join(["[defaults]", "python=3.999-64"])):
613635
# Expect this to fail, but should get oldStyleTag flipped on
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:source:`pylauncher <PC/launcher2.c>` correctly detects a BOM when searching for the
2+
shebang. Fix by Chris Eibl.

PC/launcher2.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,7 @@ checkShebang(SearchInfo *search)
10551055
}
10561056

10571057
DWORD bytesRead = 0;
1058-
char buffer[4096];
1058+
unsigned char buffer[4096];
10591059
if (!ReadFile(hFile, buffer, sizeof(buffer), &bytesRead, NULL)) {
10601060
debug(L"# Failed to read %s for shebang parsing (0x%08X)\n",
10611061
scriptFile, GetLastError());
@@ -1068,7 +1068,7 @@ checkShebang(SearchInfo *search)
10681068
free(scriptFile);
10691069

10701070

1071-
char *b = buffer;
1071+
unsigned char *b = buffer;
10721072
bool onlyUtf8 = false;
10731073
if (bytesRead > 3 && *b == 0xEF) {
10741074
if (*++b == 0xBB && *++b == 0xBF) {
@@ -1089,13 +1089,13 @@ checkShebang(SearchInfo *search)
10891089
++b;
10901090
--bytesRead;
10911091
while (--bytesRead > 0 && isspace(*++b)) { }
1092-
char *start = b;
1092+
const unsigned char *start = b;
10931093
while (--bytesRead > 0 && *++b != '\r' && *b != '\n') { }
10941094
wchar_t *shebang;
10951095
int shebangLength;
10961096
// We add 1 when bytesRead==0, as in that case we hit EOF and b points
10971097
// to the last character in the file, not the newline
1098-
int exitCode = _decodeShebang(search, start, (int)(b - start + (bytesRead == 0)), onlyUtf8, &shebang, &shebangLength);
1098+
int exitCode = _decodeShebang(search, (const char*)start, (int)(b - start + (bytesRead == 0)), onlyUtf8, &shebang, &shebangLength);
10991099
if (exitCode) {
11001100
return exitCode;
11011101
}

0 commit comments

Comments
 (0)
0