gh-103997: Automatically dedent the argument to "-c" (#103998) · python/cpython@fc0ec29 · GitHub
[go: up one dir, main page]

Skip to content

Commit fc0ec29

Browse files
Erotemic, sunmy2019, Eclips4, methane, AA-Turner
authored
gh-103997: Automatically dedent the argument to "-c" (#103998)
Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Co-authored-by: Kirill Podoprigora <80244920+Eclips4@users.noreply.github.com> Co-authored-by: Inada Naoki <songofacandy@gmail.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
1 parent 50e518e commit fc0ec29

File tree

7 files changed

+266
-0
lines changed

7 files changed

+266
-0
lines changed

Doc/using/cmdline.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ source.
7373

7474
.. audit-event:: cpython.run_command command cmdoption-c
7575

76+
.. versionchanged:: next
77+
*command* is automatically dedented before execution.
78+
7679
.. option:: -m <module-name>
7780

7881
Search :data:`sys.path` for the named module and execute its contents as

Doc/whatsnew/3.14.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,12 @@ Other language changes
474474
explicitly overridden in the subclass.
475475
(Contributed by Tomasz Pytel in :gh:`132329`.)
476476

477+
* The command line option :option:`-c` now automatically dedents its code
478+
argument before execution. The auto-dedentation behavior mirrors
479+
:func:`textwrap.dedent`.
480+
(Contributed by Jon Crall and Steven Sun in :gh:`103998`.)
481+
482+
477483
.. _whatsnew314-pep765:
478484

479485
PEP 765: Disallow return/break/continue that exit a finally block

Include/internal/pycore_unicodeobject.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
247247
Py_UCS4 *maxchar,
248248
int forward);
249249

250+
/* Dedent a string.
251+
Behaviour is expected to be an exact match of `textwrap.dedent`.
252+
Return a new reference on success, NULL with exception set on error.
253+
*/
254+
extern PyObject* _PyUnicode_Dedent(PyObject *unicode);
255+
250256
/* --- Misc functions ----------------------------------------------------- */
251257

252258
extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);

Lib/test/test_cmd_line.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
spawn_python, kill_python, assert_python_ok, assert_python_failure,
1818
interpreter_requires_environment
1919
)
20+
from textwrap import dedent
21+
2022

2123
if not support.has_subprocess_support:
2224
raise unittest.SkipTest("test module requires subprocess")
@@ -1051,6 +1053,88 @@ def test_int_max_str_digits(self):
10511053
)
10521054
self.assertEqual(res2int(res), (6000, 6000))
10531055

1056+
def test_cmd_dedent(self):
1057+
# test that -c auto-dedents its arguments
1058+
test_cases = [
1059+
(
1060+
"""
1061+
print('space-auto-dedent')
1062+
""",
1063+
"space-auto-dedent",
1064+
),
1065+
(
1066+
dedent(
1067+
"""
1068+
^^^print('tab-auto-dedent')
1069+
"""
1070+
).replace("^", "\t"),
1071+
"tab-auto-dedent",
1072+
),
1073+
(
1074+
dedent(
1075+
"""
1076+
^^if 1:
1077+
^^^^print('mixed-auto-dedent-1')
1078+
^^print('mixed-auto-dedent-2')
1079+
"""
1080+
).replace("^", "\t \t"),
1081+
"mixed-auto-dedent-1\nmixed-auto-dedent-2",
1082+
),
1083+
(
1084+
'''
1085+
data = """$
1086+
1087+
this data has an empty newline above and a newline with spaces below $
1088+
$
1089+
"""$
1090+
if 1: $
1091+
print(repr(data))$
1092+
'''.replace(
1093+
"$", ""
1094+
),
1095+
# Note: entirely blank lines are normalized to \n, even if they
1096+
# are part of a data string. This is consistent with
1097+
# textwrap.dedent behavior, but might not be intuitive.
1098+
"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
1099+
),
1100+
(
1101+
'',
1102+
'',
1103+
),
1104+
(
1105+
' \t\n\t\n \t\t\t \t\t \t\n\t\t \n\n\n\t\t\t ',
1106+
'',
1107+
),
1108+
]
1109+
for code, expected in test_cases:
1110+
# Run the auto-dedent case
1111+
args1 = sys.executable, '-c', code
1112+
proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
1113+
self.assertEqual(proc1.returncode, 0, proc1)
1114+
output1 = proc1.stdout.strip().decode(encoding='utf-8')
1115+
1116+
# Manually dedent beforehand, check the result is the same.
1117+
args2 = sys.executable, '-c', dedent(code)
1118+
proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
1119+
self.assertEqual(proc2.returncode, 0, proc2)
1120+
output2 = proc2.stdout.strip().decode(encoding='utf-8')
1121+
1122+
self.assertEqual(output1, output2)
1123+
self.assertEqual(output1.replace('\r\n', '\n'), expected)
1124+
1125+
def test_cmd_dedent_failcase(self):
    """Check that ``-c`` still rejects code whose indentation mixes tabs and spaces.

    The template uses ``-`` and ``+`` as placeholders for the two kinds of
    indentation character.  After substitution the two lines share no
    common leading whitespace, so the interpreter is expected to exit
    with a failure status for both assignments of the placeholders.
    """
    # `dedent` is the module-level import from textwrap; the template is
    # dedented here so that only the placeholder characters make up the
    # indentation of the generated code.
    template = dedent(
        '''
        -+if 1:
        +-++ print('will fail')
        ''')

    # '-' -> space, '+' -> tab
    code = template.replace('-', ' ').replace('+', '\t')
    assert_python_failure('-c', code)

    # '-' -> tab, '+' -> space
    code = template.replace('-', '\t').replace('+', ' ')
    assert_python_failure('-c', code)
1137+
10541138
def test_cpu_count(self):
10551139
code = "import os; print(os.cpu_count(), os.process_cpu_count())"
10561140
res = assert_python_ok('-X', 'cpu_count=4321', '-c', code)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
String arguments passed to "-c" are now automatically dedented as if by
2+
:func:`textwrap.dedent`. This allows "python -c" invocations to be indented
3+
in shell scripts without causing indentation errors. (Patch by Jon Crall and
4+
Steven Sun)

Modules/main.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "pycore_pylifecycle.h" // _Py_PreInitializeFromPyArgv()
1212
#include "pycore_pystate.h" // _PyInterpreterState_GET()
1313
#include "pycore_pythonrun.h" // _PyRun_AnyFileObject()
14+
#include "pycore_unicodeobject.h" // _PyUnicode_Dedent()
1415

1516
/* Includes for exit_sigint() */
1617
#include <stdio.h> // perror()
@@ -244,6 +245,11 @@ pymain_run_command(wchar_t *command)
244245
return pymain_exit_err_print();
245246
}
246247

248+
Py_SETREF(unicode, _PyUnicode_Dedent(unicode));
249+
if (unicode == NULL) {
250+
goto error;
251+
}
252+
247253
bytes = PyUnicode_AsUTF8String(unicode);
248254
Py_DECREF(unicode);
249255
if (bytes == NULL) {

Objects/unicodeobject.c

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14270,6 +14270,163 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
1427014270
return Py_BuildValue("(N)", copy);
1427114271
}
1427214272

14273+
/*
14274+
This function searchs the longest common leading whitespace
14275+
of all lines in the [src, end).
14276+
It returns the length of the common leading whitespace and sets `output` to
14277+
point to the beginning of the common leading whitespace if length > 0.
14278+
*/
14279+
static Py_ssize_t
14280+
search_longest_common_leading_whitespace(
14281+
const char *const src,
14282+
const char *const end,
14283+
const char **output)
14284+
{
14285+
// [_start, _start + _len)
14286+
// describes the current longest common leading whitespace
14287+
const char *_start = NULL;
14288+
Py_ssize_t _len = 0;
14289+
14290+
for (const char *iter = src; iter < end; ++iter) {
14291+
const char *line_start = iter;
14292+
const char *leading_whitespace_end = NULL;
14293+
14294+
// scan the whole line
14295+
while (iter < end && *iter != '\n') {
14296+
if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
14297+
/* `iter` points to the first non-whitespace character
14298+
in this line */
14299+
if (iter == line_start) {
14300+
// some line has no indent, fast exit!
14301+
return 0;
14302+
}
14303+
leading_whitespace_end = iter;
14304+
}
14305+
++iter;
14306+
}
14307+
14308+
// if this line has all white space, skip it
14309+
if (!leading_whitespace_end) {
14310+
continue;
14311+
}
14312+
14313+
if (!_start) {
14314+
// update the first leading whitespace
14315+
_start = line_start;
14316+
_len = leading_whitespace_end - line_start;
14317+
assert(_len > 0);
14318+
}
14319+
else {
14320+
/* We then compare with the current longest leading whitespace.
14321+
14322+
[line_start, leading_whitespace_end) is the leading
14323+
whitespace of this line,
14324+
14325+
[_start, _start + _len) is the leading whitespace of the
14326+
current longest leading whitespace. */
14327+
Py_ssize_t new_len = 0;
14328+
const char *_iter = _start, *line_iter = line_start;
14329+
14330+
while (_iter < _start + _len && line_iter < leading_whitespace_end
14331+
&& *_iter == *line_iter)
14332+
{
14333+
++_iter;
14334+
++line_iter;
14335+
++new_len;
14336+
}
14337+
14338+
_len = new_len;
14339+
if (_len == 0) {
14340+
// No common things now, fast exit!
14341+
return 0;
14342+
}
14343+
}
14344+
}
14345+
14346+
assert(_len >= 0);
14347+
if (_len > 0) {
14348+
*output = _start;
14349+
}
14350+
return _len;
14351+
}
14352+
14353+
/* Dedent a string.
14354+
Behaviour is expected to be an exact match of `textwrap.dedent`.
14355+
Return a new reference on success, NULL with exception set on error.
14356+
*/
14357+
PyObject *
14358+
_PyUnicode_Dedent(PyObject *unicode)
14359+
{
14360+
Py_ssize_t src_len = 0;
14361+
const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
14362+
if (!src) {
14363+
return NULL;
14364+
}
14365+
assert(src_len >= 0);
14366+
if (src_len == 0) {
14367+
return Py_NewRef(unicode);
14368+
}
14369+
14370+
const char *const end = src + src_len;
14371+
14372+
// [whitespace_start, whitespace_start + whitespace_len)
14373+
// describes the current longest common leading whitespace
14374+
const char *whitespace_start = NULL;
14375+
Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
14376+
src, end, &whitespace_start);
14377+
14378+
if (whitespace_len == 0) {
14379+
return Py_NewRef(unicode);
14380+
}
14381+
14382+
// now we should trigger a dedent
14383+
char *dest = PyMem_Malloc(src_len);
14384+
if (!dest) {
14385+
PyErr_NoMemory();
14386+
return NULL;
14387+
}
14388+
char *dest_iter = dest;
14389+
14390+
for (const char *iter = src; iter < end; ++iter) {
14391+
const char *line_start = iter;
14392+
bool in_leading_space = true;
14393+
14394+
// iterate over a line to find the end of a line
14395+
while (iter < end && *iter != '\n') {
14396+
if (in_leading_space && *iter != ' ' && *iter != '\t') {
14397+
in_leading_space = false;
14398+
}
14399+
++iter;
14400+
}
14401+
14402+
// invariant: *iter == '\n' or iter == end
14403+
bool append_newline = iter < end;
14404+
14405+
// if this line has all white space, write '\n' and continue
14406+
if (in_leading_space && append_newline) {
14407+
*dest_iter++ = '\n';
14408+
continue;
14409+
}
14410+
14411+
/* copy [new_line_start + whitespace_len, iter) to buffer, then
14412+
conditionally append '\n' */
14413+
14414+
Py_ssize_t new_line_len = iter - line_start - whitespace_len;
14415+
assert(new_line_len >= 0);
14416+
memcpy(dest_iter, line_start + whitespace_len, new_line_len);
14417+
14418+
dest_iter += new_line_len;
14419+
14420+
if (append_newline) {
14421+
*dest_iter++ = '\n';
14422+
}
14423+
}
14424+
14425+
PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest);
14426+
PyMem_Free(dest);
14427+
return res;
14428+
}
14429+
1427314430
static PyMethodDef unicode_methods[] = {
1427414431
UNICODE_ENCODE_METHODDEF
1427514432
UNICODE_REPLACE_METHODDEF

0 commit comments

Comments
 (0)
0