[mypyc] Introduce FormatOp and add a tokenizer for .format() call (#1… · python/mypy@58c0a05 · GitHub
[go: up one dir, main page]

Skip to content

Commit 58c0a05

Browse files
[mypyc] Introduce FormatOp and add a tokenizer for .format() call (#10935)
This PR adds a tokenizer that converts a str.format() format string into literals and specifiers. By doing so, the code structure of `translate_str_format` becomes clearer.

This PR also introduces `FormatOp`. Compared to `ConversionSpecifier`, `FormatOp` has fewer attributes and indicates compile-time optimizations. For example, to mark a conversion from any object to string, `ConversionSpecifier` may have several representations, like '%s', '{}' or '{:{}}'. However, there would only exist one corresponding `FormatOp`. Currently `FormatOp` is just an Enum for convenience. We might add several attributes later and upgrade it to a class if we need to support more conversions.

To help with future optimization, these parts of the code are extracted into new functions:
* `generate_format_ops`, which shrinks `ConversionSpecifier` into `FormatOp`
* `convert_expr`, which helps convert the expressions into the desired results.
1 parent e7161ac commit 58c0a05

File tree

3 files changed

+120
-51
lines changed

3 files changed

+120
-51
lines changed

mypyc/irbuild/format_str_tokenizer.py

Lines changed: 107 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,54 @@
11
"""Tokenizers for three string formatting methods"""
22

3-
from typing import List, Tuple
3+
from typing import List, Tuple, Optional
4+
from typing_extensions import Final
5+
from enum import Enum
46

57
from mypy.checkstrformat import (
6-
ConversionSpecifier, parse_conversion_specifiers
8+
parse_format_value, ConversionSpecifier, parse_conversion_specifiers
79
)
10+
from mypy.errors import Errors
11+
from mypy.messages import MessageBuilder
12+
from mypy.nodes import Context, Expression
13+
814
from mypyc.ir.ops import Value, Integer
9-
from mypyc.ir.rtypes import c_pyssize_t_rprimitive
15+
from mypyc.ir.rtypes import (
16+
c_pyssize_t_rprimitive, is_str_rprimitive, is_int_rprimitive, is_short_int_rprimitive
17+
)
1018
from mypyc.irbuild.builder import IRBuilder
11-
from mypyc.primitives.str_ops import str_build_op
19+
from mypyc.primitives.int_ops import int_to_str_op
20+
from mypyc.primitives.str_ops import str_build_op, str_op
21+
22+
23+
class FormatOp(Enum):
24+
"""FormatOp represents conversion operations of string formatting during
25+
compile time.
26+
27+
Compare to ConversionSpecifier, FormatOp has fewer attributes.
28+
For example, to mark a conversion from any object to string,
29+
ConversionSpecifier may have several representations, like '%s', '{}'
30+
or '{:{}}'. However, there would only exist one corresponding FormatOp.
31+
"""
32+
STR = 's'
33+
INT = 'd'
34+
35+
36+
def generate_format_ops(specifiers: List[ConversionSpecifier]) -> Optional[List[FormatOp]]:
37+
"""Convert ConversionSpecifier to FormatOp.
38+
39+
Different ConversionSpecifiers may share a same FormatOp.
40+
"""
41+
format_ops = []
42+
for spec in specifiers:
43+
# TODO: Match specifiers instead of using whole_seq
44+
if spec.whole_seq == '%s' or spec.whole_seq == '{:{}}':
45+
format_op = FormatOp.STR
46+
elif spec.whole_seq:
47+
return None
48+
else:
49+
format_op = FormatOp.STR
50+
format_ops.append(format_op)
51+
return format_ops
1252

1353

1454
def tokenizer_printf_style(format_str: str) -> Tuple[List[str], List[ConversionSpecifier]]:
@@ -30,6 +70,69 @@ def tokenizer_printf_style(format_str: str) -> Tuple[List[str], List[ConversionS
3070
return literals, specifiers
3171

3272

73+
# The empty Context as an argument for parse_format_value().
74+
# It wouldn't be used since the code has passed the type-checking.
75+
EMPTY_CONTEXT: Final = Context()
76+
77+
78+
def tokenizer_format_call(
79+
format_str: str) -> Optional[Tuple[List[str], List[FormatOp]]]:
80+
"""Tokenize a str.format() format string.
81+
82+
The core function parse_format_value() is shared with mypy.
83+
With these specifiers, we then parse the literal substrings
84+
of the original format string and convert `ConversionSpecifier`
85+
to `FormatOp`.
86+
87+
Return:
88+
A list of string literals and a list of FormatOps. The literals
89+
are interleaved with FormatOps and the length of returned literals
90+
should be exactly one more than FormatOps.
91+
Return None if it cannot parse the string.
92+
"""
93+
# Creates an empty MessageBuilder here.
94+
# It wouldn't be used since the code has passed the type-checking.
95+
specifiers = parse_format_value(format_str, EMPTY_CONTEXT,
96+
MessageBuilder(Errors(), {}))
97+
if specifiers is None:
98+
return None
99+
format_ops = generate_format_ops(specifiers)
100+
if format_ops is None:
101+
return None
102+
103+
literals: List[str] = []
104+
last_end = 0
105+
for spec in specifiers:
106+
# Skip { and }
107+
literals.append(format_str[last_end:spec.start_pos - 1])
108+
last_end = spec.start_pos + len(spec.whole_seq) + 1
109+
literals.append(format_str[last_end:])
110+
# Deal with escaped {{
111+
literals = [x.replace('{{', '{').replace('}}', '}') for x in literals]
112+
113+
return literals, format_ops
114+
115+
116+
def convert_expr(builder: IRBuilder, format_ops: List[FormatOp],
117+
exprs: List[Expression], line: int) -> Optional[List[Value]]:
118+
"""Convert expressions into string literals with the guidance
119+
of FormatOps."""
120+
converted = []
121+
for x, format_op in zip(exprs, format_ops):
122+
node_type = builder.node_type(x)
123+
if format_op == FormatOp.STR:
124+
if is_str_rprimitive(node_type):
125+
var_str = builder.accept(x)
126+
elif is_int_rprimitive(node_type) or is_short_int_rprimitive(node_type):
127+
var_str = builder.call_c(int_to_str_op, [builder.accept(x)], line)
128+
else:
129+
var_str = builder.call_c(str_op, [builder.accept(x)], line)
130+
converted.append(var_str)
131+
else:
132+
return None
133+
return converted
134+
135+
33136
def join_formatted_strings(builder: IRBuilder, literals: List[str],
34137
substitutions: List[Value], line: int) -> Value:
35138
"""Merge the list of literals and the list of substitutions

mypyc/irbuild/specialize.py

Lines changed: 12 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,10 @@
1313
"""
1414

1515
from typing import Callable, Optional, Dict, Tuple, List
16-
from typing_extensions import Final
1716

18-
from mypy.checkstrformat import parse_format_value
19-
from mypy.errors import Errors
20-
from mypy.messages import MessageBuilder
2117
from mypy.nodes import (
2218
CallExpr, RefExpr, MemberExpr, NameExpr, TupleExpr, GeneratorExpr,
23-
ListExpr, DictExpr, StrExpr, ARG_POS, Context
19+
ListExpr, DictExpr, StrExpr, ARG_POS
2420
)
2521
from mypy.types import AnyType, TypeOfAny
2622

@@ -29,11 +25,11 @@
2925
)
3026
from mypyc.ir.rtypes import (
3127
RType, RTuple, str_rprimitive, list_rprimitive, dict_rprimitive, set_rprimitive,
32-
bool_rprimitive, c_int_rprimitive, c_pyssize_t_rprimitive, is_dict_rprimitive,
33-
is_int_rprimitive, is_str_rprimitive, is_short_int_rprimitive
28+
bool_rprimitive, c_int_rprimitive, c_pyssize_t_rprimitive, is_dict_rprimitive
29+
)
30+
from mypyc.irbuild.format_str_tokenizer import (
31+
tokenizer_format_call, join_formatted_strings, convert_expr
3432
)
35-
from mypyc.irbuild.format_str_tokenizer import join_formatted_strings
36-
from mypyc.primitives.int_ops import int_to_str_op
3733
from mypyc.primitives.dict_ops import (
3834
dict_keys_op, dict_values_op, dict_items_op, dict_setdefault_spec_init_op
3935
)
@@ -392,51 +388,21 @@ def translate_dict_setdefault(
392388
return None
393389

394390

395-
# The empty Context as an argument for parse_format_value().
396-
# It wouldn't be used since the code has passed the type-checking.
397-
EMPTY_CONTEXT: Final = Context()
398-
399-
400391
@specialize_function('format', str_rprimitive)
401392
def translate_str_format(
402393
builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Optional[Value]:
403394
if (isinstance(callee, MemberExpr) and isinstance(callee.expr, StrExpr)
404395
and expr.arg_kinds.count(ARG_POS) == len(expr.arg_kinds)):
405396
format_str = callee.expr.value
406-
407-
# Creates an empty MessageBuilder here.
408-
# It wouldn't be used since the code has passed the type-checking.
409-
specifiers = parse_format_value(format_str, EMPTY_CONTEXT,
410-
MessageBuilder(Errors(), {}))
411-
if specifiers is None:
397+
tokens = tokenizer_format_call(format_str)
398+
if tokens is None:
412399
return None
413-
414-
literals = []
415-
last_pos = 0
416-
for spec in specifiers:
417-
# Only empty curly brace is allowed
418-
if spec.whole_seq:
419-
return None
420-
literals.append(format_str[last_pos:spec.start_pos-1])
421-
last_pos = spec.start_pos + len(spec.whole_seq) + 1
422-
literals.append(format_str[last_pos:])
423-
424-
# Deal with escaped {{
425-
literals = [x.replace('{{', '{').replace('}}', '}') for x in literals]
426-
400+
literals, format_ops = tokens
427401
# Convert variables to strings
428-
variables = []
429-
for x in expr.args:
430-
node_type = builder.node_type(x)
431-
if is_str_rprimitive(node_type):
432-
var_str = builder.accept(x)
433-
elif is_int_rprimitive(node_type) or is_short_int_rprimitive(node_type):
434-
var_str = builder.call_c(int_to_str_op, [builder.accept(x)], expr.line)
435-
else:
436-
var_str = builder.call_c(str_op, [builder.accept(x)], expr.line)
437-
variables.append(var_str)
438-
439-
return join_formatted_strings(builder, literals, variables, expr.line)
402+
substitutions = convert_expr(builder, format_ops, expr.args, expr.line)
403+
if substitutions is None:
404+
return None
405+
return join_formatted_strings(builder, literals, substitutions, expr.line)
440406
return None
441407

442408

mypyc/test-data/run-strings.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def test_str_to_bool() -> None:
149149
assert is_true(x)
150150
assert not is_false(x)
151151

152-
[case testCStyleStringFormatting]
152+
[case testStringFormattingCStyle]
153153
[typing fixtures/typing-full.pyi]
154154
from typing import Tuple
155155

0 commit comments

Comments
 (0)
0