Fix parsing of long format strings · lezer-parser/python@1c3aae2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1c3aae2

Browse files
committed
Fix parsing of long format strings
FIX: Fix a bug that caused triple-quoted format strings with quotes in them to be parsed incorrectly. Closes codemirror/dev#1147
1 parent 2cdf53b commit 1c3aae2

File tree

3 files changed

+62
-25
lines changed

3 files changed

+62
-25
lines changed

src/python.grammar

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -252,20 +252,20 @@ skw<term> { @extend[@name={term}]<identifier, term> }
252252
}
253253

254254
FormatString {
255-
formatStringStart<"'"> (formatString1Content | FormatReplacement)* "'" |
256-
formatStringStart<'"'> (formatString2Content | FormatReplacement)* '"' |
257-
longFormatStringStart<"'"> (longFormatString1Content | FormatReplacement)* "'''" |
258-
longFormatStringStart<'"'> (longFormatString2Content | FormatReplacement)* '"""'
255+
formatStringStart<"'"> (formatString1Content | FormatReplacement<formatString1Brace>)* formatString1End |
256+
formatStringStart<'"'> (formatString2Content | FormatReplacement<formatString2Brace>)* formatString2End |
257+
longFormatStringStart<"'"> (formatString1lContent | FormatReplacement<formatString1lBrace>)* formatString1lEnd |
258+
longFormatStringStart<'"'> (formatString2lContent | FormatReplacement<formatString2lBrace>)* formatString2lEnd
259259
}
260260

261-
formatStringSpec { FormatSpec { ":" (formatStringSpecChars | FormatReplacement)* } "}" }
261+
formatStringSpec { FormatSpec { ":" (formatStringSpecChars | FormatReplacement<"{">)* } "}" }
262262

263263
blankLine {
264264
blankLineStart space? Comment? newline
265265
}
266266
}
267267

268-
FormatReplacement { "{" (YieldExpression | commaSep<"*"? test>) FormatConversion? (formatStringSpec | "}") }
268+
FormatReplacement<start> { start (YieldExpression | commaSep<"*"? test>) FormatConversion? (formatStringSpec | "}") }
269269

270270
@context trackIndent from "./tokens.js"
271271

@@ -275,6 +275,11 @@ FormatReplacement { "{" (YieldExpression | commaSep<"*"? test>) FormatConversion
275275

276276
@external tokens newlines from "./tokens" { newline, blankLineStart, newlineBracketed, eof }
277277

278+
@external tokens formatString1 from "./tokens" { formatString1Content, formatString1Brace[@name="{"], formatString1End }
279+
@external tokens formatString2 from "./tokens" { formatString2Content, formatString2Brace[@name="{"], formatString2End }
280+
@external tokens formatString1l from "./tokens" { formatString1lContent, formatString1lBrace[@name="{"], formatString1lEnd }
281+
@external tokens formatString2l from "./tokens" { formatString2lContent, formatString2lBrace[@name="{"], formatString2lEnd }
282+
278283
@tokens {
279284
CompareOp { "<" | ">" | $[<>=!] "=" | "<>" }
280285

@@ -304,9 +309,6 @@ FormatReplacement { "{" (YieldExpression | commaSep<"*"? test>) FormatConversion
304309

305310
formatStringSpecChars { ![{}]+ }
306311

307-
formatString1Content { (!['{\\] | "\\" _ | "{{")+ }
308-
formatString2Content { (!["{\\] | "\\" _ | "{{")+ }
309-
310312
longStringStart<quote> { stringPrefix? quote quote quote }
311313

312314
longString1Content { (!['\\] | "\\" _ | "'" longString1_2)+ }
@@ -319,16 +321,6 @@ FormatReplacement { "{" (YieldExpression | commaSep<"*"? test>) FormatConversion
319321

320322
longFormatStringStart<quote> { formatPrefix quote quote quote }
321323

322-
longFormatString1Content { (!['\\{] | "\\" _ | "'" longFormatString1_2 | "{{")+ }
323-
longFormatString1_2 { !['\\{] | "\\" _ | "{{" | "'" longFormatString1_3 }
324-
longFormatString1_3 { !['\\{] | "\\" _ | "{{" }
325-
326-
longFormatString2Content { (!["\\{] | "\\" _ | '"' longFormatString2_2 | "{{")+ }
327-
longFormatString2_2 { !["\\{] | "\\" _ | "{{" | '"' longFormatString2_3 }
328-
longFormatString2_3 { !["\\{] | "\\" _ | "{{" }
329-
330-
@precedence { "{", formatString1Content, formatString2Content, longFormatString1Content, longFormatString2Content }
331-
332324
Number {
333325
(@digit ("_" | @digit)* ("." @digit ("_" | @digit)*)? | "." @digit ("_" | @digit)*)
334326
($[eE] $[+\-]? @digit ("_" | @digit)*)? $[jJ]? |

src/tokens.js

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,24 @@ import {ExternalTokenizer, ContextTracker} from "@lezer/lr"
22
import {
33
newline as newlineToken, eof, newlineBracketed, blankLineStart, indent, dedent, printKeyword,
44
ParenthesizedExpression, TupleExpression, ComprehensionExpression,
5-
PatternArgList, SequencePattern, MappingPattern,
5+
PatternArgList, SequencePattern, MappingPattern, FormatString,
66
ArrayExpression, ArrayComprehensionExpression, ArgList, ParamList, importList, subscript,
7-
DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression, FormatReplacement,
7+
DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression,
8+
formatString1Content, formatString1Brace, formatString1End,
9+
formatString2Content, formatString2Brace, formatString2End,
10+
formatString1lContent, formatString1lBrace, formatString1lEnd,
11+
formatString2lContent, formatString2lBrace, formatString2lEnd,
812
ParenL, BraceL, BracketL
913
} from "./parser.terms.js"
1014

11-
const newline = 10, carriageReturn = 13, space = 32, tab = 9, hash = 35, parenOpen = 40, dot = 46
15+
const newline = 10, carriageReturn = 13, space = 32, tab = 9, hash = 35, parenOpen = 40, dot = 46,
16+
braceOpen = 123, singleQuote = 39, doubleQuote = 34
1217

1318
const bracketed = new Set([
1419
ParenthesizedExpression, TupleExpression, ComprehensionExpression, importList, ArgList, ParamList,
1520
ArrayExpression, ArrayComprehensionExpression, subscript,
16-
SetExpression, SetComprehensionExpression,
17-
DictionaryExpression, DictionaryComprehensionExpression, FormatReplacement,
21+
SetExpression, SetComprehensionExpression, FormatString,
22+
DictionaryExpression, DictionaryComprehensionExpression,
1823
SequencePattern, MappingPattern, PatternArgList
1924
])
2025

@@ -102,3 +107,41 @@ export const legacyPrint = new ExternalTokenizer(input => {
102107
return
103108
}
104109
})
110+
111+
/**
 * Build an external tokenizer for the body of one kind of format string.
 *
 * Each call to the tokenizer emits at most one token at the current position:
 *  - `brace` (1 char) when the input starts with a single `{`,
 *  - `end` (`len` chars) when the input starts with the closing delimiter,
 *  - `content` for the longest non-empty run of text before the next
 *    single `{`, closing delimiter, or end of input.
 *
 * @param {number} quote Character code of the quote character (`'` or `"`).
 * @param {number} len Length of the closing delimiter: 1 for a plain string,
 *   3 for a triple-quoted one.
 * @param {number} content Term id emitted for literal string content.
 * @param {number} brace Term id emitted for a `{` that opens a replacement.
 * @param {number} end Term id emitted for the closing delimiter.
 * @returns {ExternalTokenizer} The tokenizer for this string flavour.
 */
function formatString(quote, len, content, brace, end) {
  // `input.next` is a numeric character code, so the backslash has to be
  // compared as a number. Comparing against the string "\\" is never true.
  const backslash = 92
  return new ExternalTokenizer(input => {
    let start = input.pos
    for (;;) {
      if (input.next < 0) {
        // End of input: emit whatever content was collected so far.
        break
      } else if (input.next == braceOpen) {
        if (input.peek(1) == braceOpen) {
          // `{{` is an escaped literal brace and stays part of the content.
          input.advance(2)
        } else {
          if (input.pos == start) {
            input.acceptToken(brace, 1)
            return
          }
          break
        }
      } else if (input.next == backslash) {
        // Skip the backslash together with the character it escapes, so an
        // escaped quote does not terminate the string.
        input.advance()
        if (input.next >= 0) input.advance()
      } else if (input.next == quote && (len == 1 || input.peek(1) == quote && input.peek(2) == quote)) {
        // Closing delimiter. In a triple-quoted string, one or two quotes
        // fall through to the final branch and count as content.
        if (input.pos == start) {
          input.acceptToken(end, len)
          return
        }
        break
      } else {
        input.advance()
      }
    }
    if (input.pos > start) input.acceptToken(content)
  })
}
143+
144+
export const formatString1 = formatString(singleQuote, 1, formatString1Content, formatString1Brace, formatString1End)
145+
export const formatString2 = formatString(doubleQuote, 1, formatString2Content, formatString2Brace, formatString2End)
146+
export const formatString1l = formatString(singleQuote, 3, formatString1lContent, formatString1lBrace, formatString1lEnd)
147+
export const formatString2l = formatString(doubleQuote, 3, formatString2lContent, formatString2lBrace, formatString2lEnd)

test/expression.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,16 @@ f"double {quoted !s}"
5050
f"""big long format
5151
{string :foo}"""
5252
f'''well {{ \x }} {2 :{bar}}'''
53+
f"""one"{two}"three"""
5354

5455
==>
5556

5657
Script(ExpressionStatement(FormatString(FormatReplacement(Number),
5758
FormatReplacement(BinaryExpression(VariableName, ArithOp, Number)))),
5859
ExpressionStatement(FormatString(FormatReplacement(VariableName, FormatConversion))),
5960
ExpressionStatement(FormatString(FormatReplacement(VariableName, FormatSpec))),
60-
ExpressionStatement(FormatString(FormatReplacement(Number, FormatSpec(FormatReplacement(VariableName))))))
61+
ExpressionStatement(FormatString(FormatReplacement(Number, FormatSpec(FormatReplacement(VariableName))))),
62+
ExpressionStatement(FormatString(FormatReplacement(VariableName))))
6163

6264
# Lambda
6365

0 commit comments

Comments
 (0)
0