8000 Use a context value for more reliable incremental parsing · geddski/python@bbb9fd5 · GitHub
[go: up one dir, main page]

Skip to content

Commit bbb9fd5

Browse files
committed
Use a context value for more reliable incremental parsing
FIX: Fix a bug where incremental parses could get confused about block nesting. Issue codemirror/dev#394
1 parent f027de4 commit bbb9fd5

File tree

3 files changed

+56
-120
lines changed

3 files changed

+56
-120
lines changed

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@
1313
"author": "Marijn Haverbeke <marijnh@gmail.com>",
1414
"license": "MIT",
1515
"devDependencies": {
16-
"lezer-generator": "^0.13.0",
16+
"lezer-generator": "^0.13.3",
1717
"mocha": "^8.1.3",
1818
"rollup": "^2.27.1",
1919
"@rollup/plugin-node-resolve": "^9.0.0"
2020
},
2121
"dependencies": {
22-
"lezer": "^0.13.0"
22+
"lezer": "^0.13.2"
2323
},
2424
"repository": {
2525
"type" : "git",

src/python.grammar

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ importedNames { commaSep<VariableName | VariableName kw<"as"> VariableName> }
6767

6868
commaSep<expr> { expr ("," expr)* ","? }
6969

70-
compoundStatement[@export] {
70+
compoundStatement {
7171
IfStatement |
7272
WhileStatement { kw<"while"> testNamed Body elseClause? } |
7373
ForStatement { kw<"async">? kw<"for"> commaSep<"*"? expression> kw<"in"> commaSep<test> Body elseClause? } |
@@ -78,22 +78,22 @@ compoundStatement[@export] {
7878
DecoratedStatement { Decorator+ (ClassDefinition | FunctionDefinition) }
7979
}
8080

81-
elseClause { _else Body }
81+
elseClause { kw<"else"> Body }
8282

8383
IfStatement {
8484
kw<"if"> testNamed Body
85-
(_elif testNamed? Body)*
85+
(kw<"elif"> testNamed? Body)*
8686
elseClause?
8787
}
8888

8989
TryStatement {
9090
kw<"try"> Body
91-
(_except (test ((kw<"as"> | ",") VariableName)?)? Body)*
91+
(kw<"except"> (test ((kw<"as"> | ",") VariableName)?)? Body)*
9292
elseClause?
93-
(_finally Body)?
93+
(kw<"finally"> Body)?
9494
}
9595

96-
Body { ":" (simpleStatement | newline continueBody statement (continueBody statement)* (endBody | eof)) }
96+
Body { ":" (simpleStatement | newline indent statement+ (dedent | eof)) }
9797

9898
lambdaParam { VariableName (AssignOp{"="} test)? | "*" VariableName? | "**" VariableName }
9999

@@ -203,14 +203,11 @@ kw<term> { @specialize[@name={term}]<identifier, term> }
203203

204204
FormatReplacement { "{" (YieldExpression | commaSep<"*"? test>) FormatConversion? (formatStringSpec | "}") }
205205

206+
@context trackIndent from "./tokens.js"
207+
206208
@external tokens legacyPrint from "./tokens.js" { printKeyword[@name="print"] }
207209

208-
@external tokens statementContinueKeyword from "./tokens" {
209-
_else[@name="else"],
210-
_elif[@name="elif"],
211-
_except[@name="except"],
212-
_finally[@name="finally"]
213-
}
210+
@external tokens indentation from "./tokens" { indent, dedent }
214211

215212
@tokens {
216213
CompareOp { "<" | ">" | $[<>=!] "=" | "<>" }
@@ -289,5 +286,3 @@ FormatReplacement { "{" (YieldExpression | commaSep<"*"? test>) FormatConversion
289286
}
290287

291288
@external tokens newlines from "./tokens" { newline, newlineBracketed, newlineEmpty, eof }
292-
293-
@external tokens bodyContinue from "./tokens" { continueBody, endBody }

src/tokens.js

Lines changed: 45 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1,132 +1,73 @@
1-
import {ExternalTokenizer} from "lezer"
1+
import {ExternalTokenizer, ContextTracker} from "lezer"
22
import {
3-
newline as newlineToken, eof, newlineEmpty, newlineBracketed, continueBody, endBody,
4-
_else, _elif, _except, _finally,
3+
newline as newlineToken, eof, newlineEmpty, newlineBracketed, indent, dedent, printKeyword,
54
ParenthesizedExpression, TupleExpression, ComprehensionExpression, ArrayExpression, ArrayComprehensionExpression,
6-
DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression,
7-
compoundStatement,
8-
printKeyword
5+
DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression
96
} from "./parser.terms.js"
107

118
const newline = 10, carriageReturn = 13, space = 32, tab = 9, hash = 35, parenOpen = 40, dot = 46
129

1310
const bracketed = [
1411
ParenthesizedExpression, TupleExpression, ComprehensionExpression, ArrayExpression, ArrayComprehensionExpression,
1512
DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression
16-
], parentStatement = [compoundStatement]
17-
18-
const caches = new WeakMap
19-
20-
// Per-input-stream indentation cache. `prev` maps indentation depths
21-
// to the last position at which a statement indented to that depth
22-
// was seen. There's an extra set of slots for the _current_
23-
// indentation, since that needs to be available alongside a previous
24-
// indentation position at the same level.
25-
class Cache {
26-
constructor() {
27-
this.last = this.lastIndent = -1
28-
this.prev = []
29-
}
30-
31-
get(pos) {
32-
if (this.last == pos) return this.lastIndent
33-
for (let i = 0; i < this.prev.length; i++) if (this.prev[i] == pos) return i
34-
return -1
35-
}
36-
37-
set(pos, indent) {
38-
if (pos == this.last) return
39-
if (this.last > -1) this.setPrev(this.last, this.lastIndent)
40-
this.last = pos
41-
this.lastIndent = indent
42-
}
43-
44-
setPrev(pos, indent) {
45-
while (this.prev.length < indent) this.prev.push(-1)
46-
this.prev[indent] = pos
47-
}
48-
49-
static for(input) {
50-
let found = caches.get(input)
51-
if (!found) caches.set(input, found = new Cache)
52-
return found
53-
}
54-
}
55-
56-
const maxIndent = 50
13+
]
5714

15+
let cachedIndent = 0, cachedInput = null, cachedPos = 0
5816
function getIndent(input, pos) {
59-
let cache = Cache.for(input), found = cache.get(pos)
60-
if (found > -1) return found
17+
if (pos == cachedPos && input == cachedInput) return cachedIndent
18+
cachedInput = input; cachedPos = pos
19+
return cachedIndent = getIndentInner(input, pos)
20+
}
6121

62-
// This shouldn't happen very often (or even at all) in normal
63-
// parsing, since the indentations are stored by the newline
64-
// tokenizer ahead of time. But it's kind of tricky to prove whether
65-
// that always happens in incremental parsing scenarios, so here's a
66-
// fallback anyway.
67-
let before = input.read(Math.max(0, pos - maxIndent), pos)
68-
let count = 0, start = before.length
69-
for (; start > 0; start--) {
70-
let next = before.charCodeAt(start - 1)
71-
if (next == newline || next == carriageReturn) break
72-
}
73-
for (let i = start; i < before.length; i++) {
74-
let ch = before.charCodeAt(i)
75-
if (ch == space) count++
76-
else if (ch == tab) count += 8 - (count % 8)
77-
else break
22+
function getIndentInner(input, pos) {
23+
for (let indent = 0;; pos++) {
24+
let ch = input.get(pos)
25+
if (ch == space) indent++
26+
else if (ch == tab) indent += 8 - (indent % 8)
27+
else if (ch == newline || ch == carriageReturn || ch == hash) return -1
28+
else return indent
7829
}
79-
cache.setPrev(pos, count)
80-
return count
8130
}
8231

8332
export const newlines = new ExternalTokenizer((input, token, stack) => {
8433
let next = input.get(token.start)
8534
if (next < 0) {
8635
token.accept(eof, token.start)
87-
return
88-
}
89-
if (next != newline && next != carriageReturn) return
90-
if (stack.startOf(bracketed) != null) {
36+
} else if (next != newline && next != carriageReturn) {
37+
} else if (stack.startOf(bracketed) != null) {
9138
token.accept(newlineBracketed, token.start + 1)
92-
return
93-
}
94-
let scan = token.start + 1, indent = 0
95-
for (; scan < input.length; scan++) {
96-
let ch = input.get(scan)
97-
if (ch == space) indent++
98-
else if (ch == tab) indent += 8 - (indent % 8)
99-
else if (ch == newline || ch == carriageReturn || ch == hash) {
100-
token.accept(newlineEmpty, token.start + 1)
101-
return
102-
} else {
103-
break
104-
}
39+
} else if (getIndent(input, token.start + 1) < 0) {
40+
token.accept(newlineEmpty, token.start + 1)
41+
} else {
42+
token.accept(newlineToken, token.start + 1)
10543
}
106-
token.accept(newlineToken, token.start + 1)
107-
Cache.for(input).set(scan, indent)
10844
}, {contextual: true, fallback: true})
10945

110-
export const bodyContinue = new ExternalTokenizer((input, token, stack) => {
111-
let parent = stack.startOf(parentStatement)
112-
let parentIndent = parent == null ? 0 : getIndent(input, parent)
113-
let indentHere = getIndent(input, token.start)
114-
token.accept(indentHere <= parentIndent ? endBody : continueBody, token.start)
115-
}, {contextual: true, fallback: true})
46+
export const indentation = new ExternalTokenizer((input, token, stack) => {
47+
let prev = input.get(token.start - 1), depth
48+
if ((prev == newline || prev == carriageReturn) &&
49+
(depth = getIndent(input, token.start)) >= 0 &&
50+
depth != stack.context.depth &&
51+
stack.startOf(bracketed) == null)
52+
token.accept(depth < stack.context.depth ? dedent : indent, token.start)
53+
})
11654

117-
let keywords = {else: _else, elif: _elif, except: _except, finally: _finally}
55+
function IndentLevel(parent, depth) {
56+
this.parent = parent
57+
this.depth = depth
58+
this.hash = (parent ? parent.hash + parent.hash << 8 : 0) + depth + (depth << 4)
59+
}
11860

119-
// Matches else/elif/except/finally, but only when at same indentation
120-
// as their parent statement
121-
export const statementContinueKeyword = new ExternalTokenizer((input, token, stack) => {
122-
let pos = token.start, next = input.get(token.start), m
123-
if (next == 101 /* 'e' */ && (m = /^(?:else|elif|except)\b/.exec(input.read(pos, pos + 7))) ||
124-
next == 102 /* 'f' */ && (m = /^finally\b/.exec(input.read(pos, pos + 8)))) {
125-
let parent = stack.startOf(parentStatement)
126-
let parentIndent = parent == null ? 0 : getIndent(input, parent)
127-
if (getIndent(input, token.start) == parentIndent) token.accept(keywords[m[0]], pos + m[0].length)
128-
}
129-
}, {contextual: true, fallback: true})
61+
const topIndent = new IndentLevel(null, 0)
62+
63+
export const trackIndent = new ContextTracker({
64+
start: topIndent,
65+
shift(context, term, input, stack) {
66+
return term == indent ? new IndentLevel(context, getIndent(input, stack.pos)) :
67+
term == dedent ? context.parent : context
68+
},
69+
hash(context) { return context.hash }
70+
})
13071

13172
export const legacyPrint = new ExternalTokenizer((input, token) => {
13273
let pos = token.start

0 commit comments

Comments
 (0)