|
1 |
| -import {ExternalTokenizer} from "lezer" |
| 1 | +import {ExternalTokenizer, ContextTracker} from "lezer" |
2 | 2 | import {
|
3 |
| - newline as newlineToken, eof, newlineEmpty, newlineBracketed, continueBody, endBody, |
4 |
| - _else, _elif, _except, _finally, |
| 3 | + newline as newlineToken, eof, newlineEmpty, newlineBracketed, indent, dedent, printKeyword, |
5 | 4 | ParenthesizedExpression, TupleExpression, ComprehensionExpression, ArrayExpression, ArrayComprehensionExpression,
|
6 |
| - DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression, |
7 |
| - compoundStatement, |
8 |
| - printKeyword |
| 5 | + DictionaryExpression, DictionaryComprehensionExpression, SetExpression, SetComprehensionExpression |
9 | 6 | } from "./parser.terms.js"
|
10 | 7 |
|
// ASCII character codes used by the scanners in this file.
const newline = 10
const carriageReturn = 13
const space = 32
const tab = 9
const hash = 35
const parenOpen = 40
const dot = 46

// Term ids of the bracketed constructs. While the parse is inside one
// of these, newline characters are insignificant (see `newlines` and
// `indentation` below).
const bracketed = [
  ParenthesizedExpression,
  TupleExpression,
  ComprehensionExpression,
  ArrayExpression,
  ArrayComprehensionExpression,
  DictionaryExpression,
  DictionaryComprehensionExpression,
  SetExpression,
  SetComprehensionExpression
]
57 | 14 |
|
// One-slot memo for getIndentInner. Scanning a line's indentation is
// O(line length) and the tokenizers below may query the same position
// several times in a row.
let cachedIndent = 0, cachedInput = null, cachedPos = 0

// Return the indentation depth of the line starting at `pos`, or -1
// when that line is blank or holds only a comment.
function getIndent(input, pos) {
  if (input == cachedInput && pos == cachedPos) return cachedIndent
  cachedInput = input
  cachedPos = pos
  cachedIndent = getIndentInner(input, pos)
  return cachedIndent
}

// Uncached scan: count spaces and tabs from `pos` until the first
// other character.
function getIndentInner(input, pos) {
  let depth = 0
  while (true) {
    const ch = input.get(pos)
    if (ch == space) {
      depth++
    } else if (ch == tab) {
      depth += 8 - (depth % 8) // advance to the next multiple-of-8 tab stop
    } else if (ch == newline || ch == carriageReturn || ch == hash) {
      return -1 // blank or comment-only line: no significant indentation
    } else {
      return depth // also covers end of input (get() returns a negative value)
    }
    pos++
  }
}
|
82 | 31 |
|
// Tokenize the boundary at a newline (or end of input), producing one of:
//  - eof             at the end of the input
//  - newlineBracketed  inside a bracketed construct, where newlines
//                      don't end statements
//  - newlineEmpty    when the following line is blank or comment-only
//  - newlineToken    a significant, statement-terminating newline
export const newlines = new ExternalTokenizer((input, token, stack) => {
  const next = input.get(token.start)
  if (next < 0) {
    token.accept(eof, token.start)
    return
  }
  if (next != newline && next != carriageReturn) return
  if (stack.startOf(bracketed) != null) {
    token.accept(newlineBracketed, token.start + 1)
  } else if (getIndent(input, token.start + 1) < 0) {
    token.accept(newlineEmpty, token.start + 1)
  } else {
    token.accept(newlineToken, token.start + 1)
  }
}, {contextual: true, fallback: true})
|
109 | 45 |
|
110 |
// Emit an indent or dedent token at the start of a line whose
// indentation differs from the depth stored in the parse context —
// but only directly after a newline, outside bracketed constructs,
// and never for blank/comment-only lines (getIndent returns -1 there).
export const indentation = new ExternalTokenizer((input, token, stack) => {
  const prev = input.get(token.start - 1)
  if (prev != newline && prev != carriageReturn) return
  const depth = getIndent(input, token.start)
  if (depth < 0 || depth == stack.context.depth) return
  if (stack.startOf(bracketed) != null) return
  token.accept(depth < stack.context.depth ? dedent : indent, token.start)
})
116 | 54 |
|
117 |
// A link in the immutable chain of active indentation levels, used as
// the parser's context value. `hash` folds the parent's hash together
// with this level's depth so distinct chains compare cheaply.
class IndentLevel {
  constructor(parent, depth) {
    this.parent = parent
    this.depth = depth
    // NB: `+` binds tighter than `<<`, so the parent contribution is
    // (parent.hash + parent.hash) << 8 — parenthesized here to make the
    // original precedence explicit; the computed value is unchanged.
    this.hash = (parent ? (parent.hash + parent.hash) << 8 : 0) + depth + (depth << 4)
  }
}
118 | 60 |
|
119 |
// The initial context: top-level code at indentation depth 0.
const topIndent = new IndentLevel(null, 0)

// Context tracker that maintains the current chain of indentation
// levels: shifting an `indent` token pushes a new level (measured at
// the stack position), shifting a `dedent` pops back to the parent,
// and every other token leaves the context untouched.
export const trackIndent = new ContextTracker({
  start: topIndent,
  shift(context, term, input, stack) {
    if (term == indent) return new IndentLevel(context, getIndent(input, stack.pos))
    if (term == dedent) return context.parent
    return context
  },
  hash(context) { return context.hash }
})
130 | 71 |
|
131 | 72 | export const legacyPrint = new ExternalTokenizer((input, token) => {
|
132 | 73 | let pos = token.start
|
|
0 commit comments