8000 add lots of python code · dragoncoder047/pickle@40eb6d2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 40eb6d2

Browse files
add lots of python code
1 parent 6462ea2 commit 40eb6d2

File tree

6 files changed

+476
-0
lines changed

6 files changed

+476
-0
lines changed

python/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
**/__pycache__/**

python/pickle/__init__.py

Whitespace-only changes.

python/pickle/errors.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
class PickleError(Exception):
    """Base class for errors raised while running a Pickle program."""


class ParseError(PickleError):
    """Raised when the Pickle parser meets input it cannot make sense of."""


class ParseFail(Exception):
    """Internal control-flow signal meaning a parse attempt did not match.

    Note: inherits from Exception directly, not from PickleError.
    """

python/pickle/object.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import functools
2+
from typing import Callable, Any
3+
from dataclasses import dataclass
4+
5+
6+
@dataclass(frozen=True)
class Symbol:
    """String-like thing that represents a keyword or atom.

    Frozen so instances are hashable: Pattern handlers receive their
    bindings as a ``dict[Symbol, Any]``, which requires Symbol to work
    as a dict key.  A plain ``@dataclass`` defines ``__eq__`` and
    therefore sets ``__hash__`` to None, making instances unhashable.
    """
    name: str  # the symbol's spelling
10+
11+
12+
@dataclass
@functools.total_ordering
class Pattern:
    """The core object that manages pattern-matching.

    Ordering and equality look at ``precedence`` only, and the ordering
    is intentionally reversed: a Pattern with HIGHER precedence sorts
    FIRST.  ``functools.total_ordering`` derives the remaining
    comparison operators from ``__lt__`` and ``__eq__``.
    """
    precedence: int
    pattern: list
    handler: Callable[[dict[Symbol, Any]], Any]
    right: bool = False
    macro: bool = False
    greedy: bool = True

    def __lt__(self, other):
        # Reversed on purpose: sorting puts highest precedence first.
        if not isinstance(other, Pattern):
            return NotImplemented
        return other.precedence < self.precedence

    def __eq__(self, other):
        # Two Patterns compare equal whenever their precedences tie,
        # regardless of their other fields.
        if not isinstance(other, Pattern):
            return NotImplemented
        return other.precedence == self.precedence
32+
33+
34+
@dataclass
class Var:
    """A pattern element that is not a literal: whatever it matches is
    bound to a variable."""
    var: Symbol  # the name the matched value gets bound to
    cls: type  # expected type of the matched value
    # presumably gates whether `cls` is checked during matching — TODO confirm
    use_cls: bool = True
40+
41+
42+
@dataclass
class Space:
    """Dummy value that stands in for whitespace in a pattern."""
    comment: str = ""  # comment text attached to the whitespace, if any
46+
47+
48+
@dataclass
class Optional:
    """Marks a pattern element that is allowed to be absent."""
    what: Any  # the wrapped pattern element
    greedy: bool = True  # prefer matching it over skipping it
53+
54+
55+
@dataclass
class Alternate:
    """Marks a spot in a pattern where any one of several options may match."""
    options: list[Any]  # the candidate pattern elements

python/pickle/parse.py

Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
import re
2+
import enum
3+
from typing import NoReturn, Any
4+
from .errors import ParseFail
5+
6+
# Escape letters mapped to the characters they denote.  "o" and "c"
# denote the literal brace characters, and an escaped newline
# disappears entirely (line continuation).
UNESCAPE_MAP = {
    "b": "\b",
    "t": "\t",
    "n": "\n",
    "v": "\v",
    "f": "\f",
    "r": "\r",
    "a": "\a",
    "o": "{",
    "c": "}",
    "e": "\x1b",
    "\n": ""
}

# The other direction: characters that need escaping, mapped to the
# letter written after the backslash.
ESCAPE_MAP = {
    "\\": "\\",
    "\b": "b",
    "\t": "t",
    "\n": "n",
    "\v": "v",
    "\f": "f",
    "\r": "r",
    "\a": "a",
    "{": "o",
    "}": "c",
    "\x1b": "e"
}


def unescape(ch: str) -> str:
    """Return the character denoted by the escape letter ``ch``.

    A character with no special meaning passes through unchanged."""
    return UNESCAPE_MAP.get(ch, ch)


def escape(ch: str) -> str:
    """Return ``ch`` as it must be written inside a quoted string."""
    return "\\" + ESCAPE_MAP[ch] if ch in ESCAPE_MAP else ch
43+
44+
45+
class SourceLocation:
    """Records where in a source file a token came from."""

    def __init__(self, filename: str, line: int, column: int):
        self.filename = filename
        self.line = line
        self.column = column

    def as_tb_line(self):
        """Format this location for a traceback, C-compiler style
        (``file:line:column``)."""
        return f"{self.filename}:{self.line}:{self.column}"

    def __iadd__(self, other: "SourceLocation"):
        # In-place shift of this location's line/column by another's;
        # the filename is left alone.
        if not isinstance(other, SourceLocation):
            return NotImplemented
        self.line += other.line
        self.column += other.column
        return self
65+
66+
67+
class TokenKind(enum.Enum):
    """Discriminator for the kinds of tokens the Tokenizer emits."""
    ERR = 0  # tokenization failed; diagnostic in Token.message
    STR = 1  # string literal (quoted, braced, or colon-block)
    PAR = 2  # one of the paren/bracket characters ( ) [ ]
    EOL = 3  # end of a logical line (run of newlines/semicolons)
    SYM = 4  # symbol character
    SPC = 5  # whitespace or a comment
74+
75+
76+
class Token:
    """A parse token generated when parsing PICKLE code.

    Attributes:
        kind: which TokenKind this token is.
        content: the token's text payload (annotated ``str``, though EOL
            tokens are constructed with ``None`` — see Tokenizer).
        start, end: SourceLocations delimiting the token in the input.
        message: optional diagnostic, used by ERR tokens.
    """

    def __init__(
            self,
            kind: TokenKind,
            content: str,
            start: SourceLocation,
            end: SourceLocation,
            message: str | None = None):  # was `str = None` (implicit Optional)
        self.kind = kind
        self.content = content
        self.message = message
        self.start = start
        self.end = end

    def __repr__(self) -> str:
        # The diagnostic message, when present, is shown parenthesized.
        msgpart = " (" + self.message + ")" if self.message else ""
        return f"<{self.kind.name} at {self.start.as_tb_line()}\t{self.content!r}{msgpart}>"
95+
96+
97+
class Tokenizer:
    """The thing that does all the work when tokenizing Pickle code.

    A Tokenizer is a cursor over ``string``: ``i`` is the current read
    position and ``bi`` marks where the token currently being scanned
    began.  Backtracking uses the context-manager protocol: ``with
    self:`` saves the position, and a ParseFail raised inside the block
    rewinds to it and is suppressed.
    """

    def __init__(self, string="", filename="", offset=SourceLocation(None, 0, 0)):
        # NOTE(review): the default `offset` is one shared SourceLocation
        # instance (mutable default argument).  make_token() only adds it
        # to OTHER locations and never mutates it, so this is currently
        # harmless, but a per-call default would be safer.
        self.offset = offset
        self.bi = 0  # index where the current token started
        self.i = 0  # current read position
        self.last_token: Token = None  # not updated anywhere in this file yet
        self.string = string
        self.filename = filename
        self.position_stack: list[int] = []  # saved positions for backtracking

    def at(self, i=0):
        """Returns the character the tokenizer is sitting on, with an optional offset."""
        return self.string[self.i+i]

    def string_at(self, i=0):
        """Returns the string remaining to parse, with an optional offset."""
        return self.string[self.i+i:]

    def __enter__(self):
        # Save the current position so a failed parse can rewind to it.
        self.position_stack.append(self.i)

    def __exit__(self, type_, value, traceback):
        # Pop the saved position; on ParseFail, rewind to it and
        # suppress the exception.  Any other exception propagates.
        i = self.position_stack.pop()
        if isinstance(value, ParseFail):
            self.i = i
            return True

    def __bool__(self):
        # Truthy while there is input left to tokenize.
        return self.i < len(self.string)

    def chomp(self, what: str | re.Pattern) -> str | re.Match | None:
        """Chomps the prefix off, advances the stream, and returns what was chomped.
        If nothing was chomped, return None."""
        # NOTE(review): a zero-width regex match yields an (always
        # truthy) empty Match object rather than None — callers that
        # test the result for truthiness inherit that behavior.
        if isinstance(what, str):
            if self.string_at().startswith(what):
                self.i += len(what)
                return what
        elif isinstance(what, re.Pattern):
            if match := what.match(self.string, self.i):
                self.i += match.end() - match.start()
                return match
        return None

    def chomp_re(self, what: str) -> re.Match | None:
        """chomp() with an uncompiled regex source string."""
        return self.chomp(re.compile(what))

    def chomp_or_fail(self, what: str | re.Pattern) -> str | re.Match | NoReturn:
        """Try chomp(), and if it returns None, raise ParseFail."""
        result = self.chomp(what)
        if result is None:
            raise ParseFail
        return result

    def chomp_re_or_fail(self, what: str) -> re.Match | NoReturn:
        """chomp_or_fail() with an uncompiled regex source string."""
        return self.chomp_or_fail(re.compile(what))

    def try_funs_or_fail(self, *functions) -> Any | NoReturn:
        """Run each parser function in turn, rewinding after each failed
        attempt, and return the first non-None result; raise ParseFail
        if none of them produce one."""
        for fun in functions:
            with self:
                if (result := fun()) is not None:
                    return result
        raise ParseFail

    def consume_greedy(self, function):
        """Apply a parser function repeatedly until it raises ParseFail."""
        while True:
            try:
                function()
            except ParseFail:
                return

    def error(self, offending="", message="") -> Token:
        """Creates an error token when parsing fails."""
        if self.i == self.bi:
            # Consume at least one character so the tokenizing loop
            # cannot get stuck repeating the same position.
            self.i += 1
        return self.make_token(
            TokenKind.ERR,
            offending or self.string[self.bi:self.i],
            message or f"unexpected {self.at(-1)}")

    def make_token(self, kind: TokenKind, content: str, message=""):
        """Creates a token and initializes the line and column numbers of the start and end."""
        # Walk the input up to the current position counting lines and
        # columns, snapshotting the location when the walk passes
        # self.bi.
        # NOTE(review): counting starts at column 1 but resets to column
        # 0 after each newline, so the first character of line 1 reports
        # column 1 while the first character of later lines reports
        # column 0 — confirm which convention is intended.
        here = SourceLocation(self.filename, 1, 1)
        start: SourceLocation = None
        for i, ch in enumerate(self.string[:self.i]):
            if i == self.bi:
                start = SourceLocation(self.filename, here.line, here.column)
            if ch == "\n":
                here.line += 1
                here.column = 0
            else:
                here.column += 1
        # NOTE(review): if self.bi == self.i, `start` stays None and the
        # line below raises TypeError — confirm that case is unreachable.
        here += self.offset
        start += self.offset
        return Token(kind, content, start, here, message)

    def __try_whitespace(self) -> Token | NoReturn:
        """Parse a comment or a run of non-newline whitespace into one
        SPC token; raise ParseFail if none is at the cursor."""
        match = (
            # block comment
            self.chomp_re(
                r"(?<!#)(###)(\S*?)(#+)(?!#)[\s\S\n\r]*?(?<!#)\3\2\1(?!#)")
            # line comment
            or self.chomp_re(r"##[^\n]*")
            # general whitespace
            or self.chomp_re_or_fail(r"((?!\n)\s)+"))
        return self.make_token(TokenKind.SPC, match.group())

    def __try_colon_block(self) -> Token | NoReturn:
        """Parse a colon-and-indent block (a ``:`` at end of line
        followed by consistently indented lines) into one STR token
        holding the dedented body."""
        self.chomp_or_fail(":")
        self.consume_greedy(self.__try_whitespace)
        self.chomp_or_fail("\n")
        indent = self.chomp_re(r"((?!\n)\s)+")
        if indent is None:
            return self.error(message="expected indent after colon and EOL")
        indent = indent.group()
        # The entire indent must repeat one single whitespace character.
        if re.fullmatch(r"(\s)\1*", indent) is None:
            return self.error(indent, "mix of tabs and spaces indenting block")
        lines = []
        while True:
            if line := self.chomp_re(r"[^\r\n]*"):
                lines.append(line.group(0))
            else:
                # NOTE(review): chomp_re with this pattern always returns
                # a (truthy) Match, even a zero-width one, so this branch
                # appears unreachable — confirm.
                lines.append("")
            if self.chomp_re(r"\r|\n|\r\n") is None:
                break
            if not self.chomp(indent):
                # The next line is not indented like the first one was.
                if bad_indent := self.chomp_re(r"(((?!\n)\s)*)(?=\S)"):
                    bad_indent = bad_indent.group()
                    if len(bad_indent) > 0:
                        return self.error(bad_indent, "unexpected unindent")
                    # NOTE(review): a zero-width match lands here and the
                    # loop keeps consuming flush-left text — confirm that
                    # an unindented line is meant to continue the block.
                else:
                    break
        return self.make_token(TokenKind.STR, "\n".join(lines))

    def __try_symbol_paren_eol(self) -> Token | NoReturn:
        """Parse one SYM, PAR, or EOL token; raise ParseFail otherwise."""
        # NOTE(review): this matches exactly ONE character per SYM token,
        # so a word becomes a run of single-character symbols — confirm
        # whether the pattern should be ((?!...)\S)+ instead.
        if match := self.chomp_re(r"""(?![\(\{\[\]\}\);"'])\S"""):
            return self.make_token(TokenKind.SYM, match.group())
        if match := self.chomp_re(r"[\(\[\]\)]"):
            return self.make_token(TokenKind.PAR, match.group())
        match = self.chomp_re_or_fail(r"[;\s]+")
        return self.make_token(TokenKind.EOL, None)

    def __try_brace_string(self) -> Token | NoReturn:
        """Parse a {brace-delimited} string literal, honoring nested
        braces, into a STR token.  No escape processing is done."""
        self.chomp_or_fail("{")
        depth = 1
        out = ""
        while self:
            char = self.at()
            self.i += 1
            if char == "{":
                depth += 1
            if char == "}":
                depth -= 1
                if depth == 0:
                    # Matching outermost close brace: not part of content.
                    return self.make_token(TokenKind.STR, out)
            out += char
        return self.error(out, "unexpected EOF inside {")

    def __try_quote_string(self) -> Token | NoReturn:
        """Parse a single- or double-quoted string literal, processing
        backslash escapes, into a STR token."""
        quote = self.chomp_re_or_fail(r"""["']""").group()
        out = ""
        while self:
            char = self.at()
            self.i += 1
            if char in "\r\n":
                return self.error(message="unexpected newline in string"
                                  " (use \\ to escape newlines)")
            if char == "\\":
                # Substitute the escaped character (possibly "" for an
                # escaped newline) and step past it.
                char = unescape(self.at())
                self.i += 1
            elif char == quote:
                return self.make_token(TokenKind.STR, out)
            out += char
        return self.error(out, "unexpected EOF in string")

    def next_token(self) -> Token | None:
        """Return the next token in the stream, or None if the stream is exhausted."""
        if not self:
            return None
        self.bi = self.i
        try:
            # First parser that yields a token wins; each failed attempt
            # is rewound by the context manager in try_funs_or_fail.
            return self.try_funs_or_fail(
                self.__try_whitespace,
                self.__try_colon_block,
                self.__try_symbol_paren_eol,
                self.__try_brace_string,
                self.__try_quote_string)
        except ParseFail:
            return self.error()
287+
288+
289+
class Parser:
    """The second pass in PICKLE parsing, uses the token stream to """
    # TODO(review): stub — the docstring above is truncated and the
    # class body is empty; presumably this will consume Tokenizer's
    # token stream to build higher-level structure.  Complete it.
291+
292+
293+
if __name__ == "__main__":
    # Ad-hoc smoke test: tokenize a sample that exercises quoted
    # strings, nested brace strings, line/block comments, colon blocks,
    # escapes, and deliberate error cases, and print every token.
    # NOTE(review): the fixture's internal indentation matters to the
    # colon-block rules — confirm it matches the intended test cases.
    x = Tokenizer(
        r"""
"Hello I am a string";;;;;;;;;;;;;
{Hello I am a {nested
{extremely
{deeply}}} string
with newlines}
### I am a line comment ###
' this should be a string not a comment '
###11### This is
"a block comment"
###22### nested comment ###22###
end of comment ###11###
pattern [x is Number]+[y is Number]j:
    Complex.new $x $y
"I am a string with \n embedded \e \a \o\o\o\c\c\c escapes"
"I\
have\
embedded\
newlines\
escaped"
"i am an unclosed string
bwni:
    pass
}}}}}}}}}}} ## errors
""",
        filename="test")
    while x:
        print(x.next_token())

0 commit comments

Comments
 (0)
0