Go back to llvm-objdump like the good 'ol days · python/cpython@be88965 · GitHub
[go: up one dir, main page]

Skip to content

Commit be88965

Browse files
committed
Go back to llvm-objdump like the good 'ol days
1 parent e2ae7bc commit be88965

File tree

1 file changed

+33
-43
lines changed

1 file changed

+33
-43
lines changed

Tools/jit/build.py

Lines changed: 33 additions & 43 deletions
291
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ class ObjectParser:
212212
"--sections",
213213
]
214214

215-
def __init__(self, path: pathlib.Path, reader: str, mc: str | None = None, symbol_prefix: str = "") -> None:
215+
def __init__(self, path: pathlib.Path, reader: str, dumper: str, symbol_prefix: str = "") -> None:
216216
self.path = path
217217
self.body = bytearray()
218218
self.body_symbols = {}
@@ -223,11 +223,19 @@ def __init__(self, path: pathlib.Path, reader: str, mc: str | None = None, symbo
223223
self.relocations_todo = []
224224
self.symbol_prefix = symbol_prefix
225225
self.reader = reader
226-
self.mc = mc
226+
self.dumper = dumper
227227
self.data_size = 0
228+
self.data = []
229+
self.code_size = 0
228230

229231
async def parse(self):
230-
# subprocess.run([find_llvm_tool("llvm-objdump")[0], self.path, "-dr"], check=True) # XXX
232+
process = await asyncio.create_subprocess_exec(self.dumper, self.path, "--disassemble", "--reloc", stdout=subprocess.PIPE)
233+
stdout, stderr = await process.communicate()
234+
assert stderr is None, stderr
235+
if process.returncode:
236+
raise RuntimeError(f"{self.dumper} exited with {process.returncode}")
237+
disassembly = [line.lstrip().expandtabs() for line in stdout.decode().splitlines()]
238+
disassembly = [line for line in disassembly if re.match(r"[0-9a-f]+[: ]", line)]
231239
process = await asyncio.create_subprocess_exec(self.reader, *self._ARGS, self.path, stdout=subprocess.PIPE)
232240
stdout, stderr = await process.communicate()
233241
assert stderr is None, stderr
@@ -258,34 +266,17 @@ async def parse(self):
258266
addend = newhole.addend + self.body_symbols[newhole.symbol] - entry
259267
newhole = Hole(newhole.kind, "_jit_base", newhole.offset, addend)
260268
holes.append(newhole)
261-
if self.mc is not None:
262-
process = await asyncio.create_subprocess_exec(self.mc, "--disassemble", "--show-encoding", stdin=subprocess.PIPE, stdout=subprocess.PIPE)
263-
stdout, stderr = await process.communicate(" ".join(f'0x{byte:02x}' for byte in self.body[:got - padding - self.data_size]).encode())
264-
if process.returncode:
265-
raise RuntimeError(f"{self.mc} exited with {process.returncode}")
266-
disassembly = [line.removeprefix("\t").expandtabs() for line in stdout.decode().splitlines()]
267-
assert disassembly[0].startswith(".")
268-
del disassembly[0]
269-
else:
270-
disassembly = [f"# <code>\t\t\t\t# encoding: [{','.join(f'0x{byte:02x}' for byte in self.body[:got - padding - self.data_size])}]".expandtabs()]
271-
offset = 0
272-
size = 0
273-
comment = None
274-
for i, line in enumerate(disassembly):
275-
if match := re.search(r"(#|;|//) encoding: \[((?:(0x[0-9a-f]{2}|A),?)+)\]", line):
276-
offset += size
277-
disassembly[i] = f"{offset:03x}: {line}"
278-
if match:
279-
comment = match.group(1)
280-
size = len(match.group(2).split(","))
281-
offset += size
282-
assert self.data_size == got - padding - offset, (self.data_size, got, padding, offset)
269+
offset = got-self.data_size-padding
270+
comment = "#"
271+
assert self.body[got-self.data_size-padding:got-padding] == bytes(self.data), breakpoint()
272+
assert self.data_size == got - padding - offset, breakpoint()
283273
if self.data_size:
284-
disassembly.append(f"{offset:03x}: " + f"{comment} <data>\t\t\t\t{comment} encoding: [{','.join(f'0x{byte:02x}' for byte in self.body[offset:offset + self.data_size])}]".expandtabs())
285-
disassembly.append(f"{offset:03x}: " + f"\t\t\t\t\t{comment} data = {str(bytes(self.body[offset:offset + self.data_size])).removeprefix('b')}".expandtabs())
274+
disassembly.append(f"{offset:x}: " + f"{comment} {str(bytes(self.body[offset:offset + self.data_size])).removeprefix('b')}".expandtabs())
275+
disassembly.append(f"{offset:x}: " + f"{' '.join(f'{byte:02x}' for byte in self.body[offset:offset + self.data_size])}".expandtabs())
286276
offset += self.data_size
287277
if padding:
288-
disassembly.append(f"{offset:03x}: " + f"{comment} <padding>\t\t\t\t{comment} encoding: [{','.join(f'0x{byte:02x}' for byte in self.body[offset:offset + padding])}]".expandtabs())
278+
disassembly.append(f"{offset:x}: " + f"{comment} <padding>".expandtabs())
279+
disassembly.append(f"{offset:x}: " + f"{' '.join(padding * ['00'])}".expandtabs())
289280
offset += padding
290281
for i, (got_symbol, addend) in enumerate(self.got_entries):
282
if got_symbol in self.body_symbols:
@@ -294,16 +285,17 @@ async def parse(self):
294285
# XXX: PATCH_ABS_32 on 32-bit platforms?
295286
holes.append(Hole("PATCH_ABS_64", got_symbol, got + 8 * i, addend))
296287
symbol_part = f"{comment} &{got_symbol}{f' + 0x{addend:x}' if addend else ''}"
297-
tabs = "\t" * (5 - len(symbol_part) // 8)
298-
disassembly.append(f"{offset:03x}: " + f"{symbol_part}{tabs}{comment} encoding: [{','.join(8 * ['0x00'])}]".expandtabs())
288+
disassembly.append(f"{offset:x}: " + f"{symbol_part}".expandtabs())
289+
disassembly.append(f"{offset:x}: " + f"{' '.join(8 * ['00'])}".expandtabs())
299290
offset += 8
300291
self.body.extend([0] * 8 * len(self.got_entries))
301292
padding = 0
302293
while len(self.body) % 16:
303294
self.body.append(0)
304295
padding += 1
305296
if padding:
306-
disassembly.append(f"{offset:03x}: " + f"{comment} <padding>\t\t\t\t{comment} encoding: [{','.join(padding * ['0x00'])}]".expandtabs())
297+
disassembly.append(f"{offset:x}: " + f"{comment} <padding>".expandtabs())
298+
disassembly.append(f"{offset:x}: " + f"{' '.join(padding * ['00'])}".expandtabs())
307299
offset += padding
308300
holes.sort(key=lambda hole: hole.offset)
309301
assert offset == len(self.body), (self.path, offset, len(self.body))
@@ -728,21 +720,19 @@ def _handle_section(self, section: COFFSection) -> None:
728720
flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]}
729721
if "SectionData" not in section:
730722
return
723+
section_data = section["SectionData"]
731724
if flags & {"IMAGE_SCN_LINK_COMDAT", "IMAGE_SCN_MEM_EXECUTE", "IMAGE_SCN_MEM_READ", "IMAGE_SCN_MEM_WRITE"} == {"IMAGE_SCN_LINK_COMDAT", "IMAGE_SCN_MEM_READ"}:
732725
# XXX: Merge these
733726
self.data_size += len(section_data["Bytes"])
734727
before = self.body_offsets[section["Number"]] = len(self.body)
735-
section_data = section["SectionData"]
736728
self.body.extend(section_data["Bytes"])
737729
elif flags & {"IMAGE_SCN_MEM_EXECUTE"}:
738730
assert not self.data_size, self.data_size
739731
before = self.body_offsets[section["Number"]] = len(self.body)
740-
section_data = section["SectionData"]
741732
self.body.extend(section_data["Bytes"])
742733
elif flags & {"IMAGE_SCN_MEM_READ"}:
743734
self.data_size += len(section_data["Bytes"])
744735
before = self.body_offsets[section["Number"]] = len(self.body)
745-
section_data = section["SectionData"]
746736
self.body.extend(section_data["Bytes"])
747737
else:
748738
return
@@ -765,13 +755,18 @@ def _handle_section(self, section: MachOSection) -> None:
765755
flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
766756
if flags & {"SomeInstructions"}:
767757
assert not self.data_size
758+
assert not self.data
759+
self.code_size += len(section_data["Bytes"]) + (section["Address"] - len(self.body))
768760
self.body.extend([0] * (section["Address"] - len(self.body)))
769761
before = self.body_offsets[section["Index"]] = section["Address"]
770762
self.body.extend(section_data["Bytes"])
771763
else:
772-
self.data_size += len(section_data["Bytes"]) + (section["Address"] - len(self.body))
764+
self.data_size += section["Address"] - len(self.body)
765+
self.data.extend([0] * (section["Address"] - len(self.body)))
773766
self.body.extend([0] * (section["Address"] - len(self.body)))
774767
before = self.body_offsets[section["Index"]] = section["Address"]
768+
self.data_size += len(section_data["Bytes"])
769+
self.data.extend(section_data["Bytes"])
775770
self.body.extend(section_data["Bytes"])
776771
name = section["Name"]["Value"]
777772
# assert name.startswith("_") # XXX
@@ -902,13 +897,8 @@ def __init__(
902897
self._verbose = verbose
903898
self._clang, clang_version = find_llvm_tool("clang")
904899
self._readobj, readobj_version = find_llvm_tool("llvm-readobj")
905-
try:
906-
self._mc, mc_version = find_llvm_tool("llvm-mc")
907-
except RuntimeError:
908-
self._mc = None
909-
self._stderr(f"Using {self._clang} ({clang_version}) and {self._readobj} ({readobj_version}).")
910-
else:
911-
self._stderr(f"Using {self._clang} ({clang_version}), {self._readobj} ({readobj_version}), and {self._mc} ({mc_version}).")
900+
self._objdump, objdump_version = find_llvm_tool("llvm-objdump")
901+
self._stderr(f"Using {self._clang} ({clang_version}), {self._readobj} ({readobj_version}), and {self._objdump} ({objdump_version}).")
912902
self._semaphore = asyncio.BoundedSemaphore(jobs)
913903
self._ghccc = ghccc
914904

@@ -945,7 +935,7 @@ async def _compile(self, opname, c) -> None:
945935
assert stderr is None, stderr
946936
if process.returncode:
947937
raise RuntimeError(f"{self._clang} exited with {process.returncode}")
948-
self._stencils_built[opname] = await ObjectParserDefault(o, self._readobj, self._mc).parse()
938+
self._stencils_built[opname] = await ObjectParserDefault(o, self._readobj, self._objdump).parse()
949939

950940
async def build(self) -> None:
951941
generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()

0 commit comments

Comments
 (0)
0