@@ -212,7 +212,7 @@ class ObjectParser:
212
212
"--sections" ,
213
213
]
214
214
215
- def __init__ (self , path : pathlib .Path , reader : str , mc : str | None = None , symbol_prefix : str = "" ) -> None :
215
+ def __init__ (self , path : pathlib .Path , reader : str , dumper : str , symbol_prefix : str = "" ) -> None :
216
216
self .path = path
217
217
self .body = bytearray ()
218
218
self .body_symbols = {}
@@ -223,11 +223,19 @@ def __init__(self, path: pathlib.Path, reader: str, mc: str | None = None, symbo
223
223
self .relocations_todo = []
224
224
self .symbol_prefix = symbol_prefix
225
225
self .reader = reader
226
- self .mc = mc
226
+ self .dumper = dumper
227
227
self .data_size = 0
228
+ self .data = []
229
+ self .code_size = 0
228
230
229
231
async def parse (self ):
230
- # subprocess.run([find_llvm_tool("llvm-objdump")[0], self.path, "-dr"], check=True) # XXX
232
+ process = await asyncio .create_subprocess_exec (self .dumper , self .path , "--disassemble" , "--reloc" , stdout = subprocess .PIPE )
233
+ stdout , stderr = await process .communicate ()
234
+ assert stderr is None , stderr
235
+ if process .returncode :
236
+ raise RuntimeError (f"{ self .dumper } exited with { process .returncode } " )
237
+ disassembly = [line .lstrip ().expandtabs () for line in stdout .decode ().splitlines ()]
238
+ disassembly = [line for line in disassembly if re .match (r"[0-9a-f]+[: ]" , line )]
231
239
process = await asyncio .create_subprocess_exec (self .reader , * self ._ARGS , self .path , stdout = subprocess .PIPE )
232
240
stdout , stderr = await process .communicate ()
233
241
assert stderr is None , stderr
@@ -258,34 +266,17 @@ async def parse(self):
258
266
addend = newhole .addend + self .body_symbols [newhole .symbol ] - entry
259
267
newhole = Hole (newhole .kind , "_jit_base" , newhole .offset , addend )
260
268
holes .append (newhole )
261
- if self .mc is not None :
262
- process = await asyncio .create_subprocess_exec (self .mc , "--disassemble" , "--show-encoding" , stdin = subprocess .PIPE , stdout = subprocess .PIPE )
263
- stdout , stderr = await process .communicate (" " .join (f'0x{ byte :02x} ' for byte in self .body [:got - padding - self .data_size ]).encode ())
264
- if process .returncode :
265
- raise RuntimeError (f"{ self .mc } exited with { process .returncode } " )
266
- disassembly = [line .removeprefix ("\t " ).expandtabs () for line in stdout .decode ().splitlines ()]
267
- assert disassembly [0 ].startswith ("." )
268
- del disassembly [0 ]
269
- else :
270
- disassembly = [f"# <code>\t \t \t \t # encoding: [{ ',' .join (f'0x{ byte :02x} ' for byte in self .body [:got - padding - self .data_size ])} ]" .expandtabs ()]
271
- offset = 0
272
- size = 0
273
- comment = None
274
- for i , line in enumerate (disassembly ):
275
- if match := re .search (r"(#|;|//) encoding: \[((?:(0x[0-9a-f]{2}|A),?)+)\]" , line ):
276
- offset += size
277
- disassembly [i ] = f"{ offset :03x} : { line } "
278
- if match :
279
- comment = match .group (1 )
280
- size = len (match .group (2 ).split ("," ))
281
- offset += size
282
- assert self .data_size == got - padding - offset , (self .data_size , got , padding , offset )
269
+ offset = got - self .data_size - padding
270
+ comment = "#"
271
+ assert self .body [got - self .data_size - padding :got - padding ] == bytes (self .data ), breakpoint ()
272
+ assert self .data_size == got - padding - offset , breakpoint ()
283
273
if self .data_size :
284
- disassembly .append (f"{ offset :03x } : " + f"{ comment } <data> \t \t \t \t { comment } encoding: [ { ',' . join ( f'0x { byte :02x } ' for byte in self .body [offset :offset + self .data_size ])} ] " .expandtabs ())
285
- disassembly .append (f"{ offset :03x } : " + f"\t \t \t \t \t { comment } data = { str ( bytes ( self .body [offset :offset + self .data_size ])). removeprefix ( 'b' )} " .expandtabs ())
274
+ disassembly .append (f"{ offset :x } : " + f"{ comment } { str ( bytes ( self .body [offset :offset + self .data_size ])). removeprefix ( 'b' ) } " .expandtabs ())
275
+ disassembly .append (f"{ offset :x } : " + f"{ ' ' . join ( f' { byte :02x } ' for byte in self .body [offset :offset + self .data_size ])} " .expandtabs ())
286
276
offset += self .data_size
287
277
if padding :
288
- disassembly .append (f"{ offset :03x} : " + f"{ comment } <padding>\t \t \t \t { comment } encoding: [{ ',' .join (f'0x{ byte :02x} ' for byte in self .body [offset :offset + padding ])} ]" .expandtabs ())
278
+ disassembly .append (f"{ offset :x} : " + f"{ comment } <padding>" .expandtabs ())
279
+ disassembly .append (f"{ offset :x} : " + f"{ ' ' .join (padding * ['00' ])} " .expandtabs ())
289
280
offset += padding
290
281
for i , (got_symbol , addend ) in enumerate (self .got_entries ):
291
282
if got_symbol in self .body_symbols :
@@ -294,16 +285,17 @@ async def parse(self):
294
285
# XXX: PATCH_ABS_32 on 32-bit platforms?
295
286
holes .append (Hole ("PATCH_ABS_64" , got_symbol , got + 8 * i , addend ))
296
287
symbol_part = f"{ comment } &{ got_symbol } { f' + 0x{ addend :x} ' if addend else '' } "
297
- tabs = " \t " * ( 5 - len ( symbol_part ) // 8 )
298
- disassembly .append (f"{ offset :03x } : " + f"{ symbol_part } { tabs } { comment } encoding: [ { ',' .join (8 * ['0x00 ' ])} ] " .expandtabs ())
288
+ disassembly . append ( f" { offset :x } : " + f" { symbol_part } " . expandtabs () )
289
+ disassembly .append (f"{ offset :x } : " + f"{ ' ' .join (8 * ['00 ' ])} " .expandtabs ())
299
290
offset += 8
300
291
self .body .extend ([0 ] * 8 * len (self .got_entries ))
301
292
padding = 0
302
293
while len (self .body ) % 16 :
303
294
self .body .append (0 )
304
295
padding += 1
305
296
if padding :
306
- disassembly .append (f"{ offset :03x} : " + f"{ comment } <padding>\t \t \t \t { comment } encoding: [{ ',' .join (padding * ['0x00' ])} ]" .expandtabs ())
297
+ disassembly .append (f"{ offset :x} : " + f"{ comment } <padding>" .expandtabs ())
298
+ disassembly .append (f"{ offset :x} : " + f"{ ' ' .join (padding * ['00' ])} " .expandtabs ())
307
299
offset += padding
308
300
holes .sort (key = lambda hole : hole .offset )
309
301
assert offset == len (self .body ), (self .path , offset , len (self .body ))
@@ -728,21 +720,19 @@ def _handle_section(self, section: COFFSection) -> None:
728
720
flags = {flag ["Name" ] for flag in section ["Characteristics" ]["Flags" ]}
729
721
if "SectionData" not in section :
730
722
return
723
+ section_data = section ["SectionData" ]
731
724
if flags & {"IMAGE_SCN_LINK_COMDAT" , "IMAGE_SCN_MEM_EXECUTE" , "IMAGE_SCN_MEM_READ" , "IMAGE_SCN_MEM_WRITE" } == {"IMAGE_SCN_LINK_COMDAT" , "IMAGE_SCN_MEM_READ" }:
732
725
# XXX: Merge these
733
726
self .data_size += len (section_data ["Bytes" ])
734
727
before = self .body_offsets [section ["Number" ]] = len (self .body )
735
- section_data = section ["SectionData" ]
736
728
self .body .extend (section_data ["Bytes" ])
737
729
elif flags & {"IMAGE_SCN_MEM_EXECUTE" }:
738
730
assert not self .data_size , self .data_size
739
731
before = self .body_offsets [section ["Number" ]] = len (self .body )
740
- section_data = section ["SectionData" ]
741
732
self .body .extend (section_data ["Bytes" ])
742
733
elif flags & {"IMAGE_SCN_MEM_READ" }:
743
734
self .data_size += len (section_data ["Bytes" ])
744
735
before = self .body_offsets [section ["Number" ]] = len (self .body )
745
- section_data = section ["SectionData" ]
746
736
self .body .extend (section_data ["Bytes" ])
747
737
else :
748
738
return
@@ -765,13 +755,18 @@ def _handle_section(self, section: MachOSection) -> None:
765
755
flags = {flag ["Name" ] for flag in section ["Attributes" ]["Flags" ]}
766
756
if flags & {"SomeInstructions" }:
767
757
assert not self .data_size
758
+ assert not self .data
759
+ self .code_size += len (section_data ["Bytes" ]) + (section ["Address" ] - len (self .body ))
768
760
self .body .extend ([0 ] * (section ["Address" ] - len (self .body )))
769
761
before = self .body_offsets [section ["Index" ]] = section ["Address" ]
770
762
self .body .extend (section_data ["Bytes" ])
771
763
else :
772
- self .data_size += len (section_data ["Bytes" ]) + (section ["Address" ] - len (self .body ))
764
+ self .data_size += section ["Address" ] - len (self .body )
765
+ self .data .extend ([0 ] * (section ["Address" ] - len (self .body )))
773
766
self .body .extend ([0 ] * (section ["Address" ] - len (self .body )))
774
767
before = self .body_offsets [section ["Index" ]] = section ["Address" ]
768
+ self .data_size += len (section_data ["Bytes" ])
769
+ self .data .extend (section_data ["Bytes" ])
775
770
self .body .extend (section_data ["Bytes" ])
776
771
name = section ["Name" ]["Value" ]
777
772
# assert name.startswith("_") # XXX
@@ -902,13 +897,8 @@ def __init__(
902
897
self ._verbose = verbose
903
898
self ._clang , clang_version = find_llvm_tool ("clang" )
904
899
self ._readobj , readobj_version = find_llvm_tool ("llvm-readobj" )
905
- try :
906
- self ._mc , mc_version = find_llvm_tool ("llvm-mc" )
907
- except RuntimeError :
908
- self ._mc = None
909
- self ._stderr (f"Using { self ._clang } ({ clang_version } ) and { self ._readobj } ({ readobj_version } )." )
910
- else :
911
- self ._stderr (f"Using { self ._clang } ({ clang_version } ), { self ._readobj } ({ readobj_version } ), and { self ._mc } ({ mc_version } )." )
900
+ self ._objdump , objdump_version = find_llvm_tool ("llvm-objdump" )
901
+ self ._stderr (f"Using { self ._clang } ({ clang_version } ), { self ._readobj } ({ readobj_version } ), and { self ._objdump } ({ objdump_version } )." )
912
902
self ._semaphore = asyncio .BoundedSemaphore (jobs )
913
903
self ._ghccc = ghccc
914
904
@@ -945,7 +935,7 @@ async def _compile(self, opname, c) -> None:
945
935
assert stderr is None , stderr
946
936
if process .returncode :
947
937
raise RuntimeError (f"{ self ._clang } exited with { process .returncode } " )
948
- self ._stencils_built [opname ] = await ObjectParserDefault (o , self ._readobj , self ._mc ).parse ()
938
+ self ._stencils_built [opname ] = await ObjectParserDefault (o , self ._readobj , self ._objdump ).parse ()
949
939
950
940
async def build (self ) -> None :
951
941
generated_cases = PYTHON_EXECUTOR_CASES_C_H .read_text ()
0 commit comments