18
18
19
19
#include "jit_stencils.h"
20
20
21
- // Boring memory management stuff //////////////////////////////////////////////
21
+ // Memory management stuff: ////// //////////////////////////////////////////////
22
22
23
23
#ifndef MS_WINDOWS
24
24
#include <sys/mman.h>
@@ -130,28 +130,29 @@ mark_readable(char *memory, size_t size)
130
130
return 0 ;
131
131
}
132
132
133
- // Cool JIT compiler stuff /////////////////////////////////////////////////////
133
+ // JIT compiler stuff: //// /////////////////////////////////////////////////////
134
134
135
135
// Warning! AArch64 requires you to get your hands dirty. These are your gloves:
136
136
137
137
// value[i : i + n]
static uint32_t
get_bits(uint64_t value, uint8_t i, uint8_t n)
{
    // The extracted field must fit in the 32-bit return type:
    assert(n <= 32);
    // Shift the field down to bit 0, then mask off everything above it.
    // The mask is built in 64-bit arithmetic so n == 32 doesn't overflow:
    uint64_t shifted = value >> i;
    uint64_t mask = (1ULL << n) - 1;
    return (uint32_t)(shifted & mask);
}
144
144
145
// *loc[i : i + n] = value[j : j + n]
static void
set_bits(uint32_t *loc, uint8_t i, uint8_t n, uint64_t value, uint8_t j)
{
    // The patched field must lie entirely within the 32-bit word:
    assert(i + n <= 32);
    uint64_t field = (1ULL << n) - 1;
    // Clear the bits we're about to patch:
    *loc &= ~(field << i);
    assert(get_bits(*loc, i, n) == 0);
    // Patch the bits:
    *loc |= get_bits(value, j, n) << i;
    assert(get_bits(*loc, i, n) == get_bits(value, j, n));
}
156
157
157
158
// See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions
@@ -165,12 +166,19 @@ patch_bits(uint32_t *loc, uint64_t value, uint8_t i, uint8_t n, uint8_t j)
165
166
// LLD is an awesome reference for how to perform relocations... just keep in
166
167
// mind that Tools/jit/build.py does some filtering and preprocessing for us!
167
168
// Here's a good place to start for each platform:
168
- // - aarch64-apple-darwin: https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp
169
- // - aarch64-unknown-linux-gnu: https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/AArch64.cpp
170
- // - i686-pc-windows-msvc: https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
171
- // - x86_64-apple-darwin: https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/X86_64.cpp
172
- // - x86_64-pc-windows-msvc: https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
173
- // - x86_64-unknown-linux-gnu: https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/AArch64.cpp
169
+ // - aarch64-apple-darwin:
170
+ // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp
171
+ // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.h
172
+ // - aarch64-unknown-linux-gnu:
173
+ // - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/AArch64.cpp
174
+ // - i686-pc-windows-msvc:
175
+ // - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
176
+ // - x86_64-apple-darwin:
177
+ // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/X86_64.cpp
178
+ // - x86_64-pc-windows-msvc:
179
+ // - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp
180
+ // - x86_64-unknown-linux-gnu:
181
+ // - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/X86_64.cpp
174
182
static void
175
183
patch (char * base , const Hole * hole , uint64_t * patches )
176
184
{
@@ -202,50 +210,51 @@ patch(char *base, const Hole *hole, uint64_t *patches)
202
210
assert ((int64_t )value >= - (1 << 27 ));
203
211
assert ((int64_t )value < (1 << 27 ));
204
212
// Since instructions are 4-byte aligned, only use 26 bits:
205
- assert (bits (value , 0 , 2 ) == 0 );
206
- patch_bits (loc32 , value , 2 , 26 , 0 );
213
+ assert (get_bits (value , 0 , 2 ) == 0 );
214
+ set_bits (loc32 , 0 , 26 , value , 2 );
207
215
return ;
208
216
case HoleKind_R_AARCH64_MOVW_UABS_G0_NC :
209
217
// 16-bit low part of an absolute address.
210
218
assert (IS_AARCH64_MOV (* loc32 ));
211
219
// Check the implicit shift (this is "part 0 of 3"):
212
- assert (bits (* loc32 , 21 , 2 ) == 0 );
213
- patch_bits (loc32 , value , 0 , 16 , 5 );
220
+ assert (get_bits (* loc32 , 21 , 2 ) == 0 );
221
+ set_bits (loc32 , 5 , 16 , value , 0 );
214
222
return ;
215
223
case HoleKind_R_AARCH64_MOVW_UABS_G1_NC :
216
224
// 16-bit middle-low part of an absolute address.
217
225
assert (IS_AARCH64_MOV (* loc32 ));
218
226
// Check the implicit shift (this is "part 1 of 3"):
219
- assert (bits (* loc32 , 21 , 2 ) == 1 );
220
- patch_bits (loc32 , value , 16 , 16 , 5 );
227
+ assert (get_bits (* loc32 , 21 , 2 ) == 1 );
228
+ set_bits (loc32 , 5 , 16 , value , 16 );
221
229
return ;
222
230
case HoleKind_R_AARCH64_MOVW_UABS_G2_NC :
223
231
// 16-bit middle-high part of an absolute address.
224
232
assert (IS_AARCH64_MOV (* loc32 ));
225
233
// Check the implicit shift (this is "part 2 of 3"):
226
- assert (bits (* loc32 , 21 , 2 ) == 2 );
227
- patch_bits (loc32 , value , 32 , 16 , 5 );
234
+ assert (get_bits (* loc32 , 21 , 2 ) == 2 );
235
+ set_bits (loc32 , 5 , 16 , value , 32 );
228
236
return ;
229
237
case HoleKind_R_AARCH64_MOVW_UABS_G3 :
230
238
// 16-bit high part of an absolute address.
231
239
assert (IS_AARCH64_MOV (* loc32 ));
232
240
// Check the implicit shift (this is "part 3 of 3"):
233
- assert (bits (* loc32 , 21 , 2 ) == 3 );
234
- patch_bits (loc32 , value , 48 , 16 , 5 );
241
+ assert (get_bits (* loc32 , 21 , 2 ) == 3 );
242
+ set_bits (loc32 , 5 , 16 , value , 48 );
235
243
return ;
236
244
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21 :
237
245
// 21-bit count of pages between this page and an absolute address's
238
246
// page... I know, I know, it's weird. Pairs nicely with
239
247
// ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below).
240
248
assert (IS_AARCH64_ADRP (* loc32 ));
241
- // The high 31 bits are ignored, so they must match:
242
- assert (bits (value , 33 , 31 ) == bits ((uint64_t )location , 33 , 31 ));
243
249
// Number of pages between this page and the value's page:
244
- value = bits (value , 12 , 21 ) - bits ((uint64_t )location , 12 , 21 );
250
+ value = (value >> 12 ) - ((uint64_t )location >> 12 );
251
+ // Check that we're not out of range of 21 signed bits:
252
+ assert ((int64_t )value >= - (1 << 20 ));
253
+ assert ((int64_t )value < (1 << 20 ));
245
254
// value[0:2] goes in loc[29:31]:
246
- patch_bits (loc32 , value , 0 , 2 , 29 );
255
+ set_bits (loc32 , 29 , 2 , value , 0 );
247
256
// value[2:21] goes in loc[5:26]:
248
- patch_bits (loc32 , value , 2 , 19 , 5 );
257
+ set_bits (loc32 , 5 , 19 , value , 2 );
249
258
return ;
250
259
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12 :
251
260
// 12-bit low part of an absolute address. Pairs nicely with
@@ -254,14 +263,14 @@ patch(char *base, const Hole *hole, uint64_t *patches)
254
263
// There might be an implicit shift encoded in the instruction:
255
264
uint8_t shift = 0 ;
256
265
if (IS_AARCH64_LDR_OR_STR (* loc32 )) {
257
- shift = (uint8_t )bits (* loc32 , 30 , 2 );
266
+ shift = (uint8_t )get_bits (* loc32 , 30 , 2 );
258
267
// If both of these are set, the shift is supposed to be 4.
259
268
// That's pretty weird, and it's never actually been observed...
260
- assert (bits (* loc32 , 23 , 1 ) == 0 || bits (* loc32 , 26 , 1 ) == 0 );
269
+ assert (get_bits (* loc32 , 23 , 1 ) == 0 || get_bits (* loc32 , 26 , 1 ) == 0 );
261
270
}
262
- value = bits (value , 0 , 12 );
263
- assert (bits (value , 0 , shift ) == 0 );
264
- patch_bits (loc32 , value , shift , 12 , 10 );
271
+ value = get_bits (value , 0 , 12 );
272
+ assert (get_bits (value , 0 , shift ) == 0 );
273
+ set_bits (loc32 , 10 , 12 , value , shift );
265
274
return ;
266
275
}
267
276
Py_UNREACHABLE ();
@@ -287,7 +296,8 @@ emit(const StencilGroup *stencil_group, uint64_t patches[])
287
296
288
297
// This becomes the executor's execute member, and handles some setup/teardown:
289
298
static _Py_CODEUNIT *
290
- execute (_PyExecutorObject * executor , _PyInterpreterFrame * frame , PyObject * * stack_pointer )
299
+ execute (_PyExecutorObject * executor , _PyInterpreterFrame * frame ,
300
+ PyObject * * stack_pointer )
291
301
{
292
302
PyThreadState * tstate = PyThreadState_Get ();
293
303
assert (PyObject_TypeCheck (executor , & _PyUOpExecutor_Type ));
@@ -340,8 +350,9 @@ _PyJIT_Compile(_PyUOpExecutorObject *executor)
340
350
text += stencil_group -> text .body_size ;
341
351
data += stencil_group -> data .body_size ;
342
352
}
343
- // Change the permissions... DO NOT LEAVE ANYTHING WRITABLE!
344
- if (mark_executable (memory , text_size ) || mark_readable (memory + text_size , data_size )) {
353
+ if (mark_executable (memory , text_size ) ||
354
+ mark_readable (memory + text_size , data_size ))
355
+ {
345
356
jit_free (memory , text_size + data_size );
346
357
goto fail ;
347
358
}
0 commit comments