GH-116134: JIT `aarch64-pc-windows-msvc` by brandtbucher · Pull Request #116130 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

GH-116134: JIT aarch64-pc-windows-msvc #116130

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use position-independent code on aarch64-apple-darwin
  • Loading branch information
brandtbucher committed Feb 15, 2024
commit 85d8290541388039f0dd61463e92b1fbeb378f13
48 changes: 41 additions & 7 deletions Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,23 +198,23 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
// 32-bit absolute address.
// Check that we're not out of range of 32 unsigned bits:
assert(value < (1ULL << 32));
*loc32 = (uint32_t)value;
loc32[0] = (uint32_t)value;
continue;
case HoleKind_ARM64_RELOC_UNSIGNED:
case HoleKind_IMAGE_REL_AMD64_ADDR64:
case HoleKind_R_AARCH64_ABS64:
case HoleKind_X86_64_RELOC_UNSIGNED:
case HoleKind_R_X86_64_64:
// 64-bit absolute address.
*loc64 = value;
loc64[0] = value;
continue;
case HoleKind_R_X86_64_GOTPCRELX:
case HoleKind_R_X86_64_REX_GOTPCRELX: {
// 32-bit relative address.
// Try to relax the GOT load into an immediate value:
uint64_t relaxed = *(uint64_t *)(value + 4) - 4;
if ((int64_t)relaxed - (int64_t)location >= -(1L << 31) &&
(int64_t)relaxed - (int64_t)location + 1 < (1L << 31))
(int64_t)relaxed - (int64_t)location + 1 < (1LL << 31))
{
if (location[-2] == 0x8B) {
location[-2] = 0x8D;
Expand Down Expand Up @@ -247,16 +247,16 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
case HoleKind_R_X86_64_PC32:
// 32-bit relative address.
value -= (uint64_t)location;
*loc32 = (uint32_t)value;
loc32[0] = (uint32_t)value;
continue;
case HoleKind_R_AARCH64_CALL26:
case HoleKind_R_AARCH64_JUMP26:
// 28-bit relative branch.
assert(IS_AARCH64_BRANCH(*loc32));
value -= (uint64_t)location;
// Check that we're not out of range of 28 signed bits:
assert((int64_t)value >= -(1 << 27));
assert((int64_t)value < (1 << 27));
assert((int64_t)value >= -(1L << 27));
assert((int64_t)value < (1L << 27));
// Since instructions are 4-byte aligned, only use 26 bits:
assert(get_bits(value, 0, 2) == 0);
set_bits(loc32, 0, value, 2, 26);
Expand Down Expand Up @@ -289,11 +289,44 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
assert(get_bits(*loc32, 21, 2) == 3);
set_bits(loc32, 5, value, 48, 16);
continue;
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21:
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21: {
// 21-bit count of pages between this page and an absolute address's
// page... I know, I know, it's weird. Pairs nicely with
// ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below).
assert(i + 1 < stencil->holes_size);
const Hole *next_hole = &stencil->holes[i + 1];
assert(next_hole->kind == HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12);
assert(next_hole->offset == hole->offset + 4);
assert(next_hole->symbol == hole->symbol);
assert(next_hole->addend == hole->addend);
assert(next_hole->value == hole->value);
assert(IS_AARCH64_ADRP(*loc32));
unsigned char rd = get_bits(loc32[0], 0, 5);
assert(IS_AARCH64_LDR_OR_STR(*(loc32 + 1)));
unsigned char rt = get_bits(loc32[1], 0, 5);
unsigned char rn = get_bits(loc32[1], 5, 5);
assert(rd == rn && rn == rt);
uint64_t relaxed = *(uint64_t *)value;
if (relaxed < (1UL << 16)) {
loc32[0] = 0xD503201F;
loc32[1] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd;
i++;
continue;
}
if (relaxed < (1ULL << 32)) {
loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | rd;
loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | rd;
i++;
continue;
}
relaxed = (uint64_t)value - (uint64_t)location - 4;
if ((relaxed & 0x3) == 0 && (int64_t)relaxed >= -(1L << 19) && (int64_t)relaxed < (1L << 19)) {
loc32[0] = 0xD503201F;
loc32[1] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | rd;
i++;
continue;
}
assert(0);
// Number of pages between this page and the value's page:
value = (value >> 12) - ((uint64_t)location >> 12);
// Check that we're not out of range of 21 signed bits:
Expand All @@ -304,6 +337,7 @@ patch(unsigned char *base, const Stencil *stencil, uint64_t *patches)
// value[2:21] goes in loc[5:26]:
set_bits(loc32, 5, value, 2, 19);
continue;
}
case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12:
// 12-bit low part of an absolute address. Pairs nicely with
// ARM64_RELOC_GOT_LOAD_PAGE21 (above).
Expand Down
7 changes: 4 additions & 3 deletions Tools/jit/_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class _Target(typing.Generic[_S, _R]):
alignment: int = 1
pic: bool = False
prefix: str = ""
small: bool = False
debug: bool = False
force: bool = False
verbose: bool = False
Expand Down Expand Up @@ -132,7 +133,7 @@ async def _compile(
# - "medium": assumes that code resides in the lowest 2GB of memory,
# and makes no assumptions about data (not available on aarch64)
# - "large": makes no assumptions about either code or data
"-mcmodel=small" if self.pic else "-mcmodel=large",
"-mcmodel=small" if self.small else "-mcmodel=large",
"-o",
f"{o}",
"-std=c11",
Expand Down Expand Up @@ -395,7 +396,7 @@ def _handle_relocation(
def get_target(host: str) -> _COFF | _ELF | _MachO:
"""Build a _Target for the given host "triple" and options."""
if re.fullmatch(r"aarch64-apple-darwin.*", host):
return _MachO(host, alignment=8, prefix="_")
return _MachO(host, alignment=8, pic=True, prefix="_")
if re.fullmatch(r"aarch64-.*-linux-gnu", host):
return _ELF(host, alignment=8)
if re.fullmatch(r"i686-pc-windows-msvc", host):
Expand All @@ -405,5 +406,5 @@ def get_target(host: str) -> _COFF | _ELF | _MachO:
if re.fullmatch(r"x86_64-pc-windows-msvc", host):
return _COFF(host)
if re.fullmatch(r"x86_64-.*-linux-gnu", host):
return _ELF(host, pic=True)
return _ELF(host, pic=True, small=True)
raise ValueError(host)
0