-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Refactor allocation of locals and stack realignment #72028
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Refactor allocation of locals and stack realignment #72028
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) ChangesFactor out some stack allocation in a separate function. This patch splits out the generic portion of a larger refactoring done as a part of stack clash protection support. Patch is 27.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72028.diff 11 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 18e3aa2b0ecec86..5f617e3a176a16e 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,6 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -688,6 +689,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
emitCalleeSavedRestores(MBB, MBBI, true);
}
+void AArch64FrameLowering::allocateStackSpace(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI,
+ bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {
+
+ if (!AllocSize)
+ return;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ Register TargetReg =
+ NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP;
+ // SUB Xd/SP, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
+ EmitCFI, InitialOffset);
+
+ if (NeedsRealignment) {
+ const int64_t MaxAlign = MFI.getMaxAlign().value();
+ const uint64_t AndMask = ~(MaxAlign - 1);
+ // AND SP, Xd, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI.setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+}
+
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
@@ -1774,7 +1813,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
@@ -1787,67 +1826,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
- AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
- AllocateAfter = SVEStackSize - AllocateBefore;
+ SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+ SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
}
// Allocate space for the callee saves (if any).
- emitFrameOffset(
- MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
- MachineInstr::FrameSetup, false, false, nullptr,
- EmitAsyncCFI && !HasFP && AllocateBefore,
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+ StackOffset CFAOffset =
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+ allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false,
+ nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
- // Finally allocate remaining SVE stack space.
- emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
- -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
- nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
- AllocateBefore + StackOffset::getFixed(
- (int64_t)MFI.getStackSize() - NumBytes));
-
- // Allocate space for the rest of the frame.
- if (NumBytes) {
- unsigned scratchSPReg = AArch64::SP;
-
- if (NeedsRealignment) {
- scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
- assert(scratchSPReg != AArch64::NoRegister);
- }
-
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- emitFrameOffset(
- MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
- SVEStackSize +
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
- }
- if (NeedsRealignment) {
- assert(MFI.getMaxAlign() > Align(1));
- assert(scratchSPReg != AArch64::SP);
-
- // SUB X9, SP, NumBytes
- // -- X9 is temporary register, so shouldn't contain any live data here,
- // -- free to use. This is already produced by emitFrameOffset above.
- // AND SP, X9, 0b11111...0000
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
-
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(scratchSPReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
- AFI->setStackRealigned(true);
-
- // No need for SEH instructions here; if we're realigning the stack,
- // we've set a frame pointer and already finished the SEH prologue.
- assert(!NeedsWinCFI);
- }
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+ CFAOffset);
}
// If we need a base pointer, set it up here. It's whatever the value of the
|
The patch is almost, but not quite NFC. The only difference should be that where we have adjacent allocation of stack space
becomes
|
bfd551c
to
90f8b84
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Momchil for the work!
Your comment helped me to understand the reason there is less emmitframe function being called.
It looks a sensible change to me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The refactoring of FrameLowering code looks good to me. Just one very small nit on one of the tests.
Factor out some stack allocaton in a separate function. This patch splits out the generatic portion of a larger refactoring done as a part of stack clash protection support.
90f8b84
to
a0f51e9
Compare
…2028) Factor out some stack allocation in a separate function. This patch splits out the generic portion of a larger refactoring done as a part of stack clash protection support. The patch is almost, but not quite NFC. The only difference should be that where we have adjacent allocation of stack space for local SVE objects and non-local SVE objects the order of `sub sp, ...` and `addvl sp, ...` instructions is reversed, because now it's done with a single call to `emitFrameOffset` and it happens add/subtract the fixed part before the scalable part, e.g. addvl sp, sp, #-2 sub sp, sp, llvm#16, lsl llvm#12 sub sp, sp, llvm#16 becomes sub sp, sp, llvm#16, lsl llvm#12 sub sp, sp, llvm#16 addvl sp, sp, #-2
Factor out some stack allocation in a separate function. This patch splits out the generic portion of a larger refactoring done as a part of stack clash protection support.