rustc_codegen_llvm/va_arg.rs

use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::common::IntPredicate;
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::traits::{
    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
};
use rustc_middle::ty::Ty;
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};

use crate::builder::Builder;
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
use crate::value::Value;

fn round_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    mut value: &'ll Value,
    align: Align,
) -> &'ll Value {
    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
    return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
}
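
// Worked example (a sketch, not part of the lowering): with `align` a power of two, the
// two steps above compute `(value + (align - 1)) & !(align - 1)`, since `-align` is the
// same bit mask in two's complement. On plain integers:
//
//     fn round_up(value: i32, align: i32) -> i32 {
//         (value + (align - 1)) & -align
//     }
//
//     assert_eq!(round_up(13, 8), 16);
//     assert_eq!(round_up(16, 8), 16);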

fn round_pointer_up_to_alignment<'ll>(
    bx: &mut Builder<'_, 'll, '_>,
    addr: &'ll Value,
    align: Align,
    ptr_ty: &'ll Type,
) -> &'ll Value {
    let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
    ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
    bx.inttoptr(ptr_as_int, ptr_ty)
}

fn emit_direct_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    size: Size,
    align: Align,
    slot_size: Align,
    allow_higher_align: bool,
    force_right_adjust: bool,
) -> (&'ll Value, Align) {
    let va_list_ty = bx.type_ptr();
    let va_list_addr = list.immediate();

    let ptr = bx.load(va_list_ty, va_list_addr, bx.tcx().data_layout.pointer_align.abi);

    let (addr, addr_align) = if allow_higher_align && align > slot_size {
        (round_pointer_up_to_alignment(bx, ptr, align, bx.type_ptr()), align)
    } else {
        (ptr, slot_size)
    };

    let aligned_size = size.align_to(slot_size).bytes() as i32;
    let full_direct_size = bx.cx().const_i32(aligned_size);
    let next = bx.inbounds_ptradd(addr, full_direct_size);
    bx.store(next, va_list_addr, bx.tcx().data_layout.pointer_align.abi);

    if size.bytes() < slot_size.bytes()
        && bx.tcx().sess.target.endian == Endian::Big
        && force_right_adjust
    {
        let adjusted_size = bx.cx().const_i32((slot_size.bytes() - size.bytes()) as i32);
        let adjusted = bx.inbounds_ptradd(addr, adjusted_size);
        (adjusted, addr_align)
    } else {
        (addr, addr_align)
    }
}
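
// In plain terms (a sketch, not the emitted IR): the code above loads the current argument
// pointer out of the va_list, rounds it up if a higher alignment is both required and
// allowed, advances the stored pointer by the slot-aligned size, and returns the old
// pointer. On a byte-addressed view, and with the alignment handling omitted, this is
// roughly:
//
//     unsafe fn next_slot(va_list: *mut *mut u8, size: usize, slot_size: usize) -> *mut u8 {
//         let addr = *va_list;
//         *va_list = addr.add(size.next_multiple_of(slot_size));
//         addr
//     }
//
// The big-endian `force_right_adjust` case additionally shifts the returned pointer to the
// high end of the slot for values smaller than the slot.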

enum PassMode {
    Direct,
    Indirect,
}

enum SlotSize {
    Bytes8 = 8,
    Bytes4 = 4,
}

enum AllowHigherAlign {
    No,
    Yes,
}

enum ForceRightAdjust {
    No,
    Yes,
}

fn emit_ptr_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
    pass_mode: PassMode,
    slot_size: SlotSize,
    allow_higher_align: AllowHigherAlign,
    force_right_adjust: ForceRightAdjust,
) -> &'ll Value {
    let indirect = matches!(pass_mode, PassMode::Indirect);
    let allow_higher_align = matches!(allow_higher_align, AllowHigherAlign::Yes);
    let force_right_adjust = matches!(force_right_adjust, ForceRightAdjust::Yes);
    let slot_size = Align::from_bytes(slot_size as u64).unwrap();

    let layout = bx.cx.layout_of(target_ty);
    let (llty, size, align) = if indirect {
        (
            bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx),
            bx.cx.data_layout().pointer_size,
            bx.cx.data_layout().pointer_align,
        )
    } else {
        (layout.llvm_type(bx.cx), layout.size, layout.align)
    };
    let (addr, addr_align) = emit_direct_ptr_va_arg(
        bx,
        list,
        size,
        align.abi,
        slot_size,
        allow_higher_align,
        force_right_adjust,
    );
    if indirect {
        let tmp_ret = bx.load(llty, addr, addr_align);
        bx.load(bx.cx.layout_of(target_ty).llvm_type(bx.cx), tmp_ret, align.abi)
    } else {
        bx.load(llty, addr, addr_align)
    }
}
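
// Worked example (a sketch): on Windows x86_64 (see `emit_va_arg` below) a 16-byte value
// does not fit the 8-byte slot and is passed `PassMode::Indirect`. The slot then holds a
// pointer to the value, so two loads are emitted, conceptually:
//
//     let p: *const Big = *(slot as *const *const Big); // first load: the stored pointer
//     let value: Big = *p;                              // second load: the value itself
//
// whereas `PassMode::Direct` reads the value from the slot with a single load.
// (`slot` and `Big` are illustrative names, not part of this module.)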

fn emit_aapcs_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the AAPCS64 calling convention for va_args, see
    // https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    //
    // typedef struct va_list {
    //     void * stack; // next stack param
    //     void * gr_top; // end of GP arg reg save area
    //     void * vr_top; // end of FP/SIMD arg reg save area
    //     int gr_offs; // offset from gr_top to next GP register arg
    //     int vr_offs; // offset from vr_top to next FP/SIMD register arg
    // } va_list;
    let va_list_addr = list.immediate();

    // There is no padding between fields since `void*` is size=8 align=8 and `int` is size=4 align=4.
    // See https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst
    // Table 1, Byte size and byte alignment of fundamental data types
    // Table 3, Mapping of C & C++ built-in data types
    let ptr_offset = 8;
    let i32_offset = 4;
    let gr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(ptr_offset));
    let vr_top = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * ptr_offset));
    let gr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset));
    let vr_offs = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(3 * ptr_offset + i32_offset));

    let layout = bx.cx.layout_of(target_ty);

    let maybe_reg = bx.append_sibling_block("va_arg.maybe_reg");
    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let on_stack = bx.append_sibling_block("va_arg.on_stack");
    let end = bx.append_sibling_block("va_arg.end");
    let zero = bx.const_i32(0);
    let offset_align = Align::from_bytes(4).unwrap();

    let gr_type = target_ty.is_any_ptr() || target_ty.is_integral();
    let (reg_off, reg_top, slot_size) = if gr_type {
        let nreg = (layout.size.bytes() + 7) / 8;
        (gr_offs, gr_top, nreg * 8)
    } else {
        let nreg = (layout.size.bytes() + 15) / 16;
        (vr_offs, vr_top, nreg * 16)
    };

    // If the offset is >= 0, the value is on the stack.
    let mut reg_off_v = bx.load(bx.type_i32(), reg_off, offset_align);
    let use_stack = bx.icmp(IntPredicate::IntSGE, reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, maybe_reg);

    // The value at this point might be in a register, but there is a chance that
    // it could be on the stack, so we have to update the offset and then check
    // the offset again.

    bx.switch_to_block(maybe_reg);
    if gr_type && layout.align.abi.bytes() > 8 {
        reg_off_v = bx.add(reg_off_v, bx.const_i32(15));
        reg_off_v = bx.and(reg_off_v, bx.const_i32(-16));
    }
    let new_reg_off_v = bx.add(reg_off_v, bx.const_i32(slot_size as i32));

    bx.store(new_reg_off_v, reg_off, offset_align);

    // Check whether we have overflowed the registers as a result of this.
    // If we have, then we need to use the stack for this value.
    let use_stack = bx.icmp(IntPredicate::IntSGT, new_reg_off_v, zero);
    bx.cond_br(use_stack, on_stack, in_reg);

    bx.switch_to_block(in_reg);
    let top_type = bx.type_ptr();
    let top = bx.load(top_type, reg_top, dl.pointer_align.abi);

    // reg_value = *(@top + reg_off_v);
    let mut reg_addr = bx.ptradd(top, reg_off_v);
    if bx.tcx().sess.target.endian == Endian::Big && layout.size.bytes() != slot_size {
        // On big-endian systems the value is right-aligned in its slot.
        let offset = bx.const_i32((slot_size - layout.size.bytes()) as i32);
        reg_addr = bx.ptradd(reg_addr, offset);
    }
    let reg_type = layout.llvm_type(bx);
    let reg_value = bx.load(reg_type, reg_addr, layout.align.abi);
    bx.br(end);

    // On-stack block
    bx.switch_to_block(on_stack);
    let stack_value = emit_ptr_va_arg(
        bx,
        list,
        target_ty,
        PassMode::Direct,
        SlotSize::Bytes8,
        AllowHigherAlign::Yes,
        ForceRightAdjust::No,
    );
    bx.br(end);

    bx.switch_to_block(end);
    let val =
        bx.phi(layout.immediate_llvm_type(bx), &[reg_value, stack_value], &[in_reg, on_stack]);

    val
}
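
// Worked example (a sketch of the accounting above): for a `u64` argument, `gr_type` is
// true, so `nreg = 1` and `slot_size = 8`. If `gr_offs` currently holds `-16` (two GP
// save-area slots still unread), the new offset is `-8`, which is still negative, so the
// value is loaded from `gr_top - 16`. For an `f64`, the same walk uses `vr_offs`/`vr_top`
// with 16-byte slots instead.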

fn emit_powerpc_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // struct __va_list_tag {
    //   unsigned char gpr;
    //   unsigned char fpr;
    //   unsigned short reserved;
    //   void *overflow_arg_area;
    //   void *reg_save_area;
    // };
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // Rust does not currently support any powerpc softfloat targets.
    let target = &bx.cx.tcx.sess.target;
    let is_soft_float_abi = target.abi == "softfloat";
    assert!(!is_soft_float_abi);

    // All instances of VaArgSafe are passed directly.
    let is_indirect = false;

    let (is_i64, is_int, is_f64) = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            rustc_abi::Primitive::Int(integer, _) => (integer.size().bits() == 64, true, false),
            rustc_abi::Primitive::Float(float) => (false, false, float.size().bits() == 64),
            rustc_abi::Primitive::Pointer(_) => (false, true, false),
        },
        _ => unreachable!("all instances of VaArgSafe are represented as scalars"),
    };

    let num_regs_addr = if is_int || is_soft_float_abi {
        va_list_addr // gpr
    } else {
        bx.inbounds_ptradd(va_list_addr, bx.const_usize(1)) // fpr
    };

    let mut num_regs = bx.load(bx.type_i8(), num_regs_addr, dl.i8_align.abi);

    // "Align" the register count when the type is passed as `i64`.
    if is_i64 || (is_f64 && is_soft_float_abi) {
        num_regs = bx.add(num_regs, bx.const_u8(1));
        num_regs = bx.and(num_regs, bx.const_u8(0b1111_1110));
    }

    let max_regs = 8u8;
    let use_regs = bx.icmp(IntPredicate::IntULT, num_regs, bx.const_u8(max_regs));

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    let reg_addr = {
        bx.switch_to_block(in_reg);

        let reg_safe_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2 + 4));
        let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, dl.pointer_align.abi);

        // Floating-point registers start after the general-purpose registers.
        if !is_int && !is_soft_float_abi {
            reg_addr = bx.inbounds_ptradd(reg_addr, bx.cx.const_usize(32))
        }

        // Get the address of the saved value by scaling the number of
        // registers we've used by the size of a register.
        let reg_size = if is_int || is_soft_float_abi { 4 } else { 8 };
        let reg_offset = bx.mul(num_regs, bx.cx().const_u8(reg_size));
        let reg_addr = bx.inbounds_ptradd(reg_addr, reg_offset);

        // Increase the used-register count.
        let reg_incr = if is_i64 || (is_f64 && is_soft_float_abi) { 2 } else { 1 };
        let new_num_regs = bx.add(num_regs, bx.cx.const_u8(reg_incr));
        bx.store(new_num_regs, num_regs_addr, dl.i8_align.abi);

        bx.br(end);

        reg_addr
    };

    let mem_addr = {
        bx.switch_to_block(in_mem);

        bx.store(bx.const_u8(max_regs), num_regs_addr, dl.i8_align.abi);

        // Everything in the overflow area is rounded up to a size of at least 4.
        let overflow_area_align = Align::from_bytes(4).unwrap();

        let size = if !is_indirect {
            layout.layout.size.align_to(overflow_area_align)
        } else {
            dl.pointer_size
        };

        let overflow_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2));
        let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, dl.pointer_align.abi);

        // Round up the address of the argument to its alignment.
        if layout.layout.align.abi > overflow_area_align {
            overflow_area = round_pointer_up_to_alignment(
                bx,
                overflow_area,
                layout.layout.align.abi,
                bx.type_ptr(),
            );
        }

        let mem_addr = overflow_area;

        // Increase the overflow area.
        overflow_area = bx.inbounds_ptradd(overflow_area, bx.const_usize(size.bytes()));
        bx.store(overflow_area, overflow_area_ptr, dl.pointer_align.abi);

        bx.br(end);

        mem_addr
    };

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr = if is_indirect {
        bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi)
    } else {
        val_addr
    };
    bx.load(val_type, val_addr, layout.align.abi)
}
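
// Worked example (a sketch of the accounting above): for an `f64` on hard-float 32-bit
// powerpc, `is_int` is false, so the count lives in the `fpr` byte, `reg_size` is 8, and
// the FP save area starts 32 bytes (8 GPRs * 4 bytes) into `reg_save_area`. With
// `fpr == 2`, the value is read from `reg_save_area + 32 + 2 * 8` and `fpr` becomes 3;
// once the count reaches 8, the argument is taken from the overflow area instead.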

fn emit_s390x_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the s390x ELF ABI calling convention for va_args, see
    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
    //
    // typedef struct __va_list_tag {
    //     long __gpr;
    //     long __fpr;
    //     void *__overflow_arg_area;
    //     void *__reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // There is no padding between fields since `long` and `void*` both have size=8 align=8.
    // https://github.com/IBM/s390x-abi (Table 1.1.: Scalar types)
    let i64_offset = 8;
    let ptr_offset = 8;
    let gpr = va_list_addr;
    let fpr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(i64_offset));
    let overflow_arg_area = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset));
    let reg_save_area =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * i64_offset + ptr_offset));

    let layout = bx.cx.layout_of(target_ty);

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    // FIXME: vector ABI not yet supported.
    let target_ty_size = bx.cx.size_of(target_ty).bytes();
    let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
    let unpadded_size = if indirect { 8 } else { target_ty_size };
    let padded_size = 8;
    let padding = padded_size - unpadded_size;

    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
    let (max_regs, reg_count, reg_save_index, reg_padding) =
        if gpr_type { (5, gpr, 2, padding) } else { (4, fpr, 16, 0) };

    // Check whether the value was passed in a register or in memory.
    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // Work out the address of the value in the register save area.
    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, dl.pointer_align.abi);
    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
    let reg_addr = bx.ptradd(reg_ptr_v, reg_off);

    // Update the register count.
    let new_reg_count_v = bx.add(reg_count_v, bx.const_u64(1));
    bx.store(new_reg_count_v, reg_count, Align::from_bytes(8).unwrap());
    bx.br(end);

    // Emit code to load the value if it was passed in memory.
    bx.switch_to_block(in_mem);

    // Work out the address of the value in the argument overflow area.
    let arg_ptr_v =
        bx.load(bx.type_ptr(), overflow_arg_area, bx.tcx().data_layout.pointer_align.abi);
    let arg_off = bx.const_u64(padding);
    let mem_addr = bx.ptradd(arg_ptr_v, arg_off);

    // Update the argument overflow area pointer.
    let arg_size = bx.cx().const_u64(padded_size);
    let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size);
    bx.store(new_arg_ptr_v, overflow_arg_area, dl.pointer_align.abi);
    bx.br(end);

    // Return the appropriate result.
    bx.switch_to_block(end);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
    let val_type = layout.llvm_type(bx);
    let val_addr =
        if indirect { bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi) } else { val_addr };
    bx.load(val_type, val_addr, layout.align.abi)
}
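
// Worked example (a sketch of the padding above): an `i32` argument has
// `target_ty_size == 4`, so `indirect` is false, `unpadded_size == 4`, and `padding == 4`.
// s390x is big-endian, so the value sits in the high half of its 8-byte slot: the computed
// register offset is `__gpr * 8 + 2 * 8 + 4`, and in the memory path the load address is
// `__overflow_arg_area + 4` while the stored pointer is still advanced by the full 8 bytes.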

fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    // Implementation of the systemv x86_64 ABI calling convention for va_args, see
    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
    // based on the one in clang.

    // We're able to take some shortcuts because the return type of `va_arg` must implement the
    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.

    // typedef struct __va_list_tag {
    //     unsigned int gp_offset;
    //     unsigned int fp_offset;
    //     void *overflow_arg_area;
    //     void *reg_save_area;
    // } va_list[1];
    let va_list_addr = list.immediate();

    // Peel off any newtype wrappers.
    //
    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
    // pass such types to variadic functions.
    //
    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
    //
    // ```
    // #[repr(C)]
    // struct Empty;
    //
    // #[repr(C)]
    // struct Foo([Empty; 8], i32);
    // ```
    let layout = {
        let mut layout = bx.cx.layout_of(target_ty);

        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
            layout = inner;
        }

        layout
    };

    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
    // in the registers. If not go to step 7.

    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
    // general purpose registers needed to pass type and num_fp to hold
    // the number of floating point registers needed.

    let mut num_gp_registers = 0;
    let mut num_fp_registers = 0;

    let mut registers_for_primitive = |p| match p {
        Primitive::Int(integer, _is_signed) => {
            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
        }
        Primitive::Float(float) => {
            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
        }
        Primitive::Pointer(_) => {
            num_gp_registers += 1;
        }
    };

    match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => {
            registers_for_primitive(scalar.primitive());
        }
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            registers_for_primitive(scalar1.primitive());
            registers_for_primitive(scalar2.primitive());
        }
        BackendRepr::SimdVector { .. } => {
            // No instance of VaArgSafe uses a non-scalar `BackendRepr`.
            unreachable!(
                "No x86-64 SysV va_arg implementation for {:?}",
                layout.layout.backend_repr()
            )
        }
        BackendRepr::Memory { .. } => {
            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
        }
    };

    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
    // l->fp_offset > 176 - num_fp * 16 go to step 7.

    let unsigned_int_offset = 4;
    let ptr_offset = 8;
    let gp_offset_ptr = va_list_addr;
    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));

    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());

    let mut use_regs = bx.const_bool(false);

    if num_gp_registers > 0 {
        let max_offset_val = 48u32 - num_gp_registers * 8;
        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
        use_regs = fits_in_gp;
    }

    if num_fp_registers > 0 {
        let max_offset_val = 176u32 - num_fp_registers * 16;
        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
    }

    let in_reg = bx.append_sibling_block("va_arg.in_reg");
    let in_mem = bx.append_sibling_block("va_arg.in_mem");
    let end = bx.append_sibling_block("va_arg.end");

    bx.cond_br(use_regs, in_reg, in_mem);

    // Emit code to load the value if it was passed in a register.
    bx.switch_to_block(in_reg);

    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
    // an offset of l->gp_offset and/or l->fp_offset. This may require
    // copying to a temporary location in case the parameter is passed
    // in different register classes or requires an alignment greater
    // than 8 for general purpose registers and 16 for XMM registers.
    //
    // FIXME(llvm): This really results in shameful code when we end up needing to
    // collect arguments from different places; often what should result in a
    // simple assembling of a structure from scattered addresses has many more
    // loads than necessary. Can we clean this up?
    let reg_save_area_ptr =
        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);

    let reg_addr = match layout.layout.backend_repr() {
        BackendRepr::Scalar(scalar) => match scalar.primitive() {
            Primitive::Int(_, _) | Primitive::Pointer(_) => {
                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                // Copy into a temporary if the type is more aligned than the register save area.
                let gp_align = Align::from_bytes(8).unwrap();
                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
            }
            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
        },
        BackendRepr::ScalarPair(scalar1, scalar2) => {
            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);

            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
            let align_hi = layout.field(bx.cx, 1).layout.align().abi;

            match (scalar1.primitive(), scalar2.primitive()) {
                (Primitive::Float(_), Primitive::Float(_)) => {
                    // SSE registers are spaced 16 bytes apart in the register save
                    // area, so we need to collect the two eightbytes together.
                    // The ABI isn't explicit about this, but it seems reasonable
                    // to assume that the slots are 16-byte aligned, since the stack is
                    // naturally 16-byte aligned and the prologue is expected to store
                    // all the SSE registers to the RSA.
                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));

                    let align = layout.layout.align().abi;
                    let tmp = bx.alloca(layout.layout.size(), align);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align);
                    bx.store(reg_hi, field1, align);

                    tmp
                }
                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);

                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
                        Primitive::Float(_) => (fp_addr, gp_addr),
                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
                    };

                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);

                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);

                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
                    let field0 = tmp;
                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));

                    bx.store(reg_lo, field0, align_lo);
                    bx.store(reg_hi, field1, align_hi);

                    tmp
                }
                (_, _) => {
                    // Two integer/pointer values are just contiguous in memory.
                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);

                    // Copy into a temporary if the type is more aligned than the register save area.
                    let gp_align = Align::from_bytes(8).unwrap();
                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
                }
            }
        }
        // The previous match on `BackendRepr` means control flow already escaped.
        BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(),
    };

    // AMD64-ABI 3.5.7p5: Step 5. Set:
    // l->gp_offset = l->gp_offset + num_gp * 8
    if num_gp_registers > 0 {
        let offset = bx.const_u32(num_gp_registers * 8);
        let sum = bx.add(gp_offset_v, offset);
        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
    }

    // l->fp_offset = l->fp_offset + num_fp * 16.
    if num_fp_registers > 0 {
        let offset = bx.const_u32(num_fp_registers * 16);
        let sum = bx.add(fp_offset_v, offset);
        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
    }

    bx.br(end);

    bx.switch_to_block(in_mem);
    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
    bx.br(end);

    bx.switch_to_block(end);

    let val_type = layout.llvm_type(bx);
    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);

    bx.load(val_type, val_addr, layout.align.abi)
}
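
// Worked example (a sketch of the register accounting above): for a value lowered as a
// `ScalarPair` of an integer and an `f64`, `num_gp_registers == 1` and
// `num_fp_registers == 1`, so the emitted check is `gp_offset <= 40 && fp_offset <= 160`
// (that is, `48 - 1 * 8` and `176 - 1 * 16`). If it passes, the two eightbytes are loaded
// from the GP and FP portions of the register save area and reassembled in a temporary;
// otherwise the value is read from the overflow area.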

/// Copy into a temporary if the type is more aligned than the register save area.
fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    reg_addr: &'ll Value,
    layout: TyAndLayout<'tcx, Ty<'tcx>>,
    src_align: Align,
) -> &'ll Value {
    if layout.layout.align.abi > src_align {
        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
        bx.memcpy(
            tmp,
            layout.layout.align.abi,
            reg_addr,
            src_align,
            bx.const_u32(layout.layout.size().bytes() as u32),
            MemFlags::empty(),
        );
        tmp
    } else {
        reg_addr
    }
}

fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    va_list_addr: &'ll Value,
    layout: TyAndLayout<'tcx, Ty<'tcx>>,
) -> &'ll Value {
    let dl = bx.cx.data_layout();

    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));

    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
    // byte boundary if alignment needed by type exceeds 8 byte boundary.
    // It isn't stated explicitly in the standard, but in practice we use
    // alignment greater than 16 where necessary.
    if layout.layout.align.abi.bytes() > 8 {
        unreachable!("all instances of VaArgSafe have an alignment <= 8");
    }

    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
    let mem_addr = overflow_arg_area_v;

    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
    // l->overflow_arg_area + sizeof(type).
    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
    // an 8 byte boundary.
    let size_in_bytes = layout.layout.size().bytes();
    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);

    mem_addr
}
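
// Worked example (a sketch): an `i32` read from the overflow area occupies a slot of
// `4usize.next_multiple_of(8) == 8` bytes, so `overflow_arg_area` is advanced by 8 even
// though only 4 bytes are loaded. On this little-endian target the value sits at the low
// end of the slot, so no right-adjustment is needed.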

fn emit_xtensa_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    list: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
    // this, other than "what GCC does".
    //
    // The va_list type has three fields:
    // struct __va_list_tag {
    //   int32_t *va_stk; // Arguments passed on the stack
    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
    //   int32_t va_ndx; // Offset into the arguments, in bytes
    // };
    //
    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
    // otherwise it must come from va_stk.
    //
    // Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
    // primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
    let va_list_addr = list.immediate();
    // FIXME: handle multi-field structs that split across regsave/stack?
    let layout = bx.cx.layout_of(target_ty);
    let from_stack = bx.append_sibling_block("va_arg.from_stack");
    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
    let end = bx.append_sibling_block("va_arg.end");

    // (*va).va_ndx
    let va_reg_offset = 4;
    let va_ndx_offset = va_reg_offset + 4;
    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));

    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
    let offset = round_up_to_alignment(bx, offset, layout.align.abi);

    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;

    // Update the offset in va_list by adding the slot's size.
    let offset_next = bx.add(offset, bx.const_i32(slot_size));

    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
    let regsave_size = bx.const_i32(24);
    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
    bx.cond_br(use_regsave, from_regsave, from_stack);

    bx.switch_to_block(from_regsave);
    // update va_ndx
    bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

    // (*va).va_reg
    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
    let regsave_area =
        bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
    bx.br(end);

    bx.switch_to_block(from_stack);

    // The first time we switch from regsave to stack we need to adjust our offsets a bit.
    // va_stk is set up such that the first stack argument is always at va_stk + 32.
    // The corrected offset is written back into the va_list struct.

    // let offset_corrected = cmp::max(offset, 32);
    let stack_offset_start = bx.const_i32(32);
    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);

    // let offset_next_corrected = offset_corrected + slot_size;
    // va_ndx = offset_next_corrected;
    let offset_next_corrected = bx.add(offset_next, bx.const_i32(slot_size));
    // update va_ndx
    bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
    bx.br(end);

    bx.switch_to_block(end);

    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
    // targets supported by rustc are little-endian, so don't worry about it.

    // if from_regsave {
    //     unsafe { *regsave_value_ptr }
    // } else {
    //     unsafe { *stack_value_ptr }
    // }
    assert!(bx.tcx().sess.target.endian == Endian::Little);
    let value_ptr =
        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
    return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
}
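
// Worked example (a sketch of the boundary handling above): with va_ndx = 20 and an
// 8-byte argument, offset_next is 28 > 24, so the regsave area is skipped. The offset is
// corrected to max(20, 32) = 32, and the value is read from va_stk + 32, matching the rule
// that primitives are never split between registers and the stack.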

pub(super) fn emit_va_arg<'ll, 'tcx>(
    bx: &mut Builder<'_, 'll, 'tcx>,
    addr: OperandRef<'tcx, &'ll Value>,
    target_ty: Ty<'tcx>,
) -> &'ll Value {
    // Determine the va_arg implementation to use. The LLVM va_arg instruction
    // is lacking in some instances, so we should only use it as a fallback.
    let target = &bx.cx.tcx.sess.target;

    match &*target.arch {
        "x86" => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes4,
            if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
            ForceRightAdjust::No,
        ),
        "aarch64" | "arm64ec" if target.is_like_windows || target.is_like_darwin => {
            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                PassMode::Direct,
                SlotSize::Bytes8,
                if target.is_like_windows { AllowHigherAlign::No } else { AllowHigherAlign::Yes },
                ForceRightAdjust::No,
            )
        }
        "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
        "s390x" => emit_s390x_va_arg(bx, addr, target_ty),
        "powerpc" => emit_powerpc_va_arg(bx, addr, target_ty),
        "powerpc64" | "powerpc64le" => emit_ptr_va_arg(
            bx,
            addr,
            target_ty,
            PassMode::Direct,
            SlotSize::Bytes8,
            AllowHigherAlign::Yes,
            match &*target.arch {
                "powerpc64" => ForceRightAdjust::Yes,
                _ => ForceRightAdjust::No,
            },
        ),
        // Windows x86_64
        "x86_64" if target.is_like_windows => {
            let target_ty_size = bx.cx.size_of(target_ty).bytes();
            emit_ptr_va_arg(
                bx,
                addr,
                target_ty,
                if target_ty_size > 8 || !target_ty_size.is_power_of_two() {
                    PassMode::Indirect
                } else {
                    PassMode::Direct
                },
                SlotSize::Bytes8,
                AllowHigherAlign::No,
                ForceRightAdjust::No,
            )
        }
        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
        // For all other architecture/OS combinations fall back to using
        // the LLVM va_arg instruction.
        // https://llvm.org/docs/LangRef.html#va-arg-instruction
        _ => bx.va_arg(addr.immediate(), bx.cx.layout_of(target_ty).llvm_type(bx.cx)),
    }
}
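
// Illustrative Rust-side usage whose `arg` calls are lowered through `emit_va_arg` (a
// sketch, not part of this module; requires the nightly `c_variadic` feature):
//
//     #![feature(c_variadic)]
//
//     pub unsafe extern "C" fn sum(n: usize, mut ap: ...) -> u64 {
//         let mut total = 0;
//         for _ in 0..n {
//             // Each `arg` call on the variadic argument pack is lowered via `emit_va_arg`.
//             total += ap.arg::<u64>();
//         }
//         total
//     }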