Skip to content

Commit 5b5e357

Browse files
authored
Cranelift: aarch64: fix preserve-all to save full vector registers. (#12944)
It turns out that the `preserve-all` ABI was only preserving some, not all (false advertising!): specifically, the aarch64 ABI code was continuing to use low-64-bit loads/stores on vector/float registers, as it does for the ordinary AAPCS (SysV) calling convention. `PreserveAll` specifically indicates that the *entire* vector register should be saved; so now we do that.
1 parent dad2293 commit 5b5e357

2 files changed

Lines changed: 270 additions & 165 deletions

File tree

cranelift/codegen/src/isa/aarch64/abi.rs

Lines changed: 139 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ impl From<StackAMode> for AMode {
3838

3939
// Returns the size of stack space needed to store the
4040
// `clobbered_callee_saves` registers.
41-
fn compute_clobber_size(clobbered_callee_saves: &[Writable<RealReg>]) -> u32 {
41+
fn compute_clobber_size(
42+
call_conv: isa::CallConv,
43+
clobbered_callee_saves: &[Writable<RealReg>],
44+
) -> u32 {
4245
let mut int_regs = 0;
4346
let mut vec_regs = 0;
4447
for &reg in clobbered_callee_saves {
@@ -55,16 +58,22 @@ fn compute_clobber_size(clobbered_callee_saves: &[Writable<RealReg>]) -> u32 {
5558

5659
// Round up to multiple of 2, to keep 16-byte stack alignment.
5760
let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
58-
// The Procedure Call Standard for the Arm 64-bit Architecture
59-
// (AAPCS64, including several related ABIs such as the one used by
60-
// Windows) mandates saving only the bottom 8 bytes of the vector
61-
// registers, so we round up the number of registers to ensure
62-
// proper stack alignment (similarly to the situation with
63-
// `int_reg`).
64-
let vec_reg_size = 8;
65-
let vec_save_padding = vec_regs & 1;
66-
// FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
67-
let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size;
61+
let vec_save_bytes = if call_conv == isa::CallConv::PreserveAll {
62+
// In the PreserveAll ABI, we save the entire vector register,
63+
// i.e., all 128 bits.
64+
vec_regs * 16
65+
} else {
66+
// The Procedure Call Standard for the Arm 64-bit Architecture
67+
// (AAPCS64, including several related ABIs such as the one used by
68+
// Windows) mandates saving only the bottom 8 bytes of the vector
69+
// registers, so we round up the number of registers to ensure
70+
// proper stack alignment (similarly to the situation with
71+
// `int_reg`).
72+
let vec_reg_size = 8;
73+
let vec_save_padding = vec_regs & 1;
74+
// FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
75+
(vec_regs + vec_save_padding) * vec_reg_size
76+
};
6877

6978
int_save_bytes + vec_save_bytes
7079
}
@@ -714,7 +723,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
714723
}
715724

716725
fn gen_clobber_save(
717-
_call_conv: isa::CallConv,
726+
call_conv: isa::CallConv,
718727
flags: &settings::Flags,
719728
frame_layout: &FrameLayout,
720729
) -> SmallVec<[Inst; 16]> {
@@ -857,74 +866,91 @@ impl ABIMachineSpec for AArch64MachineDeps {
857866
}
858867
}
859868

860-
let store_vec_reg = |rd| Inst::FpuStore64 {
861-
rd,
862-
mem: AMode::SPPreIndexed {
863-
simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
864-
},
865-
flags: MemFlags::trusted(),
866-
};
867-
let iter = clobbered_vec.chunks_exact(2);
869+
if call_conv == isa::CallConv::PreserveAll {
870+
// Store full vector registers in PreserveAll convention.
871+
for reg in clobbered_vec.iter().rev() {
872+
let inst = Inst::FpuStore128 {
873+
rd: reg.to_reg().into(),
874+
mem: AMode::SPPreIndexed {
875+
simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
876+
},
877+
flags: MemFlags::trusted(),
878+
};
879+
insts.push(inst);
880+
// N.B.: no unwind info: we don't have a way to
881+
// represent "full register" anyway.
882+
}
883+
} else {
884+
let store_vec_reg_half = |rd| Inst::FpuStore64 {
885+
rd,
886+
mem: AMode::SPPreIndexed {
887+
simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
888+
},
889+
flags: MemFlags::trusted(),
890+
};
891+
let iter = clobbered_vec.chunks_exact(2);
868892

869-
if let [rd] = iter.remainder() {
870-
let rd: Reg = rd.to_reg().into();
893+
if let [rd] = iter.remainder() {
894+
let rd: Reg = rd.to_reg().into();
871895

872-
debug_assert_eq!(rd.class(), RegClass::Float);
873-
insts.push(store_vec_reg(rd));
896+
debug_assert_eq!(rd.class(), RegClass::Float);
897+
insts.push(store_vec_reg_half(rd));
874898

875-
if flags.unwind_info() {
876-
clobber_offset -= clobber_offset_change as u32;
877-
insts.push(Inst::Unwind {
878-
inst: UnwindInst::SaveReg {
879-
clobber_offset,
880-
reg: rd.to_real_reg().unwrap(),
881-
},
882-
});
899+
if flags.unwind_info() {
900+
clobber_offset -= clobber_offset_change as u32;
901+
insts.push(Inst::Unwind {
902+
inst: UnwindInst::SaveReg {
903+
clobber_offset,
904+
reg: rd.to_real_reg().unwrap(),
905+
},
906+
});
907+
}
883908
}
884-
}
885909

886-
let store_vec_reg_pair = |rt, rt2| {
887-
let clobber_offset_change = 16;
910+
let store_vec_reg_half_pair = |rt, rt2| {
911+
let clobber_offset_change = 16;
888912

889-
(
890-
Inst::FpuStoreP64 {
891-
rt,
892-
rt2,
893-
mem: PairAMode::SPPreIndexed {
894-
simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
913+
(
914+
Inst::FpuStoreP64 {
915+
rt,
916+
rt2,
917+
mem: PairAMode::SPPreIndexed {
918+
simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64)
919+
.unwrap(),
920+
},
921+
flags: MemFlags::trusted(),
895922
},
896-
flags: MemFlags::trusted(),
897-
},
898-
clobber_offset_change as u32,
899-
)
900-
};
901-
let mut iter = iter.rev();
923+
clobber_offset_change as u32,
924+
)
925+
};
926+
let mut iter = iter.rev();
902927

903-
while let Some([rt, rt2]) = iter.next() {
904-
let rt: Reg = rt.to_reg().into();
905-
let rt2: Reg = rt2.to_reg().into();
928+
while let Some([rt, rt2]) = iter.next() {
929+
let rt: Reg = rt.to_reg().into();
930+
let rt2: Reg = rt2.to_reg().into();
906931

907-
debug_assert_eq!(rt.class(), RegClass::Float);
908-
debug_assert_eq!(rt2.class(), RegClass::Float);
932+
debug_assert_eq!(rt.class(), RegClass::Float);
933+
debug_assert_eq!(rt2.class(), RegClass::Float);
909934

910-
let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);
935+
let (inst, clobber_offset_change) = store_vec_reg_half_pair(rt, rt2);
911936

912-
insts.push(inst);
937+
insts.push(inst);
913938

914-
if flags.unwind_info() {
915-
clobber_offset -= clobber_offset_change;
916-
insts.push(Inst::Unwind {
917-
inst: UnwindInst::SaveReg {
918-
clobber_offset,
919-
reg: rt.to_real_reg().unwrap(),
920-
},
921-
});
922-
insts.push(Inst::Unwind {
923-
inst: UnwindInst::SaveReg {
924-
clobber_offset: clobber_offset + clobber_offset_change / 2,
925-
reg: rt2.to_real_reg().unwrap(),
926-
},
927-
});
939+
if flags.unwind_info() {
940+
clobber_offset -= clobber_offset_change;
941+
insts.push(Inst::Unwind {
942+
inst: UnwindInst::SaveReg {
943+
clobber_offset,
944+
reg: rt.to_real_reg().unwrap(),
945+
},
946+
});
947+
insts.push(Inst::Unwind {
948+
inst: UnwindInst::SaveReg {
949+
clobber_offset: clobber_offset + clobber_offset_change / 2,
950+
reg: rt2.to_real_reg().unwrap(),
951+
},
952+
});
953+
}
928954
}
929955
}
930956

@@ -943,7 +969,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
943969
}
944970

945971
fn gen_clobber_restore(
946-
_call_conv: isa::CallConv,
972+
call_conv: isa::CallConv,
947973
_flags: &settings::Flags,
948974
frame_layout: &FrameLayout,
949975
) -> SmallVec<[Inst; 16]> {
@@ -956,40 +982,55 @@ impl ABIMachineSpec for AArch64MachineDeps {
956982
insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
957983
}
958984

959-
let load_vec_reg = |rd| Inst::FpuLoad64 {
960-
rd,
961-
mem: AMode::SPPostIndexed {
962-
simm9: SImm9::maybe_from_i64(16).unwrap(),
963-
},
964-
flags: MemFlags::trusted(),
965-
};
966-
let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
967-
rt,
968-
rt2,
969-
mem: PairAMode::SPPostIndexed {
970-
simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
971-
},
972-
flags: MemFlags::trusted(),
973-
};
985+
if call_conv == isa::CallConv::PreserveAll {
986+
for reg in clobbered_vec.iter() {
987+
let inst = Inst::FpuLoad128 {
988+
rd: reg.map(|r| r.into()),
989+
mem: AMode::SPPostIndexed {
990+
simm9: SImm9::maybe_from_i64(16).unwrap(),
991+
},
992+
flags: MemFlags::trusted(),
993+
};
994+
insts.push(inst);
995+
// N.B.: no unwind info; we don't have a way to
996+
// represent "full vector register saved" anyway.
997+
}
998+
} else {
999+
let load_vec_reg_half = |rd| Inst::FpuLoad64 {
1000+
rd,
1001+
mem: AMode::SPPostIndexed {
1002+
simm9: SImm9::maybe_from_i64(16).unwrap(),
1003+
},
1004+
flags: MemFlags::trusted(),
1005+
};
1006+
let load_vec_reg_half_pair = |rt, rt2| Inst::FpuLoadP64 {
1007+
rt,
1008+
rt2,
1009+
mem: PairAMode::SPPostIndexed {
1010+
simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
1011+
},
1012+
flags: MemFlags::trusted(),
1013+
};
9741014

975-
let mut iter = clobbered_vec.chunks_exact(2);
1015+
let mut iter = clobbered_vec.chunks_exact(2);
9761016

977-
while let Some([rt, rt2]) = iter.next() {
978-
let rt: Writable<Reg> = rt.map(|r| r.into());
979-
let rt2: Writable<Reg> = rt2.map(|r| r.into());
1017+
while let Some([rt, rt2]) = iter.next() {
1018+
let rt: Writable<Reg> = rt.map(|r| r.into());
1019+
let rt2: Writable<Reg> = rt2.map(|r| r.into());
9801020

981-
debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
982-
debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
983-
insts.push(load_vec_reg_pair(rt, rt2));
984-
}
1021+
debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
1022+
debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
1023+
insts.push(load_vec_reg_half_pair(rt, rt2));
1024+
}
9851025

986-
debug_assert!(iter.remainder().len() <= 1);
1026+
debug_assert!(iter.remainder().len() <= 1);
9871027

988-
if let [rd] = iter.remainder() {
989-
let rd: Writable<Reg> = rd.map(|r| r.into());
1028+
if let [rd] = iter.remainder() {
1029+
let rd: Writable<Reg> = rd.map(|r| r.into());
9901030

991-
debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
992-
insts.push(load_vec_reg(rd));
1031+
debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
1032+
insts.push(load_vec_reg_half(rd));
1033+
}
9931034
}
9941035

9951036
let mut iter = clobbered_int.chunks_exact(2);
@@ -1150,7 +1191,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
11501191
regs.sort_unstable();
11511192

11521193
// Compute clobber size.
1153-
let clobber_size = compute_clobber_size(&regs);
1194+
let clobber_size = compute_clobber_size(call_conv, &regs);
11541195

11551196
// Compute linkage frame size.
11561197
let setup_area_size = if flags.preserve_frame_pointers()

0 commit comments

Comments
 (0)