Skip to content

Commit 179a6ba

Browse files
4ast and Alexei Starovoitov authored
bpf: Support aggregate returns (#190894)
i128 and 16-byte structs are now returned in R0:R2, which mirrors x86, where { i64, i32 } is returned in RAX:RDX. Both conventions skip one register in between (R1 on BPF, RCX on x86). Signed-off-by: Alexei Starovoitov <ast@kernel.org> Co-authored-by: Alexei Starovoitov <ast@kernel.org>
1 parent fd09e8f commit 179a6ba

8 files changed

Lines changed: 151 additions & 37 deletions

File tree

llvm/lib/Target/BPF/BPFCallingConv.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
// BPF 64-bit C return-value convention.
14-
def RetCC_BPF64 : CallingConv<[CCIfType<[i64], CCAssignToReg<[R0]>>]>;
14+
def RetCC_BPF64 : CallingConv<[CCIfType<[i64], CCAssignToReg<[R0, R2]>>]>;
1515

1616
// BPF 64-bit C Calling convention.
1717
def CC_BPF64 : CallingConv<[
@@ -27,8 +27,8 @@ def CC_BPF64 : CallingConv<[
2727

2828
// Return-value convention when -mattr=+alu32 enabled
2929
def RetCC_BPF32 : CallingConv<[
30-
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [R0]>>,
31-
CCIfType<[i64], CCAssignToRegWithShadow<[R0], [W0]>>
30+
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W2], [R0, R2]>>,
31+
CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [W0, W2]>>
3232
]>;
3333

3434
// Calling convention when -mattr=+alu32 enabled

llvm/lib/Target/BPF/BPFISelLowering.cpp

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
126126
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
127127
setOperationAction(ISD::ROTR, VT, Expand);
128128
setOperationAction(ISD::ROTL, VT, Expand);
129-
setOperationAction(ISD::SHL_PARTS, VT, Expand);
130-
setOperationAction(ISD::SRL_PARTS, VT, Expand);
131-
setOperationAction(ISD::SRA_PARTS, VT, Expand);
129+
setOperationAction(ISD::SHL_PARTS, VT, Custom);
130+
setOperationAction(ISD::SRL_PARTS, VT, Custom);
131+
setOperationAction(ISD::SRA_PARTS, VT, Custom);
132132
setOperationAction(ISD::CTPOP, VT, Expand);
133133
setOperationAction(ISD::CTTZ, VT, Expand);
134134
setOperationAction(ISD::CTLZ, VT, Expand);
@@ -363,6 +363,10 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
363363
case ISD::SDIV:
364364
case ISD::SREM:
365365
return LowerSDIVSREM(Op, DAG);
366+
case ISD::SHL_PARTS:
367+
case ISD::SRL_PARTS:
368+
case ISD::SRA_PARTS:
369+
return LowerShiftParts(Op, DAG);
366370
case ISD::DYNAMIC_STACKALLOC:
367371
return LowerDYNAMIC_STACKALLOC(Op, DAG);
368372
case ISD::ATOMIC_LOAD:
@@ -448,9 +452,6 @@ SDValue BPFTargetLowering::LowerFormalArguments(
448452
fail(DL, DAG, "stack arguments are not supported");
449453
if (IsVarArg)
450454
fail(DL, DAG, "variadic functions are not supported");
451-
if (MF.getFunction().hasStructRetAttr())
452-
fail(DL, DAG, "aggregate returns are not supported");
453-
454455
return Chain;
455456
}
456457

@@ -565,12 +566,13 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
565566
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
566567
G->getOffset(), 0);
567568
} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
568-
if (StringRef(E->getSymbol()) != BPF_TRAP) {
569-
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
570-
fail(CLI.DL, DAG,
571-
Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
572-
"' is not supported."));
573-
}
569+
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
570+
StringRef Sym = E->getSymbol();
571+
if (Sym != BPF_TRAP && Sym != "__multi3" && Sym != "__divti3" &&
572+
Sym != "__modti3" && Sym != "__udivti3" && Sym != "__umodti3")
573+
fail(
574+
CLI.DL, DAG,
575+
Twine("A call to built-in function '" + Sym + "' is not supported."));
574576
}
575577

576578
// Returns a chain & a flag for retval copy to use.
@@ -633,11 +635,6 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
633635
// CCState - Info about the registers and stack slot.
634636
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
635637

636-
if (MF.getFunction().getReturnType()->isAggregateType()) {
637-
fail(DL, DAG, "aggregate returns are not supported");
638-
return DAG.getNode(Opc, DL, MVT::Other, Chain);
639-
}
640-
641638
// Analyze return values.
642639
CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
643640

@@ -677,13 +674,6 @@ SDValue BPFTargetLowering::LowerCallResult(
677674
SmallVector<CCValAssign, 16> RVLocs;
678675
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
679676

680-
if (Ins.size() > 1) {
681-
fail(DL, DAG, "only small returns supported");
682-
for (auto &In : Ins)
683-
InVals.push_back(DAG.getConstant(0, DL, In.VT));
684-
return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
685-
}
686-
687677
CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
688678

689679
// Copy all of the result registers out of their specified physreg.
@@ -718,6 +708,13 @@ SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
718708
return DAG.getUNDEF(Op->getValueType(0));
719709
}
720710

711+
/// Custom lowering for ISD::SHL_PARTS, ISD::SRL_PARTS and ISD::SRA_PARTS
/// (the opcodes that LowerOperation routes here).
///
/// Hands the node to expandShiftParts(), which fills in \p Lo and \p Hi, and
/// returns the two values merged into a single multi-result node located at
/// the original operation's debug location.
SDValue BPFTargetLowering::LowerShiftParts(SDValue Op,
712+
SelectionDAG &DAG) const {
713+
SDValue Lo, Hi;
714+
expandShiftParts(Op.getNode(), Lo, Hi, DAG);
715+
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
716+
}
717+
721718
SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
722719
SelectionDAG &DAG) const {
723720
SDLoc DL(Op);
@@ -1196,3 +1193,12 @@ bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
11961193

11971194
return true;
11981195
}
1196+
1197+
/// Reports whether the return values described by \p Outs fit the BPF
/// return-value calling convention.
///
/// Builds a scratch CCState and runs CheckReturn against RetCC_BPF32 when
/// alu32 is enabled (getHasAlu32()) and against RetCC_BPF64 otherwise.
/// \p RetTy is not consulted by this implementation.
/// NOTE(review): presumably a false result makes the generic lowering fall
/// back to returning the value through memory (sret-style) -- confirm against
/// the TargetLowering documentation.
bool BPFTargetLowering::CanLowerReturn(
1198+
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1199+
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
1200+
const Type *RetTy) const {
1201+
SmallVector<CCValAssign, 16> RVLocs;
1202+
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
1203+
return CCInfo.CheckReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
1204+
}

llvm/lib/Target/BPF/BPFISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ class BPFTargetLowering : public TargetLowering {
6969
bool AllowsMisalignedMemAccess;
7070

7171
SDValue LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const;
72+
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
7273
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
7374
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
7475
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
@@ -163,6 +164,11 @@ class BPFTargetLowering : public TargetLowering {
163164
MachineBasicBlock *
164165
EmitInstrWithCustomInserterLDimm64(MachineInstr &MI,
165166
MachineBasicBlock *BB) const;
167+
168+
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
169+
bool IsVarArg,
170+
const SmallVectorImpl<ISD::OutputArg> &Outs,
171+
LLVMContext &Context, const Type *RetTy) const override;
166172
};
167173
}
168174

llvm/test/CodeGen/BPF/atomic-oversize.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
; RUN: llc -mtriple=bpf < %s | FileCheck %s
2-
; XFAIL: *
3-
; Doesn't currently build, with error 'only small returns supported'.
42

53
define void @test(ptr %a) nounwind {
64
; CHECK-LABEL: test:
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -march=bpfel < %s | FileCheck %s
3+
; This test used to fail with "unable to allocate function return #1"
4+
5+
6+
define i64 @bar(i64 %a, i64 %b) {
7+
; CHECK-LABEL: bar:
8+
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: call foo
10+
; CHECK-NEXT: r2 += r0
11+
; CHECK-NEXT: r0 = r2
12+
; CHECK-NEXT: exit
13+
entry:
14+
%c = call i128 @foo(i64 %a, i64 %b)
15+
%d = lshr i128 %c, 64
16+
%e = trunc i128 %d to i64
17+
%f = trunc i128 %c to i64
18+
%g = add i64 %e, %f
19+
ret i64 %g
20+
}
21+
22+
23+
define i128 @foo(i64 %a, i64 %b) {
24+
; CHECK-LABEL: foo:
25+
; CHECK: # %bb.0: # %entry
26+
; CHECK-NEXT: r0 = r1
27+
; CHECK-NEXT: r0 += r2
28+
; CHECK-NEXT: w3 = 1
29+
; CHECK-NEXT: if r0 < r1 goto .LBB1_2
30+
; CHECK-NEXT: # %bb.1: # %entry
31+
; CHECK-NEXT: w3 = 0
32+
; CHECK-NEXT: .LBB1_2: # %entry
33+
; CHECK-NEXT: r2 s>>= 63
34+
; CHECK-NEXT: r1 s>>= 63
35+
; CHECK-NEXT: r1 += r2
36+
; CHECK-NEXT: r1 += r3
37+
; CHECK-NEXT: r2 = r1
38+
; CHECK-NEXT: exit
39+
entry:
40+
%a1 = sext i64 %a to i128
41+
%b1 = sext i64 %b to i128
42+
%c = add i128 %a1, %b1
43+
ret i128 %c
44+
}

llvm/test/CodeGen/BPF/sret.ll

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s
3+
4+
define void @ret_s16(ptr sret([16 x i8]) %result) {
5+
; CHECK-LABEL: ret_s16:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: *(u64 *)(r1 + 8) = 2
8+
; CHECK-NEXT: *(u64 *)(r1 + 0) = 1
9+
; CHECK-NEXT: exit
10+
store i64 1, ptr %result
11+
%p2 = getelementptr i8, ptr %result, i64 8
12+
store i64 2, ptr %p2
13+
ret void
14+
}
15+
16+
define void @ret_s24(ptr sret([24 x i8]) %result) {
17+
; CHECK-LABEL: ret_s24:
18+
; CHECK: # %bb.0:
19+
; CHECK-NEXT: *(u64 *)(r1 + 16) = 3
20+
; CHECK-NEXT: *(u64 *)(r1 + 8) = 2
21+
; CHECK-NEXT: *(u64 *)(r1 + 0) = 1
22+
; CHECK-NEXT: exit
23+
store i64 1, ptr %result
24+
%p2 = getelementptr i8, ptr %result, i64 8
25+
store i64 2, ptr %p2
26+
%p3 = getelementptr i8, ptr %result, i64 16
27+
store i64 3, ptr %p3
28+
ret void
29+
}
30+
31+
define i64 @caller() {
32+
; CHECK-LABEL: caller:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: r1 = r10
35+
; CHECK-NEXT: r1 += -16
36+
; CHECK-NEXT: call ret_s16
37+
; CHECK-NEXT: r0 = *(u64 *)(r10 - 16)
38+
; CHECK-NEXT: exit
39+
%s = alloca [16 x i8], align 8
40+
call void @ret_s16(ptr sret([16 x i8]) %s)
41+
%v = load i64, ptr %s
42+
ret i64 %v
43+
}
Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
1-
; RUN: not llc -mtriple=bpf < %s 2> %t1
2-
; RUN: FileCheck %s < %t1
3-
; CHECK: error: <unknown>:0:0: in function bar { i64, i32 } (i32, i32, i32, i32, i32): aggregate returns are not supported
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=bpf < %s | FileCheck %s
43

54
%struct.S = type { i32, i32, i32 }
65

76
@s = common global %struct.S zeroinitializer, align 4
87

98
; Function Attrs: nounwind readonly uwtable
109
define { i64, i32 } @bar(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) #0 {
10+
; CHECK-LABEL: bar:
11+
; CHECK: # %bb.0: # %entry
12+
; CHECK-NEXT: r1 = s ll
13+
; CHECK-NEXT: w2 = *(u32 *)(r1 + 0)
14+
; CHECK-NEXT: w0 = *(u32 *)(r1 + 4)
15+
; CHECK-NEXT: r0 <<= 32
16+
; CHECK-NEXT: r0 |= r2
17+
; CHECK-NEXT: w2 = *(u32 *)(r1 + 8)
18+
; CHECK-NEXT: exit
1119
entry:
1220
%retval.sroa.0.0.copyload = load i64, ptr @s, align 4
1321
%retval.sroa.2.0.copyload = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i64 0, i32 2), align 4
@@ -16,12 +24,16 @@ entry:
1624
ret { i64, i32 } %.fca.1.insert
1725
}
1826

19-
; CHECK: error: <unknown>:0:0: in function baz void (ptr): aggregate returns are not supported
20-
2127
%struct.B = type { [100 x i64] }
2228

2329
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
2430
define dso_local void @baz(ptr noalias nocapture sret(%struct.B) align 8 %agg.result) local_unnamed_addr #0 {
31+
; CHECK-LABEL: baz:
32+
; CHECK: .Lbaz$local:
33+
; CHECK-NEXT: .type .Lbaz$local,@function
34+
; CHECK-NEXT: .cfi_startproc
35+
; CHECK-NEXT: # %bb.0: # %entry
36+
; CHECK-NEXT: exit
2537
entry:
2638
ret void
2739
}

llvm/test/CodeGen/BPF/struct_ret2.ll

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1-
; RUN: not llc -mtriple=bpf < %s 2> %t1
2-
; RUN: FileCheck %s < %t1
3-
; CHECK: only small returns
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=bpf < %s | FileCheck %s
43

54
; Function Attrs: nounwind uwtable
65
define { i64, i32 } @foo(i32 %a, i32 %b, i32 %c) #0 {
6+
; CHECK-LABEL: foo:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: w4 = 1
9+
; CHECK-NEXT: w5 = 2
10+
; CHECK-NEXT: call bar
11+
; CHECK-NEXT: exit
712
entry:
813
%call = tail call { i64, i32 } @bar(i32 %a, i32 %b, i32 %c, i32 1, i32 2) #3
914
ret { i64, i32 } %call

0 commit comments

Comments
 (0)