Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ LLVM_ABI Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc,
Value *Src, unsigned Op,
RecurKind MinMaxKind = RecurKind::None);

/// Expand a reduction over the scalable vector \p Vec into a runtime loop that
/// applies the binary operator \p RdxOpcode to one element per iteration, in
/// increasing lane order, starting from \p Acc as the initial accumulator
/// value (a caller-supplied start value, or the reduction identity when the
/// reduction has none).
///
/// The block containing \p Builder's insertion point is split and a new loop
/// block is added to the function. On return \p Builder is positioned at the
/// start of the block that follows the loop. Returns the scalar produced by
/// the final loop iteration.
LLVM_ABI Value *expandReductionViaLoop(IRBuilderBase &Builder, Value *Vec,
unsigned RdxOpcode, Value *Acc);

/// Generates a vector reduction using shufflevectors to reduce the value.
/// Fast-math-flags are propagated using the IRBuilder's setting.
LLVM_ABI Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src,
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/CodeGen/ExpandReductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Value *Acc = II->getArgOperand(0);
Value *Vec = II->getArgOperand(1);
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
if (isa<ScalableVectorType>(Vec->getType())) {
Comment thread
wangpc-pp marked this conversation as resolved.
Rdx = expandReductionViaLoop(Builder, Vec, RdxOpcode, Acc);
break;
}
if (!FMF.allowReassoc())
Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);
else {
Expand Down Expand Up @@ -125,10 +129,16 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin: {
Value *Vec = II->getArgOperand(0);
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
if (isa<ScalableVectorType>(Vec->getType())) {
Comment thread
wangpc-pp marked this conversation as resolved.
Type *EltTy = Vec->getType()->getScalarType();
Value *Ident = getReductionIdentity(ID, EltTy, FMF);
Rdx = expandReductionViaLoop(Builder, Vec, RdxOpcode, Ident);
break;
}
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
break;
}
Expand Down
43 changes: 43 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,49 @@ Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
return Result;
}

/// Expand a reduction of \p Vec into a scalar loop.
///
/// Emits a single-block, bottom-tested loop that extracts one lane of \p Vec
/// per iteration (lane 0 first) and folds it into a running accumulator with
/// the binary operator \p RdxOpcode. The accumulator starts at \p Acc.
///
/// The block holding \p Builder's insertion point is split at that point; the
/// loop is wired in between the two halves. On return \p Builder is positioned
/// at the beginning of the second half ("rdx.exit").
///
/// \param Builder   Builder positioned where the reduction result is needed.
///                  Its current fast-math flags apply to the emitted operator.
/// \param Vec       Vector to reduce (scalable or fixed width).
/// \param RdxOpcode An Instruction::BinaryOps opcode. Compare-based
///                  reductions (e.g. min/max via icmp/fcmp + select) are not
///                  supported here.
/// \param Acc       Initial accumulator; must have the element type of \p Vec.
/// \returns The accumulator value after the last lane has been folded in.
Value *llvm::expandReductionViaLoop(IRBuilderBase &Builder, Value *Vec,
                                    unsigned RdxOpcode, Value *Acc) {
  // Fail early with a clear message instead of forcing an arbitrary opcode
  // into a BinaryOps value below.
  assert(Instruction::isBinaryOp(RdxOpcode) &&
         "loop-based reduction expansion requires a binary operator opcode");

  auto *VTy = cast<VectorType>(Vec->getType());
  Type *EltTy = VTy->getElementType();
  Function *F = Builder.GetInsertBlock()->getParent();

  const DataLayout &DL = F->getDataLayout();
  Type *IdxTy = DL.getIndexType(EltTy->getContext(), 0);

  // Trip count == number of lanes. CreateElementCount yields `vscale * N` for
  // scalable vectors and the plain constant N for fixed-width ones, so the
  // bound is correct for every VectorType accepted by the cast above.
  Value *NumElts = Builder.CreateElementCount(IdxTy, VTy->getElementCount());

  BasicBlock *EntryBB = Builder.GetInsertBlock();
  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "rdx.loop", F);
  BasicBlock *ExitBB =
      EntryBB->splitBasicBlock(Builder.GetInsertPoint(), "rdx.exit");

  // splitBasicBlock left EntryBB ending in `br ExitBB`; route it through the
  // loop instead.
  EntryBB->getTerminator()->eraseFromParent();
  Builder.SetInsertPoint(EntryBB);
  Builder.CreateBr(LoopBB);

  Builder.SetInsertPoint(LoopBB);
  PHINode *IV = Builder.CreatePHI(IdxTy, 2, "rdx.iv");
  PHINode *AccPhi = Builder.CreatePHI(EltTy, 2, "rdx.acc");
  IV->addIncoming(ConstantInt::get(IdxTy, 0), EntryBB);
  AccPhi->addIncoming(Acc, EntryBB);

  // Fold the current lane into the accumulator. The accumulator is the LHS so
  // that lanes are combined strictly in increasing index order.
  Value *Elt = Builder.CreateExtractElement(Vec, IV);
  Value *Res =
      Builder.CreateBinOp(static_cast<Instruction::BinaryOps>(RdxOpcode),
                          AccPhi, Elt, "rdx.op");

  // IV counts up to NumElts, which fits in IdxTy, so the increment cannot
  // wrap.
  Value *NextIV =
      Builder.CreateNUWAdd(IV, ConstantInt::get(IdxTy, 1), "rdx.next");
  IV->addIncoming(NextIV, LoopBB);
  AccPhi->addIncoming(Res, LoopBB);

  // The loop is bottom-tested: the body always runs at least once, which is
  // fine because a vector type always has at least one lane.
  Value *Done = Builder.CreateICmpEQ(NextIV, NumElts, "rdx.done");
  Builder.CreateCondBr(Done, ExitBB, LoopBB);

  // LoopBB is the sole predecessor of ExitBB, so Res dominates every use the
  // caller creates from here on.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Res;
}

// Helper to generate a log2 shuffle reduction.
Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
unsigned Op,
Expand Down
140 changes: 140 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -953,3 +953,143 @@ define half @vreduce_fmax_nxv12f16(<vscale x 12 x half> %v) {
%red = call half @llvm.vector.reduce.fmax.nxv12f16(<vscale x 12 x half> %v)
ret half %red
}

; Reassociable (reassoc) fmul reduction of <vscale x 1 x float> with start
; value %s. The expected output is a scalar loop (%rdx.loop) that slides one
; lane down per iteration, moves it to a scalar FP register and multiplies it
; into fa0; the loop bound is vlenb >> 3.
define float @vreduce_fmul_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fmul_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: .LBB75_1: # %rdx.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vslidedown.vx v9, v8, a0
; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: fmul.s fa0, fa0, fa5
; CHECK-NEXT: bne a0, a1, .LBB75_1
; CHECK-NEXT: # %bb.2: # %rdx.exit
; CHECK-NEXT: ret
%red = call reassoc float @llvm.vector.reduce.fmul.nxv1f32(float %s, <vscale x 1 x float> %v)
ret float %red
}

; Ordered fmul reduction (call carries no reassoc flag) of
; <vscale x 1 x float> with start value %s. The expected output is a scalar
; loop (%rdx.loop) that multiplies one lane per iteration into fa0, starting
; at lane 0; the loop bound is vlenb >> 3.
define float @vreduce_ord_fmul_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fmul_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: .LBB76_1: # %rdx.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vslidedown.vx v9, v8, a0
; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: fmul.s fa0, fa0, fa5
; CHECK-NEXT: bne a0, a1, .LBB76_1
; CHECK-NEXT: # %bb.2: # %rdx.exit
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmul.nxv1f32(float %s, <vscale x 1 x float> %v)
ret float %red
}

; Reassociable (reassoc) fmul reduction of <vscale x 2 x float> with start
; value %s. The expected output is a scalar loop (%rdx.loop) that multiplies
; one lane per iteration into fa0; the loop bound is vlenb >> 2.
define float @vreduce_fmul_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fmul_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: .LBB77_1: # %rdx.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vslidedown.vx v9, v8, a0
; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: fmul.s fa0, fa0, fa5
; CHECK-NEXT: bne a0, a1, .LBB77_1
; CHECK-NEXT: # %bb.2: # %rdx.exit
; CHECK-NEXT: ret
%red = call reassoc float @llvm.vector.reduce.fmul.nxv2f32(float %s, <vscale x 2 x float> %v)
ret float %red
}

; Ordered fmul reduction (call carries no reassoc flag) of
; <vscale x 2 x float> with start value %s. The expected output is a scalar
; loop (%rdx.loop) that multiplies one lane per iteration into fa0, starting
; at lane 0; the loop bound is vlenb >> 2.
define float @vreduce_ord_fmul_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fmul_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 2
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: .LBB78_1: # %rdx.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vslidedown.vx v9, v8, a0
; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: fmul.s fa0, fa0, fa5
; CHECK-NEXT: bne a0, a1, .LBB78_1
; CHECK-NEXT: # %bb.2: # %rdx.exit
; CHECK-NEXT: ret
%red = call float @llvm.vector.reduce.fmul.nxv2f32(float %s, <vscale x 2 x float> %v)
ret float %red
}

; Reassociable (reassoc) fmul reduction of <vscale x 4 x float> with start
; value %s. The source vector occupies an m2 register group here; the expected
; output is still a scalar loop (%rdx.loop) that multiplies one lane per
; iteration into fa0, with loop bound vlenb >> 1.
define float @vreduce_fmul_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fmul_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 1
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT: .LBB79_1: # %rdx.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vslidedown.vx v10, v8, a0
; CHECK-NEXT: vfmv.f.s fa5, v10
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: fmul.s fa0, fa0, fa5
; CHECK-NEXT: bne a0, a1, .LBB79_1
; CHECK-NEXT: # %bb.2: # %rdx.exit
; CHECK-NEXT: ret
%red = call reassoc float @llvm.vector.reduce.fmul.nxv4f32(float %s, <vscale x 4 x float> %v)
ret float %red
}

; Reassociable (reassoc) fmul reduction of <vscale x 1 x double> with start
; value %s. The expected output is a scalar loop (%rdx.loop) that multiplies
; one lane per iteration into fa0 using fmul.d; the loop bound is vlenb >> 3.
define double @vreduce_fmul_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fmul_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: .LBB80_1: # %rdx.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vslidedown.vx v9, v8, a0
; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: fmul.d fa0, fa0, fa5
; CHECK-NEXT: bne a0, a1, .LBB80_1
; CHECK-NEXT: # %bb.2: # %rdx.exit
; CHECK-NEXT: ret
%red = call reassoc double @llvm.vector.reduce.fmul.nxv1f64(double %s, <vscale x 1 x double> %v)
ret double %red
}

; Ordered fmul reduction (call carries no reassoc flag) of
; <vscale x 1 x double> with start value %s. The expected output is a scalar
; loop (%rdx.loop) that multiplies one lane per iteration into fa0 using
; fmul.d, starting at lane 0; the loop bound is vlenb >> 3.
define double @vreduce_ord_fmul_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fmul_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a1, a1, 3
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: .LBB81_1: # %rdx.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vslidedown.vx v9, v8, a0
; CHECK-NEXT: vfmv.f.s fa5, v9
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: fmul.d fa0, fa0, fa5
; CHECK-NEXT: bne a0, a1, .LBB81_1
; CHECK-NEXT: # %bb.2: # %rdx.exit
; CHECK-NEXT: ret
%red = call double @llvm.vector.reduce.fmul.nxv1f64(double %s, <vscale x 1 x double> %v)
ret double %red
}
Loading