Skip to content

Commit 62521ba

Browse files
preames and github-actions[bot]
authored and committed
Automerge: [clang][riscv] Add tests for __builtin_reduce_X support [NFC] (#193082)
It turns out we already support use of the __builtin_reduce_ family of builtins on the builtin RVV types, but we have no test coverage which demonstrates this. Note that __builtin_reduce_mul is a bit of a corner case: currently the clang part works just fine, but the lowering will crash since we don't have a vredprod-esque instruction. (See llvm/llvm-project#193094 for the lowering fix.)
2 parents 8caf773 + 66b86af commit 62521ba

File tree

1 file changed

+156
-0
lines changed

1 file changed

+156
-0
lines changed
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -triple riscv32 -target-feature +v \
3+
// RUN: -disable-O0-optnone \
4+
// RUN: -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
5+
// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
6+
// RUN: -disable-O0-optnone \
7+
// RUN: -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
8+
9+
// REQUIRES: riscv-registered-target
10+
11+
#include <riscv_vector.h>
12+
13+
// Integer reductions — signed
14+
15+
// CHECK-LABEL: @reduce_add_i32(
16+
// CHECK-NEXT: entry:
17+
// CHECK-NEXT: [[RDX_ADD:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
18+
// CHECK-NEXT: ret i32 [[RDX_ADD]]
19+
//
20+
// Applies __builtin_reduce_add to a vint32m1_t operand. The FileCheck
// assertions above expect this to become a single call to
// @llvm.vector.reduce.add.nxv2i32 whose i32 result is returned directly.
int reduce_add_i32(vint32m1_t a) {
21+
return __builtin_reduce_add(a);
22+
}
23+
24+
// CHECK-LABEL: @reduce_and_i32(
25+
// CHECK-NEXT: entry:
26+
// CHECK-NEXT: [[RDX_AND:%.*]] = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
27+
// CHECK-NEXT: ret i32 [[RDX_AND]]
28+
//
29+
// Applies __builtin_reduce_and to a vint32m1_t operand. The FileCheck
// assertions above expect this to become a single call to
// @llvm.vector.reduce.and.nxv2i32 whose i32 result is returned directly.
int reduce_and_i32(vint32m1_t a) {
30+
return __builtin_reduce_and(a);
31+
}
32+
33+
// CHECK-LABEL: @reduce_or_i32(
34+
// CHECK-NEXT: entry:
35+
// CHECK-NEXT: [[RDX_OR:%.*]] = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
36+
// CHECK-NEXT: ret i32 [[RDX_OR]]
37+
//
38+
// Applies __builtin_reduce_or to a vint32m1_t operand. The FileCheck
// assertions above expect this to become a single call to
// @llvm.vector.reduce.or.nxv2i32 whose i32 result is returned directly.
int reduce_or_i32(vint32m1_t a) {
39+
return __builtin_reduce_or(a);
40+
}
41+
42+
// CHECK-LABEL: @reduce_xor_i32(
43+
// CHECK-NEXT: entry:
44+
// CHECK-NEXT: [[RDX_XOR:%.*]] = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
45+
// CHECK-NEXT: ret i32 [[RDX_XOR]]
46+
//
47+
// Applies __builtin_reduce_xor to a vint32m1_t operand. The FileCheck
// assertions above expect this to become a single call to
// @llvm.vector.reduce.xor.nxv2i32 whose i32 result is returned directly.
int reduce_xor_i32(vint32m1_t a) {
48+
return __builtin_reduce_xor(a);
49+
}
50+
51+
// CHECK-LABEL: @reduce_smax_i32(
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: [[RDX_MIN:%.*]] = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
54+
// CHECK-NEXT: ret i32 [[RDX_MIN]]
55+
//
56+
// Applies __builtin_reduce_max to a signed vint32m1_t operand. The FileCheck
// assertions above expect the signed intrinsic
// @llvm.vector.reduce.smax.nxv2i32 to be selected.
// NOTE(review): the autogenerated capture above is named RDX_MIN even though
// this is a max reduction -- presumably it mirrors the IR value name clang
// emits; confirm before renaming it by hand.
int reduce_smax_i32(vint32m1_t a) {
57+
return __builtin_reduce_max(a);
58+
}
59+
60+
// CHECK-LABEL: @reduce_smin_i32(
61+
// CHECK-NEXT: entry:
62+
// CHECK-NEXT: [[RDX_MIN:%.*]] = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
63+
// CHECK-NEXT: ret i32 [[RDX_MIN]]
64+
//
65+
// Applies __builtin_reduce_min to a signed vint32m1_t operand. The FileCheck
// assertions above expect the signed intrinsic
// @llvm.vector.reduce.smin.nxv2i32 to be selected.
int reduce_smin_i32(vint32m1_t a) {
66+
return __builtin_reduce_min(a);
67+
}
68+
69+
// Integer reductions — unsigned
70+
71+
// CHECK-LABEL: @reduce_umax_u32(
72+
// CHECK-NEXT: entry:
73+
// CHECK-NEXT: [[RDX_MIN:%.*]] = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
74+
// CHECK-NEXT: ret i32 [[RDX_MIN]]
75+
//
76+
// Applies __builtin_reduce_max to an unsigned vuint32m1_t operand. The
// FileCheck assertions above expect the unsigned intrinsic
// @llvm.vector.reduce.umax.nxv2i32 to be selected (contrast with the smax
// variant used for vint32m1_t).
// NOTE(review): the autogenerated capture above is named RDX_MIN even though
// this is a max reduction -- presumably it mirrors the IR value name clang
// emits; confirm before renaming it by hand.
unsigned reduce_umax_u32(vuint32m1_t a) {
77+
return __builtin_reduce_max(a);
78+
}
79+
80+
// CHECK-LABEL: @reduce_umin_u32(
81+
// CHECK-NEXT: entry:
82+
// CHECK-NEXT: [[RDX_MIN:%.*]] = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
83+
// CHECK-NEXT: ret i32 [[RDX_MIN]]
84+
//
85+
// Applies __builtin_reduce_min to an unsigned vuint32m1_t operand. The
// FileCheck assertions above expect the unsigned intrinsic
// @llvm.vector.reduce.umin.nxv2i32 to be selected (contrast with the smin
// variant used for vint32m1_t).
unsigned reduce_umin_u32(vuint32m1_t a) {
86+
return __builtin_reduce_min(a);
87+
}
88+
89+
// Floating point reductions
90+
91+
// CHECK-LABEL: @reduce_fmax_f32(
92+
// CHECK-NEXT: entry:
93+
// CHECK-NEXT: [[RDX_MIN:%.*]] = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> [[A:%.*]])
94+
// CHECK-NEXT: ret float [[RDX_MIN]]
95+
//
96+
// Applies __builtin_reduce_max to a vfloat32m1_t operand. The FileCheck
// assertions above expect a call to @llvm.vector.reduce.fmax.nxv2f32
// returning float.
// NOTE(review): the autogenerated capture above is named RDX_MIN even though
// this is a max reduction -- presumably it mirrors the IR value name clang
// emits; confirm before renaming it by hand.
float reduce_fmax_f32(vfloat32m1_t a) {
97+
return __builtin_reduce_max(a);
98+
}
99+
100+
// CHECK-LABEL: @reduce_fmin_f32(
101+
// CHECK-NEXT: entry:
102+
// CHECK-NEXT: [[RDX_MIN:%.*]] = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> [[A:%.*]])
103+
// CHECK-NEXT: ret float [[RDX_MIN]]
104+
//
105+
// Applies __builtin_reduce_min to a vfloat32m1_t operand. The FileCheck
// assertions above expect a call to @llvm.vector.reduce.fmin.nxv2f32
// returning float.
float reduce_fmin_f32(vfloat32m1_t a) {
106+
return __builtin_reduce_min(a);
107+
}
108+
109+
// CHECK-LABEL: @reduce_fmaximum_f32(
110+
// CHECK-NEXT: entry:
111+
// CHECK-NEXT: [[RDX_MAXIMUM:%.*]] = call float @llvm.vector.reduce.fmaximum.nxv2f32(<vscale x 2 x float> [[A:%.*]])
112+
// CHECK-NEXT: ret float [[RDX_MAXIMUM]]
113+
//
114+
// Applies __builtin_reduce_maximum to a vfloat32m1_t operand. The FileCheck
// assertions above expect a call to @llvm.vector.reduce.fmaximum.nxv2f32,
// i.e. a different intrinsic from the fmax one produced by
// __builtin_reduce_max on the same operand type.
float reduce_fmaximum_f32(vfloat32m1_t a) {
115+
return __builtin_reduce_maximum(a);
116+
}
117+
118+
// CHECK-LABEL: @reduce_fminimum_f32(
119+
// CHECK-NEXT: entry:
120+
// CHECK-NEXT: [[RDX_MINIMUM:%.*]] = call float @llvm.vector.reduce.fminimum.nxv2f32(<vscale x 2 x float> [[A:%.*]])
121+
// CHECK-NEXT: ret float [[RDX_MINIMUM]]
122+
//
123+
// Applies __builtin_reduce_minimum to a vfloat32m1_t operand. The FileCheck
// assertions above expect a call to @llvm.vector.reduce.fminimum.nxv2f32,
// i.e. a different intrinsic from the fmin one produced by
// __builtin_reduce_min on the same operand type.
float reduce_fminimum_f32(vfloat32m1_t a) {
124+
return __builtin_reduce_minimum(a);
125+
}
126+
127+
// Different LMUL
128+
129+
// CHECK-LABEL: @reduce_add_i32_m8(
130+
// CHECK-NEXT: entry:
131+
// CHECK-NEXT: [[RDX_ADD:%.*]] = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[A:%.*]])
132+
// CHECK-NEXT: ret i32 [[RDX_ADD]]
133+
//
134+
// Same add reduction as reduce_add_i32 but on a vint32m8_t operand. The
// FileCheck assertions above expect the wider <vscale x 16 x i32> overload,
// @llvm.vector.reduce.add.nxv16i32, with the result still an i32.
int reduce_add_i32_m8(vint32m8_t a) {
135+
return __builtin_reduce_add(a);
136+
}
137+
138+
// Different element width
139+
140+
// CHECK-LABEL: @reduce_add_i64(
141+
// CHECK-NEXT: entry:
142+
// CHECK-NEXT: [[RDX_ADD:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[A:%.*]])
143+
// CHECK-NEXT: ret i64 [[RDX_ADD]]
144+
//
145+
// Add reduction on a vint64m1_t operand. The FileCheck assertions above
// expect @llvm.vector.reduce.add.nxv1i64 on <vscale x 1 x i64>, returning
// i64 on both the riscv32 and riscv64 RUN configurations.
long long reduce_add_i64(vint64m1_t a) {
146+
return __builtin_reduce_add(a);
147+
}
148+
149+
// CHECK-LABEL: @reduce_mul_i32(
150+
// CHECK-NEXT: entry:
151+
// CHECK-NEXT: [[RDX_MUL:%.*]] = call i32 @llvm.vector.reduce.mul.nxv2i32(<vscale x 2 x i32> [[A:%.*]])
152+
// CHECK-NEXT: ret i32 [[RDX_MUL]]
153+
//
154+
// Applies __builtin_reduce_mul to a vint32m1_t operand. The FileCheck
// assertions above expect a call to @llvm.vector.reduce.mul.nxv2i32.
// The RUN lines stop at LLVM IR (-emit-llvm piped through opt), so only the
// IR clang emits is verified here.
// NOTE(review): the accompanying commit message reports that backend
// lowering of this reduction currently crashes (see
// llvm/llvm-project#193094) -- confirm its status before extending this
// test to code generation.
int reduce_mul_i32(vint32m1_t a) {
155+
return __builtin_reduce_mul(a);
156+
}

0 commit comments

Comments
 (0)