Skip to content

Commit 0a4d470

Browse files
authored
AMDGPU/GlobalISel: RegBankLegalize rules for G_ABS (#192760)
1 parent 3c7c4d4 commit 0a4d470

6 files changed

Lines changed: 123 additions & 147 deletions

File tree

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,58 @@ bool RegBankLegalizeHelper::lowerInsVecEltTo32(MachineInstr &MI) {
10831083
return true;
10841084
}
10851085

1086+
bool RegBankLegalizeHelper::lowerAbsToNegMax(MachineInstr &MI) {
  // AMDGPU has no integer v_abs instruction, so a divergent G_ABS is rewritten
  // in the VGPR bank as the signed maximum of the value and its negation:
  //   zero = 0
  //   neg  = G_SUB zero, x
  //   dst  = G_SMAX x, neg
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);

  // Materialize a zero of the operation type.
  Register Zero;
  if (Ty != V2S16) {
    assert((Ty == S32 || Ty == S16) && "unexpected type for AbsToNegMax");
    Zero = B.buildConstant({VgprRB, Ty}, 0).getReg(0);
  } else {
    // The packed case is assembled from a single S16 zero used for both
    // lanes of a build_vector instead of being emitted as one constant.
    Register ZeroElt = B.buildConstant({VgprRB, S16}, 0).getReg(0);
    Zero = B.buildBuildVector({VgprRB, Ty}, {ZeroElt, ZeroElt}).getReg(0);
  }

  Register Negated = B.buildSub({VgprRB, Ty}, Zero, Src).getReg(0);
  B.buildSMax(Dst, Src, Negated);

  // The original G_ABS has been fully replaced.
  MI.eraseFromParent();
  return true;
}
1113+
1114+
bool RegBankLegalizeHelper::lowerAbsToS32(MachineInstr &MI) {
  // SALU only provides s_abs_i32; there is no direct uniform V2S16 abs.
  // A uniform V2S16 G_ABS is therefore split into two sign-extended 32-bit
  // SGPR values, G_ABS is re-emitted on each, and the results are repacked:
  //   packed = bitcast <2 x s16> src to s32
  //   lo     = sext_inreg packed, 16
  //   hi     = ashr packed, 16
  //   dst    = build_vector_trunc G_ABS(lo), G_ABS(hi)
  // Each re-emitted G_ABS(SgprRB, S32) selects to s_abs_i32.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  // Unpack both halves with their sign bits extended to 32 bits.
  auto Packed = B.buildBitcast({SgprRB_S32}, Src);
  auto Lo = B.buildSExtInReg({SgprRB_S32}, Packed, 16);
  auto ShiftAmt = B.buildConstant({SgprRB_S32}, 16);
  auto Hi = B.buildAShr({SgprRB_S32}, Packed, ShiftAmt);

  // Take the absolute value of each half as a scalar 32-bit operation.
  Register AbsLo = B.buildInstr(AMDGPU::G_ABS, {{SgprRB_S32}}, {Lo}).getReg(0);
  Register AbsHi = B.buildInstr(AMDGPU::G_ABS, {{SgprRB_S32}}, {Hi}).getReg(0);

  // Truncate each result back to 16 bits and repack into the destination.
  B.buildBuildVectorTrunc(Dst, {AbsLo, AbsHi});

  MI.eraseFromParent();
  return true;
}
1137+
10861138
bool RegBankLegalizeHelper::lower(MachineInstr &MI,
10871139
const RegBankLLTMapping &Mapping,
10881140
WaterfallInfo &WFI) {
@@ -1365,6 +1417,10 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
13651417
return lowerInsVecEltToSel(MI);
13661418
case InsVecEltTo32:
13671419
return lowerInsVecEltTo32(MI);
1420+
case AbsToNegMax:
1421+
return lowerAbsToNegMax(MI);
1422+
case AbsToS32:
1423+
return lowerAbsToS32(MI);
13681424
}
13691425

13701426
if (!WFI.SgprWaterfallOperandRegs.empty()) {

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ class RegBankLegalizeHelper {
144144
bool lowerExtrVecEltTo32(MachineInstr &MI);
145145
bool lowerInsVecEltToSel(MachineInstr &MI);
146146
bool lowerInsVecEltTo32(MachineInstr &MI);
147+
bool lowerAbsToNegMax(MachineInstr &MI);
148+
bool lowerAbsToS32(MachineInstr &MI);
147149
bool applyRegisterBanksINTRIN_IMAGE(MachineInstr &MI);
148150
};
149151

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1236,7 +1236,13 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
12361236
.Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
12371237
.Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});
12381238

1239-
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
1239+
addRulesForGOpcs({G_ABS}, Standard)
1240+
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}})
1241+
.Div(S16, {{Vgpr16}, {Vgpr16}, AbsToNegMax})
1242+
.Uni(S32, {{Sgpr32}, {Sgpr32}})
1243+
.Div(S32, {{Vgpr32}, {Vgpr32}, AbsToNegMax})
1244+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, AbsToS32})
1245+
.Div(V2S16, {{VgprV2S16}, {VgprV2S16}, AbsToNegMax});
12401246

12411247
addRulesForGOpcs({G_BITREVERSE}, Standard)
12421248
.Uni(S32, {{Sgpr32}, {Sgpr32}})

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,9 @@ enum LoweringMethodID {
301301
ExtrVecEltToSel,
302302
ExtrVecEltTo32,
303303
InsVecEltToSel,
304-
InsVecEltTo32
304+
InsVecEltTo32,
305+
AbsToNegMax,
306+
AbsToS32
305307
};
306308

307309
enum FastRulesTypes {

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll

Lines changed: 54 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
3-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
4-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
5-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250,GFX1250-FAKE16
6-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250,GFX1250-REAL16
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
3+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
4+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
5+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX1250,GFX1250-FAKE16
6+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX1250,GFX1250-REAL16
77

88
declare i16 @llvm.abs.i16(i16, i1)
99
declare i32 @llvm.abs.i32(i32, i1)
@@ -15,23 +15,11 @@ declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
1515
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
1616

1717
define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
18-
; GFX6-LABEL: abs_sgpr_i16:
19-
; GFX6: ; %bb.0:
20-
; GFX6-NEXT: s_sext_i32_i16 s0, s0
21-
; GFX6-NEXT: s_abs_i32 s0, s0
22-
; GFX6-NEXT: ; return to shader part epilog
23-
;
24-
; GFX8-LABEL: abs_sgpr_i16:
25-
; GFX8: ; %bb.0:
26-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
27-
; GFX8-NEXT: s_abs_i32 s0, s0
28-
; GFX8-NEXT: ; return to shader part epilog
29-
;
30-
; GFX10-LABEL: abs_sgpr_i16:
31-
; GFX10: ; %bb.0:
32-
; GFX10-NEXT: s_sext_i32_i16 s0, s0
33-
; GFX10-NEXT: s_abs_i32 s0, s0
34-
; GFX10-NEXT: ; return to shader part epilog
18+
; GFX-LABEL: abs_sgpr_i16:
19+
; GFX: ; %bb.0:
20+
; GFX-NEXT: s_sext_i32_i16 s0, s0
21+
; GFX-NEXT: s_abs_i32 s0, s0
22+
; GFX-NEXT: ; return to shader part epilog
3523
;
3624
; GFX1250-LABEL: abs_sgpr_i16:
3725
; GFX1250: ; %bb.0:
@@ -45,20 +33,10 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
4533
}
4634

4735
define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
48-
; GFX6-LABEL: abs_sgpr_i32:
49-
; GFX6: ; %bb.0:
50-
; GFX6-NEXT: s_abs_i32 s0, s0
51-
; GFX6-NEXT: ; return to shader part epilog
52-
;
53-
; GFX8-LABEL: abs_sgpr_i32:
54-
; GFX8: ; %bb.0:
55-
; GFX8-NEXT: s_abs_i32 s0, s0
56-
; GFX8-NEXT: ; return to shader part epilog
57-
;
58-
; GFX10-LABEL: abs_sgpr_i32:
59-
; GFX10: ; %bb.0:
60-
; GFX10-NEXT: s_abs_i32 s0, s0
61-
; GFX10-NEXT: ; return to shader part epilog
36+
; GFX-LABEL: abs_sgpr_i32:
37+
; GFX: ; %bb.0:
38+
; GFX-NEXT: s_abs_i32 s0, s0
39+
; GFX-NEXT: ; return to shader part epilog
6240
;
6341
; GFX1250-LABEL: abs_sgpr_i32:
6442
; GFX1250: ; %bb.0:
@@ -70,32 +48,14 @@ define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
7048
}
7149

7250
define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
73-
; GFX6-LABEL: abs_sgpr_i64:
74-
; GFX6: ; %bb.0:
75-
; GFX6-NEXT: s_ashr_i32 s2, s1, 31
76-
; GFX6-NEXT: s_add_u32 s0, s0, s2
77-
; GFX6-NEXT: s_mov_b32 s3, s2
78-
; GFX6-NEXT: s_addc_u32 s1, s1, s2
79-
; GFX6-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
80-
; GFX6-NEXT: ; return to shader part epilog
81-
;
82-
; GFX8-LABEL: abs_sgpr_i64:
83-
; GFX8: ; %bb.0:
84-
; GFX8-NEXT: s_ashr_i32 s2, s1, 31
85-
; GFX8-NEXT: s_add_u32 s0, s0, s2
86-
; GFX8-NEXT: s_mov_b32 s3, s2
87-
; GFX8-NEXT: s_addc_u32 s1, s1, s2
88-
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
89-
; GFX8-NEXT: ; return to shader part epilog
90-
;
91-
; GFX10-LABEL: abs_sgpr_i64:
92-
; GFX10: ; %bb.0:
93-
; GFX10-NEXT: s_ashr_i32 s2, s1, 31
94-
; GFX10-NEXT: s_add_u32 s0, s0, s2
95-
; GFX10-NEXT: s_mov_b32 s3, s2
96-
; GFX10-NEXT: s_addc_u32 s1, s1, s2
97-
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
98-
; GFX10-NEXT: ; return to shader part epilog
51+
; GFX-LABEL: abs_sgpr_i64:
52+
; GFX: ; %bb.0:
53+
; GFX-NEXT: s_ashr_i32 s2, s1, 31
54+
; GFX-NEXT: s_add_u32 s0, s0, s2
55+
; GFX-NEXT: s_mov_b32 s3, s2
56+
; GFX-NEXT: s_addc_u32 s1, s1, s2
57+
; GFX-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
58+
; GFX-NEXT: ; return to shader part epilog
9959
;
10060
; GFX1250-LABEL: abs_sgpr_i64:
10161
; GFX1250: ; %bb.0:
@@ -112,29 +72,13 @@ define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
11272
}
11373

11474
define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
115-
; GFX6-LABEL: abs_sgpr_v4i32:
116-
; GFX6: ; %bb.0:
117-
; GFX6-NEXT: s_abs_i32 s0, s0
118-
; GFX6-NEXT: s_abs_i32 s1, s1
119-
; GFX6-NEXT: s_abs_i32 s2, s2
120-
; GFX6-NEXT: s_abs_i32 s3, s3
121-
; GFX6-NEXT: ; return to shader part epilog
122-
;
123-
; GFX8-LABEL: abs_sgpr_v4i32:
124-
; GFX8: ; %bb.0:
125-
; GFX8-NEXT: s_abs_i32 s0, s0
126-
; GFX8-NEXT: s_abs_i32 s1, s1
127-
; GFX8-NEXT: s_abs_i32 s2, s2
128-
; GFX8-NEXT: s_abs_i32 s3, s3
129-
; GFX8-NEXT: ; return to shader part epilog
130-
;
131-
; GFX10-LABEL: abs_sgpr_v4i32:
132-
; GFX10: ; %bb.0:
133-
; GFX10-NEXT: s_abs_i32 s0, s0
134-
; GFX10-NEXT: s_abs_i32 s1, s1
135-
; GFX10-NEXT: s_abs_i32 s2, s2
136-
; GFX10-NEXT: s_abs_i32 s3, s3
137-
; GFX10-NEXT: ; return to shader part epilog
75+
; GFX-LABEL: abs_sgpr_v4i32:
76+
; GFX: ; %bb.0:
77+
; GFX-NEXT: s_abs_i32 s0, s0
78+
; GFX-NEXT: s_abs_i32 s1, s1
79+
; GFX-NEXT: s_abs_i32 s2, s2
80+
; GFX-NEXT: s_abs_i32 s3, s3
81+
; GFX-NEXT: ; return to shader part epilog
13882
;
13983
; GFX1250-LABEL: abs_sgpr_v4i32:
14084
; GFX1250: ; %bb.0:
@@ -331,29 +275,13 @@ define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
331275
}
332276

333277
define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
334-
; GFX6-LABEL: abs_sgpr_v2i8:
335-
; GFX6: ; %bb.0:
336-
; GFX6-NEXT: s_sext_i32_i8 s0, s0
337-
; GFX6-NEXT: s_sext_i32_i8 s1, s1
338-
; GFX6-NEXT: s_abs_i32 s0, s0
339-
; GFX6-NEXT: s_abs_i32 s1, s1
340-
; GFX6-NEXT: ; return to shader part epilog
341-
;
342-
; GFX8-LABEL: abs_sgpr_v2i8:
343-
; GFX8: ; %bb.0:
344-
; GFX8-NEXT: s_sext_i32_i8 s0, s0
345-
; GFX8-NEXT: s_sext_i32_i8 s1, s1
346-
; GFX8-NEXT: s_abs_i32 s0, s0
347-
; GFX8-NEXT: s_abs_i32 s1, s1
348-
; GFX8-NEXT: ; return to shader part epilog
349-
;
350-
; GFX10-LABEL: abs_sgpr_v2i8:
351-
; GFX10: ; %bb.0:
352-
; GFX10-NEXT: s_sext_i32_i8 s0, s0
353-
; GFX10-NEXT: s_sext_i32_i8 s1, s1
354-
; GFX10-NEXT: s_abs_i32 s0, s0
355-
; GFX10-NEXT: s_abs_i32 s1, s1
356-
; GFX10-NEXT: ; return to shader part epilog
278+
; GFX-LABEL: abs_sgpr_v2i8:
279+
; GFX: ; %bb.0:
280+
; GFX-NEXT: s_sext_i32_i8 s0, s0
281+
; GFX-NEXT: s_sext_i32_i8 s1, s1
282+
; GFX-NEXT: s_abs_i32 s0, s0
283+
; GFX-NEXT: s_abs_i32 s1, s1
284+
; GFX-NEXT: ; return to shader part epilog
357285
;
358286
; GFX1250-LABEL: abs_sgpr_v2i8:
359287
; GFX1250: ; %bb.0:
@@ -432,35 +360,15 @@ define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
432360
}
433361

434362
define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
435-
; GFX6-LABEL: abs_sgpr_v3i8:
436-
; GFX6: ; %bb.0:
437-
; GFX6-NEXT: s_sext_i32_i8 s0, s0
438-
; GFX6-NEXT: s_sext_i32_i8 s1, s1
439-
; GFX6-NEXT: s_sext_i32_i8 s2, s2
440-
; GFX6-NEXT: s_abs_i32 s0, s0
441-
; GFX6-NEXT: s_abs_i32 s1, s1
442-
; GFX6-NEXT: s_abs_i32 s2, s2
443-
; GFX6-NEXT: ; return to shader part epilog
444-
;
445-
; GFX8-LABEL: abs_sgpr_v3i8:
446-
; GFX8: ; %bb.0:
447-
; GFX8-NEXT: s_sext_i32_i8 s0, s0
448-
; GFX8-NEXT: s_sext_i32_i8 s1, s1
449-
; GFX8-NEXT: s_sext_i32_i8 s2, s2
450-
; GFX8-NEXT: s_abs_i32 s0, s0
451-
; GFX8-NEXT: s_abs_i32 s1, s1
452-
; GFX8-NEXT: s_abs_i32 s2, s2
453-
; GFX8-NEXT: ; return to shader part epilog
454-
;
455-
; GFX10-LABEL: abs_sgpr_v3i8:
456-
; GFX10: ; %bb.0:
457-
; GFX10-NEXT: s_sext_i32_i8 s0, s0
458-
; GFX10-NEXT: s_sext_i32_i8 s1, s1
459-
; GFX10-NEXT: s_sext_i32_i8 s2, s2
460-
; GFX10-NEXT: s_abs_i32 s0, s0
461-
; GFX10-NEXT: s_abs_i32 s1, s1
462-
; GFX10-NEXT: s_abs_i32 s2, s2
463-
; GFX10-NEXT: ; return to shader part epilog
363+
; GFX-LABEL: abs_sgpr_v3i8:
364+
; GFX: ; %bb.0:
365+
; GFX-NEXT: s_sext_i32_i8 s0, s0
366+
; GFX-NEXT: s_sext_i32_i8 s1, s1
367+
; GFX-NEXT: s_sext_i32_i8 s2, s2
368+
; GFX-NEXT: s_abs_i32 s0, s0
369+
; GFX-NEXT: s_abs_i32 s1, s1
370+
; GFX-NEXT: s_abs_i32 s2, s2
371+
; GFX-NEXT: ; return to shader part epilog
464372
;
465373
; GFX1250-LABEL: abs_sgpr_v3i8:
466374
; GFX1250: ; %bb.0:
@@ -730,21 +638,26 @@ define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
730638
; GFX10-LABEL: abs_vgpr_v3i16:
731639
; GFX10: ; %bb.0:
732640
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641+
; GFX10-NEXT: v_sub_nc_u16 v2, 0, v1
642+
; GFX10-NEXT: v_max_i16 v1, v1, v2
733643
; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0
734-
; GFX10-NEXT: v_sub_nc_u16 v3, 0, v1
644+
; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
735645
; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
736-
; GFX10-NEXT: v_max_i16 v1, v1, v3
646+
; GFX10-NEXT: v_lshl_or_b32 v1, s4, 16, v1
737647
; GFX10-NEXT: s_setpc_b64 s[30:31]
738648
;
739649
; GFX1250-FAKE16-LABEL: abs_vgpr_v3i16:
740650
; GFX1250-FAKE16: ; %bb.0:
741651
; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
742652
; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
653+
; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v2, 0, v1
654+
; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
655+
; GFX1250-FAKE16-NEXT: v_max_i16 v1, v1, v2
743656
; GFX1250-FAKE16-NEXT: v_pk_sub_i16 v2, 0, v0
744-
; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v3, 0, v1
657+
; GFX1250-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
745658
; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
746659
; GFX1250-FAKE16-NEXT: v_pk_max_i16 v0, v0, v2
747-
; GFX1250-FAKE16-NEXT: v_max_i16 v1, v1, v3
660+
; GFX1250-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
748661
; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
749662
;
750663
; GFX1250-REAL16-LABEL: abs_vgpr_v3i16:
@@ -760,5 +673,3 @@ define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
760673
%res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
761674
ret <3 x i16> %res
762675
}
763-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
764-
; GFX: {{.*}}

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-abs.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s
33

44
---
55
name: abs_sgpr_s16
@@ -16,7 +16,6 @@ body: |
1616
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
1717
; CHECK-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16)
1818
; CHECK-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]]
19-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32)
2019
%1:_(s32) = COPY $sgpr0
2120
%2:_(s16) = G_TRUNC %1
2221
%5:_(s16) = G_ABS %2

0 commit comments

Comments
 (0)