11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
3- ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
4- ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
5- ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250,GFX1250-FAKE16
6- ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX1250,GFX1250-REAL16
2+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=tahiti -o - < %s | FileCheck %s --check-prefixes=GFX,GFX6
3+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
4+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
5+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX1250,GFX1250-FAKE16
6+ ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -o - < %s | FileCheck %s --check-prefixes=GFX1250,GFX1250-REAL16
77
88declare i16 @llvm.abs.i16(i16, i1)
99declare i32 @llvm.abs.i32(i32, i1)
@@ -15,23 +15,11 @@ declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
1515declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
1616
1717define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
18- ; GFX6-LABEL: abs_sgpr_i16:
19- ; GFX6: ; %bb.0:
20- ; GFX6-NEXT: s_sext_i32_i16 s0, s0
21- ; GFX6-NEXT: s_abs_i32 s0, s0
22- ; GFX6-NEXT: ; return to shader part epilog
23- ;
24- ; GFX8-LABEL: abs_sgpr_i16:
25- ; GFX8: ; %bb.0:
26- ; GFX8-NEXT: s_sext_i32_i16 s0, s0
27- ; GFX8-NEXT: s_abs_i32 s0, s0
28- ; GFX8-NEXT: ; return to shader part epilog
29- ;
30- ; GFX10-LABEL: abs_sgpr_i16:
31- ; GFX10: ; %bb.0:
32- ; GFX10-NEXT: s_sext_i32_i16 s0, s0
33- ; GFX10-NEXT: s_abs_i32 s0, s0
34- ; GFX10-NEXT: ; return to shader part epilog
18+ ; GFX-LABEL: abs_sgpr_i16:
19+ ; GFX: ; %bb.0:
20+ ; GFX-NEXT: s_sext_i32_i16 s0, s0
21+ ; GFX-NEXT: s_abs_i32 s0, s0
22+ ; GFX-NEXT: ; return to shader part epilog
3523;
3624; GFX1250-LABEL: abs_sgpr_i16:
3725; GFX1250: ; %bb.0:
@@ -45,20 +33,10 @@ define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
4533}
4634
4735define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
48- ; GFX6-LABEL: abs_sgpr_i32:
49- ; GFX6: ; %bb.0:
50- ; GFX6-NEXT: s_abs_i32 s0, s0
51- ; GFX6-NEXT: ; return to shader part epilog
52- ;
53- ; GFX8-LABEL: abs_sgpr_i32:
54- ; GFX8: ; %bb.0:
55- ; GFX8-NEXT: s_abs_i32 s0, s0
56- ; GFX8-NEXT: ; return to shader part epilog
57- ;
58- ; GFX10-LABEL: abs_sgpr_i32:
59- ; GFX10: ; %bb.0:
60- ; GFX10-NEXT: s_abs_i32 s0, s0
61- ; GFX10-NEXT: ; return to shader part epilog
36+ ; GFX-LABEL: abs_sgpr_i32:
37+ ; GFX: ; %bb.0:
38+ ; GFX-NEXT: s_abs_i32 s0, s0
39+ ; GFX-NEXT: ; return to shader part epilog
6240;
6341; GFX1250-LABEL: abs_sgpr_i32:
6442; GFX1250: ; %bb.0:
@@ -70,32 +48,14 @@ define amdgpu_cs i32 @abs_sgpr_i32(i32 inreg %arg) {
7048}
7149
7250define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
73- ; GFX6-LABEL: abs_sgpr_i64:
74- ; GFX6: ; %bb.0:
75- ; GFX6-NEXT: s_ashr_i32 s2, s1, 31
76- ; GFX6-NEXT: s_add_u32 s0, s0, s2
77- ; GFX6-NEXT: s_mov_b32 s3, s2
78- ; GFX6-NEXT: s_addc_u32 s1, s1, s2
79- ; GFX6-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
80- ; GFX6-NEXT: ; return to shader part epilog
81- ;
82- ; GFX8-LABEL: abs_sgpr_i64:
83- ; GFX8: ; %bb.0:
84- ; GFX8-NEXT: s_ashr_i32 s2, s1, 31
85- ; GFX8-NEXT: s_add_u32 s0, s0, s2
86- ; GFX8-NEXT: s_mov_b32 s3, s2
87- ; GFX8-NEXT: s_addc_u32 s1, s1, s2
88- ; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
89- ; GFX8-NEXT: ; return to shader part epilog
90- ;
91- ; GFX10-LABEL: abs_sgpr_i64:
92- ; GFX10: ; %bb.0:
93- ; GFX10-NEXT: s_ashr_i32 s2, s1, 31
94- ; GFX10-NEXT: s_add_u32 s0, s0, s2
95- ; GFX10-NEXT: s_mov_b32 s3, s2
96- ; GFX10-NEXT: s_addc_u32 s1, s1, s2
97- ; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
98- ; GFX10-NEXT: ; return to shader part epilog
51+ ; GFX-LABEL: abs_sgpr_i64:
52+ ; GFX: ; %bb.0:
53+ ; GFX-NEXT: s_ashr_i32 s2, s1, 31
54+ ; GFX-NEXT: s_add_u32 s0, s0, s2
55+ ; GFX-NEXT: s_mov_b32 s3, s2
56+ ; GFX-NEXT: s_addc_u32 s1, s1, s2
57+ ; GFX-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
58+ ; GFX-NEXT: ; return to shader part epilog
9959;
10060; GFX1250-LABEL: abs_sgpr_i64:
10161; GFX1250: ; %bb.0:
@@ -112,29 +72,13 @@ define amdgpu_cs i64 @abs_sgpr_i64(i64 inreg %arg) {
11272}
11373
11474define amdgpu_cs <4 x i32> @abs_sgpr_v4i32(<4 x i32> inreg %arg) {
115- ; GFX6-LABEL: abs_sgpr_v4i32:
116- ; GFX6: ; %bb.0:
117- ; GFX6-NEXT: s_abs_i32 s0, s0
118- ; GFX6-NEXT: s_abs_i32 s1, s1
119- ; GFX6-NEXT: s_abs_i32 s2, s2
120- ; GFX6-NEXT: s_abs_i32 s3, s3
121- ; GFX6-NEXT: ; return to shader part epilog
122- ;
123- ; GFX8-LABEL: abs_sgpr_v4i32:
124- ; GFX8: ; %bb.0:
125- ; GFX8-NEXT: s_abs_i32 s0, s0
126- ; GFX8-NEXT: s_abs_i32 s1, s1
127- ; GFX8-NEXT: s_abs_i32 s2, s2
128- ; GFX8-NEXT: s_abs_i32 s3, s3
129- ; GFX8-NEXT: ; return to shader part epilog
130- ;
131- ; GFX10-LABEL: abs_sgpr_v4i32:
132- ; GFX10: ; %bb.0:
133- ; GFX10-NEXT: s_abs_i32 s0, s0
134- ; GFX10-NEXT: s_abs_i32 s1, s1
135- ; GFX10-NEXT: s_abs_i32 s2, s2
136- ; GFX10-NEXT: s_abs_i32 s3, s3
137- ; GFX10-NEXT: ; return to shader part epilog
75+ ; GFX-LABEL: abs_sgpr_v4i32:
76+ ; GFX: ; %bb.0:
77+ ; GFX-NEXT: s_abs_i32 s0, s0
78+ ; GFX-NEXT: s_abs_i32 s1, s1
79+ ; GFX-NEXT: s_abs_i32 s2, s2
80+ ; GFX-NEXT: s_abs_i32 s3, s3
81+ ; GFX-NEXT: ; return to shader part epilog
13882;
13983; GFX1250-LABEL: abs_sgpr_v4i32:
14084; GFX1250: ; %bb.0:
@@ -331,29 +275,13 @@ define <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
331275}
332276
333277define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
334- ; GFX6-LABEL: abs_sgpr_v2i8:
335- ; GFX6: ; %bb.0:
336- ; GFX6-NEXT: s_sext_i32_i8 s0, s0
337- ; GFX6-NEXT: s_sext_i32_i8 s1, s1
338- ; GFX6-NEXT: s_abs_i32 s0, s0
339- ; GFX6-NEXT: s_abs_i32 s1, s1
340- ; GFX6-NEXT: ; return to shader part epilog
341- ;
342- ; GFX8-LABEL: abs_sgpr_v2i8:
343- ; GFX8: ; %bb.0:
344- ; GFX8-NEXT: s_sext_i32_i8 s0, s0
345- ; GFX8-NEXT: s_sext_i32_i8 s1, s1
346- ; GFX8-NEXT: s_abs_i32 s0, s0
347- ; GFX8-NEXT: s_abs_i32 s1, s1
348- ; GFX8-NEXT: ; return to shader part epilog
349- ;
350- ; GFX10-LABEL: abs_sgpr_v2i8:
351- ; GFX10: ; %bb.0:
352- ; GFX10-NEXT: s_sext_i32_i8 s0, s0
353- ; GFX10-NEXT: s_sext_i32_i8 s1, s1
354- ; GFX10-NEXT: s_abs_i32 s0, s0
355- ; GFX10-NEXT: s_abs_i32 s1, s1
356- ; GFX10-NEXT: ; return to shader part epilog
278+ ; GFX-LABEL: abs_sgpr_v2i8:
279+ ; GFX: ; %bb.0:
280+ ; GFX-NEXT: s_sext_i32_i8 s0, s0
281+ ; GFX-NEXT: s_sext_i32_i8 s1, s1
282+ ; GFX-NEXT: s_abs_i32 s0, s0
283+ ; GFX-NEXT: s_abs_i32 s1, s1
284+ ; GFX-NEXT: ; return to shader part epilog
357285;
358286; GFX1250-LABEL: abs_sgpr_v2i8:
359287; GFX1250: ; %bb.0:
@@ -432,35 +360,15 @@ define <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
432360}
433361
434362define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
435- ; GFX6-LABEL: abs_sgpr_v3i8:
436- ; GFX6: ; %bb.0:
437- ; GFX6-NEXT: s_sext_i32_i8 s0, s0
438- ; GFX6-NEXT: s_sext_i32_i8 s1, s1
439- ; GFX6-NEXT: s_sext_i32_i8 s2, s2
440- ; GFX6-NEXT: s_abs_i32 s0, s0
441- ; GFX6-NEXT: s_abs_i32 s1, s1
442- ; GFX6-NEXT: s_abs_i32 s2, s2
443- ; GFX6-NEXT: ; return to shader part epilog
444- ;
445- ; GFX8-LABEL: abs_sgpr_v3i8:
446- ; GFX8: ; %bb.0:
447- ; GFX8-NEXT: s_sext_i32_i8 s0, s0
448- ; GFX8-NEXT: s_sext_i32_i8 s1, s1
449- ; GFX8-NEXT: s_sext_i32_i8 s2, s2
450- ; GFX8-NEXT: s_abs_i32 s0, s0
451- ; GFX8-NEXT: s_abs_i32 s1, s1
452- ; GFX8-NEXT: s_abs_i32 s2, s2
453- ; GFX8-NEXT: ; return to shader part epilog
454- ;
455- ; GFX10-LABEL: abs_sgpr_v3i8:
456- ; GFX10: ; %bb.0:
457- ; GFX10-NEXT: s_sext_i32_i8 s0, s0
458- ; GFX10-NEXT: s_sext_i32_i8 s1, s1
459- ; GFX10-NEXT: s_sext_i32_i8 s2, s2
460- ; GFX10-NEXT: s_abs_i32 s0, s0
461- ; GFX10-NEXT: s_abs_i32 s1, s1
462- ; GFX10-NEXT: s_abs_i32 s2, s2
463- ; GFX10-NEXT: ; return to shader part epilog
363+ ; GFX-LABEL: abs_sgpr_v3i8:
364+ ; GFX: ; %bb.0:
365+ ; GFX-NEXT: s_sext_i32_i8 s0, s0
366+ ; GFX-NEXT: s_sext_i32_i8 s1, s1
367+ ; GFX-NEXT: s_sext_i32_i8 s2, s2
368+ ; GFX-NEXT: s_abs_i32 s0, s0
369+ ; GFX-NEXT: s_abs_i32 s1, s1
370+ ; GFX-NEXT: s_abs_i32 s2, s2
371+ ; GFX-NEXT: ; return to shader part epilog
464372;
465373; GFX1250-LABEL: abs_sgpr_v3i8:
466374; GFX1250: ; %bb.0:
@@ -730,21 +638,26 @@ define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
730638; GFX10-LABEL: abs_vgpr_v3i16:
731639; GFX10: ; %bb.0:
732640; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
641+ ; GFX10-NEXT: v_sub_nc_u16 v2, 0, v1
642+ ; GFX10-NEXT: v_max_i16 v1, v1, v2
733643; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0
734- ; GFX10-NEXT: v_sub_nc_u16 v3, 0, v1
644+ ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
735645; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
736- ; GFX10-NEXT: v_max_i16 v1, v1, v3
646+ ; GFX10-NEXT: v_lshl_or_b32 v1, s4, 16, v1
737647; GFX10-NEXT: s_setpc_b64 s[30:31]
738648;
739649; GFX1250-FAKE16-LABEL: abs_vgpr_v3i16:
740650; GFX1250-FAKE16: ; %bb.0:
741651; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
742652; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
653+ ; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v2, 0, v1
654+ ; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
655+ ; GFX1250-FAKE16-NEXT: v_max_i16 v1, v1, v2
743656; GFX1250-FAKE16-NEXT: v_pk_sub_i16 v2, 0, v0
744- ; GFX1250-FAKE16-NEXT: v_sub_nc_u16 v3, 0, v1
657+ ; GFX1250-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
745658; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
746659; GFX1250-FAKE16-NEXT: v_pk_max_i16 v0, v0, v2
747- ; GFX1250-FAKE16-NEXT: v_max_i16 v1, v1, v3
660+ ; GFX1250-FAKE16-NEXT: v_lshl_or_b32 v1, s0, 16, v1
748661; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
749662;
750663; GFX1250-REAL16-LABEL: abs_vgpr_v3i16:
@@ -760,5 +673,3 @@ define <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) {
760673 %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
761674 ret <3 x i16> %res
762675}
763- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
764- ; GFX: {{.*}}
0 commit comments