blob: 35055190b348e628033c1489105a425b31a20afb [file] [log] [blame]
Nicolai Haehnleca4a3292018-12-06 14:33:40 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
3; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
4
5; ===================================================================================
6; V_ADD3_U32
7; ===================================================================================
8
; Baseline case: (a + b) + c with three non-inreg i32 arguments, which the
; checks below show in v0, v1 and v2. The GFX9 checks expect the two IR adds
; to be selected as a single v_add3_u32; the VI checks expect two separate
; v_add_u32_e32 instructions. The i32 sum is bitcast to float for the return.
9define amdgpu_ps float @add3(i32 %a, i32 %b, i32 %c) {
10; VI-LABEL: add3:
11; VI: ; %bb.0:
12; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
13; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
14; VI-NEXT: ; return to shader part epilog
15;
16; GFX9-LABEL: add3:
17; GFX9: ; %bb.0:
18; GFX9-NEXT: v_add3_u32 v0, v0, v1, v2
19; GFX9-NEXT: ; return to shader part epilog
20 %x = add i32 %a, %b
21 %result = add i32 %x, %c
22 %bc = bitcast i32 %result to float
23 ret float %bc
24}
25
; (a + b) + c where only %b is a non-inreg argument. %a and %c are inreg and
; appear as s2 and s3 in the checks, so a single three-operand add would have
; to read both SGPRs. Both targets are expected to emit two v_add_u32_e32.
26; The three-operand instruction variant is not used due to constant bus limitations.
27; TODO: with reassociation it is possible to replace a v_add_u32_e32 with an s_add_i32.
28define amdgpu_ps float @add3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
29; VI-LABEL: add3_vgpr_b:
30; VI: ; %bb.0:
31; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
32; VI-NEXT: v_add_u32_e32 v0, vcc, s3, v0
33; VI-NEXT: ; return to shader part epilog
34;
35; GFX9-LABEL: add3_vgpr_b:
36; GFX9: ; %bb.0:
37; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
38; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
39; GFX9-NEXT: ; return to shader part epilog
40 %x = add i32 %a, %b
41 %result = add i32 %x, %c
42 %bc = bitcast i32 %result to float
43 ret float %bc
44}
45
; Same three non-inreg operands as a plain three-way add, but associated as
; a + (b + c): the inner add (%b + %c) is the second operand of the outer add.
; The GFX9 checks still expect a single v_add3_u32, with the sources in the
; order v1, v2, v0 (b, c, a). The VI checks expect the inner add into v1
; followed by the outer add into v0.
46define amdgpu_ps float @add3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
47; VI-LABEL: add3_vgpr_all2:
48; VI: ; %bb.0:
49; VI-NEXT: v_add_u32_e32 v1, vcc, v1, v2
50; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
51; VI-NEXT: ; return to shader part epilog
52;
53; GFX9-LABEL: add3_vgpr_all2:
54; GFX9: ; %bb.0:
55; GFX9-NEXT: v_add3_u32 v0, v1, v2, v0
56; GFX9-NEXT: ; return to shader part epilog
57 %x = add i32 %b, %c
58 %result = add i32 %a, %x
59 %bc = bitcast i32 %result to float
60 ret float %bc
61}
62
; (a + b) + c with one inreg operand: %a is inreg (s2 in the checks) while %b
; and %c are non-inreg (v0 and v1 in the checks). With only one SGPR source,
; the GFX9 checks expect a single v_add3_u32 that reads s2 directly; the VI
; checks expect two v_add_u32_e32 instructions.
63define amdgpu_ps float @add3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
64; VI-LABEL: add3_vgpr_bc:
65; VI: ; %bb.0:
66; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
67; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
68; VI-NEXT: ; return to shader part epilog
69;
70; GFX9-LABEL: add3_vgpr_bc:
71; GFX9: ; %bb.0:
72; GFX9-NEXT: v_add3_u32 v0, s2, v0, v1
73; GFX9-NEXT: ; return to shader part epilog
74 %x = add i32 %a, %b
75 %result = add i32 %x, %c
76 %bc = bitcast i32 %result to float
77 ret float %bc
78}
79
; (a + b) + 16 with two non-inreg arguments and a constant third operand.
; The GFX9 checks expect a single v_add3_u32 with the constant 16 written
; directly as the last source operand; the VI checks expect two
; v_add_u32_e32 instructions, the second one adding 16.
80define amdgpu_ps float @add3_vgpr_const(i32 %a, i32 %b) {
81; VI-LABEL: add3_vgpr_const:
82; VI: ; %bb.0:
83; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
84; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0
85; VI-NEXT: ; return to shader part epilog
86;
87; GFX9-LABEL: add3_vgpr_const:
88; GFX9: ; %bb.0:
89; GFX9-NEXT: v_add3_u32 v0, v0, v1, 16
90; GFX9-NEXT: ; return to shader part epilog
91 %x = add i32 %a, %b
92 %result = add i32 %x, 16
93 %bc = bitcast i32 %result to float
94 ret float %bc
95}
96
; The outer sum has two uses: %outer = (a + b) + c is returned as element 0
; of the result vector and is also multiplied by %x to produce element 1.
; The inner sum %inner is used only by %outer. The GFX9 checks expect the two
; adds to still be combined into one v_add3_u32, followed by the multiply;
; the VI checks expect two v_add_u32_e32 instructions and the multiply.
97define amdgpu_ps <2 x float> @add3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) {
98; VI-LABEL: add3_multiuse_outer:
99; VI: ; %bb.0:
100; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
101; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
102; VI-NEXT: v_mul_lo_i32 v1, v0, v3
103; VI-NEXT: ; return to shader part epilog
104;
105; GFX9-LABEL: add3_multiuse_outer:
106; GFX9: ; %bb.0:
107; GFX9-NEXT: v_add3_u32 v0, v0, v1, v2
108; GFX9-NEXT: v_mul_lo_i32 v1, v0, v3
109; GFX9-NEXT: ; return to shader part epilog
110 %inner = add i32 %a, %b
111 %outer = add i32 %inner, %c
112 %x1 = mul i32 %outer, %x
113 %r1 = insertelement <2 x i32> undef, i32 %outer, i32 0
114 %r0 = insertelement <2 x i32> %r1, i32 %x1, i32 1
115 %bc = bitcast <2 x i32> %r0 to <2 x float>
116 ret <2 x float> %bc
117}
118
; The inner sum has two uses: %inner = a + b is returned as element 0 of the
; result vector and also feeds %outer = %inner + c, which is returned as
; element 1. Both targets are expected to emit two separate adds here; the
; GFX9 checks contain no v_add3_u32.
; NOTE(review): presumably the three-operand form is skipped because the
; intermediate sum is itself a result and must be computed anyway -- confirm
; against the instruction selection pattern.
119define amdgpu_ps <2 x float> @add3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
120; VI-LABEL: add3_multiuse_inner:
121; VI: ; %bb.0:
122; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
123; VI-NEXT: v_add_u32_e32 v1, vcc, v0, v2
124; VI-NEXT: ; return to shader part epilog
125;
126; GFX9-LABEL: add3_multiuse_inner:
127; GFX9: ; %bb.0:
128; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
129; GFX9-NEXT: v_add_u32_e32 v1, v0, v2
130; GFX9-NEXT: ; return to shader part epilog
131 %inner = add i32 %a, %b
132 %outer = add i32 %inner, %c
133 %r1 = insertelement <2 x i32> undef, i32 %inner, i32 0
134 %r0 = insertelement <2 x i32> %r1, i32 %outer, i32 1
135 %bc = bitcast <2 x i32> %r0 to <2 x float>
136 ret <2 x float> %bc
137}
138
; All three arguments are inreg floats (s2, s3 and s4 in the checks). Each one
; goes through an fadd with a constant (1.0, 2.0, 3.0), is bitcast to i32, and
; the three integers are then summed as (a1 + b2) + c3. The checks show the
; fadd results landing in v0, v1 and v2, with 3.0 materialized by a
; v_mov_b32 of 0x40400000 while 1.0 and 2.0 appear as immediate operands.
139; A case where uniform values end up in VGPRs -- we could use v_add3_u32 here,
140; but we don't: both targets are expected to emit two v_add_u32_e32.
141define amdgpu_ps float @add3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) {
142; VI-LABEL: add3_uniform_vgpr:
143; VI: ; %bb.0:
144; VI-NEXT: v_mov_b32_e32 v2, 0x40400000
145; VI-NEXT: v_add_f32_e64 v0, s2, 1.0
146; VI-NEXT: v_add_f32_e64 v1, s3, 2.0
147; VI-NEXT: v_add_f32_e32 v2, s4, v2
148; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
149; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0
150; VI-NEXT: ; return to shader part epilog
151;
152; GFX9-LABEL: add3_uniform_vgpr:
153; GFX9: ; %bb.0:
154; GFX9-NEXT: v_mov_b32_e32 v2, 0x40400000
155; GFX9-NEXT: v_add_f32_e64 v0, s2, 1.0
156; GFX9-NEXT: v_add_f32_e64 v1, s3, 2.0
157; GFX9-NEXT: v_add_f32_e32 v2, s4, v2
158; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
159; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
160; GFX9-NEXT: ; return to shader part epilog
161 %a1 = fadd float %a, 1.0
162 %b2 = fadd float %b, 2.0
163 %c3 = fadd float %c, 3.0
164 %bc.a = bitcast float %a1 to i32
165 %bc.b = bitcast float %b2 to i32
166 %bc.c = bitcast float %c3 to i32
167 %x = add i32 %bc.a, %bc.b
168 %result = add i32 %x, %bc.c
169 %bc = bitcast i32 %result to float
170 ret float %bc
171}