; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
4
5; ===================================================================================
6; V_ADD3_U32
7; ===================================================================================
8
; Baseline case: (a + b) + c with all three operands arriving in VGPRs.
; The checks expect two chained adds on VI and a single v_add3_u32 on GFX9.
define amdgpu_ps float @add3(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: add3:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add3_u32 v0, v0, v1, v2
; GFX9-NEXT:    ; return to shader part epilog
  %sum.ab = add i32 %a, %b
  %sum.abc = add i32 %sum.ab, %c
  ; The shader returns float, so the integer sum is reinterpreted bit-for-bit.
  %ret = bitcast i32 %sum.abc to float
  ret float %ret
}
25
; V_MAD_U32_U24 is given higher priority than V_ADD3_U32, so no add3 is formed here.
define amdgpu_ps float @mad_no_add3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; VI-LABEL: mad_no_add3:
; VI:       ; %bb.0:
; VI-NEXT:    v_mad_u32_u24 v0, v0, v1, v4
; VI-NEXT:    v_mad_u32_u24 v0, v2, v3, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: mad_no_add3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u32_u24 v0, v0, v1, v4
; GFX9-NEXT:    v_mad_u32_u24 v0, v2, v3, v0
; GFX9-NEXT:    ; return to shader part epilog

  ; shl 8 followed by lshr 8 clears the top 8 bits of each i32, so every
  ; multiply operand below has at most 24 significant bits.
  %a0 = shl i32 %a, 8
  %a1 = lshr i32 %a0, 8
  %b0 = shl i32 %b, 8
  %b1 = lshr i32 %b0, 8
  %mul1 = mul i32 %a1, %b1

  %c0 = shl i32 %c, 8
  %c1 = lshr i32 %c0, 8
  %d0 = shl i32 %d, 8
  %d1 = lshr i32 %d0, 8
  %mul2 = mul i32 %c1, %d1

  ; Three-input sum e + mul1 + mul2. The checks expect each multiply to be
  ; paired with one add as a v_mad_u32_u24 instead of forming v_add3_u32.
  %add0 = add i32 %e, %mul1
  %add1 = add i32 %mul2, %add0

  ; The shader returns float, so the integer result is reinterpreted bit-for-bit.
  %bc = bitcast i32 %add1 to float
  ret float %bc
}
51
; The three-operand instruction variant is not used due to constant bus limitations.
; TODO: with reassociation it is possible to replace a v_add_u32_e32 with an s_add_i32.
define amdgpu_ps float @add3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
; VI-LABEL: add3_vgpr_b:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_add_u32_e32 v0, vcc, s3, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3_vgpr_b:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add_u32_e32 v0, s2, v0
; GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
; GFX9-NEXT:    ; return to shader part epilog
  ; %a and %c are inreg (s2 and s3 in the checks); only %b is in a VGPR.
  ; Both targets are expected to emit two separate adds, one per SGPR operand.
  %sum.ab = add i32 %a, %b
  %sum.abc = add i32 %sum.ab, %c
  %ret = bitcast i32 %sum.abc to float
  ret float %ret
}
71
; Same as the all-VGPR case but associated the other way: a + (b + c), with
; the inner sum as the right-hand operand of the outer add. The GFX9 checks
; still expect a single v_add3_u32, with the operands in the order v1, v2, v0.
define amdgpu_ps float @add3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: add3_vgpr_all2:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3_vgpr_all2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add3_u32 v0, v1, v2, v0
; GFX9-NEXT:    ; return to shader part epilog
  %sum.bc = add i32 %b, %c
  %sum.abc = add i32 %a, %sum.bc
  %ret = bitcast i32 %sum.abc to float
  ret float %ret
}
88
; Only %a is inreg (s2 in the checks); %b and %c are in VGPRs. With a single
; SGPR operand the GFX9 checks still expect one v_add3_u32.
define amdgpu_ps float @add3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
; VI-LABEL: add3_vgpr_bc:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3_vgpr_bc:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add3_u32 v0, s2, v0, v1
; GFX9-NEXT:    ; return to shader part epilog
  %sum.ab = add i32 %a, %b
  %sum.abc = add i32 %sum.ab, %c
  %ret = bitcast i32 %sum.abc to float
  ret float %ret
}
105
; Two VGPR operands plus the constant 16. The GFX9 checks expect the constant
; to appear directly as the third operand of v_add3_u32.
define amdgpu_ps float @add3_vgpr_const(i32 %a, i32 %b) {
; VI-LABEL: add3_vgpr_const:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3_vgpr_const:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add3_u32 v0, v0, v1, 16
; GFX9-NEXT:    ; return to shader part epilog
  %sum.ab = add i32 %a, %b
  %sum.plus16 = add i32 %sum.ab, 16
  %ret = bitcast i32 %sum.plus16 to float
  ret float %ret
}
122
; The outer (three-input) sum has two users: it is returned in element 0 and
; also multiplied by %x for element 1. The inner sum has a single user, and
; the GFX9 checks still expect the two adds to become one v_add3_u32.
define amdgpu_ps <2 x float> @add3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) {
; VI-LABEL: add3_multiuse_outer:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-NEXT:    v_mul_lo_i32 v1, v0, v3
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3_multiuse_outer:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add3_u32 v0, v0, v1, v2
; GFX9-NEXT:    v_mul_lo_i32 v1, v0, v3
; GFX9-NEXT:    ; return to shader part epilog
  %sum.ab = add i32 %a, %b
  %sum.abc = add i32 %sum.ab, %c
  %scaled = mul i32 %sum.abc, %x
  ; Pack { sum, sum * x } into the two-element return value.
  %vec.lo = insertelement <2 x i32> undef, i32 %sum.abc, i32 0
  %vec = insertelement <2 x i32> %vec.lo, i32 %scaled, i32 1
  %ret = bitcast <2 x i32> %vec to <2 x float>
  ret <2 x float> %ret
}
144
; The inner (two-input) sum has two users: it is returned in element 0 and
; feeds the outer add returned in element 1. Both targets are expected to
; keep two separate adds; no v_add3_u32 appears in the GFX9 checks.
define amdgpu_ps <2 x float> @add3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: add3_multiuse_inner:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    v_add_u32_e32 v1, vcc, v0, v2
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3_multiuse_inner:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_add_u32_e32 v1, v0, v2
; GFX9-NEXT:    ; return to shader part epilog
  %sum.ab = add i32 %a, %b
  %sum.abc = add i32 %sum.ab, %c
  ; Pack { a + b, a + b + c } into the two-element return value.
  %vec.lo = insertelement <2 x i32> undef, i32 %sum.ab, i32 0
  %vec = insertelement <2 x i32> %vec.lo, i32 %sum.abc, i32 1
  %ret = bitcast <2 x i32> %vec to <2 x float>
  ret <2 x float> %ret
}
164
165; A case where uniform values end up in VGPRs -- we could use v_add3_u32 here,
166; but we don't.
define amdgpu_ps float @add3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) {
; VI-LABEL: add3_uniform_vgpr:
; VI:       ; %bb.0:
; VI-NEXT:    v_mov_b32_e32 v2, 0x40400000
; VI-NEXT:    v_add_f32_e64 v0, s2, 1.0
; VI-NEXT:    v_add_f32_e64 v1, s3, 2.0
; VI-NEXT:    v_add_f32_e32 v2, s4, v2
; VI-NEXT:    v_add_u32_e32 v0, vcc, v1, v0
; VI-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add3_uniform_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40400000
; GFX9-NEXT:    v_add_f32_e64 v0, s2, 1.0
; GFX9-NEXT:    v_add_f32_e64 v1, s3, 2.0
; GFX9-NEXT:    v_add_f32_e32 v2, s4, v2
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
; GFX9-NEXT:    ; return to shader part epilog
  ; All inputs are inreg (s2..s4 in the checks). Each goes through a float add
  ; first, and the checks show those results landing in v0..v2.
  %a.plus1 = fadd float %a, 1.0
  %b.plus2 = fadd float %b, 2.0
  %c.plus3 = fadd float %c, 3.0
  ; Reinterpret the float results as integers so they can feed the integer adds.
  %a.bits = bitcast float %a.plus1 to i32
  %b.bits = bitcast float %b.plus2 to i32
  %c.bits = bitcast float %c.plus3 to i32
  %sum.ab = add i32 %a.bits, %b.bits
  %sum.abc = add i32 %sum.ab, %c.bits
  %ret = bitcast i32 %sum.abc to float
  ret float %ret
}