; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Each kernel below feeds an f32 add, mul or fma with an operand that is a
; constant and/or wrapped in fabs/fneg, and the check lines pin the operand
; order and the |x| / -x source modifiers of the instruction llc emits.

; Intrinsics used by the kernels in this file.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.fma.f32(float, float, float) nounwind readnone

; fadd with the constant 2.0 written first in the IR and fabs applied to the
; loaded value. The expected v_add_f32_e64 has |x| as its first source and 2.0
; as its second.
; FUNC-LABEL: {{^}}commute_add_imm_fabs_f32:
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[workitem.id.x]
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %x = load float, float addrspace(1)* %gep.0
  %x.fabs = call float @llvm.fabs.f32(float %x) #1
  ; *out = 2.0 + |x|
  %z = fadd float 2.0, %x.fabs
  store float %z, float addrspace(1)* %out
  ret void
}

; fmul of the constant 4.0 by -|x|. The expected v_mul_f32_e64 keeps only the
; abs modifier on the register operand and carries the sign on the constant
; (-4.0) instead.
; FUNC-LABEL: {{^}}commute_mul_imm_fneg_fabs_f32:
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -4.0
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[workitem.id.x]
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %x = load float, float addrspace(1)* %gep.0
  %x.fabs = call float @llvm.fabs.f32(float %x) #1
  ; fneg is spelled as (-0.0 - value) in this file.
  %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
  ; *out = 4.0 * -|x|
  %z = fmul float 4.0, %x.fneg.fabs
  store float %z, float addrspace(1)* %out
  ret void
}

; fmul of the constant 4.0 by -x with no fabs. The expected instruction is the
; _e32 form with -4.0 as the first source and the unmodified register second.
; FUNC-LABEL: {{^}}commute_mul_imm_fneg_f32:
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[workitem.id.x]
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %x = load float, float addrspace(1)* %gep.0
  ; fneg is spelled as (-0.0 - value) in this file.
  %x.fneg = fsub float -0.000000e+00, %x
  ; *out = 4.0 * -x
  %z = fmul float 4.0, %x.fneg
  store float %z, float addrspace(1)* %out
  ret void
}

; fadd of the constant 1024.0 and |x|. The checks expect the constant to be
; moved into a VGPR first (0x44800000 is the IEEE-754 single-precision bit
; pattern of 1024.0) and then used as the second source of v_add_f32_e64.
; FIXME: Should use SGPR for literal.
; FUNC-LABEL: {{^}}commute_add_lit_fabs_f32:
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[workitem.id.x]
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %x = load float, float addrspace(1)* %gep.0
  %x.fabs = call float @llvm.fabs.f32(float %x) #1
  ; *out = 1024.0 + |x|
  %z = fadd float 1024.0, %x.fabs
  store float %z, float addrspace(1)* %out
  ret void
}

; fadd of two loaded values where only the second has fabs. The expected
; v_add_f32_e64 keeps the IR operand order: x first, |y| second.
; FUNC-LABEL: {{^}}commute_add_fabs_f32:
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[tid], y = in[tid + 1]; both loads are volatile.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  ; *out = x + |y|
  %z = fadd float %x, %y.fabs
  store float %z, float addrspace(1)* %out
  ret void
}

; fmul of two loaded values where only the second is negated. The expected
; v_mul_f32_e64 keeps the IR operand order: x first, -y second.
; FUNC-LABEL: {{^}}commute_mul_fneg_f32:
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[tid], y = in[tid + 1]; both loads are volatile.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  ; fneg is spelled as (-0.0 - value) in this file.
  %y.fneg = fsub float -0.000000e+00, %y
  ; *out = x * -y
  %z = fmul float %x, %y.fneg
  store float %z, float addrspace(1)* %out
  ret void
}

; fmul of x by -|y|. The expected v_mul_f32_e64 keeps the IR operand order and
; carries both the neg and abs modifiers on the second source.
; FUNC-LABEL: {{^}}commute_mul_fabs_fneg_f32:
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[tid], y = in[tid + 1]; both loads are volatile.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  ; fneg is spelled as (-0.0 - value) in this file.
  %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
  ; *out = x * -|y|
  %z = fmul float %x, %y.fabs.fneg
  store float %z, float addrspace(1)* %out
  ret void
}

; Both fmul operands carry fabs, so the expected v_mul_f32_e64 keeps the IR
; operand order: |x| first, |y| second.
; There's no reason to commute this.
; FUNC-LABEL: {{^}}commute_mul_fabs_x_fabs_y_f32:
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[tid], y = in[tid + 1]; both loads are volatile.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %x.fabs = call float @llvm.fabs.f32(float %x) #1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  ; *out = |x| * |y|
  %z = fmul float %x.fabs, %y.fabs
  store float %z, float addrspace(1)* %out
  ret void
}

; fmul of |x| by -|y|. The expected v_mul_f32_e64 keeps the IR operand order,
; with abs on the first source and neg+abs on the second.
; FUNC-LABEL: {{^}}commute_mul_fabs_x_fneg_fabs_y_f32:
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
; SI: buffer_store_dword [[REG]]
define amdgpu_kernel void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; x = in[tid], y = in[tid + 1]; both loads are volatile.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %x = load volatile float, float addrspace(1)* %gep.0
  %y = load volatile float, float addrspace(1)* %gep.1
  %x.fabs = call float @llvm.fabs.f32(float %x) #1
  %y.fabs = call float @llvm.fabs.f32(float %y) #1
  ; fneg is spelled as (-0.0 - value) in this file.
  %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
  ; *out = |x| * -|y|
  %z = fmul float %x.fabs, %y.fabs.fneg
  store float %z, float addrspace(1)* %out
  ret void
}

; Make sure the multiply operands of the fma come out as (register, 2.0) even
; though src2 carries a source modifier.
; NOTE: despite "neg" in the kernel name, the IR below applies llvm.fabs.f32 to
; the addend, and the expected v_fma_f32 carries |R2| (abs), not a negate.
; NOTE: both inputs are loaded from %out; %in is not referenced in the body.

; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32:
; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, |[[R2]]|
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  ; %gep.0 and %gep.out compute the same address, &out[tid].
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  ; r1 = out[tid], r2 = out[tid + 1]; both loads are volatile.
  %r1 = load volatile float, float addrspace(1)* %gep.0
  %r2 = load volatile float, float addrspace(1)* %gep.1

  %r2.fabs = call float @llvm.fabs.f32(float %r2)

  ; out[tid] = fma(r1, 2.0, |r2|)
  %r3 = tail call float @llvm.fma.f32(float %r1, float 2.0, float %r2.fabs)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

; #0 is attached to the commute_* kernel definitions; #1 is attached to the
; workitem.id.x and fabs declarations and to their call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }