blob: 0ff5d9652c1047bada4f4b480d4884e52732e92c [file] [log] [blame]
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
Matt Arsenault4de32442014-08-02 02:26:51 +00004
; Kernel stores y + (-|x|). The SI checks below expect the negated fabs to
; show up as an |x| operand of a single v_sub_f32_e64, with no `and`
; instruction emitted ahead of it.
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
; SI-NOT: and
; SI: v_sub_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{s[0-9]+}}|
define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
  %abs.x = call float @llvm.fabs.f32(float %x)
  ; Negation is spelled as (-0.0 - value).
  %neg.abs.x = fsub float -0.000000e+00, %abs.x
  %sum = fadd float %y, %neg.abs.x
  store float %sum, float addrspace(1)* %out, align 4
  ret void
}
15
; Kernel stores y * (-|x|). The SI checks below expect one v_mul_f32_e64
; whose last operand carries both the negate and abs modifiers (-|s|), with
; no `and` instruction before or after it.
; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
; SI-NOT: and
; SI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{s[0-9]+}}|
; SI-NOT: and
define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
  %abs.x = call float @llvm.fabs.f32(float %x)
  ; Negation is spelled as (-0.0 - value).
  %neg.abs.x = fsub float -0.000000e+00, %abs.x
  %product = fmul float %y, %neg.abs.x
  store float %product, float addrspace(1)* %out, align 4
  ret void
}
Michel Danzer624b02a2014-02-04 07:12:38 +000027
; DAGCombiner will transform:
;   (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
; unless isFabsFree returns true
31
; Kernel bitcasts an i32 argument to float, takes fabs through the
; llvm.fabs.f32 intrinsic, negates it and stores the result.
; Expected output per the checks below:
;   - R600 run: |PV.x| and -PV operand modifiers, no AND instruction.
;   - SI run:   one s_or_b32 with the constant 0x80000000.
;   - VI run:   one s_bitset1_b32 of bit 31.
; FUNC-LABEL: {{^}}fneg_fabs_free_f32:
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; R600: -PV

; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; VI: s_bitset1_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
  %as.float = bitcast i32 %in to float
  %abs.val = call float @llvm.fabs.f32(float %as.float)
  ; Negation is spelled as (-0.0 - value).
  %neg.abs.val = fsub float -0.000000e+00, %abs.val
  store float %neg.abs.val, float addrspace(1)* %out
  ret void
}
46
; Same shape as the intrinsic-based "free" test, except that fabs is reached
; through a call to the plain @fabs function declared in this file rather
; than the llvm.fabs.f32 intrinsic.
; Expected output per the checks below:
;   - R600 run: |PV.x| and -PV operand modifiers, no AND instruction.
;   - SI run:   one s_or_b32 with the constant 0x80000000.
; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f32:
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
; R600: -PV

; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
  %as.float = bitcast i32 %in to float
  %abs.val = call float @fabs(float %as.float)
  ; Negation is spelled as (-0.0 - value).
  %neg.abs.val = fsub float -0.000000e+00, %abs.val
  store float %neg.abs.val, float addrspace(1)* %out
  ret void
}
60
; Kernel stores -|in| for a float kernel argument. The SI check below
; expects a single scalar s_or_b32 with the constant 0x80000000.
; FUNC-LABEL: {{^}}fneg_fabs_f32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define amdgpu_kernel void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
  %abs.in = call float @llvm.fabs.f32(float %in)
  ; Negation is spelled as (-0.0 - value).
  %neg.abs.in = fsub float -0.000000e+00, %abs.in
  store float %neg.abs.in, float addrspace(1)* %out, align 4
  ret void
}
69
; Variant where the operand is loaded through an addrspace(1) pointer instead
; of being passed directly as a kernel argument. The SI check below expects
; the vector form, v_or_b32_e32, with the constant 0x80000000.
; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define amdgpu_kernel void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %loaded = load float, float addrspace(1)* %in, align 4
  %abs.loaded = call float @llvm.fabs.f32(float %loaded)
  ; Negation is spelled as (-0.0 - value).
  %neg.abs.loaded = fsub float -0.000000e+00, %abs.loaded
  store float %neg.abs.loaded, float addrspace(1)* %out, align 4
  ret void
}
79
; Two-element vector version: stores -|in| for a <2 x float> kernel argument.
; Expected output per the checks below:
;   - R600 run: an |...| operand followed by -PV, twice (once per element).
;   - SI run:   s_brev_b32 of 1 into a scalar register (captured as
;               SIGNBITK), then two v_or_b32_e32 that use that register.
; FUNC-LABEL: {{^}}fneg_fabs_v2f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: -PV
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: -PV

; FIXME: In this case two uses of the constant should be folded
; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
  %abs.vec = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
  ; Per-lane negation, spelled as (-0.0 - value) in every lane.
  %neg.abs.vec = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %abs.vec
  store <2 x float> %neg.abs.vec, <2 x float> addrspace(1)* %out
  ret void
}
96
; Four-element vector version: stores -|in| for a <4 x float> kernel argument.
; The SI checks below expect s_brev_b32 of 1 into a scalar register (captured
; as SIGNBITK), followed by four v_or_b32_e32 that use that register, one per
; element.
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
define amdgpu_kernel void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
  %abs.vec = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
  ; Per-lane negation, spelled as (-0.0 - value) in every lane.
  %neg.abs.vec = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %abs.vec
  store <4 x float> %neg.abs.vec, <4 x float> addrspace(1)* %out
  ret void
}
109
; fabs intrinsics used by the kernels in this file (scalar, 2- and 4-element
; vector forms).
declare float @llvm.fabs.f32(float) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone

; Plain (non-intrinsic) fabs function, called by @fneg_fabs_fn_free_f32.
declare float @fabs(float) readnone