; blob: 44c80b63bf7c3400296fff1bf2d47905f1762622
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FIXME: Should replace unsafe-fp-math with no signed zeros.

; Intrinsic whose result every test below uses as the element index into %in.
declare i32 @llvm.r600.read.tidig.x() #1

; Select-based max built from an unordered ">=" compare (fcmp uge) of two
; consecutive floats loaded from %in. The default amdgcn run expects
; v_max_legacy_f32 with its operands in (B, A) order, the no-NaNs/unsafe-math
; run expects plain v_max_f32 with (A, B), and the r600 run expects MAX.
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; Volatile loads; presumably so that two separate dword loads (A at offset
  ; 0, B at offset 4) are emitted for the checks above to bind.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp uge float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Select-based max built from an ordered ">=" compare (fcmp oge) of two
; consecutive floats loaded from %in. The default amdgcn run expects
; v_max_legacy_f32 with its operands in (A, B) order, the no-NaNs/unsafe-math
; run expects plain v_max_f32, and the r600 run expects MAX.
; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; Volatile loads; presumably so that two separate dword loads (A at offset
  ; 0, B at offset 4) are emitted for the checks above to bind.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp oge float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Select-based max built from an unordered ">" compare (fcmp ugt) of two
; consecutive floats loaded from %in. The default amdgcn run expects
; v_max_legacy_f32 with its operands in (B, A) order, the no-NaNs/unsafe-math
; run expects plain v_max_f32 with (A, B), and the r600 run expects MAX.
; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; Volatile loads; presumably so that two separate dword loads (A at offset
  ; 0, B at offset 4) are emitted for the checks above to bind.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ugt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Select-based max built from an ordered ">" compare (fcmp ogt) of two
; consecutive floats loaded from %in. The default amdgcn run expects
; v_max_legacy_f32 with its operands in (A, B) order, the no-NaNs/unsafe-math
; run expects plain v_max_f32, and the r600 run expects MAX.
; The label is anchored and ends in ':' so that it cannot also match the
; longer name test_fmax_legacy_ogt_f32_multi_use.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; Volatile loads; presumably so that two separate dword loads (A at offset
  ; 0, B at offset 4) are emitted for the checks above to bind.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ogt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Same ordered ">" select pattern as the scalar ogt test, but on <1 x float>
; values and with non-volatile loads. The checks expect the same single
; v_max_legacy_f32 (default run) or v_max_f32 (no-NaNs/unsafe-math run) on
; (A, B), and MAX on r600.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1

  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1

  %cmp = fcmp ogt <1 x float> %a, %b
  %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
  store <1 x float> %val, <1 x float> addrspace(1)* %out
  ret void
}

; Ordered ">" select pattern on <3 x float> values. The checks only count
; instructions: three v_max_legacy_f32 in the default amdgcn run and three
; v_max_f32 in the no-NaNs/unsafe-math run. Operands are not matched, and
; the r600 run is checked for the function label only.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
; SI-SAFE: v_max_legacy_f32_e32
; SI-SAFE: v_max_legacy_f32_e32
; SI-SAFE: v_max_legacy_f32_e32
; SI-NONAN: v_max_f32_e32
; SI-NONAN: v_max_f32_e32
; SI-NONAN: v_max_f32_e32
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1

  %cmp = fcmp ogt <3 x float> %a, %b
  %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
  store <3 x float> %val, <3 x float> addrspace(1)* %out
  ret void
}

; Ordered ">" select pattern where the compare result has a second use: %cmp
; is also stored to %out1. On amdgcn the checks require that no v_max_*
; instruction appears; instead a v_cmp_gt_f32 must be immediately followed by
; a v_cndmask_b32. The r600 run still expects MAX.
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-NOT: v_max_
; SI: v_cmp_gt_f32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_max_

; EG: MAX
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; Volatile loads; presumably so that two separate dword loads (A at offset
  ; 0, B at offset 4) are emitted for the checks above to bind.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ogt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out0, align 4
  ; Second use of %cmp, in addition to the select above.
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}

; Attribute groups: #0 is attached to every kernel definition in this file,
; #1 to the llvm.r600.read.tidig.x declaration and its call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }