; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; FIXME: Enable for VI (the tonga line above is spelled XUN rather than RUN, which keeps it disabled).

; Intrinsics referenced by the tests in this file.

; The result of read.tidig.x is used below as a GEP index and as a compare
; operand.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; NOTE(review): no test visible in this file calls barrier.global; confirm the
; declaration is still needed.
declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
; div.fmas takes three floating-point operands followed by an i1 condition.
declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone

; f32 div.fmas with all operands passed as kernel arguments.
; The checks expect the three float operands to be fetched with s_load_dword,
; copied into VGPRs with v_mov_b32, passed to v_div_fmas_f32 in a, b, c order,
; and the result written with buffer_store_dword.
; GCN-LABEL: {{^}}test_div_fmas_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], [[VC]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
  store float %result, float addrspace(1)* %out, align 4
  ret void
}

; f64 div.fmas with all operands passed as kernel arguments.
; Only the presence of v_div_fmas_f64 is checked; operands are not matched.
; GCN-LABEL: {{^}}test_div_fmas_f64:
; GCN: v_div_fmas_f64
define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
  %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
  store double %result, double addrspace(1)* %out, align 8
  ret void
}

; The i1 condition is produced by an icmp on a kernel argument.
; The checks expect the compare to write vcc directly, followed by
; v_div_fmas_f32.
; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
  %cmp = icmp eq i32 %i, 0
  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
  store float %result, float addrspace(1)* %out, align 4
  ret void
}

; The i1 condition is the constant false.
; The checks expect vcc to be set to 0 with s_mov_b64 before v_div_fmas_f32.
; GCN-LABEL: {{^}}test_div_fmas_f32_imm_false_cond_to_vcc:
; SI: s_mov_b64 vcc, 0
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
  store float %result, float addrspace(1)* %out, align 4
  ret void
}

; The i1 condition is the constant true.
; The checks expect vcc to be set to -1 with s_mov_b64 before v_div_fmas_f32.
; GCN-LABEL: {{^}}test_div_fmas_f32_imm_true_cond_to_vcc:
; SI: s_mov_b64 vcc, -1
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
  store float %result, float addrspace(1)* %out, align 4
  ret void
}

; The i1 condition is the AND of two compares: one on the thread id and one
; on a kernel argument. The operands are loaded from consecutive floats
; starting at %in[%tid].
; The checks expect each compare result in its own SGPR pair, combined into
; vcc with s_and_b64 before v_div_fmas_f32.
; NOTE(review): the v_div_fmas_f32 check lists its sources as [[B]], [[A]],
; [[C]] while the IR call passes %a, %b, %c; confirm this operand order is the
; intended lowering.
; GCN-LABEL: {{^}}test_div_fmas_f32_logical_cond_to_vcc:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
; SI: v_div_fmas_f32 {{v[0-9]+}}, [[B]], [[A]], [[C]]
; SI: s_endpgm
define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
  %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
  %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2
  %gep.out = getelementptr float addrspace(1)* %out, i32 2

  %a = load float addrspace(1)* %gep.a
  %b = load float addrspace(1)* %gep.b
  %c = load float addrspace(1)* %gep.c

  %cmp0 = icmp eq i32 %tid, 0
  %cmp1 = icmp ne i32 %d, 0
  %and = and i1 %cmp0, %cmp1

  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone
  store float %result, float addrspace(1)* %gep.out, align 4
  ret void
}

; The i1 condition is a phi that merges constant false from the entry block
; with a compare computed in %bb, which only runs when %tid == 0.
; The checks expect the exec mask to be saved and restored around the
; conditional block, the i1 to be materialized as 0 / -1 with v_cndmask_b32,
; and vcc to be recomputed from that value before v_div_fmas_f32.
; The join-block label and the VGPR read by the final compare are matched
; with regexes so the test does not depend on this function's position in the
; file or on a specific register assignment.
; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
; SI: v_cmp_eq_i32_e64 [[CMPTID:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
; SI: s_and_saveexec_b64 [[CMPTID]], [[CMPTID]]
; SI: s_xor_b64 [[CMPTID]], exec, [[CMPTID]]

; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_cmp_ne_i32_e64 [[CMPLOAD:s\[[0-9]+:[0-9]+\]]], [[LOAD]], 0
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, [[CMPLOAD]]


; SI: BB{{[0-9]+}}_2:
; SI: s_or_b64 exec, exec, [[CMPTID]]
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: buffer_store_dword
; SI: s_endpgm
define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind {
entry:
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.out = getelementptr float addrspace(1)* %out, i32 2
  %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
  %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
  %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2

  %a = load float addrspace(1)* %gep.a
  %b = load float addrspace(1)* %gep.b
  %c = load float addrspace(1)* %gep.c

  %cmp0 = icmp eq i32 %tid, 0
  br i1 %cmp0, label %bb, label %exit

bb:
  %val = load i32 addrspace(1)* %dummy
  %cmp1 = icmp ne i32 %val, 0
  br label %exit

exit:
  %cond = phi i1 [false, %entry], [%cmp1, %bb]
  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone
  store float %result, float addrspace(1)* %gep.out, align 4
  ret void
}