; RUN: llc -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
; RUN: llc -march=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
; RUN: llc -march=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s
; RUN: llc -march=amdgcn -mcpu=tonga -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s

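; Check f64 FMA formation on SI (tahiti, verde) and VI (tonga) under both
; -fp-contract=on (GCN-STRICT checks) and -fp-contract=fast (GCN-CONTRACT
; checks).
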
; GCN-LABEL: {{^}}fmuladd_f64:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                         double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

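; An unfused fmul followed by fadd: this pair should only be combined into
; v_fma_f64 when -fp-contract=fast permits contraction; with
; -fp-contract=on it must stay a separate multiply and add.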
; GCN-LABEL: {{^}}fmul_fadd_f64:
; GCN-CONTRACT: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

; GCN-STRICT: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GCN-STRICT: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                           double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %tmp = fmul double %r0, %r1
  %r3 = fadd double %tmp, %r2
  store double %r3, double addrspace(1)* %out
  ret void
}

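; (a + a) + b: with contraction the two adds are expected to fold to
; fma(a, 2.0, b); strictly, they must remain two v_add_f64s.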
; GCN-LABEL: {{^}}fadd_a_a_b_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[R1:v\[[0-9]+:[0-9]+\]]],
; GCN: {{buffer|flat}}_load_dwordx2 [[R2:v\[[0-9]+:[0-9]+\]]],

; GCN-STRICT: v_add_f64 [[TMP:v\[[0-9]+:[0-9]+\]]], [[R1]], [[R1]]
; GCN-STRICT: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]], [[R2]]

; GCN-CONTRACT: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[R1]], 2.0, [[R2]]

; SI: buffer_store_dwordx2 [[RESULT]]
; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fadd_a_a_b_f64(double addrspace(1)* %out,
                            double addrspace(1)* %in1,
                            double addrspace(1)* %in2) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %r0 = load volatile double, double addrspace(1)* %gep.0
  %r1 = load volatile double, double addrspace(1)* %gep.1

  %add.0 = fadd double %r0, %r0
  %add.1 = fadd double %add.0, %r1
  store double %add.1, double addrspace(1)* %gep.out
  ret void
}

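; Same fold with the outer add's operands commuted: b + (a + a) should
; still contract to fma(a, 2.0, b).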
; GCN-LABEL: {{^}}fadd_b_a_a_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[R1:v\[[0-9]+:[0-9]+\]]],
; GCN: {{buffer|flat}}_load_dwordx2 [[R2:v\[[0-9]+:[0-9]+\]]],

; GCN-STRICT: v_add_f64 [[TMP:v\[[0-9]+:[0-9]+\]]], [[R1]], [[R1]]
; GCN-STRICT: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[R2]], [[TMP]]

; GCN-CONTRACT: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[R1]], 2.0, [[R2]]

; SI: buffer_store_dwordx2 [[RESULT]]
; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fadd_b_a_a_f64(double addrspace(1)* %out,
                            double addrspace(1)* %in1,
                            double addrspace(1)* %in2) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %r0 = load volatile double, double addrspace(1)* %gep.0
  %r1 = load volatile double, double addrspace(1)* %gep.1

  %add.0 = fadd double %r0, %r0
  %add.1 = fadd double %r1, %add.0
  store double %add.1, double addrspace(1)* %gep.out
  ret void
}

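; fsub(fmul(a, b), c): with contraction this should select to
; fma(a, b, -c); strictly it stays a multiply followed by an add of the
; negated operand.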
; GCN-LABEL: {{^}}mad_sub_f64:
; GCN-STRICT: v_mul_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
; GCN-STRICT: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}

; GCN-CONTRACT: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #0 {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tid.ext = sext i32 %tid to i64
  %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
  %add1 = add i64 %tid.ext, 1
  %gep1 = getelementptr double, double addrspace(1)* %ptr, i64 %add1
  %add2 = add i64 %tid.ext, 2
  %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
  %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %gep0, align 8
  %b = load volatile double, double addrspace(1)* %gep1, align 8
  %c = load volatile double, double addrspace(1)* %gep2, align 8
  %mul = fmul double %a, %b
  %sub = fsub double %mul, %c
  store double %sub, double addrspace(1)* %outgep, align 8
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare double @llvm.fmuladd.f64(double, double, double) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }