; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
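; Summary of what the checks below expect: SI has no 16-bit instructions, so
; the f16 fmuladd is promoted to f32 (v_cvt_f32_f16 / v_mac_f32 /
; v_cvt_f16_f32, then buffer_store_short of the result). VI uses the native
; f16 instructions: v_mac_f16 when fp16 denormals are flushed (VI-FLUSH) and
; v_fma_f16 when they are enabled (VI-DENORM).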

declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)

; GCN-LABEL: {{^}}fmuladd_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]

; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], v[[B_F16]], v[[A_F16]]
; VI-FLUSH: buffer_store_short v[[C_F16]]

; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; VI-DENORM: buffer_store_short [[RESULT]]

; GCN: s_endpgm
define void @fmuladd_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.fmuladd.f16(half %a.val, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

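; Note: 0x4200 in the checks below is the f16 bit pattern of the immediate 3.0.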
; GCN-LABEL: {{^}}fmuladd_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], 0x4200{{$}}
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_mac_f32_e32 v[[C_F32]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]

; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], 0x4200, v[[B_F16]]
; VI-FLUSH: buffer_store_short v[[C_F16]]

; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200
; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[B_F16]], v[[C_F16]]
; VI-DENORM: buffer_store_short [[RESULT]]

; GCN: s_endpgm
define void @fmuladd_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.fmuladd.f16(half 3.0, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; GCN-LABEL: {{^}}fmuladd_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], 0x4200{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]

; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], 0x4200, v[[A_F16]]
; VI-FLUSH: buffer_store_short v[[C_F16]]

; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200
; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[A_F16]], v[[C_F16]]
; VI-DENORM: buffer_store_short [[RESULT]]


; GCN: s_endpgm
define void @fmuladd_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.fmuladd.f16(half %a.val, half 3.0, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

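; For <2 x half>, each component is handled separately and the two f16 results
; are repacked into a dword with v_and_b32 / v_lshlrev_b32 / v_or_b32.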
; GCN-LABEL: {{^}}fmuladd_v2f16
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[C_V2_F16:[0-9]+]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
; SI: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[C_F32_0]]
; SI: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]

; FIXME: and should be unnecessary
; VI-FLUSH: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
; VI-FLUSH: v_mac_f16_e32 v[[C_F16_1]], v[[B_F16_1]], v[[A_F16_1]]
; VI-FLUSH: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[C_V2_F16]]
; VI-FLUSH: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]

; VI-DENORM-DAG: v_fma_f16 v[[RES0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]], v[[C_V2_F16]]
; VI-DENORM-DAG: v_fma_f16 v[[RES1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], v[[C_F16_1]]
; VI-DENORM: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[RES0]]
; VI-DENORM: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[RES1]]

; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define void @fmuladd_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) {
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %r.val = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val)
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}