; blob: f30fd1d5820436f8d64a8b4d49d42433778d54ab (git-blame scrape residue, kept as a comment)
; Four RUN configurations: SI vs. VI (fiji), each with f16/f64 denormals
; flushed (-FLUSH) or preserved (-DENORM), since denormal mode decides
; whether fmuladd may lower to v_mac (flush) or must use v_fma (denorm).
; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
; RUN: llc -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s

declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
; Scalar f16 fmuladd with all three operands loaded from memory.
; SI has no f16 arithmetic: operands are promoted to f32, combined with
; v_mac_f32, and truncated back. VI uses v_mac_f16 when denormals are
; flushed, v_fma_f16 when they must be preserved.
; GCN-LABEL: {{^}}fmuladd_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]

; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], v[[B_F16]], v[[A_F16]]
; VI-FLUSH: buffer_store_short v[[C_F16]]

; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]]
; VI-DENORM: buffer_store_short [[RESULT]]

; GCN: s_endpgm
define amdgpu_kernel void @fmuladd_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.fmuladd.f16(half %a.val, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}
39
; fmuladd with an immediate multiplicand (a = 3.0). SI folds the constant
; as the f32 literal 0x40400000 into v_mac_f32; VI uses the f16 literal
; 0x4200 (3.0h) directly in v_mac_f16, or materializes it for v_fma_f16.
; GCN-LABEL: {{^}}fmuladd_f16_imm_a
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_mac_f32_e32 v[[C_F32]], 0x40400000, v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]

; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], 0x4200, v[[B_F16]]
; VI-FLUSH: buffer_store_short v[[C_F16]]

; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200
; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[B_F16]], v[[C_F16]]
; VI-DENORM: buffer_store_short [[RESULT]]

; GCN: s_endpgm
define amdgpu_kernel void @fmuladd_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.fmuladd.f16(half 3.0, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}
67
; fmuladd with an immediate second operand (b = 3.0); mirror of imm_a.
; Fixes vs. the original checks:
;  - the SI v_mac_f32 line used v[[B_F32]], which is never bound in this
;    function (only %a and %c are loaded and converted here), so it silently
;    reused the binding from a previous test; it must be v[[A_F32]].
;  - the VI-DENORM buffer_store_short line was missing the ':' after the
;    prefix, so FileCheck ignored it entirely.
; GCN-LABEL: {{^}}fmuladd_f16_imm_b
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI: v_mac_f32_e32 v[[C_F32]], 0x40400000, v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI: buffer_store_short v[[R_F16]]

; VI-FLUSH: v_mac_f16_e32 v[[C_F16]], 0x4200, v[[A_F16]]
; VI-FLUSH: buffer_store_short v[[C_F16]]

; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200
; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[A_F16]], v[[C_F16]]
; VI-DENORM: buffer_store_short [[RESULT]]

; GCN: s_endpgm
define amdgpu_kernel void @fmuladd_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.fmuladd.f16(half %a.val, half 3.0, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}
96
; Vector <2 x half> fmuladd. SI unpacks both halves (lshr for the high
; lane), promotes each to f32, does two v_mac_f32, then repacks with
; lshl/or. VI-FLUSH keeps f16: one v_mac_f16_sdwa for the high lane and one
; v_mac_f16 for the low lane; VI-DENORM uses two v_fma_f16 instead.
; GCN-LABEL: {{^}}fmuladd_v2f16
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[C_V2_F16:[0-9]+]]

; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]

; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]

; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
; SI: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
; SI: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]

; VI-FLUSH: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]]
; VI-FLUSH-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]]
; VI-FLUSH-NOT: v_and_b32
; VI-FLUSH: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[A_V2_F16]]

; VI-DENORM: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; VI-DENORM: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; VI-DENORM: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; VI-DENORM-DAG: v_fma_f16 v[[RES0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]], v[[C_V2_F16]]
; VI-DENORM-DAG: v_fma_f16 v[[RES1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], v[[C_F16_1]]
; VI-DENORM-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[RES1]]
; VI-DENORM-NOT: v_and_b32
; VI-DENORM: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[RES0]]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm

define amdgpu_kernel void @fmuladd_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) {
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %r.val = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a.val, <2 x half> %b.val, <2 x half> %c.val)
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}