; Codegen test for the half-precision llvm.rint intrinsic on the amdgcn target.
; The same IR is compiled for three subtargets (tahiti, fiji, gfx900); every
; invocation enables its own set of FileCheck prefixes so that expectations can
; be shared between subtargets or kept specific to one of them.
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s

; Intrinsics under test: the scalar half form and the <2 x half> vector form.
declare half @llvm.rint.f16(half %a)
declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)

; Scalar case: load one half from %a, apply llvm.rint, store the result to %r.
; The tahiti expectations route the value through f32 (convert, v_rndne_f32,
; convert back); the fiji and gfx900 expectations are a single v_rndne_f16.
; GCN-LABEL: {{^}}rint_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_rndne_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; GFX89: v_rndne_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @rint_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = call half @llvm.rint.f16(half %a.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Vector case: load a <2 x half> from %a, apply llvm.rint, store the result to %r.
; One dword is loaded and one dword is stored on every subtarget; the three
; groups of expectations below differ in how the two halves are rounded and
; packed back together.
; GCN-LABEL: {{^}}rint_v2f16
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]

; tahiti: each element is converted to f32, rounded with v_rndne_f32 and
; converted back; the high result is shifted left by 16 and OR-ed with the low
; result, and no v_and_b32 may appear before that OR.
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_rndne_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_rndne_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: v_and_b32
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]

; fiji: the low half uses plain v_rndne_f16 and the high half uses the SDWA
; form that reads WORD_1 and writes WORD_1 (the two may come in either order);
; the results are OR-ed together with no v_and_b32 in between.
; VI-DAG: v_rndne_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; VI-DAG: v_rndne_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; VI-NOT: v_and_b32
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]

; gfx900: the SDWA form reads WORD_1 but writes the whole dword; the low result
; is masked with 0xffff and both halves are merged by one v_lshl_or_b32.
; GFX9: v_rndne_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]]
; GFX9: v_rndne_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm

define amdgpu_kernel void @rint_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = call <2 x half> @llvm.rint.v2f16(<2 x half> %a.val)
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}