; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
Matt Arsenaultc79dc702016-11-15 02:25:28 +00004
; fneg of a scalar half kernel (SGPR) argument. fsub -0.0, x is the
; canonical fneg idiom in pre-fneg-instruction IR.
; FIXME: Should be able to do scalar op
; GCN-LABEL: {{^}}s_fneg_f16:
define amdgpu_kernel void @s_fneg_f16(half addrspace(1)* %out, half %in) #0 {
  %fneg = fsub half -0.0, %in
  store half %fneg, half addrspace(1)* %out
  ret void
}
12
; FIXME: Should be able to use bit operations when illegal type as
; well.

; fneg of a per-lane loaded half: expected to compile to an xor with the
; f16 sign-bit mask 0x8000 rather than an arithmetic subtract.
; NOTE(review): the "SI:" prefix below is not enabled by any RUN line
; (prefixes in use: CI, VI, CIVI, GCN, GFX9), so that check is dead —
; confirm whether an SI run line was intended.
; GCN-LABEL: {{^}}v_fneg_f16:
; GCN: flat_load_ushort [[VAL:v[0-9]+]],
; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
; SI: buffer_store_short [[XOR]]
define amdgpu_kernel void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds half, half addrspace(1)* %in, i32 %tid
  ; NOTE(review): %gep.out is computed from %in, not %out, so the result is
  ; stored back into the input buffer and %out is unused — looks like a
  ; copy-paste slip; confirm against upstream before changing (the CHECK
  ; lines may depend on the resulting kernel-argument usage).
  %gep.out = getelementptr inbounds half, half addrspace(1)* %in, i32 %tid
  %val = load half, half addrspace(1)* %gep.in, align 2
  %fneg = fsub half -0.0, %val
  store half %fneg, half addrspace(1)* %gep.out
  ret void
}
30
; fneg applied to a bitcast integer argument: the negate should be free
; (fold to a bit op on the integer value).
; NOTE: "XCI" is a deliberately disabled check (the desired scalar s_xor_b32
; form, kept for reference); only the CI lines are live.
; GCN-LABEL: {{^}}fneg_free_f16:
; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]],

; XCI: s_xor_b32 [[XOR:s[0-9]+]], [[NEG_VALUE]], 0x8000{{$}}
; CI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[NEG_VALUE]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
define amdgpu_kernel void @fneg_free_f16(half addrspace(1)* %out, i16 %in) #0 {
  %bc = bitcast i16 %in to half
  %fsub = fsub half -0.0, %bc
  store half %fsub, half addrspace(1)* %out
  ret void
}
43
; fneg folded into a use: (-x) * x. On CI (no f16 arithmetic) the negate is
; folded into the f16->f32 conversion's source modifier; on VI it is folded
; into v_mul_f16's source modifier. The VI-NOT line checks that no separate
; instruction materializes the negated value before the multiply.
; GCN-LABEL: {{^}}v_fneg_fold_f16:
; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]]

; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]]
; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]]
; CI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[CVT_VAL]], [[NEG_CVT0]]
; CI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[MUL]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]

; VI-NOT: [[NEG_VALUE]]
; VI: v_mul_f16_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define amdgpu_kernel void @v_fneg_fold_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %fsub = fsub half -0.0, %val
  %fmul = fmul half %fsub, %val
  store half %fmul, half addrspace(1)* %out
  ret void
}
Matt Arsenaulteb522e62017-02-27 22:15:25 +000062
; fneg of a packed <2 x half> SGPR argument. GFX9 negates both packed
; halves with a single xor against 0x80008000 (both sign bits); CI/VI
; split, xor each half, and repack.
; FIXME: Terrible code with VI and even worse with SI/CI
; GCN-LABEL: {{^}}s_fneg_v2f16:
; CI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_or_b32_e32

; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x8000{{$}}
; VI: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]]
; VI: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]]

; GFX9: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
define amdgpu_kernel void @s_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
  %fneg = fsub <2 x half> <half -0.0, half -0.0>, %in
  store <2 x half> %fneg, <2 x half> addrspace(1)* %out
  ret void
}
81
; fneg of a loaded packed <2 x half>: on all targets a single dword xor
; with 0x80008000 flips both sign bits at once.
; GCN-LABEL: {{^}}v_fneg_v2f16:
; GCN: flat_load_dword [[VAL:v[0-9]+]]
; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, [[VAL]]
define amdgpu_kernel void @v_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
  ; NOTE(review): %gep.out is based on %in (store goes back to the input
  ; buffer; %out is unused) — same apparent copy-paste as v_fneg_f16;
  ; confirm intent before changing.
  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
  %fneg = fsub <2 x half> <half -0.0, half -0.0>, %val
  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
  ret void
}
94
; fneg of a bitcast i32 argument as <2 x half>: CI/VI fold it to a scalar
; s_xor_b32 on the SGPR value; GFX9 moves to a VGPR and does the vector xor.
; GCN-LABEL: {{^}}fneg_free_v2f16:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; CIVI: s_xor_b32 s{{[0-9]+}}, [[VAL]], 0x80008000

; GFX9: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GFX9: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, [[VVAL]]
define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %in) #0 {
  %bc = bitcast i32 %in to <2 x half>
  %fsub = fsub <2 x half> <half -0.0, half -0.0>, %bc
  store <2 x half> %fsub, <2 x half> addrspace(1)* %out
  ret void
}
107
; Packed fneg folded into a use: (-x) * x on <2 x half>. CI promotes each
; half to f32 with the negate folded into the conversion; VI unpacks (lshr
; by 16 for the high half) and folds the negate into each v_mul_f16; GFX9
; uses a single packed multiply with neg_lo/neg_hi source modifiers.
; GCN-LABEL: {{^}}v_fneg_fold_v2f16:
; GCN: flat_load_dword [[VAL:v[0-9]+]]

; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}
; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}
; CI: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: v_cvt_f16_f32
; CI: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: v_cvt_f16_f32

; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}

; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}}
define amdgpu_kernel void @v_fneg_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %fsub = fsub <2 x half> <half -0.0, half -0.0>, %val
  %fmul = fmul <2 x half> %fsub, %val
  store <2 x half> %fmul, <2 x half> addrspace(1)* %out
  ret void
}
130
; Workitem (thread) id intrinsic used by the v_* tests for per-lane addressing.
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }