; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s

; Scalar half-precision subtraction, r = a - b, with both operands loaded
; from global memory.
;
; Expected code, per check prefix:
;   * SI prefix (tahiti run): each operand is converted to f32, subtracted
;     with v_sub_f32, and the result is converted back to f16.
;   * GFX89 prefix (fiji and gfx900 runs): a single v_sub_f16.
; Every run must end with a 16-bit store of the result and s_endpgm.
;
; Both loads are volatile -- presumably so that they are emitted in program
; order, which the A_F16 / B_F16 captures below depend on (NOTE(review):
; confirm against the commit that introduced the volatile qualifier).
; GCN-LABEL: {{^}}fsub_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_sub_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; GFX89: v_sub_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fsub_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %b.val = load volatile half, half addrspace(1)* %b
  %r.val = fsub half %a.val, %b.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half-precision subtraction with a constant left-hand operand,
; r = 1.0 - b.
;
; The checks expect the constant to appear as the inline immediate 1.0 in the
; first source operand of the subtract:
;   * SI prefix (tahiti run): b is converted to f32, v_sub_f32 with 1.0, and
;     the result is converted back to f16.
;   * GFX89 prefix (fiji and gfx900 runs): a single v_sub_f16 with 1.0.
; Every run must end with a 16-bit store of the result and s_endpgm.
; GCN-LABEL: {{^}}fsub_f16_imm_a:
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_sub_f32_e32 v[[R_F32:[0-9]+]], 1.0, v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; GFX89: v_sub_f16_e32 v[[R_F16:[0-9]+]], 1.0, v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fsub_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b) {
entry:
  %b.val = load volatile half, half addrspace(1)* %b
  %r.val = fsub half 1.0, %b.val
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar half-precision subtraction with a constant right-hand operand,
; r = a - 2.0.
;
; The checks expect the subtraction of a constant to be emitted as an
; addition of the negated constant (inline immediate -2.0):
;   * SI prefix (tahiti run): a is converted to f32, v_add_f32 with -2.0,
;     and the result is converted back to f16.
;   * GFX89 prefix (fiji and gfx900 runs): a single v_add_f16 with -2.0.
; Every run must end with a 16-bit store of the result and s_endpgm.
; GCN-LABEL: {{^}}fsub_f16_imm_b:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], -2.0, v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; GFX89: v_add_f16_e32 v[[R_F16:[0-9]+]], -2.0, v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @fsub_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load volatile half, half addrspace(1)* %a
  %r.val = fsub half %a.val, 2.0
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Two-element half vector subtraction, r = a - b, with both operands loaded
; from global memory as one dword each.
;
; Expected code, per check prefix:
;   * SI prefix (tahiti run): each dword is split into its low half and its
;     high half (shift right by 16), all four halves are converted to f32,
;     the two lanes are subtracted with v_sub_f32, converted back to f16, and
;     the high result is shifted left by 16 and OR-ed with the low result.
;   * VI prefix (fiji run): v_sub_f16 for the low lane, an SDWA v_sub_f16
;     reading and writing WORD_1 for the high lane, then v_or_b32.
;   * GFX9 prefix (gfx900 run): one packed v_pk_add_f16 with the second
;     source negated in both lanes (neg_lo:[0,1] neg_hi:[0,1]).
;
; NOTE(review): the loads here are not volatile, and the expected load order
; differs between prefixes (B then A for the SI and VI prefixes, A then B for
; the GFX9 prefix), so these checks follow whatever order the scheduler picks.
; GCN-LABEL: {{^}}fsub_v2f16:
; SI: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: buffer_load_dword v[[A_V2_F16:[0-9]+]]

; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]

; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI-DAG: v_sub_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]]
; SI-DAG: v_sub_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; VI: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; VI: buffer_load_dword v[[A_V2_F16:[0-9]+]]

; VI-DAG: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]


; GFX9: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GFX9: buffer_load_dword v[[B_V2_F16:[0-9]+]]

; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm

define amdgpu_kernel void @fsub_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fsub <2 x half> %a.val, %b.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Two-element half vector subtraction with a constant left-hand operand,
; r = <1.0, 2.0> - b.
;
; Expected code, per check prefix:
;   * SI prefix (tahiti run): b is split into its two halves, each is
;     converted to f32 and subtracted from its constant (1.0 for the low
;     lane, 2.0 for the high lane) with v_sub_f32; the results are converted
;     back to f16 and recombined with a shift left by 16 and v_or_b32.
;   * VI prefix (fiji run): the low lane uses v_sub_f16 with inline 1.0; the
;     high lane uses an SDWA v_sub_f16 whose first source is a register
;     holding 0x4000 (the half encoding of 2.0); then v_or_b32.
;   * GFX9 prefix (gfx900 run): the packed constant 0x40003c00 (2.0 in the
;     high half, 1.0 in the low half) is placed in an SGPR and added to the
;     negated b (neg_lo:[1,0] neg_hi:[1,0]) by one v_pk_add_f16.
; GCN-LABEL: {{^}}fsub_v2f16_imm_a:
; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]

; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI-DAG: v_sub_f32_e32 v[[R_F32_0:[0-9]+]], 1.0, v[[B_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_sub_f32_e32 v[[R_F32_1:[0-9]+]], 2.0, v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-DAG: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40003c00
; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[B_V2_F16]], [[K]] neg_lo:[1,0] neg_hi:[1,0]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm

define amdgpu_kernel void @fsub_v2f16_imm_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %b) {
entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = fsub <2 x half> <half 1.0, half 2.0>, %b.val
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; Two-element half vector subtraction with a constant right-hand operand,
; r = a - <2.0, 1.0>.
;
; As in the scalar case, the checks expect subtraction of a constant to be
; emitted as addition of the negated constant:
;   * SI prefix (tahiti run): a is split into its two halves, each is
;     converted to f32 and added to its negated constant (-2.0 for the low
;     lane, -1.0 for the high lane) with v_add_f32; the results are converted
;     back to f16 and recombined with a shift left by 16 and v_or_b32.
;   * VI prefix (fiji run): the low lane uses v_add_f16 with inline -2.0; the
;     high lane uses an SDWA v_add_f16 whose second source is a register
;     holding 0xbc00 (the half encoding of -1.0); then v_or_b32.
;   * GFX9 prefix (gfx900 run): the packed constant 0xbc00c000 (-1.0 in the
;     high half, -2.0 in the low half) is placed in an SGPR and added by one
;     v_pk_add_f16; the trailing {{$}} requires that no neg modifiers follow.
; GCN-LABEL: {{^}}fsub_v2f16_imm_b:
; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]

; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI-DAG: v_add_f32_e32 v[[R_F32_0:[0-9]+]], -2.0, v[[A_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_add_f32_e32 v[[R_F32_1:[0-9]+]], -1.0, v[[A_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]]
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00c000
; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], [[K]]{{$}}

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm

define amdgpu_kernel void @fsub_v2f16_imm_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = fsub <2 x half> %a.val, <half 2.0, half 1.0>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}