; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,SIVI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s

; Code generation checks for the llvm.maxnum intrinsic on half and on vectors
; of half. The three invocations above compile for tahiti, fiji and gfx900;
; the check prefixes they enable select which expectations below apply to
; each target.

; Intrinsics under test: llvm.maxnum for scalar half and for 2-, 3- and
; 4-element half vectors.
declare half @llvm.maxnum.f16(half %a, half %b)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
declare <3 x half> @llvm.maxnum.v3f16(<3 x half> %a, <3 x half> %b)
declare <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %b)

; Scalar maxnum of two halves loaded from global memory. The tahiti checks
; expect both inputs to be converted to f32, combined with v_max_f32 and
; converted back to f16; the fiji/gfx900 checks expect a single v_max_f16.
; GCN-LABEL: {{^}}maxnum_f16:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_max_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; GFX89: v_max_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = call half @llvm.maxnum.f16(half %a.val, half %b.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar maxnum with the constant 3.0 as the first operand. The checks expect
; the constant to appear as a literal operand of the max instruction:
; 0x40400000 (3.0 as f32) on the tahiti path and 0x4200 (3.0 as f16) on the
; fiji/gfx900 path.
; GCN-LABEL: {{^}}maxnum_f16_imm_a:
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_max_f32_e32 v[[R_F32:[0-9]+]], 0x40400000, v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; GFX89: v_max_f16_e32 v[[R_F16:[0-9]+]], 0x4200, v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b) {
entry:
  %b.val = load half, half addrspace(1)* %b
  %r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; Scalar maxnum with the constant 4.0 as the second operand. The checks
; expect the constant to be printed as 4.0 and placed in the first source
; operand position of the max instruction on every target.
; GCN-LABEL: {{^}}maxnum_f16_imm_b:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_max_f32_e32 v[[R_F32:[0-9]+]], 4.0, v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
; GFX89: v_max_f16_e32 v[[R_F16:[0-9]+]], 4.0, v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a) {
entry:
  %a.val = load half, half addrspace(1)* %a
  %r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0)
  store half %r.val, half addrspace(1)* %r
  ret void
}

; maxnum of two <2 x half> values, each loaded as one dword.
;  - tahiti checks: split each dword into two halves (shift right by 16 for
;    the upper one), do both maxima in f32, convert back, then repack with a
;    shift-left and an or, with no "and" in between.
;  - fiji checks: one v_max_f16 for the low half plus an SDWA v_max_f16 that
;    reads and writes WORD_1, combined with an or.
;  - gfx900 checks: a single v_pk_max_f16.
; GCN-LABEL: {{^}}maxnum_v2f16:
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]

; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]

; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]]
; SI-DAG: v_max_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SI-NOT: and
; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]
; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NOT: and
; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]

; GFX9: v_pk_max_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> %b.val)
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> maxnum with the constant vector <3.0, 4.0> as the first operand.
;  - tahiti checks: element 0 uses the literal 0x40400000 (3.0 as f32) and
;    element 1 uses 4.0, followed by the shift/or repack.
;  - fiji checks: 0x4400 (4.0 as f16) is first moved into a VGPR for the SDWA
;    max on the upper half; the lower half uses the literal 0x4200 (3.0).
;  - gfx900 checks: both elements are packed into one scalar register,
;    0x44004200 (4.0 in the upper 16 bits, 3.0 in the lower), feeding
;    v_pk_max_f16.
; GCN-LABEL: {{^}}maxnum_v2f16_imm_a:
; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], 0x40400000, v[[B_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]

; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; SIVI-NOT: and
; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]


; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004200
; GFX9: v_pk_max_f16 v[[R_V2_F16:[0-9]+]], v[[B_V2_F16]], [[K]]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_v2f16_imm_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %b) {
entry:
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <2 x half> maxnum with the constant vector <4.0, 3.0> as the second
; operand. Mirrors the imm_a variant with the element constants swapped:
;  - tahiti checks: element 0 uses 4.0 and element 1 uses 0x40400000 (3.0 as
;    f32), followed by the shift/or repack.
;  - fiji checks: 0x4200 (3.0 as f16) is moved into a VGPR for the SDWA max
;    on the upper half; the lower half uses 4.0.
;  - gfx900 checks: packed scalar constant 0x42004400 (3.0 in the upper 16
;    bits, 4.0 in the lower) feeding v_pk_max_f16.
; GCN-LABEL: {{^}}maxnum_v2f16_imm_b:
; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], 4.0, v[[A_F32_0]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]

; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]

; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]


; SIVI-NOT: and
; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]

; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x42004400
; GFX9: v_pk_max_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], [[K]]

; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @maxnum_v2f16_imm_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a) {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}

; <3 x half> maxnum. Only the gfx900 output is constrained here: the checks
; expect two v_pk_max_f16 instructions. The tahiti and fiji runs are checked
; for the function label only.
; FIXME: Scalarize with undef half
; GCN-LABEL: {{^}}maxnum_v3f16:
; GFX9: v_pk_max_f16
; GFX9: v_pk_max_f16
define amdgpu_kernel void @maxnum_v3f16(
    <3 x half> addrspace(1)* %r,
    <3 x half> addrspace(1)* %a,
    <3 x half> addrspace(1)* %b) {
entry:
  %a.val = load <3 x half>, <3 x half> addrspace(1)* %a
  %b.val = load <3 x half>, <3 x half> addrspace(1)* %b
  %r.val = call <3 x half> @llvm.maxnum.v3f16(<3 x half> %a.val, <3 x half> %b.val)
  store <3 x half> %r.val, <3 x half> addrspace(1)* %r
  ret void
}

; <4 x half> maxnum. Each operand is expected to be loaded as a 64-bit
; register pair on fiji and gfx900. On gfx900 the low and high dwords are
; each combined with v_pk_max_f16 and the resulting pair is stored with one
; dwordx2 store. The tahiti run is checked for the function label only.
; GCN-LABEL: {{^}}maxnum_v4f16:
; GFX89: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GFX89: buffer_load_dwordx2 v{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}
; GFX9-DAG: v_pk_max_f16 v[[MAX_LO:[0-9]+]], v[[A_LO]], v[[B_LO]]
; GFX9-DAG: v_pk_max_f16 v[[MAX_HI:[0-9]+]], v[[A_HI]], v[[B_HI]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[MAX_LO]]:[[MAX_HI]]{{\]}}
define amdgpu_kernel void @maxnum_v4f16(
    <4 x half> addrspace(1)* %r,
    <4 x half> addrspace(1)* %a,
    <4 x half> addrspace(1)* %b) {
entry:
  %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
  %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a.val, <4 x half> %b.val)
  store <4 x half> %r.val, <4 x half> addrspace(1)* %r
  ret void
}

; <4 x half> maxnum with the constant vector <8.0, 2.0, 3.0, 4.0> as the
; first operand.
;  - gfx900 checks: the constants are packed pairwise into scalar registers,
;    0x40004800 (2.0 upper, 8.0 lower) for the low dword and 0x44004200
;    (4.0 upper, 3.0 lower) for the high dword, each feeding v_pk_max_f16.
;  - fiji checks: the upper-half constants 0x4000 (2.0) and 0x4400 (4.0) are
;    moved into VGPRs for the SDWA maxima; the lower halves use the literals
;    0x4800 (8.0) and 0x4200 (3.0). Each dword is reassembled with an or and
;    the pair is stored with one dwordx2 store.
; The tahiti run is checked for the function label only.
; GCN-LABEL: {{^}}fmax_v4f16_imm_a:
; GFX89-DAG: buffer_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GFX9-DAG: s_mov_b32 [[K1:s[0-9]+]], 0x44004200
; GFX9-DAG: s_mov_b32 [[K0:s[0-9]+]], 0x40004800

; GFX9-DAG: v_pk_max_f16 v[[MAX_LO:[0-9]+]], v[[A_LO]], [[K0]]
; GFX9-DAG: v_pk_max_f16 v[[MAX_HI:[0-9]+]], v[[A_HI]], [[K1]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[MAX_LO]]:[[MAX_HI]]{{\]}}

; VI-DAG: v_mov_b32_e32 [[K2:v[0-9]+]], 0x4000
; VI-DAG: v_mov_b32_e32 [[K4:v[0-9]+]], 0x4400

; VI-DAG: v_max_f16_sdwa v[[MAX_HI_HI:[0-9]+]], v[[A_HI]], [[K4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 v[[MAX_HI_LO:[0-9]+]], 0x4200, v[[A_HI]]
; VI-DAG: v_max_f16_sdwa v[[MAX_LO_HI:[0-9]+]], v[[A_LO]], [[K2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_max_f16_e32 v[[MAX_LO_LO:[0-9]+]], 0x4800, v[[A_LO]]

; VI-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[MAX_LO_LO]], v[[MAX_LO_HI]]
; VI-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], v[[MAX_HI_LO]], v[[MAX_HI_HI]]

; VI: buffer_store_dwordx2 v{{\[}}[[OR0]]:[[OR1]]{{\]}}
define amdgpu_kernel void @fmax_v4f16_imm_a(
    <4 x half> addrspace(1)* %r,
    <4 x half> addrspace(1)* %b) {
entry:
  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
  %r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
  store <4 x half> %r.val, <4 x half> addrspace(1)* %r
  ret void
}