; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; FIXME: Need to handle non-uniform case for function below (load without gep).
; Adds two <2 x i16> values, each loaded (volatile) through a pointer indexed
; by the workitem id. The GFX9 checks expect a single packed add; the VI
; checks expect one SDWA add followed by one plain 16-bit add.
; GCN-LABEL: {{^}}v_test_add_v2i16:
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_add_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %in1.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %lane
  %lhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %rhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in1.ptr
  %sum = add <2 x i16> %lhs, %rhs
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds two <2 x i16> values loaded straight from the addrspace(2) pointer
; arguments, with no workitem-dependent indexing. The GFX9 checks expect two
; scalar dword loads, a copy of one operand into a VGPR, and a packed add that
; takes the other operand as an SGPR; the VI checks expect two scalar adds.
; GCN-LABEL: {{^}}s_test_add_v2i16:
; GFX9: s_load_dword [[VAL0:s[0-9]+]]
; GFX9: s_load_dword [[VAL1:s[0-9]+]]
; GFX9: v_mov_b32_e32 [[VVAL1:v[0-9]+]]
; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VAL0]], [[VVAL1]]

; VI: s_add_i32
; VI: s_add_i32
define amdgpu_kernel void @s_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0, <2 x i16> addrspace(2)* %in1) #1 {
  %lhs = load <2 x i16>, <2 x i16> addrspace(2)* %in0
  %rhs = load <2 x i16>, <2 x i16> addrspace(2)* %in1
  %sum = add <2 x i16> %lhs, %rhs
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds a <2 x i16> value, loaded from an addrspace(2) pointer argument, to
; itself. The GFX9 checks expect one scalar load whose result feeds both
; source operands of the packed add; the VI checks expect two scalar adds.
; GCN-LABEL: {{^}}s_test_add_self_v2i16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VAL]], [[VAL]]

; VI: s_add_i32
; VI: s_add_i32
define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in0) #1 {
  %val = load <2 x i16>, <2 x i16> addrspace(2)* %in0
  %doubled = add <2 x i16> %val, %val
  store <2 x i16> %doubled, <2 x i16> addrspace(1)* %out
  ret void
}

; FIXME: VI should not scalarize arg access.
; Adds two <2 x i16> values that are passed by value as kernel arguments.
; The GFX9 checks expect a packed add with one SGPR and one VGPR source; the
; VI checks currently expect a 32-bit add plus a 32-bit SDWA add.
; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg:
; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

; VI: v_add_i32
; VI: v_add_i32_sdwa
define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
  %sum = add <2 x i16> %a, %b
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds the constant vector <123, 456> (0x7b, 0x1c8) to a per-workitem
; <2 x i16> load. The GFX9 checks expect the packed 32-bit literal 0x1c8007b
; to be materialized in an SGPR and used by the packed add; the VI checks
; expect the low half as an immediate operand and the high half moved into a
; VGPR for the SDWA add.
; GCN-LABEL: {{^}}v_test_add_v2i16_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0x1c8007b{{$}}
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]

; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8
; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %val = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %sum = add <2 x i16> %val, <i16 123, i16 456>
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; Adds the constant vector <-845, -991> to a per-workitem <2 x i16> load.
; As 16-bit patterns these are 0xfcb3 and 0xfc21, so the GFX9 checks expect
; the packed literal 0xfc21fcb3 in an SGPR; the VI checks expect each half as
; a separate sign-extended 32-bit literal.
; GCN-LABEL: {{^}}v_test_add_v2i16_neg_constant:
; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}}
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]

; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21
; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %val = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %sum = add <2 x i16> %val, <i16 -845, i16 -991>
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds <-1, -1> to a per-workitem <2 x i16> load. The GFX9 checks expect -1
; to appear directly as the last operand of the packed add. The VI checks
; expect -1 moved into a VGPR for the SDWA (high-half) add, -1 as an immediate
; for the low-half add, and an OR to recombine the halves.
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1:
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}}

; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]]
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %val = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %sum = add <2 x i16> %val, <i16 -1, i16 -1>
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; Adds <32, 0> to a per-workitem <2 x i16> load: only the low element changes.
; The GFX9 checks expect the packed value 32 to be moved into an SGPR and used
; by the packed add. The VI checks expect exactly one 16-bit add (with 32 as
; an immediate), followed by a shift-left by 16 and an OR to rebuild the pair.
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_lo_zero_hi:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 32{{$}}
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}}

; VI-NOT: v_add_u16
; VI: v_add_u16_e32 v{{[0-9]+}}, 32, v{{[0-9]+}}
; VI-NOT: v_add_u16
; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %val = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %sum = add <2 x i16> %val, <i16 32, i16 0>
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; The high element (16256 = 0x3f80) combined with a zero low element forms the
; 32-bit pattern 0x3f800000, which the GFX9 checks expect to be printed as the
; floating-point constant 1.0. The VI checks expect only the high half to be
; added (0x3f80 moved into a VGPR for a single SDWA add), followed by an OR.
; GCN-LABEL: {{^}}v_test_add_v2i16_inline_fp_split:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 1.0
; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}}

; VI-NOT: v_add_u16
; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80
; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NOT: v_add_u16
; VI: v_or_b32_e32
define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %val = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %sum = add <2 x i16> %val, <i16 0, i16 16256>
  store <2 x i16> %sum, <2 x i16> addrspace(1)* %out
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; Adds two per-workitem <2 x i16> loads and zero-extends the sum to <2 x i32>.
; The GFX9 checks expect a packed add whose result is split with a 0xffff
; mask (low element) and a logical shift right by 16 (high element). The VI
; checks expect four 16-bit loads and two 16-bit adds whose results are stored
; directly, with no masking or shifting in between.
; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i32:
; GFX9: global_load_dword [[A:v[0-9]+]]
; GFX9: global_load_dword [[B:v[0-9]+]]

; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}

; VI: flat_load_ushort v[[A_LO:[0-9]+]]
; VI: flat_load_ushort v[[A_HI:[0-9]+]]
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]

; VI: v_add_u16_e32 v[[ADD_HI:[0-9]+]], v[[A_HI]], v[[B_HI]]
; VI-NOT: and
; VI-NOT: shl
; VI: v_add_u16_e32 v[[ADD_LO:[0-9]+]], v[[A_LO]], v[[B_LO]]
; VI-NOT: and
; VI-NOT: shl
; VI: buffer_store_dwordx2 v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %in1.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %lane
  %lhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %rhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in1.ptr
  %sum = add <2 x i16> %lhs, %rhs
  %wide = zext <2 x i16> %sum to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; Adds two per-workitem <2 x i16> loads and zero-extends the sum to <2 x i64>.
; The GFX9 checks expect a packed add split with a 0xffff mask and a logical
; shift right by 16, then a 128-bit store. The VI checks expect a zero moved
; into a VGPR, four 16-bit loads, two 16-bit adds, and a 128-bit store.
; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i64:
; GFX9: global_load_dword [[A:v[0-9]+]]
; GFX9: global_load_dword [[B:v[0-9]+]]

; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx4

; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
; VI: flat_load_ushort v[[A_HI:[0-9]+]]
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]

; VI-DAG: v_add_u16_e32
; VI-DAG: v_add_u16_e32

; VI: buffer_store_dwordx4
define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %in1.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %lane
  %lhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %rhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in1.ptr
  %sum = add <2 x i16> %lhs, %rhs
  %wide = zext <2 x i16> %sum to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; Adds two per-workitem <2 x i16> loads and sign-extends the sum to <2 x i32>.
; The GFX9 checks expect a packed add whose low element is sign-extended with
; a 16-bit signed bitfield extract and whose high element is obtained with an
; arithmetic shift right by 16. The VI checks expect two 16-bit adds, each
; followed by a 16-bit signed bitfield extract.
; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i32:
; GFX9: global_load_dword [[A:v[0-9]+]]
; GFX9: global_load_dword [[B:v[0-9]+]]

; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_bfe_i32 v[[ELT0:[0-9]+]], [[ADD]], 0, 16
; GFX9-DAG: v_ashrrev_i32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}

; VI: v_add_u16_e32
; VI: v_add_u16_e32
; VI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; VI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; VI: buffer_store_dwordx2
define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %in1.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %lane
  %lhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %rhs = load volatile <2 x i16>, <2 x i16> addrspace(1)* %in1.ptr
  %sum = add <2 x i16> %lhs, %rhs
  %wide = sext <2 x i16> %sum to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; Adds two per-workitem <2 x i16> loads and sign-extends the sum to <2 x i64>.
; Unlike the neighbouring tests, the loads here are not volatile. Both runs
; are expected to load whole dwords, sign-extend each 16-bit element with a
; signed bitfield extract, and produce each upper 32 bits with an arithmetic
; shift right by 31.
; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i64:
; GCN: {{flat|global}}_load_dword
; GCN: {{flat|global}}_load_dword

; GFX9: v_pk_add_u16
; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}

; VI: v_add_u16_sdwa
; VI: v_add_u16_e32

; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  ; NOTE(review): %out.ptr is never used; the store below writes through %out
  ; itself. Confirm that this is intentional before changing it.
  %out.ptr = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %lane
  %in0.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %lane
  %in1.ptr = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in1, i32 %lane
  %lhs = load <2 x i16>, <2 x i16> addrspace(1)* %in0.ptr
  %rhs = load <2 x i16>, <2 x i16> addrspace(1)* %in1.ptr
  %sum = add <2 x i16> %lhs, %rhs
  %wide = sext <2 x i16> %sum to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}

; Intrinsic called by the v_test_* kernels to obtain the workitem id that
; indexes their input pointers.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0 is attached to the intrinsic declaration; #1 to every kernel definition.
attributes #0 = { readnone nounwind }
attributes #1 = { nounwind }