; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s

; Unsigned byte to f32 conversion tests. The file is compiled once for tahiti
; (checked with the SI prefix) and once for tonga (checked with the VI prefix);
; checks common to both invocations use the GCN prefix.

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone

; Scalar case: a loaded i8 converted with uitofp. The conversion is expected
; to read the loaded register directly, with no bfe or lshr in between.
; GCN-LABEL: {{^}}load_i8_to_f32:
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG:v[0-9]+]],
; GCN-NOT: bfe
; GCN-NOT: lshr
; GCN: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; GCN: buffer_store_dword [[CONV]],
define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
  %load = load i8, i8 addrspace(1)* %gep, align 1
  %cvt = uitofp i8 %load to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

; Two-element case: a single ushort load feeds both byte conversions, and the
; two results form the register pair written by the dwordx2 store.
; GCN-LABEL: {{^}}load_v2i8_to_v2f32:
; GCN: {{buffer|flat}}_load_ushort [[LD:v[0-9]+]]
; GCN-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LD]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LD]]
; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2
  %cvt = uitofp <2 x i8> %load to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
  ret void
}

; Three-element case: one dword load feeds the byte 0, 1 and 2 conversions.
; Byte 3 is not part of the vector, so no ubyte3 conversion may appear before
; the checked conversions.
; GCN-LABEL: {{^}}load_v3i8_to_v3f32:
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: v_cvt_f32_ubyte3_e32
; GCN-DAG: v_cvt_f32_ubyte2_e32 v[[HIRESULT:[0-9]+]], [[VAL]]
; GCN-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[VAL]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[VAL]]
; GCN: buffer_store_dwordx3 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid
  %load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4
  %cvt = uitofp <3 x i8> %load to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
  ret void
}

; Four-element case with a dword-aligned load: all four byte conversions read
; the same loaded register, with no bfe or lshr, and the results are stored
; with one dwordx4 store.
; GCN-LABEL: {{^}}load_v4i8_to_v4f32:
; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]]
; GCN-NOT: bfe
; GCN-NOT: lshr
; GCN-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; GCN-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
; GCN-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; GCN: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
  %cvt = uitofp <4 x i8> %load to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; This should not be adding instructions to shift into the correct
; position in the word for the component.

; Same conversion as the aligned <4 x i8> test, but the load only has byte
; alignment. The checks currently expect four separate ubyte loads, a
; shift/or sequence, and four ubyte0 conversions.
; FIXME: Packing bytes
; GCN-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG3:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG2:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG1:v[0-9]+]]
; GCN: {{buffer|flat}}_load_ubyte [[LOADREG0:v[0-9]+]]
; GCN-DAG: v_lshlrev_b32
; GCN-DAG: v_or_b32
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]],
; GCN-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}},
; GCN-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}},
; GCN-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]]

; GCN: buffer_store_dwordx4
define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
  %cvt = uitofp <4 x i8> %load to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; FIXME: Need to handle non-uniform case for function below (load without gep).
; Instructions still emitted to repack bytes for add use.

; The loaded <4 x i8> has two users: the uitofp stored to %out and a vector
; add stored to %out2. All four byte conversions are still expected; the
; repacking for the add differs between the two targets.
; GCN-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
; GCN: {{buffer|flat}}_load_dword
; GCN-DAG: v_cvt_f32_ubyte0_e32
; GCN-DAG: v_cvt_f32_ubyte1_e32
; GCN-DAG: v_cvt_f32_ubyte2_e32
; GCN-DAG: v_cvt_f32_ubyte3_e32

; GCN-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 24

; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16
; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 8
; SI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xffff,
; SI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff00,
; SI-DAG: v_add_i32

; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xffffff00,
; VI-DAG: v_add_u16_e32
; VI-DAG: v_add_u16_e32

; GCN: {{buffer|flat}}_store_dwordx4
; GCN: {{buffer|flat}}_store_dword

; GCN: s_endpgm
define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
  %tid.x = call i32 @llvm.amdgcn.workitem.id.x()
  %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4
  %cvt = uitofp <4 x i8> %load to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
  store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4
  ret void
}

; Make sure this doesn't crash.
; Odd-sized <7 x i8> vector; only successful compilation up to s_endpgm is
; checked, not the instruction selection.
; GCN-LABEL: {{^}}load_v7i8_to_v7f32:
; GCN: s_endpgm
define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid
  %load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1
  %cvt = uitofp <7 x i8> %load to <7 x float>
  store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
  ret void
}

; Eight-element case: one dwordx2 load; bytes 0-3 are converted from each half
; of the loaded register pair, with no bfe or lshr, and the results are
; written by two dwordx4 stores.
; GCN-LABEL: {{^}}load_v8i8_to_v8f32:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; GCN-NOT: bfe
; GCN-NOT: lshr
; GCN-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
; GCN-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
; GCN-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
; GCN-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
; GCN-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
; GCN-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
; GCN-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
; GCN-NOT: bfe
; GCN-NOT: lshr
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8
  %cvt = uitofp <8 x i8> %load to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
  ret void
}

; The low byte of (load + 2) is masked with 255 and converted. The conversion
; is expected to immediately follow the add and consume its result, with no
; separate mask instruction between them.
; GCN-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]],
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
; GCN-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; GCN: buffer_store_dword [[CONV]],
define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %add = add i32 %load, 2
  %inreg = and i32 %add, 255
  %cvt = uitofp i32 %inreg to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

; Byte 1 is isolated by masking with 65280 (0xff00) and shifting right by 8
; before the conversion. Only the function label is checked, so this test
; covers successful compilation rather than a specific instruction sequence.
; GCN-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %inreg = and i32 %load, 65280
  %shr = lshr i32 %inreg, 8
  %cvt = uitofp i32 %shr to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

; We don't get these ones because of the zext, but instcombine removes
; them so it shouldn't really matter.
; Scalar form: an i8 load is explicitly zero-extended to i32 before uitofp.
; Only the function label is checked.
; GCN-LABEL: {{^}}i8_zext_i32_to_f32:
define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
  %load = load i8, i8 addrspace(1)* %gep, align 1
  %ext = zext i8 %load to i32
  %cvt = uitofp i32 %ext to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

; Vector form: a byte-aligned <4 x i8> load is explicitly zero-extended to
; <4 x i32> before uitofp. Only the function label is checked.
; GCN-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32:
define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
  %ext = zext <4 x i8> %load to <4 x i32>
  %cvt = uitofp <4 x i32> %ext to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Byte 0 of a loaded dword (mask with 255) converted to float. Nothing may
; reference the loaded register between the load and the ubyte0 conversion.
; GCN-LABEL: {{^}}extract_byte0_to_f32:
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep
  %and = and i32 %val, 255
  %cvt = uitofp i32 %and to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Byte 1 of a loaded dword (shift right by 8, mask with 255) converted to
; float. Nothing may reference the loaded register between the load and the
; ubyte1 conversion.
; GCN-LABEL: {{^}}extract_byte1_to_f32:
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte1_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep
  %srl = lshr i32 %val, 8
  %and = and i32 %srl, 255
  %cvt = uitofp i32 %and to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Byte 2 of a loaded dword (shift right by 16, mask with 255) converted to
; float. Nothing may reference the loaded register between the load and the
; ubyte2 conversion.
; GCN-LABEL: {{^}}extract_byte2_to_f32:
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte2_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep
  %srl = lshr i32 %val, 16
  %and = and i32 %srl, 255
  %cvt = uitofp i32 %and to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; Byte 3 of a loaded dword (shift right by 24, mask with 255) converted to
; float. Nothing may reference the loaded register between the load and the
; ubyte3 conversion.
; GCN-LABEL: {{^}}extract_byte3_to_f32:
; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; GCN-NOT: [[VAL]]
; GCN: v_cvt_f32_ubyte3_e32 [[CONV:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep
  %srl = lshr i32 %val, 24
  %and = and i32 %srl, 255
  %cvt = uitofp i32 %and to float
  store float %cvt, float addrspace(1)* %out
  ret void
}