Matt Arsenault | 70b9282 | 2017-11-12 23:53:44 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s |
| 2 | ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s |
Matt Arsenault | 6689abe | 2016-05-05 20:07:37 +0000 | [diff] [blame] | 3 | ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s |
Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 4 | |
Matt Arsenault | 28bd7d4 | 2015-09-25 18:21:47 +0000 | [diff] [blame] | 5 | declare i32 @llvm.r600.read.tidig.x() #0 |
| 6 | |
; Bitwise AND of two <2 x i32> values loaded from consecutive elements of
; %in. The checks expect one AND per 32-bit lane on both targets.
; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}

define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %in.next = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %lhs = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %rhs = load <2 x i32>, <2 x i32> addrspace(1)* %in.next
  %anded = and <2 x i32> %lhs, %rhs
  store <2 x i32> %anded, <2 x i32> addrspace(1)* %out
  ret void
}
| 22 | |
; Bitwise AND of two <4 x i32> values loaded from consecutive elements of
; %in. The checks expect one AND per 32-bit lane on both targets. The
; optional-asterisk pattern uses the same "\*?" form as test2.
; FUNC-LABEL: {{^}}test4:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}


; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}

define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = and <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}
Matt Arsenault | 284ae08 | 2014-06-09 08:36:53 +0000 | [diff] [blame] | 43 | |
; i32 AND where both operands are kernel arguments; the check expects a
; scalar s_and_b32.
; FUNC-LABEL: {{^}}s_and_i32:
; SI: s_and_b32
define amdgpu_kernel void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %anded = and i32 %a, %b
  store i32 %anded, i32 addrspace(1)* %out, align 4
  ret void
}
| 51 | |
; i32 AND of a kernel argument with the literal 1234567 (0x12d687); the
; check expects the literal as a direct operand of s_and_b32.
; FUNC-LABEL: {{^}}s_and_constant_i32:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
  %masked = and i32 %a, 1234567
  store i32 %masked, i32 addrspace(1)* %out, align 4
  ret void
}
| 59 | |
; FIXME: The constant should really be duplicated, so that the SALU use
; folds into the s_and_b32 and the VALU use is materialized directly
; instead of being copied from the SGPR.

; The constant's second use is a VGPR use (it is stored as-is).
; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_0:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; SI: buffer_store_dword [[VK]]
define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %masked = and i32 %a, 1234567

  ; Only here to stop a future replacement of the copy to VGPR + store
  ; with a VALU op.
  %sum = add i32 %masked, %b
  store volatile i32 %sum, i32 addrspace(1)* %out
  store volatile i32 1234567, i32 addrspace(1)* %out
  ret void
}
| 79 | |
; The constant's second use is another SGPR use (the operand of an add).
; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_1:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI: s_add_i32
; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI: v_mov_b32_e32 [[VADD:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[VADD]]
define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %masked = and i32 %a, 1234567
  %plus.k = add i32 %masked, 1234567
  %total = add i32 %plus.k, %b
  store volatile i32 %total, i32 addrspace(1)* %out
  ret void
}
| 95 | |
; i32 AND where both operands are loaded from addresses indexed by the
; tidig.x intrinsic; the check expects a v_and_b32 with VGPR operands.
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
  %tid.x = call i32 @llvm.r600.read.tidig.x() #0
  %a.addr = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid.x
  %b.addr = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid.x
  %out.addr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid.x
  %lhs = load i32, i32 addrspace(1)* %a.addr
  %rhs = load i32, i32 addrspace(1)* %b.addr
  %anded = and i32 %lhs, %rhs
  store i32 %anded, i32 addrspace(1)* %out.addr
  ret void
}
| 109 | |
; i32 AND of a kernel argument (%a) with a value loaded from an address
; indexed by the tidig.x intrinsic (%b). The checks expect the scalar
; load result as the first source of v_and_b32 and the loaded VGPR as the
; second.
; FUNC-LABEL: {{^}}v_and_i32_sgpr_vgpr:
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
  %tid.x = call i32 @llvm.r600.read.tidig.x() #0
  %b.addr = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid.x
  %out.addr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid.x
  %rhs = load i32, i32 addrspace(1)* %b.addr
  %anded = and i32 %a, %rhs
  store i32 %anded, i32 addrspace(1)* %out.addr
  ret void
}
| 123 | |
; i32 AND of a value loaded from an address indexed by the tidig.x
; intrinsic (%a) with a kernel argument (%b). The capture names follow
; the IR operands: SB holds the scalar argument %b, VA the loaded %a.
; The checks expect the scalar as the first source of v_and_b32.
; FUNC-LABEL: {{^}}v_and_i32_vgpr_sgpr:
; SI-DAG: s_load_dword [[SB:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VA:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SB]], [[VA]]
define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep.a
  %and = and i32 %a, %b
  store i32 %and, i32 addrspace(1)* %gep.out
  ret void
}
| 137 | |
; i32 AND of a loaded value with the literal 1234567 (0x12d687); the
; check expects the literal as the first source of v_and_b32.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}v_and_constant_i32:
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %a, 1234567
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}
| 148 | |
; i32 AND of a loaded value with 64; the check expects 64 to appear
; directly as the first source of v_and_b32.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32:
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %a, 64
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}
| 159 | |
; i32 AND of a loaded value with -16; the check expects -16 to appear
; directly as the first source of v_and_b32.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32:
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %a, -16
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}
| 170 | |
; i64 AND where both operands are kernel arguments; the check expects a
; single s_and_b64.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}s_and_i64:
; SI: s_and_b64
define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
  %and = and i64 %a, %b
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}
| 178 | |
; i1 AND of two kernel arguments. The checks expect one s_load_dword, a
; right shift by 8 of that dword (captured as B_SHIFT), an s_and_b32 of
; the two, a final mask with 1, and a byte store of the result.
; FUNC-LABEL: {{^}}s_and_i1:
; SI: s_load_dword [[LOAD:s[0-9]+]]
; SI: s_lshr_b32 [[B_SHIFT:s[0-9]+]], [[LOAD]], 8
; SI: s_and_b32 [[AND:s[0-9]+]], [[LOAD]], [[B_SHIFT]]
; SI: s_and_b32 [[AND_TRUNC:s[0-9]+]], [[AND]], 1{{$}}
; SI: v_mov_b32_e32 [[V_AND_TRUNC:v[0-9]+]], [[AND_TRUNC]]
; SI: buffer_store_byte [[V_AND_TRUNC]]
define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
  %both = and i1 %a, %b
  store i1 %both, i1 addrspace(1)* %out
  ret void
}
| 191 | |
; i64 AND of a kernel argument with 549756338176 (0x0000008000080000).
; The checks expect two s_and_b32, one per 32-bit half of the constant
; (0x80000 and 0x80), in either order.
; FUNC-LABEL: {{^}}s_and_constant_i64:
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000{{$}}
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80{{$}}
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
  %masked = and i64 %a, 549756338176
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 201 | |
; The same i64 constant (549756338176) masks two different kernel
; arguments. Apart from the label, the checks below use the XSI prefix,
; which none of the RUN lines enables, so they are currently inactive.
; FUNC-LABEL: {{^}}s_and_multi_use_constant_i64:
; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}
define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
  %a.masked = and i64 %a, 549756338176
  %b.masked = and i64 %b, 549756338176
  store volatile i64 %a.masked, i64 addrspace(1)* %out
  store volatile i64 %b.masked, i64 addrspace(1)* %out
  ret void
}
| 213 | |
; i64 AND of a kernel argument with 1234567, a constant that fits in the
; low 32 bits. The checks expect exactly one s_and_b32 between the
; argument load and the 64-bit store.
; FUNC-LABEL: {{^}}s_and_32_bit_constant_i64:
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687{{$}}
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i32, i64 %a) {
  %masked = and i64 %a, 1234567
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 225 | |
; Two shifted i64 kernel arguments are each masked with 62 and then added
; to a third argument. The checks expect 32-bit shifts followed by one
; s_and_b32 with 62 per masked value, and no other instruction matching
; "and".
; FUNC-LABEL: {{^}}s_and_multi_use_inline_imm_i64:
; SI: s_load_dwordx2
; SI: s_load_dword [[A:s[0-9]+]]
; SI: s_load_dword [[B:s[0-9]+]]
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_lshl_b32 [[A]], [[A]], 1
; SI: s_lshl_b32 [[B]], [[B]], 1
; SI: s_and_b32 s{{[0-9]+}}, [[A]], 62
; SI: s_and_b32 s{{[0-9]+}}, [[B]], 62
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i32, i64 %a, i32, i64 %b, i32, i64 %c) {
  %a.x2 = shl i64 %a, 1
  %b.x2 = shl i64 %b, 1
  %a.masked = and i64 %a.x2, 62
  %b.masked = and i64 %b.x2, 62
  %a.sum = add i64 %a.masked, %c
  %b.sum = add i64 %b.masked, %c
  store volatile i64 %a.sum, i64 addrspace(1)* %out
  store volatile i64 %b.sum, i64 addrspace(1)* %out
  ret void
}
| 249 | |
; i64 AND of two values loaded from addresses indexed by the tidig.x
; intrinsic; the checks expect two v_and_b32 instructions.
; FUNC-LABEL: {{^}}v_and_i64:
; SI: v_and_b32
; SI: v_and_b32
define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
  %tid.x = call i32 @llvm.r600.read.tidig.x() #0
  %a.addr = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid.x
  %lhs = load i64, i64 addrspace(1)* %a.addr, align 8
  %b.addr = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid.x
  %rhs = load i64, i64 addrspace(1)* %b.addr, align 8
  %anded = and i64 %lhs, %rhs
  store i64 %anded, i64 addrspace(1)* %out, align 8
  ret void
}
| 263 | |
; i64 AND of a loaded value with 1231231234567 (0x0000011eab19b207).
; The checks expect one v_and_b32 per 32-bit half of the constant, in
; either order.
; FUNC-LABEL: {{^}}v_and_constant_i64:
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0xab19b207, {{v[0-9]+}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid.x = call i32 @llvm.r600.read.tidig.x() #0
  %a.addr = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid.x
  %val = load i64, i64 addrspace(1)* %a.addr, align 8
  %masked = and i64 %val, 1231231234567
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 276 | |
; The same i64 constant (1231231234567) masks two volatile loads from the
; same address. The checks expect each 32-bit half of the constant to be
; placed in an SGPR once and reused by the v_and_b32 of both values.
; FUNC-LABEL: {{^}}v_and_multi_use_constant_i64:
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}}
; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207{{$}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO0]]
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI0]]
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO1]]
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI1]]
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %first = load volatile i64, i64 addrspace(1)* %aptr
  %second = load volatile i64, i64 addrspace(1)* %aptr
  %first.masked = and i64 %first, 1231231234567
  %second.masked = and i64 %second, 1231231234567
  store volatile i64 %first.masked, i64 addrspace(1)* %out
  store volatile i64 %second.masked, i64 addrspace(1)* %out
  ret void
}
| 297 | |
; Two volatile i64 loads from the same address are each masked with 63.
; The checks expect one v_and_b32 with 63 on the low half of each value
; and no other instruction matching "and".
; FUNC-LABEL: {{^}}v_and_multi_use_inline_imm_i64:
; SI: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
; SI-NOT: and
; SI: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
; SI-NOT: and
; SI: v_and_b32_e32 v[[RESLO0:[0-9]+]], 63, v[[LO0]]
; SI: v_and_b32_e32 v[[RESLO1:[0-9]+]], 63, v[[LO1]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO0]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO1]]
define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %first = load volatile i64, i64 addrspace(1)* %aptr
  %second = load volatile i64, i64 addrspace(1)* %aptr
  %first.masked = and i64 %first, 63
  %second.masked = and i64 %second, 63
  store volatile i64 %first.masked, i64 addrspace(1)* %out
  store volatile i64 %second.masked, i64 addrspace(1)* %out
  ret void
}
| 317 | |
; i64 AND of a loaded value with 1234567, a constant that fits in the low
; 32 bits. The checks expect a dword load and exactly one v_and_b32
; before the 64-bit store.
; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant:
; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; SI-NOT: and
; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid.x = call i32 @llvm.r600.read.tidig.x() #0
  %a.addr = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid.x
  %val = load i64, i64 addrspace(1)* %a.addr, align 8
  %masked = and i64 %val, 1234567
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
Matt Arsenault | 49dd428 | 2014-09-15 17:15:02 +0000 | [diff] [blame] | 332 | |
; i64 AND of a loaded value with 64. The checks expect a dword load and
; exactly one v_and_b32 with 64 before the 64-bit store.
; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
; SI: {{buffer|flat}}_load_dword v{{[0-9]+}}
; SI-NOT: and
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid.x = call i32 @llvm.r600.read.tidig.x() #0
  %a.addr = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid.x
  %val = load i64, i64 addrspace(1)* %a.addr, align 8
  %masked = and i64 %val, 64
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 347 | |
; FIXME: It should be possible to reduce the load width here.
; i64 AND of a loaded value with -8. The checks expect a 64-bit load, one
; in-place v_and_b32 with -8 on the low half, and a store of the same
; register pair.
; FUNC-LABEL: {{^}}v_and_inline_neg_imm_i64:
; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-NOT: and
; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid.x = call i32 @llvm.r600.read.tidig.x() #0
  %a.addr = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid.x
  %val = load i64, i64 addrspace(1)* %a.addr, align 8
  %masked = and i64 %val, -8
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 363 | |
; i64 AND of a kernel argument with 64. The checks expect exactly one
; s_and_b32 with 64 between the argument load and the store.
; The label pattern ends in ':' so that it cannot also match the
; s_and_inline_imm_64_i64_noshrink symbol, of which this name is a prefix.
; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64:
; SI: s_load_dword
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 64
; SI-NOT: and
; SI: buffer_store_dword
define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 64
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 375 | |
; A shifted i64 kernel argument is masked with 64 and then added to a
; second i64 argument. The checks expect a 32-bit shift, a single
; s_and_b32 with 64, and a 64-bit add as an s_add_u32/s_addc_u32 pair.
; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64_noshrink:
; SI: s_load_dword [[A:s[0-9]+]]
; SI: s_lshl_b32 [[A]], [[A]], 1{{$}}
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, [[A]], 64
; SI-NOT: and
; SI: s_add_u32
; SI-NEXT: s_addc_u32
define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i32, i64 %b) {
  %a.x2 = shl i64 %a, 1
  %masked = and i64 %a.x2, 64
  %sum = add i64 %masked, %b
  store i64 %sum, i64 addrspace(1)* %out, align 8
  ret void
}
| 391 | |
; i64 AND of a kernel argument with 1. The checks expect exactly one
; s_and_b32 with 1 between the argument load and the 64-bit store.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64:
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 1
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}
| 403 | |
; i64 AND of a kernel argument with 4607182418800017408
; (0x3ff0000000000000, the bit pattern of double 1.0). The active checks
; expect a single s_and_b32 with the high half 0x3ff00000. The XSI line
; uses a prefix that no RUN line enables, so it is inactive.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3ff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 4607182418800017408
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}
| 418 | |
; i64 AND of a kernel argument with 13830554455654793216
; (0xbff0000000000000, the bit pattern of double -1.0). The active checks
; expect a single s_and_b32 with the high half 0xbff00000. The XSI line
; uses a prefix that no RUN line enables, so it is inactive.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 13830554455654793216
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}
| 433 | |
; i64 AND of a kernel argument with 4602678819172646912
; (0x3fe0000000000000, the bit pattern of double 0.5). The active checks
; expect a single s_and_b32 with the high half 0x3fe00000. The XSI line
; uses a prefix that no RUN line enables, so it is inactive.
; The label pattern ends in ':' so it matches only this symbol.
; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3fe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 4602678819172646912
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}
| 448 | |
; i64 AND of a kernel argument with 13826050856027422720
; (0xbfe0000000000000, the bit pattern of double -0.5). The active checks
; expect a single s_and_b32 with the high half 0xbfe00000. The XSI line
; uses a prefix that no RUN line enables, so it is inactive.
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbfe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %masked = and i64 %a, 13826050856027422720
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 463 | |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 464 | ; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64: |
| 465 | ; SI: s_load_dwordx2 |
| 466 | ; SI: s_load_dwordx2 |
| 467 | ; SI-NOT: and |
| 468 | ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 2.0 |
| 469 | ; SI-NOT: and |
| 470 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0x4000000000000000) to %out.
; The mask is the IEEE-754 double bit pattern of 2.0. Its high dword,
; 0x40000000, is also the f32 bit pattern of 2.0, which is why the checks
; above expect a 32-bit AND against the operand printed as 2.0.
; %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 471 | define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 472 | %and = and i64 %a, 4611686018427387904 |
| 473 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 474 | ret void |
| 475 | } |
| 476 | |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 477 | ; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64: |
| 478 | ; SI: s_load_dwordx2 |
| 479 | ; SI: s_load_dwordx2 |
| 480 | ; SI-NOT: and |
| 481 | ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, -2.0 |
| 482 | ; SI-NOT: and |
| 483 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0xC000000000000000) to %out.
; The mask is the IEEE-754 double bit pattern of -2.0. Its high dword,
; 0xC0000000, is also the f32 bit pattern of -2.0, which is why the checks
; above expect a 32-bit AND against the operand printed as -2.0.
; %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 484 | define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 485 | %and = and i64 %a, 13835058055282163712 |
| 486 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 487 | ret void |
| 488 | } |
| 489 | |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 490 | ; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64: |
| 491 | ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0 |
| 492 | |
| 493 | ; SI: s_load_dwordx2 |
| 494 | ; SI: s_load_dwordx2 |
| 495 | ; SI-NOT: and |
| 496 | ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x40100000 |
| 497 | ; SI-NOT: and |
| 498 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0x4010000000000000) to %out.
; The mask is the IEEE-754 double bit pattern of 4.0; its high dword is
; 0x40100000 (the literal the checks above expect) and its low dword is zero.
; %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 499 | define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 500 | %and = and i64 %a, 4616189618054758400 |
| 501 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 502 | ret void |
| 503 | } |
| 504 | |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 505 | ; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64: |
| 506 | ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0 |
| 507 | |
| 508 | ; SI: s_load_dwordx2 |
| 509 | ; SI: s_load_dwordx2 |
| 510 | ; SI-NOT: and |
| 511 | ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xc0100000 |
| 512 | ; SI-NOT: and |
| 513 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0xC010000000000000) to %out.
; The mask is the IEEE-754 double bit pattern of -4.0; its high dword is
; 0xc0100000 (the literal the checks above expect) and its low dword is zero.
; %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 514 | define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 515 | %and = and i64 %a, 13839561654909534208 |
| 516 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 517 | ret void |
| 518 | } |
| 519 | |
| 520 | |
| 521 | ; Test with the 64-bit integer bitpattern for a 32-bit float in the |
| 522 | ; low 32-bits, which is not a valid 64-bit inline immediate. |
| 523 | |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 524 | ; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64: |
Matthias Braun | 6ad3d05 | 2016-06-25 00:23:00 +0000 | [diff] [blame] | 525 | ; SI: s_load_dwordx2 |
Tom Stellard | 0d23ebe | 2016-08-29 19:42:52 +0000 | [diff] [blame] | 526 | ; SI: s_load_dword s |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 527 | ; SI-NOT: and |
| 528 | ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0 |
| 529 | ; SI-NOT: and |
| 530 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0x0000000040800000) to %out.
; The mask holds the f32 bit pattern of 4.0 (0x40800000) in its low 32 bits;
; its upper 32 bits are zero, so the upper half of the result is always zero.
; %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 531 | define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 532 | %and = and i64 %a, 1082130432 |
| 533 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 534 | ret void |
| 535 | } |
| 536 | |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 537 | ; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64: |
| 538 | ; SI: s_load_dwordx2 |
| 539 | ; SI: s_load_dwordx2 |
| 540 | ; SI-NOT: and |
| 541 | ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0 |
| 542 | ; SI-NOT: and |
| 543 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0xFFFFFFFFC0800000) to %out.
; The i64 literal -1065353216 has the f32 bit pattern of -4.0 (0xC0800000) in
; its low 32 bits and all ones in its upper 32 bits, so the upper half of %a
; passes through the mask unchanged.
; %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 544 | define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 545 | %and = and i64 %a, -1065353216 |
| 546 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 547 | ret void |
| 548 | } |
| 549 | |
| 550 | ; Shift into upper 32-bits |
; The label line below anchors the following checks to this function's own
; output; without it they are evaluated in the scope of the previous label.
; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64:
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 551 | ; SI: s_load_dwordx2 |
| 552 | ; SI: s_load_dwordx2 |
| 553 | ; SI-NOT: and |
| 554 | ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0 |
| 555 | ; SI-NOT: and |
| 556 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0x4080000000000000) to %out.
; The mask holds the f32 bit pattern of 4.0 (0x40800000) in its upper 32 bits;
; its low 32 bits are zero. %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 557 | define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 558 | %and = and i64 %a, 4647714815446351872 |
| 559 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 560 | ret void |
| 561 | } |
| 562 | |
Matt Arsenault | 6e3a451 | 2016-01-18 22:01:13 +0000 | [diff] [blame] | 563 | ; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64: |
| 564 | ; SI: s_load_dwordx2 |
| 565 | ; SI: s_load_dwordx2 |
| 566 | ; SI-NOT: and |
| 567 | ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0 |
| 568 | ; SI-NOT: and |
| 569 | ; SI: buffer_store_dwordx2 |
; Kernel under test: stores (%a & 0xC080000000000000) to %out.
; The mask holds the f32 bit pattern of -4.0 (0xC0800000) in its upper 32 bits;
; its low 32 bits are zero. %aptr is not referenced in the body.
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 570 | define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { |
Matt Arsenault | 11a4d67 | 2015-02-13 19:05:03 +0000 | [diff] [blame] | 571 | %and = and i64 %a, 13871086852301127680 |
| 572 | store i64 %and, i64 addrspace(1)* %out, align 8 |
| 573 | ret void |
| 574 | } |
; Attribute group #0, referenced by the @llvm.r600.read.tidig.x declaration
; near the top of this file.
Matt Arsenault | 28bd7d4 | 2015-09-25 18:21:47 +0000 | [diff] [blame] | 575 | attributes #0 = { nounwind readnone } |