; Codegen tests for the integer `and` operation. The verde and tonga
; invocations share the SI check prefix; the redwood (r600) invocation uses the
; EG check prefix. All three also enable the FUNC prefix for function labels.
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Intrinsic called by several tests in this file; its result is used as a
; getelementptr index.
declare i32 @llvm.r600.read.tidig.x() #0

; AND of two <2 x i32> values loaded from elements 0 and 1 of %in. Expects two
; AND_INT under the EG prefix and two s_and_b32 under the SI prefix.
; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}

define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
  %result = and <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; AND of two <4 x i32> values loaded from elements 0 and 1 of %in. Expects four
; AND_INT under the EG prefix and four s_and_b32 under the SI prefix.
; FUNC-LABEL: {{^}}test4:
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}


; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}

define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
  %result = and <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; AND of two i32 kernel arguments; expects an s_and_b32.
; FUNC-LABEL: {{^}}s_and_i32:
; SI: s_and_b32
define amdgpu_kernel void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %and = and i32 %a, %b
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; AND of an i32 kernel argument with the literal 1234567 (0x12d687); expects
; the literal to appear directly as the last s_and_b32 operand.
; FUNC-LABEL: {{^}}s_and_constant_i32:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
  %and = and i32 %a, 1234567
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: We should really duplicate the constant so that the SALU use
; can fold into the s_and_b32 and the VALU one is materialized
; directly without copying from the SGPR.

; Second use is a VGPR use of the constant.
; The literal 1234567 (0x12d687) is used both as an `and` operand and as the
; value of a volatile store; the checks expect it to be moved into one SGPR
; that feeds the s_and_b32 and is then copied to the VGPR that is stored.
; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_0:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; SI: buffer_store_dword [[VK]]
define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %and = and i32 %a, 1234567

  ; Just to stop future replacement of copy to vgpr + store with VALU op.
  %foo = add i32 %and, %b
  store volatile i32 %foo, i32 addrspace(1)* %out
  store volatile i32 1234567, i32 addrspace(1)* %out
  ret void
}

; Second use is another SGPR use of the constant.
; The literal 1234567 (0x12d687) is an operand of both the `and` and the first
; `add`; the checks expect a single s_mov_b32 of the literal that is reused by
; the s_and_b32 and by an s_add_i32, whose result is copied to a VGPR and
; stored.
; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_1:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI: s_add_i32
; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]]
; SI: v_mov_b32_e32 [[VADD:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[VADD]]
define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %and = and i32 %a, 1234567
  %foo = add i32 %and, 1234567
  %bar = add i32 %foo, %b
  store volatile i32 %bar, i32 addrspace(1)* %out
  ret void
}

; Both `and` operands are loaded through pointers indexed by the result of
; @llvm.r600.read.tidig.x; expects a v_and_b32_e32 whose operands are all
; VGPRs.
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep.a
  %b = load i32, i32 addrspace(1)* %gep.b
  %and = and i32 %a, %b
  store i32 %and, i32 addrspace(1)* %gep.out
  ret void
}

; %a is a kernel argument and %b is loaded through a %tid-indexed pointer.
; Expects the s_load_dword result as the first v_and_b32_e32 source and the
; loaded VGPR as the second.
; FUNC-LABEL: {{^}}v_and_i32_sgpr_vgpr:
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %b = load i32, i32 addrspace(1)* %gep.b
  %and = and i32 %a, %b
  store i32 %and, i32 addrspace(1)* %gep.out
  ret void
}

; Mirror of the sgpr_vgpr case: here %a is loaded through a %tid-indexed
; pointer and %b is a kernel argument. The checks still expect the
; s_load_dword result as the first v_and_b32_e32 source and the loaded VGPR as
; the second.
; FUNC-LABEL: {{^}}v_and_i32_vgpr_sgpr:
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep.a
  %and = and i32 %a, %b
  store i32 %and, i32 addrspace(1)* %gep.out
  ret void
}

; AND of a value loaded through a %tid-indexed pointer with the literal
; 1234567 (0x12d687); expects the literal as the first v_and_b32_e32 source.
; FUNC-LABEL: {{^}}v_and_constant_i32:
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %a, 1234567
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; AND of a value loaded through a %tid-indexed pointer with 64; expects the
; constant printed as the plain operand 64 of v_and_b32_e32.
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32:
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %a, 64
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; AND of a value loaded through a %tid-indexed pointer with -16; expects the
; constant printed as the plain operand -16 of v_and_b32_e32.
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32:
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %a, -16
  store i32 %and, i32 addrspace(1)* %out, align 4
  ret void
}

; AND of two i64 kernel arguments; expects an s_and_b64.
; FUNC-LABEL: {{^}}s_and_i64:
; SI: s_and_b64
define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
  %and = and i64 %a, %b
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; FIXME: Should use SGPRs
; AND of two i1 kernel arguments; the check currently expects a v_and_b32.
; FUNC-LABEL: {{^}}s_and_i1:
; SI: v_and_b32
define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
  %and = and i1 %a, %b
  store i1 %and, i1 addrspace(1)* %out
  ret void
}

; AND of an i64 kernel argument with 549756338176 (0x0000008000080000).
; Expects two 32-bit s_and_b32, one with 0x80000 and one with 0x80, followed
; by a 64-bit store.
; FUNC-LABEL: {{^}}s_and_constant_i64:
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000{{$}}
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80{{$}}
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
  %and = and i64 %a, 549756338176
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; Two i64 kernel arguments are each ANDed with 549756338176
; (0x0000008000080000) and stored with volatile stores.
; NOTE: the XSI lines below use a prefix that none of this file's llc
; invocations enable, so only the function label is checked here.
; FUNC-LABEL: {{^}}s_and_multi_use_constant_i64:
; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}
define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
  %and0 = and i64 %a, 549756338176
  %and1 = and i64 %b, 549756338176
  store volatile i64 %and0, i64 addrspace(1)* %out
  store volatile i64 %and1, i64 addrspace(1)* %out
  ret void
}

; AND of an i64 kernel argument with 1234567 (0x12d687), a constant whose
; upper 32 bits are zero. Expects exactly one s_and_b32 with that literal and
; no other line containing "and" between the load and the 64-bit store.
; FUNC-LABEL: {{^}}s_and_32_bit_constant_i64:
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687{{$}}
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i64 %a) {
  %and = and i64 %a, 1234567
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; Two i64 kernel arguments are each shifted left by 1, ANDed with 62 and added
; to a third argument before being stored with volatile stores. The checks
; expect 32-bit shifts and two s_and_b32 with the operand 62, with no other
; line containing "and" around them.
; FUNC-LABEL: {{^}}s_and_multi_use_inline_imm_i64:
; SI: s_load_dwordx2
; SI: s_load_dword [[A:s[0-9]+]]
; SI: s_load_dword [[B:s[0-9]+]]
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_lshl_b32 [[A]], [[A]], 1
; SI: s_lshl_b32 [[B]], [[B]], 1
; SI: s_and_b32 s{{[0-9]+}}, [[A]], 62
; SI: s_and_b32 s{{[0-9]+}}, [[B]], 62
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 %a, i64 %b, i64 %c) {
  %shl.a = shl i64 %a, 1
  %shl.b = shl i64 %b, 1
  %and0 = and i64 %shl.a, 62
  %and1 = and i64 %shl.b, 62
  %add0 = add i64 %and0, %c
  %add1 = add i64 %and1, %c
  store volatile i64 %add0, i64 addrspace(1)* %out
  store volatile i64 %add1, i64 addrspace(1)* %out
  ret void
}

; AND of two i64 values loaded through %tid-indexed pointers; expects two
; v_and_b32.
; FUNC-LABEL: {{^}}v_and_i64:
; SI: v_and_b32
; SI: v_and_b32
define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep.a, align 8
  %gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %b = load i64, i64 addrspace(1)* %gep.b, align 8
  %and = and i64 %a, %b
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 value loaded through a %tid-indexed pointer with
; 1231231234567 (0x0000011eab19b207). Expects two v_and_b32_e32, one with
; 0xab19b207 and one with 0x11e, followed by a 64-bit store.
; FUNC-LABEL: {{^}}v_and_constant_i64:
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0xab19b207, {{v[0-9]+}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep.a, align 8
  %and = and i64 %a, 1231231234567
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; Two volatile i64 loads from %aptr are each ANDed with 1231231234567
; (0x0000011eab19b207) and stored with volatile stores. The checks expect the
; two constant halves to be materialized once in SGPRs (0x11e via s_movk_i32,
; 0xab19b207 via s_mov_b32) and reused by all four v_and_b32_e32.
; FUNC-LABEL: {{^}}v_and_multi_use_constant_i64:
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}}
; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207{{$}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO0]]
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI0]]
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO1]]
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI1]]
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %a = load volatile i64, i64 addrspace(1)* %aptr
  %b = load volatile i64, i64 addrspace(1)* %aptr
  %and0 = and i64 %a, 1231231234567
  %and1 = and i64 %b, 1231231234567
  store volatile i64 %and0, i64 addrspace(1)* %out
  store volatile i64 %and1, i64 addrspace(1)* %out
  ret void
}

; Two volatile i64 loads from %aptr are each ANDed with 63 and stored with
; volatile stores. The checks expect one v_and_b32_e32 per value, applied to
; the low dword with the operand 63, and no other line containing "and".
; FUNC-LABEL: {{^}}v_and_multi_use_inline_imm_i64:
; SI: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
; SI-NOT: and
; SI: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
; SI-NOT: and
; SI: v_and_b32_e32 v[[RESLO0:[0-9]+]], 63, v[[LO0]]
; SI: v_and_b32_e32 v[[RESLO1:[0-9]+]], 63, v[[LO1]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO0]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO1]]
define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %a = load volatile i64, i64 addrspace(1)* %aptr
  %b = load volatile i64, i64 addrspace(1)* %aptr
  %and0 = and i64 %a, 63
  %and1 = and i64 %b, 63
  store volatile i64 %and0, i64 addrspace(1)* %out
  store volatile i64 %and1, i64 addrspace(1)* %out
  ret void
}

; AND of an i64 value loaded through a %tid-indexed pointer with 1234567
; (0x12d687), a constant whose upper 32 bits are zero. Expects a dword load, a
; single v_and_b32_e32 with the literal, and no other line containing "and"
; before the 64-bit store.
; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant:
; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
; SI-NOT: and
; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep.a, align 8
  %and = and i64 %a, 1234567
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 value loaded through a %tid-indexed pointer with 64. Expects a
; dword load, a single v_and_b32_e32 with the operand 64, and no other line
; containing "and" before the 64-bit store.
; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
; SI: {{buffer|flat}}_load_dword v{{[0-9]+}}
; SI-NOT: and
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep.a, align 8
  %and = and i64 %a, 64
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; FIXME: Should be able to reduce load width
; AND of an i64 value loaded through a %tid-indexed pointer with -8. Expects a
; 64-bit load, a single in-place v_and_b32_e32 with -8 on the low dword, and
; the same register pair being stored.
; FUNC-LABEL: {{^}}v_and_inline_neg_imm_i64:
; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-NOT: and
; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep.a, align 8
  %and = and i64 %a, -8
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 kernel argument with 64. Expects a single s_and_b32 with the
; operand 64 and no other line containing "and" between the load and the
; store. The label ends with a colon so it cannot also match the longer
; "_noshrink" function name.
; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64:
; SI: s_load_dword
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 64
; SI-NOT: and
; SI: buffer_store_dword
define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 64
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; An i64 kernel argument is shifted left by 1, ANDed with 64 and added to a
; second i64 argument. Expects a 32-bit shift, a single s_and_b32 with the
; operand 64, and then an s_add_u32 immediately followed by s_addc_u32.
; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64_noshrink:
; SI: s_load_dword [[A:s[0-9]+]]
; SI: s_lshl_b32 [[A]], [[A]], 1{{$}}
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, [[A]], 64
; SI-NOT: and
; SI: s_add_u32
; SI-NEXT: s_addc_u32
define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i64 %b) {
  %shl = shl i64 %a, 1
  %and = and i64 %shl, 64
  %add = add i64 %and, %b
  store i64 %add, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 kernel argument with 1. Expects a single s_and_b32 with the
; operand 1 and no other line containing "and" between the load and the
; 64-bit store.
; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64:
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 1
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 kernel argument with 4607182418800017408
; (0x3FF0000000000000, the bit pattern of double 1.0). The active checks expect
; a single s_and_b32 with the literal 0x3ff00000; the XSI line uses a prefix
; that no llc invocation in this file enables.
; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3ff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 4607182418800017408
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 kernel argument with 13830554455654793216
; (0xBFF0000000000000, the bit pattern of double -1.0). The active checks
; expect a single s_and_b32 with the literal 0xbff00000; the XSI line uses a
; prefix that no llc invocation in this file enables.
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 13830554455654793216
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 kernel argument with 4602678819172646912
; (0x3FE0000000000000, the bit pattern of double 0.5). The active checks expect
; a single s_and_b32 with the literal 0x3fe00000; the XSI line uses a prefix
; that no llc invocation in this file enables.
; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3fe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 4602678819172646912
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; AND of an i64 kernel argument with 13826050856027422720
; (0xBFE0000000000000, the bit pattern of double -0.5). The active checks
; expect a single s_and_b32 with the literal 0xbfe00000; the XSI line uses a
; prefix that no llc invocation in this file enables.
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbfe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %and = and i64 %a, 13826050856027422720
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}

; Masks an i64 kernel argument with the bit pattern of the double 2.0
; (0x4000000000000000). The high half of that mask, 0x40000000, is also the
; f32 bit pattern of 2.0, and the checks expect the single s_and_b32 to show
; its constant operand as 2.0.
; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64:
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; 4611686018427387904 == 0x4000000000000000 (only bit 62 set).
  %masked = and i64 %a, 4611686018427387904
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 472 | |
; Masks an i64 kernel argument with the bit pattern of the double -2.0
; (0xC000000000000000). The high half of that mask, 0xC0000000, is also the
; f32 bit pattern of -2.0, and the checks expect the single s_and_b32 to show
; its constant operand as -2.0.
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64:
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, -2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; 13835058055282163712 == 0xC000000000000000 (bits 63 and 62 set).
  %masked = and i64 %a, 13835058055282163712
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 485 | |
; Masks an i64 kernel argument with the bit pattern of the double 4.0
; (0x4010000000000000). The enabled checks expect exactly one 32-bit
; s_and_b32 whose literal operand is 0x40100000. The XSI line uses a prefix
; that is not enabled by this file's FileCheck invocations.
; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x40100000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; 4616189618054758400 == 0x4010000000000000; the low 32 bits are zero.
  %masked = and i64 %a, 4616189618054758400
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 500 | |
; Masks an i64 kernel argument with the bit pattern of the double -4.0
; (0xC010000000000000). The enabled checks expect exactly one 32-bit
; s_and_b32 whose literal operand is 0xc0100000. The XSI line uses a prefix
; that is not enabled by this file's FileCheck invocations.
; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64:
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0

; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xc0100000
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; 13839561654909534208 == 0xC010000000000000; the low 32 bits are zero.
  %masked = and i64 %a, 13839561654909534208
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 515 | |
| 516 | |
; Test with the 64-bit integer bitpattern for a 32-bit float in the
; low 32-bits, which is not a valid 64-bit inline immediate.
| 519 | |
; Masks an i64 kernel argument with 0x0000000040800000: the f32 bit pattern
; of 4.0 in the low half and zero in the high half. The checks expect a
; two-dword load followed by a single-dword load, then one s_and_b32 whose
; constant operand is shown as 4.0.
; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64:
; SI: s_load_dwordx2
; SI: s_load_dword s
; SI-NOT: and
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; 1082130432 == 0x40800000.
  %masked = and i64 %a, 1082130432
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 532 | |
; Masks an i64 kernel argument with -1065353216, i.e. 0xFFFFFFFFC0800000 as
; an i64: the f32 bit pattern of -4.0 in the low half and all ones in the
; high half. The checks expect one s_and_b32 whose constant operand is shown
; as -4.0.
; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64:
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; The constant is sign-extended to 64 bits, so the high half of the mask
  ; is all ones.
  %masked = and i64 %a, -1065353216
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
| 545 | |
; Shift into upper 32-bits
; The mask is 0x4080000000000000: the f32 bit pattern of 4.0 (0x40800000)
; placed in the high half, with a zero low half. The checks expect a single
; s_and_b32 whose constant operand is shown as 4.0.
; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64:
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; 4647714815446351872 == 0x4080000000000000.
  %and = and i64 %a, 4647714815446351872
  store i64 %and, i64 addrspace(1)* %out, align 8
  ret void
}
| 558 | |
; Masks an i64 kernel argument with 0xC080000000000000: the f32 bit pattern
; of -4.0 (0xC0800000) placed in the high half, with a zero low half. The
; checks expect one s_and_b32 whose constant operand is shown as -4.0.
; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64:
; SI: s_load_dwordx2
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  ; 13871086852301127680 == 0xC080000000000000.
  %masked = and i64 %a, 13871086852301127680
  store i64 %masked, i64 addrspace(1)* %out, align 8
  ret void
}
; Attribute group #0, referenced by the @llvm.r600.read.tidig.x declaration
; at the top of this file.
attributes #0 = { nounwind readnone }