| Hao Liu | 2cd34bb | 2015-06-26 02:45:36 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s | 
|  | 2 |  | 
|  |  | ; Even-lane and odd-lane stride-2 shuffles of a single <16 x i8> load; the | 
|  |  | ; pair is expected to be lowered to one vld2.8. | 
|  | 3 | ; CHECK-LABEL: load_factor2: | 
|  | 4 | ; CHECK: vld2.8 {d16, d17}, [r0] | 
|  | 5 | define <8 x i8> @load_factor2(<16 x i8>* %ptr) { | 
|  | 6 | %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4 | 
|  | 7 | %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> | 
|  | 8 | %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> | 
|  | 9 | %add = add nsw <8 x i8> %strided.v0, %strided.v1 | 
|  | 10 | ret <8 x i8> %add | 
|  | 11 | } | 
|  | 12 |  | 
|  |  | ; Stride-3 shuffles selecting indices {2, 5} and {1, 4} of a <6 x i32> load | 
|  |  | ; (the {0, 3} member is never extracted); a vld3.32 is still expected. | 
|  | 13 | ; CHECK-LABEL: load_factor3: | 
|  | 14 | ; CHECK: vld3.32 {d16, d17, d18}, [r0] | 
|  | 15 | define <2 x i32> @load_factor3(i32* %ptr) { | 
|  | 16 | %base = bitcast i32* %ptr to <6 x i32>* | 
|  | 17 | %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4 | 
|  | 18 | %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5> | 
|  | 19 | %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4> | 
|  | 20 | %add = add nsw <2 x i32> %strided.v2, %strided.v1 | 
|  | 21 | ret <2 x i32> %add | 
|  | 22 | } | 
|  | 23 |  | 
|  |  | ; Stride-4 shuffles selecting indices {0, 4, 8, 12} and {2, 6, 10, 14} of a | 
|  |  | ; <16 x i32> load. Two vld4.32 instructions are expected, the first one using | 
|  |  | ; the `[r0]!` addressing form. | 
|  | 24 | ; CHECK-LABEL: load_factor4: | 
|  | 25 | ; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! | 
|  | 26 | ; CHECK: vld4.32 {d17, d19, d21, d23}, [r0] | 
|  | 27 | define <4 x i32> @load_factor4(i32* %ptr) { | 
|  | 28 | %base = bitcast i32* %ptr to <16 x i32>* | 
|  | 29 | %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 | 
|  | 30 | %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> | 
|  | 31 | %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> | 
|  | 32 | %add = add nsw <4 x i32> %strided.v0, %strided.v2 | 
|  | 33 | ret <4 x i32> %add | 
|  | 34 | } | 
|  | 35 |  | 
|  |  | ; %v0 and %v1 are interleaved element by element (mask 0, 8, 1, 9, ...) and | 
|  |  | ; stored as one <16 x i8>; a single vst2.8 is expected. | 
|  | 36 | ; CHECK-LABEL: store_factor2: | 
|  | 37 | ; CHECK: vst2.8 {d16, d17}, [r0] | 
|  | 38 | define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) { | 
|  | 39 | %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> | 
|  | 40 | store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4 | 
|  | 41 | ret void | 
|  | 42 | } | 
|  | 43 |  | 
|  |  | ; %v0 and %v1 are concatenated, %v2 is widened with undef lanes, and the two | 
|  |  | ; halves are interleaved with a stride-3 mask (0, 4, 8, 1, 5, 9, ...) before | 
|  |  | ; the <12 x i32> store. Two vst3.32 instructions are expected. | 
|  | 44 | ; CHECK-LABEL: store_factor3: | 
|  | 45 | ; CHECK: vst3.32 {d16, d18, d20}, [r0]! | 
|  | 46 | ; CHECK: vst3.32 {d17, d19, d21}, [r0] | 
|  | 47 | define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { | 
|  | 48 | %base = bitcast i32* %ptr to <12 x i32>* | 
|  | 49 | %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 
|  | 50 | %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> | 
|  | 51 | %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> | 
|  | 52 | store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4 | 
|  | 53 | ret void | 
|  | 54 | } | 
|  | 55 |  | 
|  |  | ; %v0/%v1 and %v2/%v3 are concatenated pairwise and then interleaved with a | 
|  |  | ; stride-4 mask (0, 4, 8, 12, 1, 5, ...) before the <16 x i32> store. Two | 
|  |  | ; vst4.32 instructions are expected. | 
|  | 56 | ; CHECK-LABEL: store_factor4: | 
|  | 57 | ; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]! | 
|  | 58 | ; CHECK: vst4.32 {d17, d19, d21, d23}, [r0] | 
|  | 59 | define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { | 
|  | 60 | %base = bitcast i32* %ptr to <16 x i32>* | 
|  | 61 | %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 
|  | 62 | %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 
|  | 63 | %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> | 
|  | 64 | store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4 | 
|  | 65 | ret void | 
|  | 66 | } | 
|  | 67 |  | 
|  | 68 | ; The following cases test that interleaved accesses of pointer vectors can be | 
|  | 69 | ; matched to vldN/vstN instructions. | 
|  | 70 |  | 
|  |  | ; Pointer-element variant: only the {0, 2} member of a <4 x i32*> load is | 
|  |  | ; extracted, and a vld2.32 is expected. | 
|  | 71 | ; CHECK-LABEL: load_ptrvec_factor2: | 
|  | 72 | ; CHECK: vld2.32 {d16, d17}, [r0] | 
|  | 73 | define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { | 
|  | 74 | %base = bitcast i32** %ptr to <4 x i32*>* | 
|  | 75 | %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4 | 
|  | 76 | %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2> | 
|  | 77 | ret <2 x i32*> %strided.v0 | 
|  | 78 | } | 
|  | 79 |  | 
|  |  | ; Pointer-element variant: the {2, 5} and {1, 4} members of a <6 x i32*> load | 
|  |  | ; are extracted and written to %ptr1 and %ptr2; a vld3.32 is expected. | 
|  | 80 | ; CHECK-LABEL: load_ptrvec_factor3: | 
|  | 81 | ; CHECK: vld3.32 {d16, d17, d18}, [r0] | 
|  | 82 | define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { | 
|  | 83 | %base = bitcast i32** %ptr to <6 x i32*>* | 
|  | 84 | %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4 | 
|  | 85 | %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5> | 
|  | 86 | store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1 | 
|  | 87 | %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4> | 
|  | 88 | store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2 | 
|  | 89 | ret void | 
|  | 90 | } | 
|  | 91 |  | 
|  |  | ; Pointer-element variant: the {1, 5} and {3, 7} members of an <8 x i32*> load | 
|  |  | ; are extracted and written to %ptr1 and %ptr2; a vld4.32 is expected. | 
|  | 92 | ; CHECK-LABEL: load_ptrvec_factor4: | 
|  | 93 | ; CHECK: vld4.32 {d16, d17, d18, d19}, [r0] | 
|  | 94 | define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { | 
|  | 95 | %base = bitcast i32** %ptr to <8 x i32*>* | 
|  | 96 | %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4 | 
|  | 97 | %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5> | 
|  | 98 | %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7> | 
|  | 99 | store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1 | 
|  | 100 | store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2 | 
|  | 101 | ret void | 
|  | 102 | } | 
|  | 103 |  | 
|  |  | ; Pointer-element variant: %v0 and %v1 are interleaved with mask (0, 2, 1, 3) | 
|  |  | ; and stored as a <4 x i32*>; a vst2.32 is expected. | 
|  | 104 | ; CHECK-LABEL: store_ptrvec_factor2: | 
|  | 105 | ; CHECK: vst2.32 {d16, d17}, [r0] | 
|  | 106 | define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { | 
|  | 107 | %base = bitcast i32** %ptr to <4 x i32*>* | 
|  | 108 | %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> | 
|  | 109 | store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4 | 
|  | 110 | ret void | 
|  | 111 | } | 
|  | 112 |  | 
|  |  | ; Pointer-element variant: %v0/%v1 are concatenated, %v2 is widened with undef | 
|  |  | ; lanes, and the result is interleaved with mask (0, 2, 4, 1, 3, 5) before the | 
|  |  | ; <6 x i32*> store; a vst3.32 is expected. | 
|  | 113 | ; CHECK-LABEL: store_ptrvec_factor3: | 
|  | 114 | ; CHECK: vst3.32 {d16, d17, d18}, [r0] | 
|  | 115 | define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) { | 
|  | 116 | %base = bitcast i32** %ptr to <6 x i32*>* | 
|  | 117 | %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
|  | 118 | %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> | 
|  | 119 | %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5> | 
|  | 120 | store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4 | 
|  | 121 | ret void | 
|  | 122 | } | 
|  | 123 |  | 
|  |  | ; Pointer-element variant: %v0/%v1 and %v2/%v3 are concatenated pairwise and | 
|  |  | ; interleaved with mask (0, 2, 4, 6, 1, 3, 5, 7) before the <8 x i32*> store; | 
|  |  | ; a vst4.32 is expected. | 
|  |  | ; NOTE(review): %ptr is declared i32* here, whereas the other ptrvec tests in | 
|  |  | ; this file take i32** - confirm whether the difference is intentional. | 
|  | 124 | ; CHECK-LABEL: store_ptrvec_factor4: | 
|  | 125 | ; CHECK: vst4.32 {d16, d17, d18, d19}, [r0] | 
|  | 126 | define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) { | 
|  | 127 | %base = bitcast i32* %ptr to <8 x i32*>* | 
|  | 128 | %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
|  | 129 | %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
|  | 130 | %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> | 
|  | 131 | store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4 | 
|  | 132 | ret void | 
|  | 133 | } | 
|  | 134 |  | 
|  | 135 | ; The following cases check that shuffle masks with undef indices can be matched | 
|  | 136 | ; into vldN/vstN instructions. | 
|  | 137 |  | 
|  |  | ; Stride-2 shuffles whose masks have undef in lanes 0 and 2 (<undef, 2, undef, 6> | 
|  |  | ; and <undef, 3, undef, 7>); a vld2.32 is still expected. | 
|  | 138 | ; CHECK-LABEL: load_undef_mask_factor2: | 
|  | 139 | ; CHECK: vld2.32 {d16, d17, d18, d19}, [r0] | 
|  | 140 | define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { | 
|  | 141 | %base = bitcast i32* %ptr to <8 x i32>* | 
|  | 142 | %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4 | 
|  | 143 | %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6> | 
|  | 144 | %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7> | 
|  | 145 | %add = add nsw <4 x i32> %strided.v0, %strided.v1 | 
|  | 146 | ret <4 x i32> %add | 
|  | 147 | } | 
|  | 148 |  | 
|  |  | ; Stride-3 shuffles of a <12 x i32> load where one mask is mostly undef | 
|  |  | ; (<2, undef, undef, undef>) and the other is fully specified (<1, 4, 7, 10>). | 
|  |  | ; Two vld3.32 instructions are expected. | 
|  | 149 | ; CHECK-LABEL: load_undef_mask_factor3: | 
|  | 150 | ; CHECK: vld3.32 {d16, d18, d20}, [r0]! | 
|  | 151 | ; CHECK: vld3.32 {d17, d19, d21}, [r0] | 
|  | 152 | define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { | 
|  | 153 | %base = bitcast i32* %ptr to <12 x i32>* | 
|  | 154 | %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 | 
|  | 155 | %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef> | 
|  | 156 | %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> | 
|  | 157 | %add = add nsw <4 x i32> %strided.v2, %strided.v1 | 
|  | 158 | ret <4 x i32> %add | 
|  | 159 | } | 
|  | 160 |  | 
|  |  | ; Stride-4 shuffles of a <16 x i32> load whose masks have undef in the last two | 
|  |  | ; lanes (<0, 4, undef, undef> and <2, 6, undef, undef>). Two vld4.32 | 
|  |  | ; instructions are expected. | 
|  | 161 | ; CHECK-LABEL: load_undef_mask_factor4: | 
|  | 162 | ; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! | 
|  | 163 | ; CHECK: vld4.32 {d17, d19, d21, d23}, [r0] | 
|  | 164 | define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { | 
|  | 165 | %base = bitcast i32* %ptr to <16 x i32>* | 
|  | 166 | %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 | 
|  | 167 | %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef> | 
|  | 168 | %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef> | 
|  | 169 | %add = add nsw <4 x i32> %strided.v0, %strided.v2 | 
|  | 170 | ret <4 x i32> %add | 
|  | 171 | } | 
|  | 172 |  | 
|  |  | ; Factor-2 interleave of %v0 and %v1 where the first four mask entries are | 
|  |  | ; undef and only the tail (2, 6, 3, 7) is specified; a vst2.32 is expected. | 
|  | 173 | ; CHECK-LABEL: store_undef_mask_factor2: | 
|  | 174 | ; CHECK: vst2.32 {d16, d17, d18, d19}, [r0] | 
|  | 175 | define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { | 
|  | 176 | %base = bitcast i32* %ptr to <8 x i32>* | 
|  | 177 | %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7> | 
|  | 178 | store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4 | 
|  | 179 | ret void | 
|  | 180 | } | 
|  | 181 |  | 
|  |  | ; Same construction as store_factor3, but the final stride-3 interleave mask | 
|  |  | ; has undef at positions 2 and 4. Two vst3.32 instructions are expected. | 
|  | 182 | ; CHECK-LABEL: store_undef_mask_factor3: | 
|  | 183 | ; CHECK: vst3.32 {d16, d18, d20}, [r0]! | 
|  | 184 | ; CHECK: vst3.32 {d17, d19, d21}, [r0] | 
|  | 185 | define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { | 
|  | 186 | %base = bitcast i32* %ptr to <12 x i32>* | 
|  | 187 | %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 
|  | 188 | %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> | 
|  | 189 | %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> | 
|  | 190 | store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4 | 
|  | 191 | ret void | 
|  | 192 | } | 
|  | 193 |  | 
|  |  | ; Same construction as store_factor4, but the final stride-4 interleave mask | 
|  |  | ; has undef at positions 3 and 4. Two vst4.32 instructions are expected. | 
|  | 194 | ; CHECK-LABEL: store_undef_mask_factor4: | 
|  | 195 | ; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]! | 
|  | 196 | ; CHECK: vst4.32 {d17, d19, d21, d23}, [r0] | 
|  | 197 | define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { | 
|  | 198 | %base = bitcast i32* %ptr to <16 x i32>* | 
|  | 199 | %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 
|  | 200 | %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | 
|  | 201 | %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> | 
|  | 202 | store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4 | 
|  | 203 | ret void | 
|  | 204 | } | 
| Jeroen Ketema | ab99b59 | 2015-09-30 10:56:37 +0000 | [diff] [blame] | 205 |  | 
|  | 206 | ; The following test cases check that address spaces are properly handled. | 
|  | 207 |  | 
|  |  | ; The wide load reads through an addrspace(1) pointer; the stride-3 shuffle | 
|  |  | ; <0, 3> is still expected to be lowered to a vld3.32. | 
|  |  | ; NOTE(review): a stride-3 group with two lanes spans 6 elements, but the wide | 
|  |  | ; load here is only <4 x i32> - confirm that forming vld3.32 from it is the | 
|  |  | ; intended behaviour and not an access beyond the loaded vector. | 
|  | 208 | ; CHECK-LABEL: load_address_space: | 
|  | 209 | ; CHECK: vld3.32 | 
|  | 210 | define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) { | 
|  | 211 | %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A | 
|  | 212 | %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3> | 
|  | 213 | store <2 x i32> %interleaved, <2 x i32>* %B | 
|  | 214 | ret void | 
|  | 215 | } | 
|  | 216 |  | 
|  |  | ; Two <2 x i32> vectors are loaded from %A and %B, interleaved with mask | 
|  |  | ; (0, 2, 1, 3), and stored through an addrspace(1) pointer; a vst2.32 is | 
|  |  | ; expected. | 
|  | 217 | ; CHECK-LABEL: store_address_space: | 
|  | 218 | ; CHECK: vst2.32 | 
|  | 219 | define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) { | 
|  | 220 | %tmp0 = load <2 x i32>, <2 x i32>* %A | 
|  | 221 | %tmp1 = load <2 x i32>, <2 x i32>* %B | 
|  | 222 | %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> | 
|  | 223 | store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C | 
|  | 224 | ret void | 
|  | 225 | } | 