| Bob Wilson | 35b6173 | 2009-10-09 20:20:54 +0000 | [diff] [blame] | 1 | ; RUN: llc -mattr=+neon < %s | FileCheck %s | 
|  | 2 | target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" | 
|  | 3 | target triple = "thumbv7-elf" | 
|  | 4 |  | 
|  | 5 | define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; loads two <4 x i16> vectors and passes them to the vqdmulh intrinsic | 
|  | 6 | ;CHECK: vqdmulhs16: | 
|  | 7 | ;CHECK: vqdmulh.s16 | 
|  | 8 | %lhs = load <4 x i16>* %A | 
|  | 9 | %rhs = load <4 x i16>* %B | 
|  | 10 | %res = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) | 
|  | 11 | ret <4 x i16> %res | 
|  | 12 | } | 
|  | 13 |  | 
|  | 14 | define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; loads two <2 x i32> vectors and passes them to the vqdmulh intrinsic | 
|  | 15 | ;CHECK: vqdmulhs32: | 
|  | 16 | ;CHECK: vqdmulh.s32 | 
|  | 17 | %lhs = load <2 x i32>* %A | 
|  | 18 | %rhs = load <2 x i32>* %B | 
|  | 19 | %res = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) | 
|  | 20 | ret <2 x i32> %res | 
|  | 21 | } | 
|  | 22 |  | 
|  | 23 | define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; loads two <8 x i16> vectors and passes them to the vqdmulh intrinsic | 
|  | 24 | ;CHECK: vqdmulhQs16: | 
|  | 25 | ;CHECK: vqdmulh.s16 | 
|  | 26 | %lhs = load <8 x i16>* %A | 
|  | 27 | %rhs = load <8 x i16>* %B | 
|  | 28 | %res = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) | 
|  | 29 | ret <8 x i16> %res | 
|  | 30 | } | 
|  | 31 |  | 
|  | 32 | define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; loads two <4 x i32> vectors and passes them to the vqdmulh intrinsic | 
|  | 33 | ;CHECK: vqdmulhQs32: | 
|  | 34 | ;CHECK: vqdmulh.s32 | 
|  | 35 | %lhs = load <4 x i32>* %A | 
|  | 36 | %rhs = load <4 x i32>* %B | 
|  | 37 | %res = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) | 
|  | 38 | ret <4 x i32> %res | 
|  | 39 | } | 
|  | 40 |  | 
|  | 41 | define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { ; by-lane vqdmulh: second operand is element 1 of %arg1 broadcast to 8 lanes | 
|  | 42 | entry: | 
|  | 43 | ; CHECK: test_vqdmulhQ_lanes16: | 
|  | 44 | ; CHECK: vqdmulh.s16 q0, q0, d2[1] | 
|  | 45 | %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <8 x i16> | 
|  | 46 | %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; splat operand is expected to be selected as the d2[1] scalar form | 
|  | 47 | ret <8 x i16> %1 | 
|  | 48 | } | 
|  | 49 |  | 
|  | 50 | define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { ; by-lane vqdmulh: second operand is element 1 of %arg1 broadcast to 4 lanes | 
|  | 51 | entry: | 
|  | 52 | ; CHECK: test_vqdmulhQ_lanes32: | 
|  | 53 | ; CHECK: vqdmulh.s32 q0, q0, d2[1] | 
|  | 54 | %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <4 x i32> | 
|  | 55 | %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; splat operand is expected to be selected as the d2[1] scalar form | 
|  | 56 | ret <4 x i32> %1 | 
|  | 57 | } | 
|  | 58 |  | 
|  | 59 | define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { ; by-lane vqdmulh: second operand is element 1 of %arg1 broadcast to 4 lanes | 
|  | 60 | entry: | 
|  | 61 | ; CHECK: test_vqdmulh_lanes16: | 
|  | 62 | ; CHECK: vqdmulh.s16 d0, d0, d1[1] | 
|  | 63 | %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <4 x i16> | 
|  | 64 | %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; splat operand is expected to be selected as the d1[1] scalar form | 
|  | 65 | ret <4 x i16> %1 | 
|  | 66 | } | 
|  | 67 |  | 
|  | 68 | define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { ; by-lane vqdmulh: second operand is element 1 of %arg1 broadcast to 2 lanes | 
|  | 69 | entry: | 
|  | 70 | ; CHECK: test_vqdmulh_lanes32: | 
|  | 71 | ; CHECK: vqdmulh.s32 d0, d0, d1[1] | 
|  | 72 | %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; all-ones mask: splat element 1 across <2 x i32> | 
|  | 73 | %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; splat operand is expected to be selected as the d1[1] scalar form | 
|  | 74 | ret <2 x i32> %1 | 
|  | 75 | } | 
|  | 76 |  | 
|  | 77 | declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone | 
|  | 78 | declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone | 
|  | 79 |  | 
|  | 80 | declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone | 
|  | 81 | declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone | 
|  | 82 |  | 
|  | 83 | define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; loads two <4 x i16> vectors and passes them to the vqrdmulh intrinsic | 
|  | 84 | ;CHECK: vqrdmulhs16: | 
|  | 85 | ;CHECK: vqrdmulh.s16 | 
|  | 86 | %lhs = load <4 x i16>* %A | 
|  | 87 | %rhs = load <4 x i16>* %B | 
|  | 88 | %res = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) | 
|  | 89 | ret <4 x i16> %res | 
|  | 90 | } | 
|  | 91 |  | 
|  | 92 | define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; loads two <2 x i32> vectors and passes them to the vqrdmulh intrinsic | 
|  | 93 | ;CHECK: vqrdmulhs32: | 
|  | 94 | ;CHECK: vqrdmulh.s32 | 
|  | 95 | %lhs = load <2 x i32>* %A | 
|  | 96 | %rhs = load <2 x i32>* %B | 
|  | 97 | %res = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) | 
|  | 98 | ret <2 x i32> %res | 
|  | 99 | } | 
|  | 100 |  | 
|  | 101 | define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; loads two <8 x i16> vectors and passes them to the vqrdmulh intrinsic | 
|  | 102 | ;CHECK: vqrdmulhQs16: | 
|  | 103 | ;CHECK: vqrdmulh.s16 | 
|  | 104 | %lhs = load <8 x i16>* %A | 
|  | 105 | %rhs = load <8 x i16>* %B | 
|  | 106 | %res = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) | 
|  | 107 | ret <8 x i16> %res | 
|  | 108 | } | 
|  | 109 |  | 
|  | 110 | define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; loads two <4 x i32> vectors and passes them to the vqrdmulh intrinsic | 
|  | 111 | ;CHECK: vqrdmulhQs32: | 
|  | 112 | ;CHECK: vqrdmulh.s32 | 
|  | 113 | %lhs = load <4 x i32>* %A | 
|  | 114 | %rhs = load <4 x i32>* %B | 
|  | 115 | %res = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) | 
|  | 116 | ret <4 x i32> %res | 
|  | 117 | } | 
|  | 118 |  | 
|  | 119 | define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { ; by-lane vqrdmulh: second operand is element 1 of %arg1 broadcast to 8 lanes | 
|  | 120 | entry: | 
|  | 121 | ; CHECK: test_vqRdmulhQ_lanes16: | 
|  | 122 | ; CHECK: vqrdmulh.s16 q0, q0, d2[1] | 
|  | 123 | %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <8 x i16> | 
|  | 124 | %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; splat operand is expected to be selected as the d2[1] scalar form | 
|  | 125 | ret <8 x i16> %1 | 
|  | 126 | } | 
|  | 127 |  | 
|  | 128 | define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { ; by-lane vqrdmulh: second operand is element 1 of %arg1 broadcast to 4 lanes | 
|  | 129 | entry: | 
|  | 130 | ; CHECK: test_vqRdmulhQ_lanes32: | 
|  | 131 | ; CHECK: vqrdmulh.s32 q0, q0, d2[1] | 
|  | 132 | %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <4 x i32> | 
|  | 133 | %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; splat operand is expected to be selected as the d2[1] scalar form | 
|  | 134 | ret <4 x i32> %1 | 
|  | 135 | } | 
|  | 136 |  | 
|  | 137 | define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { ; by-lane vqrdmulh: second operand is element 1 of %arg1 broadcast to 4 lanes | 
|  | 138 | entry: | 
|  | 139 | ; CHECK: test_vqRdmulh_lanes16: | 
|  | 140 | ; CHECK: vqrdmulh.s16 d0, d0, d1[1] | 
|  | 141 | %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <4 x i16> | 
|  | 142 | %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; splat operand is expected to be selected as the d1[1] scalar form | 
|  | 143 | ret <4 x i16> %1 | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { ; by-lane vqrdmulh: second operand is element 1 of %arg1 broadcast to 2 lanes | 
|  | 147 | entry: | 
|  | 148 | ; CHECK: test_vqRdmulh_lanes32: | 
|  | 149 | ; CHECK: vqrdmulh.s32 d0, d0, d1[1] | 
|  | 150 | %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; all-ones mask: splat element 1 across <2 x i32> | 
|  | 151 | %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; splat operand is expected to be selected as the d1[1] scalar form | 
|  | 152 | ret <2 x i32> %1 | 
|  | 153 | } | 
|  | 154 |  | 
| Bob Wilson | 35b6173 | 2009-10-09 20:20:54 +0000 | [diff] [blame] | 155 | declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone | 
|  | 156 | declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone | 
|  | 157 |  | 
|  | 158 | declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone | 
|  | 159 | declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone | 
|  | 160 |  | 
|  | 161 | define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; loads two <4 x i16> vectors; the vqdmull intrinsic returns <4 x i32> | 
|  | 162 | ;CHECK: vqdmulls16: | 
|  | 163 | ;CHECK: vqdmull.s16 | 
|  | 164 | %lhs = load <4 x i16>* %A | 
|  | 165 | %rhs = load <4 x i16>* %B | 
|  | 166 | %wide = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) | 
|  | 167 | ret <4 x i32> %wide | 
|  | 168 | } | 
|  | 169 |  | 
|  | 170 | define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; loads two <2 x i32> vectors; the vqdmull intrinsic returns <2 x i64> | 
|  | 171 | ;CHECK: vqdmulls32: | 
|  | 172 | ;CHECK: vqdmull.s32 | 
|  | 173 | %lhs = load <2 x i32>* %A | 
|  | 174 | %rhs = load <2 x i32>* %B | 
|  | 175 | %wide = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %lhs, <2 x i32> %rhs) | 
|  | 176 | ret <2 x i64> %wide | 
|  | 177 | } | 
|  | 178 |  | 
|  | 179 | define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { ; by-lane vqdmull: second operand is element 1 of %arg1 broadcast to 4 lanes | 
|  | 180 | entry: | 
|  | 181 | ; CHECK: test_vqdmull_lanes16: | 
|  | 182 | ; CHECK: vqdmull.s16 q0, d0, d1[1] | 
|  | 183 | %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <4 x i16> | 
|  | 184 | %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; splat operand is expected to be selected as the d1[1] scalar form | 
|  | 185 | ret <4 x i32> %1 | 
|  | 186 | } | 
|  | 187 |  | 
|  | 188 | define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { ; by-lane vqdmull: second operand is element 1 of %arg1 broadcast to 2 lanes | 
|  | 189 | entry: | 
|  | 190 | ; CHECK: test_vqdmull_lanes32: | 
|  | 191 | ; CHECK: vqdmull.s32 q0, d0, d1[1] | 
|  | 192 | %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; all-ones mask: splat element 1 across <2 x i32> | 
|  | 193 | %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; splat operand is expected to be selected as the d1[1] scalar form | 
|  | 194 | ret <2 x i64> %1 | 
|  | 195 | } | 
|  | 196 |  | 
|  | 197 | declare <4 x i32>  @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone | 
|  | 198 | declare <2 x i64>  @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone | 
|  | 199 |  | 
|  | 200 | define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; loads one <4 x i32> and two <4 x i16> vectors for the three-operand vqdmlal intrinsic | 
|  | 201 | ;CHECK: vqdmlals16: | 
|  | 202 | ;CHECK: vqdmlal.s16 | 
|  | 203 | %acc = load <4 x i32>* %A | 
|  | 204 | %lhs = load <4 x i16>* %B | 
|  | 205 | %rhs = load <4 x i16>* %C | 
|  | 206 | %res = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %acc, <4 x i16> %lhs, <4 x i16> %rhs) | 
|  | 207 | ret <4 x i32> %res | 
|  | 208 | } | 
|  | 209 |  | 
|  | 210 | define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; loads one <2 x i64> and two <2 x i32> vectors for the three-operand vqdmlal intrinsic | 
|  | 211 | ;CHECK: vqdmlals32: | 
|  | 212 | ;CHECK: vqdmlal.s32 | 
|  | 213 | %acc = load <2 x i64>* %A | 
|  | 214 | %lhs = load <2 x i32>* %B | 
|  | 215 | %rhs = load <2 x i32>* %C | 
|  | 216 | %res = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %acc, <2 x i32> %lhs, <2 x i32> %rhs) | 
|  | 217 | ret <2 x i64> %res | 
|  | 218 | } | 
|  | 219 |  | 
|  | 220 | define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { ; by-lane vqdmlal: third operand is element 1 of %arg2 broadcast to 4 lanes | 
|  | 221 | entry: | 
|  | 222 | ; CHECK: test_vqdmlal_lanes16: | 
|  | 223 | ; CHECK: vqdmlal.s16 q0, d2, d3[1] | 
|  | 224 | %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <4 x i16> | 
|  | 225 | %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; splat operand is expected to be selected as the d3[1] scalar form | 
|  | 226 | ret <4 x i32> %1 | 
|  | 227 | } | 
|  | 228 |  | 
|  | 229 | define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { ; by-lane vqdmlal: third operand is element 1 of %arg2 broadcast to 2 lanes | 
|  | 230 | entry: | 
|  | 231 | ; CHECK: test_vqdmlal_lanes32: | 
|  | 232 | ; CHECK: vqdmlal.s32 q0, d2, d3[1] | 
|  | 233 | %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; all-ones mask: splat element 1 across <2 x i32> | 
|  | 234 | %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; splat operand is expected to be selected as the d3[1] scalar form | 
|  | 235 | ret <2 x i64> %1 | 
|  | 236 | } | 
|  | 237 |  | 
|  | 238 | declare <4 x i32>  @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone | 
|  | 239 | declare <2 x i64>  @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone | 
|  | 240 |  | 
|  | 241 | define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; loads one <4 x i32> and two <4 x i16> vectors for the three-operand vqdmlsl intrinsic | 
|  | 242 | ;CHECK: vqdmlsls16: | 
|  | 243 | ;CHECK: vqdmlsl.s16 | 
|  | 244 | %acc = load <4 x i32>* %A | 
|  | 245 | %lhs = load <4 x i16>* %B | 
|  | 246 | %rhs = load <4 x i16>* %C | 
|  | 247 | %res = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %acc, <4 x i16> %lhs, <4 x i16> %rhs) | 
|  | 248 | ret <4 x i32> %res | 
|  | 249 | } | 
|  | 250 |  | 
|  | 251 | define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; loads one <2 x i64> and two <2 x i32> vectors for the three-operand vqdmlsl intrinsic | 
|  | 252 | ;CHECK: vqdmlsls32: | 
|  | 253 | ;CHECK: vqdmlsl.s32 | 
|  | 254 | %acc = load <2 x i64>* %A | 
|  | 255 | %lhs = load <2 x i32>* %B | 
|  | 256 | %rhs = load <2 x i32>* %C | 
|  | 257 | %res = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %acc, <2 x i32> %lhs, <2 x i32> %rhs) | 
|  | 258 | ret <2 x i64> %res | 
|  | 259 | } | 
|  | 260 |  | 
|  | 261 | define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone { ; by-lane vqdmlsl: third operand is element 1 of %arg2 broadcast to 4 lanes | 
|  | 262 | entry: | 
|  | 263 | ; CHECK: test_vqdmlsl_lanes16: | 
|  | 264 | ; CHECK: vqdmlsl.s16 q0, d2, d3[1] | 
|  | 265 | %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; all-ones mask: splat element 1 across <4 x i16> | 
|  | 266 | %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; splat operand is expected to be selected as the d3[1] scalar form | 
|  | 267 | ret <4 x i32> %1 | 
|  | 268 | } | 
|  | 269 |  | 
|  | 270 | define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone { ; by-lane vqdmlsl: third operand is element 1 of %arg2 broadcast to 2 lanes | 
|  | 271 | entry: | 
|  | 272 | ; CHECK: test_vqdmlsl_lanes32: | 
|  | 273 | ; CHECK: vqdmlsl.s32 q0, d2, d3[1] | 
|  | 274 | %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; all-ones mask: splat element 1 across <2 x i32> | 
|  | 275 | %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; splat operand is expected to be selected as the d3[1] scalar form | 
|  | 276 | ret <2 x i64> %1 | 
|  | 277 | } | 
|  | 278 |  | 
|  | 279 | declare <4 x i32>  @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone | 
|  | 280 | declare <2 x i64>  @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone |