Blame - test/CodeGen/ARM/vmul.ll - fp2-dev/platform/external/llvm

blob: 80ba9be3bd2d3dd09732699701e7a7098397b969 [file] [log] [blame]

Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	1	; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	2
				3	define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	4	;CHECK: vmuli8:
				5	;CHECK: vmul.i8
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	6	%tmp1 = load <8 x i8>* %A
				7	%tmp2 = load <8 x i8>* %B
				8	%tmp3 = mul <8 x i8> %tmp1, %tmp2
				9	ret <8 x i8> %tmp3
				10	}
				11
				12	define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	13	;CHECK: vmuli16:
				14	;CHECK: vmul.i16
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	15	%tmp1 = load <4 x i16>* %A
				16	%tmp2 = load <4 x i16>* %B
				17	%tmp3 = mul <4 x i16> %tmp1, %tmp2
				18	ret <4 x i16> %tmp3
				19	}
				20
				21	define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	22	;CHECK: vmuli32:
				23	;CHECK: vmul.i32
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	24	%tmp1 = load <2 x i32>* %A
				25	%tmp2 = load <2 x i32>* %B
				26	%tmp3 = mul <2 x i32> %tmp1, %tmp2
				27	ret <2 x i32> %tmp3
				28	}
				29
				30	define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	31	;CHECK: vmulf32:
				32	;CHECK: vmul.f32
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	33	%tmp1 = load <2 x float>* %A
				34	%tmp2 = load <2 x float>* %B
Dan Gohman	d4d0115	2010-05-03 22:36:46 +0000	[diff] [blame]	35	%tmp3 = fmul <2 x float> %tmp1, %tmp2
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	36	ret <2 x float> %tmp3
				37	}
				38
				39	define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	40	;CHECK: vmulp8:
				41	;CHECK: vmul.p8
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	42	%tmp1 = load <8 x i8>* %A
				43	%tmp2 = load <8 x i8>* %B
				44	%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
				45	ret <8 x i8> %tmp3
				46	}
				47
				48	define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	49	;CHECK: vmulQi8:
				50	;CHECK: vmul.i8
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	51	%tmp1 = load <16 x i8>* %A
				52	%tmp2 = load <16 x i8>* %B
				53	%tmp3 = mul <16 x i8> %tmp1, %tmp2
				54	ret <16 x i8> %tmp3
				55	}
				56
				57	define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	58	;CHECK: vmulQi16:
				59	;CHECK: vmul.i16
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	60	%tmp1 = load <8 x i16>* %A
				61	%tmp2 = load <8 x i16>* %B
				62	%tmp3 = mul <8 x i16> %tmp1, %tmp2
				63	ret <8 x i16> %tmp3
				64	}
				65
				66	define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	67	;CHECK: vmulQi32:
				68	;CHECK: vmul.i32
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	69	%tmp1 = load <4 x i32>* %A
				70	%tmp2 = load <4 x i32>* %B
				71	%tmp3 = mul <4 x i32> %tmp1, %tmp2
				72	ret <4 x i32> %tmp3
				73	}
				74
				75	define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	76	;CHECK: vmulQf32:
				77	;CHECK: vmul.f32
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	78	%tmp1 = load <4 x float>* %A
				79	%tmp2 = load <4 x float>* %B
Dan Gohman	d4d0115	2010-05-03 22:36:46 +0000	[diff] [blame]	80	%tmp3 = fmul <4 x float> %tmp1, %tmp2
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	81	ret <4 x float> %tmp3
				82	}
				83
				84	define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
Bob Wilson	fe27c51	2009-10-07 23:47:21 +0000	[diff] [blame]	85	;CHECK: vmulQp8:
				86	;CHECK: vmul.p8
Bob Wilson	5bafff3	2009-06-22 23:27:02 +0000	[diff] [blame]	87	%tmp1 = load <16 x i8>* %A
				88	%tmp2 = load <16 x i8>* %B
				89	%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
				90	ret <16 x i8> %tmp3
				91	}
				92
				93	declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
				94	declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	95
				96	define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
				97	entry:
				98	; CHECK: test_vmul_lanef32:
				99	; CHECK: vmul.f32 d0, d0, d1[0]
				100	%0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
				101	%1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1]
				102	ret <2 x float> %1
				103	}
				104
				105	define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
				106	entry:
				107	; CHECK: test_vmul_lanes16:
				108	; CHECK: vmul.i16 d0, d0, d1[1]
				109	%0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
				110	%1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1]
				111	ret <4 x i16> %1
				112	}
				113
				114	define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
				115	entry:
				116	; CHECK: test_vmul_lanes32:
				117	; CHECK: vmul.i32 d0, d0, d1[1]
				118	%0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
				119	%1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1]
				120	ret <2 x i32> %1
				121	}
				122
				123	define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
				124	entry:
				125	; CHECK: test_vmulQ_lanef32:
				126	; CHECK: vmul.f32 q0, q0, d2[1]
				127	%0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
				128	%1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1]
				129	ret <4 x float> %1
				130	}
				131
				132	define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
				133	entry:
				134	; CHECK: test_vmulQ_lanes16:
				135	; CHECK: vmul.i16 q0, q0, d2[1]
				136	%0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
				137	%1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1]
				138	ret <8 x i16> %1
				139	}
				140
				141	define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
				142	entry:
				143	; CHECK: test_vmulQ_lanes32:
				144	; CHECK: vmul.i32 q0, q0, d2[1]
				145	%0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
				146	%1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1]
				147	ret <4 x i32> %1
				148	}
				149
				150	define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
				151	;CHECK: vmulls8:
				152	;CHECK: vmull.s8
				153	%tmp1 = load <8 x i8>* %A
				154	%tmp2 = load <8 x i8>* %B
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	155	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
				156	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
				157	%tmp5 = mul <8 x i16> %tmp3, %tmp4
				158	ret <8 x i16> %tmp5
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	159	}
				160
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	161	define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
				162	;CHECK: vmulls8_int:
				163	;CHECK: vmull.s8
				164	%tmp1 = load <8 x i8>* %A
				165	%tmp2 = load <8 x i8>* %B
				166	%tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
				167	ret <8 x i16> %tmp3
				168	}
				169
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	170	define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
				171	;CHECK: vmulls16:
				172	;CHECK: vmull.s16
				173	%tmp1 = load <4 x i16>* %A
				174	%tmp2 = load <4 x i16>* %B
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	175	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
				176	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
				177	%tmp5 = mul <4 x i32> %tmp3, %tmp4
				178	ret <4 x i32> %tmp5
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	179	}
				180
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	181	define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
				182	;CHECK: vmulls16_int:
				183	;CHECK: vmull.s16
				184	%tmp1 = load <4 x i16>* %A
				185	%tmp2 = load <4 x i16>* %B
				186	%tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
				187	ret <4 x i32> %tmp3
				188	}
				189
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	190	define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
				191	;CHECK: vmulls32:
				192	;CHECK: vmull.s32
				193	%tmp1 = load <2 x i32>* %A
				194	%tmp2 = load <2 x i32>* %B
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	195	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
				196	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
				197	%tmp5 = mul <2 x i64> %tmp3, %tmp4
				198	ret <2 x i64> %tmp5
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	199	}
				200
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	201	define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
				202	;CHECK: vmulls32_int:
				203	;CHECK: vmull.s32
				204	%tmp1 = load <2 x i32>* %A
				205	%tmp2 = load <2 x i32>* %B
				206	%tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
				207	ret <2 x i64> %tmp3
				208	}
				209
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	210	define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
				211	;CHECK: vmullu8:
				212	;CHECK: vmull.u8
				213	%tmp1 = load <8 x i8>* %A
				214	%tmp2 = load <8 x i8>* %B
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	215	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
				216	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
				217	%tmp5 = mul <8 x i16> %tmp3, %tmp4
				218	ret <8 x i16> %tmp5
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	219	}
				220
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	221	define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
				222	;CHECK: vmullu8_int:
				223	;CHECK: vmull.u8
				224	%tmp1 = load <8 x i8>* %A
				225	%tmp2 = load <8 x i8>* %B
				226	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
				227	ret <8 x i16> %tmp3
				228	}
				229
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	230	define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
				231	;CHECK: vmullu16:
				232	;CHECK: vmull.u16
				233	%tmp1 = load <4 x i16>* %A
				234	%tmp2 = load <4 x i16>* %B
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	235	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
				236	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
				237	%tmp5 = mul <4 x i32> %tmp3, %tmp4
				238	ret <4 x i32> %tmp5
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	239	}
				240
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	241	define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
				242	;CHECK: vmullu16_int:
				243	;CHECK: vmull.u16
				244	%tmp1 = load <4 x i16>* %A
				245	%tmp2 = load <4 x i16>* %B
				246	%tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
				247	ret <4 x i32> %tmp3
				248	}
				249
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	250	define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
				251	;CHECK: vmullu32:
				252	;CHECK: vmull.u32
				253	%tmp1 = load <2 x i32>* %A
				254	%tmp2 = load <2 x i32>* %B
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	255	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
				256	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
				257	%tmp5 = mul <2 x i64> %tmp3, %tmp4
				258	ret <2 x i64> %tmp5
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	259	}
				260
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	261	define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
				262	;CHECK: vmullu32_int:
				263	;CHECK: vmull.u32
				264	%tmp1 = load <2 x i32>* %A
				265	%tmp2 = load <2 x i32>* %B
				266	%tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
				267	ret <2 x i64> %tmp3
				268	}
				269
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	270	define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
				271	;CHECK: vmullp8:
				272	;CHECK: vmull.p8
				273	%tmp1 = load <8 x i8>* %A
				274	%tmp2 = load <8 x i8>* %B
				275	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
				276	ret <8 x i16> %tmp3
				277	}
				278
				279	define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
				280	entry:
				281	; CHECK: test_vmull_lanes16
				282	; CHECK: vmull.s16 q0, d0, d1[1]
				283	%0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	284	%1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32>
				285	%2 = sext <4 x i16> %0 to <4 x i32>
				286	%3 = mul <4 x i32> %1, %2
				287	ret <4 x i32> %3
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	288	}
				289
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	290	define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16_int(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
				291	entry:
				292	; CHECK: test_vmull_lanes16_int
				293	; CHECK: vmull.s16 q0, d0, d1[1]
				294	%0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
				295	%1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
				296	ret <4 x i32> %1
				297	}
				298
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	299	define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
				300	entry:
				301	; CHECK: test_vmull_lanes32
				302	; CHECK: vmull.s32 q0, d0, d1[1]
				303	%0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	304	%1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64>
				305	%2 = sext <2 x i32> %0 to <2 x i64>
				306	%3 = mul <2 x i64> %1, %2
				307	ret <2 x i64> %3
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	308	}
				309
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	310	define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32_int(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
				311	entry:
				312	; CHECK: test_vmull_lanes32_int
				313	; CHECK: vmull.s32 q0, d0, d1[1]
				314	%0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
				315	%1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
				316	ret <2 x i64> %1
				317	}
				318
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	319	define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
				320	entry:
				321	; CHECK: test_vmull_laneu16
				322	; CHECK: vmull.u16 q0, d0, d1[1]
				323	%0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	324	%1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32>
				325	%2 = zext <4 x i16> %0 to <4 x i32>
				326	%3 = mul <4 x i32> %1, %2
				327	ret <4 x i32> %3
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	328	}
				329
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	330	define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16_int(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
				331	entry:
				332	; CHECK: test_vmull_laneu16_int
				333	; CHECK: vmull.u16 q0, d0, d1[1]
				334	%0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
				335	%1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
				336	ret <4 x i32> %1
				337	}
				338
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	339	define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
				340	entry:
				341	; CHECK: test_vmull_laneu32
				342	; CHECK: vmull.u32 q0, d0, d1[1]
				343	%0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
Bob Wilson	d0b69cf	2010-09-01 23:50:19 +0000	[diff] [blame]	344	%1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64>
				345	%2 = zext <2 x i32> %0 to <2 x i64>
				346	%3 = mul <2 x i64> %1, %2
				347	ret <2 x i64> %3
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	348	}
				349
Evan Cheng	92e3916	2011-03-29 23:06:19 +0000	[diff] [blame^]	350	define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32_int(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
				351	entry:
				352	; CHECK: test_vmull_laneu32_int
				353	; CHECK: vmull.u32 q0, d0, d1[1]
				354	%0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
				355	%1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
				356	ret <2 x i64> %1
				357	}
				358
				359	declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
				360	declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
				361	declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
				362
				363	declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
				364	declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
				365	declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
				366
Bob Wilson	83815ae	2009-10-09 20:20:54 +0000	[diff] [blame]	367	declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
Bob Wilson	626613d	2010-11-23 19:38:38 +0000	[diff] [blame]	368
				369
				370	; Radar 8687140
				371	; VMULL needs to recognize BUILD_VECTORs with sign/zero-extended elements.
				372
				373	define <8 x i16> @vmull_extvec_s8(<8 x i8> %arg) nounwind {
				374	; CHECK: vmull_extvec_s8
				375	; CHECK: vmull.s8
				376	%tmp3 = sext <8 x i8> %arg to <8 x i16>
				377	%tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
				378	ret <8 x i16> %tmp4
				379	}
				380
				381	define <8 x i16> @vmull_extvec_u8(<8 x i8> %arg) nounwind {
				382	; CHECK: vmull_extvec_u8
				383	; CHECK: vmull.u8
				384	%tmp3 = zext <8 x i8> %arg to <8 x i16>
				385	%tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
				386	ret <8 x i16> %tmp4
				387	}
				388
				389	define <8 x i16> @vmull_noextvec_s8(<8 x i8> %arg) nounwind {
				390	; Do not use VMULL if the BUILD_VECTOR element values are too big.
				391	; CHECK: vmull_noextvec_s8
				392	; CHECK: vmovl.s8
				393	; CHECK: vmul.i16
				394	%tmp3 = sext <8 x i8> %arg to <8 x i16>
				395	%tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
				396	ret <8 x i16> %tmp4
				397	}
				398
				399	define <8 x i16> @vmull_noextvec_u8(<8 x i8> %arg) nounwind {
				400	; Do not use VMULL if the BUILD_VECTOR element values are too big.
				401	; CHECK: vmull_noextvec_u8
				402	; CHECK: vmovl.u8
				403	; CHECK: vmul.i16
				404	%tmp3 = zext <8 x i8> %arg to <8 x i16>
				405	%tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
				406	ret <8 x i16> %tmp4
				407	}
				408
				409	define <4 x i32> @vmull_extvec_s16(<4 x i16> %arg) nounwind {
				410	; CHECK: vmull_extvec_s16
				411	; CHECK: vmull.s16
				412	%tmp3 = sext <4 x i16> %arg to <4 x i32>
				413	%tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12>
				414	ret <4 x i32> %tmp4
				415	}
				416
				417	define <4 x i32> @vmull_extvec_u16(<4 x i16> %arg) nounwind {
				418	; CHECK: vmull_extvec_u16
				419	; CHECK: vmull.u16
				420	%tmp3 = zext <4 x i16> %arg to <4 x i32>
				421	%tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
				422	ret <4 x i32> %tmp4
				423	}
				424
				425	define <2 x i64> @vmull_extvec_s32(<2 x i32> %arg) nounwind {
				426	; CHECK: vmull_extvec_s32
				427	; CHECK: vmull.s32
				428	%tmp3 = sext <2 x i32> %arg to <2 x i64>
				429	%tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
				430	ret <2 x i64> %tmp4
				431	}
				432
				433	define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind {
				434	; CHECK: vmull_extvec_u32
				435	; CHECK: vmull.u32
				436	%tmp3 = zext <2 x i32> %arg to <2 x i64>
				437	%tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
				438	ret <2 x i64> %tmp4
				439	}
Evan Cheng	78fe9ab	2011-03-29 01:56:09 +0000	[diff] [blame]	440
				441	; rdar://9197392
				442	define void @distribue(i16* %dst, i8* %src, i32 %mul) nounwind {
				443	entry:
				444	; CHECK: distribue:
				445	; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]]
				446	; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]]
				447	%0 = trunc i32 %mul to i8
				448	%1 = insertelement <8 x i8> undef, i8 %0, i32 0
				449	%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
				450	%3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
				451	%4 = bitcast <16 x i8> %3 to <2 x double>
				452	%5 = extractelement <2 x double> %4, i32 1
				453	%6 = bitcast double %5 to <8 x i8>
				454	%7 = zext <8 x i8> %6 to <8 x i16>
				455	%8 = zext <8 x i8> %2 to <8 x i16>
				456	%9 = extractelement <2 x double> %4, i32 0
				457	%10 = bitcast double %9 to <8 x i8>
				458	%11 = zext <8 x i8> %10 to <8 x i16>
				459	%12 = add <8 x i16> %7, %11
				460	%13 = mul <8 x i16> %12, %8
				461	%14 = bitcast i16* %dst to i8*
				462	tail call void @llvm.arm.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2)
				463	ret void
				464	}
				465
				466	declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
				467
				468	declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind