; RUN: llc < %s -mtriple=arm-eabi -mcpu=krait | FileCheck %s

define void @func1(i16* %a, i16* %b, i16* %c) {
entry:
; This test case tries to vectorize the pseudo code below.
; a[i] = b[i] + c[i];
; b[i] = a[i] * c[i];
; a[i] = b[i] + a[i] * c[i];
;
; Check that the vector load of a[i] used by "a[i] = b[i] + a[i] * c[i]"
; is scheduled before the first vector store to a[i], the one for
; "a[i] = b[i] + c[i]". Also check that no vector load of a[i] is
; scheduled between the two vector stores to a[i]; otherwise that load
; would read the value written by the first store.
;
; This test case checks that the chain of the newly created Load SDNode
; is updated during LowerMUL.
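;
; Background (a sketch of the underlying mechanism, inferred from the
; checks below): to select vmull, LowerMUL in ARMISelLowering.cpp
; replaces an extending vector load feeding the multiply with a plain
; narrower load, i.e. a newly created Load SDNode. If the old load's
; chain uses are not transferred to the new node, the stores below lose
; their ordering dependence on the load, and the scheduler may place a
; reload of a[i] between the two stores.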

; CHECK: vldr {{.*}} [r0, #16]
; CHECK: vstr {{.*}} [r0, #16]
; CHECK-NOT: vldr {{.*}} [r0, #16]
; CHECK: vstr {{.*}} [r0, #16]
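;
; Note: [r0, #16] is &a[8] (r0 holds %a, and 8 i16 elements = 16 bytes),
; matching the getelementptr index of 8 below.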

  %scevgep0 = getelementptr i16, i16* %a, i32 8
  %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*
  %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8
  %scevgep1 = getelementptr i16, i16* %b, i32 8
  %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*
  %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8
  %0 = zext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, i16* %c, i32 8
  %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8
  %1 = sext <4 x i16> %vec2 to <4 x i32>
  %vec3 = add <4 x i32> %1, %0
  %2 = trunc <4 x i32> %vec3 to <4 x i16>
  %scevgep3 = getelementptr i16, i16* %a, i32 8
  %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8
  %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8
  %3 = sext <4 x i16> %vec4 to <4 x i32>
  %vec5 = mul <4 x i32> %3, %vec3
  %4 = trunc <4 x i32> %vec5 to <4 x i16>
  %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*
  store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8
  %5 = sext <4 x i16> %vec0 to <4 x i32>
  %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8
  %6 = sext <4 x i16> %vec6 to <4 x i32>
  %vec7 = mul <4 x i32> %6, %5
  %vec8 = add <4 x i32> %vec7, %vec5
  %7 = trunc <4 x i32> %vec8 to <4 x i16>
  %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8
  ret void
}

define void @func2(i16* %a, i16* %b, i16* %c) {
entry:
; This test case tries to vectorize the pseudo code below.
; a[i] = b[i] + c[i];
; b[i] = a[i] * c[i];
; a[i] = b[i] + a[i] * c[i] + a[i];
;
; Check that the vector load of a[i] used by
; "a[i] = b[i] + a[i] * c[i] + a[i]" is scheduled before the first
; vector store to a[i], the one for "a[i] = b[i] + c[i]". Also check
; that no vector load of a[i] is scheduled between the first vector
; store to a[i] and the vector add of a[i]; otherwise that load would
; read the value written by the first store.
;
; This test case checks that both the chain and the value of the newly
; created Load SDNode are updated during LowerMUL.
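;
; Unlike in @func1, the original a[i] values here are also used outside
; the multiply, by the final widening add (vaddw.s16). So updating only
; the chain of the newly created load is not enough: value uses of the
; old extending load must be rewired as well, or a second load of a[i]
; survives and can be scheduled after the first store.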

; CHECK: vldr {{.*}} [r0, #16]
; CHECK: vstr {{.*}} [r0, #16]
; CHECK-NOT: vldr {{.*}} [r0, #16]
; CHECK: vaddw.s16
; CHECK: vstr {{.*}} [r0, #16]
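;
; The vaddw.s16 is the widening add of the original a[i] values; the
; CHECK-NOT above ensures no second vldr of a[i] is issued between the
; first store and that add.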

  %scevgep0 = getelementptr i16, i16* %a, i32 8
  %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*
  %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8
  %scevgep1 = getelementptr i16, i16* %b, i32 8
  %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*
  %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8
  %0 = zext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, i16* %c, i32 8
  %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8
  %1 = sext <4 x i16> %vec2 to <4 x i32>
  %vec3 = add <4 x i32> %1, %0
  %2 = trunc <4 x i32> %vec3 to <4 x i16>
  %scevgep3 = getelementptr i16, i16* %a, i32 8
  %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8
  %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8
  %3 = sext <4 x i16> %vec4 to <4 x i32>
  %vec5 = mul <4 x i32> %3, %vec3
  %4 = trunc <4 x i32> %vec5 to <4 x i16>
  %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*
  store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8
  %5 = sext <4 x i16> %vec0 to <4 x i32>
  %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8
  %6 = sext <4 x i16> %vec6 to <4 x i32>
  %vec7 = mul <4 x i32> %6, %5
  %vec8 = add <4 x i32> %vec7, %vec5
  %vec9 = add <4 x i32> %vec8, %5
  %7 = trunc <4 x i32> %vec9 to <4 x i16>
  %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8
  ret void
}