; RUN: llc < %s -march=arm64 -enable-misched=false -aarch64-stp-suppress=false -verify-machineinstrs | FileCheck %s
| 2 | |
; The next set of tests makes sure we can combine the second instruction into
; the first.
| 5 | |
; Stores to p[0] and p[1] are separated by a load of p[2]; alias analysis
; must prove the load does not overlap either store so the two stores can
; be combined into a single stp.
; CHECK-LABEL: stp_int_aa
; CHECK: stp w0, w1, [x2]
; CHECK: ldr w0, [x2, #8]
; CHECK: ret
define i32 @stp_int_aa(i32 %a, i32 %b, i32* nocapture %p) nounwind {
  store i32 %a, i32* %p, align 4
  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 2
  %tmp = load i32, i32* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  store i32 %b, i32* %add.ptr, align 4
  ret i32 %tmp
}
| 18 | |
; Same pattern as stp_int_aa, with 64-bit elements: the load of p[2] must
; not block pairing the stores to p[0] and p[1].
; CHECK-LABEL: stp_long_aa
; CHECK: stp x0, x1, [x2]
; CHECK: ldr x0, [x2, #16]
; CHECK: ret
define i64 @stp_long_aa(i64 %a, i64 %b, i64* nocapture %p) nounwind {
  store i64 %a, i64* %p, align 8
  %ld.ptr = getelementptr inbounds i64, i64* %p, i64 2
  ; NOTE(review): load is only align 4 (under-aligned for i64) while the
  ; stores are align 8 — presumably intentional test input; confirm.
  %tmp = load i64, i64* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  store i64 %b, i64* %add.ptr, align 8
  ret i64 %tmp
}
| 31 | |
; Same pattern with single-precision FP: the stores to p[0] and p[1] pair
; into an stp of s-registers despite the intervening load of p[2].
; CHECK-LABEL: stp_float_aa
; CHECK: stp s0, s1, [x0]
; CHECK: ldr s0, [x0, #8]
; CHECK: ret
define float @stp_float_aa(float %a, float %b, float* nocapture %p) nounwind {
  store float %a, float* %p, align 4
  %ld.ptr = getelementptr inbounds float, float* %p, i64 2
  %tmp = load float, float* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  store float %b, float* %add.ptr, align 4
  ret float %tmp
}
| 44 | |
; Same pattern with double-precision FP: stores to p[0] and p[1] pair into
; an stp of d-registers despite the intervening load of p[2].
; CHECK-LABEL: stp_double_aa
; CHECK: stp d0, d1, [x0]
; CHECK: ldr d0, [x0, #16]
; CHECK: ret
define double @stp_double_aa(double %a, double %b, double* nocapture %p) nounwind {
  store double %a, double* %p, align 8
  %ld.ptr = getelementptr inbounds double, double* %p, i64 2
  ; NOTE(review): load is only align 4 (under-aligned for double) while the
  ; stores are align 8 — presumably intentional test input; confirm.
  %tmp = load double, double* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  store double %b, double* %add.ptr, align 8
  ret double %tmp
}
| 57 | |
; The next set of tests makes sure we can combine the first instruction into
; the second.
| 60 | |
; Here the load reads p[1] — the same slot the second store overwrites — so
; the load must be scheduled before the stp formed at the second store's
; position. The extra first argument (%w0) occupies w0 so the CHECK
; registers line up with w1/w2 for the stored values.
; CHECK-LABEL: stp_int_aa_after
; CHECK: ldr w0, [x3, #4]
; CHECK: stp w1, w2, [x3]
; CHECK: ret
define i32 @stp_int_aa_after(i32 %w0, i32 %a, i32 %b, i32* nocapture %p) nounwind {
  store i32 %a, i32* %p, align 4
  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 1
  ; Reads the value at p[1] before %b is stored there.
  %tmp = load i32, i32* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  store i32 %b, i32* %add.ptr, align 4
  ret i32 %tmp
}
| 73 | |
; 64-bit variant: the load of p[1] (the slot the second store overwrites)
; must come before the combined stp.
; CHECK-LABEL: stp_long_aa_after
; CHECK: ldr x0, [x3, #8]
; CHECK: stp x1, x2, [x3]
; CHECK: ret
define i64 @stp_long_aa_after(i64 %x0, i64 %a, i64 %b, i64* nocapture %p) nounwind {
  store i64 %a, i64* %p, align 8
  %ld.ptr = getelementptr inbounds i64, i64* %p, i64 1
  ; NOTE(review): load is only align 4 (under-aligned for i64) while the
  ; stores are align 8 — presumably intentional test input; confirm.
  %tmp = load i64, i64* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  store i64 %b, i64* %add.ptr, align 8
  ret i64 %tmp
}
| 86 | |
; Single-precision FP variant: the load of p[1] must come before the
; combined stp of s-registers.
; CHECK-LABEL: stp_float_aa_after
; CHECK: ldr s0, [x0, #4]
; CHECK: stp s1, s2, [x0]
; CHECK: ret
define float @stp_float_aa_after(float %s0, float %a, float %b, float* nocapture %p) nounwind {
  store float %a, float* %p, align 4
  %ld.ptr = getelementptr inbounds float, float* %p, i64 1
  %tmp = load float, float* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  store float %b, float* %add.ptr, align 4
  ret float %tmp
}
| 99 | |
; Double-precision FP variant: the load of p[1] must come before the
; combined stp of d-registers.
; CHECK-LABEL: stp_double_aa_after
; CHECK: ldr d0, [x0, #8]
; CHECK: stp d1, d2, [x0]
; CHECK: ret
define double @stp_double_aa_after(double %d0, double %a, double %b, double* nocapture %p) nounwind {
  store double %a, double* %p, align 8
  %ld.ptr = getelementptr inbounds double, double* %p, i64 1
  ; NOTE(review): load is only align 4 (under-aligned for double) while the
  ; stores are align 8 — presumably intentional test input; confirm.
  %tmp = load double, double* %ld.ptr, align 4
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  store double %b, double* %add.ptr, align 8
  ret double %tmp
}
Chad Rosier | 1fbe9bc | 2016-04-15 18:09:10 +0000 | [diff] [blame] | 112 | |
; Check that the stores to the %c0 and %d0 slots are paired after the fadd
; instruction, and then the stores %a and %b are paired after proving that
; they do not depend on the (%c, %d) pair.
;
; CHECK-LABEL: st1:
; CHECK: stp q0, q1, [x{{[0-9]+}}]
; CHECK: fadd
; CHECK: stp q2, q0, [x{{[0-9]+}}, #32]
define void @st1(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %base, i64 %index) {
entry:
  ; Four consecutive <4 x float> slots starting at %base + %index.
  %a0 = getelementptr inbounds float, float* %base, i64 %index
  %b0 = getelementptr float, float* %a0, i64 4
  %c0 = getelementptr float, float* %a0, i64 8
  %d0 = getelementptr float, float* %a0, i64 12

  %a1 = bitcast float* %a0 to <4 x float>*
  %b1 = bitcast float* %b0 to <4 x float>*
  %c1 = bitcast float* %c0 to <4 x float>*
  %d1 = bitcast float* %d0 to <4 x float>*

  store <4 x float> %c, <4 x float> * %c1, align 4
  store <4 x float> %a, <4 x float> * %a1, align 4

  ; This fadd forces the compiler to pair %c and %e after fadd, and leave the
  ; stores %a and %b separated by a stp. The dependence analysis needs then to
  ; prove that it is safe to move %b past the stp to be paired with %a.
  %e = fadd fast <4 x float> %d, %a

  store <4 x float> %e, <4 x float>* %d1, align 4
  store <4 x float> %b, <4 x float>* %b1, align 4

  ret void
}