Blame - llvm/test/Transforms/LoopVectorize/induction.ll - toolchain/llvm-project

blob: 6213b4a7c2e9d10f3d29af10554a1301c2b957a3 [file] [log] [blame]

Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	1	; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	2	; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
				3	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	4	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	5	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	6
				7	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
				8
				9	; Make sure that we can handle multiple integer induction variables.
Matt Arsenault	e64c7c7	2013-10-02 20:29:00 +0000	[diff] [blame]	10	; CHECK-LABEL: @multi_int_induction(
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	11	; CHECK: vector.body:
				12	; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
James Molloy	c07701b	2015-09-02 10:14:54 +0000	[diff] [blame]	13	; CHECK: %[[VAR:.*]] = trunc i64 %index to i32
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	14	; CHECK: %offset.idx = add i32 190, %[[VAR]]
				15	define void @multi_int_induction(i32* %A, i32 %N) {
				16	for.body.lr.ph:
				17	br label %for.body
				18
				19	for.body:
				20	%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
				21	%count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
David Blaikie	79e6c74	2015-02-27 19:29:02 +0000	[diff] [blame]	22	%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	23	store i32 %count.09, i32* %arrayidx2, align 4
				24	%inc = add nsw i32 %count.09, 1
				25	%indvars.iv.next = add i64 %indvars.iv, 1
				26	%lftr.wideiv = trunc i64 %indvars.iv.next to i32
				27	%exitcond = icmp ne i32 %lftr.wideiv, %N
				28	br i1 %exitcond, label %for.body, label %for.end
				29
				30	for.end:
				31	ret void
				32	}
				33
Arnold Schwaighofer	a846a7f	2013-11-01 22:18:19 +0000	[diff] [blame]	34	; Make sure we remove unneeded vectorization of induction variables.
				35	; In order for instcombine to clean up the vectorized induction variables that we
				36	; create in the loop vectorizer we need to perform some form of redundancy
				37	; elimination to get rid of multiple uses.
				38
				39	; IND-LABEL: scalar_use
				40
				41	; IND: br label %vector.body
				42	; IND: vector.body:
				43	; Vectorized induction variable.
				44	; IND-NOT: insertelement <2 x i64>
				45	; IND-NOT: shufflevector <2 x i64>
				46	; IND: br {{.*}}, label %vector.body
				47
				48	define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
				49	entry:
				50	br label %for.body
				51
				52	for.body:
				53	%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
				54	%ind.sum = add i64 %iv, %offset
David Blaikie	79e6c74	2015-02-27 19:29:02 +0000	[diff] [blame]	55	%arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
David Blaikie	a79ac14	2015-02-27 21:17:42 +0000	[diff] [blame]	56	%l1 = load float, float* %arr.idx, align 4
Arnold Schwaighofer	a846a7f	2013-11-01 22:18:19 +0000	[diff] [blame]	57	%ind.sum2 = add i64 %iv, %offset2
David Blaikie	79e6c74	2015-02-27 19:29:02 +0000	[diff] [blame]	58	%arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
David Blaikie	a79ac14	2015-02-27 21:17:42 +0000	[diff] [blame]	59	%l2 = load float, float* %arr.idx2, align 4
Arnold Schwaighofer	a846a7f	2013-11-01 22:18:19 +0000	[diff] [blame]	60	%m = fmul fast float %b, %l2
				61	%ad = fadd fast float %l1, %m
				62	store float %ad, float* %arr.idx, align 4
				63	%iv.next = add nuw nsw i64 %iv, 1
				64	%exitcond = icmp eq i64 %iv.next, %n
				65	br i1 %exitcond, label %loopexit, label %for.body
				66
				67	loopexit:
				68	ret void
				69	}
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	70
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	71	; Make sure we don't create a vector induction phi node that is unused.
				72	; Scalarize the step vectors instead.
				73	;
				74	; for (int i = 0; i < n; ++i)
				75	; sum += a[i];
				76	;
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	77	; CHECK-LABEL: @scalarize_induction_variable_01(
				78	; CHECK: vector.body:
				79	; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				80	; CHECK: %[[i0:.+]] = add i64 %index, 0
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	81	; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	82	;
				83	; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
				84	; UNROLL-NO-IC: vector.body:
				85	; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				86	; UNROLL-NO-IC: %[[i0:.+]] = add i64 %index, 0
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	87	; UNROLL-NO-IC: %[[i2:.+]] = add i64 %index, 2
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	88	; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	89	; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i2]]
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	90	;
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	91	; IND-LABEL: @scalarize_induction_variable_01(
				92	; IND: vector.body:
				93	; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				94	; IND-NOT: add i64 {{.*}}, 2
				95	; IND: getelementptr inbounds i64, i64* %a, i64 %index
				96	;
				97	; UNROLL-LABEL: @scalarize_induction_variable_01(
				98	; UNROLL: vector.body:
				99	; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				100	; UNROLL-NOT: add i64 {{.*}}, 4
				101	; UNROLL: %[[g1:.+]] = getelementptr inbounds i64, i64* %a, i64 %index
				102	; UNROLL: getelementptr i64, i64* %[[g1]], i64 2
				103
				104	define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) {
				105	entry:
				106	br label %for.body
				107
				108	for.body:
				109	%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
				110	%sum = phi i64 [ %2, %for.body ], [ 0, %entry ]
				111	%0 = getelementptr inbounds i64, i64* %a, i64 %i
				112	%1 = load i64, i64* %0, align 8
				113	%2 = add i64 %1, %sum
				114	%i.next = add nuw nsw i64 %i, 1
				115	%cond = icmp slt i64 %i.next, %n
				116	br i1 %cond, label %for.body, label %for.end
				117
				118	for.end:
				119	%3 = phi i64 [ %2, %for.body ]
				120	ret i64 %3
				121	}
				122
				123	; Make sure we scalarize the step vectors used for the pointer arithmetic. We
				124	; can't easily simplify vectorized step vectors.
				125	;
				126	; float s = 0;
				127	; for (int i = 0; i < n; i += 8)
				128	; s += (a[i] + b[i] + 1.0f);
				129	;
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	130	; CHECK-LABEL: @scalarize_induction_variable_02(
				131	; CHECK: vector.body:
				132	; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				133	; CHECK: %offset.idx = shl i64 %index, 3
				134	; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0
				135	; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8
				136	; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]]
				137	; CHECK: getelementptr inbounds float, float* %a, i64 %[[i1]]
				138	; CHECK: getelementptr inbounds float, float* %b, i64 %[[i0]]
				139	; CHECK: getelementptr inbounds float, float* %b, i64 %[[i1]]
				140	;
				141	; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
				142	; UNROLL-NO-IC: vector.body:
				143	; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				144	; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3
				145	; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0
				146	; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8
				147	; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16
				148	; UNROLL-NO-IC: %[[i3:.+]] = add i64 %offset.idx, 24
				149	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i0]]
				150	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i1]]
				151	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i2]]
				152	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i3]]
				153	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i0]]
				154	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i1]]
				155	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i2]]
				156	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i3]]
				157	;
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	158	; IND-LABEL: @scalarize_induction_variable_02(
				159	; IND: vector.body:
				160	; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				161	; IND: %[[i0:.+]] = shl i64 %index, 3
				162	; IND: %[[i1:.+]] = or i64 %[[i0]], 8
				163	; IND: getelementptr inbounds float, float* %a, i64 %[[i0]]
				164	; IND: getelementptr inbounds float, float* %a, i64 %[[i1]]
				165	;
				166	; UNROLL-LABEL: @scalarize_induction_variable_02(
				167	; UNROLL: vector.body:
				168	; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				169	; UNROLL: %[[i0:.+]] = shl i64 %index, 3
				170	; UNROLL: %[[i1:.+]] = or i64 %[[i0]], 8
				171	; UNROLL: %[[i2:.+]] = or i64 %[[i0]], 16
				172	; UNROLL: %[[i3:.+]] = or i64 %[[i0]], 24
				173	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i0]]
				174	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i1]]
				175	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i2]]
				176	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i3]]
				177
				178	define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
				179	entry:
				180	br label %for.body
				181
				182	for.body:
				183	%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
				184	%s = phi float [ 0.0, %entry ], [ %6, %for.body ]
				185	%0 = getelementptr inbounds float, float* %a, i64 %i
				186	%1 = load float, float* %0, align 4
				187	%2 = getelementptr inbounds float, float* %b, i64 %i
				188	%3 = load float, float* %2, align 4
				189	%4 = fadd fast float %s, 1.0
				190	%5 = fadd fast float %4, %1
				191	%6 = fadd fast float %5, %3
				192	%i.next = add nuw nsw i64 %i, 8
				193	%cond = icmp slt i64 %i.next, %n
				194	br i1 %cond, label %for.body, label %for.end
				195
				196	for.end:
				197	%s.lcssa = phi float [ %6, %for.body ]
				198	ret float %s.lcssa
				199	}
				200
				201	; Make sure we scalarize the step vectors used for the pointer arithmetic. We
				202	; can't easily simplify vectorized step vectors. (Interleaved accesses.)
				203	;
				204	; for (int i = 0; i < n; ++i)
				205	; a[i].f ^= y;
				206	;
				207	; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
				208	; INTERLEAVE: vector.body:
				209	; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				210	; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
				211	; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
				212	; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
				213	; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
				214	; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
				215	; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
				216	; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	217	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
				218	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
				219	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
				220	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
				221	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
				222	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
				223	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
				224	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	225
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	226	%pair.i32 = type { i32, i32 }
				227	define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) {
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	228	entry:
				229	br label %for.body
				230
				231	for.body:
				232	%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	233	%f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	234	%0 = load i32, i32* %f, align 8
				235	%1 = xor i32 %0, %y
				236	store i32 %1, i32* %f, align 8
				237	%i.next = add nuw nsw i64 %i, 1
				238	%cond = icmp slt i64 %i.next, %n
				239	br i1 %cond, label %for.body, label %for.end
				240
				241	for.end:
				242	ret void
				243	}
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	244
Matthew Simpson	58f5628	2016-08-02 14:29:41 +0000	[diff] [blame]	245	; Make sure we scalarize the step vectors used for the pointer arithmetic. We
				246	; can't easily simplify vectorized step vectors. (Interleaved accesses.)
				247	;
				248	; for (int i = 0; i < n; ++i)
				249	; p[i].f = a[i * 4]
				250	;
				251	; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
				252	; INTERLEAVE: vector.body:
				253	; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				254	; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
				255	; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
				256	; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
				257	; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
				258	; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
				259	; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
				260	; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	261	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
				262	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
				263	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
				264	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
				265	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
				266	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
				267	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
				268	; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1
Matthew Simpson	58f5628	2016-08-02 14:29:41 +0000	[diff] [blame]	269
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	270	define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) {
Matthew Simpson	58f5628	2016-08-02 14:29:41 +0000	[diff] [blame]	271	entry:
				272	br label %for.body
				273
				274	for.body:
				275	%i = phi i64 [ %i.next, %for.body ], [ 0, %entry]
				276	%0 = shl nsw i64 %i, 2
				277	%1 = getelementptr inbounds i32, i32* %a, i64 %0
				278	%2 = load i32, i32* %1, align 1
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	279	%3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
Matthew Simpson	58f5628	2016-08-02 14:29:41 +0000	[diff] [blame]	280	store i32 %2, i32* %3, align 1
				281	%i.next = add nuw nsw i64 %i, 1
				282	%4 = trunc i64 %i.next to i32
				283	%cond = icmp eq i32 %4, %n
				284	br i1 %cond, label %for.end, label %for.body
				285
				286	for.end:
				287	ret void
				288	}
				289
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	290	; PR30542. Ensure we generate all the scalar steps for the induction variable.
				291	; The scalar induction variable is used by a getelementptr instruction
				292	; (uniform), and a udiv (non-uniform).
				293	;
				294	; int sum = 0;
				295	; for (int i = 0; i < n; ++i) {
				296	; int x = a[i];
				297	; if (c)
				298	; x /= i;
				299	; sum += x;
				300	; }
				301	;
				302	; CHECK-LABEL: @scalarize_induction_variable_05(
				303	; CHECK: vector.body:
				304	; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
				305	; CHECK: %[[I0:.+]] = add i32 %index, 0
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	306	; CHECK: getelementptr inbounds i32, i32* %a, i32 %[[I0]]
				307	; CHECK: pred.udiv.if:
				308	; CHECK: udiv i32 {{.*}}, %[[I0]]
				309	; CHECK: pred.udiv.if1:
Matthew Simpson	c62266d	2016-10-25 18:59:45 +0000	[diff] [blame^]	310	; CHECK: %[[I1:.+]] = add i32 %index, 1
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	311	; CHECK: udiv i32 {{.*}}, %[[I1]]
				312	;
				313	; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_05(
				314	; UNROLL-NO-IC: vector.body:
				315	; UNROLL-NO-IC: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
				316	; UNROLL-NO-IC: %[[I0:.+]] = add i32 %index, 0
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	317	; UNROLL-NO-IC: %[[I2:.+]] = add i32 %index, 2
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	318	; UNROLL-NO-IC: getelementptr inbounds i32, i32* %a, i32 %[[I0]]
				319	; UNROLL-NO-IC: getelementptr inbounds i32, i32* %a, i32 %[[I2]]
				320	; UNROLL-NO-IC: pred.udiv.if:
				321	; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I0]]
				322	; UNROLL-NO-IC: pred.udiv.if6:
Matthew Simpson	c62266d	2016-10-25 18:59:45 +0000	[diff] [blame^]	323	; UNROLL-NO-IC: %[[I1:.+]] = add i32 %index, 1
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	324	; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I1]]
				325	; UNROLL-NO-IC: pred.udiv.if8:
				326	; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I2]]
				327	; UNROLL-NO-IC: pred.udiv.if10:
Matthew Simpson	c62266d	2016-10-25 18:59:45 +0000	[diff] [blame^]	328	; UNROLL-NO-IC: %[[I3:.+]] = add i32 %index, 3
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	329	; UNROLL-NO-IC: udiv i32 {{.*}}, %[[I3]]
				330	;
				331	; IND-LABEL: @scalarize_induction_variable_05(
				332	; IND: vector.body:
				333	; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	334	; IND: %[[E0:.+]] = sext i32 %index to i64
				335	; IND: getelementptr inbounds i32, i32* %a, i64 %[[E0]]
				336	; IND: pred.udiv.if:
				337	; IND: udiv i32 {{.*}}, %index
				338	; IND: pred.udiv.if1:
Matthew Simpson	c62266d	2016-10-25 18:59:45 +0000	[diff] [blame^]	339	; IND: %[[I1:.+]] = or i32 %index, 1
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	340	; IND: udiv i32 {{.*}}, %[[I1]]
				341	;
				342	; UNROLL-LABEL: @scalarize_induction_variable_05(
				343	; UNROLL: vector.body:
				344	; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	345	; UNROLL: %[[I2:.+]] = or i32 %index, 2
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	346	; UNROLL: %[[E0:.+]] = sext i32 %index to i64
				347	; UNROLL: %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[E0]]
				348	; UNROLL: getelementptr i32, i32* %[[G0]], i64 2
				349	; UNROLL: pred.udiv.if:
				350	; UNROLL: udiv i32 {{.*}}, %index
				351	; UNROLL: pred.udiv.if6:
Matthew Simpson	c62266d	2016-10-25 18:59:45 +0000	[diff] [blame^]	352	; UNROLL: %[[I1:.+]] = or i32 %index, 1
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	353	; UNROLL: udiv i32 {{.*}}, %[[I1]]
				354	; UNROLL: pred.udiv.if8:
				355	; UNROLL: udiv i32 {{.*}}, %[[I2]]
				356	; UNROLL: pred.udiv.if10:
Matthew Simpson	c62266d	2016-10-25 18:59:45 +0000	[diff] [blame^]	357	; UNROLL: %[[I3:.+]] = or i32 %index, 3
Matthew Simpson	7808833	2016-09-30 15:13:52 +0000	[diff] [blame]	358	; UNROLL: udiv i32 {{.*}}, %[[I3]]
				359
				360	define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) {
				361	entry:
				362	br label %for.body
				363
				364	for.body:
				365	%i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
				366	%sum = phi i32 [ 0, %entry ], [ %tmp4, %if.end ]
				367	%tmp0 = getelementptr inbounds i32, i32* %a, i32 %i
				368	%tmp1 = load i32, i32* %tmp0, align 4
				369	br i1 %c, label %if.then, label %if.end
				370
				371	if.then:
				372	%tmp2 = udiv i32 %tmp1, %i
				373	br label %if.end
				374
				375	if.end:
				376	%tmp3 = phi i32 [ %tmp2, %if.then ], [ %tmp1, %for.body ]
				377	%tmp4 = add i32 %tmp3, %sum
				378	%i.next = add nuw nsw i32 %i, 1
				379	%cond = icmp slt i32 %i.next, %n
				380	br i1 %cond, label %for.body, label %for.end
				381
				382	for.end:
				383	%tmp5 = phi i32 [ %tmp4, %if.end ]
				384	ret i32 %tmp5
				385	}
				386
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	387	; Ensure we generate both a vector and a scalar induction variable. In this
				388	; test, the induction variable is used by an instruction that will be
				389	; vectorized (trunc) as well as an instruction that will remain in scalar form
				390	; (getelementptr).
				391	;
				392	; CHECK-LABEL: @iv_vector_and_scalar_users(
				393	; CHECK: vector.body:
				394	; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				395	; CHECK: %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
				396	; CHECK: %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
				397	; CHECK: %[[i0:.+]] = add i64 %index, 0
				398	; CHECK: %[[i1:.+]] = add i64 %index, 1
				399	; CHECK: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i0]], i32 1
				400	; CHECK: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
				401	; CHECK: %index.next = add i64 %index, 2
				402	; CHECK: %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
				403	; CHECK: %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
				404	;
				405	; IND-LABEL: @iv_vector_and_scalar_users(
				406	; IND: vector.body:
				407	; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				408	; IND: %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
				409	; IND: %[[i1:.+]] = or i64 %index, 1
				410	; IND: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
				411	; IND: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
				412	; IND: %index.next = add i64 %index, 2
				413	; IND: %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
				414	;
				415	; UNROLL-LABEL: @iv_vector_and_scalar_users(
				416	; UNROLL: vector.body:
				417	; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				418	; UNROLL: %vec.ind2 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next5, %vector.body ]
				419	; UNROLL: %[[i1:.+]] = or i64 %index, 1
				420	; UNROLL: %[[i2:.+]] = or i64 %index, 2
				421	; UNROLL: %[[i3:.+]] = or i64 %index, 3
				422	; UNROLL: %step.add3 = add <2 x i32> %vec.ind2, <i32 2, i32 2>
				423	; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
				424	; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
				425	; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i2]], i32 1
				426	; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i3]], i32 1
				427	; UNROLL: %index.next = add i64 %index, 4
				428	; UNROLL: %vec.ind.next5 = add <2 x i32> %vec.ind2, <i32 4, i32 4>
				429
				430	%pair.i16 = type { i16, i16 }
				431	define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) {
				432	entry:
				433	br label %for.body
				434
				435	for.body:
				436	%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
				437	%0 = trunc i64 %i to i32
				438	%1 = add i32 %a, %0
				439	%2 = trunc i32 %1 to i16
				440	%3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1
				441	store i16 %2, i16* %3, align 2
				442	%i.next = add nuw nsw i64 %i, 1
				443	%4 = trunc i64 %i.next to i32
				444	%cond = icmp eq i32 %4, %n
				445	br i1 %cond, label %for.end, label %for.body
				446
				447	for.end:
				448	ret void
				449	}
				450
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	451	; Make sure that the loop exit count computation does not overflow for i8 and
				452	; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
				453	; induction variable to a bigger type the exit count computation will overflow
				454	; to 0.
				455	; PR17532
				456
				457	; CHECK-LABEL: i8_loop
Benjamin Kramer	c10563d	2014-01-11 21:06:00 +0000	[diff] [blame]	458	; CHECK: icmp eq i32 {{.*}}, 256
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	459	define i32 @i8_loop() nounwind readnone ssp uwtable {
				460	br label %1
				461
				462	; <label>:1 ; preds = %1, %0
				463	%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
				464	%b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
				465	%2 = and i32 %a.0, 4
				466	%3 = add i8 %b.0, -1
				467	%4 = icmp eq i8 %3, 0
				468	br i1 %4, label %5, label %1
				469
				470	; <label>:5 ; preds = %1
				471	ret i32 %2
				472	}
				473
				474	; CHECK-LABEL: i16_loop
Benjamin Kramer	c10563d	2014-01-11 21:06:00 +0000	[diff] [blame]	475	; CHECK: icmp eq i32 {{.*}}, 65536
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	476
				477	define i32 @i16_loop() nounwind readnone ssp uwtable {
				478	br label %1
				479
				480	; <label>:1 ; preds = %1, %0
				481	%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
				482	%b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
				483	%2 = and i32 %a.0, 4
				484	%3 = add i16 %b.0, -1
				485	%4 = icmp eq i16 %3, 0
				486	br i1 %4, label %5, label %1
				487
				488	; <label>:5 ; preds = %1
				489	ret i32 %2
				490	}
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	491
				492	; This loop has a backedge taken count of i32_max. We need to check for this
				493	; condition and branch directly to the scalar loop.
				494
				495	; CHECK-LABEL: max_i32_backedgetaken
James Molloy	cba9230	2015-09-02 10:15:22 +0000	[diff] [blame]	496	; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	497
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	498	; CHECK: middle.block:
				499	; CHECK: %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	500	; CHECK: scalar.ph:
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	501	; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
				502	; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	503
				504	define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
				505
				506	br label %1
				507
				508	; <label>:1 ; preds = %1, %0
				509	%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
				510	%b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
				511	%2 = and i32 %a.0, 4
				512	%3 = add i32 %b.0, -1
				513	%4 = icmp eq i32 %3, 0
				514	br i1 %4, label %5, label %1
				515
				516	; <label>:5 ; preds = %1
				517	ret i32 %2
				518	}
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	519
				520	; When generating the overflow check we must make sure that the induction start value
				521	; is defined before the branch to the scalar preheader.
				522
				523	; CHECK-LABEL: testoverflowcheck
				524	; CHECK: entry
				525	; CHECK: %[[LOAD:.*]] = load i8
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	526	; CHECK: br
				527
				528	; CHECK: scalar.ph
James Molloy	c07701b	2015-09-02 10:14:54 +0000	[diff] [blame]	529	; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ]
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	530
				531	@e = global i8 1, align 1
				532	@d = common global i32 0, align 4
				533	@c = common global i32 0, align 4
				534	define i32 @testoverflowcheck() {
				535	entry:
David Blaikie	a79ac14	2015-02-27 21:17:42 +0000	[diff] [blame]	536	%.pr.i = load i8, i8* @e, align 1
				537	%0 = load i32, i32* @d, align 4
				538	%c.promoted.i = load i32, i32* @c, align 4
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	539	br label %cond.end.i
				540
				541	cond.end.i:
				542	%inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
				543	%and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
				544	%and.i = and i32 %0, %and3.i
				545	%inc.i = add i8 %inc4.i, 1
				546	%tobool.i = icmp eq i8 %inc.i, 0
				547	br i1 %tobool.i, label %loopexit, label %cond.end.i
				548
				549	loopexit:
				550	ret i32 %and.i
				551	}
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	552
				553	; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32)
				554	; In order to recognize %sphi as an induction PHI and vectorize this loop,
				555	; we need to convert the SCEV expression into an AddRecExpr.
				556	; The expression gets converted to {zext i8 %t to i32,+,1}.
				557
				558	; CHECK-LABEL: wrappingindvars1
				559	; CHECK-LABEL: vector.scevcheck
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	560	; CHECK-LABEL: vector.ph
				561	; CHECK: %[[START:.]] = add <2 x i32> %{{.}}, <i32 0, i32 1>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	562	; CHECK-LABEL: vector.body
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	563	; CHECK: %[[PHI:.]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.]], %vector.body ]
				564	; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 2, i32 2>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	565	define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
				566	entry:
				567	%st = zext i8 %t to i16
				568	%ext = zext i8 %t to i32
				569	%ecmp = icmp ult i16 %st, 42
				570	br i1 %ecmp, label %loop, label %exit
				571
				572	loop:
				573
				574	%idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
				575	%idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
				576	%sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]
				577
				578	%ptr = getelementptr inbounds i32, i32* %A, i8 %idx
				579	store i32 %sphi, i32* %ptr
				580
				581	%idx.inc = add i8 %idx, 1
				582	%idx.inc.ext = zext i8 %idx.inc to i32
				583	%idx.b.inc = add nuw nsw i32 %idx.b, 1
				584
				585	%c = icmp ult i32 %idx.b, %len
				586	br i1 %c, label %loop, label %exit
				587
				588	exit:
				589	ret void
				590	}
				591
				592	; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32))
				593	; In order to recognize %sphi as an induction PHI and vectorize this loop,
				594	; we need to convert the SCEV expression into an AddRecExpr.
				595	; The expression gets converted to ({4 * (zext %t to i32),+,4}).
				596	; CHECK-LABEL: wrappingindvars2
				597	; CHECK-LABEL: vector.scevcheck
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	598	; CHECK-LABEL: vector.ph
				599	; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 4>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	600	; CHECK-LABEL: vector.body
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	601	; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
				602	; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 8, i32 8>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	603	define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
					; Test input: each iteration stores %sphi to the element of %A indexed by %idx.
					; %idx is an i8 counter whose increment carries no nuw/nsw flags, and %sphi
					; is 4 times the zero-extension of that counter to i32.
				604
				605	entry:
					; %ext.mul = 4 * zext(%t) is the value %sphi takes on loop entry.
				606	%st = zext i8 %t to i16
				607	%ext = zext i8 %t to i32
				608	%ext.mul = mul i32 %ext, 4
				609
					; Enter the loop only when %t, zero-extended to i16, is below 42 (unsigned).
				610	%ecmp = icmp ult i16 %st, 42
				611	br i1 %ecmp, label %loop, label %exit
				612
				613	loop:
				614
					; %idx:   i8 counter starting at %t.
					; %sphi:  starts at %ext.mul and takes %mul on the back edge.
					; %idx.b: i32 iteration counter starting at 0.
				615	%idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
				616	%sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
				617	%idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
				618
				619	%ptr = getelementptr inbounds i32, i32* %A, i8 %idx
				620	store i32 %sphi, i32* %ptr
				621
					; The i8 add has no wrap flags; %mul = 4 * zext(%idx.inc) feeds %sphi.
				622	%idx.inc = add i8 %idx, 1
				623	%idx.inc.ext = zext i8 %idx.inc to i32
				624	%mul = mul i32 %idx.inc.ext, 4
				625	%idx.b.inc = add nuw nsw i32 %idx.b, 1
				626
					; Loop again while the pre-increment counter %idx.b is below %len (unsigned).
				627	%c = icmp ult i32 %idx.b, %len
				628	br i1 %c, label %loop, label %exit
				629
				630	exit:
				631	ret void
				632	}
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	633
				634	; Check that we generate vectorized IVs in the pre-header
				635	; instead of widening the scalar IV inside the loop, when
				636	; we know how to do that.
				637	; IND-LABEL: veciv
				638	; IND: vector.body:
				639	; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	640	; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	641	; IND: %index.next = add i32 %index, 2
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	642	; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	643	; IND: %[[CMP:.*]] = icmp eq i32 %index.next
				644	; IND: br i1 %[[CMP]]
				645	; UNROLL-LABEL: veciv
				646	; UNROLL: vector.body:
				647	; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	648	; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	649	; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	650	; UNROLL: %index.next = add i32 %index, 4
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	651	; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	652	; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
				653	; UNROLL: br i1 %[[CMP]]
				654	define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
					; Test input: stores the i32 induction variable %indvars.iv into the element
					; of %a that it indexes. %start is not referenced in the body.
				655	for.body.preheader:
				656	br label %for.body
				657
				658	for.body:
					; %indvars.iv starts at 0 and advances by 1; the same value is used as both
					; the element index and the stored data.
				659	%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
				660	%arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
				661	store i32 %indvars.iv, i32* %arrayidx, align 4
				662	%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
					; Exit once the incremented induction variable equals %k.
				663	%exitcond = icmp eq i32 %indvars.iv.next, %k
				664	br i1 %exitcond, label %exit, label %for.body
				665
				666	exit:
				667	ret void
				668	}
				669
				670	; IND-LABEL: trunciv
				671	; IND: vector.body:
				672	; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				673	; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ]
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	674	; IND: %index.next = add i64 %index, 2
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	675	; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	676	; IND: %[[CMP:.*]] = icmp eq i64 %index.next
				677	; IND: br i1 %[[CMP]]
				678	define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
					; Test input: the induction variable is i64, but only its truncation to i32
					; is used for the element index and the stored value. %start is not
					; referenced in the body.
				679	for.body.preheader:
				680	br label %for.body
				681
				682	for.body:
					; %indvars.iv starts at 0 and advances by 1 as an i64.
				683	%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
					; %trunc.iv is both the index into %a and the value written there.
				684	%trunc.iv = trunc i64 %indvars.iv to i32
				685	%arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
				686	store i32 %trunc.iv, i32* %arrayidx, align 4
				687	%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
					; Exit once the incremented i64 induction variable equals %k.
				688	%exitcond = icmp eq i64 %indvars.iv.next, %k
				689	br i1 %exitcond, label %exit, label %for.body
				690
				691	exit:
				692	ret void
				693	}
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	694
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	695	; CHECK-LABEL: @nonprimary(
				696	; CHECK: vector.ph:
				697	; CHECK: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
				698	; CHECK: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
				699	; CHECK: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
				700	; CHECK: vector.body:
				701	; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				702	; CHECK: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
				703	; CHECK: %offset.idx = add i32 %i, %index
				704	; CHECK: %[[A1:.*]] = add i32 %offset.idx, 0
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	705	; CHECK: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i32 %[[A1]]
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	706	; CHECK: %[[G3:.*]] = getelementptr i32, i32* %[[G1]], i32 0
				707	; CHECK: %[[B1:.*]] = bitcast i32* %[[G3]] to <2 x i32>*
				708	; CHECK: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
				709	; CHECK: %index.next = add i32 %index, 2
				710	; CHECK: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
				711	; CHECK: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
				712	; CHECK: br i1 %[[CMP]]
				713	;
				714	; IND-LABEL: @nonprimary(
				715	; IND: vector.ph:
				716	; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
				717	; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
				718	; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
				719	; IND: vector.body:
				720	; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				721	; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
				722	; IND: %[[A1:.*]] = add i32 %index, %i
				723	; IND: %[[S1:.*]] = sext i32 %[[A1]] to i64
				724	; IND: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
				725	; IND: %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
				726	; IND: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
				727	; IND: %index.next = add i32 %index, 2
				728	; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
				729	; IND: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
				730	; IND: br i1 %[[CMP]]
				731	;
				732	; UNROLL-LABEL: @nonprimary(
				733	; UNROLL: vector.ph:
				734	; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
				735	; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
				736	; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
				737	; UNROLL: vector.body:
				738	; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				739	; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
				740	; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
				741	; UNROLL: %[[A1:.*]] = add i32 %index, %i
				742	; UNROLL: %[[S1:.*]] = sext i32 %[[A1]] to i64
				743	; UNROLL: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
				744	; UNROLL: %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
				745	; UNROLL: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
				746	; UNROLL: %[[G2:.*]] = getelementptr i32, i32* %[[G1]], i64 2
				747	; UNROLL: %[[B2:.*]] = bitcast i32* %[[G2]] to <2 x i32>*
				748	; UNROLL: store <2 x i32> %step.add, <2 x i32>* %[[B2]]
				749	; UNROLL: %index.next = add i32 %index, 4
				750	; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
				751	; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
				752	; UNROLL: br i1 %[[CMP]]
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	753	define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
					; Test input: stores the i32 induction variable %indvars.iv into the element
					; of %a that it indexes. The induction variable starts at the argument %i
					; rather than at 0. %start is not referenced in the body.
				754	for.body.preheader:
				755	br label %for.body
				756
				757	for.body:
					; %indvars.iv starts at %i and advances by 1; the same value is used as both
					; the element index and the stored data.
				758	%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
				759	%arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
				760	store i32 %indvars.iv, i32* %arrayidx, align 4
Matthew Simpson	18d8898	2016-08-02 15:25:16 +0000	[diff] [blame]	761	%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
					; Exit once the incremented induction variable equals %k.
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	762	%exitcond = icmp eq i32 %indvars.iv.next, %k
				763	br i1 %exitcond, label %exit, label %for.body
				764
				765	exit:
				766	ret void
				767	}