Blame - llvm/test/Transforms/LoopVectorize/induction.ll - toolchain/llvm-project

blob: 72e18bf1816fe764f383493926146580cf326a7d [file] [log] [blame]

Sanjay Patel	b653de1	2014-09-10 17:58:16 +0000	[diff] [blame]	1	; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	2	; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
				3	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	4	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	5	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	6
				7	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
				8
				9	; Make sure that we can handle multiple integer induction variables.
Matt Arsenault	e64c7c7	2013-10-02 20:29:00 +0000	[diff] [blame]	10	; CHECK-LABEL: @multi_int_induction(
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	11	; CHECK: vector.body:
				12	; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
James Molloy	c07701b	2015-09-02 10:14:54 +0000	[diff] [blame]	13	; CHECK: %[[VAR:.*]] = trunc i64 %index to i32
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	14	; CHECK: %offset.idx = add i32 190, %[[VAR]]
				15	define void @multi_int_induction(i32* %A, i32 %N) { ; Test input: loop with two integer inductions (an i64 index and an i32 counter); stores the counter into A[index].
				16	for.body.lr.ph:
				17	br label %for.body
				18
				19	for.body:
				20	%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; i64 index, starts at 0, +1 per iteration
				21	%count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ] ; i32 counter, starts at 190, +1 per iteration
David Blaikie	79e6c74	2015-02-27 19:29:02 +0000	[diff] [blame]	22	%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
Arnold Schwaighofer	2e7a922	2013-05-14 00:21:18 +0000	[diff] [blame]	23	store i32 %count.09, i32* %arrayidx2, align 4 ; A[index] = counter
				24	%inc = add nsw i32 %count.09, 1
				25	%indvars.iv.next = add i64 %indvars.iv, 1
				26	%lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; exit test is done on the truncated i64 index
				27	%exitcond = icmp ne i32 %lftr.wideiv, %N
				28	br i1 %exitcond, label %for.body, label %for.end ; keep looping while trunc(index + 1) != N
				29
				30	for.end:
				31	ret void
				32	}
				33
Arnold Schwaighofer	a846a7f	2013-11-01 22:18:19 +0000	[diff] [blame]	34	; Make sure we remove unneeded vectorization of induction variables.
				35	; In order for instcombine to clean up the vectorized induction variables that we
				36	; create in the loop vectorizer we need to perform some form of redundancy
				37	; elimination to get rid of multiple uses.
				38
				39	; IND-LABEL: scalar_use
				40
				41	; IND: br label %vector.body
				42	; IND: vector.body:
				43	; Vectorized induction variable.
				44	; IND-NOT: insertelement <2 x i64>
				45	; IND-NOT: shufflevector <2 x i64>
				46	; IND: br {{.*}}, label %vector.body
				47
				48	define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; Test input: a[iv + offset] += b * a[iv + offset2]; the induction variable is only used in scalar address computations.
				49	entry:
				50	br label %for.body
				51
				52	for.body:
				53	%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; induction variable, starts at 0, +1 per iteration
				54	%ind.sum = add i64 %iv, %offset ; first index: iv + offset
David Blaikie	79e6c74	2015-02-27 19:29:02 +0000	[diff] [blame]	55	%arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
David Blaikie	a79ac14	2015-02-27 21:17:42 +0000	[diff] [blame]	56	%l1 = load float, float* %arr.idx, align 4
Arnold Schwaighofer	a846a7f	2013-11-01 22:18:19 +0000	[diff] [blame]	57	%ind.sum2 = add i64 %iv, %offset2 ; second index: iv + offset2
David Blaikie	79e6c74	2015-02-27 19:29:02 +0000	[diff] [blame]	58	%arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
David Blaikie	a79ac14	2015-02-27 21:17:42 +0000	[diff] [blame]	59	%l2 = load float, float* %arr.idx2, align 4
Arnold Schwaighofer	a846a7f	2013-11-01 22:18:19 +0000	[diff] [blame]	60	%m = fmul fast float %b, %l2
				61	%ad = fadd fast float %l1, %m
				62	store float %ad, float* %arr.idx, align 4 ; result is written back to the first address
				63	%iv.next = add nuw nsw i64 %iv, 1
				64	%exitcond = icmp eq i64 %iv.next, %n
				65	br i1 %exitcond, label %loopexit, label %for.body ; exit once iv + 1 == n
				66
				67	loopexit:
				68	ret void
				69	}
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	70
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	71	; Make sure we don't create a vector induction phi node that is unused.
				72	; Scalarize the step vectors instead.
				73	;
				74	; for (int i = 0; i < n; ++i)
				75	; sum += a[i];
				76	;
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	77	; CHECK-LABEL: @scalarize_induction_variable_01(
				78	; CHECK: vector.body:
				79	; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				80	; CHECK: %[[i0:.+]] = add i64 %index, 0
				81	; CHECK: %[[i1:.+]] = add i64 %index, 1
				82	; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
				83	; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
				84	;
				85	; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
				86	; UNROLL-NO-IC: vector.body:
				87	; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				88	; UNROLL-NO-IC: %[[i0:.+]] = add i64 %index, 0
				89	; UNROLL-NO-IC: %[[i1:.+]] = add i64 %index, 1
				90	; UNROLL-NO-IC: %[[i2:.+]] = add i64 %index, 2
				91	; UNROLL-NO-IC: %[[i3:.+]] = add i64 %index, 3
				92	; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
				93	; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
				94	; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i2]]
				95	; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i3]]
				96	;
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	97	; IND-LABEL: @scalarize_induction_variable_01(
				98	; IND: vector.body:
				99	; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				100	; IND-NOT: add i64 {{.*}}, 2
				101	; IND: getelementptr inbounds i64, i64* %a, i64 %index
				102	;
				103	; UNROLL-LABEL: @scalarize_induction_variable_01(
				104	; UNROLL: vector.body:
				105	; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				106	; UNROLL-NOT: add i64 {{.*}}, 4
				107	; UNROLL: %[[g1:.+]] = getelementptr inbounds i64, i64* %a, i64 %index
				108	; UNROLL: getelementptr i64, i64* %[[g1]], i64 2
				109
				110	define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) { ; Test input: integer sum reduction over a[i] with a unit-stride i64 induction variable; returns the sum.
				111	entry:
				112	br label %for.body
				113
				114	for.body:
				115	%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; induction variable, starts at 0, +1 per iteration
				116	%sum = phi i64 [ %2, %for.body ], [ 0, %entry ] ; running sum, starts at 0
				117	%0 = getelementptr inbounds i64, i64* %a, i64 %i ; the induction variable is used only as a GEP index
				118	%1 = load i64, i64* %0, align 8
				119	%2 = add i64 %1, %sum
				120	%i.next = add nuw nsw i64 %i, 1
				121	%cond = icmp slt i64 %i.next, %n ; signed compare against n
				122	br i1 %cond, label %for.body, label %for.end
				123
				124	for.end:
				125	%3 = phi i64 [ %2, %for.body ] ; single-entry phi carrying the final sum out of the loop
				126	ret i64 %3
				127	}
				128
				129	; Make sure we scalarize the step vectors used for the pointer arithmetic. We
				130	; can't easily simplify vectorized step vectors.
				131	;
				132	; float s = 0;
				133	; for (int i = 0; i < n; i += 8)
				134	; s += (a[i] + b[i] + 1.0f);
				135	;
Matthew Simpson	3c3b4a2	2016-07-14 14:36:06 +0000	[diff] [blame]	136	; CHECK-LABEL: @scalarize_induction_variable_02(
				137	; CHECK: vector.body:
				138	; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				139	; CHECK: %offset.idx = shl i64 %index, 3
				140	; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0
				141	; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8
				142	; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]]
				143	; CHECK: getelementptr inbounds float, float* %a, i64 %[[i1]]
				144	; CHECK: getelementptr inbounds float, float* %b, i64 %[[i0]]
				145	; CHECK: getelementptr inbounds float, float* %b, i64 %[[i1]]
				146	;
				147	; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
				148	; UNROLL-NO-IC: vector.body:
				149	; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				150	; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3
				151	; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0
				152	; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8
				153	; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16
				154	; UNROLL-NO-IC: %[[i3:.+]] = add i64 %offset.idx, 24
				155	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i0]]
				156	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i1]]
				157	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i2]]
				158	; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i3]]
				159	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i0]]
				160	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i1]]
				161	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i2]]
				162	; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i3]]
				163	;
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	164	; IND-LABEL: @scalarize_induction_variable_02(
				165	; IND: vector.body:
				166	; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				167	; IND: %[[i0:.+]] = shl i64 %index, 3
				168	; IND: %[[i1:.+]] = or i64 %[[i0]], 8
				169	; IND: getelementptr inbounds float, float* %a, i64 %[[i0]]
				170	; IND: getelementptr inbounds float, float* %a, i64 %[[i1]]
				171	;
				172	; UNROLL-LABEL: @scalarize_induction_variable_02(
				173	; UNROLL: vector.body:
				174	; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				175	; UNROLL: %[[i0:.+]] = shl i64 %index, 3
				176	; UNROLL: %[[i1:.+]] = or i64 %[[i0]], 8
				177	; UNROLL: %[[i2:.+]] = or i64 %[[i0]], 16
				178	; UNROLL: %[[i3:.+]] = or i64 %[[i0]], 24
				179	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i0]]
				180	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i1]]
				181	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i2]]
				182	; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i3]]
				183
				184	define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) { ; Test input: float reduction s += a[i] + b[i] + 1.0 with the induction variable stepping by 8; returns s.
				185	entry:
				186	br label %for.body
				187
				188	for.body:
				189	%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] ; induction variable, starts at 0, +8 per iteration
				190	%s = phi float [ 0.0, %entry ], [ %6, %for.body ] ; running float sum, starts at 0.0
				191	%0 = getelementptr inbounds float, float* %a, i64 %i
				192	%1 = load float, float* %0, align 4
				193	%2 = getelementptr inbounds float, float* %b, i64 %i ; same index is reused for the second array
				194	%3 = load float, float* %2, align 4
				195	%4 = fadd fast float %s, 1.0
				196	%5 = fadd fast float %4, %1
				197	%6 = fadd fast float %5, %3
				198	%i.next = add nuw nsw i64 %i, 8 ; stride of 8 elements
				199	%cond = icmp slt i64 %i.next, %n ; signed compare against n
				200	br i1 %cond, label %for.body, label %for.end
				201
				202	for.end:
				203	%s.lcssa = phi float [ %6, %for.body ] ; single-entry phi carrying the final sum out of the loop
				204	ret float %s.lcssa
				205	}
				206
				207	; Make sure we scalarize the step vectors used for the pointer arithmetic. We
				208	; can't easily simplify vectorized step vectors. (Interleaved accesses.)
				209	;
				210	; for (int i = 0; i < n; ++i)
				211	; a[i].f ^= y;
				212	;
				213	; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
				214	; INTERLEAVE: vector.body:
				215	; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				216	; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
				217	; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
				218	; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
				219	; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
				220	; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
				221	; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
				222	; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
				223	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i0]], i32 1
				224	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i1]], i32 1
				225	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i2]], i32 1
				226	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i3]], i32 1
				227	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i4]], i32 1
				228	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i5]], i32 1
				229	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i6]], i32 1
				230	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i7]], i32 1
				231
				232	%pair = type { i32, i32 } ; two-field struct; the loops below access field index 1
				233	define void @scalarize_induction_variable_03(%pair *%p, i32 %y, i64 %n) { ; Test input: p[i].<field 1> ^= y with a unit-stride i64 induction variable (strided struct-field access).
				234	entry:
				235	br label %for.body
				236
				237	for.body:
				238	%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; induction variable, starts at 0, +1 per iteration
				239	%f = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1 ; address of field 1 of p[i]
				240	%0 = load i32, i32* %f, align 8
				241	%1 = xor i32 %0, %y
				242	store i32 %1, i32* %f, align 8 ; write the xor result back to the same field
				243	%i.next = add nuw nsw i64 %i, 1
				244	%cond = icmp slt i64 %i.next, %n ; signed compare against n
				245	br i1 %cond, label %for.body, label %for.end
				246
				247	for.end:
				248	ret void
				249	}
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	250
Matthew Simpson	58f5628	2016-08-02 14:29:41 +0000	[diff] [blame]	251	; Make sure we scalarize the step vectors used for the pointer arithmetic. We
				252	; can't easily simplify vectorized step vectors. (Interleaved accesses.)
				253	;
				254	; for (int i = 0; i < n; ++i)
				255	; p[i].f = a[i * 4]
				256	;
				257	; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
				258	; INTERLEAVE: vector.body:
				259	; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				260	; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
				261	; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
				262	; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
				263	; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
				264	; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
				265	; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
				266	; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
				267	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i0]], i32 1
				268	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i1]], i32 1
				269	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i2]], i32 1
				270	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i3]], i32 1
				271	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i4]], i32 1
				272	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i5]], i32 1
				273	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i6]], i32 1
				274	; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i7]], i32 1
				275
				276	define void @scalarize_induction_variable_04(i32* %a, %pair* %p, i32 %n) { ; Test input: p[i].<field 1> = a[i * 4]; i64 induction variable with an i32 trip-count compare.
				277	entry:
				278	br label %for.body
				279
				280	for.body:
				281	%i = phi i64 [ %i.next, %for.body ], [ 0, %entry] ; induction variable, starts at 0, +1 per iteration
				282	%0 = shl nsw i64 %i, 2 ; i * 4
				283	%1 = getelementptr inbounds i32, i32* %a, i64 %0
				284	%2 = load i32, i32* %1, align 1
				285	%3 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1 ; address of field 1 of p[i]
				286	store i32 %2, i32* %3, align 1
				287	%i.next = add nuw nsw i64 %i, 1
				288	%4 = trunc i64 %i.next to i32 ; exit test is done on the truncated index
				289	%cond = icmp eq i32 %4, %n
				290	br i1 %cond, label %for.end, label %for.body ; exit once trunc(i + 1) == n
				291
				292	for.end:
				293	ret void
				294	}
				295
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	296	; Make sure that the loop exit count computation does not overflow for i8 and
				297	; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
				298	; induction variable to a bigger type the exit count computation will overflow
				299	; to 0.
				300	; PR17532
				301
				302	; CHECK-LABEL: i8_loop
Benjamin Kramer	c10563d	2014-01-11 21:06:00 +0000	[diff] [blame]	303	; CHECK: icmp eq i32 {{.*}}, 256
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	304	define i32 @i8_loop() nounwind readnone ssp uwtable { ; Test input: loop controlled by an i8 counter that starts at 0 and runs until it is 0 again after repeated add -1.
				305	br label %1
				306
				307	; <label>:1 ; preds = %1, %0
				308	%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] ; value carried around the loop, starts at 1
				309	%b.0 = phi i8 [ 0, %0 ], [ %3, %1 ] ; i8 counter, starts at 0
				310	%2 = and i32 %a.0, 4
				311	%3 = add i8 %b.0, -1 ; no nsw/nuw flags on the decrement
				312	%4 = icmp eq i8 %3, 0
				313	br i1 %4, label %5, label %1 ; exit when the decremented counter equals 0
				314
				315	; <label>:5 ; preds = %1
				316	ret i32 %2
				317	}
				318
				319	; CHECK-LABEL: i16_loop
Benjamin Kramer	c10563d	2014-01-11 21:06:00 +0000	[diff] [blame]	320	; CHECK: icmp eq i32 {{.*}}, 65536
Arnold Schwaighofer	b72cb4e	2013-11-18 13:14:32 +0000	[diff] [blame]	321
				322	define i32 @i16_loop() nounwind readnone ssp uwtable { ; Test input: same loop shape as @i8_loop but with an i16 counter.
				323	br label %1
				324
				325	; <label>:1 ; preds = %1, %0
				326	%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] ; value carried around the loop, starts at 1
				327	%b.0 = phi i16 [ 0, %0 ], [ %3, %1 ] ; i16 counter, starts at 0
				328	%2 = and i32 %a.0, 4
				329	%3 = add i16 %b.0, -1 ; no nsw/nuw flags on the decrement
				330	%4 = icmp eq i16 %3, 0
				331	br i1 %4, label %5, label %1 ; exit when the decremented counter equals 0
				332
				333	; <label>:5 ; preds = %1
				334	ret i32 %2
				335	}
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	336
				337	; This loop has a backedge taken count of i32_max. We need to check for this
				338	; condition and branch directly to the scalar loop.
				339
				340	; CHECK-LABEL: max_i32_backedgetaken
James Molloy	cba9230	2015-09-02 10:15:22 +0000	[diff] [blame]	341	; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	342
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	343	; CHECK: middle.block:
				344	; CHECK: %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	345	; CHECK: scalar.ph:
Matthew Simpson	433cb1d	2016-07-06 14:26:59 +0000	[diff] [blame]	346	; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
				347	; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]
Arnold Schwaighofer	e206768	2014-05-29 22:10:01 +0000	[diff] [blame]	348
				349	define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; Test input: same loop shape as @i8_loop/@i16_loop but with a full-width i32 counter.
				350
				351	br label %1
				352
				353	; <label>:1 ; preds = %1, %0
				354	%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] ; value carried around the loop, starts at 1
				355	%b.0 = phi i32 [ 0, %0 ], [ %3, %1 ] ; i32 counter, starts at 0
				356	%2 = and i32 %a.0, 4
				357	%3 = add i32 %b.0, -1 ; no nsw/nuw flags on the decrement
				358	%4 = icmp eq i32 %3, 0
				359	br i1 %4, label %5, label %1 ; exit when the decremented counter equals 0
				360
				361	; <label>:5 ; preds = %1
				362	ret i32 %2
				363	}
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	364
				365	; When generating the overflow check we must make sure that the induction start value
				366	; is defined before the branch to the scalar preheader.
				367
				368	; CHECK-LABEL: testoverflowcheck
				369	; CHECK: entry
				370	; CHECK: %[[LOAD:.*]] = load i8
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	371	; CHECK: br
				372
				373	; CHECK: scalar.ph
James Molloy	c07701b	2015-09-02 10:14:54 +0000	[diff] [blame]	374	; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ]
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	375
				376	@e = global i8 1, align 1 ; supplies the start value of the i8 induction variable below
				377	@d = common global i32 0, align 4 ; loop-invariant AND operand
				378	@c = common global i32 0, align 4 ; initial value of the AND accumulator
				379	define i32 @testoverflowcheck() { ; Test input: i8 induction variable whose start value is loaded at run time in the entry block; the loop ANDs @d into an accumulator and returns it.
				380	entry:
David Blaikie	a79ac14	2015-02-27 21:17:42 +0000	[diff] [blame]	381	%.pr.i = load i8, i8* @e, align 1 ; induction start value, defined in entry
				382	%0 = load i32, i32* @d, align 4
				383	%c.promoted.i = load i32, i32* @c, align 4
Arnold Schwaighofer	c11107c	2014-06-22 03:38:59 +0000	[diff] [blame]	384	br label %cond.end.i
				385
				386	cond.end.i:
				387	%inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ] ; i8 induction variable, starts at the loaded value, +1 per iteration
				388	%and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ] ; AND accumulator
				389	%and.i = and i32 %0, %and3.i
				390	%inc.i = add i8 %inc4.i, 1 ; no nsw/nuw flags on the increment
				391	%tobool.i = icmp eq i8 %inc.i, 0
				392	br i1 %tobool.i, label %loopexit, label %cond.end.i ; exit when the incremented value equals 0
				393
				394	loopexit:
				395	ret i32 %and.i
				396	}
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	397
				398	; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32)
				399	; In order to recognize %sphi as an induction PHI and vectorize this loop,
				400	; we need to convert the SCEV expression into an AddRecExpr.
				401	; The expression gets converted to {zext i8 %t to i32,+,1}.
				402
				403	; CHECK-LABEL: wrappingindvars1
				404	; CHECK-LABEL: vector.scevcheck
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	405	; CHECK-LABEL: vector.ph
				406	; CHECK: %[[START:.]] = add <2 x i32> %{{.}}, <i32 0, i32 1>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	407	; CHECK-LABEL: vector.body
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	408	; CHECK: %[[PHI:.]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.]], %vector.body ]
				409	; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 2, i32 2>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	410	define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { ; Test input: stores %sphi (zero-extension of an i8 counter) into A[idx]; the trip count is governed by a separate i32 counter.
				411	entry:
				412	%st = zext i8 %t to i16
				413	%ext = zext i8 %t to i32 ; start value of %sphi
				414	%ecmp = icmp ult i16 %st, 42
				415	br i1 %ecmp, label %loop, label %exit ; loop is entered only when zext(t) < 42 (unsigned)
				416
				417	loop:
				418
				419	%idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] ; i8 counter, starts at t, +1 per iteration (no nsw/nuw)
				420	%idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] ; i32 counter, starts at 0, drives the exit test
				421	%sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop] ; i32 value that follows zext(idx)
				422
				423	%ptr = getelementptr inbounds i32, i32* %A, i8 %idx
				424	store i32 %sphi, i32* %ptr
				425
				426	%idx.inc = add i8 %idx, 1
				427	%idx.inc.ext = zext i8 %idx.inc to i32
				428	%idx.b.inc = add nuw nsw i32 %idx.b, 1
				429
				430	%c = icmp ult i32 %idx.b, %len ; compares the pre-increment i32 counter against len
				431	br i1 %c, label %loop, label %exit
				432
				433	exit:
				434	ret void
				435	}
				436
				437	; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32))
				438	; In order to recognize %sphi as an induction PHI and vectorize this loop,
				439	; we need to convert the SCEV expression into an AddRecExpr.
				440	; The expression gets converted to ({4 * (zext %t to i32),+,4}).
				441	; CHECK-LABEL: wrappingindvars2
				442	; CHECK-LABEL: vector.scevcheck
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	443	; CHECK-LABEL: vector.ph
				444	; CHECK: %[[START:.]] = add <2 x i32> %{{.}}, <i32 0, i32 4>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	445	; CHECK-LABEL: vector.body
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	446	; CHECK: %[[PHI:.]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.]], %vector.body ]
				447	; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 8, i32 8>
Silviu Baranga	c05bab8	2016-05-05 15:20:39 +0000	[diff] [blame]	448	define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { ; Test input: like @wrappingindvars1, but the stored value is 4 * zext(i8 counter).
				449
				450	entry:
				451	%st = zext i8 %t to i16
				452	%ext = zext i8 %t to i32
				453	%ext.mul = mul i32 %ext, 4 ; start value of %sphi: 4 * zext(t)
				454
				455	%ecmp = icmp ult i16 %st, 42
				456	br i1 %ecmp, label %loop, label %exit ; loop is entered only when zext(t) < 42 (unsigned)
				457
				458	loop:
				459
				460	%idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] ; i8 counter, starts at t, +1 per iteration (no nsw/nuw)
				461	%sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop] ; i32 value that follows 4 * zext(idx)
				462	%idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] ; i32 counter, starts at 0, drives the exit test
				463
				464	%ptr = getelementptr inbounds i32, i32* %A, i8 %idx
				465	store i32 %sphi, i32* %ptr
				466
				467	%idx.inc = add i8 %idx, 1
				468	%idx.inc.ext = zext i8 %idx.inc to i32
				469	%mul = mul i32 %idx.inc.ext, 4
				470	%idx.b.inc = add nuw nsw i32 %idx.b, 1
				471
				472	%c = icmp ult i32 %idx.b, %len ; compares the pre-increment i32 counter against len
				473	br i1 %c, label %loop, label %exit
				474
				475	exit:
				476	ret void
				477	}
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	478
				479	; Check that we generate vectorized IVs in the pre-header
				480	; instead of widening the scalar IV inside the loop, when
				481	; we know how to do that.
				482	; IND-LABEL: veciv
				483	; IND: vector.body:
				484	; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	485	; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	486	; IND: %index.next = add i32 %index, 2
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	487	; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	488	; IND: %[[CMP:.*]] = icmp eq i32 %index.next
				489	; IND: br i1 %[[CMP]]
				490	; UNROLL-LABEL: veciv
				491	; UNROLL: vector.body:
				492	; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	493	; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	494	; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	495	; UNROLL: %index.next = add i32 %index, 4
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	496	; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	497	; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
				498	; UNROLL: br i1 %[[CMP]]
				499	define void @veciv(i32* nocapture %a, i32 %start, i32 %k) { ; Test input: a[i] = i for an i32 induction variable starting at 0; the value itself is stored, not just used as an index. %start is not referenced in the body.
				500	for.body.preheader:
				501	br label %for.body
				502
				503	for.body:
				504	%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; induction variable, starts at 0, +1 per iteration
				505	%arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
				506	store i32 %indvars.iv, i32* %arrayidx, align 4 ; stored value is the induction variable itself
				507	%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
				508	%exitcond = icmp eq i32 %indvars.iv.next, %k
				509	br i1 %exitcond, label %exit, label %for.body ; exit once iv + 1 == k
				510
				511	exit:
				512	ret void
				513	}
				514
				515	; IND-LABEL: trunciv
				516	; IND: vector.body:
				517	; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				518	; IND: %[[VECIND:.]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.]], %vector.body ]
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	519	; IND: %index.next = add i64 %index, 2
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	520	; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
Michael Kuperstein	3a3c64d	2016-06-01 17:16:46 +0000	[diff] [blame]	521	; IND: %[[CMP:.*]] = icmp eq i64 %index.next
				522	; IND: br i1 %[[CMP]]
				523	define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { ; Test input: i64 induction variable truncated to i32; the truncated value is both the index and the stored value. %start is not referenced in the body.
				524	for.body.preheader:
				525	br label %for.body
				526
				527	for.body:
				528	%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 induction variable, starts at 0, +1 per iteration
				529	%trunc.iv = trunc i64 %indvars.iv to i32 ; i32 view of the induction variable
				530	%arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
				531	store i32 %trunc.iv, i32* %arrayidx, align 4 ; a[trunc(iv)] = trunc(iv)
				532	%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
				533	%exitcond = icmp eq i64 %indvars.iv.next, %k ; exit test uses the untruncated i64 value
				534	br i1 %exitcond, label %exit, label %for.body
				535
				536	exit:
				537	ret void
				538	}
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	539
				540	; IND-LABEL: nonprimary
				541	; IND-LABEL: vector.ph
				542	; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
				543	; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
				544	; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
				545	; IND-LABEL: vector.body:
				546	; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	547	; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	548	; IND: %index.next = add i32 %index, 2
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	549	; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 84, i32 84>
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	550	; IND: %[[CMP:.*]] = icmp eq i32 %index.next
				551	; IND: br i1 %[[CMP]]
				552	; UNROLL-LABEL: nonprimary
				553	; UNROLL-LABEL: vector.ph
				554	; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
				555	; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
				556	; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
				557	; UNROLL-LABEL: vector.body:
				558	; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	559	; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	560	; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 84, i32 84>
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	561	; UNROLL: %index.next = add i32 %index, 4
Matthew Simpson	102729c	2016-07-21 21:20:15 +0000	[diff] [blame]	562	; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 168, i32 168>
Michael Kuperstein	c5edcde	2016-06-09 18:03:15 +0000	[diff] [blame]	563	; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
				564	; UNROLL: br i1 %[[CMP]]
				565	define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) { ; Test input: a[iv] = iv where the induction variable starts at the argument %i and steps by 42. %start is not referenced in the body.
				566	for.body.preheader:
				567	br label %for.body
				568
				569	for.body:
				570	%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ] ; induction variable, starts at %i (not 0)
				571	%arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
				572	store i32 %indvars.iv, i32* %arrayidx, align 4 ; stored value is the induction variable itself
				573	%indvars.iv.next = add nuw nsw i32 %indvars.iv, 42 ; step of 42
				574	%exitcond = icmp eq i32 %indvars.iv.next, %k
				575	br i1 %exitcond, label %exit, label %for.body ; exit once iv + 42 == k
				576
				577	exit:
				578	ret void
				579	}