; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL

; Not implemented as a MIR test so that changes to the generic HardwareLoops
; pass can also be tested. These functions have been taken from
; Transforms/HardwareLoops/loop-guards.ll, in which the generation of a few
; test.set intrinsics can be seen, but only one (ne_trip_count) gets generated
; here. Simplifications result in the icmps changing and maybe also the CFG. So,
; TODO: Teach the HardwareLoops pass some better pattern recognition.

; CHECK-GLOBAL-NOT: DoLoopStart
; CHECK-GLOBAL-NOT: WhileLoopStart
; CHECK-GLOBAL-NOT: LoopEnd

; Guard shape: the loop is entered only when %t1, %t2 and (%N != 0) all hold,
; and the three conditions are and-ed together inside the entry block.
; CHECK: ne_and_guard
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK: $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; The trip-count test (%N != 0) is merged with the unrelated %t1 & %t2 test.
  %brmerge.demorgan = and i1 %t1, %t2
  %cmp6 = icmp ne i32 %N, 0
  %or.cond = and i1 %brmerge.demorgan, %cmp6
  br i1 %or.cond, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %entry
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %entry ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %entry ]
  ; Copy one i32 from %b to %a and advance both pointers by one element.
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  ; Exit once the incremented counter equals %N.
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %entry
  ret void
}

; Guard shape: %t1 & %t2 is tested in entry; the trip-count test (%N != 0,
; written with icmp ne) sits alone in while.preheader, directly before the loop.
; TODO: This could generate WLS
; CHECK: ne_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK: $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  ; Enter the loop only for a non-zero %N.
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  ; Copy one i32 from %b to %a and advance both pointers by one element.
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  ; Exit once the incremented counter equals %N.
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; Guard shape: %t1 & %t2 is tested in entry; the trip-count test sits alone in
; while.preheader and is written as icmp eq %N, 0 with the true edge leaving
; to if.end and the false edge entering the loop.
; TODO: This could generate WLS
; CHECK: eq_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK: $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  ; Skip the loop when %N is zero (eq form: true edge goes to the exit).
  %cmp = icmp eq i32 %N, 0
  br i1 %cmp, label %if.end, label %while.body

while.body:                                       ; preds = %while.body, %while.preheader
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  ; Copy one i32 from %b to %a and advance both pointers by one element.
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  ; Exit once the incremented counter equals %N.
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; Guard shape: the trip-count test (%N != 0) is in entry, and the unrelated
; %t1 & %t2 test is in while.preheader, the block directly before the loop.
; TODO: This could generate WLS
; CHECK: ne_prepreheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK: $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; The trip-count test comes first, one block ahead of the loop's predecessor.
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  ; Copy one i32 from %b to %a and advance both pointers by one element.
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  ; Exit once the incremented counter equals %N.
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; Guard shape: the guard does not test %N directly; it tests %be, a select on
; (%N != 0) between 0 and %N - 1. The expected output has a t2DLS directly in
; the entry block. The loop itself is a do-while with an unsigned-less-than
; latch compare.
; CHECK: be_ne
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: $lr = t2DLS renamable $lr
; CHECK: bb.1.do.body:
; CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %cmp = icmp ne i32 %N, 0
  %sub = sub i32 %N, 1
  ; %be is 0 when %N != 0, otherwise %N - 1.
  %be = select i1 %cmp, i32 0, i32 %sub
  %cmp.1 = icmp ne i32 %be, 0
  br i1 %cmp.1, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %entry
  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %entry ]
  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %entry ]
  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %entry ]
  ; Copy one i32 from %b to %a and advance both pointers by one element.
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
  %tmp = load i32, i32* %b.addr.0, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
  store i32 %tmp, i32* %a.addr.0, align 4
  ; Keep looping while the incremented counter is (unsigned) below %N.
  %inc = add nuw i32 %i.0, 1
  %cmp.2 = icmp ult i32 %inc, %N
  br i1 %cmp.2, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %entry
  ret void
}

; Guard shape: entry branches unconditionally to do.body.preheader, whose only
; job is the trip-count test (%N != 0). This is the one function in this file
; for which a t2WLS is expected.
; TODO: Remove the tMOVr in the preheader!
; CHECK: ne_trip_count
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: $lr = t2WLS $r3, %bb.3
; CHECK: bb.1.do.body.preheader:
; CHECK: $lr = tMOVr
; CHECK: bb.2.do.body:
; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  br label %do.body.preheader

do.body.preheader:                                ; preds = %entry
  ; Enter the loop only for a non-zero %N.
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %do.body.preheader
  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %do.body.preheader ]
  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %do.body.preheader ]
  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ]
  ; Copy one i32 from %b to %a and advance both pointers by one element.
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
  %tmp = load i32, i32* %b.addr.0, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
  store i32 %tmp, i32* %a.addr.0, align 4
  ; Keep looping while the incremented counter is (unsigned) below %N.
  %inc = add nuw i32 %i.0, 1
  %cmp.1 = icmp ult i32 %inc, %N
  br i1 %cmp.1, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %do.body.preheader
  ret void
}