; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB | FileCheck %s --check-prefix=DISABLED
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx | FileCheck %s -check-prefix=CHECK-AVX512
6
; Target: 64-bit x86 Linux (SysV ABI), matching the llc RUN lines above.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; %struct.S: four i32 fields (16 bytes) — the element type used by most tests below.
%struct.S = type { i32, i32, i32, i32 }
11
; Store-forwarding-block (SFB) avoidance, basic case: an i32 store to s1->b
; (offset 4) on a conditional path, followed by a 16-byte copy of *s1.
; CHECK (pass enabled): the s1 copy is split into narrow moves so the just-
; stored dword at offset 4 is re-loaded with a matching-width movl.
; DISABLED (--x86-disable-avoid-SFB): the copy stays a single movups pair.
; The s4->s3 copy has no preceding narrow store, so it stays vectorized in all runs.
; Function Attrs: nounwind uwtable
define void @test_conditional_block(%struct.S* nocapture noalias %s1 , %struct.S* nocapture noalias %s2, i32 %x, %struct.S* nocapture noalias %s3, %struct.S* nocapture noalias readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_conditional_block:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: .LBB0_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl 4(%rdi), %eax
; CHECK-NEXT: movl %eax, 4(%rsi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_conditional_block:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB0_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rdi)
; DISABLED-NEXT: .LBB0_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_conditional_block:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB0_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT: .LBB0_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_conditional_block:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB0_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT: .LBB0_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  ; Narrow (4-byte) store into s1 that a later 16-byte load would forward-block on.
  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
  store i32 %x, i32* %b, align 4
  br label %if.end

if.end: ; preds = %if.then, %entry
  %0 = bitcast %struct.S* %s3 to i8*
  %1 = bitcast %struct.S* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
  %2 = bitcast %struct.S* %s2 to i8*
  %3 = bitcast %struct.S* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
  ret void
}
95
; SFB avoidance with an immediate store: `movl $0, (%rdi)` precedes the
; 16-byte copy of *s1, so the CHECK runs split the copy around offset 0
; (movl at 0, movq at 4, movl at 12); DISABLED keeps one movups pair.
; The unrelated store to s3 does not affect the copy.
; Function Attrs: nounwind uwtable
define void @test_imm_store(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 {
; CHECK-LABEL: test_imm_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $1, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movq 4(%rdi), %rax
; CHECK-NEXT: movq %rax, 4(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_imm_store:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $0, (%rdi)
; DISABLED-NEXT: movl $1, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_imm_store:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $0, (%rdi)
; CHECK-AVX2-NEXT: movl $1, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_imm_store:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $0, (%rdi)
; CHECK-AVX512-NEXT: movl $1, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0
  store i32 0, i32* %a, align 4
  %a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0
  store i32 1, i32* %a1, align 4
  %0 = bitcast %struct.S* %s2 to i8*
  %1 = bitcast %struct.S* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
  ret void
}
151
; SFB avoidance across non-direct control flow: two conditional narrow stores
; into s1 (offset 4 in %if.then, offset 12 in %if.then2) reach the copy in
; %if.end3 through intermediate blocks.  CHECK runs split the s1 copy around
; both stored dwords (movq at 0, movl at 8, movl at 12); DISABLED keeps one
; movups pair.
; Function Attrs: nounwind uwtable
define void @test_nondirect_br(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_nondirect_br:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: .LBB2_2: # %if.end
; CHECK-NEXT: cmpl $14, %r9d
; CHECK-NEXT: jl .LBB2_4
; CHECK-NEXT: # %bb.3: # %if.then2
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: .LBB2_4: # %if.end3
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movl 8(%rdi), %eax
; CHECK-NEXT: movl %eax, 8(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_nondirect_br:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB2_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rdi)
; DISABLED-NEXT: .LBB2_2: # %if.end
; DISABLED-NEXT: cmpl $14, %r9d
; DISABLED-NEXT: jl .LBB2_4
; DISABLED-NEXT: # %bb.3: # %if.then2
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: .LBB2_4: # %if.end3
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_nondirect_br:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB2_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT: .LBB2_2: # %if.end
; CHECK-AVX2-NEXT: cmpl $14, %r9d
; CHECK-AVX2-NEXT: jl .LBB2_4
; CHECK-AVX2-NEXT: # %bb.3: # %if.then2
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: .LBB2_4: # %if.end3
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movl 8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_nondirect_br:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB2_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT: .LBB2_2: # %if.end
; CHECK-AVX512-NEXT: cmpl $14, %r9d
; CHECK-AVX512-NEXT: jl .LBB2_4
; CHECK-AVX512-NEXT: # %bb.3: # %if.then2
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: .LBB2_4: # %if.end3
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movl 8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
  store i32 %x, i32* %b, align 4
  br label %if.end

if.end: ; preds = %if.then, %entry
  %cmp1 = icmp sgt i32 %x2, 13
  br i1 %cmp1, label %if.then2, label %if.end3

if.then2: ; preds = %if.end
  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
  store i32 %x2, i32* %d, align 4
  br label %if.end3

if.end3: ; preds = %if.then2, %if.end
  %0 = bitcast %struct.S* %s3 to i8*
  %1 = bitcast %struct.S* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
  %2 = bitcast %struct.S* %s2 to i8*
  %3 = bitcast %struct.S* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
  ret void
}
264
; SFB avoidance when the copy block has two predecessors: an unconditional
; store at offset 12 (entry) plus a conditional store at offset 4 (%if.then).
; CHECK runs break the whole s1 copy into four movl pairs covering both
; stored dwords; DISABLED keeps a single movups pair.
; Function Attrs: nounwind uwtable
define void @test_2preds_block(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_2preds_block:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB3_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: .LBB3_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl 4(%rdi), %eax
; CHECK-NEXT: movl %eax, 4(%rsi)
; CHECK-NEXT: movl 8(%rdi), %eax
; CHECK-NEXT: movl %eax, 8(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_2preds_block:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB3_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rdi)
; DISABLED-NEXT: .LBB3_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_2preds_block:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB3_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT: .LBB3_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: movl 8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_2preds_block:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB3_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT: .LBB3_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: movl 8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
  store i32 %x2, i32* %d, align 4
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
  store i32 %x, i32* %b, align 4
  br label %if.end

if.end: ; preds = %if.then, %entry
  %0 = bitcast %struct.S* %s3 to i8*
  %1 = bitcast %struct.S* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
  %2 = bitcast %struct.S* %s2 to i8*
  %3 = bitcast %struct.S* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
  ret void
}
; %struct.S2: two i64 fields (16 bytes) — element type for @test_type64.
%struct.S2 = type { i64, i64 }
361
; SFB avoidance with a 64-bit element: a conditional i64 store at offset 8
; makes the CHECK runs copy s1 as two movq pairs instead of one movups pair
; (DISABLED).
; Function Attrs: nounwind uwtable
define void @test_type64(%struct.S2* nocapture noalias %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_type64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB4_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movslq %edx, %rax
; CHECK-NEXT: movq %rax, 8(%rdi)
; CHECK-NEXT: .LBB4_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_type64:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB4_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movslq %edx, %rax
; DISABLED-NEXT: movq %rax, 8(%rdi)
; DISABLED-NEXT: .LBB4_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_type64:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB4_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movslq %edx, %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rdi)
; CHECK-AVX2-NEXT: .LBB4_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_type64:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB4_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movslq %edx, %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rdi)
; CHECK-AVX512-NEXT: .LBB4_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  %conv = sext i32 %x to i64
  %b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1
  store i64 %conv, i64* %b, align 8
  br label %if.end

if.end: ; preds = %if.then, %entry
  %0 = bitcast %struct.S2* %s3 to i8*
  %1 = bitcast %struct.S2* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
  %2 = bitcast %struct.S2* %s2 to i8*
  %3 = bitcast %struct.S2* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false)
  ret void
}
; %struct.S3: mixed-width fields (i64, i8, i8, i16, i32) — element type for @test_mixed_type.
%struct.S3 = type { i64, i8, i8, i16, i32 }
445
; SFB avoidance with mixed-width stores: a conditional i64 store at offset 0
; and an i8 store at offset 8.  CHECK runs reassemble the 16-byte copy from
; movq/movb/movl/movw/movb pieces that match the stored widths; DISABLED
; keeps a single movups pair.
; Function Attrs: noinline nounwind uwtable
define void @test_mixed_type(%struct.S3* nocapture noalias %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_mixed_type:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB5_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movslq %edx, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: movb %dl, 8(%rdi)
; CHECK-NEXT: .LBB5_2: # %if.end
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movb 8(%rdi), %al
; CHECK-NEXT: movb %al, 8(%rsi)
; CHECK-NEXT: movl 9(%rdi), %eax
; CHECK-NEXT: movl %eax, 9(%rsi)
; CHECK-NEXT: movzwl 13(%rdi), %eax
; CHECK-NEXT: movw %ax, 13(%rsi)
; CHECK-NEXT: movb 15(%rdi), %al
; CHECK-NEXT: movb %al, 15(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_mixed_type:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB5_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movslq %edx, %rax
; DISABLED-NEXT: movq %rax, (%rdi)
; DISABLED-NEXT: movb %dl, 8(%rdi)
; DISABLED-NEXT: .LBB5_2: # %if.end
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_mixed_type:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB5_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movslq %edx, %rax
; CHECK-AVX2-NEXT: movq %rax, (%rdi)
; CHECK-AVX2-NEXT: movb %dl, 8(%rdi)
; CHECK-AVX2-NEXT: .LBB5_2: # %if.end
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movb 8(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 8(%rsi)
; CHECK-AVX2-NEXT: movl 9(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 9(%rsi)
; CHECK-AVX2-NEXT: movzwl 13(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 13(%rsi)
; CHECK-AVX2-NEXT: movb 15(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 15(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_mixed_type:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB5_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movslq %edx, %rax
; CHECK-AVX512-NEXT: movq %rax, (%rdi)
; CHECK-AVX512-NEXT: movb %dl, 8(%rdi)
; CHECK-AVX512-NEXT: .LBB5_2: # %if.end
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movb 8(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 8(%rsi)
; CHECK-AVX512-NEXT: movl 9(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 9(%rsi)
; CHECK-AVX512-NEXT: movzwl 13(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 13(%rsi)
; CHECK-AVX512-NEXT: movb 15(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 15(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  %conv = sext i32 %x to i64
  %a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0
  store i64 %conv, i64* %a, align 8
  %conv1 = trunc i32 %x to i8
  %b = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1
  store i8 %conv1, i8* %b, align 8
  br label %if.end

if.end: ; preds = %if.then, %entry
  %0 = bitcast %struct.S3* %s2 to i8*
  %1 = bitcast %struct.S3* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
  ret void
}
; %struct.S4: twelve i32 fields (48 bytes) — element type for @test_multiple_blocks.
%struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
543
; SFB avoidance on a 48-byte copy with two narrow stores (offsets 4 and 36):
; the CHECK runs split only the 16-byte chunks that contain a stored dword,
; leaving the untouched chunk (16..31) as a vector move; DISABLED copies all
; three chunks with movups pairs.
; Function Attrs: nounwind uwtable
define void @test_multiple_blocks(%struct.S4* nocapture noalias %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 {
; CHECK-LABEL: test_multiple_blocks:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: movl $0, 36(%rdi)
; CHECK-NEXT: movups 16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: movl 32(%rdi), %eax
; CHECK-NEXT: movl %eax, 32(%rsi)
; CHECK-NEXT: movl 36(%rdi), %eax
; CHECK-NEXT: movl %eax, 36(%rsi)
; CHECK-NEXT: movq 40(%rdi), %rax
; CHECK-NEXT: movq %rax, 40(%rsi)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl 4(%rdi), %eax
; CHECK-NEXT: movl %eax, 4(%rsi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_multiple_blocks:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $0, 4(%rdi)
; DISABLED-NEXT: movl $0, 36(%rdi)
; DISABLED-NEXT: movups 16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rsi)
; DISABLED-NEXT: movups 32(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 32(%rsi)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_multiple_blocks:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $0, 4(%rdi)
; CHECK-AVX2-NEXT: movl $0, 36(%rdi)
; CHECK-AVX2-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX2-NEXT: movl 32(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 32(%rsi)
; CHECK-AVX2-NEXT: movl 36(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 36(%rsi)
; CHECK-AVX2-NEXT: movq 40(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 40(%rsi)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX2-NEXT: movq 24(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_multiple_blocks:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $0, 4(%rdi)
; CHECK-AVX512-NEXT: movl $0, 36(%rdi)
; CHECK-AVX512-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX512-NEXT: movl 32(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 32(%rsi)
; CHECK-AVX512-NEXT: movl 36(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 36(%rsi)
; CHECK-AVX512-NEXT: movq 40(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 40(%rsi)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX512-NEXT: movq 24(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1
  store i32 0, i32* %b, align 4
  %b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9
  store i32 0, i32* %b3, align 4
  %0 = bitcast %struct.S4* %s2 to i8*
  %1 = bitcast %struct.S4* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false)
  ret void
}
; %struct.S5: eight i16 fields (16 bytes) — element type for @test_type16.
%struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }
632
; SFB avoidance with a 16-bit element: a conditional i16 store at offset 2
; makes the CHECK runs copy s1 as movzwl/movw pieces around the stored word
; plus wider moves for the rest; DISABLED keeps a single movups pair.
; Function Attrs: nounwind uwtable
define void @test_type16(%struct.S5* nocapture noalias %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_type16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movw %dx, 2(%rdi)
; CHECK-NEXT: .LBB7_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: movzwl 2(%rdi), %eax
; CHECK-NEXT: movw %ax, 2(%rsi)
; CHECK-NEXT: movq 4(%rdi), %rax
; CHECK-NEXT: movq %rax, 4(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_type16:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB7_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movw %dx, 2(%rdi)
; DISABLED-NEXT: .LBB7_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_type16:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB7_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movw %dx, 2(%rdi)
; CHECK-AVX2-NEXT: .LBB7_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movzwl (%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, (%rsi)
; CHECK-AVX2-NEXT: movzwl 2(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 2(%rsi)
; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_type16:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB7_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movw %dx, 2(%rdi)
; CHECK-AVX512-NEXT: .LBB7_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movzwl (%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, (%rsi)
; CHECK-AVX512-NEXT: movzwl 2(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 2(%rsi)
; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  %conv = trunc i32 %x to i16
  %b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1
  store i16 %conv, i16* %b, align 2
  br label %if.end

if.end: ; preds = %if.then, %entry
  %0 = bitcast %struct.S5* %s3 to i8*
  %1 = bitcast %struct.S5* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false)
  %2 = bitcast %struct.S5* %s2 to i8*
  %3 = bitcast %struct.S5* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false)
  ret void
}
723
; %struct.S6: [4 x i32] array plus four i32 fields (32 bytes) — element type for @test_stack.
%struct.S6 = type { [4 x i32], i32, i32, i32, i32 }
725
; SFB avoidance through stack (byval) memory: %x is stored into the byval
; copy of %s2 on the stack, then %s2 is copied to the sret result and over
; %s1.  CHECK runs split both 32-byte copies around the stored dword;
; DISABLED uses whole-xmm moves.  Stack offsets are regex-matched
; ({{[0-9]+}}) because exact frame layout is not the point of the test.
; Function Attrs: nounwind uwtable
define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_stack:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: movups %xmm0, (%rdi)
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq %rax, 16(%rdi)
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movl %eax, 24(%rdi)
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movl %eax, 28(%rdi)
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %edx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_stack:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; DISABLED-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; DISABLED-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
; DISABLED-NEXT: movq %rdi, %rax
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_stack:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX2-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX2-NEXT: movl %eax, 28(%rdi)
; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-AVX2-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX2-NEXT: movl %eax, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX2-NEXT: movl %eax, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: movq %rdi, %rax
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_stack:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX512-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX512-NEXT: movl %eax, 28(%rdi)
; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-AVX512-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX512-NEXT: movl %eax, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax
; CHECK-AVX512-NEXT: movl %eax, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: movq %rdi, %rax
; CHECK-AVX512-NEXT: retq
entry:
  %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*
  %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3
  store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8
  %0 = bitcast %struct.S6* %agg.result to i8*
  %s6.sroa.0.0..sroa_cast2 = bitcast %struct.S6* %s1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %s6.sroa.0.0..sroa_cast2, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)

  ret void
}
818
; Function Attrs: nounwind uwtable
;
; Both the unconditional path and the conditional path store into %s1 and then
; call @bar(%s1) before the final copies.  In the generated checks below the
; %s1 -> %s2 memcpy is emitted as plain 16-byte vector copies in all four run
; configurations (identical to the DISABLED run), i.e. the store-forwarding
; block avoidance does not split this copy into element-sized moves here
; (contrast with @test_limit_one_pred below, where it does).
define void @test_limit_all(%struct.S* noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_limit_all:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset %rbx, -48
; CHECK-NEXT: .cfi_offset %r12, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %r8, %r15
; CHECK-NEXT: movq %rcx, %r14
; CHECK-NEXT: movl %edx, %ebp
; CHECK-NEXT: movq %rsi, %r12
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: callq bar
; CHECK-NEXT: cmpl $18, %ebp
; CHECK-NEXT: jl .LBB9_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %ebp, 4(%rbx)
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB9_2: # %if.end
; CHECK-NEXT: movups (%r15), %xmm0
; CHECK-NEXT: movups %xmm0, (%r14)
; CHECK-NEXT: movups (%rbx), %xmm0
; CHECK-NEXT: movups %xmm0, (%r12)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_limit_all:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: pushq %rbp
; DISABLED-NEXT: .cfi_def_cfa_offset 16
; DISABLED-NEXT: pushq %r15
; DISABLED-NEXT: .cfi_def_cfa_offset 24
; DISABLED-NEXT: pushq %r14
; DISABLED-NEXT: .cfi_def_cfa_offset 32
; DISABLED-NEXT: pushq %r12
; DISABLED-NEXT: .cfi_def_cfa_offset 40
; DISABLED-NEXT: pushq %rbx
; DISABLED-NEXT: .cfi_def_cfa_offset 48
; DISABLED-NEXT: .cfi_offset %rbx, -48
; DISABLED-NEXT: .cfi_offset %r12, -40
; DISABLED-NEXT: .cfi_offset %r14, -32
; DISABLED-NEXT: .cfi_offset %r15, -24
; DISABLED-NEXT: .cfi_offset %rbp, -16
; DISABLED-NEXT: movq %r8, %r15
; DISABLED-NEXT: movq %rcx, %r14
; DISABLED-NEXT: movl %edx, %ebp
; DISABLED-NEXT: movq %rsi, %r12
; DISABLED-NEXT: movq %rdi, %rbx
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: callq bar
; DISABLED-NEXT: cmpl $18, %ebp
; DISABLED-NEXT: jl .LBB9_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %ebp, 4(%rbx)
; DISABLED-NEXT: movq %rbx, %rdi
; DISABLED-NEXT: callq bar
; DISABLED-NEXT: .LBB9_2: # %if.end
; DISABLED-NEXT: movups (%r15), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r14)
; DISABLED-NEXT: movups (%rbx), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r12)
; DISABLED-NEXT: popq %rbx
; DISABLED-NEXT: popq %r12
; DISABLED-NEXT: popq %r14
; DISABLED-NEXT: popq %r15
; DISABLED-NEXT: popq %rbp
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_limit_all:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: pushq %rbp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX2-NEXT: pushq %r15
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX2-NEXT: pushq %r14
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX2-NEXT: pushq %r12
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX2-NEXT: pushq %rbx
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX2-NEXT: .cfi_offset %rbx, -48
; CHECK-AVX2-NEXT: .cfi_offset %r12, -40
; CHECK-AVX2-NEXT: .cfi_offset %r14, -32
; CHECK-AVX2-NEXT: .cfi_offset %r15, -24
; CHECK-AVX2-NEXT: .cfi_offset %rbp, -16
; CHECK-AVX2-NEXT: movq %r8, %r15
; CHECK-AVX2-NEXT: movq %rcx, %r14
; CHECK-AVX2-NEXT: movl %edx, %ebp
; CHECK-AVX2-NEXT: movq %rsi, %r12
; CHECK-AVX2-NEXT: movq %rdi, %rbx
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: callq bar
; CHECK-AVX2-NEXT: cmpl $18, %ebp
; CHECK-AVX2-NEXT: jl .LBB9_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %ebp, 4(%rbx)
; CHECK-AVX2-NEXT: movq %rbx, %rdi
; CHECK-AVX2-NEXT: callq bar
; CHECK-AVX2-NEXT: .LBB9_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r15), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%r14)
; CHECK-AVX2-NEXT: vmovups (%rbx), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%r12)
; CHECK-AVX2-NEXT: popq %rbx
; CHECK-AVX2-NEXT: popq %r12
; CHECK-AVX2-NEXT: popq %r14
; CHECK-AVX2-NEXT: popq %r15
; CHECK-AVX2-NEXT: popq %rbp
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_limit_all:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: pushq %rbp
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX512-NEXT: pushq %r15
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX512-NEXT: pushq %r14
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX512-NEXT: pushq %r12
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX512-NEXT: pushq %rbx
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX512-NEXT: .cfi_offset %rbx, -48
; CHECK-AVX512-NEXT: .cfi_offset %r12, -40
; CHECK-AVX512-NEXT: .cfi_offset %r14, -32
; CHECK-AVX512-NEXT: .cfi_offset %r15, -24
; CHECK-AVX512-NEXT: .cfi_offset %rbp, -16
; CHECK-AVX512-NEXT: movq %r8, %r15
; CHECK-AVX512-NEXT: movq %rcx, %r14
; CHECK-AVX512-NEXT: movl %edx, %ebp
; CHECK-AVX512-NEXT: movq %rsi, %r12
; CHECK-AVX512-NEXT: movq %rdi, %rbx
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: callq bar
; CHECK-AVX512-NEXT: cmpl $18, %ebp
; CHECK-AVX512-NEXT: jl .LBB9_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %ebp, 4(%rbx)
; CHECK-AVX512-NEXT: movq %rbx, %rdi
; CHECK-AVX512-NEXT: callq bar
; CHECK-AVX512-NEXT: .LBB9_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r15), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%r14)
; CHECK-AVX512-NEXT: vmovups (%rbx), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%r12)
; CHECK-AVX512-NEXT: popq %rbx
; CHECK-AVX512-NEXT: popq %r12
; CHECK-AVX512-NEXT: popq %r14
; CHECK-AVX512-NEXT: popq %r15
; CHECK-AVX512-NEXT: popq %rbp
; CHECK-AVX512-NEXT: retq
entry:
  ; Store to field 3 (byte offset 12) of %s1, then let @bar observe %s1.
  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
  store i32 %x2, i32* %d, align 4
  tail call void @bar(%struct.S* %s1) #3
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  ; Conditional store to field 1 (byte offset 4) of %s1, again followed by a call.
  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
  store i32 %x, i32* %b, align 4
  tail call void @bar(%struct.S* nonnull %s1) #3
  br label %if.end

if.end: ; preds = %if.then, %entry
  ; Unrelated %s4 -> %s3 copy, then the %s1 -> %s2 copy under test.
  %0 = bitcast %struct.S* %s3 to i8*
  %1 = bitcast %struct.S* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
  %2 = bitcast %struct.S* %s2 to i8*
  %3 = bitcast %struct.S* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
  ret void
}
1010
; Function Attrs: nounwind uwtable
;
; Like @test_limit_all, but only the conditional predecessor calls @bar.
; Here the checks for the default, AVX2 and AVX512 runs break the
; %s1 -> %s2 copy into an 8-byte and two 4-byte moves (matching the stored
; field sizes at offsets 4 and 12), while the DISABLED run keeps a single
; 16-byte vector copy.
define void @test_limit_one_pred(%struct.S* noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_limit_one_pred:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset %rbx, -40
; CHECK-NEXT: .cfi_offset %r12, -32
; CHECK-NEXT: .cfi_offset %r14, -24
; CHECK-NEXT: .cfi_offset %r15, -16
; CHECK-NEXT: movq %r8, %r12
; CHECK-NEXT: movq %rcx, %r15
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB10_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rbx)
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB10_2: # %if.end
; CHECK-NEXT: movups (%r12), %xmm0
; CHECK-NEXT: movups %xmm0, (%r15)
; CHECK-NEXT: movq (%rbx), %rax
; CHECK-NEXT: movq %rax, (%r14)
; CHECK-NEXT: movl 8(%rbx), %eax
; CHECK-NEXT: movl %eax, 8(%r14)
; CHECK-NEXT: movl 12(%rbx), %eax
; CHECK-NEXT: movl %eax, 12(%r14)
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_limit_one_pred:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: pushq %r15
; DISABLED-NEXT: .cfi_def_cfa_offset 16
; DISABLED-NEXT: pushq %r14
; DISABLED-NEXT: .cfi_def_cfa_offset 24
; DISABLED-NEXT: pushq %r12
; DISABLED-NEXT: .cfi_def_cfa_offset 32
; DISABLED-NEXT: pushq %rbx
; DISABLED-NEXT: .cfi_def_cfa_offset 40
; DISABLED-NEXT: pushq %rax
; DISABLED-NEXT: .cfi_def_cfa_offset 48
; DISABLED-NEXT: .cfi_offset %rbx, -40
; DISABLED-NEXT: .cfi_offset %r12, -32
; DISABLED-NEXT: .cfi_offset %r14, -24
; DISABLED-NEXT: .cfi_offset %r15, -16
; DISABLED-NEXT: movq %r8, %r15
; DISABLED-NEXT: movq %rcx, %r14
; DISABLED-NEXT: movq %rsi, %r12
; DISABLED-NEXT: movq %rdi, %rbx
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB10_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rbx)
; DISABLED-NEXT: movq %rbx, %rdi
; DISABLED-NEXT: callq bar
; DISABLED-NEXT: .LBB10_2: # %if.end
; DISABLED-NEXT: movups (%r15), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r14)
; DISABLED-NEXT: movups (%rbx), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r12)
; DISABLED-NEXT: addq $8, %rsp
; DISABLED-NEXT: popq %rbx
; DISABLED-NEXT: popq %r12
; DISABLED-NEXT: popq %r14
; DISABLED-NEXT: popq %r15
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_limit_one_pred:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: pushq %r15
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX2-NEXT: pushq %r14
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX2-NEXT: pushq %r12
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX2-NEXT: pushq %rbx
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX2-NEXT: pushq %rax
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX2-NEXT: .cfi_offset %rbx, -40
; CHECK-AVX2-NEXT: .cfi_offset %r12, -32
; CHECK-AVX2-NEXT: .cfi_offset %r14, -24
; CHECK-AVX2-NEXT: .cfi_offset %r15, -16
; CHECK-AVX2-NEXT: movq %r8, %r12
; CHECK-AVX2-NEXT: movq %rcx, %r15
; CHECK-AVX2-NEXT: movq %rsi, %r14
; CHECK-AVX2-NEXT: movq %rdi, %rbx
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB10_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rbx)
; CHECK-AVX2-NEXT: movq %rbx, %rdi
; CHECK-AVX2-NEXT: callq bar
; CHECK-AVX2-NEXT: .LBB10_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r12), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%r15)
; CHECK-AVX2-NEXT: movq (%rbx), %rax
; CHECK-AVX2-NEXT: movq %rax, (%r14)
; CHECK-AVX2-NEXT: movl 8(%rbx), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%r14)
; CHECK-AVX2-NEXT: movl 12(%rbx), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%r14)
; CHECK-AVX2-NEXT: addq $8, %rsp
; CHECK-AVX2-NEXT: popq %rbx
; CHECK-AVX2-NEXT: popq %r12
; CHECK-AVX2-NEXT: popq %r14
; CHECK-AVX2-NEXT: popq %r15
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_limit_one_pred:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: pushq %r15
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX512-NEXT: pushq %r14
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX512-NEXT: pushq %r12
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX512-NEXT: pushq %rbx
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX512-NEXT: pushq %rax
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX512-NEXT: .cfi_offset %rbx, -40
; CHECK-AVX512-NEXT: .cfi_offset %r12, -32
; CHECK-AVX512-NEXT: .cfi_offset %r14, -24
; CHECK-AVX512-NEXT: .cfi_offset %r15, -16
; CHECK-AVX512-NEXT: movq %r8, %r12
; CHECK-AVX512-NEXT: movq %rcx, %r15
; CHECK-AVX512-NEXT: movq %rsi, %r14
; CHECK-AVX512-NEXT: movq %rdi, %rbx
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB10_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rbx)
; CHECK-AVX512-NEXT: movq %rbx, %rdi
; CHECK-AVX512-NEXT: callq bar
; CHECK-AVX512-NEXT: .LBB10_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r12), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%r15)
; CHECK-AVX512-NEXT: movq (%rbx), %rax
; CHECK-AVX512-NEXT: movq %rax, (%r14)
; CHECK-AVX512-NEXT: movl 8(%rbx), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%r14)
; CHECK-AVX512-NEXT: movl 12(%rbx), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%r14)
; CHECK-AVX512-NEXT: addq $8, %rsp
; CHECK-AVX512-NEXT: popq %rbx
; CHECK-AVX512-NEXT: popq %r12
; CHECK-AVX512-NEXT: popq %r14
; CHECK-AVX512-NEXT: popq %r15
; CHECK-AVX512-NEXT: retq
entry:
  ; Unconditional store to field 3 (byte offset 12) of %s1; no call on this path.
  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
  store i32 %x2, i32* %d, align 4
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  ; Only this predecessor stores field 1 and calls @bar.
  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
  store i32 %x, i32* %b, align 4
  tail call void @bar(%struct.S* nonnull %s1) #3
  br label %if.end

if.end: ; preds = %if.then, %entry
  ; Unrelated %s4 -> %s3 copy, then the %s1 -> %s2 copy under test.
  %0 = bitcast %struct.S* %s3 to i8*
  %1 = bitcast %struct.S* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
  %2 = bitcast %struct.S* %s2 to i8*
  %3 = bitcast %struct.S* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
  ret void
}
1201
1202
; Opaque external callee; used by the tests to make %s1 escape / stay live
; across a call.
declare void @bar(%struct.S*) local_unnamed_addr #1


; Function Attrs: argmemonly nounwind
; NOTE(review): old-style memcpy intrinsic signature where alignment is the
; 4th (i32) operand — this test predates the LLVM 7 signature change.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1

attributes #0 = { nounwind uwtable "target-cpu"="x86-64" }

; 32-byte struct of 8 floats, used by @test_conditional_block_float.
%struct.S7 = type { float, float, float , float, float, float, float, float }
1212
; Function Attrs: nounwind uwtable
;
; Conditional 4-byte float store into %s1 followed by a 32-byte copy of %s1.
; The checks below show the first 16 bytes of the copy split around the stored
; field (offset 4) while the untouched upper 16 bytes stay a single vector
; copy; the DISABLED run keeps two plain 16-byte copies.
define void @test_conditional_block_float(%struct.S7* nocapture noalias %s1, %struct.S7* nocapture %s2, i32 %x, %struct.S7* nocapture %s3, %struct.S7* nocapture readonly %s4, float %y) local_unnamed_addr #0 {
; CHECK-LABEL: test_conditional_block_float:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB11_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; CHECK-NEXT: .LBB11_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups 16(%r8), %xmm1
; CHECK-NEXT: movups %xmm1, 16(%rcx)
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl 4(%rdi), %ecx
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: movups 16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl %ecx, 4(%rsi)
; CHECK-NEXT: movq %rdx, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_conditional_block_float:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB11_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; DISABLED-NEXT: .LBB11_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups 16(%r8), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rcx)
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups 16(%rdi), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rsi)
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_conditional_block_float:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB11_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; CHECK-AVX2-NEXT: .LBB11_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX2-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX2-NEXT: movq 24(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_conditional_block_float:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB11_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; CHECK-AVX512-NEXT: .LBB11_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX512-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX512-NEXT: movq 24(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX512-NEXT: vzeroupper
; CHECK-AVX512-NEXT: retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  ; Conditionally store 1.0f into field 1 (byte offset 4) of %s1.
  %b = getelementptr inbounds %struct.S7, %struct.S7* %s1, i64 0, i32 1
  store float 1.0, float* %b, align 4
  br label %if.end

if.end: ; preds = %if.then, %entry
  ; Unrelated %s4 -> %s3 copy, then the 32-byte %s1 -> %s2 copy under test.
  %0 = bitcast %struct.S7* %s3 to i8*
  %1 = bitcast %struct.S7* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
  %2 = bitcast %struct.S7* %s2 to i8*
  %3 = bitcast %struct.S7* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
  ret void
}
1310
; Struct of i64 fields; the test below copies its first 32 bytes.
%struct.S8 = type { i64, i64, i64, i64, i64, i64 }

; Function Attrs: nounwind uwtable
;
; Conditional 8-byte store into %s1 (offset 8) followed by a 32-byte copy.
; The checks show the low 16 bytes of the copy split into two 8-byte moves
; around the stored field, with the untouched high 16 bytes kept as one
; vector copy; the DISABLED run keeps two plain 16-byte copies.
define void @test_conditional_block_ymm(%struct.S8* nocapture noalias %s1, %struct.S8* nocapture %s2, i32 %x, %struct.S8* nocapture %s3, %struct.S8* nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_conditional_block_ymm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB12_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movq $1, 8(%rdi)
; CHECK-NEXT: .LBB12_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups 16(%r8), %xmm1
; CHECK-NEXT: movups %xmm1, 16(%rcx)
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rcx
; CHECK-NEXT: movups 16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movq %rcx, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_conditional_block_ymm:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB12_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movq $1, 8(%rdi)
; DISABLED-NEXT: .LBB12_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups 16(%r8), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rcx)
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups 16(%rdi), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rsi)
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_conditional_block_ymm:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB12_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movq $1, 8(%rdi)
; CHECK-AVX2-NEXT: .LBB12_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX2-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_conditional_block_ymm:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB12_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movq $1, 8(%rdi)
; CHECK-AVX512-NEXT: .LBB12_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX512-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX512-NEXT: vzeroupper
; CHECK-AVX512-NEXT: retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  ; Conditionally store i64 1 into field 1 (byte offset 8) of %s1.
  %b = getelementptr inbounds %struct.S8, %struct.S8* %s1, i64 0, i32 1
  store i64 1, i64* %b, align 4
  br label %if.end

if.end: ; preds = %if.then, %entry
  ; Unrelated %s4 -> %s3 copy, then the 32-byte %s1 -> %s2 copy under test.
  %0 = bitcast %struct.S8* %s3 to i8*
  %1 = bitcast %struct.S8* %s4 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
  %2 = bitcast %struct.S8* %s2 to i8*
  %3 = bitcast %struct.S8* %s1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
  ret void
}
1404
; Store to %A, then copy 16 bytes from %A to %A+4: source and destination
; overlap, and all four run configurations below emit the same single
; 16-byte vector copy (no element-wise splitting).
define dso_local void @test_alias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_alias:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movups (%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 4(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_alias:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl %esi, (%rdi)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 4(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_alias:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl %esi, (%rdi)
; CHECK-AVX2-NEXT: vmovups (%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 4(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_alias:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl %esi, (%rdi)
; CHECK-AVX512-NEXT: vmovups (%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 4(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  ; 4-byte store at %A, then a 16-byte copy whose destination (%A+4)
  ; overlaps the source range [%A, %A+16).
  %a = bitcast i8* %A to i32*
  store i32 %x, i32* %a, align 4
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 4
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
  ret void
}
1440
; Function Attrs: nounwind uwtable
;
; Same shape as @test_alias, but the destination (%A+20) does not overlap the
; 16-byte source range, so the default/AVX2/AVX512 checks split the copy into
; 4-, 8- and 4-byte moves around the just-stored word; the DISABLED run keeps
; a single 16-byte vector copy.
define dso_local void @test_noalias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_noalias:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, 20(%rdi)
; CHECK-NEXT: movq 4(%rdi), %rax
; CHECK-NEXT: movq %rax, 24(%rdi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 32(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_noalias:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl %esi, (%rdi)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 20(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_noalias:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl %esi, (%rdi)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 32(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_noalias:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl %esi, (%rdi)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 32(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  ; 4-byte store at %A, then a 16-byte copy to the disjoint range
  ; [%A+20, %A+36).
  %a = bitcast i8* %A to i32*
  store i32 %x, i32* %a, align 4
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 20
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
  ret void
}
1489
1490
1491