; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s

; ARM load store optimizer was dealing with a sequence like:
;     s1 = VLDRS [r0, 1], Q0<imp-def>
;     s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
;     s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
;     s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
;
; It decided to combine the {s0, s1} loads into a single instruction in the
; third position. However, this leaves the instruction defining s3 with a stray
; imp-use of Q0, which is undefined.
;
; The verifier catches this, so this test just makes sure that appropriate
; liveness flags are added.
;
; I believe the change will be tested as long as the vldmia is not the first of
; the loads. Earlier optimisations may perturb the output over time, but
; fiddling the indices should be sufficient to restore the test.
;
; @foo loads the floats at element indices 0, 1, 4 and 2 of %ptr (in that
; program order) and inserts them into lanes 0, 1, 2 and 3 of the returned
; <4 x float>. The expected output below places element 2 (byte offset 8) in
; s3, merges elements 0 and 1 into a single vldmia of {s0, s1}, and places
; element 4 (byte offset 16) in s2, with the vldmia not being the first load.
; Keep the order of the loads and inserts as written: the test relies on the
; merged load landing after another load of the same vector register.
define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
; CHECK-LABEL: foo:
; CHECK: vldr s3, [r0, #8]
; CHECK: vldmia r0, {s0, s1}
; CHECK: vldr s2, [r0, #16]
  ; Elements 0 and 1 are adjacent in memory; these are the two loads the
  ; expected output shows combined into one vldmia.
  %off0 = getelementptr float* %ptr, i32 0
  %val0 = load float* %off0
  %off1 = getelementptr float* %ptr, i32 1
  %val1 = load float* %off1
  ; Elements 4 and 2 are loaded individually (expected as separate vldr).
  %off4 = getelementptr float* %ptr, i32 4
  %val4 = load float* %off4
  %off2 = getelementptr float* %ptr, i32 2
  %val2 = load float* %off2

  ; Lane assignment: lane 0 = ptr[0], lane 1 = ptr[1], lane 2 = ptr[4],
  ; lane 3 = ptr[2]. Note that lanes 2 and 3 deliberately do not follow
  ; memory order.
  %vec1 = insertelement <4 x float> undef, float %val0, i32 0
  %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
  %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2
  %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3

  ret <4 x float> %vec4
}