Tim Northover | 569f69d | 2013-10-10 09:28:20 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s |
| 2 | |
| 3 | ; The ARM load/store optimizer was dealing with a sequence like: |
| 4 | ; s1 = VLDRS [r0, 1], Q0<imp-def> |
| 5 | ; s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def> |
| 6 | ; s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def> |
| 7 | ; s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def> |
| 8 | ; |
| 9 | ; It decided to combine the {s0, s1} loads into a single instruction in the |
| 10 | ; third position. However, this leaves the instruction defining s3 with a stray |
| 11 | ; imp-use of Q0, which is undefined. |
| 12 | ; |
| 13 | ; The verifier catches this, so this test just makes sure that appropriate |
| 14 | ; liveness flags are added. |
| 15 | ; |
| 16 | ; I believe the change will be tested as long as the vldmia is not the first of |
| 17 | ; the loads. Earlier optimisations may perturb the output over time, but |
| 18 | ; fiddling the indices should be sufficient to restore the test. |
| 19 | |
; Test function: loads four floats from %ptr at element indices 0, 1, 4 and 2,
; packs them into a <4 x float> and returns that vector.
;
; The expected assembly listed inside the function is, in order: a single
; vldr into s3, the combined vldmia of {s0, s1}, then a single vldr into s2.
; The combined load is therefore not the first of the loads. With 4-byte
; floats, byte offset #8 is element index 2 and byte offset #16 is element
; index 4.
| 20 | define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) { |
| 21 | ; CHECK-LABEL: foo: |
| 22 | ; CHECK: vldr s3, [r0, #8] |
| 23 | ; CHECK: vldmia r0, {s0, s1} |
| 24 | ; CHECK: vldr s2, [r0, #16] |
; Scalar loads. Note the non-sequential order of the indices: 0, 1, 4, 2.
| 25 | %off0 = getelementptr float* %ptr, i32 0 |
| 26 | %val0 = load float* %off0 |
| 27 | %off1 = getelementptr float* %ptr, i32 1 |
| 28 | %val1 = load float* %off1 |
| 29 | %off4 = getelementptr float* %ptr, i32 4 |
| 30 | %val4 = load float* %off4 |
| 31 | %off2 = getelementptr float* %ptr, i32 2 |
| 32 | %val2 = load float* %off2 |
| 33 | |
; Build the result vector: lanes 0 and 1 take the values from indices 0 and 1,
; lane 2 takes the value from index 4, and lane 3 takes the value from index 2.
| 34 | %vec1 = insertelement <4 x float> undef, float %val0, i32 0 |
| 35 | %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1 |
| 36 | %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2 |
| 37 | %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3 |
| 38 | |
| 39 | ret <4 x float> %vec4 |
| 40 | } |