ARM/Dwarf: correctly align stack before callee-saved VPRs We were making an attempt to do this by adding an extra callee-saved GPR (so that there was an even number in the list), but when that failed we went ahead and pushed anyway. This had a couple of potential issues: + The .cfi directives we emit misplaced dN because they were based on PrologEpilogInserter's calculation. + Unaligned stores can be less efficient. + Unaligned stores can actually fault (likely only an issue in niche cases, but possible). This adds a final explicit stack adjustment if all other options fail, so that the actual locations of the registers match up with where they should be. llvm-svn: 221320

commit: 228c943f316eea630dfb94e270af7e342bd5dd56 [log] [tgz]
author: Tim Northover <tnorthover@apple.com> Wed Nov 05 00:27:13 2014 +0000
committer: Tim Northover <tnorthover@apple.com> Wed Nov 05 00:27:13 2014 +0000
tree: b8616cc8616979c8b30d4511a3b505699cbf28e0
parent: 445b0657a50fbeefcdcd954f4779c8ae933e8d75 [diff]
diff --git a/llvm/test/CodeGen/ARM/dwarf-unwind.ll b/llvm/test/CodeGen/ARM/dwarf-unwind.ll
new file mode 100644
index 0000000..58f486d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/dwarf-unwind.ll

@@ -0,0 +1,68 @@
+; RUN: llc -mtriple=thumbv7-netbsd-eabi -o - %s | FileCheck %s
+declare void @bar()
+
+; ARM's frame lowering attempts to tack another callee-saved register onto the
+; list when it detects a potential misaligned VFP store. However, if there are
+; none available it used to just vpush anyway and misreport the location of the
+; registers in unwind info. Since there are benefits to aligned stores, it's
+; better to correct the code than the .cfi_offset directive.
+
+define void @test_dpr_align(i8 %l, i8 %r) {
+; CHECK-LABEL: test_dpr_align:
+; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8}
+; CHECK: .cfi_offset d8, -48
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: vpop {d8}
+; CHECK: add sp, #4
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
+  call void @bar()
+  ret void
+}
+
+; The prologue (but not the epilogue) can be made more space efficient by
+; chucking an argument register into the list. Not worth it in general though,
+; "sub sp, #4" is likely faster.
+define void @test_dpr_align_tiny(i8 %l, i8 %r) minsize {
+; CHECK-LABEL: test_dpr_align_tiny:
+; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp
+; CHECK: vpush {d8}
+; CHECK: .cfi_offset d8, -48
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: vpop {d8}
+; CHECK: add sp, #4
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
+  call void @bar()
+  ret void
+}
+
+
+; However, we shouldn't do a 2-step align/adjust if there are no DPRs to be
+; saved.
+define void @test_nodpr_noalign(i8 %l, i8 %r) {
+; CHECK-LABEL: test_nodpr_noalign:
+; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp
+; CHECK: sub sp, #12
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: add sp, #12
+; CHECK-NOT: add sp
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  alloca i64
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"()
+  call void @bar()
+  ret void
+}
commit	228c943f316eea630dfb94e270af7e342bd5dd56	[log] [tgz]
author	Tim Northover <tnorthover@apple.com>	Wed Nov 05 00:27:13 2014 +0000
committer	Tim Northover <tnorthover@apple.com>	Wed Nov 05 00:27:13 2014 +0000
tree	b8616cc8616979c8b30d4511a3b505699cbf28e0
parent	445b0657a50fbeefcdcd954f4779c8ae933e8d75 [diff]