[ARM] Dynamic stack alignment for 16-bit Thumb

This patch implements dynamic stack (re-)alignment for 16-bit Thumb. Without
this change, when targeting processors that support only the 16-bit Thumb
instruction set, the compiler ignores the alignment attributes of automatic
variables and may silently generate incorrect code.

Differential revision: https://reviews.llvm.org/D38143

llvm-svn: 316289
diff --git a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
index 67d1cad..c54712e 100644
--- a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
+++ b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
@@ -9,6 +9,8 @@
 ; --------
 ; CHECK-V4T:    push {[[SAVED:(r[4567](, )?)+]], lr}
 ; CHECK-V4T:    sub sp,
+; Stack is realigned because of the <6 x i32> type
+; CHECK-V4T:    mov sp, r4
 ; CHECK-V5T:    push {[[SAVED:(r[4567](, )?)+]], lr}
 
   %b = alloca <6 x i32>, align 16
@@ -21,7 +23,8 @@
 
 ; Epilogue
 ; --------
-; CHECK-V4T:         add sp,
+; Stack realignment means sp is restored from frame pointer
+; CHECK-V4T:         mov sp
 ; CHECK-V4T-NEXT:    pop {[[SAVED]]}
 ; The ISA for v4 does not support pop pc, so make sure we do not emit
 ; one even when we do not need to update SP.
@@ -70,8 +73,9 @@
 ; CHECK-V4T-NEXT:    mov lr, [[POP_REG]]
 ; CHECK-V4T-NEXT:    mov [[POP_REG]], r12
 ; CHECK-V4T:         bx  lr
-; CHECK-V5T:         add sp,
-; CHECK-V5T-NEXT:    pop {[[SAVED]]}
+; CHECK-V5T:         lsls r4
+; CHECK-V5T-NEXT:    mov sp, r4
+; CHECK-V5T:         pop {[[SAVED]]}
 ; CHECK-V5T-NEXT:    mov r12, [[POP_REG:r[0-7]]]
 ; CHECK-V5T-NEXT:    pop {[[POP_REG]]}
 ; CHECK-V5T-NEXT:    add sp,
diff --git a/llvm/test/CodeGen/Thumb/large-stack.ll b/llvm/test/CodeGen/Thumb/large-stack.ll
index b0152dd..f35bffb 100644
--- a/llvm/test/CodeGen/Thumb/large-stack.ll
+++ b/llvm/test/CodeGen/Thumb/large-stack.ll
@@ -75,7 +75,7 @@
 ; CHECK: add sp, [[TEMP3]]
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
-    %a = alloca [805306369 x i8], align 16
+    %a = alloca [805306369 x i8], align 4
     store i32 0, i32* %tmp
     %tmp1 = load i32, i32* %tmp
     ret i32 %tmp1
@@ -91,7 +91,7 @@
 ; CHECK: mov sp, r4
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
-    %a = alloca [805306369 x i8], align 16
+    %a = alloca [805306369 x i8], align 8
     store i32 0, i32* %tmp
     %tmp1 = load i32, i32* %tmp
     ret i32 %tmp1
diff --git a/llvm/test/CodeGen/Thumb/long.ll b/llvm/test/CodeGen/Thumb/long.ll
index 13951ef..7fc46ff 100644
--- a/llvm/test/CodeGen/Thumb/long.ll
+++ b/llvm/test/CodeGen/Thumb/long.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | \
+; RUN:    FileCheck %s -check-prefix CHECK --check-prefix CHECK-EABI
 ; RUN: llc -mtriple=thumb-apple-darwin %s -verify-machineinstrs -o - | \
 ; RUN:    FileCheck %s -check-prefix CHECK -check-prefix CHECK-DARWIN
 
@@ -172,10 +173,12 @@
         %retval = load i64, i64* %a          ; <i64> [#uses=1]
         ret i64 %retval
 ; CHECK-LABEL: f10:
-; CHECK: sub sp, #8
+; CHECK-EABI: sub sp, #8
+; CHECK-DARWIN: add r7, sp, #4
 ; CHECK: ldr r0, [sp]
 ; CHECK: ldr r1, [sp, #4]
-; CHECK: add sp, #8
+; CHECK-EABI: add sp, #8
+; CHECK-DARWIN: mov sp, r4
 }
 
 define i64 @f11(i64 %x, i64 %y) {