; RUN: llc < %s -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
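; The first run uses the default stack alignment; the second forces 32-byte
; stack alignment via -force-align-stack -stack-alignment=32 and is checked
; with the FORCE-ALIGN prefix.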
; rdar://11496434

; no VLAs or dynamic alignment
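; With only a word-aligned local, the prologue must not realign the stack,
; so no andq on %rsp is expected.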
define i32 @t1() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  call void @t1_helper(i32* %a) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t1
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
; CHECK: callq _t1_helper
; CHECK: movl [[OFFSET]](%rsp), %eax
; CHECK: addl $13, %eax
}

declare void @t1_helper(i32*)

; dynamic realignment
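; The 32-byte-aligned <8 x float> local forces dynamic realignment: establish
; %rbp as the frame pointer, then align %rsp with andq $-32. The epilogue
; restores %rsp from %rbp.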
define i32 @t2() nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t2
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
; CHECK: callq _t2_helper
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t2_helper(i32*, <8 x float>*)

; VLAs
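; A VLA requires a frame pointer, but with no over-aligned locals there is no
; realignment; %rsp is restored relative to %rbp in the epilogue.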
define i32 @t3(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %vla = alloca i32, i64 %sz, align 16
  call void @t3_helper(i32* %a, i32* %vla) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t3
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
}

declare void @t3_helper(i32*, i32*)

; VLAs + Dynamic realignment
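; With both a VLA and a 32-byte-aligned local, fixed locals are addressed
; through the base pointer %rbx (a copy of the realigned %rsp), since %rsp
; moves with the VLA.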
define i32 @t4(i64 %sz) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %v = alloca <8 x float>, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; CHECK: _t4
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %r14
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx
;
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
; CHECK: callq _t4_helper
;
; CHECK: leaq -16(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %r14
; CHECK: popq %rbp
}

declare void @t4_helper(i32*, i32*, <8 x float>*)

; Dynamic realignment + Spill
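; The <8 x float> loaded from %f must survive the first call, so it is
; spilled to a 32-byte-aligned slot on the realigned stack and reloaded
; into %ymm0 before the second call.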
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>* %0, align 32
  call void @t5_helper1(i32* %a) nounwind
  call void @t5_helper2(<8 x float> %1) nounwind
  %2 = load i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add

; CHECK: _t5
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
; CHECK: vmovaps [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq _t5_helper1
; CHECK: vmovaps (%rsp), %ymm0
; CHECK: callq _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
;
; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}

declare void @t5_helper1(i32*)

declare void @t5_helper2(<8 x float>)

; VLAs + Dynamic realignment + Spill
; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
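; Until that is fixed, only the function label is checked.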
define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
entry:
; CHECK: _t6
  %a = alloca i32, align 4
  %0 = bitcast float* %f to <8 x float>*
  %1 = load <8 x float>* %0, align 32
  %vla = alloca i32, i64 %sz, align 16
  call void @t6_helper1(i32* %a, i32* %vla) nounwind
  call void @t6_helper2(<8 x float> %1) nounwind
  %2 = load i32* %a, align 4
  %add = add nsw i32 %2, 13
  ret i32 %add
}

declare void @t6_helper1(i32*, i32*)

declare void @t6_helper2(<8 x float>)

; VLAs + Dynamic realignment + byval
; The byval argument adjusts the sp after the prologue, but if we are
; restoring the sp from the base pointer we use the original adjustment.
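; Hence the checks below expect a subq before the call to _bar but no
; matching addq afterwards: %rsp is restored with leaq -8(%rbp), %rsp.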
%struct.struct_t = type { [5 x i32] }

define void @t7(i32 %size, %struct.struct_t* byval align 8 %arg1) nounwind uwtable {
entry:
  %x = alloca i32, align 32
  store i32 0, i32* %x, align 32
  %0 = zext i32 %size to i64
  %vla = alloca i32, i64 %0, align 16
  %1 = load i32* %x, align 32
  call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval align 8 %arg1)
  ret void

; CHECK: _t7
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx

; Stack adjustment for byval
; CHECK: subq {{.*}}, %rsp
; CHECK: callq _bar
; CHECK-NOT: addq {{.*}}, %rsp
; CHECK: leaq -8(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
}

declare i8* @llvm.stacksave() nounwind

declare void @bar(i32, i32*, %struct.struct_t* byval align 8)

declare void @llvm.stackrestore(i8*) nounwind


; Test when forcing stack alignment
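; Under -force-align-stack -stack-alignment=32 (the FORCE-ALIGN run), even a
; function with only a 4-byte local gets its stack realigned to 32 bytes.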
define i32 @t8() nounwind uwtable {
entry:
  %a = alloca i32, align 4
  call void @t1_helper(i32* %a) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; FORCE-ALIGN: _t8
; FORCE-ALIGN: movq %rsp, %rbp
; FORCE-ALIGN: andq $-32, %rsp
; FORCE-ALIGN-NEXT: subq $32, %rsp
; FORCE-ALIGN: movq %rbp, %rsp
; FORCE-ALIGN: popq %rbp
}

; VLAs
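; Same forced-alignment run with a VLA: the realigned %rsp is also copied
; into the base pointer %rbx.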
define i32 @t9(i64 %sz) nounwind uwtable {
entry:
  %a = alloca i32, align 4
  %vla = alloca i32, i64 %sz, align 16
  call void @t3_helper(i32* %a, i32* %vla) nounwind
  %0 = load i32* %a, align 4
  %add = add nsw i32 %0, 13
  ret i32 %add

; FORCE-ALIGN: _t9
; FORCE-ALIGN: pushq %rbp
; FORCE-ALIGN: movq %rsp, %rbp
; FORCE-ALIGN: pushq %rbx
; FORCE-ALIGN: andq $-32, %rsp
; FORCE-ALIGN: subq $32, %rsp
; FORCE-ALIGN: movq %rsp, %rbx

; FORCE-ALIGN: leaq -8(%rbp), %rsp
; FORCE-ALIGN: popq %rbx
; FORCE-ALIGN: popq %rbp
}