James Molloy | 75f5f9e | 2014-04-16 15:33:48 +0000 | [diff] [blame] | 1 | // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s |
Tim Northover | a2ee433 | 2014-03-29 15:09:45 +0000 | [diff] [blame] | 2 | |
| 3 | // CHECK: define signext i8 @f0() |
| 4 | char f0(void) { |
| 5 | return 0; |
| 6 | } |
| 7 | |
| 8 | // Struct as return type. Aggregates <= 16 bytes are returned directly, rounded |
| 9 | // up to a multiple of 8 bytes. |
| 10 | // CHECK: define i64 @f1() |
| 11 | struct s1 { char f0; }; |
| 12 | struct s1 f1(void) {} |
| 13 | |
| 14 | // CHECK: define i64 @f2() |
| 15 | struct s2 { short f0; }; |
| 16 | struct s2 f2(void) {} |
| 17 | |
| 18 | // CHECK: define i64 @f3() |
| 19 | struct s3 { int f0; }; |
| 20 | struct s3 f3(void) {} |
| 21 | |
| 22 | // CHECK: define i64 @f4() |
| 23 | struct s4 { struct s4_0 { int f0; } f0; }; |
| 24 | struct s4 f4(void) {} |
| 25 | |
| 26 | // CHECK: define i64 @f5() |
| 27 | struct s5 { struct { } f0; int f1; }; |
| 28 | struct s5 f5(void) {} |
| 29 | |
| 30 | // CHECK: define i64 @f6() |
| 31 | struct s6 { int f0[1]; }; |
| 32 | struct s6 f6(void) {} |
| 33 | |
| 34 | // CHECK: define void @f7() |
| 35 | struct s7 { struct { int : 0; } f0; }; |
| 36 | struct s7 f7(void) {} |
| 37 | |
| 38 | // CHECK: define void @f8() |
| 39 | struct s8 { struct { int : 0; } f0[1]; }; |
| 40 | struct s8 f8(void) {} |
| 41 | |
| 42 | // CHECK: define i64 @f9() |
| 43 | struct s9 { int f0; int : 0; }; |
| 44 | struct s9 f9(void) {} |
| 45 | |
| 46 | // CHECK: define i64 @f10() |
| 47 | struct s10 { int f0; int : 0; int : 0; }; |
| 48 | struct s10 f10(void) {} |
| 49 | |
| 50 | // CHECK: define i64 @f11() |
| 51 | struct s11 { int : 0; int f0; }; |
| 52 | struct s11 f11(void) {} |
| 53 | |
| 54 | // CHECK: define i64 @f12() |
| 55 | union u12 { char f0; short f1; int f2; }; |
| 56 | union u12 f12(void) {} |
| 57 | |
| 58 | // Homogeneous Aggregate as return type will be passed directly. |
| 59 | // CHECK: define %struct.s13 @f13() |
| 60 | struct s13 { float f0; }; |
| 61 | struct s13 f13(void) {} |
| 62 | // CHECK: define %union.u14 @f14() |
| 63 | union u14 { float f0; }; |
| 64 | union u14 f14(void) {} |
| 65 | |
| 66 | // CHECK: define void @f15() |
| 67 | void f15(struct s7 a0) {} |
| 68 | |
| 69 | // CHECK: define void @f16() |
| 70 | void f16(struct s8 a0) {} |
| 71 | |
| 72 | // CHECK: define i64 @f17() |
| 73 | struct s17 { short f0 : 13; char f1 : 4; }; |
| 74 | struct s17 f17(void) {} |
| 75 | |
| 76 | // CHECK: define i64 @f18() |
| 77 | struct s18 { short f0; char f1 : 4; }; |
| 78 | struct s18 f18(void) {} |
| 79 | |
| 80 | // CHECK: define i64 @f19() |
| 81 | struct s19 { int f0; struct s8 f1; }; |
| 82 | struct s19 f19(void) {} |
| 83 | |
| 84 | // CHECK: define i64 @f20() |
| 85 | struct s20 { struct s8 f1; int f0; }; |
| 86 | struct s20 f20(void) {} |
| 87 | |
| 88 | // CHECK: define i64 @f21() |
| 89 | struct s21 { struct {} f1; int f0 : 4; }; |
| 90 | struct s21 f21(void) {} |
| 91 | |
| 92 | // CHECK: define i64 @f22() |
| 93 | // CHECK: define i64 @f23() |
| 94 | // CHECK: define i64 @f24() |
| 95 | // CHECK: define i128 @f25() |
| 96 | // CHECK: define { float, float } @f26() |
| 97 | // CHECK: define { double, double } @f27() |
| 98 | _Complex char f22(void) {} |
| 99 | _Complex short f23(void) {} |
| 100 | _Complex int f24(void) {} |
| 101 | _Complex long long f25(void) {} |
| 102 | _Complex float f26(void) {} |
| 103 | _Complex double f27(void) {} |
| 104 | |
| 105 | // CHECK: define i64 @f28() |
| 106 | struct s28 { _Complex char f0; }; |
| 107 | struct s28 f28() {} |
| 108 | |
| 109 | // CHECK: define i64 @f29() |
| 110 | struct s29 { _Complex short f0; }; |
| 111 | struct s29 f29() {} |
| 112 | |
| 113 | // CHECK: define i64 @f30() |
| 114 | struct s30 { _Complex int f0; }; |
| 115 | struct s30 f30() {} |
| 116 | |
| 117 | struct s31 { char x; }; |
| 118 | void f31(struct s31 s) { } |
| 119 | // CHECK: define void @f31(i64 %s.coerce) |
| 120 | // CHECK: %s = alloca %struct.s31, align 8 |
| 121 | // CHECK: trunc i64 %s.coerce to i8 |
| 122 | // CHECK: store i8 %{{.*}}, |
| 123 | |
| 124 | struct s32 { double x; }; |
| 125 | void f32(struct s32 s) { } |
| 126 | // Expand Homogeneous Aggregate. |
| 127 | // CHECK: @f32(double %{{.*}}) |
| 128 | |
| 129 | // A composite type larger than 16 bytes should be passed indirectly. |
| 130 | struct s33 { char buf[32*32]; }; |
| 131 | void f33(struct s33 s) { } |
| 132 | // CHECK: define void @f33(%struct.s33* %s) |
| 133 | |
| 134 | struct s34 { char c; }; |
| 135 | void f34(struct s34 s); |
| 136 | void g34(struct s34 *s) { f34(*s); } |
| 137 | // CHECK: @g34(%struct.s34* %s) |
| 138 | // CHECK: %[[a:.*]] = load i8* %{{.*}} |
| 139 | // CHECK: zext i8 %[[a]] to i64 |
| 140 | // CHECK: call void @f34(i64 %{{.*}}) |
| 141 | |
| 142 | /* |
| 143 | * Check that va_arg accesses stack according to ABI alignment |
| 144 | */ |
| 145 | long long t1(int i, ...) { |
| 146 | // CHECK: t1 |
| 147 | __builtin_va_list ap; |
| 148 | __builtin_va_start(ap, i); |
| 149 | // CHECK-NOT: add i32 %{{.*}} 7 |
| 150 | // CHECK-NOT: and i32 %{{.*}} -8 |
| 151 | long long ll = __builtin_va_arg(ap, long long); |
| 152 | __builtin_va_end(ap); |
| 153 | return ll; |
| 154 | } |
| 155 | double t2(int i, ...) { |
| 156 | // CHECK: t2 |
| 157 | __builtin_va_list ap; |
| 158 | __builtin_va_start(ap, i); |
| 159 | // CHECK-NOT: add i32 %{{.*}} 7 |
| 160 | // CHECK-NOT: and i32 %{{.*}} -8 |
| 161 | double ll = __builtin_va_arg(ap, double); |
| 162 | __builtin_va_end(ap); |
| 163 | return ll; |
| 164 | } |
| 165 | |
| 166 | #include <arm_neon.h> |
| 167 | |
| 168 | // Homogeneous Vector Aggregate as return type and argument type. |
| 169 | // CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) |
| 170 | int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) { |
| 171 | return vzipq_s8(a0, a1); |
| 172 | } |
| 173 | |
| 174 | // Test direct vector passing. |
| 175 | typedef float T_float32x2 __attribute__ ((__vector_size__ (8))); |
| 176 | typedef float T_float32x4 __attribute__ ((__vector_size__ (16))); |
| 177 | typedef float T_float32x8 __attribute__ ((__vector_size__ (32))); |
| 178 | typedef float T_float32x16 __attribute__ ((__vector_size__ (64))); |
| 179 | |
| 180 | // CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}}) |
| 181 | T_float32x2 f1_0(T_float32x2 a0) { return a0; } |
| 182 | // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}}) |
| 183 | T_float32x4 f1_1(T_float32x4 a0) { return a0; } |
| 184 | // A vector larger than 16 bytes is illegal and is passed indirectly. |
| 185 | // CHECK: define void @f1_2(<8 x float>* noalias sret %{{.*}}, <8 x float>*) |
| 186 | T_float32x8 f1_2(T_float32x8 a0) { return a0; } |
| 187 | // CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*) |
| 188 | T_float32x16 f1_3(T_float32x16 a0) { return a0; } |
| 189 | |
| 190 | // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and |
| 191 | // aggregates with size > 16 bytes. |
| 192 | struct s35 |
| 193 | { |
| 194 | float v[4]; //Testing HFA. |
| 195 | } __attribute__((aligned(16))); |
| 196 | typedef struct s35 s35_with_align; |
| 197 | |
| 198 | typedef __attribute__((neon_vector_type(4))) float float32x4_t; |
| 199 | float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { |
| 200 | // CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3) |
| 201 | // CHECK: %s1 = alloca %struct.s35, align 16 |
| 202 | // CHECK: %s2 = alloca %struct.s35, align 16 |
| 203 | // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>* |
| 204 | // CHECK: load <4 x float>* %[[a]], align 16 |
| 205 | // CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>* |
| 206 | // CHECK: load <4 x float>* %[[b]], align 16 |
| 207 | float32x4_t v = vaddq_f32(*(float32x4_t *)&s1, |
| 208 | *(float32x4_t *)&s2); |
| 209 | return v; |
| 210 | } |
| 211 | |
| 212 | struct s36 |
| 213 | { |
| 214 | int v[4]; //Testing 16-byte aggregate. |
| 215 | } __attribute__((aligned(16))); |
| 216 | typedef struct s36 s36_with_align; |
| 217 | |
| 218 | typedef __attribute__((neon_vector_type(4))) int int32x4_t; |
| 219 | int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) { |
| 220 | // CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce) |
| 221 | // CHECK: %s1 = alloca %struct.s36, align 16 |
| 222 | // CHECK: %s2 = alloca %struct.s36, align 16 |
| 223 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 |
| 224 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 |
| 225 | // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>* |
| 226 | // CHECK: load <4 x i32>* %[[a]], align 16 |
| 227 | // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>* |
| 228 | // CHECK: load <4 x i32>* %[[b]], align 16 |
| 229 | int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, |
| 230 | *(int32x4_t *)&s2); |
| 231 | return v; |
| 232 | } |
| 233 | |
| 234 | struct s37 |
| 235 | { |
| 236 | int v[18]; //Testing large aggregate. |
| 237 | } __attribute__((aligned(16))); |
| 238 | typedef struct s37 s37_with_align; |
| 239 | |
| 240 | int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) { |
| 241 | // CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2) |
| 242 | // CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>* |
| 243 | // CHECK: load <4 x i32>* %[[a]], align 16 |
| 244 | // CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>* |
| 245 | // CHECK: load <4 x i32>* %[[b]], align 16 |
| 246 | int32x4_t v = vaddq_s32(*(int32x4_t *)&s1, |
| 247 | *(int32x4_t *)&s2); |
| 248 | return v; |
| 249 | } |
| 250 | s37_with_align g37; |
| 251 | int32x4_t caller37() { |
| 252 | // CHECK: caller37 |
| 253 | // CHECK: %[[a:.*]] = alloca %struct.s37, align 16 |
| 254 | // CHECK: %[[b:.*]] = alloca %struct.s37, align 16 |
| 255 | // CHECK: call void @llvm.memcpy |
| 256 | // CHECK: call void @llvm.memcpy |
| 257 | // CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]]) |
| 258 | return f37(3, g37, g37); |
| 259 | } |
| 260 | |
| 261 | // rdar://problem/12648441 |
| 262 | // Test passing structs with size < 8, < 16 and > 16 |
| 263 | // with alignment of 16 and without |
| 264 | |
| 265 | // structs with size <= 8 bytes, without alignment attribute |
| 266 | // passed as i64 regardless of the align attribute |
| 267 | struct s38 |
| 268 | { |
| 269 | int i; |
| 270 | short s; |
| 271 | }; |
| 272 | typedef struct s38 s38_no_align; |
| 273 | // passing structs in registers |
| 274 | __attribute__ ((noinline)) |
| 275 | int f38(int i, s38_no_align s1, s38_no_align s2) { |
| 276 | // CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) |
| 277 | // CHECK: %s1 = alloca %struct.s38, align 8 |
| 278 | // CHECK: %s2 = alloca %struct.s38, align 8 |
| 279 | // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1 |
| 280 | // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1 |
| 281 | // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0 |
| 282 | // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0 |
| 283 | // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1 |
| 284 | // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1 |
| 285 | return s1.i + s2.i + i + s1.s + s2.s; |
| 286 | } |
| 287 | s38_no_align g38; |
| 288 | s38_no_align g38_2; |
| 289 | int caller38() { |
| 290 | // CHECK: define i32 @caller38() |
| 291 | // CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1 |
| 292 | // CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1 |
| 293 | // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]]) |
| 294 | return f38(3, g38, g38_2); |
| 295 | } |
| 296 | // passing structs on stack |
| 297 | __attribute__ ((noinline)) |
| 298 | int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
| 299 | int i9, s38_no_align s1, s38_no_align s2) { |
| 300 | // CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) |
| 301 | // CHECK: %s1 = alloca %struct.s38, align 8 |
| 302 | // CHECK: %s2 = alloca %struct.s38, align 8 |
| 303 | // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1 |
| 304 | // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1 |
| 305 | // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0 |
| 306 | // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0 |
| 307 | // CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1 |
| 308 | // CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1 |
| 309 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
| 310 | } |
| 311 | int caller38_stack() { |
| 312 | // CHECK: define i32 @caller38_stack() |
| 313 | // CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1 |
| 314 | // CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1 |
| 315 | // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]]) |
| 316 | return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2); |
| 317 | } |
| 318 | |
| 319 | // structs with size <= 8 bytes, with alignment attribute |
| 320 | struct s39 |
| 321 | { |
| 322 | int i; |
| 323 | short s; |
| 324 | } __attribute__((aligned(16))); |
| 325 | typedef struct s39 s39_with_align; |
| 326 | // passing aligned structs in registers |
| 327 | __attribute__ ((noinline)) |
| 328 | int f39(int i, s39_with_align s1, s39_with_align s2) { |
| 329 | // CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) |
| 330 | // CHECK: %s1 = alloca %struct.s39, align 16 |
| 331 | // CHECK: %s2 = alloca %struct.s39, align 16 |
| 332 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 |
| 333 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 |
| 334 | // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0 |
| 335 | // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0 |
| 336 | // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1 |
| 337 | // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1 |
| 338 | return s1.i + s2.i + i + s1.s + s2.s; |
| 339 | } |
| 340 | s39_with_align g39; |
| 341 | s39_with_align g39_2; |
| 342 | int caller39() { |
| 343 | // CHECK: define i32 @caller39() |
| 344 | // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1 |
| 345 | // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1 |
| 346 | // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]]) |
| 347 | return f39(3, g39, g39_2); |
| 348 | } |
| 349 | // passing aligned structs on stack |
| 350 | __attribute__ ((noinline)) |
| 351 | int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
| 352 | int i9, s39_with_align s1, s39_with_align s2) { |
| 353 | // CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) |
| 354 | // CHECK: %s1 = alloca %struct.s39, align 16 |
| 355 | // CHECK: %s2 = alloca %struct.s39, align 16 |
| 356 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 |
| 357 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 |
| 358 | // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0 |
| 359 | // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0 |
| 360 | // CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1 |
| 361 | // CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1 |
| 362 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
| 363 | } |
| 364 | int caller39_stack() { |
| 365 | // CHECK: define i32 @caller39_stack() |
| 366 | // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1 |
| 367 | // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1 |
| 368 | // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]]) |
| 369 | return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2); |
| 370 | } |
| 371 | |
| 372 | // structs with size <= 16 bytes, without alignment attribute |
| 373 | struct s40 |
| 374 | { |
| 375 | int i; |
| 376 | short s; |
| 377 | int i2; |
| 378 | short s2; |
| 379 | }; |
| 380 | typedef struct s40 s40_no_align; |
| 381 | // passing structs in registers |
| 382 | __attribute__ ((noinline)) |
| 383 | int f40(int i, s40_no_align s1, s40_no_align s2) { |
| 384 | // CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) |
| 385 | // CHECK: %s1 = alloca %struct.s40, align 8 |
| 386 | // CHECK: %s2 = alloca %struct.s40, align 8 |
| 387 | // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1 |
| 388 | // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1 |
| 389 | // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0 |
| 390 | // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0 |
| 391 | // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1 |
| 392 | // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1 |
| 393 | return s1.i + s2.i + i + s1.s + s2.s; |
| 394 | } |
| 395 | s40_no_align g40; |
| 396 | s40_no_align g40_2; |
| 397 | int caller40() { |
| 398 | // CHECK: define i32 @caller40() |
| 399 | // CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1 |
| 400 | // CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1 |
| 401 | // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]]) |
| 402 | return f40(3, g40, g40_2); |
| 403 | } |
| 404 | // passing structs on stack |
| 405 | __attribute__ ((noinline)) |
| 406 | int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
| 407 | int i9, s40_no_align s1, s40_no_align s2) { |
| 408 | // CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) |
| 409 | // CHECK: %s1 = alloca %struct.s40, align 8 |
| 410 | // CHECK: %s2 = alloca %struct.s40, align 8 |
| 411 | // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1 |
| 412 | // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1 |
| 413 | // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0 |
| 414 | // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0 |
| 415 | // CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1 |
| 416 | // CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1 |
| 417 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
| 418 | } |
| 419 | int caller40_stack() { |
| 420 | // CHECK: define i32 @caller40_stack() |
| 421 | // CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1 |
| 422 | // CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1 |
| 423 | // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]]) |
| 424 | return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2); |
| 425 | } |
| 426 | |
| 427 | // structs with size <= 16 bytes, with alignment attribute |
| 428 | struct s41 |
| 429 | { |
| 430 | int i; |
| 431 | short s; |
| 432 | int i2; |
| 433 | short s2; |
| 434 | } __attribute__((aligned(16))); |
| 435 | typedef struct s41 s41_with_align; |
| 436 | // passing aligned structs in registers |
| 437 | __attribute__ ((noinline)) |
| 438 | int f41(int i, s41_with_align s1, s41_with_align s2) { |
| 439 | // CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) |
| 440 | // CHECK: %s1 = alloca %struct.s41, align 16 |
| 441 | // CHECK: %s2 = alloca %struct.s41, align 16 |
| 442 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 |
| 443 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 |
| 444 | // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0 |
| 445 | // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0 |
| 446 | // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1 |
| 447 | // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1 |
| 448 | return s1.i + s2.i + i + s1.s + s2.s; |
| 449 | } |
| 450 | s41_with_align g41; |
| 451 | s41_with_align g41_2; |
| 452 | int caller41() { |
| 453 | // CHECK: define i32 @caller41() |
| 454 | // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1 |
| 455 | // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1 |
| 456 | // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]]) |
| 457 | return f41(3, g41, g41_2); |
| 458 | } |
| 459 | // passing aligned structs on stack |
| 460 | __attribute__ ((noinline)) |
| 461 | int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
| 462 | int i9, s41_with_align s1, s41_with_align s2) { |
| 463 | // CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) |
| 464 | // CHECK: %s1 = alloca %struct.s41, align 16 |
| 465 | // CHECK: %s2 = alloca %struct.s41, align 16 |
| 466 | // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1 |
| 467 | // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1 |
| 468 | // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0 |
| 469 | // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0 |
| 470 | // CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1 |
| 471 | // CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1 |
| 472 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
| 473 | } |
| 474 | int caller41_stack() { |
| 475 | // CHECK: define i32 @caller41_stack() |
| 476 | // CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1 |
| 477 | // CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1 |
| 478 | // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]]) |
| 479 | return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2); |
| 480 | } |
| 481 | |
| 482 | // structs with size > 16 bytes, without alignment attribute |
| 483 | struct s42 |
| 484 | { |
| 485 | int i; |
| 486 | short s; |
| 487 | int i2; |
| 488 | short s2; |
| 489 | int i3; |
| 490 | short s3; |
| 491 | }; |
| 492 | typedef struct s42 s42_no_align; |
| 493 | // passing structs in registers |
| 494 | __attribute__ ((noinline)) |
| 495 | int f42(int i, s42_no_align s1, s42_no_align s2) { |
| 496 | // CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2) |
| 497 | // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0 |
| 498 | // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0 |
| 499 | // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1 |
| 500 | // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1 |
| 501 | return s1.i + s2.i + i + s1.s + s2.s; |
| 502 | } |
| 503 | s42_no_align g42; |
| 504 | s42_no_align g42_2; |
| 505 | int caller42() { |
| 506 | // CHECK: define i32 @caller42() |
| 507 | // CHECK: %[[a:.*]] = alloca %struct.s42, align 4 |
| 508 | // CHECK: %[[b:.*]] = alloca %struct.s42, align 4 |
| 509 | // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8* |
| 510 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 511 | // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8* |
| 512 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 513 | // CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]]) |
| 514 | return f42(3, g42, g42_2); |
| 515 | } |
| 516 | // passing structs on stack |
| 517 | __attribute__ ((noinline)) |
| 518 | int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
| 519 | int i9, s42_no_align s1, s42_no_align s2) { |
| 520 | // CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2) |
| 521 | // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0 |
| 522 | // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0 |
| 523 | // CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1 |
| 524 | // CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1 |
| 525 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
| 526 | } |
| 527 | int caller42_stack() { |
| 528 | // CHECK: define i32 @caller42_stack() |
| 529 | // CHECK: %[[a:.*]] = alloca %struct.s42, align 4 |
| 530 | // CHECK: %[[b:.*]] = alloca %struct.s42, align 4 |
| 531 | // CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8* |
| 532 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 533 | // CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8* |
| 534 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 535 | // CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]]) |
| 536 | return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2); |
| 537 | } |
| 538 | |
| 539 | // structs with size > 16 bytes, with alignment attribute |
| 540 | struct s43 |
| 541 | { |
| 542 | int i; |
| 543 | short s; |
| 544 | int i2; |
| 545 | short s2; |
| 546 | int i3; |
| 547 | short s3; |
| 548 | } __attribute__((aligned(16))); |
| 549 | typedef struct s43 s43_with_align; |
| 550 | // passing aligned structs in registers |
| 551 | __attribute__ ((noinline)) |
| 552 | int f43(int i, s43_with_align s1, s43_with_align s2) { |
| 553 | // CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2) |
| 554 | // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0 |
| 555 | // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0 |
| 556 | // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1 |
| 557 | // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1 |
| 558 | return s1.i + s2.i + i + s1.s + s2.s; |
| 559 | } |
| 560 | s43_with_align g43; |
| 561 | s43_with_align g43_2; |
| 562 | int caller43() { |
| 563 | // CHECK: define i32 @caller43() |
| 564 | // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 |
| 565 | // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 |
| 566 | // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* |
| 567 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 568 | // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* |
| 569 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 570 | // CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]]) |
| 571 | return f43(3, g43, g43_2); |
| 572 | } |
| 573 | // passing aligned structs on stack |
| 574 | __attribute__ ((noinline)) |
| 575 | int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8, |
| 576 | int i9, s43_with_align s1, s43_with_align s2) { |
| 577 | // CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2) |
| 578 | // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0 |
| 579 | // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0 |
| 580 | // CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1 |
| 581 | // CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1 |
| 582 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s; |
| 583 | } |
| 584 | int caller43_stack() { |
| 585 | // CHECK: define i32 @caller43_stack() |
| 586 | // CHECK: %[[a:.*]] = alloca %struct.s43, align 16 |
| 587 | // CHECK: %[[b:.*]] = alloca %struct.s43, align 16 |
| 588 | // CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8* |
| 589 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 590 | // CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8* |
| 591 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 |
| 592 | // CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]]) |
| 593 | return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2); |
| 594 | } |
| 595 | |
| 596 | // rdar://13668927 |
| 597 | // We should not split argument s1 between registers and stack. |
| 598 | __attribute__ ((noinline)) |
| 599 | int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, |
| 600 | s40_no_align s1, s40_no_align s2) { |
| 601 | // CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) |
| 602 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; |
| 603 | } |
| 604 | int caller40_split() { // caller side of f40_split: s1 must not be split between registers and stack |
| 605 | // CHECK: define i32 @caller40_split() |
| 606 | // CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}}, [2 x i64] %{{.*}}) |
| 607 | return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2); |
| 608 | } |
| 609 | |
| 610 | __attribute__ ((noinline)) |
| 611 | int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7, |
| 612 | s41_with_align s1, s41_with_align s2) { |
| 613 | // CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce) |
| 614 | return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s; |
| 615 | } |
| 616 | int caller41_split() { |
| 617 | // CHECK: define i32 @caller41_split() |
| 618 | // CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}}) |
| 619 | return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2); |
| 620 | } |
| 621 | |
| 622 | // Handle homogeneous aggregates properly in variadic functions. |
| 623 | struct HFA { |
| 624 | float a, b, c, d; |
| 625 | }; |
| 626 | |
| 627 | float test_hfa(int n, ...) { |
| 628 | // CHECK-LABEL: define float @test_hfa(i32 %n, ...) |
| 629 | // CHECK: [[THELIST:%.*]] = alloca i8* |
| 630 | // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] |
| 631 | |
| 632 | // HFA is not indirect, so occupies its full 16 bytes on the stack. |
| 633 | // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 16 |
| 634 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
| 635 | |
| 636 | // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA* |
| 637 | __builtin_va_list thelist; |
| 638 | __builtin_va_start(thelist, n); |
| 639 | struct HFA h = __builtin_va_arg(thelist, struct HFA); |
| 640 | return h.d; |
| 641 | } |
| 642 | |
Bob Wilson | 373af73 | 2014-04-21 01:23:39 +0000 | [diff] [blame^] | 643 | float test_hfa_call(struct HFA *a) { // passes *a by value as a variadic argument to test_hfa |
| 644 | // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a) |
| 645 | // CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}}) |
| 646 | return test_hfa(1, *a); // return the callee's result instead of falling off the end of a non-void function |
| 647 | } |
| 648 | |
Tim Northover | a2ee433 | 2014-03-29 15:09:45 +0000 | [diff] [blame] | 649 | struct TooBigHFA { |
| 650 | float a, b, c, d, e; |
| 651 | }; |
| 652 | |
| 653 | float test_toobig_hfa(int n, ...) { |
| 654 | // CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...) |
| 655 | // CHECK: [[THELIST:%.*]] = alloca i8* |
| 656 | // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] |
| 657 | |
| 658 | // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes |
| 659 | // of stack consumed. |
| 660 | // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8 |
| 661 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
| 662 | |
| 663 | // CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8** |
| 664 | // CHECK: [[HFAPTR:%.*]] = load i8** [[HFAPTRPTR]] |
| 665 | // CHECK: bitcast i8* [[HFAPTR]] to %struct.TooBigHFA* |
| 666 | __builtin_va_list thelist; |
| 667 | __builtin_va_start(thelist, n); |
| 668 | struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA); |
| 669 | return h.d; |
| 670 | } |
| 671 | |
| 672 | struct HVA { |
| 673 | int32x4_t a, b; |
| 674 | }; |
| 675 | |
| 676 | int32x4_t test_hva(int n, ...) { |
| 677 | // CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...) |
| 678 | // CHECK: [[THELIST:%.*]] = alloca i8* |
| 679 | // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] |
| 680 | |
| 681 | // HVA is not indirect, so occupies its full 32 bytes on the stack, but it |
| 682 | // must be properly aligned. |
| 683 | // CHECK: [[ALIGN0:%.*]] = getelementptr i8* [[CURLIST]], i32 15 |
| 684 | // CHECK: [[ALIGN1:%.*]] = ptrtoint i8* [[ALIGN0]] to i64 |
| 685 | // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16 |
| 686 | // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8* |
| 687 | |
| 688 | // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[ALIGNED_LIST]], i32 32 |
| 689 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
| 690 | |
| 691 | // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA* |
| 692 | __builtin_va_list thelist; |
| 693 | __builtin_va_start(thelist, n); |
| 694 | struct HVA h = __builtin_va_arg(thelist, struct HVA); |
| 695 | return h.b; |
| 696 | } |
| 697 | |
| 698 | struct TooBigHVA { |
| 699 | int32x4_t a, b, c, d, e; |
| 700 | }; |
| 701 | |
| 702 | int32x4_t test_toobig_hva(int n, ...) { |
| 703 | // CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...) |
| 704 | // CHECK: [[THELIST:%.*]] = alloca i8* |
| 705 | // CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]] |
| 706 | |
| 707 | // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes |
| 708 | // of stack consumed. |
| 709 | // CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8 |
| 710 | // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]] |
| 711 | |
| 712 | // CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8** |
| 713 | // CHECK: [[HVAPTR:%.*]] = load i8** [[HVAPTRPTR]] |
| 714 | // CHECK: bitcast i8* [[HVAPTR]] to %struct.TooBigHVA* |
| 715 | __builtin_va_list thelist; |
| 716 | __builtin_va_start(thelist, n); |
| 717 | struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA); |
| 718 | return h.d; |
| 719 | } |