// RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s

#include <stddef.h>
#include <stdint.h>

// CHECK-LABEL: define void @f_void()
void f_void(void) {}

// Scalar arguments and return values smaller than the word size are extended
// according to the sign of their type, up to 32 bits.

// CHECK-LABEL: define zeroext i1 @f_scalar_0(i1 zeroext %x)
_Bool f_scalar_0(_Bool x) { return x; }

// CHECK-LABEL: define signext i8 @f_scalar_1(i8 signext %x)
int8_t f_scalar_1(int8_t x) { return x; }

// CHECK-LABEL: define zeroext i8 @f_scalar_2(i8 zeroext %x)
uint8_t f_scalar_2(uint8_t x) { return x; }

// CHECK-LABEL: define i32 @f_scalar_3(i32 %x)
int32_t f_scalar_3(int32_t x) { return x; }

// CHECK-LABEL: define i64 @f_scalar_4(i64 %x)
int64_t f_scalar_4(int64_t x) { return x; }

// CHECK-LABEL: define float @f_fp_scalar_1(float %x)
float f_fp_scalar_1(float x) { return x; }

// CHECK-LABEL: define double @f_fp_scalar_2(double %x)
double f_fp_scalar_2(double x) { return x; }

// Scalars larger than 2*xlen are passed/returned indirectly. However, the
// RISC-V LLVM backend can handle this fine, so the function signature doesn't
// need to be modified.

// CHECK-LABEL: define fp128 @f_fp_scalar_3(fp128 %x)
long double f_fp_scalar_3(long double x) { return x; }

// Empty structs or unions are ignored.

struct empty_s {};

// CHECK-LABEL: define void @f_agg_empty_struct()
struct empty_s f_agg_empty_struct(struct empty_s x) {
  return x;
}

union empty_u {};

// CHECK-LABEL: define void @f_agg_empty_union()
union empty_u f_agg_empty_union(union empty_u x) {
  return x;
}

// Aggregates <= 2*xlen may be passed in registers, so they are coerced to
// integer arguments. The same rules apply to return values.

struct tiny {
  uint8_t a, b, c, d;
};

// CHECK-LABEL: define void @f_agg_tiny(i32 %x.coerce)
void f_agg_tiny(struct tiny x) {
  x.a += x.b;
  x.c += x.d;
}

// CHECK-LABEL: define i32 @f_agg_tiny_ret()
struct tiny f_agg_tiny_ret() {
  return (struct tiny){1, 2, 3, 4};
}
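
// Illustrative sketch, not checked by FileCheck: on little-endian RV32 the
// four uint8_t fields of struct tiny pack into the coerced i32 with 'a' in
// the least significant byte, so (struct tiny){1, 2, 3, 4} travels as the
// value 0x04030201. A hypothetical helper making the packing explicit:
//
//   static uint32_t tiny_bits(struct tiny t) {
//     uint32_t v;
//     __builtin_memcpy(&v, &t, sizeof v); // reinterpret the 4 bytes as u32
//     return v;                           // {1, 2, 3, 4} -> 0x04030201
//   }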

typedef uint8_t v4i8 __attribute__((vector_size(4)));
typedef int32_t v1i32 __attribute__((vector_size(4)));

// CHECK-LABEL: define void @f_vec_tiny_v4i8(i32 %x.coerce)
void f_vec_tiny_v4i8(v4i8 x) {
  x[0] = x[1];
  x[2] = x[3];
}

// CHECK-LABEL: define i32 @f_vec_tiny_v4i8_ret()
v4i8 f_vec_tiny_v4i8_ret() {
  return (v4i8){1, 2, 3, 4};
}

// CHECK-LABEL: define void @f_vec_tiny_v1i32(i32 %x.coerce)
void f_vec_tiny_v1i32(v1i32 x) {
  x[0] = 114;
}

// CHECK-LABEL: define i32 @f_vec_tiny_v1i32_ret()
v1i32 f_vec_tiny_v1i32_ret() {
  return (v1i32){1};
}

struct small {
  int32_t a, *b;
};

// CHECK-LABEL: define void @f_agg_small([2 x i32] %x.coerce)
void f_agg_small(struct small x) {
  x.a += *x.b;
  x.b = &x.a;
}

// CHECK-LABEL: define [2 x i32] @f_agg_small_ret()
struct small f_agg_small_ret() {
  return (struct small){1, 0};
}

typedef uint8_t v8i8 __attribute__((vector_size(8)));
typedef int64_t v1i64 __attribute__((vector_size(8)));

// CHECK-LABEL: define void @f_vec_small_v8i8(i64 %x.coerce)
void f_vec_small_v8i8(v8i8 x) {
  x[0] = x[7];
}

// CHECK-LABEL: define i64 @f_vec_small_v8i8_ret()
v8i8 f_vec_small_v8i8_ret() {
  return (v8i8){1, 2, 3, 4, 5, 6, 7, 8};
}

// CHECK-LABEL: define void @f_vec_small_v1i64(i64 %x.coerce)
void f_vec_small_v1i64(v1i64 x) {
  x[0] = 114;
}

// CHECK-LABEL: define i64 @f_vec_small_v1i64_ret()
v1i64 f_vec_small_v1i64_ret() {
  return (v1i64){1};
}

// Aggregates of 2*xlen size and 2*xlen alignment should be coerced to a
// single 2*xlen-sized argument, to ensure that alignment can be maintained if
// passed on the stack.

struct small_aligned {
  int64_t a;
};

// CHECK-LABEL: define void @f_agg_small_aligned(i64 %x.coerce)
void f_agg_small_aligned(struct small_aligned x) {
  x.a += x.a;
}

// CHECK-LABEL: define i64 @f_agg_small_aligned_ret(i64 %x.coerce)
struct small_aligned f_agg_small_aligned_ret(struct small_aligned x) {
  return (struct small_aligned){10};
}
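
// Illustrative sketch, not checked by FileCheck: a hypothetical call site
// sees the same coercion, so the argument below would be materialized as a
// single i64 rather than a [2 x i32] pair, preserving its 8-byte alignment
// if it ends up on the stack (compare the i64 argument in f_va_caller below).
//
//   void call_agg_small_aligned(void) {
//     f_agg_small_aligned((struct small_aligned){16});
//   }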

// Aggregates larger than 2*xlen will be passed and returned indirectly.
struct large {
  int32_t a, b, c, d;
};

// CHECK-LABEL: define void @f_agg_large(%struct.large* %x)
void f_agg_large(struct large x) {
  x.a = x.b + x.c + x.d;
}

// The address the returned struct should be written to is passed as the
// first argument.
// CHECK-LABEL: define void @f_agg_large_ret(%struct.large* noalias sret %agg.result, i32 %i, i8 signext %j)
struct large f_agg_large_ret(int32_t i, int8_t j) {
  return (struct large){1, 2, 3, 4};
}

typedef unsigned char v16i8 __attribute__((vector_size(16)));

// CHECK-LABEL: define void @f_vec_large_v16i8(<16 x i8>*)
void f_vec_large_v16i8(v16i8 x) {
  x[0] = x[7];
}

// CHECK-LABEL: define void @f_vec_large_v16i8_ret(<16 x i8>* noalias sret %agg.result)
v16i8 f_vec_large_v16i8_ret() {
  return (v16i8){1, 2, 3, 4, 5, 6, 7, 8};
}

// Scalars passed on the stack should not have signext/zeroext attributes
// (they are anyext).

// CHECK-LABEL: define i32 @f_scalar_stack_1(i32 %a.coerce, [2 x i32] %b.coerce, i64 %c.coerce, %struct.large* %d, i8 zeroext %e, i8 signext %f, i8 %g, i8 %h)
int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
                     struct large d, uint8_t e, int8_t f, uint8_t g, int8_t h) {
  return g + h;
}

// CHECK-LABEL: define i32 @f_scalar_stack_2(i32 %a, i64 %b, float %c, double %d, fp128 %e, i8 zeroext %f, i8 %g, i8 %h)
int f_scalar_stack_2(int32_t a, int64_t b, float c, double d, long double e,
                     uint8_t f, int8_t g, uint8_t h) {
  return g + h;
}

// Ensure that scalars passed on the stack are still determined correctly in
// the presence of large return values that consume a register due to the need
// to pass a pointer.

// CHECK-LABEL: define void @f_scalar_stack_3(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
struct large f_scalar_stack_3(int32_t a, int64_t b, double c, long double d,
                              uint8_t e, int8_t f, uint8_t g) {
  return (struct large){a, e, f, g};
}

// CHECK-LABEL: define fp128 @f_scalar_stack_4(i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
long double f_scalar_stack_4(int32_t a, int64_t b, double c, long double d,
                             uint8_t e, int8_t f, uint8_t g) {
  return d;
}

// Aggregates and >=XLen scalars passed on the stack should be lowered just as
// they would be if passed via registers.

// CHECK-LABEL: define void @f_scalar_stack_5(double %a, i64 %b, double %c, i64 %d, i32 %e, i64 %f, float %g, double %h, fp128 %i)
void f_scalar_stack_5(double a, int64_t b, double c, int64_t d, int e,
                      int64_t f, float g, double h, long double i) {}

// CHECK-LABEL: define void @f_agg_stack(double %a, i64 %b, double %c, i64 %d, i32 %e.coerce, [2 x i32] %f.coerce, i64 %g.coerce, %struct.large* %h)
void f_agg_stack(double a, int64_t b, double c, int64_t d, struct tiny e,
                 struct small f, struct small_aligned g, struct large h) {}

// Ensure that ABI lowering happens as expected for vararg calls. For RV32
// with the base integer calling convention there will be no observable
// differences in the lowered IR for a call with varargs vs without.

int f_va_callee(int, ...);

// CHECK-LABEL: define void @f_va_caller()
// CHECK: call i32 (i32, ...) @f_va_callee(i32 1, i32 2, i64 3, double 4.000000e+00, double 5.000000e+00, i32 {{%.*}}, [2 x i32] {{%.*}}, i64 {{%.*}}, %struct.large* {{%.*}})
void f_va_caller() {
  f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct tiny){6, 7, 8, 9},
              (struct small){10, NULL}, (struct small_aligned){11},
              (struct large){12, 13, 14, 15});
}

// CHECK-LABEL: define i32 @f_va_1(i8* %fmt, ...) {{.*}} {
// CHECK: [[FMT_ADDR:%.*]] = alloca i8*, align 4
// CHECK: [[VA:%.*]] = alloca i8*, align 4
// CHECK: [[V:%.*]] = alloca i32, align 4
// CHECK: store i8* %fmt, i8** [[FMT_ADDR]], align 4
// CHECK: [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK: call void @llvm.va_start(i8* [[VA1]])
// CHECK: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4
// CHECK: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
// CHECK: [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32*
// CHECK: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK: store i32 [[TMP1]], i32* [[V]], align 4
// CHECK: [[VA2:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK: call void @llvm.va_end(i8* [[VA2]])
// CHECK: [[TMP2:%.*]] = load i32, i32* [[V]], align 4
// CHECK: ret i32 [[TMP2]]
// CHECK: }
int f_va_1(char *fmt, ...) {
  __builtin_va_list va;

  __builtin_va_start(va, fmt);
  int v = __builtin_va_arg(va, int);
  __builtin_va_end(va);

  return v;
}

// An "aligned" register pair (where the first register is even-numbered) is
// used to pass varargs with 2x xlen alignment and 2x xlen size. Ensure the
// correct offsets are used.

// CHECK-LABEL: @f_va_2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca i8*, align 4
// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4
// CHECK-NEXT: [[V:%.*]] = alloca double, align 8
// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4
// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[ARGP_CUR]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 7
// CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -8
// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = inttoptr i32 [[TMP2]] to i8*
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR_ALIGNED]], i32 8
// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR_ALIGNED]] to double*
// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
// CHECK-NEXT: store double [[TMP4]], double* [[V]], align 8
// CHECK-NEXT: [[VA2:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK-NEXT: call void @llvm.va_end(i8* [[VA2]])
// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[V]], align 8
// CHECK-NEXT: ret double [[TMP5]]
double f_va_2(char *fmt, ...) {
  __builtin_va_list va;

  __builtin_va_start(va, fmt);
  double v = __builtin_va_arg(va, double);
  __builtin_va_end(va);

  return v;
}

// Two "aligned" register pairs.

// CHECK-LABEL: @f_va_3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca i8*, align 4
// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4
// CHECK-NEXT: [[V:%.*]] = alloca double, align 8
// CHECK-NEXT: [[W:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[X:%.*]] = alloca double, align 8
// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4
// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[ARGP_CUR]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 7
// CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -8
// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = inttoptr i32 [[TMP2]] to i8*
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR_ALIGNED]], i32 8
// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARGP_CUR_ALIGNED]] to double*
// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8
// CHECK-NEXT: store double [[TMP4]], double* [[V]], align 8
// CHECK-NEXT: [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 4
// CHECK-NEXT: store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR2]] to i32*
// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK-NEXT: store i32 [[TMP6]], i32* [[W]], align 4
// CHECK-NEXT: [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARGP_CUR4]] to i32
// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 7
// CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], -8
// CHECK-NEXT: [[ARGP_CUR4_ALIGNED:%.*]] = inttoptr i32 [[TMP9]] to i8*
// CHECK-NEXT: [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4_ALIGNED]], i32 8
// CHECK-NEXT: store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ARGP_CUR4_ALIGNED]] to double*
// CHECK-NEXT: [[TMP11:%.*]] = load double, double* [[TMP10]], align 8
// CHECK-NEXT: store double [[TMP11]], double* [[X]], align 8
// CHECK-NEXT: [[VA6:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK-NEXT: call void @llvm.va_end(i8* [[VA6]])
// CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[V]], align 8
// CHECK-NEXT: [[TMP13:%.*]] = load double, double* [[X]], align 8
// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
// CHECK-NEXT: ret double [[ADD]]
double f_va_3(char *fmt, ...) {
  __builtin_va_list va;

  __builtin_va_start(va, fmt);
  double v = __builtin_va_arg(va, double);
  int w = __builtin_va_arg(va, int);
  double x = __builtin_va_arg(va, double);
  __builtin_va_end(va);

  return v + x;
}

// CHECK-LABEL: define i32 @f_va_4(i8* %fmt, ...) {{.*}} {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FMT_ADDR:%.*]] = alloca i8*, align 4
// CHECK-NEXT: [[VA:%.*]] = alloca i8*, align 4
// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[LD:%.*]] = alloca fp128, align 16
// CHECK-NEXT: [[TS:%.*]] = alloca [[STRUCT_TINY:%.*]], align 1
// CHECK-NEXT: [[SS:%.*]] = alloca [[STRUCT_SMALL:%.*]], align 4
// CHECK-NEXT: [[LS:%.*]] = alloca [[STRUCT_LARGE:%.*]], align 4
// CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i8* [[FMT:%.*]], i8** [[FMT_ADDR]], align 4
// CHECK-NEXT: [[VA1:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK-NEXT: call void @llvm.va_start(i8* [[VA1]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR]], i32 4
// CHECK-NEXT: store i8* [[ARGP_NEXT]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ARGP_CUR]] to i32*
// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK-NEXT: store i32 [[TMP1]], i32* [[V]], align 4
// CHECK-NEXT: [[ARGP_CUR2:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT3:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR2]], i32 4
// CHECK-NEXT: store i8* [[ARGP_NEXT3]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ARGP_CUR2]] to fp128**
// CHECK-NEXT: [[TMP3:%.*]] = load fp128*, fp128** [[TMP2]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load fp128, fp128* [[TMP3]], align 16
// CHECK-NEXT: store fp128 [[TMP4]], fp128* [[LD]], align 16
// CHECK-NEXT: [[ARGP_CUR4:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT5:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR4]], i32 4
// CHECK-NEXT: store i8* [[ARGP_NEXT5]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[ARGP_CUR4]] to %struct.tiny*
// CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.tiny* [[TS]] to i8*
// CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.tiny* [[TMP5]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 4, i32 1, i1 false)
// CHECK-NEXT: [[ARGP_CUR6:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT7:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR6]], i32 8
// CHECK-NEXT: store i8* [[ARGP_NEXT7]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[ARGP_CUR6]] to %struct.small*
// CHECK-NEXT: [[TMP9:%.*]] = bitcast %struct.small* [[SS]] to i8*
// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct.small* [[TMP8]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 8, i32 4, i1 false)
// CHECK-NEXT: [[ARGP_CUR8:%.*]] = load i8*, i8** [[VA]], align 4
// CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, i8* [[ARGP_CUR8]], i32 4
// CHECK-NEXT: store i8* [[ARGP_NEXT9]], i8** [[VA]], align 4
// CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[ARGP_CUR8]] to %struct.large**
// CHECK-NEXT: [[TMP12:%.*]] = load %struct.large*, %struct.large** [[TMP11]], align 4
// CHECK-NEXT: [[TMP13:%.*]] = bitcast %struct.large* [[LS]] to i8*
// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.large* [[TMP12]] to i8*
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP13]], i8* [[TMP14]], i32 16, i32 4, i1 false)
// CHECK-NEXT: [[VA10:%.*]] = bitcast i8** [[VA]] to i8*
// CHECK-NEXT: call void @llvm.va_end(i8* [[VA10]])
int f_va_4(char *fmt, ...) {
  __builtin_va_list va;

  __builtin_va_start(va, fmt);
  int v = __builtin_va_arg(va, int);
  long double ld = __builtin_va_arg(va, long double);
  struct tiny ts = __builtin_va_arg(va, struct tiny);
  struct small ss = __builtin_va_arg(va, struct small);
  struct large ls = __builtin_va_arg(va, struct large);
  __builtin_va_end(va);

  int ret = (int)((long double)v + ld);
  ret = ret + ts.a + ts.b + ts.c + ts.d;
  ret = ret + ss.a + (int)ss.b;
  ret = ret + ls.a + ls.b + ls.c + ls.d;

  return ret;
}