|  | ; RUN: llc -stack-symbol-ordering=0 -mcpu=core-avx -debug-only=stackmaps < %s | FileCheck %s | 
|  | ; REQUIRES: asserts | 
|  |  | 
|  | target triple = "x86_64-pc-linux-gnu" | 
|  |  | 
|  | ; Can we lower a single vector? | 
|  | define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" { | 
|  | entry: | 
|  | ; CHECK-LABEL: @test | 
|  | ; CHECK: subq	$24, %rsp | 
|  | ; CHECK: movaps	%xmm0, (%rsp) | 
|  | ; CHECK: callq	do_safepoint | 
|  | ; CHECK: movaps	(%rsp), %xmm0 | 
|  | ; CHECK: addq	$24, %rsp | 
|  | %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj) | 
|  | %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj) | 
|  | ret <2 x i8 addrspace(1)*> %obj.relocated | 
|  | } | 
|  |  | 
|  | ; Can we lower the base, derived pairs if both are vectors? | 
|  | define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" { | 
|  | entry: | 
|  | ; CHECK-LABEL: @test2 | 
|  | ; CHECK: subq	$40, %rsp | 
|  | ; CHECK: movd	%rdi, %xmm1 | 
|  | ; CHECK: pshufd	$68, %xmm1, %xmm1       # xmm1 = xmm1[0,1,0,1] | 
|  | ; CHECK: paddq	%xmm0, %xmm1 | 
|  | ; CHECK: movdqa	%xmm0, 16(%rsp) | 
|  | ; CHECK: movdqa	%xmm1, (%rsp) | 
|  | ; CHECK: callq	do_safepoint | 
|  | ; CHECK: movaps	(%rsp), %xmm0 | 
|  | ; CHECK: addq	$40, %rsp | 
|  | %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset | 
|  | %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived) | 
|  | %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived) | 
|  | ret <2 x i8 addrspace(1)*> %derived.relocated | 
|  | } | 
|  |  | 
|  | ; Originally, this was just a variant of @test2 above, but it ends up | 
|  | ; covering a bunch of interesting missed optimizations.  Specifically: | 
|  | ; - We waste a stack slot for a value that a backend transform pass | 
|  | ;   CSEd to another spilled one. | 
|  | ; - We don't remove the testb even though it serves no purpose | 
|  | ; - We could in principal reuse the argument memory (%rsi) and do away | 
|  | ;   with stack slots entirely. | 
|  | define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" { | 
|  | entry: | 
|  | ; CHECK-LABEL: @test3 | 
|  | ; CHECK: subq	$40, %rsp | 
|  | ; CHECK: testb	$1, %dil | 
|  | ; CHECK: movaps	(%rsi), %xmm0 | 
|  | ; CHECK: movaps	%xmm0, 16(%rsp) | 
|  | ; CHECK: movaps	%xmm0, (%rsp) | 
|  | ; CHECK: callq	do_safepoint | 
|  | ; CHECK: movaps	(%rsp), %xmm0 | 
|  | ; CHECK: addq	$40, %rsp | 
|  | br i1 %cnd, label %taken, label %untaken | 
|  |  | 
|  | taken:                                            ; preds = %entry | 
|  | %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr | 
|  | br label %merge | 
|  |  | 
|  | untaken:                                          ; preds = %entry | 
|  | %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr | 
|  | br label %merge | 
|  |  | 
|  | merge:                                            ; preds = %untaken, %taken | 
|  | %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] | 
|  | %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] | 
|  | %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base) | 
|  | %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj) | 
|  | %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*> | 
|  | %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base) | 
|  | %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*> | 
|  | ret <2 x i64 addrspace(1)*> %obj.relocated.casted | 
|  | } | 
|  |  | 
|  | ; Can we handle vector constants?  At the moment, we don't appear to actually | 
|  | ; get selection dag nodes for these. | 
|  | define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" { | 
|  | entry: | 
|  | ; CHECK-LABEL: @test4 | 
|  | ; CHECK: subq	$24, %rsp | 
|  | ; CHECK: xorps %xmm0, %xmm0 | 
|  | ; CHECK: movaps	%xmm0, (%rsp) | 
|  | ; CHECK: callq	do_safepoint | 
|  | ; CHECK: movaps	(%rsp), %xmm0 | 
|  | ; CHECK: addq	$24, %rsp | 
|  | %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer) | 
|  | %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj) | 
|  | ret <2 x i8 addrspace(1)*> %obj.relocated | 
|  | } | 
|  |  | 
|  | ; Check that we can lower a constant typed as i128 correctly.  Note that the | 
|  | ; actual value is representable in 64 bits.  We don't have a representation | 
|  | ; of larger than 64 bit constant in the StackMap format. | 
|  | define void @test5() gc "statepoint-example" { | 
|  | entry: | 
|  | ; CHECK-LABEL: @test5 | 
|  | ; CHECK: push | 
|  | ; CHECK: callq	do_safepoint | 
|  | ; CHECK: pop | 
|  | %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; CHECK: __LLVM_StackMaps: | 
|  |  | 
|  | ; CHECK: .Ltmp1-test | 
|  | ; Check for the two spill slots | 
|  | ; Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0] | 
|  | ; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0] | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	0 | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	0 | 
|  |  | 
|  | ; CHECK: .Ltmp3-test2 | 
|  | ; Check for the two spill slots | 
|  | ; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16] | 
|  | ; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 16, .short 7, .int 0] | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	16 | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	0 | 
|  |  | 
|  | ; CHECK: .Ltmp5-test3 | 
|  | ; Check for the four spill slots | 
|  | ; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16] | 
|  | ; Stack Maps: 		Loc 4: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16] | 
|  | ; Stack Maps: 		Loc 5: Indirect 7+16	[encoding: .byte 3, .byte 16, .short 7, .int 16] | 
|  | ; Stack Maps: 		Loc 6: Indirect 7+0		[encoding: .byte 3, .byte 16, .short 7, .int 0] | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	16 | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	16 | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	16 | 
|  | ; CHECK: .byte	3 | 
|  | ; CHECK: .byte	16 | 
|  | ; CHECK: .short	7 | 
|  | ; CHECK: .long	0 | 
|  |  | 
|  | declare void @do_safepoint() | 
|  |  | 
|  | declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) | 
|  | declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) | 
|  | declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32) |