[SystemZ] Support stackmaps and patchpoints

This adds back-end support for the @llvm.experimental.stackmap and
@llvm.experimental.patchpoint intrinsics.

llvm-svn: 326611
diff --git a/llvm/test/CodeGen/SystemZ/patchpoint-invoke.ll b/llvm/test/CodeGen/SystemZ/patchpoint-invoke.ll
new file mode 100644
index 0000000..a9bb7ac
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/patchpoint-invoke.ll
@@ -0,0 +1,65 @@
+; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
+
+; Test invoking of patchpoints
+;
+define i64 @patchpoint_invoke(i64 %p1, i64 %p2) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; CHECK-LABEL: patchpoint_invoke:
+; CHECK-NEXT:  [[FUNC_BEGIN:.L.*]]:
+; CHECK-NEXT: .cfi_startproc
+; CHECK:      .cfi_lsda 0, [[EXCEPTION_LABEL:.L[^ ]*]]
+; CHECK:      aghi %r15, -160
+
+; Unfortunately, hardcode the name of the label that begins the patchpoint:
+; CHECK:      .Ltmp0:
+; CHECK:      llilf   %r1, 559038736
+; CHECK-NEXT: basr    %r14, %r1
+; CHECK-NEXT: bcr     0, %r0
+; CHECK-NEXT: [[PP_END:.L.*]]:
+; CHECK:      br %r14
+  %resolveCall = inttoptr i64 559038736 to i8*
+  %result = invoke i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %resolveCall, i32 1, i64 %p1, i64 %p2)
+            to label %success unwind label %threw
+
+success:
+  ret i64 %result
+
+threw:
+  %0 = landingpad { i8*, i32 }
+          catch i8* null
+  ret i64 0
+}
+
+; Verify that the exception table was emitted:
+; CHECK:      [[EXCEPTION_LABEL]]:
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .uleb128 .Lttbase{{[0-9]+}}-[[TTBASEREF:.Lttbaseref[0-9]+]]
+; CHECK-NEXT: [[TTBASEREF]]:
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .uleb128 .Lcst_end{{[0-9]+}}-[[CST_BEGIN:.Lcst_begin[0-9]+]]
+; CHECK-NEXT: [[CST_BEGIN]]:
+; Verify that the unwind data covers the entire patchpoint region:
+; CHECK-NEXT: .uleb128 .Ltmp0-[[FUNC_BEGIN]]
+; CHECK-NEXT: .uleb128 [[PP_END]]-.Ltmp0
+
+
+; Verify that the stackmap section got emitted:
+; CHECK-LABEL: __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 3
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 1
+; Num LargeConstants
+; CHECK-NEXT:   .long 0
+; Num Callsites
+; CHECK-NEXT:   .long 1
+; CHECK-NEXT:   .quad patchpoint_invoke
+
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/CodeGen/SystemZ/patchpoint.ll b/llvm/test/CodeGen/SystemZ/patchpoint.ll
new file mode 100644
index 0000000..bd67bdf
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/patchpoint.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
+
+; Trivial patchpoint codegen: the first patchpoint's result is saved in a
+; register across the second patchpoint and copied back to %r2 for the return.
+define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: trivial_patchpoint_codegen:
+; CHECK:       llilf   %r1, 559038736
+; CHECK-NEXT:  basr    %r14, %r1
+; CHECK-NEXT:  bcr     0, %r0
+; CHECK:       lgr     [[REG0:%r[0-9]+]], %r2
+; CHECK:       llilf   %r1, 559038737
+; CHECK-NEXT:  basr    %r14, %r1
+; CHECK-NEXT:  bcr     0, %r0
+; CHECK:       lgr     %r2, [[REG0]]
+; CHECK:       br      %r14
+  %resolveCall2 = inttoptr i64 559038736 to i8*
+  %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 10, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  %resolveCall3 = inttoptr i64 559038737 to i8*
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 10, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+  ret i64 %result
+}
+
+; Trivial symbolic patchpoint codegen.
+;
+
+declare i64 @foo(i64 %p1, i64 %p2)
+define i64 @trivial_symbolic_patchpoint_codegen(i64 %p1, i64 %p2) {
+entry:
+; CHECK-LABEL: trivial_symbolic_patchpoint_codegen:
+; CHECK:       brasl   %r14, foo@PLT
+; CHECK-NEXT:  bcr     0, %r0
+; CHECK:       br      %r14
+  %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 9, i32 8, i8* bitcast (i64 (i64, i64)* @foo to i8*), i32 2, i64 %p1, i64 %p2)
+  ret i64 %result
+}
+
+
+; Caller frame metadata with stackmaps. This should not be optimized
+; as a leaf function.
+;
+; CHECK-LABEL: caller_meta_leaf:
+; CHECK: aghi  %r15, -184
+; CHECK: .Ltmp
+; CHECK: lmg   %r14, %r15, 296(%r15)
+; CHECK: br    %r14
+define void @caller_meta_leaf() {
+entry:
+  %metadata = alloca i64, i32 3, align 8
+  store i64 11, i64* %metadata
+  store i64 12, i64* %metadata
+  store i64 13, i64* %metadata
+  call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+  ret void
+}
+
+; Test patchpoints reusing the same TargetConstant.
+; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
+; There is no way to verify this, since it depends on memory allocation.
+; But I think it's useful to include as a working example.
+define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
+entry:
+  %tmp80 = add i64 %tmp79, -16
+  %tmp81 = inttoptr i64 %tmp80 to i64*
+  %tmp82 = load i64, i64* %tmp81, align 8
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 6, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+  %tmp83 = load i64, i64* %tmp33, align 8
+  %tmp84 = add i64 %tmp83, -24
+  %tmp85 = inttoptr i64 %tmp84 to i64*
+  %tmp86 = load i64, i64* %tmp85, align 8
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 6, i64 %arg, i64 %tmp10, i64 %tmp86)
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+  ret i64 10
+}
+
+; Test small patchpoints that don't emit calls.
+define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: small_patchpoint_codegen:
+; CHECK:      .Ltmp
+; CHECK:      bcr 0, %r0
+; CHECK:      br %r14
+  %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 2, i8* null, i32 2, i64 %p1, i64 %p2)
+  ret void
+}
+
+; Test large target address (0x556677889900AABB does not fit in 32 bits).
+define i64 @large_target_address_patchpoint_codegen() {
+entry:
+; CHECK-LABEL: large_target_address_patchpoint_codegen:
+; CHECK:        llilf   %r1, 2566957755
+; CHECK-NEXT:   iihf    %r1, 1432778632
+; CHECK-NEXT:   basr    %r14, %r1
+  %resolveCall2 = inttoptr i64 6153737369414576827 to i8* ; low word 0x9900AABB, high word 0x55667788
+  %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 14, i8* %resolveCall2, i32 0)
+  ret i64 %result
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/llvm/test/CodeGen/SystemZ/stackmap-nops.ll b/llvm/test/CodeGen/SystemZ/stackmap-nops.ll
new file mode 100644
index 0000000..066d7f6
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stackmap-nops.ll
@@ -0,0 +1,140 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @nop_test() {
+entry:
+; CHECK-LABEL: nop_test:
+
+; 2
+; CHECK:      bcr 0, %r0
+
+; 4
+; CHECK:      bc 0, 0
+
+; 6
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+
+; 8
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bcr 0, %r0
+
+; 10
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bc 0, 0
+
+; 12
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+
+; 14
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bcr 0, %r0
+
+; 16
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bc 0, 0
+
+; 18
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+
+; 20
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bcr 0, %r0
+
+; 22
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bc 0, 0
+
+; 24
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+
+; 26
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bcr 0, %r0
+
+; 28
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      bc 0, 0
+
+; 30
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+; CHECK:      brcl 0, [[LAB:.Ltmp[0-9]+]]
+; CHECK-NEXT: [[LAB]]:
+
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64  0, i32  0)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64  2, i32  2)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64  4, i32  4)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64  6, i32  6)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64  8, i32  8)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 10, i32 10)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 12)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 14)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 16)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 18, i32 18)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 20, i32 20)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 22, i32 22)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 24, i32 24)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 26, i32 26)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 28, i32 28)
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 30, i32 30)
+; Add an extra stackmap with a zero-length shadow to thwart the shadow
+; optimization. This will force all bytes of the previous shadow to be
+; padded with nops.
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 31, i32 0)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/llvm/test/CodeGen/SystemZ/stackmap-shadow-optimization.ll b/llvm/test/CodeGen/SystemZ/stackmap-shadow-optimization.ll
new file mode 100644
index 0000000..5b828e7
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stackmap-shadow-optimization.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check that the stackmap shadow optimization emits only a 2-byte nop here.
+; 8 bytes are requested, but 6 are covered by the code for the call to
+; bar.  However, the frame teardown and the return do not count towards the
+; stackmap shadow, as the call return counts as a branch target and so must
+; flush the shadow.
+; Note that in order for a thread to not return into the patched space,
+; the call must be at the end of the shadow, so the required nop must be
+; before the call, not after.
+define void @shadow_optimization_test() {
+entry:
+; CHECK-LABEL:  shadow_optimization_test:
+; CHECK:        brasl %r14, bar@PLT
+; CHECK-NEXT:   .Ltmp
+; CHECK-NEXT:   bcr 0, %r0
+; CHECK-NEXT:   brasl %r14, bar@PLT
+; CHECK-NEXT:   brasl %r14, bar@PLT
+  call void @bar()
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 8)
+  call void @bar()
+  call void @bar()
+  ret void
+}
+declare void @bar()
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll
new file mode 100644
index 0000000..bf1a2e3
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/stackmap.ll
@@ -0,0 +1,537 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+; Note: Print verbose stackmaps using -debug-only=stackmaps.
+
+; CHECK:       .section .llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 3
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 15
+; Num LargeConstants
+; CHECK-NEXT:   .long 3
+; Num Callsites
+; CHECK-NEXT:   .long 19
+
+; Functions and stack size
+; CHECK-NEXT:   .quad constantargs
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad osrinline
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad osrcold
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad propertyRead
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad propertyWrite
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad jsVoidCall
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad jsIntCall
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad spilledValue
+; CHECK-NEXT:   .quad 240
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad spilledStackMapValue
+; CHECK-NEXT:   .quad 200
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad spillSubReg
+; CHECK-NEXT:   .quad 168
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad liveConstant
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad directFrameIdx
+; CHECK-NEXT:   .quad 200
+; CHECK-NEXT:   .quad 2
+; CHECK-NEXT:   .quad longid
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad 4
+; CHECK-NEXT:   .quad clobberScratch
+; CHECK-NEXT:   .quad 168
+; CHECK-NEXT:   .quad 1
+; CHECK-NEXT:   .quad needsStackRealignment
+; CHECK-NEXT:   .quad -1
+; CHECK-NEXT:   .quad 1
+
+; Large Constants
+; CHECK-NEXT:   .quad   2147483648
+; CHECK-NEXT:   .quad   4294967295
+; CHECK-NEXT:   .quad   4294967296
+
+; Callsites
+; Constant arguments
+;
+; CHECK-NEXT:   .quad   1
+; CHECK-NEXT:   .long   .L{{.*}}-constantargs
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  12
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65536
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   2000000000
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   2147483647
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; LargeConstant at index 0
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; LargeConstant at index 1
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   1
+; LargeConstant at index 2
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   2
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   -1
+
+define void @constantargs() {
+entry:
+  %0 = inttoptr i64 12345 to i8*
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 14, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
+  ret void
+}
+
+; Inline OSR Exit
+;
+; CHECK:        .long   .L{{.*}}-osrinline
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+define void @osrinline(i64 %a, i64 %b) {
+entry:
+  ; Runtime void->void call.
+  call void inttoptr (i64 -559038737 to void ()*)()
+  ; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
+  call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+  ret void
+}
+
+; Cold OSR Exit
+;
+; 2 live variables in registers.
+;
+; CHECK:        .long   .L{{.*}}-osrcold
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+define void @osrcold(i64 %a, i64 %b) {
+entry:
+  %test = icmp slt i64 %a, %b
+  br i1 %test, label %ret, label %cold
+cold:
+  ; OSR patchpoint reserving 14 bytes (the i32 14 operand) with 2 live vars.
+  %thunk = inttoptr i64 -559038737 to i8*
+  call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 14, i8* %thunk, i32 0, i64 %a, i64 %b)
+  unreachable
+ret:
+  ret void
+}
+
+; Property Read
+; CHECK:        .long   .L{{.*}}-propertyRead
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+define i64 @propertyRead(i64* %obj) {
+entry:
+  %resolveRead = inttoptr i64 -559038737 to i8*
+  %result = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 14, i8* %resolveRead, i32 1, i64* %obj)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Property Write
+; CHECK:        .long   .L{{.*}}-propertyWrite
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
+entry:
+  %resolveWrite = inttoptr i64 -559038737 to i8*
+  call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 14, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+  ret void
+}
+
+; Void JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK:        .long   .L{{.*}}-jsVoidCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 -559038737 to i8*
+  call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 14, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  ret void
+}
+
+; i64 JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK:        .long   .L{{.*}}-jsIntCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 -559038737 to i8*
+  %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 14, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Spilled stack map values.
+;
+; Verify 17 stack map entries.
+;
+; CHECK:        .long .L{{.*}}-spilledValue
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 17
+;
+; Check that at least one is a spilled entry from the parameter area.
+; Location: Indirect r15 + XX
+; CHECK:        .byte  3
+; CHECK-NEXT:   .byte  0
+; CHECK-NEXT:   .short 8
+; CHECK-NEXT:   .short 15
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .long
+define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
+entry:
+  call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 14, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+  ret void
+}
+
+; Spilled stack map values.
+;
+; Verify 17 stack map entries.
+;
+; CHECK:        .long .L{{.*}}-spilledStackMapValue
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 17
+;
+; Check that at least one is a spilled entry from the parameter area.
+; Location: Indirect r15 + XX
+; CHECK:        .byte  3
+; CHECK-NEXT:   .byte  0
+; CHECK-NEXT:   .short 8
+; CHECK-NEXT:   .short 15
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .long
+define void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
+entry:
+  call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+  ret void
+}
+
+; Spill a subregister stackmap operand.
+;
+; CHECK:        .long .L{{.*}}-spillSubReg
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+;
+; Check that the subregister operand is a 4-byte spill.
+; Location: Indirect, 4-byte, %r15 + 164
+; CHECK:        .byte  3
+; CHECK-NEXT:   .byte  0
+; CHECK-NEXT:   .short 4
+; CHECK-NEXT:   .short 15
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .long  164
+define void @spillSubReg(i64 %arg) #0 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  unreachable
+
+bb2:
+  %tmp = load i64, i64* inttoptr (i64 140685446136880 to i64*)
+  br i1 undef, label %bb16, label %bb17
+
+bb16:
+  unreachable
+
+bb17:
+  %tmp32 = trunc i64 %tmp to i32
+  br i1 undef, label %bb60, label %bb61
+
+bb60:
+  tail call void asm sideeffect "nopr %r0", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14}"() nounwind
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 6, i32 %tmp32)
+  unreachable
+
+bb61:
+  unreachable
+}
+
+; Map a constant value.
+;
+; CHECK:        .long .L{{.*}}-liveConstant
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+; Loc 0: SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   33
+
+define void @liveConstant() {
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 6, i32 33)
+  ret void
+}
+
+; Directly map an alloca's address.
+;
+; Callsite 16
+; CHECK:        .long .L{{.*}}-directFrameIdx
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short	1
+; Loc 0: Direct %r15 + ofs
+; CHECK-NEXT:   .byte	2
+; CHECK-NEXT:   .byte	0
+; CHECK-NEXT:   .short	8
+; CHECK-NEXT:   .short	15
+; CHECK-NEXT:   .short	0
+; CHECK-NEXT:   .long
+
+; Callsite 17
+; CHECK:        .long .L{{.*}}-directFrameIdx
+; CHECK-NEXT:   .short	0
+; 2 locations
+; CHECK-NEXT:   .short	2
+; Loc 0: Direct %r15 + ofs
+; CHECK-NEXT:   .byte	2
+; CHECK-NEXT:   .byte	0
+; CHECK-NEXT:   .short	8
+; CHECK-NEXT:   .short	15
+; CHECK-NEXT:   .short	0
+; CHECK-NEXT:   .long
+; Loc 1: Direct %r15 + ofs
+; CHECK-NEXT:   .byte	2
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  8
+; CHECK-NEXT:   .short	15
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long
+define void @directFrameIdx() {
+entry:
+  %metadata1 = alloca i64, i32 3, align 8
+  store i64 11, i64* %metadata1
+  store i64 12, i64* %metadata1
+  store i64 13, i64* %metadata1
+  call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
+  %metadata2 = alloca i8, i32 4, align 8
+  %metadata3 = alloca i16, i32 4, align 8
+  call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 17, i32 6, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
+  ret void
+}
+
+; Test a 64-bit ID.
+;
+; CHECK:        .quad 4294967295
+; CHECK:        .long .L{{.*}}-longid
+; CHECK:        .quad 4294967296
+; CHECK:        .long .L{{.*}}-longid
+; CHECK:        .quad 9223372036854775807
+; CHECK:        .long .L{{.*}}-longid
+; CHECK:        .quad -1
+; CHECK:        .long .L{{.*}}-longid
+define void @longid() {
+entry:
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
+  ret void
+}
+
+; Map a value when %r0 and %r1 are the only free registers.
+; The scratch registers should not be used for a live stackmap value.
+;
+; CHECK:        .long .L{{.*}}-clobberScratch
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+; Loc 0: Indirect %r15 + offset
+; CHECK-NEXT:   .byte   3
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .short  4
+; CHECK-NEXT:   .short  15
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   164
+define void @clobberScratch(i32 %a) {
+  tail call void asm sideeffect "nopr %r0", "~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14}"() nounwind
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+  ret void
+}
+
+; A stack frame which needs to be realigned at runtime (to meet alignment
+; criteria for values on the stack) does not have a fixed frame size.
+; CHECK:        .long .L{{.*}}-needsStackRealignment
+; CHECK-NEXT:   .short 0
+; 0 locations
+; CHECK-NEXT:   .short 0
+define void @needsStackRealignment() {
+  %val = alloca i64, i32 3, align 128
+  tail call void (...) @escape_values(i64* %val)
+; Note: Adding any non-constant to the stackmap would fail because we
+; expected to be able to address off the frame pointer.  In a realigned
+; frame, we must use the stack pointer instead.  This is a separate bug.
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
+  ret void
+}
+declare void @escape_values(...)
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)