[WebAssembly] Adding 64-bit versions of __stack_pointer and other globals

There are six such globals; under wasm64, all of them except __table_base are 64-bit.

Differential Revision: https://reviews.llvm.org/D82130
diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll
index dd73c2e..dec202e 100644
--- a/llvm/test/CodeGen/WebAssembly/userstack.ll
+++ b/llvm/test/CodeGen/WebAssembly/userstack.ll
@@ -1,18 +1,16 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s
+; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s
 
 declare void @ext_func(i64* %ptr)
 declare void @ext_func_i32(i32* %ptr)
 
 ; CHECK-LABEL: alloca32:
 ; Check that there is an extra local for the stack pointer.
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @alloca32() noredzone {
  ; CHECK-NEXT: global.get $push[[L2:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L3:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]]
  ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP:.+]], $pop[[L9]]{{$}}
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]{{$}}
  %retval = alloca i32
@@ -21,18 +19,18 @@
  ; CHECK: i32.store 12($pop[[L4]]), $pop[[L0]]
  store i32 0, i32* %retval
  ; CHECK: local.get $push[[L6:.+]]=, [[SP]]{{$}}
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]]
  ret void
 }
 
 ; CHECK-LABEL: alloca3264:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @alloca3264() {
  ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]]
  ; CHECK-NEXT: local.tee $push[[L5:.+]]=, [[SP:.+]], $pop[[L6]]
  %r1 = alloca i32
  %r2 = alloca double
@@ -48,17 +46,17 @@
 }
 
 ; CHECK-LABEL: allocarray:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @allocarray() {
  ; CHECK-NEXT: global.get $push[[L4:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 144{{$}}
- ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 144{{$}}
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]]
  ; CHECK-NEXT: local.tee $push[[L11:.+]]=, 0, $pop[[L12]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]]
  %r = alloca [33 x i32]
 
- ; CHECK:      i32.const $push{{.+}}=, 24
- ; CHECK-NEXT: i32.add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}}
+ ; CHECK:      i[[PTR]].const $push{{.+}}=, 24
+ ; CHECK-NEXT: i[[PTR]].add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}}
  ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}}
  ; CHECK-NEXT: i32.store 0($pop[[L3]]), $pop[[L1]]{{$}}
  ; CHECK-NEXT: local.get $push[[L4:.+]]=, 0{{$}}
@@ -70,16 +68,16 @@
  store i32 1, i32* %p2
 
  ; CHECK-NEXT: local.get $push[[L2:.+]]=, [[SP]]{{$}}
- ; CHECK-NEXT: i32.const $push[[L7:.+]]=, 144
- ; CHECK-NEXT: i32.add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L7:.+]]=, 144
+ ; CHECK-NEXT: i[[PTR]].add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]
  ret void
 }
 
 ; CHECK-LABEL: non_mem_use
 define void @non_mem_use(i8** %addr) {
- ; CHECK: i32.const $push[[L2:.+]]=, 48
- ; CHECK-NEXT: i32.sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]]
+ ; CHECK: i[[PTR]].const $push[[L2:.+]]=, 48
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]]
  ; CHECK-NEXT: local.tee $push[[L11:.+]]=, [[SP:.+]], $pop[[L12]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]]
  %buf = alloca [27 x i8], align 16
@@ -87,8 +85,8 @@
  %r2 = alloca i64
  ; %r is at SP+8
  ; CHECK: local.get $push[[L3:.+]]=, [[SP]]
- ; CHECK: i32.const $push[[OFF:.+]]=, 8
- ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 8
+ ; CHECK-NEXT: i[[PTR]].add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]]
  ; CHECK-NEXT: call ext_func, $pop[[ARG1]]
  call void @ext_func(i64* %r)
  ; %r2 is at SP+0, no add needed
@@ -98,20 +96,20 @@
  ; Use as a value, but in a store
  ; %buf is at SP+16
  ; CHECK: local.get $push[[L5:.+]]=, [[SP]]
- ; CHECK: i32.const $push[[OFF:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]]
- ; CHECK-NEXT: i32.store 0($pop{{.+}}), $pop[[VAL]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]]
+ ; CHECK-NEXT: i[[PTR]].store 0($pop{{.+}}), $pop[[VAL]]
  %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0
  store i8* %gep, i8** %addr
  ret void
 }
 
 ; CHECK-LABEL: allocarray_inbounds:
-; CHECK: .local i32{{$}}
+; CHECK: .local i[[PTR]]{{$}}
 define void @allocarray_inbounds() {
  ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]]
  ; CHECK-NEXT: local.tee $push[[L10:.+]]=, [[SP:.+]], $pop[[L11]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L10]]{{$}}
  %r = alloca [5 x i32]
@@ -125,8 +123,8 @@
  store i32 1, i32* %p2
  call void @ext_func(i64* null);
  ; CHECK: call ext_func
- ; CHECK: i32.const $push[[L5:.+]]=, 32{{$}}
- ; CHECK-NEXT: i32.add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
+ ; CHECK: i[[PTR]].const $push[[L5:.+]]=, 32{{$}}
+ ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]]
  ret void
 }
@@ -136,7 +134,7 @@
  ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}}
  ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
  ; Target independent codegen bumps the stack pointer.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  ; Check that SP is written back to memory after decrement
  ; CHECK: global.set __stack_pointer,
  %r = alloca i32, i32 %alloc
@@ -152,12 +150,12 @@
  ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}}
  ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}}
  ; Target independent codegen bumps the stack pointer
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  %r = alloca i32, i32 %alloc
- ; CHECK-NEXT: local.tee       $push[[L8:.+]]=, {{.+}}, $pop
- ; CHECK: local.get $push[[L7:.+]]=, 0{{$}}
- ; CHECK-NEXT: i32.const       $push[[L6:.+]]=, 0{{$}}
- ; CHECK-NEXT: i32.store       0($pop[[L7]]), $pop[[L6]]{{$}}
+ ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP2:.+]], $pop
+ ; CHECK: local.get $push[[L7:.+]]=, [[SP2]]{{$}}
+ ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}}
+ ; CHECK-NEXT: i32.store 0($pop[[L7]]), $pop[[L6]]{{$}}
  store i32 0, i32* %r
  ; CHECK-NEXT: return
  ret void
@@ -167,8 +165,8 @@
 define void @dynamic_static_alloca(i32 %alloc) noredzone {
  ; Decrement SP in the prolog by the static amount and writeback to memory.
  ; CHECK: global.get $push[[L11:.+]]=, __stack_pointer{{$}}
- ; CHECK-NEXT: i32.const $push[[L12:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]]
+ ; CHECK-NEXT: i[[PTR]].const $push[[L12:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]]
  ; CHECK-NEXT: local.tee $push[[L22:.+]]=, [[SP:.+]], $pop[[L23]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L22]]
 
@@ -181,7 +179,7 @@
  store volatile i32 101, i32* %static
 
  ; Decrement SP in the body by the dynamic amount.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  ; CHECK: local.tee $push[[L16:.+]]=, [[dynamic_local:.+]], $pop{{.+}}
  ; CHECK: local.tee $push[[L15:.+]]=, [[other:.+]], $pop[[L16]]{{$}}
  ; CHECK: global.set __stack_pointer, $pop[[L15]]{{$}}
@@ -201,7 +199,7 @@
  store volatile i32 103, i32* %dynamic
 
  ; Decrement SP in the body by the dynamic amount.
- ; CHECK: i32.sub
+ ; CHECK: i[[PTR]].sub
  ; CHECK: local.tee $push{{.+}}=, [[dynamic2_local:.+]], $pop{{.+}}
  %dynamic.2 = alloca i32, i32 %alloc
 
@@ -224,8 +222,8 @@
 
  ; Writeback to memory.
  ; CHECK: local.get $push[[L24:.+]]=, [[FP]]{{$}}
- ; CHECK: i32.const $push[[L18:.+]]=, 16
- ; CHECK-NEXT: i32.add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]]
+ ; CHECK: i[[PTR]].const $push[[L18:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]]
  ; CHECK-NEXT: global.set __stack_pointer, $pop[[L19]]
  ret void
 }
@@ -273,11 +271,11 @@
 ; CHECK-LABEL: copytoreg_fi:
 define void @copytoreg_fi(i1 %cond, i32* %b) {
 entry:
- ; CHECK: i32.const $push[[L1:.+]]=, 16
- ; CHECK-NEXT: i32.sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
+ ; CHECK: i[[PTR]].const $push[[L1:.+]]=, 16
+ ; CHECK-NEXT: i[[PTR]].sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]]
  %addr = alloca i32
- ; CHECK: i32.const $push[[OFF:.+]]=, 12
- ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
+ ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 12
+ ; CHECK-NEXT: i[[PTR]].add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]]
  ; CHECK-NEXT: local.set [[COPY:.+]], $pop[[ADDR]]
  br label %body
 body:
@@ -309,7 +307,7 @@
 ; Test __builtin_frame_address(1).
 
 ; CHECK-LABEL: frameaddress_1:
-; CHECK:      i32.const $push0=, 0{{$}}
+; CHECK:      i[[PTR]].const $push0=, 0{{$}}
 ; CHECK-NEXT: call use_i8_star, $pop0{{$}}
 ; CHECK-NEXT: return{{$}}
 define void @frameaddress_1() {
@@ -330,6 +328,6 @@
   ret void
 }
 
-; CHECK: .globaltype	__stack_pointer, i32{{$}}
+; CHECK: .globaltype	__stack_pointer, i[[PTR]]{{$}}
 
 ; TODO: test over-aligned alloca