[WebAssembly] Implement thread-local storage (local-exec model)

Summary:
Thread local variables are placed inside a `.tdata` segment. Their symbols are
offsets from the start of the segment. The address of a thread local variable
is computed as `__tls_base` + the offset from the start of the segment.

`.tdata` segment is a passive segment and `memory.init` is used once per thread
to initialize the thread local storage.

`__tls_base` is a wasm global. Since each thread has its own wasm instance,
it is effectively thread local. Currently, `__tls_base` must be initialized
at thread startup, and so cannot be used with dynamic libraries.

`__tls_base` is to be initialized with a new linker-synthesized function,
`__wasm_init_tls`, which takes as an argument a block of memory to use as the
storage for thread locals. It then initializes the block of memory and sets
`__tls_base`. As `__wasm_init_tls` will handle the memory initialization,
the memory does not have to be zeroed.

To help allocating memory for thread-local storage, a new compiler intrinsic
is introduced: `__builtin_wasm_tls_size()`. This instrinsic function returns
the size of the thread-local storage for the current function.

The expected usage is to run something like the following upon thread startup:

    __wasm_init_tls(malloc(__builtin_wasm_tls_size()));

Reviewers: tlively, aheejin, kripken, sbc100

Subscribers: dschuff, jgravelle-google, hiraditya, sunfish, jfb, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D64537

llvm-svn: 366272
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index bd699d9..1efbb3b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "WebAssembly.h"
 #include "WebAssemblyTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h" // To access function attributes.
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
@@ -171,6 +172,54 @@
     }
   }
 
+  case ISD::GlobalTLSAddress: {
+    const auto *GA = cast<GlobalAddressSDNode>(Node);
+
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+      report_fatal_error("cannot use thread-local storage without bulk memory",
+                         false);
+
+    if (GA->getGlobal()->getThreadLocalMode() !=
+        GlobalValue::LocalExecTLSModel) {
+      report_fatal_error("only -ftls-model=local-exec is supported for now",
+                         false);
+    }
+
+    MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+    assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+    SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
+    SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
+        GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
+
+    MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
+                                                    DL, MVT::i32, TLSBaseSym);
+    MachineSDNode *TLSOffset = CurDAG->getMachineNode(
+        WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
+    MachineSDNode *TLSAddress =
+        CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
+                               SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+    ReplaceNode(Node, TLSAddress);
+    return;
+  }
+
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+    switch (IntNo) {
+    case Intrinsic::wasm_tls_size: {
+      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+      MachineSDNode *TLSSize = CurDAG->getMachineNode(
+          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+      ReplaceNode(Node, TLSSize);
+      return;
+    }
+    }
+    break;
+  }
+
   default:
     break;
   }