[WebAssembly] Initialize memory in start function

Summary:
 - `__wasm_init_memory` is now the WebAssembly start function instead
   of being called from `__wasm_call_ctors` or called directly by the
   runtime.
 - Adds a new synthetic data symbol `__wasm_init_memory_flag` that is
   atomically incremented from zero to one by the thread responsible
   for initializing memory.
 - All threads now unconditionally perform data.drop on all passive
   segments.
 - Removes --passive-segments and --active-segments flags and controls
   segment type based on --shared-memory instead. The deleted flags
   were only present to ameliorate the upgrade path in Emscripten.

Reviewers: sbc100, aheejin

Subscribers: dschuff, jgravelle-google, sunfish, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65783

llvm-svn: 370965
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index ba71da8..58d585b 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -254,6 +254,15 @@
     }
   }
 
+  // Make space for the memory initialization flag
+  if (WasmSym::initMemoryFlag) {
+    memoryPtr = alignTo(memoryPtr, 4);
+    WasmSym::initMemoryFlag->setVirtualAddress(memoryPtr);
+    log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}",
+                "__wasm_init_memory_flag", memoryPtr, 4, 4));
+    memoryPtr += 4;
+  }
+
   // TODO: Add .bss space here.
   if (WasmSym::dataEnd)
     WasmSym::dataEnd->setVirtualAddress(memoryPtr);
@@ -333,6 +342,7 @@
   addSection(out.globalSec);
   addSection(out.eventSec);
   addSection(out.exportSec);
+  addSection(out.startSec);
   addSection(out.elemSec);
   addSection(out.dataCountSec);
 
@@ -363,15 +373,15 @@
   StringMap<std::string> used;
   StringMap<std::string> required;
   StringMap<std::string> disallowed;
+  SmallSet<std::string, 8> &allowed = out.targetFeaturesSec->features;
   bool tlsUsed = false;
 
   // Only infer used features if user did not specify features
   bool inferFeatures = !config->features.hasValue();
 
   if (!inferFeatures) {
-    for (auto &feature : config->features.getValue())
-      out.targetFeaturesSec->features.insert(feature);
-    // No need to read or check features
+    auto &explicitFeatures = config->features.getValue();
+    allowed.insert(explicitFeatures.begin(), explicitFeatures.end());
     if (!config->checkFeatures)
       return;
   }
@@ -397,21 +407,20 @@
       }
     }
 
-    for (InputSegment *segment : file->segments) {
-      if (!segment->live)
-        continue;
+    // Find TLS data segments
+    auto isTLS = [](InputSegment *segment) {
       StringRef name = segment->getName();
-      if (name.startswith(".tdata") || name.startswith(".tbss"))
-        tlsUsed = true;
-    }
+      return segment->live &&
+             (name.startswith(".tdata") || name.startswith(".tbss"));
+    };
+    tlsUsed = tlsUsed ||
+              std::any_of(file->segments.begin(), file->segments.end(), isTLS);
   }
 
   if (inferFeatures)
-    out.targetFeaturesSec->features.insert(used.keys().begin(),
-                                           used.keys().end());
+    allowed.insert(used.keys().begin(), used.keys().end());
 
-  if (out.targetFeaturesSec->features.count("atomics") &&
-      !config->sharedMemory) {
+  if (allowed.count("atomics") && !config->sharedMemory) {
     if (inferFeatures)
       error(Twine("'atomics' feature is used by ") + used["atomics"] +
             ", so --shared-memory must be used");
@@ -426,18 +435,22 @@
     error("'atomics' feature is disallowed by " + disallowed["atomics"] +
           ", so --shared-memory must not be used");
 
-  if (!used.count("bulk-memory") && config->passiveSegments)
-    error("'bulk-memory' feature must be used in order to emit passive "
-          "segments");
+  if (!allowed.count("atomics") && config->sharedMemory)
+    error("'atomics' feature must be used in order to use shared "
+          "memory");
 
-  if (!used.count("bulk-memory") && tlsUsed)
+  if (!allowed.count("bulk-memory") && config->sharedMemory)
+    error("'bulk-memory' feature must be used in order to use shared "
+          "memory");
+
+  if (!allowed.count("bulk-memory") && tlsUsed)
     error("'bulk-memory' feature must be used in order to use thread-local "
           "storage");
 
   // Validate that used features are allowed in output
   if (!inferFeatures) {
     for (auto &feature : used.keys()) {
-      if (!out.targetFeaturesSec->features.count(feature))
+      if (!allowed.count(feature))
         error(Twine("Target feature '") + feature + "' used by " +
               used[feature] + " is not allowed.");
     }
@@ -655,7 +668,7 @@
       if (s == nullptr) {
         LLVM_DEBUG(dbgs() << "new segment: " << name << "\n");
         s = make<OutputSegment>(name, segments.size());
-        if (config->passiveSegments || name == ".tdata")
+        if (config->sharedMemory || name == ".tdata")
           s->initFlags = WASM_SEGMENT_IS_PASSIVE;
         segments.push_back(s);
       }
@@ -678,32 +691,119 @@
 
 void Writer::createInitMemoryFunction() {
   LLVM_DEBUG(dbgs() << "createInitMemoryFunction\n");
+  assert(WasmSym::initMemoryFlag);
+  uint32_t flagAddress = WasmSym::initMemoryFlag->getVirtualAddress();
   std::string bodyContent;
   {
     raw_string_ostream os(bodyContent);
     writeUleb128(os, 0, "num locals");
 
-    // initialize passive data segments
-    for (const OutputSegment *s : segments) {
-      if (s->initFlags & WASM_SEGMENT_IS_PASSIVE && s->name != ".tdata") {
-        // destination address
-        writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
-        writeSleb128(os, s->startVA, "destination address");
-        // source segment offset
-        writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
-        writeSleb128(os, 0, "segment offset");
-        // memory region size
-        writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
-        writeSleb128(os, s->size, "memory region size");
-        // memory.init instruction
-        writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
-        writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "MEMORY.INIT");
-        writeUleb128(os, s->index, "segment index immediate");
-        writeU8(os, 0, "memory index immediate");
-        // data.drop instruction
-        writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
-        writeUleb128(os, WASM_OPCODE_DATA_DROP, "DATA.DROP");
-        writeUleb128(os, s->index, "segment index immediate");
+    if (segments.size()) {
+      // Initialize memory in a thread-safe manner. The thread that successfully
+      // increments the flag from 0 to 1 is is responsible for performing the
+      // memory initialization. Other threads go sleep on the flag until the
+      // first thread finishing initializing memory, increments the flag to 2,
+      // and wakes all the other threads. Once the flag has been set to 2,
+      // subsequently started threads will skip the sleep. All threads
+      // unconditionally drop their passive data segments once memory has been
+      // initialized. The generated code is as follows:
+      //
+      // (func $__wasm_init_memory
+      //  (if
+      //   (i32.atomic.rmw.cmpxchg align=2 offset=0
+      //    (i32.const $__init_memory_flag)
+      //    (i32.const 0)
+      //    (i32.const 1)
+      //   )
+      //   (then
+      //    (drop
+      //     (i32.atomic.wait align=2 offset=0
+      //      (i32.const $__init_memory_flag)
+      //      (i32.const 1)
+      //      (i32.const -1)
+      //     )
+      //    )
+      //   )
+      //   (else
+      //    ( ... initialize data segments ... )
+      //    (i32.atomic.store align=2 offset=0
+      //     (i32.const $__init_memory_flag)
+      //     (i32.const 2)
+      //    )
+      //    (drop
+      //     (i32.atomic.notify align=2 offset=0
+      //      (i32.const $__init_memory_flag)
+      //      (i32.const -1u)
+      //     )
+      //    )
+      //   )
+      //  )
+      //  ( ... drop data segments ... )
+      // )
+
+      // Atomically check whether this is the main thread.
+      writeI32Const(os, flagAddress, "flag address");
+      writeI32Const(os, 0, "expected flag value");
+      writeI32Const(os, 1, "flag value");
+      writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
+      writeUleb128(os, WASM_OPCODE_I32_RMW_CMPXCHG, "i32.atomic.rmw.cmpxchg");
+      writeMemArg(os, 2, 0);
+      writeU8(os, WASM_OPCODE_IF, "IF");
+      writeU8(os, WASM_TYPE_NORESULT, "blocktype");
+
+      // Did not increment 0, so wait for main thread to initialize memory
+      writeI32Const(os, flagAddress, "flag address");
+      writeI32Const(os, 1, "expected flag value");
+      writeI64Const(os, -1, "timeout");
+      writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
+      writeUleb128(os, WASM_OPCODE_I32_ATOMIC_WAIT, "i32.atomic.wait");
+      writeMemArg(os, 2, 0);
+      writeU8(os, WASM_OPCODE_DROP, "drop");
+
+      writeU8(os, WASM_OPCODE_ELSE, "ELSE");
+
+      // Did increment 0, so conditionally initialize passive data segments
+      for (const OutputSegment *s : segments) {
+        if (s->initFlags & WASM_SEGMENT_IS_PASSIVE && s->name != ".tdata") {
+          // destination address
+          writeI32Const(os, s->startVA, "destination address");
+          // source segment offset
+          writeI32Const(os, 0, "segment offset");
+          // memory region size
+          writeI32Const(os, s->size, "memory region size");
+          // memory.init instruction
+          writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
+          writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "memory.init");
+          writeUleb128(os, s->index, "segment index immediate");
+          writeU8(os, 0, "memory index immediate");
+        }
+      }
+
+      // Set flag to 2 to mark end of initialization
+      writeI32Const(os, flagAddress, "flag address");
+      writeI32Const(os, 2, "flag value");
+      writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
+      writeUleb128(os, WASM_OPCODE_I32_ATOMIC_STORE, "i32.atomic.store");
+      writeMemArg(os, 2, 0);
+
+      // Notify any waiters that memory initialization is complete
+      writeI32Const(os, flagAddress, "flag address");
+      writeI32Const(os, -1, "number of waiters");
+      writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
+      writeUleb128(os, WASM_OPCODE_ATOMIC_NOTIFY, "atomic.notify");
+      writeMemArg(os, 2, 0);
+      writeU8(os, WASM_OPCODE_DROP, "drop");
+
+      writeU8(os, WASM_OPCODE_END, "END");
+
+      // Unconditionally drop passive data segments
+      for (const OutputSegment *s : segments) {
+        if (s->initFlags & WASM_SEGMENT_IS_PASSIVE && s->name != ".tdata") {
+          // data.drop instruction
+          writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
+          writeUleb128(os, WASM_OPCODE_DATA_DROP, "data.drop");
+          writeUleb128(os, s->index, "segment index immediate");
+        }
       }
     }
     writeU8(os, WASM_OPCODE_END, "END");
@@ -744,12 +844,6 @@
     raw_string_ostream os(bodyContent);
     writeUleb128(os, 0, "num locals");
 
-    if (config->passiveSegments) {
-      writeU8(os, WASM_OPCODE_CALL, "CALL");
-      writeUleb128(os, WasmSym::initMemory->getFunctionIndex(),
-                   "function index");
-    }
-
     if (config->isPic) {
       writeU8(os, WASM_OPCODE_CALL, "CALL");
       writeUleb128(os, WasmSym::applyRelocs->getFunctionIndex(),
@@ -794,11 +888,9 @@
       writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
       writeUleb128(os, 0, "local index");
 
-      writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
-      writeSleb128(os, 0, "segment offset");
+      writeI32Const(os, 0, "segment offset");
 
-      writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
-      writeSleb128(os, tlsSeg->size, "memory region size");
+      writeI32Const(os, tlsSeg->size, "memory region size");
 
       writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
       writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "MEMORY.INIT");
@@ -851,6 +943,7 @@
   out.globalSec = make<GlobalSection>();
   out.eventSec = make<EventSection>();
   out.exportSec = make<ExportSection>();
+  out.startSec = make<StartSection>(segments.size());
   out.elemSec = make<ElemSection>();
   out.dataCountSec = make<DataCountSection>(segments.size());
   out.linkingSec = make<LinkingSection>(initFunctions, segments);
@@ -900,7 +993,7 @@
 
   if (!config->relocatable) {
     // Create linker synthesized functions
-    if (config->passiveSegments)
+    if (config->sharedMemory)
       createInitMemoryFunction();
     if (config->isPic)
       createApplyRelocationsFunction();