Subzero: Partial implementation of global initializers.

This is still missing a couple of things:

1. It only supports flat arrays and zeroinitializers.  Arrays of structs are not yet supported.

2. Initializers can't yet contain relocatables, e.g. the address of another global.

Some changes are made to work around an llvm-mc assembler bug.  When assembling using intel syntax, llvm-mc doesn't correctly parse symbolic constants or add relocation entries in some circumstances.  Call instructions work, and use in a memory operand works, e.g. mov eax, [ArrayBase+4*ecx].  To work around this, we adjust legalize() to not allow ConstantRelocatable by default, except for memory operands and when called from lowerCall(), so the relocatable ends up being the source operand of a mov instruction.  Then, the mov emit routine actually emits an lea instruction for such moves.

A few lit tests needed to be adjusted to make szdiff work properly with respect to global initializers.

In the new cross test, the driver calls test code that returns a pointer to an array with a global initializer, and the driver compares the arrays returned by llc and Subzero.

BUG= none
R=jvoung@chromium.org

Review URL: https://codereview.chromium.org/358013003
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 376d454..b983c3e 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -655,8 +655,21 @@
 void InstX8632Mov::emit(const Cfg *Func) const {
   Ostream &Str = Func->getContext()->getStrEmit();
   assert(getSrcSize() == 1);
-  Str << "\tmov" << TypeX8632Attributes[getDest()->getType()].SdSsString
-      << "\t";
+  Operand *Src = getSrc(0);
+  // The llvm-mc assembler using Intel syntax has a bug in which "mov
+  // reg, RelocatableConstant" does not generate the right instruction
+  // with a relocation.  To work around, we emit "lea reg,
+  // [RelocatableConstant]".  Also, the lowering and legalization is
+  // changed to allow relocatable constants only in Assign and Call
+  // instructions or in Mem operands.  TODO(stichnot): remove LEAHACK
+  // once a proper emitter is used.
+  bool UseLeaHack = llvm::isa<ConstantRelocatable>(Src);
+  Str << "\t";
+  if (UseLeaHack)
+    Str << "lea";
+  else
+    Str << "mov" << TypeX8632Attributes[getDest()->getType()].SdSsString;
+  Str << "\t";
   // For an integer truncation operation, src is wider than dest.
   // Ideally, we use a mov instruction whose data width matches the
   // narrower dest.  This is a problem if e.g. src is a register like
@@ -665,10 +678,10 @@
   // for stack-allocated dest variables because typeWidthOnStack()
   // pads to a 4-byte boundary even if only a lower portion is used.
   assert(Func->getTarget()->typeWidthInBytesOnStack(getDest()->getType()) ==
-         Func->getTarget()->typeWidthInBytesOnStack(getSrc(0)->getType()));
-  getDest()->asType(getSrc(0)->getType()).emit(Func);
+         Func->getTarget()->typeWidthInBytesOnStack(Src->getType()));
+  getDest()->asType(Src->getType()).emit(Func);
   Str << ", ";
-  getSrc(0)->emit(Func);
+  Src->emit(Func);
   Str << "\n";
 }
 
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 877f717..d29506e 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -174,4 +174,30 @@
   LinearScan.scan(RegMask);
 }
 
+// Factory for the target-specific global initializer lowering.  Returns
+// the object produced by the selected target's create() function for
+// Ctx.  An unrecognized Target is reported through llvm_unreachable().
+TargetGlobalInitLowering *
+TargetGlobalInitLowering::createLowering(TargetArch Target,
+                                         GlobalContext *Ctx) {
+  // Individual cases can be #ifdef'd to specialize the code generator
+  // to a subset of the available targets.  TODO: use CRTP.
+  switch (Target) {
+  case Target_X8632:
+    return TargetGlobalInitX8632::create(Ctx);
+#if 0
+  case Target_X8664:
+    return IceTargetGlobalInitX8664::create(Ctx);
+  case Target_ARM32:
+    return IceTargetGlobalInitARM32::create(Ctx);
+  case Target_ARM64:
+    return IceTargetGlobalInitARM64::create(Ctx);
+#endif
+  default:
+    break;
+  }
+  llvm_unreachable("Unsupported target");
+  return NULL;
+}
+
 } // end of namespace Ice
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index dbb9a42..ddb66fa 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -199,6 +199,45 @@
   TargetLowering &operator=(const TargetLowering &) LLVM_DELETED_FUNCTION;
 };
 
+// TargetGlobalInitLowering is used for "lowering" global
+// initializers.  It is separated out from TargetLowering because it
+// does not require a Cfg.
+//
+// Target-specific lowerings derive from this class and are handed out
+// by createLowering() as pointers to this base type.
+class TargetGlobalInitLowering {
+public:
+  // Returns the global initializer lowering for Target, constructed
+  // with Ctx.
+  static TargetGlobalInitLowering *createLowering(TargetArch Target,
+                                                  GlobalContext *Ctx);
+  // The destructor is virtual because lowerings are destroyed through
+  // pointers to this base type (e.g. when held in an OwningPtr);
+  // deleting a derived object through a base class whose destructor
+  // is not virtual is undefined behavior.
+  virtual ~TargetGlobalInitLowering() {}
+  // Lowers a single global variable.  Name is the (unmangled) name of
+  // the global, Align its alignment, and Size its size in bytes.
+  // IsInternal and IsConst describe its linkage and constness.  Data
+  // points to Size bytes of initializer data, and is not used when
+  // IsZeroInitializer is true.  DisableTranslation asks the lowering
+  // to skip emitting target output.
+  // TODO: Allow relocations to be represented as part of the Data.
+  virtual void lower(const IceString &Name, SizeT Align, bool IsInternal,
+                     bool IsConst, bool IsZeroInitializer, SizeT Size,
+                     const char *Data, bool DisableTranslation) = 0;
+
+protected:
+  TargetGlobalInitLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
+  GlobalContext *Ctx;
+
+private:
+  TargetGlobalInitLowering(const TargetGlobalInitLowering &)
+  LLVM_DELETED_FUNCTION;
+  TargetGlobalInitLowering &
+  operator=(const TargetGlobalInitLowering &) LLVM_DELETED_FUNCTION;
+};
+
 } // end of namespace Ice
 
 #endif // SUBZERO_SRC_ICETARGETLOWERING_H
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index ef9bc22..fec2d62 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -1319,7 +1319,9 @@
       break;
     }
   }
-  Operand *CallTarget = legalize(Instr->getCallTarget());
+  // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once
+  // a proper emitter is used.
+  Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
   Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
   Context.insert(NewCall);
   if (edx)
@@ -2445,11 +2447,19 @@
       // need to go in uninitialized registers.
       From = Ctx->getConstantZero(From->getType());
     }
-    bool NeedsReg =
-        !(Allowed & Legal_Imm) ||
-        // ConstantFloat and ConstantDouble are actually memory operands.
-        (!(Allowed & Legal_Mem) &&
-         (From->getType() == IceType_f32 || From->getType() == IceType_f64));
+    bool NeedsReg = false;
+    if (!(Allowed & Legal_Imm))
+      // Immediate specifically not allowed
+      NeedsReg = true;
+    // TODO(stichnot): LEAHACK: remove Legal_Reloc once a proper
+    // emitter is used.
+    if (!(Allowed & Legal_Reloc) && llvm::isa<ConstantRelocatable>(From))
+      // Relocatable specifically not allowed
+      NeedsReg = true;
+    if (!(Allowed & Legal_Mem) &&
+        (From->getType() == IceType_f32 || From->getType() == IceType_f64))
+      // On x86, FP constants are lowered to mem operands.
+      NeedsReg = true;
     if (NeedsReg) {
       Variable *Reg = makeReg(From->getType(), RegNum);
       _mov(Reg, From);
@@ -2581,4 +2591,119 @@
   Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
 }
 
+TargetGlobalInitX8632::TargetGlobalInitX8632(GlobalContext *Ctx)
+    : TargetGlobalInitLowering(Ctx) {}
+
+namespace {
+// Returns the uppercase hexadecimal digit for a value in [0, 15].
+char hexdigit(unsigned X) { return X < 10 ? '0' + X : 'A' + X - 10; }
+}
+
+// Lowers one global variable for x86-32.  In verbose mode, a textual
+// description of the global is first written to the dump stream.
+// Then, unless DisableTranslation is set, the assembler directives
+// that define the symbol are written to the emit stream.
+//
+// Name is mangled before it is emitted.  Data is read only when
+// IsZeroInitializer is false, in which case Size bytes are read.
+void TargetGlobalInitX8632::lower(const IceString &Name, SizeT Align,
+                                  bool IsInternal, bool IsConst,
+                                  bool IsZeroInitializer, SizeT Size,
+                                  const char *Data, bool DisableTranslation) {
+  if (Ctx->isVerbose()) {
+    // TODO: Consider moving the dump output into the driver to be
+    // reused for all targets.
+    Ostream &Str = Ctx->getStrDump();
+    Str << "@" << Name << " = " << (IsInternal ? "internal" : "external");
+    Str << (IsConst ? " constant" : " global");
+    Str << " [" << Size << " x i8] ";
+    if (IsZeroInitializer) {
+      Str << "zeroinitializer";
+    } else {
+      Str << "c\"";
+      // Code taken from PrintEscapedString() in AsmWriter.cpp.  Keep
+      // the strings in the same format as the .ll file for practical
+      // diffing.
+      for (uint64_t i = 0; i < Size; ++i) {
+        unsigned char C = Data[i];
+        if (isprint(C) && C != '\\' && C != '"')
+          Str << C;
+        else
+          Str << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+      }
+      Str << "\"";
+    }
+    Str << ", align " << Align << "\n";
+  }
+
+  if (DisableTranslation)
+    return;
+
+  Ostream &Str = Ctx->getStrEmit();
+  // constant:
+  //   .section .rodata,"a",@progbits
+  //   .local NAME (or .global NAME)
+  //   .align ALIGN
+  //   NAME:
+  //   .byte ...
+  //   .size NAME, SIZE
+
+  // non-constant:
+  //   .type NAME,@object
+  //   .data
+  //   .local NAME (or .global NAME)
+  //   .align ALIGN
+  //   NAME:
+  //   .byte ...
+  //   .size NAME, SIZE
+
+  // zeroinitializer (constant):
+  //   (.section as above)
+  //   .local NAME (or .global NAME)
+  //   .align ALIGN
+  //   NAME:
+  //   .zero SIZE
+  //   .size NAME, SIZE
+
+  // zeroinitializer (non-constant):
+  //   (.type and .data as above)
+  //   .local NAME (or .global NAME)
+  //   .comm NAME, SIZE, ALIGN
+
+  IceString MangledName = Ctx->mangleName(Name);
+  // Start a new section.
+  if (IsConst) {
+    Str << "\t.section\t.rodata,\"a\",@progbits\n";
+  } else {
+    Str << "\t.type\t" << MangledName << ",@object\n";
+    Str << "\t.data\n";
+  }
+  // Emit the binding directive before the symbol is defined.  In
+  // particular, .local has to come before .comm: .comm on a symbol
+  // with no binding yet makes it a global common symbol, and a later
+  // .local would then leave a common symbol with local binding.
+  Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
+      << "\n";
+  if (IsZeroInitializer) {
+    if (IsConst) {
+      Str << "\t.align\t" << Align << "\n";
+      Str << MangledName << ":\n";
+      Str << "\t.zero\t" << Size << "\n";
+      Str << "\t.size\t" << MangledName << ", " << Size << "\n";
+    } else {
+      // TODO(stichnot): Put the appropriate non-constant
+      // zeroinitializers in a .bss section to reduce object size.
+      Str << "\t.comm\t" << MangledName << ", " << Size << ", " << Align
+          << "\n";
+    }
+  } else {
+    Str << "\t.align\t" << Align << "\n";
+    Str << MangledName << ":\n";
+    for (SizeT i = 0; i < Size; ++i) {
+      Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
+    }
+    Str << "\t.size\t" << MangledName << ", " << Size << "\n";
+  }
+}
+
 } // end of namespace Ice
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index 972b29f..521c36e 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -109,10 +109,13 @@
     Legal_Reg = 1 << 0, // physical register, not stack location
     Legal_Imm = 1 << 1,
     Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
+    // TODO(stichnot): LEAHACK: remove Legal_Reloc once a proper
+    // emitter is used.
+    Legal_Reloc = 1 << 3,
     Legal_All = ~Legal_None
   };
   typedef uint32_t LegalMask;
-  Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
+  Operand *legalize(Operand *From, LegalMask Allowed = Legal_All & ~Legal_Reloc,
                     bool AllowOverlap = false,
                     int32_t RegNum = Variable::NoRegister);
   Variable *legalizeToVar(Operand *From, bool AllowOverlap = false,
@@ -291,6 +294,30 @@
   template <typename T> void emitConstantPool() const;
 };
 
+// TargetGlobalInitX8632 is the x86-32 implementation of
+// TargetGlobalInitLowering.  Objects are obtained through create();
+// the constructor is protected, and copying and the destructor are
+// private.
+class TargetGlobalInitX8632 : public TargetGlobalInitLowering {
+  TargetGlobalInitX8632(const TargetGlobalInitX8632 &) LLVM_DELETED_FUNCTION;
+  TargetGlobalInitX8632 &
+  operator=(const TargetGlobalInitX8632 &) LLVM_DELETED_FUNCTION;
+  virtual ~TargetGlobalInitX8632() {}
+
+public:
+  // Heap-allocates a new lowering object that uses Ctx.
+  static TargetGlobalInitLowering *create(GlobalContext *Ctx) {
+    return new TargetGlobalInitX8632(Ctx);
+  }
+  // x86-32 implementation of TargetGlobalInitLowering::lower().
+  virtual void lower(const IceString &Name, SizeT Align, bool IsInternal,
+                     bool IsConst, bool IsZeroInitializer, SizeT Size,
+                     const char *Data, bool DisableTranslation);
+
+protected:
+  TargetGlobalInitX8632(GlobalContext *Ctx);
+};
+
 template <> void ConstantFloat::emit(GlobalContext *Ctx) const;
 template <> void ConstantDouble::emit(GlobalContext *Ctx) const;
 
diff --git a/src/llvm2ice.cpp b/src/llvm2ice.cpp
index 01d4f9b..6d26cff 100644
--- a/src/llvm2ice.cpp
+++ b/src/llvm2ice.cpp
@@ -13,11 +13,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "IceCfg.h"
 #include "IceConverter.h"
 #include "IceDefs.h"
+#include "IceTargetLowering.h"
 #include "IceTypes.h"
 
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_os_ostream.h"
@@ -94,9 +99,10 @@
                clEnumValEnd),
     cl::init(LLVMFormat));
 
-static cl::opt<bool> BuildOnRead(
-    "build-on-read", cl::desc("Build ICE instructions when reading bitcode"),
-    cl::init(false));
+static cl::opt<bool>
+BuildOnRead("build-on-read",
+            cl::desc("Build ICE instructions when reading bitcode"),
+            cl::init(false));
 
 int main(int argc, char **argv) {
 
@@ -129,8 +135,8 @@
     // Parse the input LLVM IR file into a module.
     SMDiagnostic Err;
     Ice::Timer T;
-    Module *Mod = NaClParseIRFile(IRFilename, InputFileFormat, Err,
-                                  getGlobalContext());
+    Module *Mod =
+        NaClParseIRFile(IRFilename, InputFileFormat, Err, getGlobalContext());
 
     if (SubzeroTimingEnabled) {
       std::cerr << "[Subzero timing] IR Parsing: " << T.getElapsedSec()
@@ -142,6 +148,47 @@
       return 1;
     }
 
+    // TODO(stichnot): Move this into IceConverter.cpp.
+    OwningPtr<Ice::TargetGlobalInitLowering> GlobalLowering(
+        Ice::TargetGlobalInitLowering::createLowering(TargetArch, &Ctx));
+    for (Module::const_global_iterator I = Mod->global_begin(),
+                                       E = Mod->global_end();
+         I != E; ++I) {
+      if (!I->hasInitializer())
+        continue;
+      const Constant *Initializer = I->getInitializer();
+      Ice::IceString Name = I->getName();
+      unsigned Align = I->getAlignment();
+      uint64_t NumElements = 0;
+      const char *Data = NULL;
+      bool IsInternal = I->hasInternalLinkage();
+      bool IsConst = I->isConstant();
+      bool IsZeroInitializer = false;
+
+      if (const ConstantDataArray *CDA =
+              dyn_cast<ConstantDataArray>(Initializer)) {
+        NumElements = CDA->getNumElements();
+        assert(isa<IntegerType>(CDA->getElementType()) &&
+               cast<IntegerType>(CDA->getElementType())->getBitWidth() == 8);
+        Data = CDA->getRawDataValues().data();
+      } else if (isa<ConstantAggregateZero>(Initializer)) {
+        if (const ArrayType *AT = dyn_cast<ArrayType>(Initializer->getType())) {
+          assert(isa<IntegerType>(AT->getElementType()) &&
+                 cast<IntegerType>(AT->getElementType())->getBitWidth() == 8);
+          NumElements = AT->getNumElements();
+          IsZeroInitializer = true;
+        } else {
+          llvm_unreachable("Unhandled constant aggregate zero type");
+        }
+      } else {
+        llvm_unreachable("Unhandled global initializer");
+      }
+
+      GlobalLowering->lower(Name, Align, IsInternal, IsConst, IsZeroInitializer,
+                            NumElements, Data, DisableTranslation);
+    }
+    GlobalLowering.reset();
+
     Ice::Converter Converter(&Ctx, DisableInternal, SubzeroTimingEnabled,
                              DisableTranslation);
     return Converter.convertToIce(Mod);