Move lowerGlobal() from target-specific code to emitGlobal() in generic code.

Emitting the global initializers is mostly the same across
architectures (same filling, alignment, etc.). The only difference
is in assembler-directive quirks. E.g., on ARM the N in ".align N" is
the exponent of a power of 2, while on x86 N is the actual number
of bytes. To avoid target-specific directives, use .p2align, whose
argument is always the power-of-2 exponent. Similarly, use % instead
of @ in the .type and .section operands. Either one may be a comment
character for *some* architecture, but for the architectures we care
about % is not a comment character while @ sometimes is (ARM).

MIPS usually uses ".space N" rather than ".zero N", but the assembler
seems to accept ".zero", so don't change that for now.

May need to adjust .long in the future too:
perhaps .word for AArch64 and .4byte for MIPS?

Potentially we can refactor the lowerGlobals() dispatcher
(ELF vs ASM vs IASM). The only thing target-specific about that
is *probably* just the relocation type.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1188603002.
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index aefad00..4bb035a 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -20,6 +20,7 @@
 #include "assembler_mips32.h"
 #include "IceCfg.h" // setError()
 #include "IceCfgNode.h"
+#include "IceGlobalInits.h"
 #include "IceOperand.h"
 #include "IceRegAlloc.h"
 #include "IceTargetLowering.h"
@@ -445,6 +446,89 @@
 
 TargetDataLowering::~TargetDataLowering() {}
 
+void TargetDataLowering::emitGlobal(const VariableDeclaration &Var) {
+  if (!ALLOW_DUMP) // Emits nothing when ALLOW_DUMP is off.
+    return;
+
+  // If external and not initialized, this must be a cross test.
+  // Don't generate a declaration for such cases.
+  bool IsExternal = Var.isExternal() || Ctx->getFlags().getDisableInternal();
+  if (IsExternal && !Var.hasInitializer())
+    return;
+
+  Ostream &Str = Ctx->getStrEmit();
+  const VariableDeclaration::InitializerListType &Initializers =
+      Var.getInitializers();
+  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
+  bool IsConstant = Var.getIsConstant();
+  uint32_t Align = Var.getAlignment();
+  SizeT Size = Var.getNumBytes();
+  IceString MangledName = Var.mangleName(Ctx);
+  IceString SectionSuffix = ""; // Empty unless getDataSections() is set.
+  if (Ctx->getFlags().getDataSections())
+    SectionSuffix = "." + MangledName;
+
+  Str << "\t.type\t" << MangledName << ",%object\n";
+
+  if (IsConstant) // Constants go to .rodata, zero-initialized or not.
+    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",%progbits\n";
+  else if (HasNonzeroInitializer)
+    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",%progbits\n";
+  else // Non-constant with no nonzero initializer.
+    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",%nobits\n";
+
+  if (IsExternal)
+    Str << "\t.globl\t" << MangledName << "\n";
+
+  if (Align > 1) {
+    assert(llvm::isPowerOf2_32(Align));
+    // Use the .p2align directive, since the .align N directive can either
+    // interpret N as bytes, or power of 2 bytes, depending on the target.
+    Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
+  }
+
+  Str << MangledName << ":\n";
+
+  if (HasNonzeroInitializer) {
+    for (VariableDeclaration::Initializer *Init : Initializers) {
+      switch (Init->getKind()) {
+      case VariableDeclaration::Initializer::DataInitializerKind: {
+        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
+                              ->getContents();
+        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
+          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
+        }
+        break;
+      }
+      case VariableDeclaration::Initializer::ZeroInitializerKind:
+        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
+        break;
+      case VariableDeclaration::Initializer::RelocInitializerKind: {
+        const auto Reloc =
+            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
+        Str << "\t" << getEmit32Directive() << "\t";
+        Str << Reloc->getDeclaration()->mangleName(Ctx);
+        if (RelocOffsetT Offset = Reloc->getOffset()) { // Skip zero offsets.
+          if (Offset >= 0 || (Offset == INT32_MIN)) // Never negate INT32_MIN.
+            Str << " + " << Offset;
+          else
+            Str << " - " << -Offset;
+        }
+        Str << "\n";
+        break;
+      }
+      }
+    }
+  } else
+    // NOTE: for non-constant zero initializers, this is BSS (no bits),
+    // so an ELF writer would not write to the file, and only track
+    // virtual offsets, but the .s writer still needs this .zero and
+    // cannot simply use the .size to advance offsets.
+    Str << "\t.zero\t" << Size << "\n";
+
+  Str << "\t.size\t" << MangledName << ", " << Size << "\n";
+}
+
 std::unique_ptr<TargetHeaderLowering>
 TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
   TargetArch Target = Ctx->getFlags().getTargetArch();
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 66e663b..4d9598a 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -384,6 +384,13 @@
   virtual void lowerConstants() = 0;
 
 protected:
+  void emitGlobal(const VariableDeclaration &Var);
+
+  // For now, we assume .long is the right directive for emitting 4-byte
+  // global relocations. However, LLVM MIPS usually uses .4byte instead.
+  // Perhaps there is some difference when the location is unaligned.
+  const char *getEmit32Directive() { return ".long"; }
+
   explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
   GlobalContext *Ctx;
 };
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index a2091b2..a4aa517 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -2194,11 +2194,6 @@
 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
-void TargetDataARM32::lowerGlobal(const VariableDeclaration &Var) const {
-  (void)Var;
-  UnimplementedError(Ctx->getFlags());
-}
-
 void TargetDataARM32::lowerGlobals(
     std::unique_ptr<VariableDeclarationList> Vars) {
   switch (Ctx->getFlags().getOutFileType()) {
@@ -2212,7 +2207,7 @@
     OstreamLocker L(Ctx);
     for (const VariableDeclaration *Var : *Vars) {
       if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
-        lowerGlobal(*Var);
+        emitGlobal(*Var);
       }
     }
   } break;
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 2477aaa..7f5fdc8 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -319,7 +319,6 @@
   explicit TargetDataARM32(GlobalContext *Ctx);
 
 private:
-  void lowerGlobal(const VariableDeclaration &Var) const;
   ~TargetDataARM32() override {}
   template <typename T> static void emitConstantPool(GlobalContext *Ctx);
 };
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 748881e..ee2300e 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -671,11 +671,6 @@
 TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
-void TargetDataMIPS32::lowerGlobal(const VariableDeclaration &Var) const {
-  (void)Var;
-  llvm::report_fatal_error("Not yet implemented");
-}
-
 void TargetDataMIPS32::lowerGlobals(
     std::unique_ptr<VariableDeclarationList> Vars) {
   switch (Ctx->getFlags().getOutFileType()) {
@@ -689,7 +684,7 @@
     OstreamLocker L(Ctx);
     for (const VariableDeclaration *Var : *Vars) {
       if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
-        lowerGlobal(*Var);
+        emitGlobal(*Var);
       }
     }
   } break;
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 3aad63f..9204833 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -144,7 +144,6 @@
   explicit TargetDataMIPS32(GlobalContext *Ctx);
 
 private:
-  void lowerGlobal(const VariableDeclaration &Var) const;
   ~TargetDataMIPS32() override {}
   template <typename T> static void emitConstantPool(GlobalContext *Ctx);
 };
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index dcfdc96..dbb4e4a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -5017,82 +5017,6 @@
 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
-void TargetDataX8632::lowerGlobal(const VariableDeclaration &Var) {
-  // If external and not initialized, this must be a cross test.
-  // Don't generate a declaration for such cases.
-  bool IsExternal = Var.isExternal() || Ctx->getFlags().getDisableInternal();
-  if (IsExternal && !Var.hasInitializer())
-    return;
-
-  Ostream &Str = Ctx->getStrEmit();
-  const VariableDeclaration::InitializerListType &Initializers =
-      Var.getInitializers();
-  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
-  bool IsConstant = Var.getIsConstant();
-  uint32_t Align = Var.getAlignment();
-  SizeT Size = Var.getNumBytes();
-  IceString MangledName = Var.mangleName(Ctx);
-  IceString SectionSuffix = "";
-  if (Ctx->getFlags().getDataSections())
-    SectionSuffix = "." + MangledName;
-
-  Str << "\t.type\t" << MangledName << ",@object\n";
-
-  if (IsConstant)
-    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",@progbits\n";
-  else if (HasNonzeroInitializer)
-    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",@progbits\n";
-  else
-    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",@nobits\n";
-
-  if (IsExternal)
-    Str << "\t.globl\t" << MangledName << "\n";
-
-  if (Align > 1)
-    Str << "\t.align\t" << Align << "\n";
-
-  Str << MangledName << ":\n";
-
-  if (HasNonzeroInitializer) {
-    for (VariableDeclaration::Initializer *Init : Initializers) {
-      switch (Init->getKind()) {
-      case VariableDeclaration::Initializer::DataInitializerKind: {
-        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
-                              ->getContents();
-        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
-          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
-        }
-        break;
-      }
-      case VariableDeclaration::Initializer::ZeroInitializerKind:
-        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
-        break;
-      case VariableDeclaration::Initializer::RelocInitializerKind: {
-        const auto Reloc =
-            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
-        Str << "\t.long\t";
-        Str << Reloc->getDeclaration()->mangleName(Ctx);
-        if (RelocOffsetT Offset = Reloc->getOffset()) {
-          if (Offset >= 0 || (Offset == INT32_MIN))
-            Str << " + " << Offset;
-          else
-            Str << " - " << -Offset;
-        }
-        Str << "\n";
-        break;
-      }
-      }
-    }
-  } else
-    // NOTE: for non-constant zero initializers, this is BSS (no bits),
-    // so an ELF writer would not write to the file, and only track
-    // virtual offsets, but the .s writer still needs this .zero and
-    // cannot simply use the .size to advance offsets.
-    Str << "\t.zero\t" << Size << "\n";
-
-  Str << "\t.size\t" << MangledName << ", " << Size << "\n";
-}
-
 void TargetDataX8632::lowerGlobals(
     std::unique_ptr<VariableDeclarationList> Vars) {
   switch (Ctx->getFlags().getOutFileType()) {
@@ -5106,7 +5030,7 @@
     OstreamLocker L(Ctx);
     for (const VariableDeclaration *Var : *Vars) {
       if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
-        lowerGlobal(*Var);
+        emitGlobal(*Var);
       }
     }
   } break;
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index fe3612c..a921294 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -594,7 +594,6 @@
   explicit TargetDataX8632(GlobalContext *Ctx);
 
 private:
-  void lowerGlobal(const VariableDeclaration &Var);
   ~TargetDataX8632() override {}
   template <typename T> static void emitConstantPool(GlobalContext *Ctx);
 };
diff --git a/tests_lit/llvm2ice_tests/globalinit.pnacl.ll b/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
index fcefe6a..e22d292 100644
--- a/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
@@ -2,16 +2,35 @@
 
 ; REQUIRES: allow_dump
 
-; Test -filetype=asm to test the lea "hack" until we are fully confident
-; in -filetype=iasm .
-; RUN: %p2i -i %s --filetype=asm --args --verbose none | FileCheck %s
+; Test initializers with -filetype=asm.
+; RUN: %if --need=target_X8632 --command %p2i --filetype=asm --target x8632 \
+; RUN:   -i %s --args -O2 | %if --need=target_X8632 --command FileCheck %s
 
-; Test -filetype=iasm and try to cross reference instructions w/ the
-; symbol table.
-; RUN: %p2i --assemble --disassemble -i %s --args --verbose none \
-; RUN:   | FileCheck --check-prefix=IAS %s
-; RUN: %p2i --assemble --disassemble --dis-flags=-t -i %s --args \
-; RUN:   --verbose none | FileCheck --check-prefix=SYMTAB %s
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --target arm32 \
+; RUN:   -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck %s
+
+; Test instructions for materializing addresses.
+; RUN: %if --need=target_X8632 --command %p2i --filetype=asm --target x8632 \
+; RUN:   -i %s --args -O2 \
+; RUN: | %if --need=target_X8632 --command FileCheck %s --check-prefix=X8632
+
+; Test instructions with -filetype=obj and try to cross reference instructions
+; w/ the symbol table.
+; RUN: %if --need=target_X8632 --command %p2i --assemble --disassemble \
+; RUN:   --target x8632 -i %s --args --verbose none \
+; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=IAS %s
+
+; RUN: %if --need=target_X8632 --command %p2i --assemble --disassemble \
+; RUN:   --dis-flags=-t --target x8632 -i %s --args --verbose none \
+; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=SYMTAB %s
+
+; Only checking symtab for ARM for now. TODO(jvoung): Need to lower
+; arguments at callsite.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --dis-flags=-t --target arm32 -i %s \
+; RUN:   --args --verbose none --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix=SYMTAB %s
 
 define internal i32 @main(i32 %argc, i32 %argv) {
 entry:
@@ -31,14 +50,14 @@
   call void @use(i32 %expanded13)
   ret i32 0
 }
-; CHECK-LABEL: main
-; CHECK: movl $PrimitiveInit,
-; CHECK: movl $PrimitiveInitConst,
-; CHECK: movl $PrimitiveInitStatic,
-; CHECK: movl $PrimitiveUninit,
-; CHECK: movl $ArrayInit,
-; CHECK: movl $ArrayInitPartial,
-; CHECK: movl $ArrayUninit,
+; X8632-LABEL: main
+; X8632: movl $PrimitiveInit,
+; X8632: movl $PrimitiveInitConst,
+; X8632: movl $PrimitiveInitStatic,
+; X8632: movl $PrimitiveUninit,
+; X8632: movl $ArrayInit,
+; X8632: movl $ArrayInitPartial,
+; X8632: movl $ArrayUninit,
 
 ; objdump does not indicate what symbol the mov/relocation applies to
 ; so we grep for "mov {{.*}}, OFFSET, sec", along with
@@ -91,73 +110,73 @@
 
 
 @PrimitiveInit = internal global [4 x i8] c"\1B\00\00\00", align 4
-; CHECK: .type PrimitiveInit,@object
-; CHECK-NEXT: .section .data,"aw",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveInit,%object
+; CHECK-NEXT: .section .data,"aw",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveInit:
 ; CHECK-NEXT: .byte
 ; CHECK: .size PrimitiveInit, 4
 
 @PrimitiveInitConst = internal constant [4 x i8] c"\0D\00\00\00", align 4
-; CHECK: .type PrimitiveInitConst,@object
-; CHECK-NEXT: .section .rodata,"a",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveInitConst,%object
+; CHECK-NEXT: .section .rodata,"a",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveInitConst:
 ; CHECK-NEXT: .byte
 ; CHECK: .size PrimitiveInitConst, 4
 
 @ArrayInit = internal global [20 x i8] c"\0A\00\00\00\14\00\00\00\1E\00\00\00(\00\00\002\00\00\00", align 4
-; CHECK: .type ArrayInit,@object
-; CHECK-NEXT: .section .data,"aw",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayInit,%object
+; CHECK-NEXT: .section .data,"aw",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayInit:
 ; CHECK-NEXT: .byte
 ; CHECK: .size ArrayInit, 20
 
 @ArrayInitPartial = internal global [40 x i8] c"<\00\00\00F\00\00\00P\00\00\00Z\00\00\00d\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4
-; CHECK: .type ArrayInitPartial,@object
-; CHECK-NEXT: .section .data,"aw",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayInitPartial,%object
+; CHECK-NEXT: .section .data,"aw",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayInitPartial:
 ; CHECK-NEXT: .byte
 ; CHECK: .size ArrayInitPartial, 40
 
 @PrimitiveInitStatic = internal global [4 x i8] zeroinitializer, align 4
-; CHECK: .type PrimitiveInitStatic,@object
-; CHECK-NEXT: .section .bss,"aw",@nobits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveInitStatic,%object
+; CHECK-NEXT: .section .bss,"aw",%nobits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveInitStatic:
 ; CHECK-NEXT: .zero 4
 ; CHECK-NEXT: .size PrimitiveInitStatic, 4
 
 @PrimitiveUninit = internal global [4 x i8] zeroinitializer, align 4
-; CHECK: .type PrimitiveUninit,@object
-; CHECK-NEXT: .section .bss,"aw",@nobits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveUninit,%object
+; CHECK-NEXT: .section .bss,"aw",%nobits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveUninit:
 ; CHECK-NEXT: .zero 4
 ; CHECK-NEXT: .size PrimitiveUninit, 4
 
 @ArrayUninit = internal global [20 x i8] zeroinitializer, align 4
-; CHECK: .type ArrayUninit,@object
-; CHECK-NEXT: .section .bss,"aw",@nobits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayUninit,%object
+; CHECK-NEXT: .section .bss,"aw",%nobits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayUninit:
 ; CHECK-NEXT: .zero 20
 ; CHECK-NEXT: .size ArrayUninit, 20
 
 @ArrayUninitConstDouble = internal constant [200 x i8] zeroinitializer, align 8
-; CHECK: .type ArrayUninitConstDouble,@object
-; CHECK-NEXT: .section .rodata,"a",@progbits
-; CHECK-NEXT: .align 8
+; CHECK: .type ArrayUninitConstDouble,%object
+; CHECK-NEXT: .section .rodata,"a",%progbits
+; CHECK-NEXT: .p2align 3
 ; CHECK-NEXT: ArrayUninitConstDouble:
 ; CHECK-NEXT: .zero 200
 ; CHECK-NEXT: .size ArrayUninitConstDouble, 200
 
 @ArrayUninitConstInt = internal constant [20 x i8] zeroinitializer, align 4
-; CHECK: .type ArrayUninitConstInt,@object
-; CHECK: .section .rodata,"a",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayUninitConstInt,%object
+; CHECK: .section .rodata,"a",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayUninitConstInt:
 ; CHECK-NEXT: .zero 20
 ; CHECK-NEXT: .size ArrayUninitConstInt, 20
diff --git a/tests_lit/llvm2ice_tests/globalrelocs.ll b/tests_lit/llvm2ice_tests/globalrelocs.ll
index 7e6a01c..0d163e6 100644
--- a/tests_lit/llvm2ice_tests/globalrelocs.ll
+++ b/tests_lit/llvm2ice_tests/globalrelocs.ll
@@ -16,8 +16,8 @@
 
 @bytes = internal global [7 x i8] c"abcdefg"
 ; DUMP: @bytes = internal global [7 x i8] c"abcdefg"
-; CHECK:	.type	bytes,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	bytes,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:bytes:
 ; CHECK:	.byte	97
 ; CHECK:	.byte	98
@@ -30,8 +30,8 @@
 
 @const_bytes = internal constant [7 x i8] c"abcdefg"
 ; DUMP: @const_bytes = internal constant [7 x i8] c"abcdefg"
-; CHECK:	.type	const_bytes,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_bytes,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_bytes:
 ; CHECK:	.byte	97
 ; CHECK:	.byte	98
@@ -44,40 +44,40 @@
 
 @ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32)
 ; DUMP: @ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32)
-; CHECK:	.type	ptr_to_ptr,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	ptr_to_ptr,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:ptr_to_ptr:
 ; CHECK:	.long	ptr
 ; CHECK:	.size	ptr_to_ptr, 4
 
 @const_ptr_to_ptr = internal constant i32 ptrtoint (i32* @ptr to i32)
 ; DUMP: @const_ptr_to_ptr = internal constant i32 ptrtoint (i32* @ptr to i32)
-; CHECK:	.type	const_ptr_to_ptr,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_ptr_to_ptr,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_ptr_to_ptr:
 ; CHECK:	.long	ptr
 ; CHECK:	.size	const_ptr_to_ptr, 4
 
 @ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32)
 ; DUMP: @ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32)
-; CHECK:	.type	ptr_to_func,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	ptr_to_func,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:ptr_to_func:
 ; CHECK:	.long	func
 ; CHECK:	.size	ptr_to_func, 4
 
 @const_ptr_to_func = internal constant i32 ptrtoint (void ()* @func to i32)
 ; DUMP: @const_ptr_to_func = internal constant i32 ptrtoint (void ()* @func to i32)
-; CHECK:	.type	const_ptr_to_func,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_ptr_to_func,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_ptr_to_func:
 ; CHECK:	.long	func
 ; CHECK:	.size	const_ptr_to_func, 4
 
 @compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
 ; DUMP: @compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
-; CHECK:	.type	compound,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	compound,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:compound:
 ; CHECK:	.byte	102
 ; CHECK:	.byte	111
@@ -87,8 +87,8 @@
 
 @const_compound = internal constant <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
 ; DUMP: @const_compound = internal constant <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
-; CHECK:	.type	const_compound,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_compound,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_compound:
 ; CHECK:	.byte	102
 ; CHECK:	.byte	111
@@ -98,162 +98,162 @@
 
 @ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32)
 ; DUMP: @ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32)
-; CHECK:	.type	ptr,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	ptr,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:ptr:
 ; CHECK:	.long	bytes
 ; CHECK:	.size	ptr, 4
 
 @const_ptr = internal constant i32 ptrtoint ([7 x i8]* @bytes to i32)
 ; DUMP: @const_ptr = internal constant i32 ptrtoint ([7 x i8]* @bytes to i32)
-; CHECK:	.type	const_ptr,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_ptr,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_ptr:
 ; CHECK:	.long	bytes
 ; CHECK:	.size	const_ptr, 4
 
 @addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
 ; DUMP: @addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
-; CHECK:	.type	addend_ptr,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_ptr,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_ptr:
 ; CHECK:	.long	ptr + 1
 ; CHECK:	.size	addend_ptr, 4
 
 @const_addend_ptr = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
 ; DUMP: @const_addend_ptr = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
-; CHECK:	.type	const_addend_ptr,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_ptr,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_ptr:
 ; CHECK:	.long	ptr + 1
 ; CHECK:	.size	const_addend_ptr, 4
 
 @addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
 ; DUMP: @addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
-; CHECK:	.type	addend_negative,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_negative,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_negative:
 ; CHECK:	.long	ptr - 1
 ; CHECK:	.size	addend_negative, 4
 
 @const_addend_negative = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
 ; DUMP: @const_addend_negative = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
-; CHECK:	.type	const_addend_negative,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_negative,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_negative:
 ; CHECK:	.long	ptr - 1
 ; CHECK:	.size	const_addend_negative, 4
 
 @addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
 ; DUMP: @addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
-; CHECK:	.type	addend_array1,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_array1,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_array1:
 ; CHECK:	.long	bytes + 1
 ; CHECK:	.size	addend_array1, 4
 
 @const_addend_array1 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
 ; DUMP: @const_addend_array1 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
-; CHECK:	.type	const_addend_array1,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_array1,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_array1:
 ; CHECK:	.long	bytes + 1
 ; CHECK:	.size	const_addend_array1, 4
 
 @addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
 ; DUMP: @addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
-; CHECK:	.type	addend_array2,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_array2,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_array2:
 ; CHECK:	.long	bytes + 7
 ; CHECK:	.size	addend_array2, 4
 
 @const_addend_array2 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
 ; DUMP: @const_addend_array2 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
-; CHECK:	.type	const_addend_array2,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_array2,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_array2:
 ; CHECK:	.long	bytes + 7
 ; CHECK:	.size	const_addend_array2, 4
 
 @addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
 ; DUMP: @addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
-; CHECK:	.type	addend_array3,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_array3,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_array3:
 ; CHECK:	.long	bytes + 9
 ; CHECK:	.size	addend_array3, 4
 
 @const_addend_array3 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
 ; DUMP: @const_addend_array3 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
-; CHECK:	.type	const_addend_array3,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_array3,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_array3:
 ; CHECK:	.long	bytes + 9
 ; CHECK:	.size	const_addend_array3, 4
 
 @addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
 ; DUMP: @addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
-; CHECK:	.type	addend_struct1,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_struct1,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_struct1:
 ; CHECK:	.long	compound + 1
 ; CHECK:	.size	addend_struct1, 4
 
 @const_addend_struct1 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
 ; DUMP: @const_addend_struct1 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
-; CHECK:	.type	const_addend_struct1,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_struct1,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_struct1:
 ; CHECK:	.long	compound + 1
 ; CHECK:	.size	const_addend_struct1, 4
 
 @addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
 ; DUMP: @addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
-; CHECK:	.type	addend_struct2,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_struct2,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_struct2:
 ; CHECK:	.long	compound + 4
 ; CHECK:	.size	addend_struct2, 4
 
 @const_addend_struct2 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
 ; DUMP: @const_addend_struct2 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
-; CHECK:	.type	const_addend_struct2,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_struct2,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_struct2:
 ; CHECK:	.long	compound + 4
 ; CHECK:	.size	const_addend_struct2, 4
 
 @ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8
 ; DUMP: @ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8
-; CHECK:	.type	ptr_to_func_align,@object
-; CHECK:	.section	.data,"aw",@progbits
-; CHECK:	.align	8
+; CHECK:	.type	ptr_to_func_align,%object
+; CHECK:	.section	.data,"aw",%progbits
+; CHECK:	.p2align	3
 ; CHECK:ptr_to_func_align:
 ; CHECK:	.long	func
 ; CHECK:	.size	ptr_to_func_align, 4
 
 @const_ptr_to_func_align = internal constant i32 ptrtoint (void ()* @func to i32), align 8
 ; DUMP: @const_ptr_to_func_align = internal constant i32 ptrtoint (void ()* @func to i32), align 8
-; CHECK:	.type	const_ptr_to_func_align,@object
-; CHECK:	.section	.rodata,"a",@progbits
-; CHECK:	.align	8
+; CHECK:	.type	const_ptr_to_func_align,%object
+; CHECK:	.section	.rodata,"a",%progbits
+; CHECK:	.p2align	3
 ; CHECK:const_ptr_to_func_align:
 ; CHECK:	.long	func
 ; CHECK:	.size	const_ptr_to_func_align, 4
 
 @char = internal constant [1 x i8] c"0"
 ; DUMP: @char = internal constant [1 x i8] c"0"
-; CHECK:	.type	char,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	char,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:char:
 ; CHECK:	.byte	48
 ; CHECK:	.size	char, 1
 
 @short = internal constant [2 x i8] zeroinitializer
 ; DUMP: @short = internal constant [2 x i8] zeroinitializer
-; CHECK:	.type	short,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	short,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:short:
 ; CHECK:	.zero	2
 ; CHECK:	.size	short, 2