diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index aefad00..4bb035a 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -20,6 +20,7 @@
 #include "assembler_mips32.h"
 #include "IceCfg.h" // setError()
 #include "IceCfgNode.h"
+#include "IceGlobalInits.h"
 #include "IceOperand.h"
 #include "IceRegAlloc.h"
 #include "IceTargetLowering.h"
@@ -445,6 +446,89 @@
 
 TargetDataLowering::~TargetDataLowering() {}
 
+void TargetDataLowering::emitGlobal(const VariableDeclaration &Var) {
+  if (!ALLOW_DUMP)
+    return;
+
+  // A variable treated as external (see IsExternal) with no initializer must
+  // come from a cross test: emit nothing rather than a declaration for it.
+  bool IsExternal = Var.isExternal() || Ctx->getFlags().getDisableInternal();
+  if (IsExternal && !Var.hasInitializer())
+    return;
+
+  Ostream &Str = Ctx->getStrEmit();
+  const VariableDeclaration::InitializerListType &Initializers =
+      Var.getInitializers();
+  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
+  bool IsConstant = Var.getIsConstant();
+  uint32_t Align = Var.getAlignment();
+  SizeT Size = Var.getNumBytes();
+  IceString MangledName = Var.mangleName(Ctx);
+  IceString SectionSuffix = "";
+  if (Ctx->getFlags().getDataSections())
+    SectionSuffix = "." + MangledName;
+
+  Str << "\t.type\t" << MangledName << ",%object\n";
+
+  if (IsConstant)
+    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",%progbits\n";
+  else if (HasNonzeroInitializer)
+    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",%progbits\n";
+  else
+    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",%nobits\n";
+
+  if (IsExternal)
+    Str << "\t.globl\t" << MangledName << "\n";
+
+  if (Align > 1) {
+    assert(llvm::isPowerOf2_32(Align));
+    // Emit .p2align with log2(Align): a plain ".align N" is ambiguous, as N
+    // means bytes on some targets and a power-of-2 exponent on others.
+    Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
+  }
+
+  Str << MangledName << ":\n";
+
+  if (HasNonzeroInitializer) {
+    for (VariableDeclaration::Initializer *Init : Initializers) {
+      switch (Init->getKind()) {
+      case VariableDeclaration::Initializer::DataInitializerKind: {
+        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
+                              ->getContents();
+        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
+          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
+        }
+        break;
+      }
+      case VariableDeclaration::Initializer::ZeroInitializerKind:
+        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
+        break;
+      case VariableDeclaration::Initializer::RelocInitializerKind: {
+        const auto Reloc =
+            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
+        Str << "\t" << getEmit32Directive() << "\t";
+        Str << Reloc->getDeclaration()->mangleName(Ctx);
+        if (RelocOffsetT Offset = Reloc->getOffset()) {
+          if (Offset >= 0 || (Offset == INT32_MIN))
+            Str << " + " << Offset;
+          else
+            Str << " - " << -Offset;
+        }
+        Str << "\n";
+        break;
+      }
+      }
+    }
+  } else
+    // NOTE: a non-constant, all-zero variable lands in .bss (%nobits). An
+    // ELF writer would write no bytes and only track virtual offsets, but
+    // textual assembly must still emit .zero to reserve the space; the
+    // .size directive alone does not advance the location.
+    Str << "\t.zero\t" << Size << "\n";
+
+  Str << "\t.size\t" << MangledName << ", " << Size << "\n";
+}
+
 std::unique_ptr<TargetHeaderLowering>
 TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
   TargetArch Target = Ctx->getFlags().getTargetArch();
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 66e663b..4d9598a 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -384,6 +384,13 @@
   virtual void lowerConstants() = 0;
 
 protected:
+  void emitGlobal(const VariableDeclaration &Var);
+
+  // Returns the directive used to emit a 4-byte global relocation. For now
+  // we assume .long is right for every target, although LLVM MIPS usually
+  // uses .4byte instead (perhaps it differs when the location is unaligned).
+  const char *getEmit32Directive() { return ".long"; }
+
   explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
   GlobalContext *Ctx;
 };
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index a2091b2..a4aa517 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -2194,11 +2194,6 @@
 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
-void TargetDataARM32::lowerGlobal(const VariableDeclaration &Var) const {
-  (void)Var;
-  UnimplementedError(Ctx->getFlags());
-}
-
 void TargetDataARM32::lowerGlobals(
     std::unique_ptr<VariableDeclarationList> Vars) {
   switch (Ctx->getFlags().getOutFileType()) {
@@ -2212,7 +2207,7 @@
     OstreamLocker L(Ctx);
     for (const VariableDeclaration *Var : *Vars) {
       if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
-        lowerGlobal(*Var);
+        emitGlobal(*Var);
       }
     }
   } break;
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 2477aaa..7f5fdc8 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -319,7 +319,6 @@
   explicit TargetDataARM32(GlobalContext *Ctx);
 
 private:
-  void lowerGlobal(const VariableDeclaration &Var) const;
   ~TargetDataARM32() override {}
   template <typename T> static void emitConstantPool(GlobalContext *Ctx);
 };
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index 748881e..ee2300e 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -671,11 +671,6 @@
 TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
-void TargetDataMIPS32::lowerGlobal(const VariableDeclaration &Var) const {
-  (void)Var;
-  llvm::report_fatal_error("Not yet implemented");
-}
-
 void TargetDataMIPS32::lowerGlobals(
     std::unique_ptr<VariableDeclarationList> Vars) {
   switch (Ctx->getFlags().getOutFileType()) {
@@ -689,7 +684,7 @@
     OstreamLocker L(Ctx);
     for (const VariableDeclaration *Var : *Vars) {
       if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
-        lowerGlobal(*Var);
+        emitGlobal(*Var);
       }
     }
   } break;
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 3aad63f..9204833 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -144,7 +144,6 @@
   explicit TargetDataMIPS32(GlobalContext *Ctx);
 
 private:
-  void lowerGlobal(const VariableDeclaration &Var) const;
   ~TargetDataMIPS32() override {}
   template <typename T> static void emitConstantPool(GlobalContext *Ctx);
 };
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index dcfdc96..dbb4e4a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -5017,82 +5017,6 @@
 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
     : TargetDataLowering(Ctx) {}
 
-void TargetDataX8632::lowerGlobal(const VariableDeclaration &Var) {
-  // If external and not initialized, this must be a cross test.
-  // Don't generate a declaration for such cases.
-  bool IsExternal = Var.isExternal() || Ctx->getFlags().getDisableInternal();
-  if (IsExternal && !Var.hasInitializer())
-    return;
-
-  Ostream &Str = Ctx->getStrEmit();
-  const VariableDeclaration::InitializerListType &Initializers =
-      Var.getInitializers();
-  bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
-  bool IsConstant = Var.getIsConstant();
-  uint32_t Align = Var.getAlignment();
-  SizeT Size = Var.getNumBytes();
-  IceString MangledName = Var.mangleName(Ctx);
-  IceString SectionSuffix = "";
-  if (Ctx->getFlags().getDataSections())
-    SectionSuffix = "." + MangledName;
-
-  Str << "\t.type\t" << MangledName << ",@object\n";
-
-  if (IsConstant)
-    Str << "\t.section\t.rodata" << SectionSuffix << ",\"a\",@progbits\n";
-  else if (HasNonzeroInitializer)
-    Str << "\t.section\t.data" << SectionSuffix << ",\"aw\",@progbits\n";
-  else
-    Str << "\t.section\t.bss" << SectionSuffix << ",\"aw\",@nobits\n";
-
-  if (IsExternal)
-    Str << "\t.globl\t" << MangledName << "\n";
-
-  if (Align > 1)
-    Str << "\t.align\t" << Align << "\n";
-
-  Str << MangledName << ":\n";
-
-  if (HasNonzeroInitializer) {
-    for (VariableDeclaration::Initializer *Init : Initializers) {
-      switch (Init->getKind()) {
-      case VariableDeclaration::Initializer::DataInitializerKind: {
-        const auto Data = llvm::cast<VariableDeclaration::DataInitializer>(Init)
-                              ->getContents();
-        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
-          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
-        }
-        break;
-      }
-      case VariableDeclaration::Initializer::ZeroInitializerKind:
-        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
-        break;
-      case VariableDeclaration::Initializer::RelocInitializerKind: {
-        const auto Reloc =
-            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
-        Str << "\t.long\t";
-        Str << Reloc->getDeclaration()->mangleName(Ctx);
-        if (RelocOffsetT Offset = Reloc->getOffset()) {
-          if (Offset >= 0 || (Offset == INT32_MIN))
-            Str << " + " << Offset;
-          else
-            Str << " - " << -Offset;
-        }
-        Str << "\n";
-        break;
-      }
-      }
-    }
-  } else
-    // NOTE: for non-constant zero initializers, this is BSS (no bits),
-    // so an ELF writer would not write to the file, and only track
-    // virtual offsets, but the .s writer still needs this .zero and
-    // cannot simply use the .size to advance offsets.
-    Str << "\t.zero\t" << Size << "\n";
-
-  Str << "\t.size\t" << MangledName << ", " << Size << "\n";
-}
-
 void TargetDataX8632::lowerGlobals(
     std::unique_ptr<VariableDeclarationList> Vars) {
   switch (Ctx->getFlags().getOutFileType()) {
@@ -5106,7 +5030,7 @@
     OstreamLocker L(Ctx);
     for (const VariableDeclaration *Var : *Vars) {
       if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
-        lowerGlobal(*Var);
+        emitGlobal(*Var);
       }
     }
   } break;
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h
index fe3612c..a921294 100644
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -594,7 +594,6 @@
   explicit TargetDataX8632(GlobalContext *Ctx);
 
 private:
-  void lowerGlobal(const VariableDeclaration &Var);
   ~TargetDataX8632() override {}
   template <typename T> static void emitConstantPool(GlobalContext *Ctx);
 };
diff --git a/tests_lit/llvm2ice_tests/globalinit.pnacl.ll b/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
index fcefe6a..e22d292 100644
--- a/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/globalinit.pnacl.ll
@@ -2,16 +2,35 @@
 
 ; REQUIRES: allow_dump
 
-; Test -filetype=asm to test the lea "hack" until we are fully confident
-; in -filetype=iasm .
-; RUN: %p2i -i %s --filetype=asm --args --verbose none | FileCheck %s
+; Test initializers with -filetype=asm.
+; RUN: %if --need=target_X8632 --command %p2i --filetype=asm --target x8632 \
+; RUN:   -i %s --args -O2 | %if --need=target_X8632 --command FileCheck %s
 
-; Test -filetype=iasm and try to cross reference instructions w/ the
-; symbol table.
-; RUN: %p2i --assemble --disassemble -i %s --args --verbose none \
-; RUN:   | FileCheck --check-prefix=IAS %s
-; RUN: %p2i --assemble --disassemble --dis-flags=-t -i %s --args \
-; RUN:   --verbose none | FileCheck --check-prefix=SYMTAB %s
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --target arm32 \
+; RUN:   -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck %s
+
+; Test instructions for materializing addresses.
+; RUN: %if --need=target_X8632 --command %p2i --filetype=asm --target x8632 \
+; RUN:   -i %s --args -O2 \
+; RUN: | %if --need=target_X8632 --command FileCheck %s --check-prefix=X8632
+
+; Test instructions with -filetype=obj and try to cross reference instructions
+; w/ the symbol table.
+; RUN: %if --need=target_X8632 --command %p2i --assemble --disassemble \
+; RUN:   --target x8632 -i %s --args --verbose none \
+; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=IAS %s
+
+; RUN: %if --need=target_X8632 --command %p2i --assemble --disassemble \
+; RUN:   --dis-flags=-t --target x8632 -i %s --args --verbose none \
+; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix=SYMTAB %s
+
+; Only checking symtab for ARM for now. TODO(jvoung): Need to lower
+; arguments at callsite.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --dis-flags=-t --target arm32 -i %s \
+; RUN:   --args --verbose none --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix=SYMTAB %s
 
 define internal i32 @main(i32 %argc, i32 %argv) {
 entry:
@@ -31,14 +50,14 @@
   call void @use(i32 %expanded13)
   ret i32 0
 }
-; CHECK-LABEL: main
-; CHECK: movl $PrimitiveInit,
-; CHECK: movl $PrimitiveInitConst,
-; CHECK: movl $PrimitiveInitStatic,
-; CHECK: movl $PrimitiveUninit,
-; CHECK: movl $ArrayInit,
-; CHECK: movl $ArrayInitPartial,
-; CHECK: movl $ArrayUninit,
+; X8632-LABEL: main
+; X8632: movl $PrimitiveInit,
+; X8632: movl $PrimitiveInitConst,
+; X8632: movl $PrimitiveInitStatic,
+; X8632: movl $PrimitiveUninit,
+; X8632: movl $ArrayInit,
+; X8632: movl $ArrayInitPartial,
+; X8632: movl $ArrayUninit,
 
 ; objdump does not indicate what symbol the mov/relocation applies to
 ; so we grep for "mov {{.*}}, OFFSET, sec", along with
@@ -91,73 +110,73 @@
 
 
 @PrimitiveInit = internal global [4 x i8] c"\1B\00\00\00", align 4
-; CHECK: .type PrimitiveInit,@object
-; CHECK-NEXT: .section .data,"aw",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveInit,%object
+; CHECK-NEXT: .section .data,"aw",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveInit:
 ; CHECK-NEXT: .byte
 ; CHECK: .size PrimitiveInit, 4
 
 @PrimitiveInitConst = internal constant [4 x i8] c"\0D\00\00\00", align 4
-; CHECK: .type PrimitiveInitConst,@object
-; CHECK-NEXT: .section .rodata,"a",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveInitConst,%object
+; CHECK-NEXT: .section .rodata,"a",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveInitConst:
 ; CHECK-NEXT: .byte
 ; CHECK: .size PrimitiveInitConst, 4
 
 @ArrayInit = internal global [20 x i8] c"\0A\00\00\00\14\00\00\00\1E\00\00\00(\00\00\002\00\00\00", align 4
-; CHECK: .type ArrayInit,@object
-; CHECK-NEXT: .section .data,"aw",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayInit,%object
+; CHECK-NEXT: .section .data,"aw",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayInit:
 ; CHECK-NEXT: .byte
 ; CHECK: .size ArrayInit, 20
 
 @ArrayInitPartial = internal global [40 x i8] c"<\00\00\00F\00\00\00P\00\00\00Z\00\00\00d\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4
-; CHECK: .type ArrayInitPartial,@object
-; CHECK-NEXT: .section .data,"aw",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayInitPartial,%object
+; CHECK-NEXT: .section .data,"aw",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayInitPartial:
 ; CHECK-NEXT: .byte
 ; CHECK: .size ArrayInitPartial, 40
 
 @PrimitiveInitStatic = internal global [4 x i8] zeroinitializer, align 4
-; CHECK: .type PrimitiveInitStatic,@object
-; CHECK-NEXT: .section .bss,"aw",@nobits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveInitStatic,%object
+; CHECK-NEXT: .section .bss,"aw",%nobits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveInitStatic:
 ; CHECK-NEXT: .zero 4
 ; CHECK-NEXT: .size PrimitiveInitStatic, 4
 
 @PrimitiveUninit = internal global [4 x i8] zeroinitializer, align 4
-; CHECK: .type PrimitiveUninit,@object
-; CHECK-NEXT: .section .bss,"aw",@nobits
-; CHECK-NEXT: .align 4
+; CHECK: .type PrimitiveUninit,%object
+; CHECK-NEXT: .section .bss,"aw",%nobits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: PrimitiveUninit:
 ; CHECK-NEXT: .zero 4
 ; CHECK-NEXT: .size PrimitiveUninit, 4
 
 @ArrayUninit = internal global [20 x i8] zeroinitializer, align 4
-; CHECK: .type ArrayUninit,@object
-; CHECK-NEXT: .section .bss,"aw",@nobits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayUninit,%object
+; CHECK-NEXT: .section .bss,"aw",%nobits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayUninit:
 ; CHECK-NEXT: .zero 20
 ; CHECK-NEXT: .size ArrayUninit, 20
 
 @ArrayUninitConstDouble = internal constant [200 x i8] zeroinitializer, align 8
-; CHECK: .type ArrayUninitConstDouble,@object
-; CHECK-NEXT: .section .rodata,"a",@progbits
-; CHECK-NEXT: .align 8
+; CHECK: .type ArrayUninitConstDouble,%object
+; CHECK-NEXT: .section .rodata,"a",%progbits
+; CHECK-NEXT: .p2align 3
 ; CHECK-NEXT: ArrayUninitConstDouble:
 ; CHECK-NEXT: .zero 200
 ; CHECK-NEXT: .size ArrayUninitConstDouble, 200
 
 @ArrayUninitConstInt = internal constant [20 x i8] zeroinitializer, align 4
-; CHECK: .type ArrayUninitConstInt,@object
-; CHECK: .section .rodata,"a",@progbits
-; CHECK-NEXT: .align 4
+; CHECK: .type ArrayUninitConstInt,%object
+; CHECK: .section .rodata,"a",%progbits
+; CHECK-NEXT: .p2align 2
 ; CHECK-NEXT: ArrayUninitConstInt:
 ; CHECK-NEXT: .zero 20
 ; CHECK-NEXT: .size ArrayUninitConstInt, 20
diff --git a/tests_lit/llvm2ice_tests/globalrelocs.ll b/tests_lit/llvm2ice_tests/globalrelocs.ll
index 7e6a01c..0d163e6 100644
--- a/tests_lit/llvm2ice_tests/globalrelocs.ll
+++ b/tests_lit/llvm2ice_tests/globalrelocs.ll
@@ -16,8 +16,8 @@
 
 @bytes = internal global [7 x i8] c"abcdefg"
 ; DUMP: @bytes = internal global [7 x i8] c"abcdefg"
-; CHECK:	.type	bytes,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	bytes,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:bytes:
 ; CHECK:	.byte	97
 ; CHECK:	.byte	98
@@ -30,8 +30,8 @@
 
 @const_bytes = internal constant [7 x i8] c"abcdefg"
 ; DUMP: @const_bytes = internal constant [7 x i8] c"abcdefg"
-; CHECK:	.type	const_bytes,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_bytes,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_bytes:
 ; CHECK:	.byte	97
 ; CHECK:	.byte	98
@@ -44,40 +44,40 @@
 
 @ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32)
 ; DUMP: @ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32)
-; CHECK:	.type	ptr_to_ptr,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	ptr_to_ptr,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:ptr_to_ptr:
 ; CHECK:	.long	ptr
 ; CHECK:	.size	ptr_to_ptr, 4
 
 @const_ptr_to_ptr = internal constant i32 ptrtoint (i32* @ptr to i32)
 ; DUMP: @const_ptr_to_ptr = internal constant i32 ptrtoint (i32* @ptr to i32)
-; CHECK:	.type	const_ptr_to_ptr,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_ptr_to_ptr,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_ptr_to_ptr:
 ; CHECK:	.long	ptr
 ; CHECK:	.size	const_ptr_to_ptr, 4
 
 @ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32)
 ; DUMP: @ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32)
-; CHECK:	.type	ptr_to_func,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	ptr_to_func,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:ptr_to_func:
 ; CHECK:	.long	func
 ; CHECK:	.size	ptr_to_func, 4
 
 @const_ptr_to_func = internal constant i32 ptrtoint (void ()* @func to i32)
 ; DUMP: @const_ptr_to_func = internal constant i32 ptrtoint (void ()* @func to i32)
-; CHECK:	.type	const_ptr_to_func,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_ptr_to_func,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_ptr_to_func:
 ; CHECK:	.long	func
 ; CHECK:	.size	const_ptr_to_func, 4
 
 @compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
 ; DUMP: @compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
-; CHECK:	.type	compound,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	compound,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:compound:
 ; CHECK:	.byte	102
 ; CHECK:	.byte	111
@@ -87,8 +87,8 @@
 
 @const_compound = internal constant <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
 ; DUMP: @const_compound = internal constant <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }>
-; CHECK:	.type	const_compound,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_compound,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_compound:
 ; CHECK:	.byte	102
 ; CHECK:	.byte	111
@@ -98,162 +98,162 @@
 
 @ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32)
 ; DUMP: @ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32)
-; CHECK:	.type	ptr,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	ptr,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:ptr:
 ; CHECK:	.long	bytes
 ; CHECK:	.size	ptr, 4
 
 @const_ptr = internal constant i32 ptrtoint ([7 x i8]* @bytes to i32)
 ; DUMP: @const_ptr = internal constant i32 ptrtoint ([7 x i8]* @bytes to i32)
-; CHECK:	.type	const_ptr,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_ptr,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_ptr:
 ; CHECK:	.long	bytes
 ; CHECK:	.size	const_ptr, 4
 
 @addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
 ; DUMP: @addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
-; CHECK:	.type	addend_ptr,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_ptr,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_ptr:
 ; CHECK:	.long	ptr + 1
 ; CHECK:	.size	addend_ptr, 4
 
 @const_addend_ptr = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
 ; DUMP: @const_addend_ptr = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1)
-; CHECK:	.type	const_addend_ptr,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_ptr,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_ptr:
 ; CHECK:	.long	ptr + 1
 ; CHECK:	.size	const_addend_ptr, 4
 
 @addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
 ; DUMP: @addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
-; CHECK:	.type	addend_negative,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_negative,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_negative:
 ; CHECK:	.long	ptr - 1
 ; CHECK:	.size	addend_negative, 4
 
 @const_addend_negative = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
 ; DUMP: @const_addend_negative = internal constant i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1)
-; CHECK:	.type	const_addend_negative,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_negative,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_negative:
 ; CHECK:	.long	ptr - 1
 ; CHECK:	.size	const_addend_negative, 4
 
 @addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
 ; DUMP: @addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
-; CHECK:	.type	addend_array1,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_array1,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_array1:
 ; CHECK:	.long	bytes + 1
 ; CHECK:	.size	addend_array1, 4
 
 @const_addend_array1 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
 ; DUMP: @const_addend_array1 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1)
-; CHECK:	.type	const_addend_array1,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_array1,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_array1:
 ; CHECK:	.long	bytes + 1
 ; CHECK:	.size	const_addend_array1, 4
 
 @addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
 ; DUMP: @addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
-; CHECK:	.type	addend_array2,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_array2,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_array2:
 ; CHECK:	.long	bytes + 7
 ; CHECK:	.size	addend_array2, 4
 
 @const_addend_array2 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
 ; DUMP: @const_addend_array2 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7)
-; CHECK:	.type	const_addend_array2,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_array2,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_array2:
 ; CHECK:	.long	bytes + 7
 ; CHECK:	.size	const_addend_array2, 4
 
 @addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
 ; DUMP: @addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
-; CHECK:	.type	addend_array3,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_array3,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_array3:
 ; CHECK:	.long	bytes + 9
 ; CHECK:	.size	addend_array3, 4
 
 @const_addend_array3 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
 ; DUMP: @const_addend_array3 = internal constant i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9)
-; CHECK:	.type	const_addend_array3,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_array3,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_array3:
 ; CHECK:	.long	bytes + 9
 ; CHECK:	.size	const_addend_array3, 4
 
 @addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
 ; DUMP: @addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
-; CHECK:	.type	addend_struct1,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_struct1,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_struct1:
 ; CHECK:	.long	compound + 1
 ; CHECK:	.size	addend_struct1, 4
 
 @const_addend_struct1 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
 ; DUMP: @const_addend_struct1 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1)
-; CHECK:	.type	const_addend_struct1,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_struct1,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_struct1:
 ; CHECK:	.long	compound + 1
 ; CHECK:	.size	const_addend_struct1, 4
 
 @addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
 ; DUMP: @addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
-; CHECK:	.type	addend_struct2,@object
-; CHECK:	.section	.data,"aw",@progbits
+; CHECK:	.type	addend_struct2,%object
+; CHECK:	.section	.data,"aw",%progbits
 ; CHECK:addend_struct2:
 ; CHECK:	.long	compound + 4
 ; CHECK:	.size	addend_struct2, 4
 
 @const_addend_struct2 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
 ; DUMP: @const_addend_struct2 = internal constant i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4)
-; CHECK:	.type	const_addend_struct2,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	const_addend_struct2,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:const_addend_struct2:
 ; CHECK:	.long	compound + 4
 ; CHECK:	.size	const_addend_struct2, 4
 
 @ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8
 ; DUMP: @ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8
-; CHECK:	.type	ptr_to_func_align,@object
-; CHECK:	.section	.data,"aw",@progbits
-; CHECK:	.align	8
+; CHECK:	.type	ptr_to_func_align,%object
+; CHECK:	.section	.data,"aw",%progbits
+; CHECK:	.p2align	3
 ; CHECK:ptr_to_func_align:
 ; CHECK:	.long	func
 ; CHECK:	.size	ptr_to_func_align, 4
 
 @const_ptr_to_func_align = internal constant i32 ptrtoint (void ()* @func to i32), align 8
 ; DUMP: @const_ptr_to_func_align = internal constant i32 ptrtoint (void ()* @func to i32), align 8
-; CHECK:	.type	const_ptr_to_func_align,@object
-; CHECK:	.section	.rodata,"a",@progbits
-; CHECK:	.align	8
+; CHECK:	.type	const_ptr_to_func_align,%object
+; CHECK:	.section	.rodata,"a",%progbits
+; CHECK:	.p2align	3
 ; CHECK:const_ptr_to_func_align:
 ; CHECK:	.long	func
 ; CHECK:	.size	const_ptr_to_func_align, 4
 
 @char = internal constant [1 x i8] c"0"
 ; DUMP: @char = internal constant [1 x i8] c"0"
-; CHECK:	.type	char,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	char,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:char:
 ; CHECK:	.byte	48
 ; CHECK:	.size	char, 1
 
 @short = internal constant [2 x i8] zeroinitializer
 ; DUMP: @short = internal constant [2 x i8] zeroinitializer
-; CHECK:	.type	short,@object
-; CHECK:	.section	.rodata,"a",@progbits
+; CHECK:	.type	short,%object
+; CHECK:	.section	.rodata,"a",%progbits
 ; CHECK:short:
 ; CHECK:	.zero	2
 ; CHECK:	.size	short, 2
