Refactor data-in-code annotations.

Use a dedicated MachO load command to annotate data-in-code regions.
This is the same format the linker produces for final executable images,
allowing consistency of representation and use of introspection tools
for both object and executable files.

Data-in-code regions are annotated via ".data_region"/".end_data_region"
directive pairs, with an optional region type.

data_region_directive := ".data_region" [ region_type ]
region_type := "jt8" | "jt16" | "jt32"
end_data_region_directive := ".end_data_region"

The previous handling of ARM-style "$d.*" labels was broken and has
been removed. Specifically, it didn't handle ARM vs. Thumb mode when
marking the end of the section.

rdar://11459456

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157062 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 2456858..d9c1d51 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -50,6 +50,7 @@
   AllowNameToStartWithDigit = false;
   AllowPeriodsInName = true;
   AllowUTF8 = true;
+  UseDataRegionDirectives = false;
   ZeroDirective = "\t.zero\t";
   AsciiDirective = "\t.ascii\t";
   AscizDirective = "\t.asciz\t";
@@ -57,12 +58,6 @@
   Data16bitsDirective = "\t.short\t";
   Data32bitsDirective = "\t.long\t";
   Data64bitsDirective = "\t.quad\t";
-  DataBegin = "$d.";
-  CodeBegin = "$a.";
-  JT8Begin = "$d.";
-  JT16Begin = "$d.";
-  JT32Begin = "$d.";
-  SupportsDataRegions = false;
   SunStyleELFSectionSwitchSyntax = false;
   UsesELFSectionDirectiveForBSS = false;
   AlignDirective = "\t.align\t";
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 8941365..678e75a 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -36,8 +36,6 @@
   SupportsDebugInformation = true;
   DwarfSectionOffsetDirective = "\t.secrel32\t";
   HasMicrosoftFastStdCallMangling = true;
-
-  SupportsDataRegions = false;
 }
 
 void MCAsmInfoMicrosoft::anchor() { }
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 4d67592..73ef7ba 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -43,13 +43,6 @@
   HasMachoTBSSDirective = true; // Uses .tbss
   HasStaticCtorDtorReferenceInStaticMode = true;
 
-  CodeBegin = "L$start$code$";
-  DataBegin = "L$start$data$";
-  JT8Begin  = "L$start$jt8$";
-  JT16Begin = "L$start$jt16$";
-  JT32Begin = "L$start$jt32$";
-  SupportsDataRegions = true;
-
   // FIXME: Darwin 10 and newer don't need this.
   LinkerRequiresNonEmptyDwarfLines = true;
 
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 3ce84c2..9497b6b 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -138,6 +138,7 @@
   virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
                                    MCSymbol *EHSymbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitDataRegion(MCDataRegionType Kind);
   virtual void EmitThumbFunc(MCSymbol *Func);
 
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
@@ -352,6 +353,21 @@
   EmitEOL();
 }
 
+void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) {
+  // Print nothing unless the MCAsmInfo reports data region directive support.
+  if (!getContext().getAsmInfo().doesSupportDataRegionDirectives())
+    return;
+  // One directive spelling per region type, followed by the end of line.
+  switch (Kind) {
+  case MCDR_DataRegionEnd:  OS << "\t.end_data_region";  break;
+  case MCDR_DataRegion:     OS << "\t.data_region";      break;
+  case MCDR_DataRegionJT8:  OS << "\t.data_region jt8";  break;
+  case MCDR_DataRegionJT16: OS << "\t.data_region jt16"; break;
+  case MCDR_DataRegionJT32: OS << "\t.data_region jt32"; break;
+  }
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
   // This needs to emit to a temporary string to get properly quoted
   // MCSymbols when they have spaces in them.
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 6c4d0e3..6ac9d9d 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -13,6 +13,8 @@
 
 #include "MCELF.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index bc6cf77..970aa8b 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -1,4 +1,3 @@
-//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -33,6 +32,8 @@
 private:
   virtual void EmitInstToData(const MCInst &Inst);
 
+  void EmitDataRegion(DataRegionData::KindTy Kind);
+  void EmitDataRegionEnd();
 public:
   MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
                   raw_ostream &OS, MCCodeEmitter *Emitter)
@@ -46,6 +47,7 @@
   virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
                                    MCSymbol *EHSymbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitDataRegion(MCDataRegionType Kind);
   virtual void EmitThumbFunc(MCSymbol *Func);
   virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
@@ -138,6 +140,26 @@
   SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
 }
 
+void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
+  // Mark where the region begins with a fresh temporary label.
+  MCSymbol *Begin = getContext().CreateTempSymbol();
+  EmitLabel(Begin);
+  // Append a record whose End is still NULL to the assembler's region list;
+  // EmitDataRegionEnd() fills in the end label later.
+  DataRegionData Region = { Kind, Begin, NULL };
+  getAssembler().getDataRegions().push_back(Region);
+}
+
+void MCMachOStreamer::EmitDataRegionEnd() {
+  // The region being closed is the last one recorded and must still be open.
+  std::vector<DataRegionData> &Recorded = getAssembler().getDataRegions();
+  assert(Recorded.size() && "Mismatched .end_data_region!");
+  DataRegionData &Last = Recorded.back();
+  assert(Last.End == NULL && "Mismatched .end_data_region!");
+  Last.End = getContext().CreateTempSymbol();
+  EmitLabel(Last.End);
+}
+
 void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   // Let the target do whatever target specific stuff it needs to do.
   getAssembler().getBackend().handleAssemblerFlag(Flag);
@@ -153,6 +175,26 @@
   }
 }
 
+/// EmitDataRegion - Open or close a data-in-code region according to Kind.
+/// Each region-opening type is forwarded to the KindTy overload with the
+/// matching DataRegionData kind.
+void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) {
+  switch (Kind) {
+  // Closing a region takes no kind.
+  case MCDR_DataRegionEnd:
+    return EmitDataRegionEnd();
+  // Opening a region.
+  case MCDR_DataRegion:
+    return EmitDataRegion(DataRegionData::Data);
+  case MCDR_DataRegionJT8:
+    return EmitDataRegion(DataRegionData::JumpTable8);
+  case MCDR_DataRegionJT16:
+    return EmitDataRegion(DataRegionData::JumpTable16);
+  case MCDR_DataRegionJT32:
+    return EmitDataRegion(DataRegionData::JumpTable32);
+  }
+}
+
 void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
   // Remember that the function is a thumb function. Fixup and relocation
   // values will need adjusted.
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 6f45068..5662fea 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -14,6 +14,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -56,6 +57,9 @@
     AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
     AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
 
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>(".data_region");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>(".end_data_region");
+
     // Special section directives.
     AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
     AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data");
@@ -113,6 +117,8 @@
   bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
   bool ParseDirectiveTBSS(StringRef, SMLoc);
   bool ParseDirectiveZerofill(StringRef, SMLoc);
+  bool ParseDirectiveDataRegion(StringRef, SMLoc);
+  bool ParseDirectiveDataRegionEnd(StringRef, SMLoc);
 
   // Named Section Directive
   bool ParseSectionDirectiveConst(StringRef, SMLoc) {
@@ -659,6 +665,42 @@
   return false;
 }
 
+/// ParseDirectiveDataRegion - Open a data-in-code region of the given type.
+///  ::= .data_region [ ( jt8 | jt16 | jt32 ) ]
+bool DarwinAsmParser::ParseDirectiveDataRegion(StringRef, SMLoc) {
+  int Kind = MCDR_DataRegion; // Region type used when no operand is given.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    StringRef RegionType;
+    SMLoc Loc = getParser().getTok().getLoc();
+    if (getParser().ParseIdentifier(RegionType))
+      return TokError("expected region type after '.data_region' directive");
+    Kind = StringSwitch<int>(RegionType)
+      .Case("jt8", MCDR_DataRegionJT8)
+      .Case("jt16", MCDR_DataRegionJT16)
+      .Case("jt32", MCDR_DataRegionJT32)
+      .Default(-1);
+    if (Kind == -1)
+      return Error(Loc, "unknown region type in '.data_region' directive");
+    // Reject trailing tokens instead of silently consuming one of them.
+    if (getLexer().isNot(AsmToken::EndOfStatement))
+      return TokError("unexpected token in '.data_region' directive");
+  }
+  Lex(); // Consume the end-of-statement token.
+  getStreamer().EmitDataRegion((MCDataRegionType)Kind);
+  return false;
+}
+
+/// ParseDirectiveDataRegionEnd
+///  ::= .end_data_region
+bool DarwinAsmParser::ParseDirectiveDataRegionEnd(StringRef, SMLoc) {
+  if (getLexer().is(AsmToken::EndOfStatement)) {
+    Lex(); // Consume the end of statement, then close the open region.
+    getStreamer().EmitDataRegion(MCDR_DataRegionEnd);
+    return false;
+  }
+  return TokError("unexpected token in '.end_data_region' directive");
+}
+
 namespace llvm {
 
 MCAsmParserExtension *createDarwinAsmParser() {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 43e62ff..e363f28 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -20,12 +20,9 @@
 #include <cstdlib>
 using namespace llvm;
 
-MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), EmitEHFrame(true),
-                                         EmitDebugFrame(false),
-                                         CurrentW64UnwindInfo(0),
-                                         LastSymbol(0),
-                                         UniqueCodeBeginSuffix(0),
-                                         UniqueDataBeginSuffix(0) {
+MCStreamer::MCStreamer(MCContext &Ctx)
+  : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
+    CurrentW64UnwindInfo(0), LastSymbol(0) {
   const MCSection *section = NULL;
   SectionStack.push_back(std::make_pair(section, section));
 }
@@ -183,85 +180,6 @@
   LastSymbol = Symbol;
 }
 
-void MCStreamer::EmitDataRegion() {
-  if (RegionIndicator == Data) return;
-
-  MCContext &Context = getContext();
-  const MCAsmInfo &MAI = Context.getAsmInfo();
-  if (!MAI.getSupportsDataRegions()) return;
-
-  // Generate a unique symbol name.
-  MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getDataBeginLabelName() +
-                                               Twine(UniqueDataBeginSuffix++));
-  EmitLabel(NewSym);
-
-  RegionIndicator = Data;
-}
-
-void MCStreamer::EmitCodeRegion() {
-  if (RegionIndicator == Code) return;
-
-  MCContext &Context = getContext();
-  const MCAsmInfo &MAI = Context.getAsmInfo();
-  if (!MAI.getSupportsDataRegions()) return;
-
-  // Generate a unique symbol name.
-  MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getCodeBeginLabelName() +
-                                               Twine(UniqueCodeBeginSuffix++));
-  EmitLabel(NewSym);
-
-  RegionIndicator = Code;
-}
-
-void MCStreamer::EmitJumpTable8Region() {
-  if (RegionIndicator == JumpTable8) return;
-
-  MCContext &Context = getContext();
-  const MCAsmInfo &MAI = Context.getAsmInfo();
-  if (!MAI.getSupportsDataRegions()) return;
-
-  // Generate a unique symbol name.
-  MCSymbol *NewSym =
-    Context.GetOrCreateSymbol(MAI.getJumpTable8BeginLabelName() +
-                              Twine(UniqueDataBeginSuffix++));
-  EmitLabel(NewSym);
-
-  RegionIndicator = JumpTable8;
-}
-
-void MCStreamer::EmitJumpTable16Region() {
-  if (RegionIndicator == JumpTable16) return;
-
-  MCContext &Context = getContext();
-  const MCAsmInfo &MAI = Context.getAsmInfo();
-  if (!MAI.getSupportsDataRegions()) return;
-
-  // Generate a unique symbol name.
-  MCSymbol *NewSym =
-    Context.GetOrCreateSymbol(MAI.getJumpTable16BeginLabelName() +
-                              Twine(UniqueDataBeginSuffix++));
-  EmitLabel(NewSym);
-
-  RegionIndicator = JumpTable16;
-}
-
-
-void MCStreamer::EmitJumpTable32Region() {
-  if (RegionIndicator == JumpTable32) return;
-
-  MCContext &Context = getContext();
-  const MCAsmInfo &MAI = Context.getAsmInfo();
-  if (!MAI.getSupportsDataRegions()) return;
-
-  // Generate a unique symbol name.
-  MCSymbol *NewSym =
-    Context.GetOrCreateSymbol(MAI.getJumpTable32BeginLabelName() +
-                              Twine(UniqueDataBeginSuffix++));
-  EmitLabel(NewSym);
-
-  RegionIndicator = JumpTable32;
-}
-
 void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) {
   EnsureValidFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
@@ -283,7 +201,6 @@
   EmitCFIStartProcImpl(Frame);
 
   FrameInfos.push_back(Frame);
-  RegionIndicator = Code;
 }
 
 void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 8e4066c..5820a22 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 
 #include <vector>
@@ -351,6 +352,21 @@
     Write32(Address);
 }
 
+void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type,
+                                                uint32_t DataOffset,
+                                                uint32_t DataSize) {
+  // Remember the stream position so the emitted size can be checked below.
+  uint64_t CommandBegin = OS.tell();
+  (void) CommandBegin; // Only read by the assert at the end.
+  // Four 32-bit fields: command type, command size, payload offset and size.
+  Write32(Type);
+  Write32(macho::LinkeditLoadCommandSize);
+  Write32(DataOffset);
+  Write32(DataSize);
+  assert(OS.tell() - CommandBegin == macho::LinkeditLoadCommandSize);
+}
+
+
 void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
                                         const MCAsmLayout &Layout,
                                         const MCFragment *Fragment,
@@ -654,6 +670,13 @@
                          macho::DysymtabLoadCommandSize);
   }
 
+  // Add the data-in-code load command size, if used.
+  unsigned NumDataRegions = Asm.getDataRegions().size();
+  if (NumDataRegions) {
+    ++NumLoadCommands;
+    LoadCommandsSize += macho::LinkeditLoadCommandSize;
+  }
+
   // Compute the total size of the section data, as well as its file size and vm
   // size.
   uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
@@ -701,6 +724,15 @@
     RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
   }
 
+  // Write the data-in-code load command, if used.
+  uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
+  if (NumDataRegions) {
+    uint64_t DataRegionsOffset = RelocTableEnd;
+    uint64_t DataRegionsSize = NumDataRegions * 8;
+    WriteLinkeditLoadCommand(macho::LCT_DataInCode, DataRegionsOffset,
+                             DataRegionsSize);
+  }
+
   // Write the symbol table load command, if used.
   if (NumSymbols) {
     unsigned FirstLocalSymbol = 0;
@@ -717,10 +749,10 @@
 
     // If used, the indirect symbols are written after the section data.
     if (NumIndirectSymbols)
-      IndirectSymbolOffset = RelocTableEnd;
+      IndirectSymbolOffset = DataInCodeTableEnd;
 
     // The symbol table is written after the indirect symbol data.
-    uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+    uint64_t SymbolTableOffset = DataInCodeTableEnd + IndirectSymbolSize;
 
     // The string table is written after symbol table.
     uint64_t StringTableOffset =
@@ -760,6 +792,23 @@
     }
   }
 
+  // Write out the data-in-code region payload, if there is one.
+  for (MCAssembler::const_data_region_iterator it = Asm.data_region_begin(),
+         ie = Asm.data_region_end(); it != ie; ++it) {
+    const DataRegionData *Data = &(*it);
+    // An unterminated '.data_region' leaves End NULL; don't dereference it.
+    if (!Data->End) report_fatal_error("Data region not terminated");
+    uint64_t Start = getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start), Layout);
+    uint64_t End = getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End), Layout);
+    DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
+                 << "  start: " << Start << "(" << Data->Start->getName() << ")"
+                 << "  end: " << End << "(" << Data->End->getName() << ")"
+                 << "  size: " << End - Start << "\n");
+    Write32(Start);       // address of the region's start label
+    Write16(End - Start); // region length, truncated to 16 bits
+    Write16(Data->Kind);  // region kind
+  }
+
   // Write the symbol table data, if used.
   if (NumSymbols) {
     // Write the indirect symbol entries.