Use assembler expressions to lay out the EH LSDA.

Rely on the assembler to finalize the layout of the DWARF/Itanium
exception-handling LSDA. Rather than calculating the exact size of each
component of the LSDA in the compiler, use assembler directives:

    To emit the offset to the TTBase label:

.uleb128 .Lttbase0-.Lttbaseref0
.Lttbaseref0:

    To emit the size of the call site table:

.uleb128 .Lcst_end0-.Lcst_begin0
.Lcst_begin0:
... call site table entries ...
.Lcst_end0:

    To align the type info table:

... action table ...
.balign 4
.long _ZTIi
.long _ZTIl
.Lttbase0:

Using assembler directives simplifies the compiler and allows switching
the encoding of offsets in the call site table from udata4 to uleb128,
which would give a large code size saving. (This commit does not change
the encoding.)

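The combination of a uleb128 followed by a balign creates an unfortunate
dependency cycle: the encoded size of the uleb128 depends on the offset it
holds, which in turn depends on the amount of alignment padding emitted
before the type table. The assembler must sometimes resolve the cycle
either by padding the uleb128 or by inserting extra zero padding before
the type table. See PR35809 or GNU as bug 4029.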

Patch by Ryan Prichard!

llvm-svn: 324749
diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 15cfbd5..9011f02 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -91,7 +91,8 @@
     ATS.emitFnEnd();
 }
 
-void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
+void ARMException::emitTypeInfos(unsigned TTypeEncoding,
+                                 MCSymbol *TTBaseLabel) {
   const MachineFunction *MF = Asm->MF;
   const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MF->getFilterIds();
@@ -112,6 +113,8 @@
     Asm->EmitTTypeReference(GV, TTypeEncoding);
   }
 
+  Asm->OutStreamer->EmitLabel(TTBaseLabel);
+
   // Emit the Exception Specifications.
   if (VerboseAsm && !FilterIds.empty()) {
     Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index e6e8871..a1d6f3c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -43,15 +43,6 @@
   OutStreamer->EmitSLEB128IntValue(Value);
 }
 
-/// EmitULEB128 - emit the specified unsigned leb128 value.
-void AsmPrinter::EmitPaddedULEB128(uint64_t Value, unsigned PadTo,
-                                   const char *Desc) const {
-  if (isVerbose() && Desc)
-    OutStreamer->AddComment(Desc);
-
-  OutStreamer->EmitPaddedULEB128IntValue(Value, PadTo);
-}
-
 void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
   if (isVerbose() && Desc)
     OutStreamer->AddComment(Desc);
@@ -59,6 +50,12 @@
   OutStreamer->EmitULEB128IntValue(Value);
 }
 
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+                                              const MCSymbol *Lo) const {
+  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
 static const char *DecodeDWARFEncoding(unsigned Encoding) {
   switch (Encoding) {
   case dwarf::DW_EH_PE_absptr:
@@ -67,6 +64,10 @@
     return "omit";
   case dwarf::DW_EH_PE_pcrel:
     return "pcrel";
+  case dwarf::DW_EH_PE_uleb128:
+    return "uleb128";
+  case dwarf::DW_EH_PE_sleb128:
+    return "sleb128";
   case dwarf::DW_EH_PE_udata4:
     return "udata4";
   case dwarf::DW_EH_PE_udata8:
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 80d5bd2..b57ea8f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -70,7 +70,7 @@
 };
 
 class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase {
-  void emitTypeInfos(unsigned TTypeEncoding) override;
+  void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override;
   ARMTargetStreamer &getTargetStreamer();
 
 public:
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 871699a..d9db783 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -58,10 +58,10 @@
 
 /// Compute the actions table and gather the first action index for each landing
 /// pad site.
-unsigned EHStreamer::
-computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
-                    SmallVectorImpl<ActionEntry> &Actions,
-                    SmallVectorImpl<unsigned> &FirstActions) {
+void EHStreamer::computeActionsTable(
+    const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+    SmallVectorImpl<ActionEntry> &Actions,
+    SmallVectorImpl<unsigned> &FirstActions) {
   // The action table follows the call-site table in the LSDA. The individual
   // records are of two types:
   //
@@ -161,8 +161,6 @@
 
     PrevLPI = LPI;
   }
-
-  return SizeActions;
 }
 
 /// Return `true' if this is a call to a function marked `nounwind'. Return
@@ -369,47 +367,23 @@
   // landing pad site.
   SmallVector<ActionEntry, 32> Actions;
   SmallVector<unsigned, 64> FirstActions;
-  unsigned SizeActions =
-    computeActionsTable(LandingPads, Actions, FirstActions);
+  computeActionsTable(LandingPads, Actions, FirstActions);
 
   // Compute the call-site table.
   SmallVector<CallSiteEntry, 64> CallSites;
   computeCallSiteTable(CallSites, LandingPads, FirstActions);
 
-  // Final tallies.
-
-  // Call sites.
   bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
   bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
 
-  unsigned CallSiteTableLength;
-  if (IsSJLJ)
-    CallSiteTableLength = 0;
-  else {
-    unsigned SiteStartSize  = 4; // dwarf::DW_EH_PE_udata4
-    unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
-    unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
-    CallSiteTableLength =
-      CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
-  }
-
-  for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
-    CallSiteTableLength += getULEB128Size(CallSites[i].Action);
-    if (IsSJLJ)
-      CallSiteTableLength += getULEB128Size(i);
-  }
-
   // Type infos.
   MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
   unsigned TTypeEncoding;
-  unsigned TypeFormatSize;
 
   if (!HaveTTData) {
     // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
     // that we're omitting that bit.
     TTypeEncoding = dwarf::DW_EH_PE_omit;
-    // dwarf::DW_EH_PE_absptr
-    TypeFormatSize = Asm->getDataLayout().getPointerSize();
   } else {
     // Okay, we have actual filters or typeinfos to emit.  As such, we need to
     // pick a type encoding for them.  We're about to emit a list of pointers to
@@ -439,7 +413,6 @@
     // in target-independent code.
     //
     TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
-    TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
   }
 
   // Begin the exception table.
@@ -460,64 +433,35 @@
   Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
   Asm->EmitEncodingByte(TTypeEncoding, "@TType");
 
-  // The type infos need to be aligned. GCC does this by inserting padding just
-  // before the type infos. However, this changes the size of the exception
-  // table, so you need to take this into account when you output the exception
-  // table size. However, the size is output using a variable length encoding.
-  // So by increasing the size by inserting padding, you may increase the number
-  // of bytes used for writing the size. If it increases, say by one byte, then
-  // you now need to output one less byte of padding to get the type infos
-  // aligned. However this decreases the size of the exception table. This
-  // changes the value you have to output for the exception table size. Due to
-  // the variable length encoding, the number of bytes used for writing the
-  // length may decrease. If so, you then have to increase the amount of
-  // padding. And so on. If you look carefully at the GCC code you will see that
-  // it indeed does this in a loop, going on and on until the values stabilize.
-  // We chose another solution: don't output padding inside the table like GCC
-  // does, instead output it before the table.
-  unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
-  unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength);
-  unsigned TTypeBaseOffset =
-    sizeof(int8_t) +                            // Call site format
-    CallSiteTableLengthSize +                   // Call site table length size
-    CallSiteTableLength +                       // Call site table length
-    SizeActions +                               // Actions size
-    SizeTypes;
-  unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
-  unsigned TotalSize =
-    sizeof(int8_t) +                            // LPStart format
-    sizeof(int8_t) +                            // TType format
-    (HaveTTData ? TTypeBaseOffsetSize : 0) +    // TType base offset size
-    TTypeBaseOffset;                            // TType base offset
-  unsigned PadBytes = (4 - TotalSize) & 3;
-
+  MCSymbol *TTBaseLabel = nullptr;
   if (HaveTTData) {
-    // Account for any extra padding that will be added to the call site table
-    // length.
-    Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes,
-                           "@TType base offset");
-    PadBytes = 0;
+    // N.B.: There is a dependency loop between the size of the TTBase uleb128
+    // here and the amount of padding before the aligned type table. The
+    // assembler must sometimes pad this uleb128 or insert extra padding before
+    // the type table. See PR35809 or GNU as bug 4029.
+    MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
+    TTBaseLabel = Asm->createTempSymbol("ttbase");
+    Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
+    Asm->OutStreamer->EmitLabel(TTBaseRefLabel);
   }
 
   bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
 
+  // Emit the landing pad call site table.
+  MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
+  MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end");
+  Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+  Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
+  Asm->OutStreamer->EmitLabel(CstBeginLabel);
+
   // SjLj Exception handling
   if (IsSJLJ) {
-    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
-
-    // Add extra padding if it wasn't added to the TType base offset.
-    Asm->EmitPaddedULEB128(CallSiteTableLength,
-                           CallSiteTableLengthSize + PadBytes,
-                           "Call site table length");
-
-    // Emit the landing pad site information.
     unsigned idx = 0;
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
       const CallSiteEntry &S = *I;
 
-      // Offset of the landing pad, counted in 16-byte bundles relative to the
-      // @LPStart address.
+      // Index of the call site entry.
       if (VerboseAsm) {
         Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<");
         Asm->OutStreamer->AddComment("  On exception at call site "+Twine(idx));
@@ -557,14 +501,6 @@
     // A missing entry in the call-site table indicates that a call is not
     // supposed to throw.
 
-    // Emit the landing pad call site table.
-    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
-
-    // Add extra padding if it wasn't added to the TType base offset.
-    Asm->EmitPaddedULEB128(CallSiteTableLength,
-                           CallSiteTableLengthSize + PadBytes,
-                           "Call site table length");
-
     unsigned Entry = 0;
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
@@ -579,9 +515,7 @@
       if (!EndLabel)
         EndLabel = Asm->getFunctionEnd();
 
-      // Offset of the call site relative to the previous call site, counted in
-      // number of 16-byte bundles. The first call site is counted relative to
-      // the start of the procedure fragment.
+      // Offset of the call site relative to the start of the procedure.
       if (VerboseAsm)
         Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
       Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
@@ -591,8 +525,7 @@
                                      EndLabel->getName());
       Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
 
-      // Offset of the landing pad, counted in 16-byte bundles relative to the
-      // @LPStart address.
+      // Offset of the landing pad relative to the start of the procedure.
       if (!S.LPad) {
         if (VerboseAsm)
           Asm->OutStreamer->AddComment("    has no landing pad");
@@ -617,6 +550,7 @@
       Asm->EmitULEB128(S.Action);
     }
   }
+  Asm->OutStreamer->EmitLabel(CstEndLabel);
 
   // Emit the Action Table.
   int Entry = 0;
@@ -660,12 +594,15 @@
     Asm->EmitSLEB128(Action.NextAction);
   }
 
-  emitTypeInfos(TTypeEncoding);
+  if (HaveTTData) {
+    Asm->EmitAlignment(2);
+    emitTypeInfos(TTypeEncoding, TTBaseLabel);
+  }
 
   Asm->EmitAlignment(2);
 }
 
-void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
   const MachineFunction *MF = Asm->MF;
   const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MF->getFilterIds();
@@ -687,6 +624,8 @@
     Asm->EmitTTypeReference(GV, TTypeEncoding);
   }
 
+  Asm->OutStreamer->EmitLabel(TTBaseLabel);
+
   // Emit the Exception Specifications.
   if (VerboseAsm && !FilterIds.empty()) {
     Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index 28bceab..ab3bd5d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -72,9 +72,9 @@
 
   /// Compute the actions table and gather the first action index for each
   /// landing pad site.
-  unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
-                               SmallVectorImpl<ActionEntry> &Actions,
-                               SmallVectorImpl<unsigned> &FirstActions);
+  void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LPs,
+                           SmallVectorImpl<ActionEntry> &Actions,
+                           SmallVectorImpl<unsigned> &FirstActions);
 
   void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      RangeMapType &PadMap);
@@ -110,7 +110,7 @@
   ///     catches in the function.  This tables is reversed indexed base 1.
   void emitExceptionTable();
 
-  virtual void emitTypeInfos(unsigned TTypeEncoding);
+  virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel);
 
   // Helpers for identifying what kind of clause an EH typeid or selector
   // corresponds to. Negative selectors are for filter clauses, the zero