[ThinLTO] Print module summary index to assembly Summary: Implements AsmWriter support for printing the module summary index to assembly with the format discussed in the RFC "LLVM Assembly format for ThinLTO Summary". Implements just enough of the parsing support to recognize and ignore the summary entries. As agreed in the RFC thread, this will be the behavior when assembling the IR. A follow on change will implement parsing/assembling of the summary entries for use by tools that currently build the summary index from bitcode. Reviewers: dexonsmith, pcc Subscribers: inglorion, eraman, steven_wu, dblaikie, llvm-commits Differential Revision: https://reviews.llvm.org/D46699 llvm-svn: 333335

commit: 08d5b4ef0d0839761e3b7546efcd3ee0454742ea [log] [tgz]
author: Teresa Johnson <tejohnson@google.com> Sat May 26 02:34:13 2018 +0000
committer: Teresa Johnson <tejohnson@google.com> Sat May 26 02:34:13 2018 +0000
tree: 1962452dfea56ccff172d7612ec1a8c982de27e2
parent: ffebfe10c1cfc2eae92ddd0dc1b212595ac47864 [diff]
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 54c4c59..640b825 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp

@@ -219,6 +219,8 @@
       SkipLineComment();
       continue;
     case '!': return LexExclaim();
+    case '^':
+      return LexCaret();
     case '#': return LexHash();
     case '0': case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8': case '9':
@@ -328,6 +330,22 @@
   return false;
 }
 
+// Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is
+// returned, otherwise the Error token is returned.
+lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
+  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
+    return lltok::Error;
+
+  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+    /*empty*/;
+
+  uint64_t Val = atoull(TokStart + 1, CurPtr);
+  if ((unsigned)Val != Val)
+    Error("invalid value number (too large)!");
+  UIntVal = unsigned(Val);
+  return Token;
+}
+
 lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
   // Handle StringConstant: \"[^\"]*\"
   if (CurPtr[0] == '"') {
@@ -357,17 +375,7 @@
     return Var;
 
   // Handle VarID: [0-9]+
-  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
-    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
-      /*empty*/;
-
-    uint64_t Val = atoull(TokStart+1, CurPtr);
-    if ((unsigned)Val != Val)
-      Error("invalid value number (too large)!");
-    UIntVal = unsigned(Val);
-    return VarID;
-  }
-  return lltok::Error;
+  return LexUIntID(VarID);
 }
 
 /// Lex all tokens that start with a % character.
@@ -420,22 +428,18 @@
   return lltok::exclaim;
 }
 
+/// Lex all tokens that start with a ^ character.
+///    SummaryID ::= ^[0-9]+
+lltok::Kind LLLexer::LexCaret() {
+  // Handle SummaryID: ^[0-9]+
+  return LexUIntID(lltok::SummaryID);
+}
+
 /// Lex all tokens that start with a # character.
 ///    AttrGrpID ::= #[0-9]+
 lltok::Kind LLLexer::LexHash() {
   // Handle AttrGrpID: #[0-9]+
-  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
-    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
-      /*empty*/;
-
-    uint64_t Val = atoull(TokStart+1, CurPtr);
-    if ((unsigned)Val != Val)
-      Error("invalid value number (too large)!");
-    UIntVal = unsigned(Val);
-    return lltok::AttrGrpID;
-  }
-
-  return lltok::Error;
+  return LexUIntID(lltok::AttrGrpID);
 }
 
 /// Lex a label, integer type, keyword, or hexadecimal integer constant.

diff --git a/llvm/lib/AsmParser/LLLexer.h b/llvm/lib/AsmParser/LLLexer.h
index 90bf17d..48d1e94 100644
--- a/llvm/lib/AsmParser/LLLexer.h
+++ b/llvm/lib/AsmParser/LLLexer.h

@@ -81,10 +81,12 @@
     lltok::Kind LexDollar();
     lltok::Kind LexExclaim();
     lltok::Kind LexPercent();
+    lltok::Kind LexUIntID(lltok::Kind Token);
     lltok::Kind LexVar(lltok::Kind Var, lltok::Kind VarID);
     lltok::Kind LexQuote();
     lltok::Kind Lex0x();
     lltok::Kind LexHash();
+    lltok::Kind LexCaret();
 
     uint64_t atoull(const char *Buffer, const char *End);
     uint64_t HexIntToVal(const char *Buffer, const char *End);

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index afdf542..0d478d4 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp

@@ -282,6 +282,10 @@
     case lltok::GlobalVar:  if (ParseNamedGlobal()) return true; break;
     case lltok::ComdatVar:  if (parseComdat()) return true; break;
     case lltok::exclaim:    if (ParseStandaloneMetadata()) return true; break;
+    case lltok::SummaryID:
+      if (ParseSummaryEntry())
+        return true;
+      break;
     case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break;
     case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break;
     case lltok::kw_uselistorder: if (ParseUseListOrder()) return true; break;
@@ -711,6 +715,55 @@
   return false;
 }
 
+// Skips a single module summary entry.
+bool LLParser::SkipModuleSummaryEntry() {
+  // Each module summary entry consists of a tag for the entry
+  // type, followed by a colon, then the fields surrounded by nested sets of
+  // parentheses. The "tag:" looks like a Label. Once parsing support is
+  // in place we will look for the tokens corresponding to the expected tags.
+  if (ParseToken(lltok::LabelStr,
+                 "expected 'label' at start of summary entry") ||
+      ParseToken(lltok::lparen, "expected '(' at start of summary entry"))
+    return true;
+  // Now walk through the parenthesized entry, until the number of open
+  // parentheses goes back down to 0 (the first '(' was parsed above).
+  unsigned NumOpenParen = 1;
+  do {
+    switch (Lex.getKind()) {
+    case lltok::lparen:
+      NumOpenParen++;
+      break;
+    case lltok::rparen:
+      NumOpenParen--;
+      break;
+    case lltok::Eof:
+      return TokError("found end of file while parsing summary entry");
+    default:
+      // Skip everything in between parentheses.
+      break;
+    }
+    Lex.Lex();
+  } while (NumOpenParen > 0);
+  return false;
+}
+
+/// ParseSummaryEntry:
+///   ::= SummaryID '=' ...
+bool LLParser::ParseSummaryEntry() {
+  assert(Lex.getKind() == lltok::SummaryID);
+  // unsigned SummaryID = Lex.getUIntVal();
+
+  Lex.Lex();
+  if (ParseToken(lltok::equal, "expected '=' here"))
+    return true;
+
+  // TODO: Support parsing into a ModuleSummaryIndex object saved in
+  // the LLParser. For now, skip the summary entry.
+  if (SkipModuleSummaryEntry())
+    return true;
+  return false;
+}
+
 static bool isValidVisibilityForLinkage(unsigned V, unsigned L) {
   return !GlobalValue::isLocalLinkage((GlobalValue::LinkageTypes)L) ||
          (GlobalValue::VisibilityTypes)V == GlobalValue::DefaultVisibility;

diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h
index 79428d4..bead02a 100644
--- a/llvm/lib/AsmParser/LLParser.h
+++ b/llvm/lib/AsmParser/LLParser.h

@@ -312,6 +312,8 @@
     bool ParseFnAttributeValuePairs(AttrBuilder &B,
                                     std::vector<unsigned> &FwdRefAttrGrps,
                                     bool inAttrGrp, LocTy &BuiltinLoc);
+    bool SkipModuleSummaryEntry();
+    bool ParseSummaryEntry();
 
     // Type Parsing.
     bool ParseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);

diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
index 5e05f54..b784b02 100644
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h

@@ -351,6 +351,7 @@
   GlobalID,   // @42
   LocalVarID, // %42
   AttrGrpID,  // #42
+  SummaryID,  // ^42
 
   // String valued tokens (StrVal).
   LabelStr,         // foo:
commit	08d5b4ef0d0839761e3b7546efcd3ee0454742ea	[log] [tgz]
author	Teresa Johnson <tejohnson@google.com>	Sat May 26 02:34:13 2018 +0000
committer	Teresa Johnson <tejohnson@google.com>	Sat May 26 02:34:13 2018 +0000
tree	1962452dfea56ccff172d7612ec1a8c982de27e2
parent	ffebfe10c1cfc2eae92ddd0dc1b212595ac47864 [diff]