Switch the clang-to-CIndex interface for code-completion to a binary format, for a massive speedup

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@90209 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index 1cb5993..1083d5e 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/System/Path.h"
+#include "llvm/System/Program.h"
 using namespace clang;
 
 CompilerInstance::CompilerInstance(llvm::LLVMContext *_LLVMContext,
@@ -256,6 +257,12 @@
                                  getFrontendOpts().DebugCodeCompletionPrinter,
                                  getFrontendOpts().ShowMacrosInCodeCompletion,
                                  llvm::outs()));
+
+  if (CompletionConsumer->isOutputBinary() &&
+      llvm::sys::Program::ChangeStdoutToBinary()) {
+    getPreprocessor().getDiagnostics().Report(diag::err_fe_stdout_binary);
+    CompletionConsumer.reset();
+  }
 }
 
 void CompilerInstance::createFrontendTimer() {
diff --git a/lib/Sema/CodeCompleteConsumer.cpp b/lib/Sema/CodeCompleteConsumer.cpp
index b0a7a66..91b16d3 100644
--- a/lib/Sema/CodeCompleteConsumer.cpp
+++ b/lib/Sema/CodeCompleteConsumer.cpp
@@ -14,6 +14,7 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/Parse/Scope.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang-c/Index.h"
 #include "Sema.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -209,306 +210,111 @@
   return Result;
 }
 
-namespace {
-  // Escape a string for XML-like formatting.
-  struct EscapedString {
-    EscapedString(llvm::StringRef Str) : Str(Str) { }
-    
-    llvm::StringRef Str;
-  };
-  
-  llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, EscapedString EStr) {
-    llvm::StringRef Str = EStr.Str;
-    while (!Str.empty()) {
-      // Find the next escaped character.
-      llvm::StringRef::size_type Pos = Str.find_first_of("<>&\"'");
-      
-      // Print everything before that escaped character.
-      OS << Str.substr(0, Pos);
+static void WriteUnsigned(llvm::raw_ostream &OS, unsigned Value) {
+  OS.write((const char *)&Value, sizeof(unsigned));
+}
 
-      // If we didn't find any escaped characters, we're done.
-      if (Pos == llvm::StringRef::npos)
-        break;
-      
-      // Print the appropriate escape sequence.
-      switch (Str[Pos]) {
-        case '<': OS << "&lt;"; break;
-        case '>': OS << "&gt;"; break;
-        case '&': OS << "&amp;"; break;
-        case '"': OS << "&quot;"; break;
-        case '\'': OS << "&apos;"; break;
-      }
-      
-      // Remove everything up to and including that escaped character.
-      Str = Str.substr(Pos + 1);
-    }
-    
-    return OS;
-  }
-  
-  /// \brief Remove XML-like escaping from a string.
-  std::string UnescapeString(llvm::StringRef Str) {
-    using llvm::StringRef;
-    
-    std::string Result;
-    llvm::raw_string_ostream OS(Result);
-    
-    while (!Str.empty()) {
-      StringRef::size_type Amp = Str.find('&');
-      OS << Str.substr(0, Amp);
-      
-      if (Amp == StringRef::npos)
-        break;
-      
-      StringRef::size_type Semi = Str.substr(Amp).find(';');
-      if (Semi == StringRef::npos) {
-        // Malformed input; do the best we can.
-        OS << '&';
-        Str = Str.substr(Amp + 1);
-        continue;
-      }
-      
-      char Unescaped = llvm::StringSwitch<char>(Str.substr(Amp + 1, Semi - 1))
-        .Case("lt", '<')
-        .Case("gt", '>')
-        .Case("amp", '&')
-        .Case("quot", '"')
-        .Case("apos", '\'')
-        .Default('\0');
-      
-      if (Unescaped)
-        OS << Unescaped;
-      else
-        OS << Str.substr(Amp, Semi + 1);
-      Str = Str.substr(Amp + Semi + 1);
-    }
-    
-    return OS.str();
-  }
+static bool ReadUnsigned(const char *&Memory, const char *MemoryEnd,
+                         unsigned &Value) {
+  if (Memory + sizeof(unsigned) > MemoryEnd)
+    return true;
+
+  memmove(&Value, Memory, sizeof(unsigned));
+  Memory += sizeof(unsigned);
+  return false;
 }
 
 void CodeCompletionString::Serialize(llvm::raw_ostream &OS) const {
+  // Write the number of chunks.
+  WriteUnsigned(OS, size());
+
   for (iterator C = begin(), CEnd = end(); C != CEnd; ++C) {
+    WriteUnsigned(OS, C->Kind);
+
     switch (C->Kind) {
     case CK_TypedText:
-      OS << "<typed-text>" << EscapedString(C->Text) << "</>";
-      break;
     case CK_Text:
-      OS << "<text>" << EscapedString(C->Text) << "</>";
-      break;
-    case CK_Optional:
-      OS << "<optional>";
-      C->Optional->Serialize(OS);
-      OS << "</>";
-      break;
     case CK_Placeholder:
-      OS << "<placeholder>" << EscapedString(C->Text) << "</>";
-      break;
     case CK_Informative:
-      OS << "<informative>" << EscapedString(C->Text) << "</>";
+    case CK_CurrentParameter: {
+      const char *Text = C->Text;
+      unsigned StrLen = strlen(Text);
+      WriteUnsigned(OS, StrLen);
+      OS.write(Text, StrLen);
       break;
-    case CK_CurrentParameter:
-      OS << "<current-parameter>" << EscapedString(C->Text) << "</>";
+    }
+
+    case CK_Optional:
+      C->Optional->Serialize(OS);
       break;
+
     case CK_LeftParen:
-      OS << "<lparen/>";
-      break;
     case CK_RightParen:
-      OS << "<rparen/>";
-      break;
     case CK_LeftBracket:
-      OS << "<lbracket/>";
-      break;
     case CK_RightBracket:
-      OS << "<rbracket/>";
-      break;
     case CK_LeftBrace:
-      OS << "<lbrace/>";
-      break;
     case CK_RightBrace:
-      OS << "<rbrace/>";
-      break;
     case CK_LeftAngle:
-      OS << "<langle/>";
-      break;
     case CK_RightAngle:
-      OS << "<rangle/>";
-      break;
     case CK_Comma:
-      OS << "<comma/>";
       break;
-    }  
+    }
   }
 }
 
-/// \brief Parse the next XML-ish tag of the form <blah>.
-///
-/// \param Str the string in which we're looking for the next tag.
-///
-/// \param TagPos if successful, will be set to the start of the tag we found.
-///
-/// \param Standalone will indicate whether this is a "standalone" tag that
-/// has no associated data, e.g., <comma/>.
-///
-/// \param Terminator will indicate whether this is a terminating tag (that is
-/// or starts with '/').
-///
-/// \returns the tag itself, without the angle brackets.
-static llvm::StringRef ParseNextTag(llvm::StringRef Str, 
-                                    llvm::StringRef::size_type &StartTag,
-                                    llvm::StringRef::size_type &AfterTag,
-                                    bool &Standalone, bool &Terminator) {
-  using llvm::StringRef;
-  
-  Standalone = false;
-  Terminator = false;
-  AfterTag = StringRef::npos;
-  
-  // Find the starting '<'. 
-  StartTag = Str.find('<');
-  if (StartTag == StringRef::npos)
-    return llvm::StringRef();
-  
-  // Find the corresponding '>'.
-  llvm::StringRef::size_type EndTag = Str.substr(StartTag).find('>');
-  if (EndTag == StringRef::npos)
-    return llvm::StringRef();
-  AfterTag = StartTag + EndTag + 1;
-  
-  // Determine whether this is a terminating tag.
-  if (Str[StartTag + 1] == '/') {
-    Terminator = true;
-    Str = Str.substr(1);
-    --EndTag;
-  }
-  
-  // Determine whether this is a standalone tag.
-  if (!Terminator && Str[StartTag + EndTag - 1] == '/') {
-    Standalone = true;
-    if (EndTag > 1)
-      --EndTag;
-  }
+CodeCompletionString *CodeCompletionString::Deserialize(const char *&Str,
+                                                        const char *StrEnd) {
+  if (Str == StrEnd || *Str == 0)
+    return 0;
 
-  return Str.substr(StartTag + 1, EndTag - 1);
-}
-
-CodeCompletionString *CodeCompletionString::Deserialize(llvm::StringRef &Str) {
-  using llvm::StringRef;
-  
   CodeCompletionString *Result = new CodeCompletionString;
-  
-  do {
-    // Parse the next tag.
-    StringRef::size_type StartTag, AfterTag;
-    bool Standalone, Terminator;
-    StringRef Tag = ParseNextTag(Str, StartTag, AfterTag, Standalone, 
-                                 Terminator);
-    
-    if (StartTag == StringRef::npos)
-      break;
-    
-    // Figure out what kind of chunk we have.
-    const unsigned UnknownKind = 10000;
-    unsigned Kind = llvm::StringSwitch<unsigned>(Tag)
-      .Case("typed-text", CK_TypedText)
-      .Case("text", CK_Text)
-      .Case("optional", CK_Optional)
-      .Case("placeholder", CK_Placeholder)
-      .Case("informative", CK_Informative)
-      .Case("current-parameter", CK_CurrentParameter)
-      .Case("lparen", CK_LeftParen)
-      .Case("rparen", CK_RightParen)
-      .Case("lbracket", CK_LeftBracket)
-      .Case("rbracket", CK_RightBracket)
-      .Case("lbrace", CK_LeftBrace)
-      .Case("rbrace", CK_RightBrace)
-      .Case("langle", CK_LeftAngle)
-      .Case("rangle", CK_RightAngle)
-      .Case("comma", CK_Comma)
-      .Default(UnknownKind);
-    
-    // If we've hit a terminator tag, we're done.
-    if (Terminator)
-      break;
-    
-    // Consume the tag.
-    Str = Str.substr(AfterTag);
+  unsigned NumBlocks;
+  if (ReadUnsigned(Str, StrEnd, NumBlocks))
+    return Result;
 
-    // Handle standalone tags now, since they don't need to be matched to
-    // anything.
-    if (Standalone) {
-      // Ignore anything we don't know about.
-      if (Kind == UnknownKind)
-        continue;
-      
-      switch ((ChunkKind)Kind) {
-      case CK_TypedText:
-      case CK_Text:
-      case CK_Optional:
-      case CK_Placeholder:
-      case CK_Informative:
-      case CK_CurrentParameter:
-        // There is no point in creating empty chunks of these kinds.
-        break;
-        
-      case CK_LeftParen:
-      case CK_RightParen:
-      case CK_LeftBracket:
-      case CK_RightBracket:
-      case CK_LeftBrace:
-      case CK_RightBrace:
-      case CK_LeftAngle:
-      case CK_RightAngle:
-      case CK_Comma:
-        Result->AddChunk(Chunk((ChunkKind)Kind));
-        break;
-      }
-      
-      continue;
+  for (unsigned I = 0; I != NumBlocks; ++I) {
+    if (Str + 1 >= StrEnd)
+      break;
+
+    // Parse the next kind.
+    unsigned KindValue;
+    if (ReadUnsigned(Str, StrEnd, KindValue))
+      return Result;
+
+    switch (ChunkKind Kind = (ChunkKind)KindValue) {
+    case CK_TypedText:
+    case CK_Text:
+    case CK_Placeholder:
+    case CK_Informative:
+    case CK_CurrentParameter: {
+      unsigned StrLen;
+      if (ReadUnsigned(Str, StrEnd, StrLen) || (Str + StrLen > StrEnd))
+        return Result;
+
+      Result->AddChunk(Chunk(Kind, StringRef(Str, StrLen)));
+      Str += StrLen;
+      break;
     }
-    
-    if (Kind == CK_Optional) {
-      // Deserialize the optional code-completion string.
-      std::auto_ptr<CodeCompletionString> Optional(Deserialize(Str));
+
+    case CK_Optional: {
+      std::auto_ptr<CodeCompletionString> Optional(Deserialize(Str, StrEnd));
       Result->AddOptionalChunk(Optional);
+      break;
     }
-    
-    StringRef EndTag = ParseNextTag(Str, StartTag, AfterTag, Standalone, 
-                                    Terminator);
-    if (StartTag == StringRef::npos || !Terminator || Standalone)
-      break; // Parsing failed; just give up.
-    
-    if (EndTag.empty() || Tag == EndTag) {
-      // Found the matching end tag. Add this chunk based on the text
-      // between the tags, then consume that input.
-      StringRef Text = Str.substr(0, StartTag);
-      switch ((ChunkKind)Kind) {
-      case CK_TypedText:
-      case CK_Text:
-      case CK_Placeholder:
-      case CK_Informative:
-      case CK_CurrentParameter:
-      case CK_LeftParen:
-      case CK_RightParen:
-      case CK_LeftBracket:
-      case CK_RightBracket:
-      case CK_LeftBrace:
-      case CK_RightBrace:
-      case CK_LeftAngle:
-      case CK_RightAngle:
-      case CK_Comma:
-        Result->AddChunk(Chunk((ChunkKind)Kind, UnescapeString(Text)));
-        break;
-          
-      case CK_Optional:
-        // We've already added the optional chunk.
-        break;
-      }
+
+    case CK_LeftParen:
+    case CK_RightParen:
+    case CK_LeftBracket:
+    case CK_RightBracket:
+    case CK_LeftBrace:
+    case CK_RightBrace:
+    case CK_LeftAngle:
+    case CK_RightAngle:
+    case CK_Comma:
+      Result->AddChunk(Chunk(Kind));
+      break;      
     }
-    
-    // Remove this tag.
-    Str = Str.substr(AfterTag);
-  } while (!Str.empty());
+  };
   
   return Result;
 }
@@ -632,62 +438,110 @@
                                                        unsigned NumResults) {
   // Print the results.
   for (unsigned I = 0; I != NumResults; ++I) {
-    OS << "COMPLETION:" << Results[I].Rank << ":";
+    CXCursorKind Kind = CXCursor_NotImplemented;
+
     switch (Results[I].Kind) {
-      case Result::RK_Declaration:
-        if (RecordDecl *Record = dyn_cast<RecordDecl>(Results[I].Declaration)) {
-          if (Record->isStruct())
-            OS << "Struct:";
-          else if (Record->isUnion())
-            OS << "Union:";
-          else
-            OS << "Class:";
-        } else if (ObjCMethodDecl *Method
-                     = dyn_cast<ObjCMethodDecl>(Results[I].Declaration)) {
-          if (Method->isInstanceMethod())
-            OS << "ObjCInstanceMethod:";
-          else
-            OS << "ObjCClassMethod:";
-        } else {
-          OS << Results[I].Declaration->getDeclKindName() << ":";
-        }
-        if (CodeCompletionString *CCS 
-              = Results[I].CreateCodeCompletionString(SemaRef)) {
-          CCS->Serialize(OS);
-          delete CCS;
-        } else {
-          OS << "<typed-text>" 
-             << Results[I].Declaration->getNameAsString() 
-             << "</>";
-        }
-        
-        OS << '\n';
-        break;
-        
-      case Result::RK_Keyword:
-        OS << "Keyword:<typed-text>" << Results[I].Keyword << "</>\n";
-        break;
-        
-      case Result::RK_Macro: {
-        OS << "Macro:";
-        if (CodeCompletionString *CCS 
-              = Results[I].CreateCodeCompletionString(SemaRef)) {
-          CCS->Serialize(OS);
-          delete CCS;
-        } else {
-          OS << "<typed-text>" << Results[I].Macro->getName() << "</>";
-        }
-        OS << '\n';
+    case Result::RK_Declaration:
+      switch (Results[I].Declaration->getKind()) {
+      case Decl::Record:
+      case Decl::CXXRecord:
+      case Decl::ClassTemplateSpecialization: {
+        RecordDecl *Record = cast<RecordDecl>(Results[I].Declaration);
+        if (Record->isStruct())
+          Kind = CXCursor_StructDecl;
+        else if (Record->isUnion())
+          Kind = CXCursor_UnionDecl;
+        else
+          Kind = CXCursor_ClassDecl;
         break;
       }
         
-      case Result::RK_Pattern: {
-        OS << "Pattern:";
-        Results[I].Pattern->Serialize(OS);
-        OS << '\n';
+      case Decl::ObjCMethod: {
+        ObjCMethodDecl *Method = cast<ObjCMethodDecl>(Results[I].Declaration);
+        if (Method->isInstanceMethod())
+            Kind = CXCursor_ObjCInstanceMethodDecl;
+        else
+          Kind = CXCursor_ObjCClassMethodDecl;
         break;
       }
+        
+      case Decl::Typedef:
+        Kind = CXCursor_TypedefDecl;
+        break;
+        
+      case Decl::Enum:
+        Kind = CXCursor_EnumDecl;
+        break;
+        
+      case Decl::Field:
+        Kind = CXCursor_FieldDecl;
+        break;
+        
+      case Decl::EnumConstant:
+        Kind = CXCursor_EnumConstantDecl;
+        break;
+        
+      case Decl::Function:
+      case Decl::CXXMethod:
+      case Decl::CXXConstructor:
+      case Decl::CXXDestructor:
+      case Decl::CXXConversion:
+        Kind = CXCursor_FunctionDecl;
+        break;
+        
+      case Decl::Var:
+        Kind = CXCursor_VarDecl;
+        break;
+        
+      case Decl::ParmVar:
+        Kind = CXCursor_ParmDecl;
+        break;
+        
+      case Decl::ObjCInterface:
+        Kind = CXCursor_ObjCInterfaceDecl;
+        break;
+        
+      case Decl::ObjCCategory:
+        Kind = CXCursor_ObjCCategoryDecl;
+        break;
+        
+      case Decl::ObjCProtocol:
+        Kind = CXCursor_ObjCProtocolDecl;
+        break;
+        
+      case Decl::ObjCProperty:
+        Kind = CXCursor_ObjCPropertyDecl;
+        break;
+        
+      case Decl::ObjCIvar:
+        Kind = CXCursor_ObjCIvarDecl;
+        break;
+        
+      case Decl::ObjCImplementation:
+        Kind = CXCursor_ObjCClassDefn;
+        break;
+        
+      case Decl::ObjCCategoryImpl:
+        Kind = CXCursor_ObjCCategoryDefn;
+        break;
+        
+      default:
+        break;
+      }
+      break;
+        
+    case Result::RK_Keyword:
+    case Result::RK_Macro:
+    case Result::RK_Pattern:
+      Kind = CXCursor_NotImplemented;
+      break;
     }
+
+    WriteUnsigned(OS, Kind);
+    CodeCompletionString *CCS = Results[I].CreateCodeCompletionString(SemaRef);
+    assert(CCS && "No code-completion string?");
+    CCS->Serialize(OS);
+    delete CCS;
   }
   
   // Once we've printed the code-completion results, suppress remaining
@@ -702,13 +556,12 @@
                                                 OverloadCandidate *Candidates,
                                                        unsigned NumCandidates) {
   for (unsigned I = 0; I != NumCandidates; ++I) {
-    if (CodeCompletionString *CCS
-        = Candidates[I].CreateSignatureString(CurrentArg, SemaRef)) {
-      OS << "OVERLOAD:";
-      CCS->Serialize(OS);
-      OS << '\n';
-      delete CCS;
-    }
+    WriteUnsigned(OS, CXCursor_NotImplemented);
+    CodeCompletionString *CCS
+      = Candidates[I].CreateSignatureString(CurrentArg, SemaRef);
+    assert(CCS && "No code-completion string?");
+    CCS->Serialize(OS);
+    delete CCS;
   }
   
   // Once we've printed the code-completion results, suppress remaining
diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp
index e1a5790..b386adb 100644
--- a/lib/Sema/SemaCodeComplete.cpp
+++ b/lib/Sema/SemaCodeComplete.cpp
@@ -801,17 +801,26 @@
 CodeCompleteConsumer::Result::CreateCodeCompletionString(Sema &S) {
   typedef CodeCompletionString::Chunk Chunk;
   
-  if (Kind == RK_Keyword)
-    return 0;
+  if (Kind == RK_Pattern)
+    return Pattern->Clone();
+  
+  CodeCompletionString *Result = new CodeCompletionString;
+
+  if (Kind == RK_Keyword) {
+    Result->AddTypedTextChunk(Keyword);
+    return Result;
+  }
   
   if (Kind == RK_Macro) {
     MacroInfo *MI = S.PP.getMacroInfo(Macro);
-    if (!MI || !MI->isFunctionLike())
-      return 0;
+    assert(MI && "Not a macro?");
+
+    Result->AddTypedTextChunk(Macro->getName());
+
+    if (!MI->isFunctionLike())
+      return Result;
     
     // Format a function-like macro with placeholders for the arguments.
-    CodeCompletionString *Result = new CodeCompletionString;
-    Result->AddTypedTextChunk(Macro->getName());
     Result->AddChunk(Chunk(CodeCompletionString::CK_LeftParen));
     for (MacroInfo::arg_iterator A = MI->arg_begin(), AEnd = MI->arg_end();
          A != AEnd; ++A) {
@@ -843,14 +852,12 @@
   NamedDecl *ND = Declaration;
   
   if (StartsNestedNameSpecifier) {
-    CodeCompletionString *Result = new CodeCompletionString;
     Result->AddTypedTextChunk(ND->getNameAsString());
     Result->AddTextChunk("::");
     return Result;
   }
   
   if (FunctionDecl *Function = dyn_cast<FunctionDecl>(ND)) {
-    CodeCompletionString *Result = new CodeCompletionString;
     AddQualifierToCompletionString(Result, Qualifier, QualifierIsInformative, 
                                    S.Context);
     Result->AddTypedTextChunk(Function->getNameAsString());
@@ -861,7 +868,6 @@
   }
   
   if (FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(ND)) {
-    CodeCompletionString *Result = new CodeCompletionString;
     AddQualifierToCompletionString(Result, Qualifier, QualifierIsInformative, 
                                    S.Context);
     FunctionDecl *Function = FunTmpl->getTemplatedDecl();
@@ -915,7 +921,6 @@
   }
   
   if (TemplateDecl *Template = dyn_cast<TemplateDecl>(ND)) {
-    CodeCompletionString *Result = new CodeCompletionString;
     AddQualifierToCompletionString(Result, Qualifier, QualifierIsInformative, 
                                    S.Context);
     Result->AddTypedTextChunk(Template->getNameAsString());
@@ -926,7 +931,6 @@
   }
   
   if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(ND)) {
-    CodeCompletionString *Result = new CodeCompletionString;
     Selector Sel = Method->getSelector();
     if (Sel.isUnarySelector()) {
       Result->AddTypedTextChunk(Sel.getIdentifierInfoForSlot(0)->getName());
@@ -982,15 +986,12 @@
     return Result;
   }
 
-  if (Qualifier) {
-    CodeCompletionString *Result = new CodeCompletionString;
+  if (Qualifier)
     AddQualifierToCompletionString(Result, Qualifier, QualifierIsInformative, 
                                    S.Context);
-    Result->AddTypedTextChunk(ND->getNameAsString());
-    return Result;
-  }
-  
-  return 0;
+
+  Result->AddTypedTextChunk(ND->getNameAsString());
+  return Result;
 }
 
 CodeCompletionString *