Added FormatStyle::LK_JavaScript (a new LanguageKind) to gate all JS-specific parsing.

Summary:
Use LK_JavaScript for files ending with ".js". Added support for the ">>>="
operator.

Reviewers: djasper, klimek

Reviewed By: djasper

CC: cfe-commits, klimek

Differential Revision: http://llvm-reviews.chandlerc.com/D2242

llvm-svn: 195961
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 66dd387..7f91239 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -34,6 +34,15 @@
 namespace llvm {
 namespace yaml {
 template <>
+struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageKind> {
+  static void enumeration(IO &IO,
+                          clang::format::FormatStyle::LanguageKind &Value) {
+    IO.enumCase(Value, "Cpp", clang::format::FormatStyle::LK_Cpp);
+    IO.enumCase(Value, "JavaScript", clang::format::FormatStyle::LK_JavaScript);
+  }
+};
+
+template <>
 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
   static void enumeration(IO &IO,
                           clang::format::FormatStyle::LanguageStandard &Value) {
@@ -99,13 +108,17 @@
     } else {
       StringRef BasedOnStyle;
       IO.mapOptional("BasedOnStyle", BasedOnStyle);
-      if (!BasedOnStyle.empty())
+      if (!BasedOnStyle.empty()) {
+        clang::format::FormatStyle::LanguageKind Language = Style.Language;
         if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
           IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
           return;
         }
+        Style.Language = Language;
+      }
     }
 
+    IO.mapOptional("Language", Style.Language);
     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
     IO.mapOptional("ConstructorInitializerIndentWidth",
                    Style.ConstructorInitializerIndentWidth);
@@ -173,6 +186,36 @@
     IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
   }
 };
+
+// Allows reading a vector<FormatStyle> while keeping default values.
+// Elements are written or read starting from the 1st element.
+// When writing, the 0th element is ignored.
+// When reading, keys that are not present in the serialized form are
+// copied from the 0th element of the vector. If the first element has no
+// Language specified, it is treated as the default one for the following
+// elements.
+template <>
+struct DocumentListTraits<std::vector<clang::format::FormatStyle> > {
+  static size_t size(IO &io, std::vector<clang::format::FormatStyle> &Seq) {
+    return Seq.size() - 1;
+  }
+  static clang::format::FormatStyle &
+  element(IO &io, std::vector<clang::format::FormatStyle> &Seq, size_t Index) {
+    if (Index + 2 > Seq.size()) {
+      assert(Index + 2 == Seq.size() + 1);
+      clang::format::FormatStyle Template;
+      if (Seq.size() > 1 &&
+          Seq[1].Language == clang::format::FormatStyle::LK_None) {
+        Template = Seq[1];
+      } else {
+        Template = Seq[0];
+        Template.Language = clang::format::FormatStyle::LK_None;
+      }
+      Seq.resize(Index + 2, Template);
+    }
+    return Seq[Index + 1];
+  }
+};
 }
 }
 
@@ -188,6 +231,7 @@
 
 FormatStyle getLLVMStyle() {
   FormatStyle LLVMStyle;
+  LLVMStyle.Language = FormatStyle::LK_Cpp;
   LLVMStyle.AccessModifierOffset = -2;
   LLVMStyle.AlignEscapedNewlinesLeft = false;
   LLVMStyle.AlignTrailingComments = true;
@@ -236,6 +280,7 @@
 
 FormatStyle getGoogleStyle() {
   FormatStyle GoogleStyle;
+  GoogleStyle.Language = FormatStyle::LK_Cpp;
   GoogleStyle.AccessModifierOffset = -1;
   GoogleStyle.AlignEscapedNewlinesLeft = true;
   GoogleStyle.AlignTrailingComments = true;
@@ -337,11 +382,42 @@
 }
 
 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
+  assert(Style);
+  assert(Style->Language != FormatStyle::LK_None);
   if (Text.trim().empty())
     return llvm::make_error_code(llvm::errc::invalid_argument);
+
+  std::vector<FormatStyle> Styles;
+  // DocumentListTraits<vector<FormatStyle>> uses 0th element as the default one
+  // for the fields, keys for which are missing from the configuration.
+  Styles.push_back(*Style);
   llvm::yaml::Input Input(Text);
-  Input >> *Style;
-  return Input.error();
+  Input >> Styles;
+  if (Input.error())
+    return Input.error();
+
+  for (unsigned i = 1; i < Styles.size(); ++i) {
+    // Ensures that only the first configuration can skip the Language option.
+    if (Styles[i].Language == FormatStyle::LK_None && i != 1)
+      return llvm::make_error_code(llvm::errc::invalid_argument);
+    // Ensure that each language is configured at most once.
+    for (unsigned j = 1; j < i; ++j) {
+      if (Styles[i].Language == Styles[j].Language)
+        return llvm::make_error_code(llvm::errc::invalid_argument);
+    }
+  }
+  // Look for a suitable configuration starting from the end, so we can
+  // find the configuration for the specific language first, and the default
+  // configuration (which can only be at slot 1) after it.
+  for (unsigned i = Styles.size() - 1; i > 0; --i) {
+    if (Styles[i].Language == Styles[0].Language ||
+        Styles[i].Language == FormatStyle::LK_None) {
+      *Style = Styles[i];
+      Style->Language = Styles[0].Language;
+      return llvm::make_error_code(llvm::errc::success);
+    }
+  }
+  return llvm::make_error_code(llvm::errc::not_supported);
 }
 
 std::string configurationAsText(const FormatStyle &Style) {
@@ -986,24 +1062,44 @@
 
 private:
   void tryMergePreviousTokens() {
-    tryMerge_TMacro() || tryMergeJavaScriptIdentityOperators();
+    if (tryMerge_TMacro())
+      return;
+
+    if (Style.Language == FormatStyle::LK_JavaScript) {
+      static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
+      static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
+      static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
+                                               tok::greaterequal };
+      // FIXME: We probably need to change token type to mimic operator with the
+      // correct priority.
+      if (tryMergeTokens(JSIdentity))
+        return;
+      if (tryMergeTokens(JSNotIdentity))
+        return;
+      if (tryMergeTokens(JSShiftEqual))
+        return;
+    }
   }
 
-  bool tryMergeJavaScriptIdentityOperators() {
-    if (Tokens.size() < 2)
+  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
+    if (Tokens.size() < Kinds.size())
       return false;
-    FormatToken &First = *Tokens[Tokens.size() - 2];
-    if (!First.isOneOf(tok::exclaimequal, tok::equalequal))
+
+    SmallVectorImpl<FormatToken *>::const_iterator First =
+        Tokens.end() - Kinds.size();
+    if (!First[0]->is(Kinds[0]))
       return false;
-    FormatToken &Second = *Tokens.back();
-    if (!Second.is(tok::equal))
-      return false;
-    if (Second.WhitespaceRange.getBegin() != Second.WhitespaceRange.getEnd())
-      return false;
-    First.TokenText =
-        StringRef(First.TokenText.data(), First.TokenText.size() + 1);
-    First.ColumnWidth += 1;
-    Tokens.pop_back();
+    unsigned AddLength = 0;
+    for (unsigned i = 1; i < Kinds.size(); ++i) {
+      if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
+                                         First[i]->WhitespaceRange.getEnd())
+        return false;
+      AddLength += First[i]->TokenText.size();
+    }
+    Tokens.resize(Tokens.size() - Kinds.size() + 1);
+    First[0]->TokenText = StringRef(First[0]->TokenText.data(),
+                                    First[0]->TokenText.size() + AddLength);
+    First[0]->ColumnWidth += AddLength;
     return true;
   }
 
@@ -1201,6 +1297,17 @@
   }
 };
 
+static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
+  switch (Language) {
+  case FormatStyle::LK_Cpp:
+    return "C++";
+  case FormatStyle::LK_JavaScript:
+    return "JavaScript";
+  default:
+    return "Unknown";
+  }
+}
+
 class Formatter : public UnwrappedLineConsumer {
 public:
   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
@@ -1213,6 +1320,8 @@
                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
                                                                : "unknown")
                        << "\n");
+    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
+                       << "\n");
   }
 
   tooling::Replacements format() {
@@ -1538,11 +1647,26 @@
     "parameters, e.g.:\n"
     "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
 
+static void fillLanguageByFileName(StringRef FileName, FormatStyle *Style) {
+  if (FileName.endswith_lower(".c") || FileName.endswith_lower(".h") ||
+      FileName.endswith_lower(".cpp") || FileName.endswith_lower(".hpp") ||
+      FileName.endswith_lower(".cc") || FileName.endswith_lower(".hh") ||
+      FileName.endswith_lower(".cxx") || FileName.endswith_lower(".hxx") ||
+      FileName.endswith_lower(".m") || FileName.endswith_lower(".mm")) {
+    Style->Language = FormatStyle::LK_Cpp;
+  }
+  if (FileName.endswith_lower(".js")) {
+    Style->Language = FormatStyle::LK_JavaScript;
+  }
+}
+
 FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
+  // FIXME: Configure fallback style from outside (add a command line option).
   // Fallback style in case the rest of this function can't determine a style.
   StringRef FallbackStyle = "LLVM";
   FormatStyle Style;
   getPredefinedStyle(FallbackStyle, &Style);
+  fillLanguageByFileName(FileName, &Style);
 
   if (StyleName.startswith("{")) {
     // Parse YAML/JSON style from the command line.
@@ -1557,9 +1681,11 @@
     if (!getPredefinedStyle(StyleName, &Style))
       llvm::errs() << "Invalid value for -style, using " << FallbackStyle
                    << " style\n";
+    fillLanguageByFileName(FileName, &Style);
     return Style;
   }
 
+  SmallString<128> UnsuitableConfigFiles;
   SmallString<128> Path(FileName);
   llvm::sys::fs::make_absolute(Path);
   for (StringRef Directory = Path; !Directory.empty();
@@ -1591,8 +1717,14 @@
         continue;
       }
       if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
-        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
-                     << "\n";
+        if (ec == llvm::errc::not_supported) {
+          if (!UnsuitableConfigFiles.empty())
+            UnsuitableConfigFiles.append(", ");
+          UnsuitableConfigFiles.append(ConfigFile);
+        } else {
+          llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
+                       << "\n";
+        }
         continue;
       }
       DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
@@ -1601,6 +1733,11 @@
   }
   llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
                << " style\n";
+  if (!UnsuitableConfigFiles.empty()) {
+    llvm::errs() << "Configuration file(s) do(es) not support "
+                 << getLanguageName(Style.Language) << ": "
+                 << UnsuitableConfigFiles << "\n";
+  }
   return Style;
 }