Eat the UTF-8 BOM at the beginning of a file since it's ignored anyhow.

Nom Nom Nom.

Patch by Anton Korobeynikov!


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@129174 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 34b16c7..ea2a2de 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -71,9 +71,22 @@
          "We assume that the input buffer has a null character at the end"
          " to simplify lexing!");
 
+  // Check whether we have a BOM in the beginning of the buffer. If yes - act
+  // accordingly. Right now we support only UTF-8 with and without BOM, so, just
+  // skip the UTF-8 BOM if it's present.
+  if (BufferStart == BufferPtr) {
+    // Determine the size of the BOM.
+    size_t BOMLength = llvm::StringSwitch<size_t>(BufferStart)
+      .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
+      .Default(0);
+
+    // Skip the BOM.
+    BufferPtr += BOMLength;
+  }
+
   Is_PragmaLexer = false;
   IsInConflictMarker = false;
-  
+
   // Start of the file is a start of line.
   IsAtStartOfLine = true;