Correctly mark first token in the presence of UTF-8 BOM.

Summary: Fixes http://llvm.org/PR17753

Reviewers: klimek

Reviewed By: klimek

CC: cfe-commits, klimek

Differential Revision: http://llvm-reviews.chandlerc.com/D2159

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194576 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index f2ac3ba..b6574c7 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -6911,6 +6911,14 @@
                    getLLVMStyleWithColumns(12)));
 }
 
+TEST_F(FormatTest, HandlesUTF8BOM) {
+  EXPECT_EQ("\xef\xbb\xbf", format("\xef\xbb\xbf"));
+  EXPECT_EQ("\xef\xbb\xbf#include <iostream>",
+            format("\xef\xbb\xbf#include <iostream>"));
+  EXPECT_EQ("\xef\xbb\xbf\n#include <iostream>",
+            format("\xef\xbb\xbf\n#include <iostream>"));
+}
+
 // FIXME: Encode Cyrillic and CJK characters below to appease MS compilers.
 #if !defined(_MSC_VER)