Correctly mark the first token in the presence of a UTF-8 BOM.
Summary: Fixes http://llvm.org/PR17753
Reviewers: klimek
Reviewed By: klimek
CC: cfe-commits, klimek
Differential Revision: http://llvm-reviews.chandlerc.com/D2159
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@194576 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index f2ac3ba..b6574c7 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -6911,6 +6911,14 @@
getLLVMStyleWithColumns(12)));
}
+TEST_F(FormatTest, HandlesUTF8BOM) {
+ EXPECT_EQ("\xef\xbb\xbf", format("\xef\xbb\xbf"));
+ EXPECT_EQ("\xef\xbb\xbf#include <iostream>",
+ format("\xef\xbb\xbf#include <iostream>"));
+ EXPECT_EQ("\xef\xbb\xbf\n#include <iostream>",
+ format("\xef\xbb\xbf\n#include <iostream>"));
+}
+
// FIXME: Encode Cyrillic and CJK characters below to appease MS compilers.
#if !defined(_MSC_VER)