Fix bugs in EOL marking in command line tokenizers
Add unit tests for this behavior, since the integration test for
clang-cl did not catch these bugs.
Fixes PR47604
Differential Revision: https://reviews.llvm.org/D90866
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e53421a..a185863 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -832,7 +832,7 @@
// Consume runs of whitespace.
if (Token.empty()) {
while (I != E && isWhitespace(Src[I])) {
- // Mark the end of lines in response files
+ // Mark the end of lines in response files.
if (MarkEOLs && Src[I] == '\n')
NewArgv.push_back(nullptr);
++I;
@@ -869,6 +869,9 @@
if (isWhitespace(C)) {
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
+ // Mark the end of lines in response files.
+ if (MarkEOLs && C == '\n')
+ NewArgv.push_back(nullptr);
Token.clear();
continue;
}
@@ -880,9 +883,6 @@
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
- // Mark the end of response files
- if (MarkEOLs)
- NewArgv.push_back(nullptr);
}
/// Backslashes are interpreted in a rather complicated way in the Windows-style
@@ -956,11 +956,11 @@
++I;
StringRef NormalChars = Src.slice(Start, I);
if (I >= E || isWhitespaceOrNull(Src[I])) {
- if (I < E && Src[I] == '\n')
- MarkEOL();
// No special characters: slice out the substring and start the next
// token. Copy the string if the caller asks us to.
AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
+ if (I < E && Src[I] == '\n')
+ MarkEOL();
} else if (Src[I] == '\"') {
Token += NormalChars;
State = QUOTED;
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index c02e9e5..a05f389 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -199,14 +199,15 @@
bool MarkEOLs);
void testCommandLineTokenizer(ParserFunction *parse, StringRef Input,
- const char *const Output[], size_t OutputSize) {
+ ArrayRef<const char *> Output,
+ bool MarkEOLs = false) {
SmallVector<const char *, 0> Actual;
BumpPtrAllocator A;
StringSaver Saver(A);
- parse(Input, Saver, Actual, /*MarkEOLs=*/false);
- EXPECT_EQ(OutputSize, Actual.size());
+ parse(Input, Saver, Actual, MarkEOLs);
+ EXPECT_EQ(Output.size(), Actual.size());
for (unsigned I = 0, E = Actual.size(); I != E; ++I) {
- if (I < OutputSize) {
+ if (I < Output.size()) {
EXPECT_STREQ(Output[I], Actual[I]);
}
}
@@ -219,8 +220,7 @@
const char *const Output[] = {
"foo bar", "foo bar", "foo bar", "foo\\bar",
"-DFOO=bar()", "foobarbaz", "C:\\src\\foo.cpp", "C:srcfoo.cpp"};
- testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
@@ -228,75 +228,85 @@
R"(a\b c\\d e\\"f g" h\"i j\\\"k "lmn" o pqr "st \"u" \v)";
const char *const Output[] = { "a\\b", "c\\\\d", "e\\f g", "h\"i", "j\\\"k",
"lmn", "o", "pqr", "st \"u", "\\v" };
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLine2) {
const char Input[] = "clang -c -DFOO=\"\"\"ABC\"\"\" x.cpp";
const char *const Output[] = { "clang", "-c", "-DFOO=\"ABC\"", "x.cpp"};
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLineQuotedLastArgument) {
const char Input1[] = R"(a b c d "")";
const char *const Output1[] = {"a", "b", "c", "d", ""};
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1,
- array_lengthof(Output1));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1);
const char Input2[] = R"(a b c d ")";
const char *const Output2[] = {"a", "b", "c", "d"};
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2,
- array_lengthof(Output2));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2);
+}
+
+TEST(CommandLineTest, TokenizeAndMarkEOLs) {
+ // Clang uses EOL marking in response files to support options that consume
+ // the rest of the arguments on the current line, but do not consume arguments
+ // from subsequent lines. For example, given this rsp file's contents:
+ // /c /Zi /O2
+ // /Oy- /link /debug /opt:ref
+ // /Zc:ThreadsafeStatics-
+ //
+ // clang-cl needs to treat "/debug /opt:ref" as linker flags, and everything
+ // else as compiler flags. The tokenizer inserts nullptr sentinels into the
+ // output so that clang-cl can find the end of the current line.
+ const char Input[] = "clang -Xclang foo\n\nfoo\"bar\"baz\n x.cpp\n";
+ const char *const Output[] = {"clang", "-Xclang", "foo",
+ nullptr, nullptr, "foobarbaz",
+ nullptr, "x.cpp", nullptr};
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
+ /*MarkEOLs=*/true);
+ testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
+ /*MarkEOLs=*/true);
}
TEST(CommandLineTest, TokenizeConfigFile1) {
const char *Input = "\\";
const char *const Output[] = { "\\" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile2) {
const char *Input = "\\abc";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile3) {
const char *Input = "abc\\";
const char *const Output[] = { "abc\\" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile4) {
const char *Input = "abc\\\n123";
const char *const Output[] = { "abc123" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile5) {
const char *Input = "abc\\\r\n123";
const char *const Output[] = { "abc123" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile6) {
const char *Input = "abc\\\n";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile7) {
const char *Input = "abc\\\r\n";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile8) {
@@ -318,15 +328,13 @@
TEST(CommandLineTest, TokenizeConfigFile10) {
const char *Input = "\\\nabc";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile11) {
const char *Input = "\\\r\nabc";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, AliasesWithArguments) {
@@ -962,6 +970,34 @@
testing::Pointwise(StringEquality(), {"test/test", "-flag"}));
}
+TEST(CommandLineTest, ResponseFileEOLs) {
+ vfs::InMemoryFileSystem FS;
+#ifdef _WIN32
+ const char *TestRoot = "C:\\";
+#else
+ const char *TestRoot = "//net";
+#endif
+ FS.setCurrentWorkingDirectory(TestRoot);
+ FS.addFile("eols.rsp", 0,
+ MemoryBuffer::getMemBuffer("-Xclang -Wno-whatever\n input.cpp"));
+ SmallVector<const char *, 2> Argv = {"clang", "@eols.rsp"};
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ ASSERT_TRUE(cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine,
+ Argv, true, true, FS,
+ /*CurrentDir=*/StringRef(TestRoot)));
+ const char *Expected[] = {"clang", "-Xclang", "-Wno-whatever", nullptr,
+ "input.cpp"};
+ ASSERT_EQ(array_lengthof(Expected), Argv.size());
+ for (size_t I = 0, E = array_lengthof(Expected); I < E; ++I) {
+ if (Expected[I] == nullptr) {
+ ASSERT_EQ(Argv[I], nullptr);
+ } else {
+ ASSERT_STREQ(Expected[I], Argv[I]);
+ }
+ }
+}
+
TEST(CommandLineTest, SetDefautValue) {
cl::ResetCommandLineParser();