Improved our argument parsing abilities to be able to handle quoting more like
a shell would interpret it. A few examples that we now handle correctly:
INPUT: "Hello "world
OUTPUT: "Hello world"
INPUT: "Hello "' World'
OUTPUT: "Hello  World"
INPUT: Hello" World"
OUTPUT: "Hello World"
This broke the setting of dictionary values for the "settings set" command
for things like:
(lldb) settings set target.process.env-vars ["MY_ENV_VAR"]=YES
since we would drop the quotes. I fixed the user settings controller to use
a regular expression so it can accept any of the following inputs for
dictionary setting:
settings set target.process.env-vars ["MY_ENV_VAR"]=YES
settings set target.process.env-vars [MY_ENV_VAR]=YES
settings set target.process.env-vars MY_ENV_VAR=YES
We might want to eventually drop the first two syntaxes, but I won't make
that decision right now.
This allows more natural setting of the environment variables:
settings set target.process.env-vars MY_ENV_VAR=YES ABC=DEF CWD=/tmp
git-svn-id: https://llvm.org/svn/llvm-project/lldb/trunk@122166 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/source/Interpreter/Args.cpp b/source/Interpreter/Args.cpp
index e5264e5..affa644 100644
--- a/source/Interpreter/Args.cpp
+++ b/source/Interpreter/Args.cpp
@@ -23,10 +23,6 @@
using namespace lldb;
using namespace lldb_private;
-static const char *k_space_characters = "\t\n\v\f\r ";
-static const char *k_space_characters_with_slash = "\t\n\v\f\r \\";
-
-
//----------------------------------------------------------------------
// Args constructor
//----------------------------------------------------------------------
@@ -34,7 +30,8 @@
m_args(),
m_argv()
{
- SetCommandString (command);
+ if (command)
+ SetCommandString (command);
}
@@ -42,11 +39,10 @@
m_args(),
m_argv()
{
- SetCommandString (command, len);
+ if (command && len)
+ SetCommandString (command, len);
}
-
-
//----------------------------------------------------------------------
// Destructor
//----------------------------------------------------------------------
@@ -97,20 +93,20 @@
Args::GetQuotedCommandString (std::string &command)
{
command.clear ();
- int argc = GetArgumentCount ();
- for (int i = 0; i < argc; ++i)
+ size_t argc = GetArgumentCount ();
+ for (size_t i = 0; i < argc; ++i)
{
if (i > 0)
- command += ' ';
- char quote_char = m_args_quote_char[i];
- if (quote_char != '\0')
+ command.append (1, ' ');
+ char quote_char = GetArgumentQuoteCharAtIndex(i);
+ if (quote_char)
{
- command += quote_char;
- command += m_argv[i];
- command += quote_char;
+ command.append (1, quote_char);
+ command.append (m_argv[i]);
+ command.append (1, quote_char);
}
else
- command += m_argv[i];
+ command.append (m_argv[i]);
}
return argc > 0;
}
@@ -127,136 +123,197 @@
void
Args::SetCommandString (const char *command)
{
+ StreamFile s(stdout);
+ s.Printf("\nCOMMAND: %s\n", command);
m_args.clear();
m_argv.clear();
+ m_args_quote_char.clear();
+
if (command && command[0])
{
- const char *arg_start;
- const char *next_arg_start;
- for (arg_start = command, next_arg_start = NULL;
- arg_start && arg_start[0];
- arg_start = next_arg_start, next_arg_start = NULL)
+ static const char *k_space_separators = " \t";
+ static const char *k_space_separators_with_slash_and_quotes = " \t \\'\"`";
+ const char *arg_end = NULL;
+ const char *arg_pos;
+ for (arg_pos = command;
+ arg_pos && arg_pos[0];
+ arg_pos = arg_end)
{
- // Skip any leading space characters
- arg_start = ::strspn (arg_start, k_space_characters) + arg_start;
-
- // If there were only space characters to the end of the line, then
+ // Skip any leading space separators
+ const char *arg_start = ::strspn (arg_pos, k_space_separators) + arg_pos;
+
+ // If there were only space separators to the end of the line, then
// we're done.
if (*arg_start == '\0')
break;
+ // Arguments can be split into multiple discontiguous pieces,
+ // for example:
+ // "Hello ""World"
+ // this would result in a single argument "Hello World" (without
+ // the quotes) since the quotes would be removed and there is
+ // no space between the strings. So we need to keep track of the
+ // current start of each argument piece in "arg_piece_start"
+ const char *arg_piece_start = arg_start;
+ arg_pos = arg_piece_start;
+
std::string arg;
- const char *arg_end = NULL;
+ // Since we can have multiple quotes that form a single argument
+ // in a command like: "Hello "world'!' (which will make a single
+ // argument "Hello world!") we remember the first quote character
+ // we encounter and use that for the quote character.
+ char first_quote_char = '\0';
+ char quote_char = '\0';
+ bool arg_complete = false;
- switch (*arg_start)
+ do
{
- case '\'':
- case '"':
- case '`':
+ arg_end = ::strcspn (arg_pos, k_space_separators_with_slash_and_quotes) + arg_pos;
+
+ switch (arg_end[0])
{
- // Look for either a quote character, or the backslash
- // character
- const char quote_char = *arg_start;
- char find_chars[3] = { quote_char, '\\' , '\0'};
- bool is_backtick = (quote_char == '`');
- if (quote_char == '"' || quote_char == '`')
- m_args_quote_char.push_back(quote_char);
- else
- m_args_quote_char.push_back('\0');
+ default:
+ assert (!"Unhandled case statement, we must handle this...");
+ break;
- while (*arg_start != '\0')
+ case '\0':
+ // End of C string
+ if (arg_piece_start && arg_piece_start[0])
+ arg.append (arg_piece_start);
+ arg_complete = true;
+ break;
+
+ case '\\':
+ // Backslash character
+ switch (arg_end[1])
{
- arg_end = ::strcspn (arg_start + 1, find_chars) + arg_start + 1;
-
- if (*arg_end == '\0')
- {
- arg.append (arg_start);
+ case '\0':
+ arg.append (arg_piece_start);
+ arg_complete = true;
break;
- }
- // Watch out for quote characters prefixed with '\'
- if (*arg_end == '\\')
+ default:
+ arg_pos = arg_end + 2;
+ break;
+ }
+ break;
+
+ case '"':
+ case '\'':
+ case '`':
+ // Quote characters
+ if (quote_char)
+ {
+ // We found a quote character while inside a quoted
+ // character argument. If it matches our current quote
+ // character, this ends the effect of the quotes. If it
+ // doesn't we ignore it.
+ if (quote_char == arg_end[0])
{
- if (arg_end[1] == quote_char)
+ arg.append (arg_piece_start, arg_end - arg_piece_start);
+ // Clear the quote character and let parsing
+ // continue (we need to watch for things like:
+ // "Hello ""World"
+ // "Hello "World
+ // "Hello "'World'
+ // All of which will result in a single argument "Hello World"
+ quote_char = '\0'; // Note that we are no longer inside quotes
+ arg_pos = arg_end + 1; // Skip the quote character
+ arg_piece_start = arg_pos; // Note we are starting from later in the string
+ }
+ else
+ {
+ // different quote, skip it and keep going
+ arg_pos = arg_end + 1;
+ }
+ }
+ else
+ {
+ // We found the start of a quote scope.
+ // Make sure there isn't a string that precedes
+ // the start of a quote scope like:
+ // Hello" World"
+ // If so, then add the "Hello" to the arg
+ if (arg_end > arg_piece_start)
+ arg.append (arg_piece_start, arg_end - arg_piece_start);
+
+ // Enter into a quote scope
+ quote_char = arg_end[0];
+
+ if (first_quote_char == '\0')
+ first_quote_char = quote_char;
+
+ arg_pos = arg_end;
+
+ if (quote_char != '`')
+ ++arg_pos; // Skip the quote character if it is not a backtick
+
+ arg_piece_start = arg_pos; // Note we are starting from later in the string
+
+ // Skip till the next quote character
+ const char *end_quote = ::strchr (arg_piece_start, quote_char);
+ while (end_quote && end_quote[-1] == '\\')
+ {
+ // Don't skip the quote character if it is
+ // preceded by a '\' character
+ end_quote = ::strchr (end_quote + 1, quote_char);
+ }
+
+ if (end_quote)
+ {
+ if (end_quote > arg_piece_start)
{
- // The character following the '\' is our quote
- // character so strip the backslash character
- arg.append (arg_start, arg_end);
+ // Keep the backtick quote on commands
+ if (quote_char == '`')
+ arg.append (arg_piece_start, end_quote + 1 - arg_piece_start);
+ else
+ arg.append (arg_piece_start, end_quote - arg_piece_start);
+ }
+
+ // If the next character is a space or the end of
+ // string, this argument is complete...
+ if (end_quote[1] == ' ' || end_quote[1] == '\t' || end_quote[1] == '\0')
+ {
+ arg_complete = true;
+ arg_end = end_quote + 1;
}
else
{
- // The character following the '\' is NOT our
- // quote character, so include the backslash
- // and continue
- arg.append (arg_start, arg_end + 1);
+ arg_pos = end_quote + 1;
+ arg_piece_start = arg_pos;
}
- arg_start = arg_end + 1;
- continue;
- }
- else
- {
- arg.append (arg_start, arg_end + 1);
- next_arg_start = arg_end + 1;
- break;
+ quote_char = '\0';
}
}
+ break;
- // Skip single and double quotes, but leave backtick quotes
- if (!is_backtick)
+ case ' ':
+ case '\t':
+ if (quote_char)
{
- char first_c = arg[0];
- arg.erase(0,1);
- // Only erase the last character if it is the same as the first.
- // Otherwise, we're parsing an incomplete command line, and we
- // would be stripping off the last character of that string.
- if (arg[arg.size() - 1] == first_c)
- arg.erase(arg.size() - 1, 1);
+ // We are currently processing a quoted character and found
+ // a space character, skip any spaces and keep trying to find
+ // the end of the argument.
+ arg_pos = ::strspn (arg_end, k_space_separators) + arg_end;
}
- }
- break;
- default:
- {
- m_args_quote_char.push_back('\0');
- // Look for the next non-escaped space character
- while (*arg_start != '\0')
+ else
{
- arg_end = ::strcspn (arg_start, k_space_characters_with_slash) + arg_start;
-
- if (arg_end == NULL)
- {
- arg.append(arg_start);
- break;
- }
-
- if (*arg_end == '\\')
- {
- // Append up to the '\' char
- arg.append (arg_start, arg_end);
-
- if (arg_end[1] == '\0')
- break;
-
- // Append the character following the '\' if it isn't
- // the end of the string
- arg.append (1, arg_end[1]);
- arg_start = arg_end + 2;
- continue;
- }
- else
- {
- arg.append (arg_start, arg_end);
- next_arg_start = arg_end;
- break;
- }
+ // We are not inside any quotes, we just found a space after an
+ // argument
+ if (arg_end > arg_piece_start)
+ arg.append (arg_piece_start, arg_end - arg_piece_start);
+ arg_complete = true;
}
+ break;
}
- break;
- }
+ } while (!arg_complete);
m_args.push_back(arg);
+ m_args_quote_char.push_back (first_quote_char);
}
+ UpdateArgvFromArgs();
}
- UpdateArgvFromArgs();
+ Dump (&s);
}
void
@@ -309,6 +366,9 @@
for (pos = m_args.begin(); pos != end; ++pos)
m_argv.push_back(pos->c_str());
m_argv.push_back(NULL);
+ // Make sure we have enough arg quote chars in the array
+ if (m_args_quote_char.size() < m_args.size())
+ m_args_quote_char.resize (m_argv.size());
}
size_t
@@ -359,7 +419,8 @@
{
m_argv.erase(m_argv.begin());
m_args.pop_front();
- m_args_quote_char.erase(m_args_quote_char.begin());
+ if (!m_args_quote_char.empty())
+ m_args_quote_char.erase(m_args_quote_char.begin());
}
}
@@ -399,8 +460,13 @@
pos = m_args.insert(pos, arg_cstr);
-
- m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
+ if (idx >= m_args_quote_char.size())
+ {
+ m_args_quote_char.resize(idx + 1);
+ m_args_quote_char[idx] = quote_char;
+ }
+ else
+ m_args_quote_char.insert(m_args_quote_char.begin() + idx, quote_char);
UpdateArgvFromArgs();
return GetArgumentAtIndex(idx);
@@ -422,6 +488,8 @@
pos->assign(arg_cstr);
assert(idx < m_argv.size() - 1);
m_argv[idx] = pos->c_str();
+ if (idx >= m_args_quote_char.size())
+ m_args_quote_char.resize(idx + 1);
m_args_quote_char[idx] = quote_char;
return GetArgumentAtIndex(idx);
}
@@ -444,7 +512,8 @@
m_args.erase (pos);
assert(idx < m_argv.size() - 1);
m_argv.erase(m_argv.begin() + idx);
- m_args_quote_char.erase(m_args_quote_char.begin() + idx);
+ if (idx < m_args_quote_char.size())
+ m_args_quote_char.erase(m_args_quote_char.begin() + idx);
}
}
@@ -462,7 +531,7 @@
for (i=0; i<argc; ++i)
{
m_args.push_back (argv[i]);
- if ((argv[i][0] == '"') || (argv[i][0] == '`'))
+ if ((argv[i][0] == '\'') || (argv[i][0] == '"') || (argv[i][0] == '`'))
m_args_quote_char.push_back (argv[i][0]);
else
m_args_quote_char.push_back ('\0');