- djm@cvs.openbsd.org 2007/10/24 03:30:02
     [sftp.c]
     Rework argument splitting and parsing to cope correctly with common
     shell escapes, and make the handling of escaped characters consistent
     both with sh(1) and across sftp commands (especially between commands
     that glob their arguments and those that don't).
     Parse command flags using getopt(3) rather than hand-rolled parsers.
     ok dtucker@
diff --git a/sftp.c b/sftp.c
index f0d5dd5..e811387 100644
--- a/sftp.c
+++ b/sftp.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sftp.c,v 1.96 2007/01/03 04:09:15 stevesk Exp $ */
+/* $OpenBSD: sftp.c,v 1.97 2007/10/24 03:30:02 djm Exp $ */
 /*
  * Copyright (c) 2001-2004 Damien Miller <djm@openbsd.org>
  *
@@ -26,6 +26,7 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 
+#include <ctype.h>
 #include <errno.h>
 
 #ifdef HAVE_PATHS_H
@@ -346,144 +347,78 @@
 }
 
 static int
-parse_getput_flags(const char **cpp, int *pflag)
+parse_getput_flags(const char *cmd, char **argv, int argc, int *pflag)
 {
-	const char *cp = *cpp;
+	extern int optind, optreset, opterr;
+	int ch;
 
-	/* Check for flags */
-	if (cp[0] == '-' && cp[1] && strchr(WHITESPACE, cp[2])) {
-		switch (cp[1]) {
+	optind = optreset = 1;
+	opterr = 0;
+
+	*pflag = 0;
+	while ((ch = getopt(argc, argv, "Pp")) != -1) {
+		switch (ch) {
 		case 'p':
 		case 'P':
 			*pflag = 1;
 			break;
 		default:
-			error("Invalid flag -%c", cp[1]);
-			return(-1);
+			error("%s: Invalid flag -%c", cmd, ch);
+			return -1;
 		}
-		cp += 2;
-		*cpp = cp + strspn(cp, WHITESPACE);
 	}
 
-	return(0);
+	return optind;
 }
 
 static int
-parse_ls_flags(const char **cpp, int *lflag)
+parse_ls_flags(char **argv, int argc, int *lflag)
 {
-	const char *cp = *cpp;
+	extern int optind, optreset, opterr;
+	int ch;
 
-	/* Defaults */
+	optind = optreset = 1;
+	opterr = 0;
+
 	*lflag = LS_NAME_SORT;
-
-	/* Check for flags */
-	if (cp++[0] == '-') {
-		for (; strchr(WHITESPACE, *cp) == NULL; cp++) {
-			switch (*cp) {
-			case 'l':
-				*lflag &= ~VIEW_FLAGS;
-				*lflag |= LS_LONG_VIEW;
-				break;
-			case '1':
-				*lflag &= ~VIEW_FLAGS;
-				*lflag |= LS_SHORT_VIEW;
-				break;
-			case 'n':
-				*lflag &= ~VIEW_FLAGS;
-				*lflag |= LS_NUMERIC_VIEW|LS_LONG_VIEW;
-				break;
-			case 'S':
-				*lflag &= ~SORT_FLAGS;
-				*lflag |= LS_SIZE_SORT;
-				break;
-			case 't':
-				*lflag &= ~SORT_FLAGS;
-				*lflag |= LS_TIME_SORT;
-				break;
-			case 'r':
-				*lflag |= LS_REVERSE_SORT;
-				break;
-			case 'f':
-				*lflag &= ~SORT_FLAGS;
-				break;
-			case 'a':
-				*lflag |= LS_SHOW_ALL;
-				break;
-			default:
-				error("Invalid flag -%c", *cp);
-				return(-1);
-			}
+	while ((ch = getopt(argc, argv, "1Saflnrt")) != -1) {
+		switch (ch) {
+		case '1':
+			*lflag &= ~VIEW_FLAGS;
+			*lflag |= LS_SHORT_VIEW;
+			break;
+		case 'S':
+			*lflag &= ~SORT_FLAGS;
+			*lflag |= LS_SIZE_SORT;
+			break;
+		case 'a':
+			*lflag |= LS_SHOW_ALL;
+			break;
+		case 'f':
+			*lflag &= ~SORT_FLAGS;
+			break;
+		case 'l':
+			*lflag &= ~VIEW_FLAGS;
+			*lflag |= LS_LONG_VIEW;
+			break;
+		case 'n':
+			*lflag &= ~VIEW_FLAGS;
+			*lflag |= LS_NUMERIC_VIEW|LS_LONG_VIEW;
+			break;
+		case 'r':
+			*lflag |= LS_REVERSE_SORT;
+			break;
+		case 't':
+			*lflag &= ~SORT_FLAGS;
+			*lflag |= LS_TIME_SORT;
+			break;
+		default:
+			error("ls: Invalid flag -%c", ch);
+			return -1;
 		}
-		*cpp = cp + strspn(cp, WHITESPACE);
 	}
 
-	return(0);
-}
-
-static int
-get_pathname(const char **cpp, char **path)
-{
-	const char *cp = *cpp, *end;
-	char quot;
-	u_int i, j;
-
-	cp += strspn(cp, WHITESPACE);
-	if (!*cp) {
-		*cpp = cp;
-		*path = NULL;
-		return (0);
-	}
-
-	*path = xmalloc(strlen(cp) + 1);
-
-	/* Check for quoted filenames */
-	if (*cp == '\"' || *cp == '\'') {
-		quot = *cp++;
-
-		/* Search for terminating quote, unescape some chars */
-		for (i = j = 0; i <= strlen(cp); i++) {
-			if (cp[i] == quot) {	/* Found quote */
-				i++;
-				(*path)[j] = '\0';
-				break;
-			}
-			if (cp[i] == '\0') {	/* End of string */
-				error("Unterminated quote");
-				goto fail;
-			}
-			if (cp[i] == '\\') {	/* Escaped characters */
-				i++;
-				if (cp[i] != '\'' && cp[i] != '\"' &&
-				    cp[i] != '\\') {
-					error("Bad escaped character '\\%c'",
-					    cp[i]);
-					goto fail;
-				}
-			}
-			(*path)[j++] = cp[i];
-		}
-
-		if (j == 0) {
-			error("Empty quotes");
-			goto fail;
-		}
-		*cpp = cp + i + strspn(cp + i, WHITESPACE);
-	} else {
-		/* Read to end of filename */
-		end = strpbrk(cp, WHITESPACE);
-		if (end == NULL)
-			end = strchr(cp, '\0');
-		*cpp = end + strspn(end, WHITESPACE);
-
-		memcpy(*path, cp, end - cp);
-		(*path)[end - cp] = '\0';
-	}
-	return (0);
-
- fail:
-	xfree(*path);
-	*path = NULL;
-	return (-1);
+	return optind;
 }
 
 static int
@@ -866,15 +801,189 @@
 	return (0);
 }
 
+/*
+ * Undo escaping of glob sequences in place. Used to undo extra escaping
+ * applied in makeargv() when the string is destined for a function that
+ * does not glob it.
+ */
+static void
+undo_glob_escape(char *s)
+{
+	size_t i, j;
+
+	for (i = j = 0;;) {
+		if (s[i] == '\0') {
+			s[j] = '\0';
+			return;
+		}
+		if (s[i] != '\\') {
+			s[j++] = s[i++];
+			continue;
+		}
+		/* s[i] == '\\' */
+		++i;
+		switch (s[i]) {
+		case '?':
+		case '[':
+		case '*':
+		case '\\':
+			s[j++] = s[i++];
+			break;
+		case '\0':
+			s[j++] = '\\';
+			s[j] = '\0';
+			return;
+		default:
+			s[j++] = '\\';
+			s[j++] = s[i++];
+			break;
+		}
+	}
+}
+
+/*
+ * Split a string into an argument vector using sh(1)-style quoting,
+ * comment and escaping rules, but with some tweaks to handle glob(3)
+ * wildcards.
+ * Returns NULL on error or a NULL-terminated array of arguments.
+ */
+#define MAXARGS 	128
+#define MAXARGLEN	8192
+static char **
+makeargv(const char *arg, int *argcp)
+{
+	int argc, quot;
+	size_t i, j;
+	static char argvs[MAXARGLEN];
+	static char *argv[MAXARGS + 1];
+	enum { MA_START, MA_SQUOTE, MA_DQUOTE, MA_UNQUOTED } state, q;
+
+	*argcp = argc = 0;
+	if (strlen(arg) > sizeof(argvs) - 1) {
+ args_too_longs:
+		error("string too long");
+		return NULL;
+	}
+	state = MA_START;
+	i = j = 0;
+	for (;;) {
+		if (isspace(arg[i])) {
+			if (state == MA_UNQUOTED) {
+				/* Terminate current argument */
+				argvs[j++] = '\0';
+				argc++;
+				state = MA_START;
+			} else if (state != MA_START)
+				argvs[j++] = arg[i];
+		} else if (arg[i] == '"' || arg[i] == '\'') {
+			q = arg[i] == '"' ? MA_DQUOTE : MA_SQUOTE;
+			if (state == MA_START) {
+				argv[argc] = argvs + j;
+				state = q;
+			} else if (state == MA_UNQUOTED) 
+				state = q;
+			else if (state == q)
+				state = MA_UNQUOTED;
+			else
+				argvs[j++] = arg[i];
+		} else if (arg[i] == '\\') {
+			if (state == MA_SQUOTE || state == MA_DQUOTE) {
+				quot = state == MA_SQUOTE ? '\'' : '"';
+				/* Unescape quote we are in */
+				/* XXX support \n and friends? */
+				if (arg[i + 1] == quot) {
+					i++;
+					argvs[j++] = arg[i];
+				} else if (arg[i + 1] == '?' ||
+				    arg[i + 1] == '[' || arg[i + 1] == '*') {
+					/*
+					 * Special case for sftp: append
+					 * double-escaped glob sequence -
+					 * glob will undo one level of
+					 * escaping. NB. string can grow here.
+					 */
+					if (j >= sizeof(argvs) - 5)
+						goto args_too_longs;
+					argvs[j++] = '\\';
+					argvs[j++] = arg[i++];
+					argvs[j++] = '\\';
+					argvs[j++] = arg[i];
+				} else {
+					argvs[j++] = arg[i++];
+					argvs[j++] = arg[i];
+				}
+			} else {
+				if (state == MA_START) {
+					argv[argc] = argvs + j;
+					state = MA_UNQUOTED;
+				}
+				if (arg[i + 1] == '?' || arg[i + 1] == '[' ||
+				    arg[i + 1] == '*' || arg[i + 1] == '\\') {
+					/*
+					 * Special case for sftp: append
+					 * escaped glob sequence -
+					 * glob will undo one level of
+					 * escaping.
+					 */
+					argvs[j++] = arg[i++];
+					argvs[j++] = arg[i];
+				} else {
+					/* Unescape everything */
+					/* XXX support \n and friends? */
+					i++;
+					argvs[j++] = arg[i];
+				}
+			}
+		} else if (arg[i] == '#') {
+			if (state == MA_SQUOTE || state == MA_DQUOTE)
+				argvs[j++] = arg[i];
+			else
+				goto string_done;
+		} else if (arg[i] == '\0') {
+			if (state == MA_SQUOTE || state == MA_DQUOTE) {
+				error("Unterminated quoted argument");
+				return NULL;
+			}
+ string_done:
+			if (state == MA_UNQUOTED) {
+				argvs[j++] = '\0';
+				argc++;
+			}
+			break;
+		} else {
+			if (state == MA_START) {
+				argv[argc] = argvs + j;
+				state = MA_UNQUOTED;
+			}
+			if ((state == MA_SQUOTE || state == MA_DQUOTE) &&
+			    (arg[i] == '?' || arg[i] == '[' || arg[i] == '*')) {
+				/*
+				 * Special case for sftp: escape quoted
+				 * glob(3) wildcards. NB. string can grow
+				 * here.
+				 */
+				if (j >= sizeof(argvs) - 3)
+					goto args_too_longs;
+				argvs[j++] = '\\';
+				argvs[j++] = arg[i];
+			} else
+				argvs[j++] = arg[i];
+		}
+		i++;
+	}
+	*argcp = argc;
+	return argv;
+}
+
 static int
 parse_args(const char **cpp, int *pflag, int *lflag, int *iflag,
     unsigned long *n_arg, char **path1, char **path2)
 {
 	const char *cmd, *cp = *cpp;
-	char *cp2;
+	char *cp2, **argv;
 	int base = 0;
 	long l;
-	int i, cmdnum;
+	int i, cmdnum, optidx, argc;
 
 	/* Skip leading whitespace */
 	cp = cp + strspn(cp, WHITESPACE);
@@ -890,17 +999,13 @@
 		cp++;
 	}
 
-	/* Figure out which command we have */
-	for (i = 0; cmds[i].c; i++) {
-		int cmdlen = strlen(cmds[i].c);
+	if ((argv = makeargv(cp, &argc)) == NULL)
+		return -1;
 
-		/* Check for command followed by whitespace */
-		if (!strncasecmp(cp, cmds[i].c, cmdlen) &&
-		    strchr(WHITESPACE, cp[cmdlen])) {
-			cp += cmdlen;
-			cp = cp + strspn(cp, WHITESPACE);
+	/* Figure out which command we have */
+	for (i = 0; cmds[i].c != NULL; i++) {
+		if (strcasecmp(cmds[i].c, argv[0]) == 0)
 			break;
-		}
 	}
 	cmdnum = cmds[i].n;
 	cmd = cmds[i].c;
@@ -911,40 +1016,44 @@
 		cmdnum = I_SHELL;
 	} else if (cmdnum == -1) {
 		error("Invalid command.");
-		return (-1);
+		return -1;
 	}
 
 	/* Get arguments and parse flags */
 	*lflag = *pflag = *n_arg = 0;
 	*path1 = *path2 = NULL;
+	optidx = 1;
 	switch (cmdnum) {
 	case I_GET:
 	case I_PUT:
-		if (parse_getput_flags(&cp, pflag))
-			return(-1);
+		if ((optidx = parse_getput_flags(cmd, argv, argc, pflag)) == -1)
+			return -1;
 		/* Get first pathname (mandatory) */
-		if (get_pathname(&cp, path1))
-			return(-1);
-		if (*path1 == NULL) {
+		if (argc - optidx < 1) {
 			error("You must specify at least one path after a "
 			    "%s command.", cmd);
-			return(-1);
+			return -1;
 		}
-		/* Try to get second pathname (optional) */
-		if (get_pathname(&cp, path2))
-			return(-1);
+		*path1 = xstrdup(argv[optidx]);
+		/* Get second pathname (optional) */
+		if (argc - optidx > 1) {
+			*path2 = xstrdup(argv[optidx + 1]);
+			/* Destination is not globbed */
+			undo_glob_escape(*path2);
+		}
 		break;
 	case I_RENAME:
 	case I_SYMLINK:
-		if (get_pathname(&cp, path1))
-			return(-1);
-		if (get_pathname(&cp, path2))
-			return(-1);
-		if (!*path1 || !*path2) {
+		if (argc - optidx < 2) {
 			error("You must specify two paths after a %s "
 			    "command.", cmd);
-			return(-1);
+			return -1;
 		}
+		*path1 = xstrdup(argv[optidx]);
+		*path2 = xstrdup(argv[optidx + 1]);
+		/* Paths are not globbed */
+		undo_glob_escape(*path1);
+		undo_glob_escape(*path2);
 		break;
 	case I_RM:
 	case I_MKDIR:
@@ -953,59 +1062,55 @@
 	case I_LCHDIR:
 	case I_LMKDIR:
 		/* Get pathname (mandatory) */
-		if (get_pathname(&cp, path1))
-			return(-1);
-		if (*path1 == NULL) {
+		if (argc - optidx < 1) {
 			error("You must specify a path after a %s command.",
 			    cmd);
-			return(-1);
+			return -1;
 		}
+		*path1 = xstrdup(argv[optidx]);
+		/* Only "rm" globs */
+		if (cmdnum != I_RM)
+			undo_glob_escape(*path1);
 		break;
 	case I_LS:
-		if (parse_ls_flags(&cp, lflag))
+		if ((optidx = parse_ls_flags(argv, argc, lflag)) == -1)
 			return(-1);
 		/* Path is optional */
-		if (get_pathname(&cp, path1))
-			return(-1);
+		if (argc - optidx > 0)
+			*path1 = xstrdup(argv[optidx]);
 		break;
 	case I_LLS:
 	case I_SHELL:
 		/* Uses the rest of the line */
 		break;
 	case I_LUMASK:
-		base = 8;
 	case I_CHMOD:
 		base = 8;
 	case I_CHOWN:
 	case I_CHGRP:
 		/* Get numeric arg (mandatory) */
+		if (argc - optidx < 1)
+			goto need_num_arg;
 		errno = 0;
-		l = strtol(cp, &cp2, base);
-		if (cp2 == cp || ((l == LONG_MIN || l == LONG_MAX) &&
-		    errno == ERANGE) || l < 0) {
+		l = strtol(argv[optidx], &cp2, base);
+		if (cp2 == argv[optidx] || *cp2 != '\0' ||
+		    ((l == LONG_MIN || l == LONG_MAX) && errno == ERANGE) ||
+		    l < 0) {
+ need_num_arg:
 			error("You must supply a numeric argument "
 			    "to the %s command.", cmd);
-			return(-1);
+			return -1;
 		}
-		cp = cp2;
 		*n_arg = l;
-		if (cmdnum == I_LUMASK && strchr(WHITESPACE, *cp))
+		if (cmdnum == I_LUMASK)
 			break;
-		if (cmdnum == I_LUMASK || !strchr(WHITESPACE, *cp)) {
-			error("You must supply a numeric argument "
-			    "to the %s command.", cmd);
-			return(-1);
-		}
-		cp += strspn(cp, WHITESPACE);
-
 		/* Get pathname (mandatory) */
-		if (get_pathname(&cp, path1))
-			return(-1);
-		if (*path1 == NULL) {
+		if (argc - optidx < 2) {
 			error("You must specify a path after a %s command.",
 			    cmd);
-			return(-1);
+			return -1;
 		}
+		*path1 = xstrdup(argv[optidx + 1]);
 		break;
 	case I_QUIT:
 	case I_PWD: