change our tokenize to yield empty tokens
Our tokenize function currently skips empty tokens. This means we
incorrectly accept invalid seccomp filter policy syntax such as:
close: arg0 == 1 |||||| arg0 == 2
Change the tokenizer helper to yield an empty string in this case so
that callers can correctly detect & reject such input. We don't
currently have any scenarios where empty tokens should be accepted
either (and if we did, those callers could check for and skip the
empty tokens themselves).
Bug: None
Test: unittests pass
Change-Id: I282e4e4544a24c0e5a7036b693429bdd209339cf
diff --git a/syscall_filter_unittest.cc b/syscall_filter_unittest.cc
index 6bc044d..db01fbb 100644
--- a/syscall_filter_unittest.cc
+++ b/syscall_filter_unittest.cc
@@ -1261,6 +1261,19 @@
ASSERT_NE(res, 0);
}
+TEST(FilterTest, invalid_tokens) {
+ struct sock_fprog actual;
+ const char *policy = "read: arg0 == 1 |||| arg0 == 2\n";
+
+ FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
+ ASSERT_NE(policy_file, nullptr);
+
+ int res =
+ compile_filter("policy", policy_file, &actual, USE_RET_KILL, NO_LOGGING);
+ fclose(policy_file);
+ ASSERT_NE(res, 0);
+}
+
TEST(FilterTest, nonexistent) {
struct sock_fprog actual;
int res = compile_filter("policy", NULL, &actual, USE_RET_KILL, NO_LOGGING);
diff --git a/util.c b/util.c
index 14c028a..9bb37ca 100644
--- a/util.c
+++ b/util.c
@@ -257,8 +257,8 @@
{
char *ret = NULL;
- /* If the string is NULL or empty, there are no tokens to be found. */
- if (stringp == NULL || *stringp == NULL || **stringp == '\0')
+ /* If the string is NULL, there are no tokens to be found. */
+ if (stringp == NULL || *stringp == NULL)
return NULL;
/*
@@ -271,33 +271,19 @@
return ret;
}
- char *found;
- while (**stringp != '\0') {
- found = strstr(*stringp, delim);
-
- if (!found) {
- /*
- * The delimiter was not found, so the full string
- * makes up the only token, and we're done.
- */
- ret = *stringp;
- *stringp = NULL;
- break;
- }
-
- if (found != *stringp) {
- /* There's a non-empty token before the delimiter. */
- *found = '\0';
- ret = *stringp;
- *stringp = found + strlen(delim);
- break;
- }
-
+ char *found = strstr(*stringp, delim);
+ if (!found) {
/*
- * The delimiter was found at the start of the string,
- * skip it and keep looking for a non-empty token.
+ * The delimiter was not found, so the full string
+ * makes up the only token, and we're done.
*/
- *stringp += strlen(delim);
+ ret = *stringp;
+ *stringp = NULL;
+ } else {
+ /* There's a token here, possibly empty. That's OK. */
+ *found = '\0';
+ ret = *stringp;
+ *stringp = found + strlen(delim);
}
return ret;
diff --git a/util.h b/util.h
index 9ec88ce..7ff86b8 100644
--- a/util.h
+++ b/util.h
@@ -83,6 +83,18 @@
int parse_size(size_t *size, const char *sizespec);
char *strip(char *s);
+
+/*
+ * tokenize: locate the next token in @stringp using the @delim
+ * @stringp A pointer to the string to scan for tokens
+ * @delim The delimiter to split by
+ *
+ * Note that, unlike strtok, @delim is not a set of characters, but the full
+ * delimiter. e.g. "a,;b,;c" with a delim of ",;" will yield ["a","b","c"].
+ *
+ * Note that, unlike strtok, this may return an empty token. e.g. "a,,b" with
+ * strtok will yield ["a","b"], but this will yield ["a","","b"].
+ */
char *tokenize(char **stringp, const char *delim);
char *path_join(const char *external_path, const char *internal_path);
diff --git a/util_unittest.cc b/util_unittest.cc
index b5cdff7..ec3d714 100644
--- a/util_unittest.cc
+++ b/util_unittest.cc
@@ -65,3 +65,29 @@
ASSERT_EQ(nullptr, p);
ASSERT_EQ(nullptr, tokenize(&p, ","));
}
+
+// Check edge case with an empty string.
+TEST(tokenize, empty_string) {
+ char str[] = "";
+ char *p = str;
+ ASSERT_EQ("", std::string(tokenize(&p, ",")));
+ ASSERT_EQ(nullptr, p);
+ ASSERT_EQ(nullptr, tokenize(&p, ","));
+}
+
+// Check behavior with empty tokens at the start/middle/end.
+TEST(tokenize, empty_tokens) {
+ char str[] = ",,a,b,,,c,,";
+ char *p = str;
+ ASSERT_EQ("", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("a", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("b", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("c", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("", std::string(tokenize(&p, ",")));
+ ASSERT_EQ("", std::string(tokenize(&p, ",")));
+ ASSERT_EQ(nullptr, p);
+ ASSERT_EQ(nullptr, tokenize(&p, ","));
+}