Use SECCOMP_RET_TRAP when setting thread sync.

SECCOMP_RET_KILL will only kill the offending thread -- it's equivalent
to having the thread call syscall(SYS_exit, SIGSYS). This is explicitly
*not* the same as exit_group(2), so other threads in the thread group
will not be killed.

When setting thread sync, we normally would expect all threads in the
thread group to be killed. To do this, use SECCOMP_RET_TRAP and reset
the signal disposition for SIGSYS to its default value, which is to
abort and dump core (see signal(7)).

There was also a small bug related to seccomp_can_softfail(), where we
were never using seccomp even when it was available.

Bug: 31862018
Test: Manual with multi-threaded program.

Change-Id: I4a10d256b0ba1b15041d46c22bd45b445f8ef3f7
diff --git a/libminijail.c b/libminijail.c
index 5513b19..e837414 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -139,7 +139,7 @@
 		int no_new_privs:1;
 		int seccomp_filter:1;
 		int seccomp_filter_tsync:1;
-		int log_seccomp_filter:1;
+		int seccomp_filter_logging:1;
 		int chroot:1;
 		int pivot_root:1;
 		int mount_tmp:1;
@@ -351,12 +351,20 @@
 
 void API minijail_set_seccomp_filter_tsync(struct minijail *j)
 {
+	if (j->filter_len > 0 && j->filter_prog != NULL) {
+		die("minijail_set_seccomp_filter_tsync() must be called "
+		    "before minijail_parse_seccomp_filters()");
+	}
 	j->flags.seccomp_filter_tsync = 1;
 }
 
 void API minijail_log_seccomp_filter_failures(struct minijail *j)
 {
-	j->flags.log_seccomp_filter = 1;
+	if (j->filter_len > 0 && j->filter_prog != NULL) {
+		die("minijail_log_seccomp_filter_failures() must be called "
+		    "before minijail_parse_seccomp_filters()");
+	}
+	j->flags.seccomp_filter_logging = 1;
 }
 
 void API minijail_use_caps(struct minijail *j, uint64_t capmask)
@@ -673,7 +681,7 @@
 {
 	j->flags.seccomp_filter = 0;
 	j->flags.seccomp_filter_tsync = 0;
-	j->flags.log_seccomp_filter = 0;
+	j->flags.seccomp_filter_logging = 0;
 	j->filter_len = 0;
 	j->filter_prog = NULL;
 	j->flags.no_new_privs = 0;
@@ -705,17 +713,15 @@
 		if (sys_seccomp(SECCOMP_SET_MODE_FILTER,
 				SECCOMP_FILTER_FLAG_TSYNC, NULL) == -1) {
 			int saved_errno = errno;
-			if (seccomp_can_softfail()) {
-				if (saved_errno == ENOSYS) {
-					warn(
-					    "seccomp(2) syscall not supported");
-					clear_seccomp_options(j);
-				}
-				if (saved_errno == EINVAL) {
-					warn("seccomp filter thread sync not "
-					     "supported");
-					clear_seccomp_options(j);
-				}
+			if (saved_errno == ENOSYS && seccomp_can_softfail()) {
+				warn("seccomp(2) syscall not supported");
+				clear_seccomp_options(j);
+				return 0;
+			} else if (saved_errno == EINVAL &&
+				   seccomp_can_softfail()) {
+				warn(
+				    "seccomp filter thread sync not supported");
+				clear_seccomp_options(j);
 				return 0;
 			}
 			/*
@@ -732,7 +738,11 @@
 static int parse_seccomp_filters(struct minijail *j, FILE *policy_file)
 {
 	struct sock_fprog *fprog = malloc(sizeof(struct sock_fprog));
-	if (compile_filter(policy_file, fprog, j->flags.log_seccomp_filter)) {
+	int use_ret_trap =
+	    j->flags.seccomp_filter_tsync || j->flags.seccomp_filter_logging;
+	int allow_logging = j->flags.seccomp_filter_logging;
+
+	if (compile_filter(policy_file, fprog, use_ret_trap, allow_logging)) {
 		free(fprog);
 		return -1;
 	}
@@ -1462,14 +1472,25 @@
 		return;
 	}
 
-	/*
-	 * If we're logging seccomp filter failures,
-	 * install the SIGSYS handler first.
-	 */
-	if (j->flags.seccomp_filter && j->flags.log_seccomp_filter) {
-		if (install_sigsys_handler())
-			pdie("install SIGSYS handler");
-		warn("logging seccomp filter failures");
+	if (j->flags.seccomp_filter) {
+		if (j->flags.seccomp_filter_logging) {
+			/*
+			 * If logging seccomp filter failures,
+			 * install the SIGSYS handler first.
+			 */
+			if (install_sigsys_handler())
+				pdie("failed to install SIGSYS handler");
+			warn("logging seccomp filter failures");
+		} else if (j->flags.seccomp_filter_tsync) {
+			/*
+			 * If setting thread sync,
+			 * reset the SIGSYS signal handler so that
+			 * the entire thread group is killed.
+			 */
+			if (signal(SIGSYS, SIG_DFL) == SIG_ERR)
+				pdie("failed to reset SIGSYS disposition");
+			info("reset SIGSYS disposition");
+		}
 	}
 
 	/*
diff --git a/syscall_filter.c b/syscall_filter.c
index 0e3d7b3..37b8136 100644
--- a/syscall_filter.c
+++ b/syscall_filter.c
@@ -130,7 +130,7 @@
 	append_filter_block(head, filter, len);
 }
 
-void allow_log_syscalls(struct filter_block *head)
+void allow_logging_syscalls(struct filter_block *head)
 {
 	unsigned int i;
 	for (i = 0; i < log_syscalls_len; i++) {
@@ -219,8 +219,7 @@
 	return 0;
 }
 
-int compile_errno(struct filter_block *head, char *ret_errno,
-		  int log_failures)
+int compile_errno(struct filter_block *head, char *ret_errno, int use_ret_trap)
 {
 	char *errno_ptr;
 
@@ -240,7 +239,7 @@
 
 		append_ret_errno(head, errno_val);
 	} else {
-		if (!log_failures)
+		if (!use_ret_trap)
 			append_ret_kill(head);
 		else
 			append_ret_trap(head);
@@ -251,7 +250,7 @@
 struct filter_block *compile_section(int nr, const char *policy_line,
 				     unsigned int entry_lbl_id,
 				     struct bpf_labels *labels,
-				     int log_failures)
+				     int use_ret_trap)
 {
 	/*
 	 * |policy_line| should be an expression of the form:
@@ -313,7 +312,7 @@
 
 	/* Checks whether we're unconditionally blocking this syscall. */
 	if (strncmp(line, "return", strlen("return")) == 0) {
-		if (compile_errno(head, line, log_failures) < 0)
+		if (compile_errno(head, line, use_ret_trap) < 0)
 			return NULL;
 		free(line);
 		return head;
@@ -361,10 +360,10 @@
 	 * otherwise just kill the task.
 	 */
 	if (ret_errno) {
-		if (compile_errno(head, ret_errno, log_failures) < 0)
+		if (compile_errno(head, ret_errno, use_ret_trap) < 0)
 			return NULL;
 	} else {
-		if (!log_failures)
+		if (!use_ret_trap)
 			append_ret_kill(head);
 		else
 			append_ret_trap(head);
@@ -384,7 +383,8 @@
 	return head;
 }
 
-int compile_filter(FILE *policy_file, struct sock_fprog *prog, int log_failures)
+int compile_filter(FILE *policy_file, struct sock_fprog *prog, int use_ret_trap,
+		   int allow_logging)
 {
 	char line[MAX_LINE_LENGTH];
 	int line_count = 0;
@@ -408,9 +408,9 @@
 	len = bpf_load_syscall_nr(load_nr);
 	append_filter_block(head, load_nr, len);
 
-	/* If we're logging failures, allow the necessary syscalls first. */
-	if (log_failures)
-		allow_log_syscalls(head);
+	/* If logging failures, allow the necessary syscalls first. */
+	if (allow_logging)
+		allow_logging_syscalls(head);
 
 	/*
 	 * Loop through all the lines in the policy file.
@@ -439,7 +439,7 @@
 		if (nr < 0) {
 			warn("compile_filter: nonexistent syscall '%s'",
 			     syscall_name);
-			if (log_failures) {
+			if (allow_logging) {
 				/*
 				 * If we're logging failures, assume we're in a
 				 * debugging case and continue.
@@ -479,7 +479,7 @@
 			/* Build the arg filter block. */
 			struct filter_block *block =
 			    compile_section(nr, policy_line, id, &labels,
-					    log_failures);
+					    use_ret_trap);
 
 			if (!block)
 				return -1;
@@ -493,10 +493,10 @@
 	}
 
 	/*
-	 * If none of the syscalls match, either fall back to KILL,
+	 * If none of the syscalls match, either fall through to KILL,
 	 * or return TRAP.
 	 */
-	if (!log_failures)
+	if (!use_ret_trap)
 		append_ret_kill(head);
 	else
 		append_ret_trap(head);
diff --git a/syscall_filter.h b/syscall_filter.h
index 09c6550..4625d80 100644
--- a/syscall_filter.h
+++ b/syscall_filter.h
@@ -11,9 +11,6 @@
 
 #include "bpf.h"
 
-#define NO_LOGGING  0
-#define USE_LOGGING 1
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -32,8 +29,9 @@
 struct filter_block *compile_section(int nr, const char *policy_line,
 				     unsigned int label_id,
 				     struct bpf_labels *labels,
-				     int log_failures);
-int compile_filter(FILE *policy_file, struct sock_fprog *prog, int log_failures);
+				     int do_ret_trap);
+int compile_filter(FILE *policy_file, struct sock_fprog *prog, int do_ret_trap,
+		   int add_logging_syscalls);
 
 int flatten_block_list(struct filter_block *head, struct sock_filter *filter,
 		       size_t index, size_t cap);
diff --git a/syscall_filter_unittest.c b/syscall_filter_unittest.c
index 603479e..7b4b7d6 100644
--- a/syscall_filter_unittest.c
+++ b/syscall_filter_unittest.c
@@ -645,7 +645,8 @@
 	FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
 	ASSERT_NE(policy_file, NULL);
 
-	int res = compile_filter(policy_file, &actual, NO_LOGGING);
+	int res =
+	    compile_filter(policy_file, &actual, USE_RET_KILL, NO_LOGGING);
 	fclose(policy_file);
 
 	/*
@@ -672,6 +673,45 @@
 	free(actual.filter);
 }
 
+TEST_F(filter, seccomp_mode1_trap) {
+	struct sock_fprog actual;
+	const char *policy =
+		"read: 1\n"
+		"write: 1\n"
+		"rt_sigreturn: 1\n"
+		"exit: 1\n";
+
+	FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
+	ASSERT_NE(policy_file, NULL);
+
+	int res =
+	    compile_filter(policy_file, &actual, USE_RET_TRAP, NO_LOGGING);
+	fclose(policy_file);
+
+	/*
+	 * Checks return value, filter length, and that the filter
+	 * validates arch, loads syscall number, and
+	 * only allows expected syscalls.
+	 */
+	ASSERT_EQ(res, 0);
+	EXPECT_EQ(actual.len, 13);
+	EXPECT_ARCH_VALIDATION(actual.filter);
+	EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN,
+			BPF_LD+BPF_W+BPF_ABS, syscall_nr);
+	EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 1,
+			__NR_read);
+	EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 3,
+			__NR_write);
+	EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 5,
+			__NR_rt_sigreturn);
+	EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 7,
+			__NR_exit);
+	EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN + 9, BPF_RET+BPF_K,
+			SECCOMP_RET_TRAP);
+
+	free(actual.filter);
+}
+
 TEST_F(filter, seccomp_read_write) {
 	struct sock_fprog actual;
 	const char *policy =
@@ -683,7 +723,8 @@
 	FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
 	ASSERT_NE(policy_file, NULL);
 
-	int res = compile_filter(policy_file, &actual, NO_LOGGING);
+	int res =
+	    compile_filter(policy_file, &actual, USE_RET_KILL, NO_LOGGING);
 	fclose(policy_file);
 
 	/*
@@ -720,7 +761,7 @@
 	FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
 	ASSERT_NE(policy_file, NULL);
 
-	int res = compile_filter(policy_file, &actual, NO_LOGGING);
+	int res = compile_filter(policy_file, &actual, 0, NO_LOGGING);
 	fclose(policy_file);
 	ASSERT_NE(res, 0);
 }
@@ -732,14 +773,14 @@
 	FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
 	ASSERT_NE(policy_file, NULL);
 
-	int res = compile_filter(policy_file, &actual, NO_LOGGING);
+	int res = compile_filter(policy_file, &actual, 0, NO_LOGGING);
 	fclose(policy_file);
 	ASSERT_NE(res, 0);
 }
 
 TEST_F(filter, nonexistent) {
 	struct sock_fprog actual;
-	int res = compile_filter(NULL, &actual, NO_LOGGING);
+	int res = compile_filter(NULL, &actual, 0, NO_LOGGING);
 	ASSERT_NE(res, 0);
 }
 
@@ -754,7 +795,8 @@
 	FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
 	ASSERT_NE(policy_file, NULL);
 
-	int res = compile_filter(policy_file, &actual, USE_LOGGING);
+	int res =
+	    compile_filter(policy_file, &actual, USE_RET_TRAP, USE_LOGGING);
 	fclose(policy_file);
 
 	size_t i;
@@ -789,4 +831,51 @@
 	free(actual.filter);
 }
 
+TEST_F(filter, allow_log_but_kill) {
+	struct sock_fprog actual;
+	const char *policy =
+		"read: 1\n"
+		"write: 1\n"
+		"rt_sigreturn: 1\n"
+		"exit: 1\n";
+
+	FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
+	ASSERT_NE(policy_file, NULL);
+
+	int res =
+	    compile_filter(policy_file, &actual, USE_RET_KILL, USE_LOGGING);
+	fclose(policy_file);
+
+	size_t i;
+	size_t index = 0;
+	/*
+	 * Checks return value, filter length, and that the filter
+	 * validates arch, loads syscall number, only allows expected syscalls,
+	 * and returns TRAP on failure.
+	 * NOTE(jorgelo): the filter is longer since we add the syscalls needed
+	 * for logging.
+	 */
+	ASSERT_EQ(res, 0);
+	EXPECT_EQ(actual.len, 13 + 2 * log_syscalls_len);
+	EXPECT_ARCH_VALIDATION(actual.filter);
+	EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN,
+			BPF_LD+BPF_W+BPF_ABS, syscall_nr);
+
+	index = ARCH_VALIDATION_LEN + 1;
+	for (i = 0; i < log_syscalls_len; i++)
+		EXPECT_ALLOW_SYSCALL(actual.filter + (index + 2 * i),
+				     lookup_syscall(log_syscalls[i]));
+
+	index += 2 * log_syscalls_len;
+
+	EXPECT_ALLOW_SYSCALL(actual.filter + index, __NR_read);
+	EXPECT_ALLOW_SYSCALL(actual.filter + index + 2, __NR_write);
+	EXPECT_ALLOW_SYSCALL(actual.filter + index + 4, __NR_rt_sigreturn);
+	EXPECT_ALLOW_SYSCALL(actual.filter + index + 6, __NR_exit);
+	EXPECT_EQ_STMT(actual.filter + index + 8, BPF_RET+BPF_K,
+			SECCOMP_RET_KILL);
+
+	free(actual.filter);
+}
+
 TEST_HARNESS_MAIN
diff --git a/syscall_filter_unittest.cpp b/syscall_filter_unittest.cpp
index 09e98f6..10fa8fd 100644
--- a/syscall_filter_unittest.cpp
+++ b/syscall_filter_unittest.cpp
@@ -621,7 +621,7 @@
   FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
   ASSERT_TRUE(policy_file != NULL);
 
-  int res = compile_filter(policy_file, &actual, NO_LOGGING);
+  int res = compile_filter(policy_file, &actual, USE_RET_KILL, NO_LOGGING);
   fclose(policy_file);
 
   /*
@@ -647,6 +647,44 @@
   free(actual.filter);
 }
 
+TEST(FilterTest, seccomp_mode1_trap) {
+  struct sock_fprog actual;
+  const char *policy =
+    "read: 1\n"
+    "write: 1\n"
+    "rt_sigreturn: 1\n"
+    "exit: 1\n";
+
+  FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
+  ASSERT_TRUE(policy_file != NULL);
+
+  int res = compile_filter(policy_file, &actual, USE_RET_TRAP, NO_LOGGING);
+  fclose(policy_file);
+
+  /*
+   * Checks return value, filter length, and that the filter
+   * validates arch, loads syscall number, and
+   * only allows expected syscalls.
+   */
+  ASSERT_EQ(res, 0);
+  EXPECT_EQ(actual.len, 13);
+  EXPECT_ARCH_VALIDATION(actual.filter);
+  EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN,
+      BPF_LD+BPF_W+BPF_ABS, syscall_nr);
+  EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 1,
+      __NR_read);
+  EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 3,
+      __NR_write);
+  EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 5,
+      __NR_rt_sigreturn);
+  EXPECT_ALLOW_SYSCALL(actual.filter + ARCH_VALIDATION_LEN + 7,
+      __NR_exit);
+  EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN + 9, BPF_RET+BPF_K,
+      SECCOMP_RET_TRAP);
+
+  free(actual.filter);
+}
+
 TEST(FilterTest, seccomp_read_write) {
   struct sock_fprog actual;
   const char *policy =
@@ -658,7 +696,7 @@
   FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
   ASSERT_TRUE(policy_file != NULL);
 
-  int res = compile_filter(policy_file, &actual, NO_LOGGING);
+  int res = compile_filter(policy_file, &actual, USE_RET_KILL, NO_LOGGING);
   fclose(policy_file);
 
   /*
@@ -699,7 +737,7 @@
   FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
   ASSERT_TRUE(policy_file != NULL);
 
-  int res = compile_filter(policy_file, &actual, NO_LOGGING);
+  int res = compile_filter(policy_file, &actual, 0, NO_LOGGING);
   fclose(policy_file);
   ASSERT_NE(res, 0);
 }
@@ -711,14 +749,14 @@
   FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
   ASSERT_TRUE(policy_file != NULL);
 
-  int res = compile_filter(policy_file, &actual, NO_LOGGING);
+  int res = compile_filter(policy_file, &actual, 0, NO_LOGGING);
   fclose(policy_file);
   ASSERT_NE(res, 0);
 }
 
 TEST(FilterTest, nonexistent) {
   struct sock_fprog actual;
-  int res = compile_filter(NULL, &actual, NO_LOGGING);
+  int res = compile_filter(NULL, &actual, 0, NO_LOGGING);
   ASSERT_NE(res, 0);
 }
 
@@ -733,7 +771,7 @@
   FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
   ASSERT_TRUE(policy_file != NULL);
 
-  int res = compile_filter(policy_file, &actual, USE_LOGGING);
+  int res = compile_filter(policy_file, &actual, USE_RET_TRAP, USE_LOGGING);
   fclose(policy_file);
 
   size_t i;
@@ -767,3 +805,49 @@
 
   free(actual.filter);
 }
+
+TEST(FilterTest, allow_log_but_kill) {
+  struct sock_fprog actual;
+  const char *policy =
+    "read: 1\n"
+    "write: 1\n"
+    "rt_sigreturn: 1\n"
+    "exit: 1\n";
+
+  FILE *policy_file = write_policy_to_pipe(policy, strlen(policy));
+  ASSERT_TRUE(policy_file != NULL);
+
+  int res = compile_filter(policy_file, &actual, USE_RET_KILL, USE_LOGGING);
+  fclose(policy_file);
+
+  size_t i;
+  size_t index = 0;
+  /*
+   * Checks return value, filter length, and that the filter
+   * validates arch, loads syscall number, only allows expected syscalls,
+   * and returns TRAP on failure.
+   * NOTE(jorgelo): the filter is longer since we add the syscalls needed
+   * for logging.
+   */
+  ASSERT_EQ(res, 0);
+  EXPECT_EQ(actual.len, 13 + 2 * log_syscalls_len);
+  EXPECT_ARCH_VALIDATION(actual.filter);
+  EXPECT_EQ_STMT(actual.filter + ARCH_VALIDATION_LEN,
+      BPF_LD+BPF_W+BPF_ABS, syscall_nr);
+
+  index = ARCH_VALIDATION_LEN + 1;
+  for (i = 0; i < log_syscalls_len; i++)
+    EXPECT_ALLOW_SYSCALL(actual.filter + (index + 2 * i),
+             lookup_syscall(log_syscalls[i]));
+
+  index += 2 * log_syscalls_len;
+
+  EXPECT_ALLOW_SYSCALL(actual.filter + index, __NR_read);
+  EXPECT_ALLOW_SYSCALL(actual.filter + index + 2, __NR_write);
+  EXPECT_ALLOW_SYSCALL(actual.filter + index + 4, __NR_rt_sigreturn);
+  EXPECT_ALLOW_SYSCALL(actual.filter + index + 6, __NR_exit);
+  EXPECT_EQ_STMT(actual.filter + index + 8, BPF_RET+BPF_K,
+      SECCOMP_RET_KILL);
+
+  free(actual.filter);
+}
diff --git a/syscall_filter_unittest_macros.h b/syscall_filter_unittest_macros.h
index ec86374..81ec67c 100644
--- a/syscall_filter_unittest_macros.h
+++ b/syscall_filter_unittest_macros.h
@@ -13,6 +13,12 @@
  * limitations under the License.
  */
 
+#define USE_RET_KILL 0
+#define USE_RET_TRAP 1
+
+#define NO_LOGGING  0
+#define USE_LOGGING 1
+
 /* BPF testing macros. */
 #define EXPECT_EQ_BLOCK(_block, _code, _k, _jt, _jf)	\
 do {	\