minijail: Support setting syscall table with PR_ALT_SYSCALL

Add support for setting the syscall table for a jailed process using
prctl(PR_ALT_SYSCALL).  This adds the option '-a <table>' which
changes the jailed process's syscall table to the alt_syscall
table named <table>.  alt_syscall tables must be registered in the
kernel (see crosreview.com/312137 for an example of how this is done).

Bug: 25649436
TEST=Create a test blacklist that blocks write(2) and observe that
'minijail0 -a test -- /bin/echo hello' prints nothing to stdout.

Change-Id: Idddafa1d0b81483a594e05d9d3390d4f9ad849c6
Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
diff --git a/libminijail.c b/libminijail.c
index e9dfc89..544651a 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -53,6 +53,10 @@
 # define PR_SET_SECCOMP 22
 #endif
 
+#ifndef PR_ALT_SYSCALL
+# define PR_ALT_SYSCALL 0x43724f53
+#endif
+
 /* For seccomp_filter using BPF. */
 #ifndef PR_SET_NO_NEW_PRIVS
 # define PR_SET_NO_NEW_PRIVS 38
@@ -102,6 +106,7 @@
 		int mount_tmp:1;
 		int do_init:1;
 		int pid_file:1;
+		int alt_syscall:1;
 	} flags;
 	uid_t uid;
 	gid_t gid;
@@ -116,6 +121,7 @@
 	char *pid_file_path;
 	char *uidmap;
 	char *gidmap;
+	char *alt_syscall_table;
 	struct sock_fprog *filter_prog;
 	struct mountpoint *mounts_head;
 	struct mountpoint *mounts_tail;
@@ -539,6 +545,15 @@
 	fclose(file);
 }
 
+int API minijail_use_alt_syscall(struct minijail *j, const char *table)
+{	/* Stores a copy of table in j and sets flags.alt_syscall; returns 0, or -ENOMEM if the copy fails. */
+	j->alt_syscall_table = strdup(table);	/* heap copy; NOTE(review): a previous value is overwritten without free() */
+	if (!j->alt_syscall_table)
+		return -ENOMEM;	/* flags.alt_syscall is not touched on this path */
+	j->flags.alt_syscall = 1;	/* tested before prctl(PR_ALT_SYSCALL, 1, j->alt_syscall_table) is issued */
+	return 0;
+}
+
 struct marshal_state {
 	size_t available;
 	size_t total;
@@ -577,6 +592,10 @@
 		marshal_append(state, j->user, strlen(j->user) + 1);
 	if (j->chrootdir)
 		marshal_append(state, j->chrootdir, strlen(j->chrootdir) + 1);
+	if (j->alt_syscall_table) {
+		marshal_append(state, j->alt_syscall_table,
+			       strlen(j->alt_syscall_table) + 1);
+	}
 	if (j->flags.seccomp_filter && j->filter_prog) {
 		struct sock_fprog *fp = j->filter_prog;
 		marshal_append(state, (char *)fp->filter,
@@ -673,6 +692,15 @@
 			goto bad_chrootdir;
 	}
 
+	if (j->alt_syscall_table) {	/* stale pointer */
+		char *alt_syscall_table = consumestr(&serialized, &length);
+		if (!alt_syscall_table)
+			goto bad_syscall_table;
+		j->alt_syscall_table = strdup(alt_syscall_table);
+		if (!j->alt_syscall_table)
+			goto bad_syscall_table;
+	}
+
 	if (j->flags.seccomp_filter && j->filter_len > 0) {
 		size_t ninstrs = j->filter_len;
 		if (ninstrs > (SIZE_MAX / sizeof(struct sock_filter)) ||
@@ -720,6 +748,9 @@
 		free(j->filter_prog);
 	}
 bad_filters:
+	if (j->alt_syscall_table)
+		free(j->alt_syscall_table);
+bad_syscall_table:
 	if (j->chrootdir)
 		free(j->chrootdir);
 bad_chrootdir:
@@ -728,6 +759,7 @@
 clear_pointers:
 	j->user = NULL;
 	j->chrootdir = NULL;
+	j->alt_syscall_table = NULL;
 out:
 	return ret;
 }
@@ -1153,6 +1185,15 @@
 	}
 
 	/*
+	 * Select the specified alternate syscall table.  The table must not
+	 * block prctl(2) if we're using seccomp as well.
+	 */
+	if (j->flags.alt_syscall) {
+		if (prctl(PR_ALT_SYSCALL, 1, j->alt_syscall_table))
+			pdie("prctl(PR_ALT_SYSCALL)");
+	}
+
+	/*
 	 * seccomp has to come last since it cuts off all the other
 	 * privilege-dropping syscalls :)
 	 */
@@ -1701,5 +1742,7 @@
 		free(j->user);
 	if (j->chrootdir)
 		free(j->chrootdir);
+	if (j->alt_syscall_table)
+		free(j->alt_syscall_table);
 	free(j);
 }