Refactor Minijail in preparation for ambient capabilities work.
-Extract helper functions that don't take a 'struct minijail' into a
separate file. Document this in a new HACKING file.
-Add support for long cmdline options in minijail0.c.
Bug: 32066154
Test: Unit tests on Linux and Android.
Change-Id: I246ff7f9459792e64e5be5b9c9ea650e3f1d2c58
diff --git a/system.c b/system.c
new file mode 100644
index 0000000..05d0c47
--- /dev/null
+++ b/system.c
@@ -0,0 +1,254 @@
+/* Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "system.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "util.h"
+
+/*
+ * Securebits masks.
+ * When HAVE_SECUREBITS_H is defined the masks come from the kernel header;
+ * otherwise the fallback values below are used.
+ */
+#ifdef HAVE_SECUREBITS_H
+#include <linux/securebits.h>
+#else
+#define SECURE_ALL_BITS 0x55
+/* The lock mask is the bits mask shifted left by one. */
+#define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1)
+#endif
+/* Masks for kernels < 4.3, which lack the securebit added in 4.3. */
+#define OLD_SECURE_ALL_BITS 0x15
+#define OLD_SECURE_ALL_LOCKS (OLD_SECURE_ALL_BITS << 1)
+
+/*
+ * Assert the value of SECURE_ALL_BITS at compile time.
+ * Brillo devices are currently compiled against 4.4 kernel headers. Kernel 4.3
+ * added a new securebit.
+ * Whenever a new securebit is added, the new SECURE_ALL_BITS mask returns
+ * EPERM when used on older kernels. This assert makes the build fail when the
+ * mask changes, so the change is noticed before it shows up at runtime.
+ */
+#ifdef __BRILLO__
+_Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55.");
+#endif
+
+/*
+ * lock_securebits: Sets and locks all securebits for the calling process.
+ * Returns 0 on success, -1 (after reporting through pwarn()) on failure.
+ */
+int lock_securebits(void)
+{
+	/*
+	 * Kernels 4.3+ define a new securebit (SECURE_NO_CAP_AMBIENT_RAISE),
+	 * so the SECURE_ALL_BITS and SECURE_ALL_LOCKS masks from newer kernel
+	 * headers are rejected with EPERM by older kernels. First try the
+	 * current masks.
+	 */
+	int rc = prctl(PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS);
+
+	/*
+	 * EPERM possibly means a kernel < 4.3: retry once with the masks that
+	 * are right for older (2.6.26-4.2) kernels.
+	 */
+	if (rc < 0 && errno == EPERM)
+		rc = prctl(PR_SET_SECUREBITS,
+			   OLD_SECURE_ALL_BITS | OLD_SECURE_ALL_LOCKS);
+
+	if (rc >= 0)
+		return 0;
+
+	pwarn("prctl(PR_SET_SECUREBITS) failed");
+	return -1;
+}
+
+/*
+ * write_proc_file: Writes the string |content| to /proc/<pid>/<basename>.
+ *
+ * Returns 0 when the whole string was written. Returns -errno when the file
+ * cannot be opened, and -1 when the path does not fit the internal buffer or
+ * when the write fails or is short. The file descriptor is closed on every
+ * path once it has been opened.
+ */
+int write_proc_file(pid_t pid, const char *content, const char *basename)
+{
+	int fd, ret;
+	size_t sz, len;
+	ssize_t written;
+	char filename[32];
+
+	sz = sizeof(filename);
+	/* pid_t is not guaranteed to be int; cast to match the %d conversion. */
+	ret = snprintf(filename, sz, "/proc/%d/%s", (int)pid, basename);
+	if (ret < 0 || (size_t)ret >= sz) {
+		warn("failed to generate %s filename", basename);
+		return -1;
+	}
+
+	fd = open(filename, O_WRONLY | O_CLOEXEC);
+	if (fd < 0) {
+		/* Save errno first: the logging call may overwrite it. */
+		int saved_errno = errno;
+		pwarn("failed to open '%s'", filename);
+		return -saved_errno;
+	}
+
+	/* From here on every exit goes through |out| so |fd| is not leaked. */
+	ret = -1;
+	len = strlen(content);
+	written = write(fd, content, len);
+	if (written < 0) {
+		pwarn("failed to write '%s'", filename);
+		goto out;
+	}
+
+	if ((size_t)written < len) {
+		warn("failed to write %zu bytes to '%s'", len, filename);
+		goto out;
+	}
+	ret = 0;
+
+out:
+	close(fd);
+	return ret;
+}
+
+/*
+ * get_last_valid_cap: Returns the number of the last capability the running
+ * kernel reports as valid.
+ *
+ * We specifically do not use cap_valid() as that only tells us the last
+ * valid cap we were *compiled* against (i.e. what the version of kernel
+ * headers says). If we run on a different kernel version, then it's not
+ * uncommon for that to be less (if an older kernel) or more (if a newer
+ * kernel).
+ * Normally, we read the answer from /proc. On Android, not all processes are
+ * guaranteed to be able to access '/proc/sys/kernel/cap_last_cap' so we
+ * programmatically find the value by calling prctl(PR_CAPBSET_READ).
+ *
+ * Failing to open or parse the /proc file is reported through pdie().
+ */
+unsigned int get_last_valid_cap(void)
+{
+	unsigned int last_valid_cap = 0;
+	if (is_android()) {
+		/* Probe capability numbers upwards until the kernel rejects one. */
+		for (; prctl(PR_CAPBSET_READ, last_valid_cap, 0, 0, 0) >= 0;
+		     ++last_valid_cap)
+			;
+
+		/* |last_valid_cap| will be the first failing value. */
+		if (last_valid_cap > 0) {
+			last_valid_cap--;
+		}
+	} else {
+		const char cap_file[] = "/proc/sys/kernel/cap_last_cap";
+		FILE *fp = fopen(cap_file, "re");
+		/* Passing a NULL stream to fscanf(3) is undefined behavior. */
+		if (!fp)
+			pdie("fopen(%s)", cap_file);
+		if (fscanf(fp, "%u", &last_valid_cap) != 1)
+			pdie("fscanf(%s)", cap_file);
+		fclose(fp);
+	}
+	return last_valid_cap;
+}
+
+/*
+ * config_net_loopback: Brings the loopback interface ("lo") up.
+ *
+ * Returns 0 on success and -1 on failure (after reporting through pwarn()).
+ * The helper socket is closed on every path once it has been created.
+ */
+int config_net_loopback(void)
+{
+	const char ifname[] = "lo";
+	int sock;
+	int ret = -1;
+	struct ifreq ifr;
+
+	/* Make sure people don't try to add really long names. */
+	_Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long");
+
+	sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+	if (sock < 0) {
+		pwarn("socket(AF_LOCAL) failed");
+		return -1;
+	}
+
+	/*
+	 * Do the equiv of `ip link set up lo`. The kernel will assign
+	 * IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically!
+	 * The copy cannot overflow: the static assert above bounds |ifname|.
+	 */
+	strcpy(ifr.ifr_name, ifname);
+	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
+		pwarn("ioctl(SIOCGIFFLAGS) failed");
+		goto out;
+	}
+
+	/* The kernel preserves ifr.ifr_name for use. */
+	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
+	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) {
+		pwarn("ioctl(SIOCSIFFLAGS) failed");
+		goto out;
+	}
+
+	ret = 0;
+
+out:
+	/* Single exit point so |sock| is released on failures too. */
+	close(sock);
+	return ret;
+}
+
+/*
+ * setup_pipe_end: Keeps one end of the pipe |fds| and closes the other.
+ * |index| selects the end to keep (0 or 1). Returns the kept descriptor, or
+ * -1 without touching either descriptor when |index| is out of range.
+ */
+int setup_pipe_end(int fds[2], size_t index)
+{
+	int kept_fd;
+
+	if (index != 0 && index != 1)
+		return -1;
+
+	kept_fd = fds[index];
+	/* index ^ 1 is the other slot of the two-element array. */
+	close(fds[index ^ 1]);
+	return kept_fd;
+}
+
+/*
+ * setup_and_dupe_pipe_end: Closes the unused end of the pipe |fds| and
+ * duplicates the end selected by |index| (0 or 1) onto |fd|.
+ * Returns the result of dup2(2), or -1 without touching any descriptor when
+ * |index| is out of range.
+ */
+int setup_and_dupe_pipe_end(int fds[2], size_t index, int fd)
+{
+	int kept_fd;
+
+	if (index != 0 && index != 1)
+		return -1;
+
+	kept_fd = fds[index];
+	close(fds[index ^ 1]);
+
+	/* Make |fd| refer to the kept end of the pipe. */
+	return dup2(kept_fd, fd);
+}
+
+/*
+ * write_pid_to_path: Writes |pid| as a decimal number followed by a newline
+ * to the file at |path|, opened with fopen(3) mode "w".
+ *
+ * Returns 0 on success, -errno when opening or closing the file fails, and
+ * -1 when fprintf(3) fails. The stream is closed on every path once it has
+ * been opened.
+ */
+int write_pid_to_path(pid_t pid, const char *path)
+{
+	int saved_errno;
+	FILE *fp = fopen(path, "w");
+
+	if (!fp) {
+		/* Save errno first: the logging call may overwrite it. */
+		saved_errno = errno;
+		pwarn("failed to open '%s'", path);
+		return -saved_errno;
+	}
+	if (fprintf(fp, "%d\n", (int)pid) < 0) {
+		/* fprintf(3) does not set errno on failure. */
+		warn("fprintf(%s) failed", path);
+		/* Already failing; just release the stream. */
+		fclose(fp);
+		return -1;
+	}
+	/* fclose(3) flushes buffered output, so its result must be checked. */
+	if (fclose(fp)) {
+		saved_errno = errno;
+		pwarn("fclose(%s) failed", path);
+		return -saved_errno;
+	}
+
+	return 0;
+}
+
+/*
+ * setup_mount_destination: Ensures the mount target exists.
+ * Creates it if needed and possible.
+ *
+ * If |dest| already exists nothing is changed. Otherwise |dest| is created
+ * (as a directory or an empty file, see below) and its ownership is set to
+ * |uid|:|gid|.
+ *
+ * Returns 0 on success and -errno when creating |dest| or changing its
+ * ownership fails.
+ */
+int setup_mount_destination(const char *source, const char *dest, uid_t uid,
+			    uid_t gid)
+{
+	int rc;
+	struct stat st_buf;
+
+	rc = stat(dest, &st_buf);
+	if (rc == 0) /* destination exists */
+		return 0;
+
+	/*
+	 * Try to create the destination.
+	 * Either make a directory or touch a file depending on the source type.
+	 * If the source doesn't exist, assume it is a filesystem type such as
+	 * "tmpfs" and create a directory to mount it on.
+	 */
+	rc = stat(source, &st_buf);
+	if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode)) {
+		if (mkdir(dest, 0700))
+			return -errno;
+	} else {
+		int fd = open(dest, O_RDWR | O_CREAT, 0700);
+		if (fd < 0)
+			return -errno;
+		close(fd);
+	}
+
+	/* Report chown(2) failures as -errno, like the failures above. */
+	if (chown(dest, uid, gid))
+		return -errno;
+	return 0;
+}