/* Copyright 2017 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#include "system.h"
7
8#include <errno.h>
9#include <fcntl.h>
Luis Hector Chavez71323552017-09-05 09:17:22 -070010#include <grp.h>
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040011#include <net/if.h>
Luis Hector Chavez71323552017-09-05 09:17:22 -070012#include <pwd.h>
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040013#include <stdbool.h>
14#include <stdio.h>
15#include <string.h>
16#include <sys/ioctl.h>
17#include <sys/prctl.h>
18#include <sys/socket.h>
19#include <sys/stat.h>
Luis Hector Chavez0bacbf82018-07-10 20:06:55 -070020#include <sys/statvfs.h>
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040021#include <unistd.h>
22
Mattias Nisslere5200192018-10-18 12:29:40 +020023#include <linux/securebits.h>
24
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040025#include "util.h"
26
/*
 * SECBIT_NO_CAP_AMBIENT_RAISE was added in kernel 4.3, so fill in the
 * definition if the securebits header doesn't provide it.
 */
#ifndef SECBIT_NO_CAP_AMBIENT_RAISE
#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(6))
#endif

#ifndef SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED
#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(7))
#endif

/*
 * Assert the value of SECURE_ALL_BITS at compile-time.
 * Android devices are currently compiled against 4.4 kernel headers. Kernel 4.3
 * added a new securebit.
 * When a new securebit is added, the new SECURE_ALL_BITS mask will return EPERM
 * when used on older kernels. The compile-time assert will catch this situation
 * at compile time.
 */
#if defined(__ANDROID__)
_Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55.");
#endif
50
/*
 * secure_noroot_set_and_locked: Checks the NOROOT securebits in |mask|.
 *
 * Returns 1 when both SECBIT_NOROOT and SECBIT_NOROOT_LOCKED are set in
 * |mask|, and 0 otherwise.
 */
int secure_noroot_set_and_locked(uint64_t mask)
{
	return (mask & (SECBIT_NOROOT | SECBIT_NOROOT_LOCKED)) ==
	       (SECBIT_NOROOT | SECBIT_NOROOT_LOCKED);
}
56
/*
 * lock_securebits: Sets and locks the process securebits.
 *
 * |skip_mask| lists securebits that must not be touched. When
 * |require_keep_caps| is false, SECBIT_KEEP_CAPS is left off if it is
 * already locked in the off state.
 *
 * Returns 0 on success (including when no bits are left to set) and -1 if
 * one of the prctl(2) calls fails.
 */
int lock_securebits(uint64_t skip_mask, bool require_keep_caps)
{
	/* The general idea is to set all bits, subject to exceptions below. */
	unsigned long securebits = SECURE_ALL_BITS | SECURE_ALL_LOCKS;

	/*
	 * SECBIT_KEEP_CAPS is special in that it is automatically cleared on
	 * execve(2). This implies that attempts to set SECBIT_KEEP_CAPS (as is
	 * the default) in processes that have it locked already (such as nested
	 * minijail usage) would fail. Thus, unless the caller requires it,
	 * allow it to remain off if it is already locked.
	 */
	if (!require_keep_caps) {
		int current_securebits = prctl(PR_GET_SECUREBITS);
		if (current_securebits < 0) {
			pwarn("prctl(PR_GET_SECUREBITS) failed");
			return -1;
		}

		if ((current_securebits & SECBIT_KEEP_CAPS_LOCKED) != 0 &&
		    (current_securebits & SECBIT_KEEP_CAPS) == 0) {
			securebits &= ~SECBIT_KEEP_CAPS;
		}
	}

	/*
	 * Ambient capabilities can only be raised if they're already present
	 * in the permitted *and* inheritable set. Therefore, we don't really
	 * need to lock the NO_CAP_AMBIENT_RAISE securebit, since we are already
	 * configuring the permitted and inheritable set.
	 */
	securebits &=
	    ~(SECBIT_NO_CAP_AMBIENT_RAISE | SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED);

	/* Don't set any bits that the user requested not to be touched. */
	securebits &= ~skip_mask;

	if (!securebits) {
		warn("not locking any securebits");
		return 0;
	}
	int securebits_ret = prctl(PR_SET_SECUREBITS, securebits);
	if (securebits_ret < 0) {
		pwarn("prctl(PR_SET_SECUREBITS) failed");
		return -1;
	}

	return 0;
}
106
107int write_proc_file(pid_t pid, const char *content, const char *basename)
108{
109 int fd, ret;
110 size_t sz, len;
111 ssize_t written;
112 char filename[32];
113
114 sz = sizeof(filename);
115 ret = snprintf(filename, sz, "/proc/%d/%s", pid, basename);
116 if (ret < 0 || (size_t)ret >= sz) {
117 warn("failed to generate %s filename", basename);
118 return -1;
119 }
120
121 fd = open(filename, O_WRONLY | O_CLOEXEC);
122 if (fd < 0) {
123 pwarn("failed to open '%s'", filename);
124 return -errno;
125 }
126
127 len = strlen(content);
128 written = write(fd, content, len);
129 if (written < 0) {
130 pwarn("failed to write '%s'", filename);
Jorge Lucangeli Obes673c89d2018-10-04 16:08:10 -0400131 return -errno;
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400132 }
133
134 if ((size_t)written < len) {
135 warn("failed to write %zu bytes to '%s'", len, filename);
136 return -1;
137 }
138 close(fd);
139 return 0;
140}
141
/*
 * We specifically do not use cap_valid() as that only tells us the last
 * valid cap we were *compiled* against (i.e. what the version of kernel
 * headers says). If we run on a different kernel version, then it's not
 * uncommon for that to be less (if an older kernel) or more (if a newer
 * kernel).
 * Normally, we suck up the answer via /proc. On Android, not all processes are
 * guaranteed to be able to access '/proc/sys/kernel/cap_last_cap' so we
 * programmatically find the value by calling prctl(PR_CAPBSET_READ).
 *
 * Dies via pdie() if the /proc file cannot be opened or parsed.
 */
unsigned int get_last_valid_cap(void)
{
	unsigned int last_valid_cap = 0;
	if (is_android()) {
		for (; prctl(PR_CAPBSET_READ, last_valid_cap, 0, 0, 0) >= 0;
		     ++last_valid_cap)
			;

		/* |last_valid_cap| will be the first failing value. */
		if (last_valid_cap > 0) {
			last_valid_cap--;
		}
	} else {
		const char cap_file[] = "/proc/sys/kernel/cap_last_cap";
		FILE *fp = fopen(cap_file, "re");
		/* Passing a NULL stream to fscanf(3) is undefined behavior. */
		if (!fp)
			pdie("fopen(%s)", cap_file);
		if (fscanf(fp, "%u", &last_valid_cap) != 1)
			pdie("fscanf(%s)", cap_file);
		fclose(fp);
	}
	return last_valid_cap;
}
173
/*
 * cap_ambient_supported: Probes for ambient capability support.
 *
 * Returns 1 if the PR_CAP_AMBIENT_IS_SET query for CAP_CHOWN does not fail,
 * and 0 if prctl(2) reports an error.
 */
int cap_ambient_supported(void)
{
	return prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) >=
	       0;
}
179
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400180int config_net_loopback(void)
181{
182 const char ifname[] = "lo";
183 int sock;
184 struct ifreq ifr;
185
186 /* Make sure people don't try to add really long names. */
187 _Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long");
188
189 sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
190 if (sock < 0) {
191 pwarn("socket(AF_LOCAL) failed");
192 return -1;
193 }
194
195 /*
196 * Do the equiv of `ip link set up lo`. The kernel will assign
197 * IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically!
198 */
199 strcpy(ifr.ifr_name, ifname);
200 if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
201 pwarn("ioctl(SIOCGIFFLAGS) failed");
202 return -1;
203 }
204
205 /* The kernel preserves ifr.ifr_name for use. */
206 ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
207 if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) {
208 pwarn("ioctl(SIOCSIFFLAGS) failed");
209 return -1;
210 }
211
212 close(sock);
213 return 0;
214}
215
/*
 * setup_pipe_end: Keeps one end of a pipe and closes the other.
 *
 * |index| selects the end to keep (0 or 1). Returns fds[index] after closing
 * the opposite end, or -1 (closing nothing) if |index| is out of range.
 */
int setup_pipe_end(int fds[2], size_t index)
{
	if (index > 1)
		return -1;

	close(fds[1 - index]);
	return fds[index];
}
224
/*
 * dupe_and_close_fd: dup2(2)s one end of a pipe onto |fd| and closes the pipe.
 *
 * |index| selects which end of |fds| (0 or 1) is duplicated. Returns the
 * result of dup2(2) (|fd| on success, -1 on failure), or -1 without touching
 * anything if |index| is out of range.
 */
int dupe_and_close_fd(int fds[2], size_t index, int fd)
{
	if (index > 1)
		return -1;

	/* dup2(2) the corresponding end of the pipe into |fd|. */
	fd = dup2(fds[index], fd);

	/*
	 * Close both original pipe ends, but never the descriptor we are about
	 * to return: if a pipe end already had the target number, closing it
	 * would hand the caller a closed descriptor. When dup2(2) failed
	 * (fd == -1) both ends are closed unconditionally.
	 */
	for (size_t i = 0; i < 2; ++i) {
		if (fd < 0 || fds[i] != fd)
			close(fds[i]);
	}
	return fd;
}
237
/*
 * write_pid_to_path: Writes |pid| followed by a newline to the file |path|.
 *
 * Returns 0 on success, -errno if fopen(3) or fclose(3) fails, and -1 if
 * fprintf(3) fails. The stream is closed on every path once opened.
 */
int write_pid_to_path(pid_t pid, const char *path)
{
	int rc;
	FILE *fp = fopen(path, "we");

	if (!fp) {
		/* Save errno before logging so the logger cannot clobber it. */
		rc = errno;
		pwarn("failed to open '%s'", path);
		return -rc;
	}
	if (fprintf(fp, "%d\n", (int)pid) < 0) {
		/* fprintf(3) does not set errno on failure. */
		warn("fprintf(%s) failed", path);
		fclose(fp);
		return -1;
	}
	if (fclose(fp)) {
		rc = errno;
		pwarn("fclose(%s) failed", path);
		return -rc;
	}

	return 0;
}
258
/*
 * Create the |path| directory and its parents (if need be) with |mode|.
 * If not |isdir|, then |path| is actually a file, so the last component
 * will not be created.
 *
 * Returns 0 on success and -errno on failure.
 */
int mkdir_p(const char *path, mode_t mode, bool isdir)
{
	int rc;
	char *dir = strdup(path);
	if (!dir) {
		rc = errno;
		pwarn("strdup(%s) failed", path);
		return -rc;
	}

	/*
	 * Starting from the root, work our way out to the end. Skip the
	 * first character so a leading '/' is not treated as a component,
	 * but only when the string is non-empty: |dir + 1| would otherwise
	 * point past the terminating NUL.
	 */
	char *p = dir[0] != '\0' ? strchr(dir + 1, '/') : NULL;
	while (p) {
		*p = '\0';
		if (mkdir(dir, mode) && errno != EEXIST) {
			rc = errno;
			pwarn("mkdir(%s, 0%o) failed", dir, mode);
			free(dir);
			return -rc;
		}
		*p = '/';
		p = strchr(p + 1, '/');
	}

	/*
	 * Create the last directory.  We still check EEXIST here in case
	 * of trailing slashes.
	 */
	free(dir);
	if (isdir && mkdir(path, mode) && errno != EEXIST) {
		rc = errno;
		pwarn("mkdir(%s, 0%o) failed", path, mode);
		return -rc;
	}
	return 0;
}
300
301/*
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400302 * setup_mount_destination: Ensures the mount target exists.
303 * Creates it if needed and possible.
304 */
305int setup_mount_destination(const char *source, const char *dest, uid_t uid,
Luis Hector Chavez0bacbf82018-07-10 20:06:55 -0700306 uid_t gid, bool bind, unsigned long *mnt_flags)
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400307{
308 int rc;
309 struct stat st_buf;
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400310 bool domkdir;
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400311
312 rc = stat(dest, &st_buf);
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400313 if (rc == 0) /* destination exists */
Jorge Lucangeli Obesb4b7c5a2019-09-09 10:47:36 -0400314 return 0;
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400315
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400316 /*
317 * Try to create the destination.
318 * Either make a directory or touch a file depending on the source type.
319 *
320 * If the source isn't an absolute path, assume it is a filesystem type
321 * such as "tmpfs" and create a directory to mount it on. The dest will
322 * be something like "none" or "proc" which we shouldn't be checking.
323 */
324 if (source[0] == '/') {
325 /* The source is an absolute path -- it better exist! */
326 rc = stat(source, &st_buf);
327 if (rc) {
Jorge Lucangeli Obes9299cae2019-08-23 11:28:39 -0400328 rc = errno;
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400329 pwarn("stat(%s) failed", source);
Jorge Lucangeli Obes9299cae2019-08-23 11:28:39 -0400330 return -rc;
331 }
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400332
333 /*
334 * If bind mounting, we only create a directory if the source
335 * is a directory, else we always bind mount it as a file to
336 * support device nodes, sockets, etc...
337 *
338 * For all other mounts, we assume a block/char source is
339 * going to want a directory to mount to. If the source is
340 * something else (e.g. a fifo or socket), this probably will
341 * not do the right thing, but we'll fail later on when we try
342 * to mount(), so shouldn't be a big deal.
343 */
344 domkdir = S_ISDIR(st_buf.st_mode) ||
345 (!bind && (S_ISBLK(st_buf.st_mode) ||
346 S_ISCHR(st_buf.st_mode)));
347
Jorge Lucangeli Obesb4b7c5a2019-09-09 10:47:36 -0400348 /* If bind mounting, also grab the mount flags of the source. */
349 if (bind && mnt_flags) {
350 struct statvfs stvfs_buf;
351 rc = statvfs(source, &stvfs_buf);
352 if (rc) {
353 rc = errno;
354 pwarn(
355 "failed to look up mount flags: source=%s",
356 source);
357 return -rc;
358 }
359 *mnt_flags = stvfs_buf.f_flag;
360 }
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400361 } else {
362 /* The source is a relative path -- assume it's a pseudo fs. */
363
364 /* Disallow relative bind mounts. */
365 if (bind) {
366 warn("relative bind-mounts are not allowed: source=%s",
367 source);
368 return -EINVAL;
369 }
370
371 domkdir = true;
Mike Frysingereaab4202017-08-14 14:57:21 -0400372 }
373
Mike Frysinger5fdba4e2018-01-17 15:39:48 -0500374 /*
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400375 * Now that we know what we want to do, do it!
376 * We always create the intermediate dirs and the final path with 0755
377 * perms and root/root ownership. This shouldn't be a problem because
378 * the actual mount will set those perms/ownership on the mount point
379 * which is all people should need to access it.
Mike Frysinger5fdba4e2018-01-17 15:39:48 -0500380 */
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400381 rc = mkdir_p(dest, 0755, domkdir);
382 if (rc)
383 return rc;
384 if (!domkdir) {
385 int fd = open(dest, O_RDWR | O_CREAT | O_CLOEXEC, 0700);
386 if (fd < 0) {
387 rc = errno;
388 pwarn("open(%s) failed", dest);
389 return -rc;
390 }
391 close(fd);
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400392 }
Jorge Lucangeli Obes7654c6e2019-09-09 10:45:38 -0400393 if (chown(dest, uid, gid)) {
394 rc = errno;
395 pwarn("chown(%s, %u, %u) failed", dest, uid, gid);
396 return -rc;
397 }
Jorge Lucangeli Obesb4b7c5a2019-09-09 10:47:36 -0400398 return 0;
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -0400399}
Luis Hector Chavez71323552017-09-05 09:17:22 -0700400
/*
 * lookup_user: Gets the uid/gid for the given username.
 *
 * Returns 0 and fills |*uid| and |*gid| on success, -ENOMEM if the scratch
 * buffer cannot be allocated, and -1 if the user is not found or the lookup
 * fails.
 */
int lookup_user(const char *user, uid_t *uid, gid_t *gid)
{
	/* Stop growing the scratch buffer once it reaches this size. */
	const size_t max_sz = (size_t)1 << 20;
	struct passwd pw;
	struct passwd *ppw = NULL;

	/*
	 * sysconf(_SC_GETPW_R_SIZE_MAX) only yields a suggested initial size
	 * for the buffer (or -1), so start there and grow on ERANGE.
	 */
	long hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	size_t sz = hint > 0 ? (size_t)hint
			     : 65536; /* your guess is as good as mine... */

	for (;;) {
		char *buf = malloc(sz);
		if (!buf)
			return -ENOMEM;
		int err = getpwnam_r(user, &pw, buf, sz, &ppw);
		/*
		 * We're safe to free the buffer here. The strings inside |pw|
		 * point inside |buf|, but we don't use any of them; this
		 * leaves the pointers dangling but it's safe. |ppw| points at
		 * |pw| if getpwnam_r(3) succeeded.
		 */
		free(buf);
		if (err == ERANGE && sz < max_sz) {
			/* |buf| was too small, retry with a bigger one. */
			sz *= 2;
			continue;
		}
		/* getpwnam_r(3) does *not* set errno when |ppw| is NULL. */
		if (err != 0 || !ppw)
			return -1;
		break;
	}

	*uid = ppw->pw_uid;
	*gid = ppw->pw_gid;
	return 0;
}
436
/*
 * lookup_group: Gets the gid for the given group name.
 *
 * Returns 0 and fills |*gid| on success, -ENOMEM if the scratch buffer cannot
 * be allocated, and -1 if the group is not found or the lookup fails.
 */
int lookup_group(const char *group, gid_t *gid)
{
	/* Stop growing the scratch buffer once it reaches this size. */
	const size_t max_sz = (size_t)1 << 20;
	struct group gr;
	struct group *pgr = NULL;

	/*
	 * sysconf(_SC_GETGR_R_SIZE_MAX) only yields a suggested initial size
	 * for the buffer (or -1), so start there and grow on ERANGE.
	 */
	long hint = sysconf(_SC_GETGR_R_SIZE_MAX);
	size_t sz = hint > 0 ? (size_t)hint
			     : 65536; /* and mine is as good as yours, really */

	for (;;) {
		char *buf = malloc(sz);
		if (!buf)
			return -ENOMEM;
		int err = getgrnam_r(group, &gr, buf, sz, &pgr);
		/*
		 * We're safe to free the buffer here. The strings inside |gr|
		 * point inside |buf|, but we don't use any of them; this
		 * leaves the pointers dangling but it's safe. |pgr| points at
		 * |gr| if getgrnam_r(3) succeeded.
		 */
		free(buf);
		if (err == ERANGE && sz < max_sz) {
			/* |buf| was too small, retry with a bigger one. */
			sz *= 2;
			continue;
		}
		/* getgrnam_r(3) does *not* set errno when |pgr| is NULL. */
		if (err != 0 || !pgr)
			return -1;
		break;
	}

	*gid = pgr->gr_gid;
	return 0;
}
Jorge Lucangeli Obes32201f82019-06-12 14:45:06 -0400470
/*
 * seccomp_action_is_available: Checks whether the kernel lists the seccomp
 * action |wanted| in /proc/sys/kernel/seccomp/actions_avail.
 *
 * Returns 1 if |wanted| appears in the first line of that file, and 0 if it
 * does not, if the file cannot be read, or when running on Android.
 */
static int seccomp_action_is_available(const char *wanted)
{
	if (is_android()) {
		/*
		 * Accessing |actions_avail| is generating SELinux denials, so
		 * skip for now.
		 * TODO(crbug.com/978022, jorgelo): Remove once the denial is
		 * fixed.
		 */
		return 0;
	}
	const char actions_avail_path[] =
	    "/proc/sys/kernel/seccomp/actions_avail";
	FILE *f = fopen(actions_avail_path, "re");

	if (!f) {
		pwarn("fopen(%s) failed", actions_avail_path);
		return 0;
	}

	char *actions_avail = NULL;
	size_t buf_size = 0;
	if (getline(&actions_avail, &buf_size, f) < 0) {
		/* Log first: the cleanup calls below may overwrite errno. */
		pwarn("getline() failed");
		free(actions_avail);
		fclose(f);
		return 0;
	}
	fclose(f);

	/*
	 * This is just substring search, which means that partial matches will
	 * match too (e.g. "action" would match "longaction"). There are no
	 * seccomp actions which include other actions though, so we're good for
	 * now. Eventually we might want to split the string by spaces.
	 */
	int found = strstr(actions_avail, wanted) != NULL;
	free(actions_avail);
	return found;
}
507
/*
 * seccomp_ret_log_available: Reports whether the "log" seccomp action is
 * available. The answer is computed on the first call and cached in a
 * function-local static for subsequent calls.
 */
int seccomp_ret_log_available(void)
{
	static int ret_log_available = -1;

	if (ret_log_available == -1)
		ret_log_available = seccomp_action_is_available("log");

	return ret_log_available;
}
517
/*
 * seccomp_ret_kill_process_available: Reports whether the "kill_process"
 * seccomp action is available. The answer is computed on the first call and
 * cached in a function-local static for subsequent calls.
 */
int seccomp_ret_kill_process_available(void)
{
	static int ret_kill_process_available = -1;

	if (ret_kill_process_available == -1)
		ret_kill_process_available =
		    seccomp_action_is_available("kill_process");

	return ret_kill_process_available;
}