blob: 7527653c635e9e6c49f519849e5adab648d83ebc [file] [log] [blame]
Mike Frysinger50e31fa2018-01-19 18:59:49 -05001/* Copyright 2017 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -04004 */
5
6#include "system.h"
7
8#include <errno.h>
9#include <fcntl.h>
Luis Hector Chavez71323552017-09-05 09:17:22 -070010#include <grp.h>
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040011#include <net/if.h>
Luis Hector Chavez71323552017-09-05 09:17:22 -070012#include <pwd.h>
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040013#include <stdbool.h>
14#include <stdio.h>
15#include <string.h>
16#include <sys/ioctl.h>
17#include <sys/prctl.h>
18#include <sys/socket.h>
19#include <sys/stat.h>
Luis Hector Chavez0bacbf82018-07-10 20:06:55 -070020#include <sys/statvfs.h>
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040021#include <unistd.h>
22
Mattias Nisslere5200192018-10-18 12:29:40 +020023#include <linux/securebits.h>
24
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040025#include "util.h"
26
Mattias Nisslere5200192018-10-18 12:29:40 +020027/*
28 * SECBIT_NO_CAP_AMBIENT_RAISE was added in kernel 4.3, so fill in the
29 * definition if the securebits header doesn't provide it.
30 */
31#ifndef SECBIT_NO_CAP_AMBIENT_RAISE
32#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(6))
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040033#endif
Jorge Lucangeli Obesa6eb21a2017-04-20 10:44:00 -040034
Mattias Nisslere5200192018-10-18 12:29:40 +020035#ifndef SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED
36#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(7))
37#endif
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040038
39/*
40 * Assert the value of SECURE_ALL_BITS at compile-time.
Jorge Lucangeli Obesa6eb21a2017-04-20 10:44:00 -040041 * Android devices are currently compiled against 4.4 kernel headers. Kernel 4.3
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040042 * added a new securebit.
43 * When a new securebit is added, the new SECURE_ALL_BITS mask will return EPERM
44 * when used on older kernels. The compile-time assert will catch this situation
45 * at compile time.
46 */
Jorge Lucangeli Obesa6eb21a2017-04-20 10:44:00 -040047#if defined(__ANDROID__)
Jorge Lucangeli Obes0b208772017-04-19 14:15:46 -040048_Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55.");
49#endif
50
/*
 * secure_keep_caps_locked: Reports whether SECBIT_KEEP_CAPS_LOCKED is set.
 *
 * Returns the masked bit (non-zero) when it is set, and 0 when it is clear or
 * when the current securebits cannot be read via prctl(PR_GET_SECUREBITS).
 */
int secure_keep_caps_locked(void)
{
	const int current = prctl(PR_GET_SECUREBITS);

	/* A failed query is treated the same as "not locked". */
	return current < 0 ? 0 : (current & SECBIT_KEEP_CAPS_LOCKED);
}
58
/*
 * secure_noroot_set_and_locked: Checks a securebits |mask| for NOROOT.
 *
 * Returns 1 only when both SECBIT_NOROOT and SECBIT_NOROOT_LOCKED are present
 * in |mask|, and 0 otherwise.
 */
int secure_noroot_set_and_locked(uint64_t mask)
{
	const uint64_t wanted = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;

	return (mask & wanted) == wanted;
}
64
/*
 * lock_securebits: Sets and locks the securebits of the calling thread.
 *
 * All securebits known to the kernel headers, together with their lock bits,
 * are requested, minus the NO_CAP_AMBIENT_RAISE pair (see below) and minus
 * every bit present in |skip_mask|.
 *
 * Returns 0 on success (including the case where nothing is left to lock) and
 * -1 when prctl(PR_SET_SECUREBITS) fails.
 */
int lock_securebits(uint64_t skip_mask)
{
	/* The general idea is to set all bits, subject to exceptions below. */
	unsigned long securebits = SECURE_ALL_BITS | SECURE_ALL_LOCKS;

	/*
	 * Ambient capabilities can only be raised if they're already present
	 * in the permitted *and* inheritable set. Therefore, we don't really
	 * need to lock the NO_CAP_AMBIENT_RAISE securebit, since we are already
	 * configuring the permitted and inheritable set.
	 * Leaving the pair out also keeps the mask valid on kernels that
	 * predate this securebit.
	 */
	securebits &= ~(unsigned long)(SECBIT_NO_CAP_AMBIENT_RAISE |
				       SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED);

	/* Honor the caller's request to leave some bits alone. */
	securebits &= ~skip_mask;

	if (!securebits) {
		warn("not locking any securebits");
		return 0;
	}

	if (prctl(PR_SET_SECUREBITS, securebits) < 0) {
		pwarn("prctl(PR_SET_SECUREBITS) failed");
		return -1;
	}

	return 0;
}
88
/*
 * write_proc_file: Writes |content| to /proc/<pid>/<basename>.
 *
 * Returns 0 on success, a negative errno value if open(2) or write(2) fails,
 * and -1 if the file name does not fit or the write was short.
 * The file descriptor is closed on every path.
 */
int write_proc_file(pid_t pid, const char *content, const char *basename)
{
	int fd, ret;
	size_t sz, len;
	ssize_t written;
	char filename[32];

	sz = sizeof(filename);
	ret = snprintf(filename, sz, "/proc/%d/%s", pid, basename);
	if (ret < 0 || (size_t)ret >= sz) {
		warn("failed to generate %s filename", basename);
		return -1;
	}

	fd = open(filename, O_WRONLY | O_CLOEXEC);
	if (fd < 0) {
		/* Save errno first: logging may clobber it. */
		ret = errno;
		pwarn("failed to open '%s'", filename);
		return -ret;
	}

	len = strlen(content);
	written = write(fd, content, len);
	if (written < 0) {
		ret = errno;
		pwarn("failed to write '%s'", filename);
		close(fd);
		return -ret;
	}

	if ((size_t)written < len) {
		warn("failed to write %zu bytes to '%s'", len, filename);
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}
123
/*
 * We specifically do not use cap_valid() as that only tells us the last
 * valid cap we were *compiled* against (i.e. what the version of kernel
 * headers says). If we run on a different kernel version, then it's not
 * uncommon for that to be less (if an older kernel) or more (if a newer
 * kernel).
 * Normally, we suck up the answer via /proc. On Android, not all processes are
 * guaranteed to be able to access '/proc/sys/kernel/cap_last_cap' so we
 * programmatically find the value by calling prctl(PR_CAPBSET_READ).
 *
 * Dies (via pdie()) if the /proc file cannot be opened or parsed.
 */
unsigned int get_last_valid_cap(void)
{
	unsigned int last_valid_cap = 0;
	if (is_android()) {
		for (; prctl(PR_CAPBSET_READ, last_valid_cap, 0, 0, 0) >= 0;
		     ++last_valid_cap)
			;

		/* |last_valid_cap| will be the first failing value. */
		if (last_valid_cap > 0) {
			last_valid_cap--;
		}
	} else {
		const char cap_file[] = "/proc/sys/kernel/cap_last_cap";
		FILE *fp = fopen(cap_file, "re");
		/* Passing a NULL stream to fscanf(3) is undefined behavior. */
		if (!fp)
			pdie("fopen(%s)", cap_file);
		if (fscanf(fp, "%u", &last_valid_cap) != 1)
			pdie("fscanf(%s)", cap_file);
		fclose(fp);
	}
	return last_valid_cap;
}
155
/*
 * cap_ambient_supported: Probes for ambient capability support.
 *
 * Queries the ambient set for CAP_CHOWN; the answer itself is irrelevant, only
 * whether the prctl(2) call is accepted. Returns 1 if it is, 0 otherwise.
 */
int cap_ambient_supported(void)
{
	const int probe =
	    prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0);

	return probe >= 0;
}
161
/*
 * config_net_loopback: Brings the "lo" interface up.
 *
 * Returns 0 on success and -1 on failure. The helper socket is closed on
 * every path.
 */
int config_net_loopback(void)
{
	const char ifname[] = "lo";
	int sock;
	int ret = -1;
	struct ifreq ifr;

	/* Make sure people don't try to add really long names. */
	_Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long");

	sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sock < 0) {
		pwarn("socket(AF_LOCAL) failed");
		return -1;
	}

	/*
	 * Do the equiv of `ip link set up lo`.  The kernel will assign
	 * IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically!
	 */
	strcpy(ifr.ifr_name, ifname);
	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
		pwarn("ioctl(SIOCGIFFLAGS) failed");
		goto out;
	}

	/* The kernel preserves ifr.ifr_name for use. */
	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) {
		pwarn("ioctl(SIOCSIFFLAGS) failed");
		goto out;
	}

	ret = 0;
out:
	close(sock);
	return ret;
}
197
/*
 * setup_pipe_end: Keeps one end of a pipe and closes the other.
 *
 * |index| selects the end of |fds| to keep (0 or 1). Returns the kept file
 * descriptor, or -1 without touching |fds| when |index| is out of range.
 */
int setup_pipe_end(int fds[2], size_t index)
{
	switch (index) {
	case 0:
		close(fds[1]);
		return fds[0];
	case 1:
		close(fds[0]);
		return fds[1];
	default:
		return -1;
	}
}
206
/*
 * setup_and_dupe_pipe_end: Keeps one end of a pipe and duplicates it to |fd|.
 *
 * The end of |fds| not selected by |index| (0 or 1) is closed, then the
 * selected end is dup2(2)'d onto |fd|. Returns the result of dup2(2), or -1
 * without touching |fds| when |index| is out of range.
 */
int setup_and_dupe_pipe_end(int fds[2], size_t index, int fd)
{
	if (index >= 2)
		return -1;

	const size_t unused = index == 0 ? 1 : 0;
	close(fds[unused]);

	/* Make |fd| refer to the end of the pipe we are keeping. */
	const int kept = fds[index];
	return dup2(kept, fd);
}
216
/*
 * write_pid_to_path: Writes |pid| followed by a newline to the file at |path|.
 *
 * The file is opened with mode "we" (truncate/create, close-on-exec).
 * Returns 0 on success, a negative errno value if fopen(3) or fclose(3)
 * fails, and -1 if fprintf(3) fails. The stream is closed on every path.
 */
int write_pid_to_path(pid_t pid, const char *path)
{
	int rc;
	FILE *fp = fopen(path, "we");

	if (!fp) {
		/* Save errno first: logging may clobber it. */
		rc = errno;
		pwarn("failed to open '%s'", path);
		return -rc;
	}
	if (fprintf(fp, "%d\n", (int)pid) < 0) {
		/* fprintf(3) does not set errno on failure. */
		warn("fprintf(%s) failed", path);
		fclose(fp);
		return -1;
	}
	if (fclose(fp)) {
		rc = errno;
		pwarn("fclose(%s) failed", path);
		return -rc;
	}

	return 0;
}
237
/*
 * Create the |path| directory and its parents (if need be) with |mode|.
 * If not |isdir|, then |path| is actually a file, so the last component
 * will not be created.
 *
 * Returns 0 on success or a negative errno value on failure. Components that
 * already exist (EEXIST) are not treated as errors.
 */
int mkdir_p(const char *path, mode_t mode, bool isdir)
{
	int rc;
	char *dir = strdup(path);
	if (!dir) {
		rc = errno;
		pwarn("strdup(%s) failed", path);
		return -rc;
	}

	/*
	 * Starting from the root, work our way out to the end.
	 * The first character is skipped so a leading '/' is not treated as a
	 * component separator; an empty |path| has no character to skip, so
	 * don't look past its terminator.
	 */
	char *p = dir[0] != '\0' ? strchr(dir + 1, '/') : NULL;
	while (p) {
		*p = '\0';
		if (mkdir(dir, mode) && errno != EEXIST) {
			rc = errno;
			pwarn("mkdir(%s, 0%o) failed", dir, mode);
			free(dir);
			return -rc;
		}
		*p = '/';
		p = strchr(p + 1, '/');
	}

	/*
	 * Create the last directory.  We still check EEXIST here in case
	 * of trailing slashes.
	 */
	free(dir);
	if (isdir && mkdir(path, mode) && errno != EEXIST) {
		rc = errno;
		pwarn("mkdir(%s, 0%o) failed", path, mode);
		return -rc;
	}
	return 0;
}
279
/*
 * setup_mount_destination: Ensures the mount target exists.
 * Creates it if needed and possible.
 *
 * If |dest| can already be stat(2)'d, nothing is done. Otherwise |dest| is
 * created as a directory or as an empty file depending on |source| and |bind|,
 * and is then chown(2)'d to |uid|/|gid|. When bind mounting from an absolute
 * |source| and |mnt_flags| is non-NULL, the statvfs(3) flags of |source| are
 * stored in |*mnt_flags|.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
int setup_mount_destination(const char *source, const char *dest, uid_t uid,
			    uid_t gid, bool bind, unsigned long *mnt_flags)
{
	struct stat info;
	bool want_dir = true;
	int err;

	/* Destination exists: nothing to set up. */
	if (stat(dest, &info) == 0)
		return 0;

	if (source[0] != '/') {
		/*
		 * A non-absolute source is assumed to be a filesystem type
		 * such as "tmpfs", which is mounted on a directory (|want_dir|
		 * is already true). Relative bind mounts are disallowed.
		 */
		if (bind) {
			warn("relative bind-mounts are not allowed: source=%s",
			     source);
			return -EINVAL;
		}
	} else {
		/* The source is an absolute path -- it better exist! */
		if (stat(source, &info)) {
			err = errno;
			pwarn("stat(%s) failed", source);
			return -err;
		}

		/*
		 * A directory source always gets a directory. For bind mounts
		 * everything else is bind mounted as a file to support device
		 * nodes, sockets, etc... For all other mounts, a block/char
		 * source is assumed to want a directory; any other source type
		 * gets a file and will most likely fail later at mount() time.
		 */
		const mode_t type = info.st_mode;
		if (S_ISDIR(type))
			want_dir = true;
		else if (bind)
			want_dir = false;
		else
			want_dir = S_ISBLK(type) || S_ISCHR(type);

		/* If bind mounting, also grab the mount flags of the source. */
		if (bind && mnt_flags) {
			struct statvfs vfs_info;
			if (statvfs(source, &vfs_info)) {
				err = errno;
				pwarn(
				    "failed to look up mount flags: source=%s",
				    source);
				return -err;
			}
			*mnt_flags = vfs_info.f_flag;
		}
	}

	/*
	 * Now that we know what we want to do, do it!
	 * The intermediate dirs (and the final path, when it is a directory)
	 * are created with 0755 perms.
	 */
	err = mkdir_p(dest, 0755, want_dir);
	if (err)
		return err;

	if (!want_dir) {
		/* "touch" the destination file. */
		int fd = open(dest, O_RDWR | O_CREAT | O_CLOEXEC, 0700);
		if (fd < 0) {
			err = errno;
			pwarn("open(%s) failed", dest);
			return -err;
		}
		close(fd);
	}

	if (chown(dest, uid, gid)) {
		err = errno;
		pwarn("chown(%s, %u, %u) failed", dest, uid, gid);
		return -err;
	}
	return 0;
}
Luis Hector Chavez71323552017-09-05 09:17:22 -0700379
/*
 * lookup_user: Gets the uid/gid for the given username.
 *
 * On success stores the results in |*uid| and |*gid| and returns 0.
 * Returns -ENOMEM if the scratch buffer cannot be allocated and -1 if the
 * user does not exist or the lookup fails; the outputs are left untouched on
 * failure.
 */
int lookup_user(const char *user, uid_t *uid, gid_t *gid)
{
	/* Give up rather than grow the scratch buffer without bound. */
	const size_t max_sz = 1024 * 1024;

	/*
	 * sysconf(_SC_GETPW_R_SIZE_MAX) is only a suggested starting size for
	 * the buffer, so start there (or with a guess if it is unavailable)
	 * and grow whenever getpwnam_r(3) reports ERANGE.
	 */
	long hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	size_t sz = hint > 0 ? (size_t)hint : 65536;

	do {
		struct passwd pw;
		struct passwd *ppw = NULL;
		char *buf = malloc(sz);
		if (!buf)
			return -ENOMEM;

		int err = getpwnam_r(user, &pw, buf, sz, &ppw);
		/*
		 * We're safe to free the buffer here. The strings inside |pw|
		 * point inside |buf|, but we don't use any of them; this
		 * leaves the pointers dangling but it's safe. |ppw| points at
		 * |pw| if getpwnam_r(3) succeeded.
		 */
		free(buf);

		if (err == ERANGE) {
			/* |buf| was too small, retry with a bigger one. */
			sz *= 2;
			continue;
		}

		/* Either a lookup error, or no matching entry was found. */
		if (err || !ppw)
			return -1;

		*uid = ppw->pw_uid;
		*gid = ppw->pw_gid;
		return 0;
	} while (sz <= max_sz);

	return -1;
}
415
/*
 * lookup_group: Gets the gid for the given group name.
 *
 * On success stores the result in |*gid| and returns 0.
 * Returns -ENOMEM if the scratch buffer cannot be allocated and -1 if the
 * group does not exist or the lookup fails; |*gid| is left untouched on
 * failure.
 */
int lookup_group(const char *group, gid_t *gid)
{
	/* Give up rather than grow the scratch buffer without bound. */
	const size_t max_sz = 1024 * 1024;

	/*
	 * sysconf(_SC_GETGR_R_SIZE_MAX) is only a suggested starting size for
	 * the buffer, so start there (or with a guess if it is unavailable)
	 * and grow whenever getgrnam_r(3) reports ERANGE.
	 */
	long hint = sysconf(_SC_GETGR_R_SIZE_MAX);
	size_t sz = hint > 0 ? (size_t)hint : 65536;

	do {
		struct group gr;
		struct group *pgr = NULL;
		char *buf = malloc(sz);
		if (!buf)
			return -ENOMEM;

		int err = getgrnam_r(group, &gr, buf, sz, &pgr);
		/*
		 * We're safe to free the buffer here. The strings inside |gr|
		 * point inside |buf|, but we don't use any of them; this
		 * leaves the pointers dangling but it's safe. |pgr| points at
		 * |gr| if getgrnam_r(3) succeeded.
		 */
		free(buf);

		if (err == ERANGE) {
			/* |buf| was too small, retry with a bigger one. */
			sz *= 2;
			continue;
		}

		/* Either a lookup error, or no matching entry was found. */
		if (err || !pgr)
			return -1;

		*gid = pgr->gr_gid;
		return 0;
	} while (sz <= max_sz);

	return -1;
}