blob: 62306d9e20d2f4522081067dd5cd0a393b290238 [file] [log] [blame]
Elly Jonescd7a9042011-07-22 13:56:51 -04001/* Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
Will Drewry32ac9f52011-08-18 21:36:27 -05003 * found in the LICENSE file.
4 */
Elly Jonescd7a9042011-07-22 13:56:51 -04005
6#define _BSD_SOURCE
7#define _GNU_SOURCE
Will Drewry32ac9f52011-08-18 21:36:27 -05008#include <ctype.h>
Elly Jonescd7a9042011-07-22 13:56:51 -04009#include <errno.h>
10#include <grp.h>
11#include <inttypes.h>
Will Drewryfe4a3722011-09-16 14:50:50 -050012#include <limits.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040013#include <linux/capability.h>
14#include <linux/securebits.h>
15#include <pwd.h>
16#include <sched.h>
17#include <signal.h>
Will Drewry2f54b6a2011-09-16 13:45:31 -050018#include <stdarg.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040019#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <syscall.h>
23#include <sys/capability.h>
24#include <sys/mount.h>
Will Drewryf89aef52011-09-16 16:48:57 -050025#include <sys/param.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040026#include <sys/prctl.h>
27#include <sys/wait.h>
28#include <syslog.h>
29#include <unistd.h>
30
31#include "libminijail.h"
Will Drewry32ac9f52011-08-18 21:36:27 -050032#include "libsyscalls.h"
Elly Jonescd7a9042011-07-22 13:56:51 -040033#include "libminijail-private.h"
34
/* Fallback constants for the seccomp-filter prctl(2) interface; only used
 * until these are reliably available in linux/prctl.h. */
#ifndef PR_SET_SECCOMP_FILTER
# define PR_SECCOMP_FILTER_SYSCALL 0
# define PR_SECCOMP_FILTER_EVENT   1
# define PR_GET_SECCOMP_FILTER     35
# define PR_SET_SECCOMP_FILTER     36
# define PR_CLEAR_SECCOMP_FILTER   37
#endif
43
/* Logs a printf-style message to syslog at LOG_ERR, prefixed with
 * "libminijail: ", and then abort()s the process. */
#define die(_msg, ...) \
  do { \
    syslog(LOG_ERR, "libminijail: " _msg, ## __VA_ARGS__); \
    abort(); \
  } while (0)
Elly Jonescd7a9042011-07-22 13:56:51 -040048
/* Like die(), but appends ": " and strerror(errno) to the message. */
#define pdie(_msg, ...) \
  die(_msg ": %s", ## __VA_ARGS__, strerror(errno))
51
/* Logs a printf-style message to syslog at LOG_WARNING, prefixed with
 * "libminijail: ". Unlike die(), execution continues afterwards. */
#define warn(_msg, ...) \
  syslog(LOG_WARNING, "libminijail: " _msg, ## __VA_ARGS__)
Elly Jonescd7a9042011-07-22 13:56:51 -040054
/* One entry of a jail's seccomp filter list. Entries are linked into a
 * circular, doubly-linked list whose head is stored in struct minijail. */
struct seccomp_filter {
  int nr;                       /* syscall number the filter applies to */
  char *filter;                 /* heap-allocated filter string */
  struct seccomp_filter *next;  /* following entry (wraps to the head) */
  struct seccomp_filter *prev;  /* preceding entry (head wraps to tail) */
};
60
/* Describes the restrictions to apply to a jailed process.
 *
 * The whole struct is copied byte-for-byte when marshalled, so |user| and
 * |filters| are only meaningful in the process that allocated them.
 */
struct minijail {
  /* One-bit switches selecting which restrictions are enabled. They are
   * declared unsigned: a plain (signed) one-bit field can only represent 0
   * and -1, so storing 1 in it would not read back as 1. */
  struct {
    unsigned int uid : 1;
    unsigned int gid : 1;
    unsigned int caps : 1;
    unsigned int vfs : 1;
    unsigned int pids : 1;
    unsigned int seccomp : 1;
    unsigned int readonly : 1;
    unsigned int usergroups : 1;
    unsigned int ptrace : 1;
    unsigned int seccomp_filter : 1;
  } flags;
  uid_t uid;                       /* uid to switch to when flags.uid */
  gid_t gid;                       /* gid to switch to when flags.gid */
  gid_t usergid;                   /* primary gid of |user| */
  char *user;                      /* heap-allocated user name, or NULL */
  uint64_t caps;                   /* capability mask kept when flags.caps */
  pid_t initpid;                   /* pid of the child made by minijail_run */
  int filter_count;                /* number of entries in |filters| */
  struct seccomp_filter *filters;  /* head of circular filter list, or NULL */
};
83
Elly Jonescd7a9042011-07-22 13:56:51 -040084struct minijail *minijail_new(void) {
85 struct minijail *j = malloc(sizeof(*j));
86 if (j)
87 memset(j, 0, sizeof(*j));
88 return j;
89}
90
91void minijail_change_uid(struct minijail *j, uid_t uid) {
92 if (uid == 0)
93 die("useless change to uid 0");
94 j->uid = uid;
95 j->flags.uid = 1;
96}
97
98void minijail_change_gid(struct minijail *j, gid_t gid) {
99 if (gid == 0)
100 die("useless change to gid 0");
101 j->gid = gid;
102 j->flags.gid = 1;
103}
104
105int minijail_change_user(struct minijail *j, const char *user) {
106 /* In principle this should use getpwnam(), but:
107 * 1) getpwnam_r() isn't actually reentrant anyway, since it uses a
108 * statically-allocated file descriptor internally
109 * 2) fgetpwnam() (by analogy with fgetpwent) would solve (1) except that it
110 * doesn't exist
111 * 3) sysconf() (see getpwnam_r(3)) is allowed to return a size that is not
112 * large enough, which means having to loop on growing the buffer we pass
113 * in
114 */
115 struct passwd *pw = getpwnam(user);
116 if (!pw)
117 return errno;
118 minijail_change_uid(j, pw->pw_uid);
Will Drewry2ddaad02011-09-16 11:36:08 -0500119 j->user = strdup(user);
120 if (!j->user)
121 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400122 j->usergid = pw->pw_gid;
123 return 0;
124}
125
/* Looks up |group| in the group database and configures the jail to run with
 * that group's gid.
 *
 * Returns 0 on success or a positive errno value (ENOENT when the group
 * simply does not exist) if the lookup fails. Aborts (through
 * minijail_change_gid) if the group maps to gid 0.
 */
int minijail_change_group(struct minijail *j, const char *group) {
  /* In principle this should use getgrnam_r(), but:
   * 1) getgrnam_r() isn't actually reentrant anyway, since it uses a
   *    statically-allocated file descriptor internally
   * 2) fgetgrnam() (by analogy with fgetgrent) would solve (1) except that it
   *    doesn't exist
   * 3) sysconf() (see getgrnam_r(3)) is allowed to return a size that is not
   *    large enough, which means having to loop on growing the buffer we pass
   *    in
   */
  struct group *gr;

  /* getgrnam() leaves errno untouched when the entry is merely missing, so
   * clear it first to avoid reporting a stale value (or 0, i.e. success). */
  errno = 0;
  gr = getgrnam(group);
  if (!gr)
    return errno ? errno : ENOENT;
  minijail_change_gid(j, gr->gr_gid);
  return 0;
}
142
143void minijail_use_seccomp(struct minijail *j) {
144 j->flags.seccomp = 1;
145}
146
Will Drewry32ac9f52011-08-18 21:36:27 -0500147void minijail_use_seccomp_filter(struct minijail *j) {
148 j->flags.seccomp_filter = 1;
149}
150
Elly Jonescd7a9042011-07-22 13:56:51 -0400151void minijail_use_caps(struct minijail *j, uint64_t capmask) {
152 j->caps = capmask;
153 j->flags.caps = 1;
154}
155
156void minijail_namespace_vfs(struct minijail *j) {
157 j->flags.vfs = 1;
158}
159
160void minijail_namespace_pids(struct minijail *j) {
161 j->flags.pids = 1;
162}
163
164void minijail_remount_readonly(struct minijail *j) {
165 j->flags.vfs = 1;
166 j->flags.readonly = 1;
167}
168
169void minijail_inherit_usergroups(struct minijail *j) {
170 j->flags.usergroups = 1;
171}
172
173void minijail_disable_ptrace(struct minijail *j) {
174 j->flags.ptrace = 1;
175}
176
Will Drewry32ac9f52011-08-18 21:36:27 -0500177int minijail_add_seccomp_filter(struct minijail *j, int nr,
178 const char *filter) {
179 struct seccomp_filter *sf;
180 if (!filter || nr < 0)
181 return -EINVAL;
182
183 sf = malloc(sizeof(*sf));
184 if (!sf)
185 return -ENOMEM;
186 sf->nr = nr;
187 sf->filter = strndup(filter, MINIJAIL_MAX_SECCOMP_FILTER_LINE);
188 if (!sf->filter) {
189 free(sf);
190 return -ENOMEM;
191 }
192
Will Drewryf89aef52011-09-16 16:48:57 -0500193 j->filter_count++;
194
Will Drewry32ac9f52011-08-18 21:36:27 -0500195 if (!j->filters) {
196 j->filters = sf;
197 sf->next = sf;
198 sf->prev = sf;
199 return 0;
200 }
201 sf->next = j->filters;
202 sf->prev = j->filters->prev;
203 sf->prev->next = sf;
204 j->filters->prev = sf;
205 return 0;
206}
207
208int minijail_lookup_syscall(const char *name) {
209 const struct syscall_entry *entry = syscall_table;
210 for (; entry->name && entry->nr >= 0; ++entry)
211 if (!strcmp(entry->name, name))
212 return entry->nr;
213 return -1;
214}
215
/* Trims |s| in place: skips leading blanks, cuts trailing blanks and
 * newlines by writing a NUL terminator, and returns a pointer to the first
 * kept character (inside the caller's buffer).
 *
 * The backwards scan is bounded by the returned start, so empty or
 * all-whitespace input never touches memory before the string.
 */
static char *strip(char *s) {
  char *end;

  /* <ctype.h> functions require an unsigned char value. */
  while (*s && isblank((unsigned char) *s))
    s++;

  /* |end| points one past the last kept character. */
  end = s + strlen(s);
  while (end > s && (isblank((unsigned char) end[-1]) || end[-1] == '\n'))
    end--;
  *end = '\0';
  return s;
}
226
227void minijail_parse_seccomp_filters(struct minijail *j, const char *path) {
228 FILE *file = fopen(path, "r");
229 char line[MINIJAIL_MAX_SECCOMP_FILTER_LINE];
230 int count = 1;
231 if (!file)
232 pdie("failed to open seccomp filters file");
233
234 /* Format is simple:
235 * syscall_name<COLON><FILTER STRING>[\n|EOF]
236 * #...comment...
237 * <empty line?
238 */
239 while (fgets(line, sizeof(line), file)) {
240 char *filter = line;
241 char *name = strsep(&filter, ":");
242 char *name_end = NULL;
243 int nr = -1;
244
245 if (!name)
246 die("invalid filter on line %d", count);
247
248 name = strip(name);
249
250 if (!filter) {
251 if (strlen(name))
252 die("invalid filter on line %d", count);
253 /* Allow empty lines */
254 continue;
255 }
256
257 /* Allow comment lines */
258 if (*name == '#')
259 continue;
260
261 filter = strip(filter);
262
263 /* Take direct syscall numbers */
264 nr = strtol(name, &name_end, 0);
265 /* Or fail-over to using names */
266 if (*name_end != '\0')
267 nr = minijail_lookup_syscall(name);
268 if (nr < 0)
269 die("syscall '%s' unknown", name);
270
271 if (minijail_add_seccomp_filter(j, nr, filter))
272 pdie("failed to add filter for syscall '%s'", name);
273 }
274 fclose(file);
275}
276
/* Cursor used while serialising a jail into a caller-supplied buffer. */
struct marshal_state {
  size_t available;  /* bytes still free at |buf| */
  size_t total;      /* bytes the complete serialisation requires so far */
  char *buf;         /* next write position */
};
282
283static void marshal_state_init(struct marshal_state *state,
284 char *buf,
285 size_t available) {
286 state->available = available;
287 state->buf = buf;
288 state->total = 0;
289}
290
291static void marshal_append(struct marshal_state *state,
292 char *src,
293 size_t length) {
294 size_t copy_len = MIN(state->available, length);
295
296 /* Up to |available| will be written. */
297 if (copy_len) {
298 memcpy(state->buf, src, copy_len);
299 state->buf += copy_len;
300 state->available -= copy_len;
301 }
302 /* |total| will contain the expected length. */
303 state->total += length;
304}
305
306static void minijail_marshal_helper(struct marshal_state *state,
307 const struct minijail *j) {
308 marshal_append(state, (char *) j, sizeof(*j));
Will Drewry2ddaad02011-09-16 11:36:08 -0500309 if (j->user)
Will Drewryf89aef52011-09-16 16:48:57 -0500310 marshal_append(state, j->user, strlen(j->user) + 1);
311 if (j->flags.seccomp_filter && j->filters) {
312 struct seccomp_filter *f = j->filters;
313 do {
314 marshal_append(state, (char *) &f->nr, sizeof(f->nr));
315 marshal_append(state, f->filter, strlen(f->filter) + 1);
316 f = f->next;
317 } while (f != j->filters);
318 }
319}
320
321size_t minijail_size(const struct minijail *j) {
322 struct marshal_state state;
323 marshal_state_init(&state, NULL, 0);
324 minijail_marshal_helper(&state, j);
325 return state.total;
Will Drewry2ddaad02011-09-16 11:36:08 -0500326}
327
Will Drewry2ddaad02011-09-16 11:36:08 -0500328int minijail_marshal(const struct minijail *j, char *buf, size_t available) {
Will Drewryf89aef52011-09-16 16:48:57 -0500329 struct marshal_state state;
330 marshal_state_init(&state, buf, available);
331 minijail_marshal_helper(&state, j);
332 return (state.total > available);
Will Drewry2ddaad02011-09-16 11:36:08 -0500333}
334
335int minijail_unmarshal(struct minijail *j, char *serialized, size_t length) {
336 if (length < sizeof(*j))
337 return -EINVAL;
338 memcpy((void *) j, serialized, sizeof(*j));
339 serialized += sizeof(*j);
340 length -= sizeof(*j);
Will Drewryf89aef52011-09-16 16:48:57 -0500341
Will Drewry2ddaad02011-09-16 11:36:08 -0500342 if (j->user) { /* stale pointer */
343 if (!length)
344 return -EINVAL;
345 j->user = strndup(serialized, length);
346 length -= strlen(j->user) + 1;
Will Drewryf89aef52011-09-16 16:48:57 -0500347 serialized += strlen(j->user) + 1;
348 }
349
350 if (j->flags.seccomp_filter && j->filter_count) {
351 int count = j->filter_count;
352 /* Let add_seccomp_filter recompute the value. */
353 j->filter_count = 0;
354 j->filters = NULL; /* Don't follow the stale pointer. */
355 for ( ; count > 0; --count) {
356 int *nr = (int *) serialized;
357 char *filter;
358 if (length < sizeof(*nr))
359 return -EINVAL;
360 length -= sizeof(*nr);
361 serialized += sizeof(*nr);
362 if (!length)
363 return -EINVAL;
364 filter = serialized;
365 if (minijail_add_seccomp_filter(j, *nr, filter))
366 return -EINVAL;
367 length -= strlen(filter) + 1;
368 serialized += strlen(filter) + 1;
369 }
Will Drewry2ddaad02011-09-16 11:36:08 -0500370 }
371 return 0;
372}
373
Will Drewryfe4a3722011-09-16 14:50:50 -0500374void minijail_preenter(struct minijail *j) {
375 /* Strip out options which are minijail_run() only. */
376 j->flags.vfs = 0;
377 j->flags.readonly = 0;
378 j->flags.pids = 0;
379}
380
381void minijail_preexec(struct minijail *j) {
382 int vfs = j->flags.vfs;
383 int readonly = j->flags.readonly;
Will Drewry2ddaad02011-09-16 11:36:08 -0500384 if (j->user)
385 free(j->user);
386 j->user = NULL;
Will Drewryfe4a3722011-09-16 14:50:50 -0500387 memset(&j->flags, 0, sizeof(j->flags));
388 /* Now restore anything we meant to keep. */
389 j->flags.vfs = vfs;
390 j->flags.readonly = readonly;
391 /* Note, pidns will already have been used before this call. */
Will Drewry2ddaad02011-09-16 11:36:08 -0500392}
393
/* Replaces the inherited /proc mount with a fresh read-only one
 * (nodev, noexec, nosuid). Returns 0 on success or the errno of the failing
 * umount()/mount() call. */
static int remount_readonly(void) {
  const char *proc_path = "/proc";
  const unsigned int mount_flags =
      MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY;

  /* Right now, we're holding a reference to our parent's old mount of /proc in
   * our namespace, which means using MS_REMOUNT here would mutate our parent's
   * mount as well, even though we're in a VFS namespace (!). Instead, remove
   * their mount from our namespace and make our own. */
  if (umount(proc_path) != 0)
    return errno;
  if (mount("", proc_path, "proc", mount_flags, "") != 0)
    return errno;
  return 0;
}
407
408static void drop_caps(const struct minijail *j) {
409 cap_t caps = cap_get_proc();
410 cap_value_t raise_flag[1];
411 unsigned int i;
412 if (!caps)
413 die("can't get process caps");
414 if (cap_clear_flag(caps, CAP_INHERITABLE))
415 die("can't clear inheritable caps");
416 if (cap_clear_flag(caps, CAP_EFFECTIVE))
417 die("can't clear effective caps");
418 if (cap_clear_flag(caps, CAP_PERMITTED))
419 die("can't clear permitted caps");
420 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
421 if (i != CAP_SETPCAP && !(j->caps & (1 << i)))
422 continue;
423 raise_flag[0] = i;
424 if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
425 die("can't add effective cap");
426 if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
427 die("can't add permitted cap");
428 if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
429 die("can't add inheritable cap");
430 }
431 if (cap_set_proc(caps))
432 die("can't apply cleaned capset");
433 cap_free(caps);
434 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
435 if (j->caps & (1 << i))
436 continue;
437 if (prctl(PR_CAPBSET_DROP, i))
438 pdie("prctl(PR_CAPBSET_DROP)");
439 }
440}
441
Will Drewry32ac9f52011-08-18 21:36:27 -0500442static int setup_seccomp_filters(const struct minijail *j) {
443 const struct seccomp_filter *sf = j->filters;
444 int ret = 0;
445 int broaden = 0;
446
447 /* No filters installed isn't necessarily an error. */
448 if (!sf)
449 return ret;
450
451 do {
452 errno = 0;
453 ret = prctl(PR_SET_SECCOMP_FILTER, PR_SECCOMP_FILTER_SYSCALL,
454 sf->nr, broaden ? "1" : sf->filter);
455 if (ret) {
456 switch (errno) {
457 case ENOSYS:
458 /* TODO(wad) make this a config option */
459 if (broaden)
460 die("CONFIG_SECCOMP_FILTER is not supported by your kernel");
461 warn("missing CONFIG_FTRACE_SYSCALLS; relaxing the filter for %d",
462 sf->nr);
463 broaden = 1;
464 continue;
465 case E2BIG:
466 warn("seccomp filter too long: %d", sf->nr);
467 pdie("filter too long");
468 case ENOSPC:
469 pdie("too many seccomp filters");
470 case EPERM:
471 warn("syscall filter disallowed for %d", sf->nr);
472 pdie("failed to install seccomp filter");
473 case EINVAL:
474 warn("seccomp filter or call method is invalid. %d:'%s'",
475 sf->nr, sf->filter);
476 default:
477 pdie("failed to install seccomp filter");
478 }
479 }
480 sf = sf->next;
481 broaden = 0;
482 } while (sf != j->filters);
483 return ret;
484}
485
Elly Jonescd7a9042011-07-22 13:56:51 -0400486void minijail_enter(const struct minijail *j) {
487 if (j->flags.pids)
488 die("tried to enter a pid-namespaced jail; try minijail_run()?");
489
Will Drewryf89aef52011-09-16 16:48:57 -0500490 if (j->flags.seccomp_filter && setup_seccomp_filters(j))
491 pdie("failed to configure seccomp filters");
Will Drewry32ac9f52011-08-18 21:36:27 -0500492
Elly Jonescd7a9042011-07-22 13:56:51 -0400493 if (j->flags.usergroups && !j->user)
494 die("usergroup inheritance without username");
495
496 /* We can't recover from failures if we've dropped privileges partially,
497 * so we don't even try. If any of our operations fail, we abort() the
498 * entire process. */
499 if (j->flags.vfs && unshare(CLONE_NEWNS))
500 pdie("unshare");
501
502 if (j->flags.readonly && remount_readonly())
503 pdie("remount");
504
505 if (j->flags.caps) {
506 /* POSIX capabilities are a bit tricky. If we drop our capability to change
507 * uids, our attempt to use setuid() below will fail. Hang on to root caps
508 * across setuid(), then lock securebits. */
509 if (prctl(PR_SET_KEEPCAPS, 1))
510 pdie("prctl(PR_SET_KEEPCAPS)");
511 if (prctl(PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
512 pdie("prctl(PR_SET_SECUREBITS)");
513 }
514
Will Drewry32ac9f52011-08-18 21:36:27 -0500515 if (j->flags.usergroups && initgroups(j->user, j->usergid)) {
Elly Jonescd7a9042011-07-22 13:56:51 -0400516 pdie("initgroups");
Will Drewry32ac9f52011-08-18 21:36:27 -0500517 } else if (!j->flags.usergroups && setgroups(0, NULL)) {
Elly Jonescd7a9042011-07-22 13:56:51 -0400518 pdie("setgroups");
Will Drewry32ac9f52011-08-18 21:36:27 -0500519 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400520
521 if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
522 pdie("setresgid");
523
524 if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
525 pdie("setresuid");
526
527 if (j->flags.caps)
528 drop_caps(j);
529
530 /* seccomp has to come last since it cuts off all the other
531 * privilege-dropping syscalls :) */
Will Drewry32ac9f52011-08-18 21:36:27 -0500532 if (j->flags.seccomp_filter && prctl(PR_SET_SECCOMP, 13))
533 pdie("prctl(PR_SET_SECCOMP, 13)");
534
Elly Jonescd7a9042011-07-22 13:56:51 -0400535 if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
536 pdie("prctl(PR_SET_SECCOMP)");
537}
538
/* Wait status of the root process, recorded by init() and read from the
 * SIGTERM handler. Objects shared between normal code and a signal handler
 * must be volatile sig_atomic_t for the access to be well defined. */
static volatile sig_atomic_t init_exitstatus = 0;

/* SIGTERM handler for the in-namespace init process: exits immediately with
 * the status recorded so far. */
static void init_term(int __attribute__((unused)) sig) {
  _exit(init_exitstatus);
}
544
545static int init(pid_t rootpid) {
546 pid_t pid;
547 int status;
548 signal(SIGTERM, init_term); /* so that we exit with the right status */
Will Drewryf89aef52011-09-16 16:48:57 -0500549 /* TODO(wad) self jail with seccomp_filters here. */
Elly Jonescd7a9042011-07-22 13:56:51 -0400550 while ((pid = wait(&status)) > 0) {
551 /* This loop will only end when either there are no processes left inside
552 * our pid namespace or we get a signal. */
553 if (pid == rootpid)
554 init_exitstatus = status;
555 }
556 if (!WIFEXITED(init_exitstatus))
557 _exit(MINIJAIL_ERR_INIT);
558 _exit(WEXITSTATUS(init_exitstatus));
559}
560
/* Reads exactly |len| bytes from |fd| into |dst|, retrying after EINTR and
 * after short reads. Returns 0 on success, -1 on error or premature EOF. */
static int read_exact(int fd, void *dst, size_t len) {
  char *p = dst;
  while (len) {
    ssize_t n = read(fd, p, len);
    if (n < 0) {
      if (errno == EINTR)
        continue;
      return -1;
    }
    if (n == 0)
      return -1;  /* EOF before the full message arrived */
    p += n;
    len -= (size_t) n;
  }
  return 0;
}

/* Reads a [size][marshalled minijail] message from |fd| and unmarshals it
 * into |j|.
 *
 * Returns 0 on success, -EINVAL if the message is truncated, empty or
 * malformed, -E2BIG if the announced size exceeds USHRT_MAX, -ENOMEM on
 * allocation failure, or whatever minijail_unmarshal() reports.
 */
int minijail_from_fd(int fd, struct minijail *j) {
  size_t sz = 0;
  char *buf;
  int r;

  if (read_exact(fd, &sz, sizeof(sz)))
    return -EINVAL;
  if (sz == 0)  /* Nothing to unmarshal; also avoids malloc(0). */
    return -EINVAL;
  if (sz > USHRT_MAX)  /* Arbitrary sanity check */
    return -E2BIG;
  buf = malloc(sz);
  if (!buf)
    return -ENOMEM;
  if (read_exact(fd, buf, sz)) {
    free(buf);
    return -EINVAL;
  }
  r = minijail_unmarshal(j, buf, sz);
  free(buf);
  return r;
}
582
/* Writes exactly |len| bytes from |src| to |fd|, retrying after EINTR and
 * after short writes. Returns 0 on success, -1 on error. */
static int write_exact(int fd, const void *src, size_t len) {
  const char *p = src;
  while (len) {
    ssize_t n = write(fd, p, len);
    if (n < 0) {
      if (errno == EINTR)
        continue;
      return -1;
    }
    p += n;
    len -= (size_t) n;
  }
  return 0;
}

/* Marshals |j| and sends it to |fd| as [size][minijail].
 *
 * Returns 0 on success, -EINVAL if the jail has zero size, -ENOMEM if the
 * buffer cannot be allocated, the non-zero result of minijail_marshal() if
 * marshalling fails, or -EFAULT if the data cannot be written.
 */
int minijail_to_fd(struct minijail *j, int fd) {
  char *buf;
  size_t sz = minijail_size(j);
  int r;

  if (!sz)
    return -EINVAL;
  buf = malloc(sz);
  if (!buf)
    return -ENOMEM;
  r = minijail_marshal(j, buf, sz);
  if (r)
    goto out;
  /* Sends [size][minijail]. */
  if (write_exact(fd, &sz, sizeof(sz)) || write_exact(fd, buf, sz))
    r = -EFAULT;
out:
  free(buf);
  return r;
}
Elly Jonescd7a9042011-07-22 13:56:51 -0400610
Will Drewry2f54b6a2011-09-16 13:45:31 -0500611static int setup_preload(void) {
612 char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
613 char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
614 if (!newenv)
Elly Jonescd7a9042011-07-22 13:56:51 -0400615 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400616
617 /* Only insert a separating space if we have something to separate... */
618 sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "", PRELOADPATH);
619
620 /* setenv() makes a copy of the string we give it */
Ben Chan541c7e52011-08-26 14:55:53 -0700621 setenv(kLdPreloadEnvVar, newenv, 1);
Elly Jonescd7a9042011-07-22 13:56:51 -0400622 free(newenv);
Elly Jonescd7a9042011-07-22 13:56:51 -0400623 return 0;
624}
625
Will Drewryf89aef52011-09-16 16:48:57 -0500626static int setup_pipe(int fds[2]) {
627 int r = pipe(fds);
628 char fd_buf[11];
629 if (r)
630 return r;
631 r = snprintf(fd_buf, sizeof(fd_buf), "%d", fds[0]);
632 if (r <= 0)
633 return -EINVAL;
634 setenv(kFdEnvVar, fd_buf, 1);
635 return 0;
636}
637
Elly Jonescd7a9042011-07-22 13:56:51 -0400638int minijail_run(struct minijail *j, const char *filename, char *const argv[]) {
639 unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
Ben Chan541c7e52011-08-26 14:55:53 -0700640 char *oldenv, *oldenv_copy = NULL;
Will Drewryf89aef52011-09-16 16:48:57 -0500641 pid_t child_pid;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500642 int pipe_fds[2];
Will Drewryf89aef52011-09-16 16:48:57 -0500643 int ret;
Ben Chan541c7e52011-08-26 14:55:53 -0700644
645 oldenv = getenv(kLdPreloadEnvVar);
646 if (oldenv) {
647 oldenv_copy = strdup(oldenv);
648 if (!oldenv_copy)
649 return -ENOMEM;
650 }
Will Drewryf89aef52011-09-16 16:48:57 -0500651
652 if (setup_preload())
653 return -EFAULT;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500654
655 /* Before we fork(2) and execve(2) the child process, we need to open
656 * a pipe(2) to send the minijail configuration over.
657 */
Will Drewryf89aef52011-09-16 16:48:57 -0500658 if (setup_pipe(pipe_fds))
659 return -EFAULT;
Elly Jonescd7a9042011-07-22 13:56:51 -0400660
Will Drewryf89aef52011-09-16 16:48:57 -0500661 child_pid = syscall(SYS_clone, pidns | SIGCHLD, NULL);
662 if (child_pid < 0) {
663 free(oldenv_copy);
664 return child_pid;
665 }
666
667 if (child_pid) {
668 /* Restore parent's LD_PRELOAD. */
Ben Chan541c7e52011-08-26 14:55:53 -0700669 if (oldenv_copy) {
670 setenv(kLdPreloadEnvVar, oldenv_copy, 1);
671 free(oldenv_copy);
672 } else {
673 unsetenv(kLdPreloadEnvVar);
674 }
Will Drewry2f54b6a2011-09-16 13:45:31 -0500675 unsetenv(kFdEnvVar);
Will Drewryf89aef52011-09-16 16:48:57 -0500676 j->initpid = child_pid;
677 close(pipe_fds[0]); /* read endpoint */
678 ret = minijail_to_fd(j, pipe_fds[1]);
679 close(pipe_fds[1]); /* write endpoint */
680 if (ret) {
Will Drewry2f54b6a2011-09-16 13:45:31 -0500681 kill(j->initpid, SIGKILL);
682 die("failed to send marshalled minijail");
683 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400684 return 0;
685 }
Ben Chan541c7e52011-08-26 14:55:53 -0700686 free(oldenv_copy);
687
Will Drewryfe4a3722011-09-16 14:50:50 -0500688 /* Drop everything that cannot be inherited across execve. */
689 minijail_preexec(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400690 /* Jail this process and its descendants... */
691 minijail_enter(j);
692
693 if (pidns) {
694 /* pid namespace: this process will become init inside the new namespace, so
695 * fork off a child to actually run the program (we don't want all programs
696 * we might exec to have to know how to be init). */
Will Drewryf89aef52011-09-16 16:48:57 -0500697 child_pid = fork();
698 if (child_pid < 0)
699 _exit(child_pid);
700 else if (child_pid > 0)
701 init(child_pid); /* never returns */
Elly Jonescd7a9042011-07-22 13:56:51 -0400702 }
703
704 /* If we aren't pid-namespaced:
705 * calling process
706 * -> execve()-ing process
707 * If we are:
708 * calling process
709 * -> init()-ing process
710 * -> execve()-ing process
711 */
712 _exit(execve(filename, argv, environ));
713}
714
715int minijail_kill(struct minijail *j) {
716 int st;
717 if (kill(j->initpid, SIGTERM))
718 return errno;
719 if (waitpid(j->initpid, &st, 0) < 0)
720 return errno;
721 return st;
722}
723
724int minijail_wait(struct minijail *j) {
725 int st;
726 if (waitpid(j->initpid, &st, 0) < 0)
727 return errno;
728 if (!WIFEXITED(st))
729 return MINIJAIL_ERR_JAIL;
730 return WEXITSTATUS(st);
731}
732
733void minijail_destroy(struct minijail *j) {
Will Drewry32ac9f52011-08-18 21:36:27 -0500734 struct seccomp_filter *f = j->filters;
735 /* Unlink the tail and head */
736 if (f)
737 f->prev->next = NULL;
738 while (f) {
739 struct seccomp_filter *next = f->next;
740 free(f->filter);
741 free(f);
742 f = next;
743 }
Will Drewry2ddaad02011-09-16 11:36:08 -0500744 if (j->user)
745 free(j->user);
Elly Jonescd7a9042011-07-22 13:56:51 -0400746 free(j);
747}