blob: 938a8d710cde82d5b6515de47c10f2dca8b10795 [file] [log] [blame]
/* Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
Elly Jonescd7a9042011-07-22 13:56:51 -04005
6#define _BSD_SOURCE
7#define _GNU_SOURCE
Will Drewry32ac9f52011-08-18 21:36:27 -05008#include <ctype.h>
Elly Jonescd7a9042011-07-22 13:56:51 -04009#include <errno.h>
10#include <grp.h>
11#include <inttypes.h>
Will Drewryfe4a3722011-09-16 14:50:50 -050012#include <limits.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040013#include <linux/capability.h>
14#include <linux/securebits.h>
15#include <pwd.h>
16#include <sched.h>
17#include <signal.h>
Will Drewry2f54b6a2011-09-16 13:45:31 -050018#include <stdarg.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040019#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <syscall.h>
23#include <sys/capability.h>
24#include <sys/mount.h>
25#include <sys/prctl.h>
26#include <sys/wait.h>
27#include <syslog.h>
28#include <unistd.h>
29
30#include "libminijail.h"
Will Drewry32ac9f52011-08-18 21:36:27 -050031#include "libsyscalls.h"
Elly Jonescd7a9042011-07-22 13:56:51 -040032#include "libminijail-private.h"
33
/* Fallback values for the seccomp-filter prctl(2) interface. They are only
 * defined here when the included headers do not already provide
 * PR_SET_SECCOMP_FILTER. */
#ifndef PR_SET_SECCOMP_FILTER
#define PR_GET_SECCOMP_FILTER 35
#define PR_SET_SECCOMP_FILTER 36
#define PR_CLEAR_SECCOMP_FILTER 37
#define PR_SECCOMP_FILTER_SYSCALL 0
#define PR_SECCOMP_FILTER_EVENT 1
#endif
42
/* die(fmt, ...): logs a printf-style message to syslog at LOG_ERR, prefixed
 * with "libminijail: ", and then abort()s the process.
 * |_msg| must be a string literal because it is pasted onto the prefix. */
#define die(_msg, ...) \
  do { \
    syslog(LOG_ERR, "libminijail: " _msg, ## __VA_ARGS__); \
    abort(); \
  } while (0)
Elly Jonescd7a9042011-07-22 13:56:51 -040047
/* pdie(fmt, ...): like die(), but appends ": " followed by strerror(errno)
 * to the logged message. Call it right after the failing operation so that
 * errno still describes that failure. */
#define pdie(_msg, ...) \
  die(_msg ": %s", ## __VA_ARGS__, strerror(errno))
50
/* warn(fmt, ...): logs a printf-style message to syslog at LOG_WARNING,
 * prefixed with "libminijail: ". Execution continues afterwards. */
#define warn(_msg, ...) \
  syslog(LOG_WARNING, "libminijail: " _msg, ## __VA_ARGS__)
Elly Jonescd7a9042011-07-22 13:56:51 -040053
54struct minijail *minijail_new(void) {
55 struct minijail *j = malloc(sizeof(*j));
56 if (j)
57 memset(j, 0, sizeof(*j));
58 return j;
59}
60
61void minijail_change_uid(struct minijail *j, uid_t uid) {
62 if (uid == 0)
63 die("useless change to uid 0");
64 j->uid = uid;
65 j->flags.uid = 1;
66}
67
68void minijail_change_gid(struct minijail *j, gid_t gid) {
69 if (gid == 0)
70 die("useless change to gid 0");
71 j->gid = gid;
72 j->flags.gid = 1;
73}
74
75int minijail_change_user(struct minijail *j, const char *user) {
76 /* In principle this should use getpwnam(), but:
77 * 1) getpwnam_r() isn't actually reentrant anyway, since it uses a
78 * statically-allocated file descriptor internally
79 * 2) fgetpwnam() (by analogy with fgetpwent) would solve (1) except that it
80 * doesn't exist
81 * 3) sysconf() (see getpwnam_r(3)) is allowed to return a size that is not
82 * large enough, which means having to loop on growing the buffer we pass
83 * in
84 */
85 struct passwd *pw = getpwnam(user);
86 if (!pw)
87 return errno;
88 minijail_change_uid(j, pw->pw_uid);
Will Drewry2ddaad02011-09-16 11:36:08 -050089 j->user = strdup(user);
90 if (!j->user)
91 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -040092 j->usergid = pw->pw_gid;
93 return 0;
94}
95
/* Configures the jail to run with the gid of the group named |group|.
 *
 * The gid is applied through minijail_change_gid(), which aborts for gid 0.
 *
 * Returns 0 on success, or a positive, non-zero errno value if the lookup
 * fails (ENOENT when getgrnam() reports "no such group" without setting
 * errno).
 */
int minijail_change_group(struct minijail *j, const char *group) {
  /* In principle this should use getgrnam_r(), but:
   * 1) getgrnam_r() isn't actually reentrant anyway, since it uses a
   *    statically-allocated file descriptor internally
   * 2) fgetgrnam() (by analogy with fgetgrent) would solve (1) except that it
   *    doesn't exist
   * 3) sysconf() (see getgrnam_r(3)) is allowed to return a size that is not
   *    large enough, which means having to loop on growing the buffer we pass
   *    in
   */
  struct group *gr;

  /* getgrnam() leaves errno untouched when the group simply does not exist,
   * so clear it first to avoid returning 0 or a stale value on failure. */
  errno = 0;
  gr = getgrnam(group);
  if (!gr)
    return errno ? errno : ENOENT;
  minijail_change_gid(j, gr->gr_gid);
  return 0;
}
112
113void minijail_use_seccomp(struct minijail *j) {
114 j->flags.seccomp = 1;
115}
116
Will Drewry32ac9f52011-08-18 21:36:27 -0500117void minijail_use_seccomp_filter(struct minijail *j) {
118 j->flags.seccomp_filter = 1;
119}
120
Elly Jonescd7a9042011-07-22 13:56:51 -0400121void minijail_use_caps(struct minijail *j, uint64_t capmask) {
122 j->caps = capmask;
123 j->flags.caps = 1;
124}
125
126void minijail_namespace_vfs(struct minijail *j) {
127 j->flags.vfs = 1;
128}
129
130void minijail_namespace_pids(struct minijail *j) {
131 j->flags.pids = 1;
132}
133
134void minijail_remount_readonly(struct minijail *j) {
135 j->flags.vfs = 1;
136 j->flags.readonly = 1;
137}
138
139void minijail_inherit_usergroups(struct minijail *j) {
140 j->flags.usergroups = 1;
141}
142
143void minijail_disable_ptrace(struct minijail *j) {
144 j->flags.ptrace = 1;
145}
146
Will Drewry32ac9f52011-08-18 21:36:27 -0500147int minijail_add_seccomp_filter(struct minijail *j, int nr,
148 const char *filter) {
149 struct seccomp_filter *sf;
150 if (!filter || nr < 0)
151 return -EINVAL;
152
153 sf = malloc(sizeof(*sf));
154 if (!sf)
155 return -ENOMEM;
156 sf->nr = nr;
157 sf->filter = strndup(filter, MINIJAIL_MAX_SECCOMP_FILTER_LINE);
158 if (!sf->filter) {
159 free(sf);
160 return -ENOMEM;
161 }
162
163 if (!j->filters) {
164 j->filters = sf;
165 sf->next = sf;
166 sf->prev = sf;
167 return 0;
168 }
169 sf->next = j->filters;
170 sf->prev = j->filters->prev;
171 sf->prev->next = sf;
172 j->filters->prev = sf;
173 return 0;
174}
175
176int minijail_lookup_syscall(const char *name) {
177 const struct syscall_entry *entry = syscall_table;
178 for (; entry->name && entry->nr >= 0; ++entry)
179 if (!strcmp(entry->name, name))
180 return entry->nr;
181 return -1;
182}
183
/* Trims |s| in place: skips leading blanks (space/tab) and cuts off trailing
 * blanks and newlines by writing a NUL terminator.
 *
 * Returns a pointer into |s| at the first non-blank character. The trailing
 * scan never moves before that pointer, so empty and all-blank strings yield
 * "" without touching memory in front of the buffer.
 */
static char *strip(char *s) {
  char *end;

  /* <ctype.h> functions require values representable as unsigned char. */
  while (*s && isblank((unsigned char)*s))
    s++;

  /* |end| points one past the last kept character. */
  end = s + strlen(s);
  while (end > s && (isblank((unsigned char)end[-1]) || end[-1] == '\n'))
    end--;
  *end = '\0';
  return s;
}
194
195void minijail_parse_seccomp_filters(struct minijail *j, const char *path) {
196 FILE *file = fopen(path, "r");
197 char line[MINIJAIL_MAX_SECCOMP_FILTER_LINE];
198 int count = 1;
199 if (!file)
200 pdie("failed to open seccomp filters file");
201
202 /* Format is simple:
203 * syscall_name<COLON><FILTER STRING>[\n|EOF]
204 * #...comment...
205 * <empty line?
206 */
207 while (fgets(line, sizeof(line), file)) {
208 char *filter = line;
209 char *name = strsep(&filter, ":");
210 char *name_end = NULL;
211 int nr = -1;
212
213 if (!name)
214 die("invalid filter on line %d", count);
215
216 name = strip(name);
217
218 if (!filter) {
219 if (strlen(name))
220 die("invalid filter on line %d", count);
221 /* Allow empty lines */
222 continue;
223 }
224
225 /* Allow comment lines */
226 if (*name == '#')
227 continue;
228
229 filter = strip(filter);
230
231 /* Take direct syscall numbers */
232 nr = strtol(name, &name_end, 0);
233 /* Or fail-over to using names */
234 if (*name_end != '\0')
235 nr = minijail_lookup_syscall(name);
236 if (nr < 0)
237 die("syscall '%s' unknown", name);
238
239 if (minijail_add_seccomp_filter(j, nr, filter))
240 pdie("failed to add filter for syscall '%s'", name);
241 }
242 fclose(file);
243}
244
Will Drewry2ddaad02011-09-16 11:36:08 -0500245size_t minijail_size(const struct minijail *j) {
246 size_t bytes = sizeof(*j);
247 if (j->user)
248 bytes += strlen(j->user) + 1;
249 /* TODO(wad) if (seccomp_filter) */
250 return bytes;
251}
252
Will Drewry2ddaad02011-09-16 11:36:08 -0500253int minijail_marshal(const struct minijail *j, char *buf, size_t available) {
254 size_t total = sizeof(*j);
255 if (available < total)
256 return -ENOSPC;
257 available -= total;
258 memcpy(buf, (void *) j, sizeof(*j));
259 if (j->user) {
260 size_t len = strlen(j->user) + 1;
261 if (available < len)
262 return -ENOSPC;
263 memcpy(buf + total, j->user, len);
264 available -= len;
265 total += len;
266 }
267 return 0;
268}
269
270int minijail_unmarshal(struct minijail *j, char *serialized, size_t length) {
271 if (length < sizeof(*j))
272 return -EINVAL;
273 memcpy((void *) j, serialized, sizeof(*j));
274 serialized += sizeof(*j);
275 length -= sizeof(*j);
276 if (j->user) { /* stale pointer */
277 if (!length)
278 return -EINVAL;
279 j->user = strndup(serialized, length);
280 length -= strlen(j->user) + 1;
281 }
282 return 0;
283}
284
Will Drewryfe4a3722011-09-16 14:50:50 -0500285void minijail_preenter(struct minijail *j) {
286 /* Strip out options which are minijail_run() only. */
287 j->flags.vfs = 0;
288 j->flags.readonly = 0;
289 j->flags.pids = 0;
290}
291
292void minijail_preexec(struct minijail *j) {
293 int vfs = j->flags.vfs;
294 int readonly = j->flags.readonly;
Will Drewry2ddaad02011-09-16 11:36:08 -0500295 if (j->user)
296 free(j->user);
297 j->user = NULL;
Will Drewryfe4a3722011-09-16 14:50:50 -0500298
299 memset(&j->flags, 0, sizeof(j->flags));
300 /* Now restore anything we meant to keep. */
301 j->flags.vfs = vfs;
302 j->flags.readonly = readonly;
303 /* Note, pidns will already have been used before this call. */
Will Drewry2ddaad02011-09-16 11:36:08 -0500304}
305
/* Replaces the /proc mount in the current mount namespace with a fresh
 * read-only, nodev/noexec/nosuid proc mount.
 * Returns 0 on success or the errno value of the failing call. */
static int remount_readonly(void) {
  const char *kProcPath = "/proc";
  const unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;

  /* Right now, we're holding a reference to our parent's old mount of /proc in
   * our namespace, which means using MS_REMOUNT here would mutate our parent's
   * mount as well, even though we're in a VFS namespace (!). Instead, remove
   * their mount from our namespace and make our own. */
  if (umount(kProcPath) != 0 ||
      mount("", kProcPath, "proc", kSafeFlags | MS_RDONLY, "") != 0)
    return errno;
  return 0;
}
319
320static void drop_caps(const struct minijail *j) {
321 cap_t caps = cap_get_proc();
322 cap_value_t raise_flag[1];
323 unsigned int i;
324 if (!caps)
325 die("can't get process caps");
326 if (cap_clear_flag(caps, CAP_INHERITABLE))
327 die("can't clear inheritable caps");
328 if (cap_clear_flag(caps, CAP_EFFECTIVE))
329 die("can't clear effective caps");
330 if (cap_clear_flag(caps, CAP_PERMITTED))
331 die("can't clear permitted caps");
332 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
333 if (i != CAP_SETPCAP && !(j->caps & (1 << i)))
334 continue;
335 raise_flag[0] = i;
336 if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
337 die("can't add effective cap");
338 if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
339 die("can't add permitted cap");
340 if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
341 die("can't add inheritable cap");
342 }
343 if (cap_set_proc(caps))
344 die("can't apply cleaned capset");
345 cap_free(caps);
346 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
347 if (j->caps & (1 << i))
348 continue;
349 if (prctl(PR_CAPBSET_DROP, i))
350 pdie("prctl(PR_CAPBSET_DROP)");
351 }
352}
353
Will Drewry32ac9f52011-08-18 21:36:27 -0500354static int setup_seccomp_filters(const struct minijail *j) {
355 const struct seccomp_filter *sf = j->filters;
356 int ret = 0;
357 int broaden = 0;
358
359 /* No filters installed isn't necessarily an error. */
360 if (!sf)
361 return ret;
362
363 do {
364 errno = 0;
365 ret = prctl(PR_SET_SECCOMP_FILTER, PR_SECCOMP_FILTER_SYSCALL,
366 sf->nr, broaden ? "1" : sf->filter);
367 if (ret) {
368 switch (errno) {
369 case ENOSYS:
370 /* TODO(wad) make this a config option */
371 if (broaden)
372 die("CONFIG_SECCOMP_FILTER is not supported by your kernel");
373 warn("missing CONFIG_FTRACE_SYSCALLS; relaxing the filter for %d",
374 sf->nr);
375 broaden = 1;
376 continue;
377 case E2BIG:
378 warn("seccomp filter too long: %d", sf->nr);
379 pdie("filter too long");
380 case ENOSPC:
381 pdie("too many seccomp filters");
382 case EPERM:
383 warn("syscall filter disallowed for %d", sf->nr);
384 pdie("failed to install seccomp filter");
385 case EINVAL:
386 warn("seccomp filter or call method is invalid. %d:'%s'",
387 sf->nr, sf->filter);
388 default:
389 pdie("failed to install seccomp filter");
390 }
391 }
392 sf = sf->next;
393 broaden = 0;
394 } while (sf != j->filters);
395 return ret;
396}
397
Elly Jonescd7a9042011-07-22 13:56:51 -0400398void minijail_enter(const struct minijail *j) {
Will Drewry32ac9f52011-08-18 21:36:27 -0500399 int ret;
Elly Jonescd7a9042011-07-22 13:56:51 -0400400 if (j->flags.pids)
401 die("tried to enter a pid-namespaced jail; try minijail_run()?");
402
Will Drewry32ac9f52011-08-18 21:36:27 -0500403 ret = setup_seccomp_filters(j);
404 if (j->flags.seccomp_filter && ret)
405 die("failed to configure seccomp filters");
406
Elly Jonescd7a9042011-07-22 13:56:51 -0400407 if (j->flags.usergroups && !j->user)
408 die("usergroup inheritance without username");
409
410 /* We can't recover from failures if we've dropped privileges partially,
411 * so we don't even try. If any of our operations fail, we abort() the
412 * entire process. */
413 if (j->flags.vfs && unshare(CLONE_NEWNS))
414 pdie("unshare");
415
416 if (j->flags.readonly && remount_readonly())
417 pdie("remount");
418
419 if (j->flags.caps) {
420 /* POSIX capabilities are a bit tricky. If we drop our capability to change
421 * uids, our attempt to use setuid() below will fail. Hang on to root caps
422 * across setuid(), then lock securebits. */
423 if (prctl(PR_SET_KEEPCAPS, 1))
424 pdie("prctl(PR_SET_KEEPCAPS)");
425 if (prctl(PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
426 pdie("prctl(PR_SET_SECUREBITS)");
427 }
428
Will Drewry32ac9f52011-08-18 21:36:27 -0500429 if (j->flags.usergroups && initgroups(j->user, j->usergid)) {
Elly Jonescd7a9042011-07-22 13:56:51 -0400430 pdie("initgroups");
Will Drewry32ac9f52011-08-18 21:36:27 -0500431 } else if (!j->flags.usergroups && setgroups(0, NULL)) {
Elly Jonescd7a9042011-07-22 13:56:51 -0400432 pdie("setgroups");
Will Drewry32ac9f52011-08-18 21:36:27 -0500433 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400434
435 if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
436 pdie("setresgid");
437
438 if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
439 pdie("setresuid");
440
441 if (j->flags.caps)
442 drop_caps(j);
443
444 /* seccomp has to come last since it cuts off all the other
445 * privilege-dropping syscalls :) */
Will Drewry32ac9f52011-08-18 21:36:27 -0500446 if (j->flags.seccomp_filter && prctl(PR_SET_SECCOMP, 13))
447 pdie("prctl(PR_SET_SECCOMP, 13)");
448
Elly Jonescd7a9042011-07-22 13:56:51 -0400449 if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
450 pdie("prctl(PR_SET_SECCOMP)");
451}
452
/* Last wait status recorded for the root process of the pid namespace. It is
 * written by init() and read by the SIGTERM handler below, so it has to be a
 * volatile sig_atomic_t to be safely shared with a signal handler. */
static volatile sig_atomic_t init_exitstatus = 0;

/* SIGTERM handler for the namespace init process: exits immediately, passing
 * the recorded status value to _exit(). */
static void init_term(int __attribute__((unused)) sig) {
  _exit(init_exitstatus);
}
458
459static int init(pid_t rootpid) {
460 pid_t pid;
461 int status;
462 signal(SIGTERM, init_term); /* so that we exit with the right status */
463 while ((pid = wait(&status)) > 0) {
464 /* This loop will only end when either there are no processes left inside
465 * our pid namespace or we get a signal. */
466 if (pid == rootpid)
467 init_exitstatus = status;
468 }
469 if (!WIFEXITED(init_exitstatus))
470 _exit(MINIJAIL_ERR_INIT);
471 _exit(WEXITSTATUS(init_exitstatus));
472}
473
/* Reads exactly |len| bytes from |fd| into |dst|, resuming after short reads
 * and EINTR. Returns 0 on success or -EINVAL on error or premature EOF. */
static int read_exact(int fd, void *dst, size_t len) {
  char *p = dst;

  while (len > 0) {
    ssize_t n = read(fd, p, len);
    if (n < 0) {
      if (errno == EINTR)
        continue;
      return -EINVAL;
    }
    if (n == 0)
      return -EINVAL; /* EOF before the full message arrived */
    p += n;
    len -= (size_t) n;
  }
  return 0;
}

/* Receives a jail description in the [size][minijail] format written by
 * minijail_to_fd() from |fd| and unmarshals it into |j|.
 *
 * Returns 0 on success, -EINVAL if the stream is truncated or malformed,
 * -E2BIG if the announced size exceeds USHRT_MAX, -ENOMEM on allocation
 * failure, or the error from minijail_unmarshal().
 */
int minijail_from_fd(int fd, struct minijail *j) {
  size_t sz = 0;
  char *buf;
  int r;

  if (read_exact(fd, &sz, sizeof(sz)))
    return -EINVAL;
  if (sz == 0)
    return -EINVAL;
  if (sz > USHRT_MAX) /* Arbitrary sanity check */
    return -E2BIG;
  buf = malloc(sz);
  if (!buf)
    return -ENOMEM;
  r = read_exact(fd, buf, sz);
  if (!r)
    r = minijail_unmarshal(j, buf, sz);
  free(buf);
  return r;
}
495
Will Drewryfe4a3722011-09-16 14:50:50 -0500496int minijail_to_fd(struct minijail *j, int fd) {
497 char *buf;
498 size_t sz = minijail_size(j);
499 ssize_t written;
500 int r;
Elly Jonescd7a9042011-07-22 13:56:51 -0400501
Will Drewry32ac9f52011-08-18 21:36:27 -0500502 if (j->flags.seccomp_filter)
503 warn("TODO(wad) seccomp_filter is installed in the parent which "
504 "requires overly permissive rules for execve(2)ing.");
Will Drewryfe4a3722011-09-16 14:50:50 -0500505 if (!sz)
506 return -EINVAL;
507 buf = malloc(sz);
508 if ((r = minijail_marshal(j, buf, sz))) {
509 free(buf);
510 return r;
511 }
512 /* Sends [size][minijail]. */
513 written = write(fd, &sz, sizeof(sz));
514 if (written != sizeof(sz)) {
515 free(buf);
516 return -EFAULT;
517 }
518 written = write(fd, buf, sz);
519 if (written < 0 || (size_t) written != sz) {
520 free(buf);
521 return -EFAULT;
522 }
523 free(buf);
524 return 0;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500525}
Elly Jonescd7a9042011-07-22 13:56:51 -0400526
Will Drewry2f54b6a2011-09-16 13:45:31 -0500527static int setup_preload(void) {
528 char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
529 char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
530 if (!newenv)
Elly Jonescd7a9042011-07-22 13:56:51 -0400531 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400532
533 /* Only insert a separating space if we have something to separate... */
534 sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "", PRELOADPATH);
535
536 /* setenv() makes a copy of the string we give it */
Ben Chan541c7e52011-08-26 14:55:53 -0700537 setenv(kLdPreloadEnvVar, newenv, 1);
Elly Jonescd7a9042011-07-22 13:56:51 -0400538 free(newenv);
Elly Jonescd7a9042011-07-22 13:56:51 -0400539 return 0;
540}
541
542int minijail_run(struct minijail *j, const char *filename, char *const argv[]) {
543 unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
Ben Chan541c7e52011-08-26 14:55:53 -0700544 char *oldenv, *oldenv_copy = NULL;
Elly Jonescd7a9042011-07-22 13:56:51 -0400545 pid_t r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500546 int pipe_fds[2];
547 char fd_buf[11];
Ben Chan541c7e52011-08-26 14:55:53 -0700548
549 oldenv = getenv(kLdPreloadEnvVar);
550 if (oldenv) {
551 oldenv_copy = strdup(oldenv);
552 if (!oldenv_copy)
553 return -ENOMEM;
554 }
Will Drewry2f54b6a2011-09-16 13:45:31 -0500555 r = setup_preload();
556 if (r)
Elly Jonescd7a9042011-07-22 13:56:51 -0400557 return r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500558
559 /* Before we fork(2) and execve(2) the child process, we need to open
560 * a pipe(2) to send the minijail configuration over.
561 */
562 r = pipe(pipe_fds);
563 if (r)
564 return r;
565 r = snprintf(fd_buf, sizeof(fd_buf), "%d", pipe_fds[0]);
566 if (r <= 0)
567 return -EINVAL;
568 setenv(kFdEnvVar, fd_buf, 1);
Elly Jonescd7a9042011-07-22 13:56:51 -0400569
570 r = syscall(SYS_clone, pidns | SIGCHLD, NULL);
571 if (r > 0) {
Ben Chan541c7e52011-08-26 14:55:53 -0700572 if (oldenv_copy) {
573 setenv(kLdPreloadEnvVar, oldenv_copy, 1);
574 free(oldenv_copy);
575 } else {
576 unsetenv(kLdPreloadEnvVar);
577 }
Will Drewry2f54b6a2011-09-16 13:45:31 -0500578 unsetenv(kFdEnvVar);
Elly Jonescd7a9042011-07-22 13:56:51 -0400579 j->initpid = r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500580 close(pipe_fds[0]);
Will Drewryfe4a3722011-09-16 14:50:50 -0500581 r = minijail_to_fd(j, pipe_fds[1]);
Will Drewry2f54b6a2011-09-16 13:45:31 -0500582 close(pipe_fds[1]);
583 if (r) {
584 kill(j->initpid, SIGKILL);
585 die("failed to send marshalled minijail");
586 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400587 return 0;
588 }
Ben Chan541c7e52011-08-26 14:55:53 -0700589
590 free(oldenv_copy);
591
Elly Jonescd7a9042011-07-22 13:56:51 -0400592 if (r < 0)
593 return r;
594
Will Drewryfe4a3722011-09-16 14:50:50 -0500595 /* Drop everything that cannot be inherited across execve. */
596 minijail_preexec(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400597
598 /* Jail this process and its descendants... */
599 minijail_enter(j);
600
601 if (pidns) {
602 /* pid namespace: this process will become init inside the new namespace, so
603 * fork off a child to actually run the program (we don't want all programs
604 * we might exec to have to know how to be init). */
605 r = fork();
606 if (r < 0)
607 _exit(r);
608 else if (r > 0)
609 init(r); /* never returns */
610 }
611
Will Drewry2f54b6a2011-09-16 13:45:31 -0500612
Elly Jonescd7a9042011-07-22 13:56:51 -0400613 /* If we aren't pid-namespaced:
614 * calling process
615 * -> execve()-ing process
616 * If we are:
617 * calling process
618 * -> init()-ing process
619 * -> execve()-ing process
620 */
621 _exit(execve(filename, argv, environ));
622}
623
624int minijail_kill(struct minijail *j) {
625 int st;
626 if (kill(j->initpid, SIGTERM))
627 return errno;
628 if (waitpid(j->initpid, &st, 0) < 0)
629 return errno;
630 return st;
631}
632
633int minijail_wait(struct minijail *j) {
634 int st;
635 if (waitpid(j->initpid, &st, 0) < 0)
636 return errno;
637 if (!WIFEXITED(st))
638 return MINIJAIL_ERR_JAIL;
639 return WEXITSTATUS(st);
640}
641
642void minijail_destroy(struct minijail *j) {
Will Drewry32ac9f52011-08-18 21:36:27 -0500643 struct seccomp_filter *f = j->filters;
644 /* Unlink the tail and head */
645 if (f)
646 f->prev->next = NULL;
647 while (f) {
648 struct seccomp_filter *next = f->next;
649 free(f->filter);
650 free(f);
651 f = next;
652 }
Will Drewry2ddaad02011-09-16 11:36:08 -0500653 if (j->user)
654 free(j->user);
Elly Jonescd7a9042011-07-22 13:56:51 -0400655 free(j);
656}