/*
 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
Elly Jonescd7a9042011-07-22 13:56:51 -04006
7#define _BSD_SOURCE
8#define _GNU_SOURCE
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -07009
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080010#include <asm/unistd.h>
Will Drewry32ac9f52011-08-18 21:36:27 -050011#include <ctype.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040012#include <errno.h>
13#include <grp.h>
14#include <inttypes.h>
Will Drewryfe4a3722011-09-16 14:50:50 -050015#include <limits.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040016#include <linux/capability.h>
17#include <linux/securebits.h>
18#include <pwd.h>
19#include <sched.h>
20#include <signal.h>
Will Drewry2f54b6a2011-09-16 13:45:31 -050021#include <stdarg.h>
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080022#include <stddef.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040023#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <syscall.h>
27#include <sys/capability.h>
28#include <sys/mount.h>
Will Drewryf89aef52011-09-16 16:48:57 -050029#include <sys/param.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040030#include <sys/prctl.h>
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080031#include <sys/user.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040032#include <sys/wait.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040033#include <unistd.h>
34
35#include "libminijail.h"
36#include "libminijail-private.h"
37
Jorge Lucangeli Obesbda833c2012-07-31 16:25:56 -070038#include "signal.h"
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080039#include "syscall_filter.h"
Jorge Lucangeli Obesa6b034d2012-08-07 15:29:20 -070040#include "util.h"
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080041
Will Drewry32ac9f52011-08-18 21:36:27 -050042/* Until these are reliably available in linux/prctl.h */
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080043#ifndef PR_SET_SECCOMP
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -070044# define PR_SET_SECCOMP 22
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080045#endif
46
47/* For seccomp_filter using BPF. */
48#ifndef PR_SET_NO_NEW_PRIVS
49# define PR_SET_NO_NEW_PRIVS 38
50#endif
51#ifndef SECCOMP_MODE_FILTER
52# define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
Will Drewry32ac9f52011-08-18 21:36:27 -050053#endif
54
/*
 * One bind mount to apply when entering the chroot.  Entries are chained
 * into a singly-linked list owned by struct minijail.
 */
struct binding {
	char *src;		/* source path; heap copy made by minijail_bind() */
	char *dest;		/* target path; minijail_bind() requires a leading '/' */
	int writeable;		/* zero: bind_one() remounts the bind read-only */
	struct binding *next;	/* next entry in the list, NULL at the tail */
};
61
/*
 * Description of a jail: which restrictions to apply (flags) plus the
 * parameters each restriction needs.  The whole struct is copied verbatim by
 * minijail_marshal_helper(), so pointer members are only meaningful inside
 * the process that set them; minijail_unmarshal() rebuilds them.
 */
struct minijail {
	/*
	 * One bit per requested restriction.  The fields are unsigned: a
	 * signed 1-bit bit-field cannot represent the value 1, so reading a
	 * set flag back would yield an implementation-defined value.
	 */
	struct {
		unsigned int uid:1;
		unsigned int gid:1;
		unsigned int caps:1;
		unsigned int vfs:1;
		unsigned int pids:1;
		unsigned int seccomp:1;
		unsigned int readonly:1;
		unsigned int usergroups:1;
		unsigned int ptrace:1;
		unsigned int no_new_privs:1;
		unsigned int seccomp_filter:1;
		unsigned int log_seccomp_filter:1;
		unsigned int chroot:1;
	} flags;
	uid_t uid;		/* target uid, valid when flags.uid is set */
	gid_t gid;		/* target gid, valid when flags.gid is set */
	gid_t usergid;		/* primary gid of |user|, passed to initgroups() */
	char *user;		/* heap copy of the user name, or NULL */
	uint64_t caps;		/* capability mask, one bit per capability number */
	pid_t initpid;
	int filter_len;		/* instruction count of |filter_prog| */
	int binding_count;	/* number of entries in the bindings list */
	char *chrootdir;	/* heap copy of the chroot directory, or NULL */
	struct sock_fprog *filter_prog;	/* compiled seccomp BPF program */
	struct binding *bindings_head;
	struct binding *bindings_tail;
};
91
Will Drewry6ac91122011-10-21 16:38:58 -050092struct minijail API *minijail_new(void)
Elly Jonese1749eb2011-10-07 13:54:59 -040093{
Elly Jones51a5b6c2011-10-12 19:09:26 -040094 return calloc(1, sizeof(struct minijail));
Elly Jonescd7a9042011-07-22 13:56:51 -040095}
96
Will Drewry6ac91122011-10-21 16:38:58 -050097void API minijail_change_uid(struct minijail *j, uid_t uid)
Elly Jonese1749eb2011-10-07 13:54:59 -040098{
99 if (uid == 0)
100 die("useless change to uid 0");
101 j->uid = uid;
102 j->flags.uid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400103}
104
Will Drewry6ac91122011-10-21 16:38:58 -0500105void API minijail_change_gid(struct minijail *j, gid_t gid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400106{
107 if (gid == 0)
108 die("useless change to gid 0");
109 j->gid = gid;
110 j->flags.gid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400111}
112
Will Drewry6ac91122011-10-21 16:38:58 -0500113int API minijail_change_user(struct minijail *j, const char *user)
Elly Jonese1749eb2011-10-07 13:54:59 -0400114{
115 char *buf = NULL;
116 struct passwd pw;
117 struct passwd *ppw = NULL;
118 ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX);
119 if (sz == -1)
120 sz = 65536; /* your guess is as good as mine... */
Elly Joneseb300c52011-09-22 14:35:43 -0400121
Elly Jonesdd3e8512012-01-23 15:13:38 -0500122 /*
123 * sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400124 * the maximum needed size of the buffer, so we don't have to search.
125 */
126 buf = malloc(sz);
127 if (!buf)
128 return -ENOMEM;
129 getpwnam_r(user, &pw, buf, sz, &ppw);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500130 /*
131 * We're safe to free the buffer here. The strings inside pw point
132 * inside buf, but we don't use any of them; this leaves the pointers
133 * dangling but it's safe. ppw points at pw if getpwnam_r succeeded.
134 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400135 free(buf);
136 if (!ppw)
137 return -errno;
138 minijail_change_uid(j, ppw->pw_uid);
139 j->user = strdup(user);
140 if (!j->user)
141 return -ENOMEM;
142 j->usergid = ppw->pw_gid;
143 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400144}
145
Will Drewry6ac91122011-10-21 16:38:58 -0500146int API minijail_change_group(struct minijail *j, const char *group)
Elly Jonese1749eb2011-10-07 13:54:59 -0400147{
148 char *buf = NULL;
149 struct group gr;
150 struct group *pgr = NULL;
151 ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX);
152 if (sz == -1)
153 sz = 65536; /* and mine is as good as yours, really */
Elly Joneseb300c52011-09-22 14:35:43 -0400154
Elly Jonesdd3e8512012-01-23 15:13:38 -0500155 /*
156 * sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400157 * the maximum needed size of the buffer, so we don't have to search.
158 */
159 buf = malloc(sz);
160 if (!buf)
161 return -ENOMEM;
162 getgrnam_r(group, &gr, buf, sz, &pgr);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500163 /*
164 * We're safe to free the buffer here. The strings inside gr point
165 * inside buf, but we don't use any of them; this leaves the pointers
166 * dangling but it's safe. pgr points at gr if getgrnam_r succeeded.
167 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400168 free(buf);
169 if (!pgr)
170 return -errno;
171 minijail_change_gid(j, pgr->gr_gid);
172 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400173}
174
Will Drewry6ac91122011-10-21 16:38:58 -0500175void API minijail_use_seccomp(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400176{
177 j->flags.seccomp = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400178}
179
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -0700180void API minijail_no_new_privs(struct minijail *j)
181{
182 j->flags.no_new_privs = 1;
183}
184
Will Drewry6ac91122011-10-21 16:38:58 -0500185void API minijail_use_seccomp_filter(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400186{
187 j->flags.seccomp_filter = 1;
Will Drewry32ac9f52011-08-18 21:36:27 -0500188}
189
Jorge Lucangeli Obesbda833c2012-07-31 16:25:56 -0700190void API minijail_log_seccomp_filter_failures(struct minijail *j)
191{
192 j->flags.log_seccomp_filter = 1;
193}
194
Will Drewry6ac91122011-10-21 16:38:58 -0500195void API minijail_use_caps(struct minijail *j, uint64_t capmask)
Elly Jonese1749eb2011-10-07 13:54:59 -0400196{
197 j->caps = capmask;
198 j->flags.caps = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400199}
200
Will Drewry6ac91122011-10-21 16:38:58 -0500201void API minijail_namespace_vfs(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400202{
203 j->flags.vfs = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400204}
205
Will Drewry6ac91122011-10-21 16:38:58 -0500206void API minijail_namespace_pids(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400207{
Elly Jonese58176c2012-01-23 11:46:17 -0500208 j->flags.vfs = 1;
209 j->flags.readonly = 1;
Elly Jonese1749eb2011-10-07 13:54:59 -0400210 j->flags.pids = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400211}
212
Will Drewry6ac91122011-10-21 16:38:58 -0500213void API minijail_remount_readonly(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400214{
215 j->flags.vfs = 1;
216 j->flags.readonly = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400217}
218
Will Drewry6ac91122011-10-21 16:38:58 -0500219void API minijail_inherit_usergroups(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400220{
221 j->flags.usergroups = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400222}
223
Will Drewry6ac91122011-10-21 16:38:58 -0500224void API minijail_disable_ptrace(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400225{
226 j->flags.ptrace = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400227}
228
Will Drewry6ac91122011-10-21 16:38:58 -0500229int API minijail_enter_chroot(struct minijail *j, const char *dir) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400230 if (j->chrootdir)
231 return -EINVAL;
232 j->chrootdir = strdup(dir);
233 if (!j->chrootdir)
234 return -ENOMEM;
235 j->flags.chroot = 1;
236 return 0;
237}
238
Will Drewry6ac91122011-10-21 16:38:58 -0500239int API minijail_bind(struct minijail *j, const char *src, const char *dest,
240 int writeable) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400241 struct binding *b;
242
243 if (*dest != '/')
244 return -EINVAL;
245 b = calloc(1, sizeof(*b));
246 if (!b)
247 return -ENOMEM;
248 b->dest = strdup(dest);
249 if (!b->dest)
250 goto error;
251 b->src = strdup(src);
252 if (!b->src)
253 goto error;
254 b->writeable = writeable;
255
Jorge Lucangeli Obes224e4272012-08-02 14:31:39 -0700256 info("bind %s -> %s", src, dest);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400257
Elly Jonesdd3e8512012-01-23 15:13:38 -0500258 /*
259 * Force vfs namespacing so the bind mounts don't leak out into the
Elly Jones51a5b6c2011-10-12 19:09:26 -0400260 * containing vfs namespace.
261 */
262 minijail_namespace_vfs(j);
263
264 if (j->bindings_tail)
265 j->bindings_tail->next = b;
266 else
267 j->bindings_head = b;
268 j->bindings_tail = b;
269 j->binding_count++;
270
271 return 0;
272
273error:
274 free(b->src);
275 free(b->dest);
276 free(b);
277 return -ENOMEM;
278}
279
Will Drewry6ac91122011-10-21 16:38:58 -0500280void API minijail_parse_seccomp_filters(struct minijail *j, const char *path)
Elly Jonese1749eb2011-10-07 13:54:59 -0400281{
282 FILE *file = fopen(path, "r");
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800283 if (!file) {
Jorge Lucangeli Obes224e4272012-08-02 14:31:39 -0700284 pdie("failed to open seccomp filter file '%s'", path);
Elly Jonese1749eb2011-10-07 13:54:59 -0400285 }
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800286
287 struct sock_fprog *fprog = malloc(sizeof(struct sock_fprog));
Jorge Lucangeli Obesbda833c2012-07-31 16:25:56 -0700288 if (compile_filter(file, fprog, j->flags.log_seccomp_filter)) {
289 die("failed to compile seccomp filter BPF program in '%s'",
290 path);
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800291 }
292
293 j->filter_len = fprog->len;
294 j->filter_prog = fprog;
295
Elly Jonese1749eb2011-10-07 13:54:59 -0400296 fclose(file);
Will Drewry32ac9f52011-08-18 21:36:27 -0500297}
298
/*
 * Cursor used while serializing a jail: tracks how much room is left in the
 * output buffer and how many bytes a complete serialization needs.
 */
struct marshal_state {
	size_t available;	/* bytes still free at |buf| */
	size_t total;		/* bytes requested so far, whether written or not */
	char *buf;		/* next write position */
};
304
Will Drewry6ac91122011-10-21 16:38:58 -0500305void marshal_state_init(struct marshal_state *state,
306 char *buf, size_t available)
Elly Jonese1749eb2011-10-07 13:54:59 -0400307{
308 state->available = available;
309 state->buf = buf;
310 state->total = 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500311}
312
Will Drewry6ac91122011-10-21 16:38:58 -0500313void marshal_append(struct marshal_state *state,
314 char *src, size_t length)
Elly Jonese1749eb2011-10-07 13:54:59 -0400315{
316 size_t copy_len = MIN(state->available, length);
Will Drewryf89aef52011-09-16 16:48:57 -0500317
Elly Jonese1749eb2011-10-07 13:54:59 -0400318 /* Up to |available| will be written. */
319 if (copy_len) {
320 memcpy(state->buf, src, copy_len);
321 state->buf += copy_len;
322 state->available -= copy_len;
323 }
324 /* |total| will contain the expected length. */
325 state->total += length;
Will Drewryf89aef52011-09-16 16:48:57 -0500326}
327
Will Drewry6ac91122011-10-21 16:38:58 -0500328void minijail_marshal_helper(struct marshal_state *state,
329 const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400330{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400331 struct binding *b = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -0400332 marshal_append(state, (char *)j, sizeof(*j));
333 if (j->user)
334 marshal_append(state, j->user, strlen(j->user) + 1);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400335 if (j->chrootdir)
336 marshal_append(state, j->chrootdir, strlen(j->chrootdir) + 1);
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800337 if (j->flags.seccomp_filter && j->filter_prog) {
338 struct sock_fprog *fp = j->filter_prog;
339 marshal_append(state, (char *)fp->filter,
340 fp->len * sizeof(struct sock_filter));
Elly Jonese1749eb2011-10-07 13:54:59 -0400341 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400342 for (b = j->bindings_head; b; b = b->next) {
343 marshal_append(state, b->src, strlen(b->src) + 1);
344 marshal_append(state, b->dest, strlen(b->dest) + 1);
Jorge Lucangeli Obesbda833c2012-07-31 16:25:56 -0700345 marshal_append(state, (char *)&b->writeable,
346 sizeof(b->writeable));
Elly Jones51a5b6c2011-10-12 19:09:26 -0400347 }
Will Drewryf89aef52011-09-16 16:48:57 -0500348}
349
Will Drewry6ac91122011-10-21 16:38:58 -0500350size_t API minijail_size(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400351{
352 struct marshal_state state;
353 marshal_state_init(&state, NULL, 0);
354 minijail_marshal_helper(&state, j);
355 return state.total;
Will Drewry2ddaad02011-09-16 11:36:08 -0500356}
357
Elly Jonese1749eb2011-10-07 13:54:59 -0400358int minijail_marshal(const struct minijail *j, char *buf, size_t available)
359{
360 struct marshal_state state;
361 marshal_state_init(&state, buf, available);
362 minijail_marshal_helper(&state, j);
363 return (state.total > available);
Will Drewry2ddaad02011-09-16 11:36:08 -0500364}
365
/* consumebytes: takes @length bytes off the front of a buffer
 * @length     Number of bytes to take
 * @buf        In/out: current read position, advanced on success
 * @buflength  In/out: bytes remaining at *@buf, reduced on success
 *
 * Returns a pointer to the first consumed byte, or NULL (leaving both
 * in/out arguments untouched) if fewer than @length bytes remain.
 */
void *consumebytes(size_t length, char **buf, size_t *buflength)
{
	char *base;

	if (*buflength < length)
		return NULL;

	base = *buf;
	*buf = base + length;
	*buflength -= length;
	return base;
}
381
/* consumestr: takes one NUL-terminated string off the front of a buffer
 * @buf        In/out: current read position, advanced past the NUL on success
 * @buflength  In/out: bytes remaining at *@buf, reduced on success
 *
 * Returns a pointer to the start of the string, or NULL (leaving both
 * arguments untouched) if no terminator is found within *@buflength bytes.
 */
char *consumestr(char **buf, size_t *buflength)
{
	char *start = *buf;
	char *nul = memchr(start, '\0', *buflength);
	size_t consumed;

	if (nul == NULL)
		/* There's no null-terminator */
		return NULL;

	/* The terminator lies inside the buffer, so string + NUL always fits. */
	consumed = (size_t)(nul - start) + 1;
	*buf = start + consumed;
	*buflength -= consumed;
	return start;
}
395
Elly Jonese1749eb2011-10-07 13:54:59 -0400396int minijail_unmarshal(struct minijail *j, char *serialized, size_t length)
397{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400398 int i;
399 int count;
Will Drewrybee7ba72011-10-21 20:47:01 -0500400 int ret = -EINVAL;
401
Elly Jonese1749eb2011-10-07 13:54:59 -0400402 if (length < sizeof(*j))
Will Drewrybee7ba72011-10-21 20:47:01 -0500403 goto out;
Elly Jonese1749eb2011-10-07 13:54:59 -0400404 memcpy((void *)j, serialized, sizeof(*j));
405 serialized += sizeof(*j);
406 length -= sizeof(*j);
Will Drewryf89aef52011-09-16 16:48:57 -0500407
Will Drewrybee7ba72011-10-21 20:47:01 -0500408 /* Potentially stale pointers not used as signals. */
409 j->bindings_head = NULL;
410 j->bindings_tail = NULL;
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800411 j->filter_prog = NULL;
Will Drewrybee7ba72011-10-21 20:47:01 -0500412
Elly Jonese1749eb2011-10-07 13:54:59 -0400413 if (j->user) { /* stale pointer */
Elly Jones51a5b6c2011-10-12 19:09:26 -0400414 char *user = consumestr(&serialized, &length);
415 if (!user)
Will Drewrybee7ba72011-10-21 20:47:01 -0500416 goto clear_pointers;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400417 j->user = strdup(user);
Will Drewrybee7ba72011-10-21 20:47:01 -0500418 if (!j->user)
419 goto clear_pointers;
Elly Jonese1749eb2011-10-07 13:54:59 -0400420 }
Will Drewryf89aef52011-09-16 16:48:57 -0500421
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400422 if (j->chrootdir) { /* stale pointer */
423 char *chrootdir = consumestr(&serialized, &length);
424 if (!chrootdir)
Will Drewrybee7ba72011-10-21 20:47:01 -0500425 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400426 j->chrootdir = strdup(chrootdir);
Will Drewrybee7ba72011-10-21 20:47:01 -0500427 if (!j->chrootdir)
428 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400429 }
430
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800431 if (j->flags.seccomp_filter && j->filter_len > 0) {
432 size_t ninstrs = j->filter_len;
433 if (ninstrs > (SIZE_MAX / sizeof(struct sock_filter)) ||
434 ninstrs > USHRT_MAX)
435 goto bad_filters;
436
437 size_t program_len = ninstrs * sizeof(struct sock_filter);
438 void *program = consumebytes(program_len, &serialized, &length);
439 if (!program)
440 goto bad_filters;
441
442 j->filter_prog = malloc(sizeof(struct sock_fprog));
443 j->filter_prog->len = ninstrs;
444 j->filter_prog->filter = malloc(program_len);
445 memcpy(j->filter_prog->filter, program, program_len);
Elly Jonese1749eb2011-10-07 13:54:59 -0400446 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400447
448 count = j->binding_count;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400449 j->binding_count = 0;
450 for (i = 0; i < count; ++i) {
451 int *writeable;
452 const char *dest;
453 const char *src = consumestr(&serialized, &length);
454 if (!src)
Will Drewrybee7ba72011-10-21 20:47:01 -0500455 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400456 dest = consumestr(&serialized, &length);
457 if (!dest)
Will Drewrybee7ba72011-10-21 20:47:01 -0500458 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400459 writeable = consumebytes(sizeof(*writeable), &serialized, &length);
460 if (!writeable)
Will Drewrybee7ba72011-10-21 20:47:01 -0500461 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400462 if (minijail_bind(j, src, dest, *writeable))
Will Drewrybee7ba72011-10-21 20:47:01 -0500463 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400464 }
465
Elly Jonese1749eb2011-10-07 13:54:59 -0400466 return 0;
Will Drewrybee7ba72011-10-21 20:47:01 -0500467
468bad_bindings:
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800469 if (j->flags.seccomp_filter && j->filter_len > 0) {
470 free(j->filter_prog->filter);
471 free(j->filter_prog);
472 }
Will Drewrybee7ba72011-10-21 20:47:01 -0500473bad_filters:
474 if (j->chrootdir)
475 free(j->chrootdir);
476bad_chrootdir:
477 if (j->user)
478 free(j->user);
479clear_pointers:
480 j->user = NULL;
481 j->chrootdir = NULL;
482out:
483 return ret;
Will Drewry2ddaad02011-09-16 11:36:08 -0500484}
485
Elly Jonese1749eb2011-10-07 13:54:59 -0400486void minijail_preenter(struct minijail *j)
487{
488 /* Strip out options which are minijail_run() only. */
489 j->flags.vfs = 0;
490 j->flags.readonly = 0;
491 j->flags.pids = 0;
Will Drewryfe4a3722011-09-16 14:50:50 -0500492}
493
Elly Jonese1749eb2011-10-07 13:54:59 -0400494void minijail_preexec(struct minijail *j)
495{
496 int vfs = j->flags.vfs;
497 int readonly = j->flags.readonly;
498 if (j->user)
499 free(j->user);
500 j->user = NULL;
501 memset(&j->flags, 0, sizeof(j->flags));
502 /* Now restore anything we meant to keep. */
503 j->flags.vfs = vfs;
504 j->flags.readonly = readonly;
505 /* Note, pidns will already have been used before this call. */
Will Drewry2ddaad02011-09-16 11:36:08 -0500506}
507
Elly Jones51a5b6c2011-10-12 19:09:26 -0400508/* bind_one: Applies bindings from @b for @j, recursing as needed.
509 * @j Minijail these bindings are for
510 * @b Head of list of bindings
511 *
512 * Returns 0 for success.
513 */
Will Drewry6ac91122011-10-21 16:38:58 -0500514int bind_one(const struct minijail *j, struct binding *b) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400515 int ret = 0;
516 char *dest = NULL;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400517 if (ret)
518 return ret;
519 /* dest has a leading "/" */
520 if (asprintf(&dest, "%s%s", j->chrootdir, b->dest) < 0)
521 return -ENOMEM;
Elly Jonesa1059632011-12-15 15:17:07 -0500522 ret = mount(b->src, dest, NULL, MS_BIND, NULL);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400523 if (ret)
524 pdie("bind: %s -> %s", b->src, dest);
Elly Jonesa1059632011-12-15 15:17:07 -0500525 if (!b->writeable) {
526 ret = mount(b->src, dest, NULL,
527 MS_BIND | MS_REMOUNT | MS_RDONLY, NULL);
528 if (ret)
529 pdie("bind ro: %s -> %s", b->src, dest);
530 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400531 free(dest);
532 if (b->next)
533 return bind_one(j, b->next);
534 return ret;
535}
536
Will Drewry6ac91122011-10-21 16:38:58 -0500537int enter_chroot(const struct minijail *j) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400538 int ret;
539 if (j->bindings_head && (ret = bind_one(j, j->bindings_head)))
540 return ret;
541
542 if (chroot(j->chrootdir))
543 return -errno;
544
545 if (chdir("/"))
546 return -errno;
547
548 return 0;
549}
550
/*
 * remount_readonly: replaces the /proc mount with a fresh read-only one
 * mounted nodev, noexec and nosuid.
 *
 * Returns 0 on success or -errno from umount()/mount().
 */
int remount_readonly(void)
{
	static const char proc_path[] = "/proc";
	const unsigned int mount_flags =
	    MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY;

	/*
	 * Right now, we're holding a reference to our parent's old mount of
	 * /proc in our namespace, which means using MS_REMOUNT here would
	 * mutate our parent's mount as well, even though we're in a VFS
	 * namespace (!). Instead, remove their mount from our namespace
	 * and make our own.
	 */
	if (umount(proc_path) != 0)
		return -errno;

	if (mount("", proc_path, "proc", mount_flags, "") != 0)
		return -errno;

	return 0;
}
568
Jorge Lucangeli Obes6201cf52012-08-23 11:42:27 -0700569void drop_ugid(const struct minijail *j)
570{
571 if (j->flags.usergroups) {
572 if (initgroups(j->user, j->usergid))
573 pdie("initgroups");
574 } else {
575 /* Only attempt to clear supplemental groups if we are changing
576 * users. */
577 if ((j->uid || j->gid) && setgroups(0, NULL))
578 pdie("setgroups");
579 }
580
581 if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
582 pdie("setresgid");
583
584 if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
585 pdie("setresuid");
586}
587
Will Drewry6ac91122011-10-21 16:38:58 -0500588void drop_caps(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400589{
590 cap_t caps = cap_get_proc();
591 cap_value_t raise_flag[1];
592 unsigned int i;
593 if (!caps)
594 die("can't get process caps");
595 if (cap_clear_flag(caps, CAP_INHERITABLE))
596 die("can't clear inheritable caps");
597 if (cap_clear_flag(caps, CAP_EFFECTIVE))
598 die("can't clear effective caps");
599 if (cap_clear_flag(caps, CAP_PERMITTED))
600 die("can't clear permitted caps");
601 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
602 if (i != CAP_SETPCAP && !(j->caps & (1 << i)))
603 continue;
604 raise_flag[0] = i;
605 if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
606 die("can't add effective cap");
607 if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
608 die("can't add permitted cap");
609 if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
610 die("can't add inheritable cap");
611 }
612 if (cap_set_proc(caps))
613 die("can't apply cleaned capset");
614 cap_free(caps);
615 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
616 if (j->caps & (1 << i))
617 continue;
618 if (prctl(PR_CAPBSET_DROP, i))
619 pdie("prctl(PR_CAPBSET_DROP)");
620 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400621}
622
Jorge Lucangeli Obes6201cf52012-08-23 11:42:27 -0700623void set_seccomp_filter(const struct minijail *j)
624{
625 /*
626 * Set no_new_privs. See </kernel/seccomp.c> and </kernel/sys.c>
627 * in the kernel source tree for an explanation of the parameters.
628 */
629 if (j->flags.no_new_privs) {
630 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
631 pdie("prctl(PR_SET_NO_NEW_PRIVS)");
632 }
633
634 /*
635 * If we're logging seccomp filter failures,
636 * install the SIGSYS handler first.
637 */
638 if (j->flags.seccomp_filter && j->flags.log_seccomp_filter) {
639 if (install_sigsys_handler())
640 pdie("install SIGSYS handler");
641 warn("logging seccomp filter failures");
642 }
643
644 /*
645 * Install the syscall filter.
646 */
647 if (j->flags.seccomp_filter) {
648 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, j->filter_prog))
649 pdie("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER)");
650 }
651}
652
Will Drewry6ac91122011-10-21 16:38:58 -0500653void API minijail_enter(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400654{
655 if (j->flags.pids)
656 die("tried to enter a pid-namespaced jail;"
657 "try minijail_run()?");
Elly Jonescd7a9042011-07-22 13:56:51 -0400658
Elly Jonese1749eb2011-10-07 13:54:59 -0400659 if (j->flags.usergroups && !j->user)
660 die("usergroup inheritance without username");
Elly Jonescd7a9042011-07-22 13:56:51 -0400661
Elly Jonesdd3e8512012-01-23 15:13:38 -0500662 /*
663 * We can't recover from failures if we've dropped privileges partially,
Elly Jonese1749eb2011-10-07 13:54:59 -0400664 * so we don't even try. If any of our operations fail, we abort() the
665 * entire process.
666 */
667 if (j->flags.vfs && unshare(CLONE_NEWNS))
668 pdie("unshare");
Elly Jonescd7a9042011-07-22 13:56:51 -0400669
Elly Jones51a5b6c2011-10-12 19:09:26 -0400670 if (j->flags.chroot && enter_chroot(j))
671 pdie("chroot");
672
Elly Jonese1749eb2011-10-07 13:54:59 -0400673 if (j->flags.readonly && remount_readonly())
674 pdie("remount");
Elly Jonescd7a9042011-07-22 13:56:51 -0400675
Elly Jonese1749eb2011-10-07 13:54:59 -0400676 if (j->flags.caps) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500677 /*
678 * POSIX capabilities are a bit tricky. If we drop our
Elly Jonese1749eb2011-10-07 13:54:59 -0400679 * capability to change uids, our attempt to use setuid()
680 * below will fail. Hang on to root caps across setuid(), then
681 * lock securebits.
682 */
683 if (prctl(PR_SET_KEEPCAPS, 1))
684 pdie("prctl(PR_SET_KEEPCAPS)");
685 if (prctl
686 (PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
687 pdie("prctl(PR_SET_SECUREBITS)");
688 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400689
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -0700690 /*
Jorge Lucangeli Obes6201cf52012-08-23 11:42:27 -0700691 * If we're setting no_new_privs, we can drop privileges
692 * before setting seccomp filter. This way filter policies
693 * don't need to allow privilege-dropping syscalls.
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -0700694 */
695 if (j->flags.no_new_privs) {
Jorge Lucangeli Obes6201cf52012-08-23 11:42:27 -0700696 drop_ugid(j);
697 if (j->flags.caps)
698 drop_caps(j);
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -0700699
Jorge Lucangeli Obes6201cf52012-08-23 11:42:27 -0700700 set_seccomp_filter(j);
Elly Jonese1749eb2011-10-07 13:54:59 -0400701 } else {
Jorge Lucangeli Obes6201cf52012-08-23 11:42:27 -0700702 /*
703 * If we're not setting no_new_privs,
704 * we need to set seccomp filter *before* dropping privileges.
705 * WARNING: this means that filter policies *must* allow
706 * setgroups()/setresgid()/setresuid() for dropping root and
707 * capget()/capset()/prctl() for dropping caps.
708 */
709 set_seccomp_filter(j);
710
711 drop_ugid(j);
712 if (j->flags.caps)
713 drop_caps(j);
Elly Jonese1749eb2011-10-07 13:54:59 -0400714 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400715
Elly Jonesdd3e8512012-01-23 15:13:38 -0500716 /*
717 * seccomp has to come last since it cuts off all the other
Elly Jonese1749eb2011-10-07 13:54:59 -0400718 * privilege-dropping syscalls :)
719 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400720 if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
721 pdie("prctl(PR_SET_SECCOMP)");
Elly Jonescd7a9042011-07-22 13:56:51 -0400722}
723
/* TODO(wad) will visibility affect this variable? */
/*
 * Wait status of the jailed root process, recorded by init() and read by the
 * SIGTERM handler init_term().  Because a signal handler reads it, the object
 * is volatile sig_atomic_t.
 */
static volatile sig_atomic_t init_exitstatus = 0;
726
Will Drewry6ac91122011-10-21 16:38:58 -0500727void init_term(int __attribute__ ((unused)) sig)
Elly Jonese1749eb2011-10-07 13:54:59 -0400728{
729 _exit(init_exitstatus);
Elly Jonescd7a9042011-07-22 13:56:51 -0400730}
731
Will Drewry6ac91122011-10-21 16:38:58 -0500732int init(pid_t rootpid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400733{
734 pid_t pid;
735 int status;
736 /* so that we exit with the right status */
737 signal(SIGTERM, init_term);
738 /* TODO(wad) self jail with seccomp_filters here. */
739 while ((pid = wait(&status)) > 0) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500740 /*
741 * This loop will only end when either there are no processes
Elly Jonese1749eb2011-10-07 13:54:59 -0400742 * left inside our pid namespace or we get a signal.
743 */
744 if (pid == rootpid)
745 init_exitstatus = status;
746 }
747 if (!WIFEXITED(init_exitstatus))
748 _exit(MINIJAIL_ERR_INIT);
749 _exit(WEXITSTATUS(init_exitstatus));
Elly Jonescd7a9042011-07-22 13:56:51 -0400750}
751
Will Drewry6ac91122011-10-21 16:38:58 -0500752int API minijail_from_fd(int fd, struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400753{
754 size_t sz = 0;
755 size_t bytes = read(fd, &sz, sizeof(sz));
756 char *buf;
757 int r;
758 if (sizeof(sz) != bytes)
759 return -EINVAL;
760 if (sz > USHRT_MAX) /* Arbitrary sanity check */
761 return -E2BIG;
762 buf = malloc(sz);
763 if (!buf)
764 return -ENOMEM;
765 bytes = read(fd, buf, sz);
766 if (bytes != sz) {
767 free(buf);
768 return -EINVAL;
769 }
770 r = minijail_unmarshal(j, buf, sz);
771 free(buf);
772 return r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500773}
774
Will Drewry6ac91122011-10-21 16:38:58 -0500775int API minijail_to_fd(struct minijail *j, int fd)
Elly Jonese1749eb2011-10-07 13:54:59 -0400776{
777 char *buf;
778 size_t sz = minijail_size(j);
779 ssize_t written;
780 int r;
Elly Jonescd7a9042011-07-22 13:56:51 -0400781
Elly Jonese1749eb2011-10-07 13:54:59 -0400782 if (!sz)
783 return -EINVAL;
784 buf = malloc(sz);
785 r = minijail_marshal(j, buf, sz);
786 if (r) {
787 free(buf);
788 return r;
789 }
790 /* Sends [size][minijail]. */
791 written = write(fd, &sz, sizeof(sz));
792 if (written != sizeof(sz)) {
793 free(buf);
794 return -EFAULT;
795 }
796 written = write(fd, buf, sz);
797 if (written < 0 || (size_t) written != sz) {
798 free(buf);
799 return -EFAULT;
800 }
801 free(buf);
802 return 0;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500803}
Elly Jonescd7a9042011-07-22 13:56:51 -0400804
Will Drewry6ac91122011-10-21 16:38:58 -0500805int setup_preload(void)
Elly Jonese1749eb2011-10-07 13:54:59 -0400806{
807 char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
808 char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
809 if (!newenv)
810 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400811
Elly Jonese1749eb2011-10-07 13:54:59 -0400812 /* Only insert a separating space if we have something to separate... */
813 sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "",
814 PRELOADPATH);
Elly Jonescd7a9042011-07-22 13:56:51 -0400815
Elly Jonese1749eb2011-10-07 13:54:59 -0400816 /* setenv() makes a copy of the string we give it */
817 setenv(kLdPreloadEnvVar, newenv, 1);
818 free(newenv);
819 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400820}
821
Will Drewry6ac91122011-10-21 16:38:58 -0500822int setup_pipe(int fds[2])
Elly Jonese1749eb2011-10-07 13:54:59 -0400823{
824 int r = pipe(fds);
825 char fd_buf[11];
826 if (r)
827 return r;
828 r = snprintf(fd_buf, sizeof(fd_buf), "%d", fds[0]);
829 if (r <= 0)
830 return -EINVAL;
831 setenv(kFdEnvVar, fd_buf, 1);
832 return 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500833}
834
Will Drewry6ac91122011-10-21 16:38:58 -0500835int API minijail_run(struct minijail *j, const char *filename,
836 char *const argv[])
Elly Jonese1749eb2011-10-07 13:54:59 -0400837{
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700838 return minijail_run_pid(j, filename, argv, NULL);
839}
840
841int API minijail_run_pid(struct minijail *j, const char *filename,
842 char *const argv[], pid_t *pchild_pid)
843{
Elly Jonese1749eb2011-10-07 13:54:59 -0400844 char *oldenv, *oldenv_copy = NULL;
845 pid_t child_pid;
846 int pipe_fds[2];
847 int ret;
Elly Jonesa05d7bb2012-06-14 14:09:27 -0400848 /* We need to remember this across the minijail_preexec() call. */
849 int pid_namespace = j->flags.pids;
Ben Chan541c7e52011-08-26 14:55:53 -0700850
Elly Jonese1749eb2011-10-07 13:54:59 -0400851 oldenv = getenv(kLdPreloadEnvVar);
852 if (oldenv) {
853 oldenv_copy = strdup(oldenv);
854 if (!oldenv_copy)
855 return -ENOMEM;
856 }
Will Drewryf89aef52011-09-16 16:48:57 -0500857
Elly Jonese1749eb2011-10-07 13:54:59 -0400858 if (setup_preload())
859 return -EFAULT;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500860
Elly Jonesdd3e8512012-01-23 15:13:38 -0500861 /*
862 * Before we fork(2) and execve(2) the child process, we need to open
Elly Jonese1749eb2011-10-07 13:54:59 -0400863 * a pipe(2) to send the minijail configuration over.
864 */
865 if (setup_pipe(pipe_fds))
866 return -EFAULT;
Elly Jonescd7a9042011-07-22 13:56:51 -0400867
Elly Jones761b7412012-06-13 15:49:52 -0400868 /* Use sys_clone() if and only if we're creating a pid namespace.
869 *
870 * tl;dr: WARNING: do not mix pid namespaces and multithreading.
871 *
872 * In multithreaded programs, there are a bunch of locks inside libc,
873 * some of which may be held by other threads at the time that we call
874 * minijail_run_pid(). If we call fork(), glibc does its level best to
875 * ensure that we hold all of these locks before it calls clone()
876 * internally and drop them after clone() returns, but when we call
877 * sys_clone(2) directly, all that gets bypassed and we end up with a
878 * child address space where some of libc's important locks are held by
879 * other threads (which did not get cloned, and hence will never release
880 * those locks). This is okay so long as we call exec() immediately
881 * after, but a bunch of seemingly-innocent libc functions like setenv()
882 * take locks.
883 *
884 * Hence, only call sys_clone() if we need to, in order to get at pid
885 * namespacing. If we follow this path, the child's address space might
886 * have broken locks; you may only call functions that do not acquire
887 * any locks.
888 *
889 * Unfortunately, fork() acquires every lock it can get its hands on, as
890 * previously detailed, so this function is highly likely to deadlock
891 * later on (see "deadlock here") if we're multithreaded.
892 *
893 * We might hack around this by having the clone()d child (init of the
894 * pid namespace) return directly, rather than leaving the clone()d
895 * process hanging around to be init for the new namespace (and having
896 * its fork()ed child return in turn), but that process would be crippled
897 * with its libc locks potentially broken. We might try fork()ing in the
898 * parent before we clone() to ensure that we own all the locks, but
899 * then we have to have the forked child hanging around consuming
900 * resources (and possibly having file descriptors / shared memory
901 * regions / etc attached). We'd need to keep the child around to avoid
902 * having its children get reparented to init.
903 *
904 * TODO(ellyjones): figure out if the "forked child hanging around"
905 * problem is fixable or not. It would be nice if we worked in this
906 * case.
907 */
Elly Jonesa05d7bb2012-06-14 14:09:27 -0400908 if (pid_namespace)
Elly Jones761b7412012-06-13 15:49:52 -0400909 child_pid = syscall(SYS_clone, CLONE_NEWPID | SIGCHLD, NULL);
910 else
911 child_pid = fork();
912
Elly Jonese1749eb2011-10-07 13:54:59 -0400913 if (child_pid < 0) {
914 free(oldenv_copy);
915 return child_pid;
916 }
Will Drewryf89aef52011-09-16 16:48:57 -0500917
Elly Jonese1749eb2011-10-07 13:54:59 -0400918 if (child_pid) {
919 /* Restore parent's LD_PRELOAD. */
920 if (oldenv_copy) {
921 setenv(kLdPreloadEnvVar, oldenv_copy, 1);
922 free(oldenv_copy);
923 } else {
924 unsetenv(kLdPreloadEnvVar);
925 }
926 unsetenv(kFdEnvVar);
927 j->initpid = child_pid;
928 close(pipe_fds[0]); /* read endpoint */
929 ret = minijail_to_fd(j, pipe_fds[1]);
930 close(pipe_fds[1]); /* write endpoint */
931 if (ret) {
932 kill(j->initpid, SIGKILL);
933 die("failed to send marshalled minijail");
934 }
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700935 if (pchild_pid)
936 *pchild_pid = child_pid;
Elly Jonese1749eb2011-10-07 13:54:59 -0400937 return 0;
938 }
939 free(oldenv_copy);
Ben Chan541c7e52011-08-26 14:55:53 -0700940
Elly Jonese1749eb2011-10-07 13:54:59 -0400941 /* Drop everything that cannot be inherited across execve. */
942 minijail_preexec(j);
943 /* Jail this process and its descendants... */
944 minijail_enter(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400945
Elly Jonesa05d7bb2012-06-14 14:09:27 -0400946 if (pid_namespace) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500947 /*
948 * pid namespace: this process will become init inside the new
Elly Jonese1749eb2011-10-07 13:54:59 -0400949 * namespace, so fork off a child to actually run the program
950 * (we don't want all programs we might exec to have to know
951 * how to be init).
Elly Jones761b7412012-06-13 15:49:52 -0400952 *
953 * If we're multithreaded, we'll probably deadlock here. See
954 * WARNING above.
Elly Jonese1749eb2011-10-07 13:54:59 -0400955 */
956 child_pid = fork();
957 if (child_pid < 0)
958 _exit(child_pid);
959 else if (child_pid > 0)
960 init(child_pid); /* never returns */
961 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400962
Elly Jonesdd3e8512012-01-23 15:13:38 -0500963 /*
964 * If we aren't pid-namespaced:
Elly Jonese1749eb2011-10-07 13:54:59 -0400965 * calling process
966 * -> execve()-ing process
967 * If we are:
968 * calling process
969 * -> init()-ing process
970 * -> execve()-ing process
971 */
972 _exit(execve(filename, argv, environ));
Elly Jonescd7a9042011-07-22 13:56:51 -0400973}
974
Will Drewry6ac91122011-10-21 16:38:58 -0500975int API minijail_kill(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400976{
977 int st;
978 if (kill(j->initpid, SIGTERM))
979 return -errno;
980 if (waitpid(j->initpid, &st, 0) < 0)
981 return -errno;
982 return st;
Elly Jonescd7a9042011-07-22 13:56:51 -0400983}
984
Will Drewry6ac91122011-10-21 16:38:58 -0500985int API minijail_wait(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400986{
987 int st;
988 if (waitpid(j->initpid, &st, 0) < 0)
989 return -errno;
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -0700990 if (!WIFEXITED(st)) {
991 if (WIFSIGNALED(st))
992 warn("child process received signal %d", WTERMSIG(st));
Elly Jonese1749eb2011-10-07 13:54:59 -0400993 return MINIJAIL_ERR_JAIL;
Jorge Lucangeli Obesc2c9bcc2012-05-01 09:30:24 -0700994 }
Elly Jonese1749eb2011-10-07 13:54:59 -0400995 return WEXITSTATUS(st);
Elly Jonescd7a9042011-07-22 13:56:51 -0400996}
997
Will Drewry6ac91122011-10-21 16:38:58 -0500998void API minijail_destroy(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400999{
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -08001000 if (j->flags.seccomp_filter && j->filter_prog) {
1001 free(j->filter_prog->filter);
1002 free(j->filter_prog);
Elly Jonese1749eb2011-10-07 13:54:59 -04001003 }
Elly Jones51a5b6c2011-10-12 19:09:26 -04001004 while (j->bindings_head) {
1005 struct binding *b = j->bindings_head;
1006 j->bindings_head = j->bindings_head->next;
1007 free(b->dest);
1008 free(b->src);
1009 free(b);
1010 }
1011 j->bindings_tail = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -04001012 if (j->user)
1013 free(j->user);
Will Drewrybee7ba72011-10-21 20:47:01 -05001014 if (j->chrootdir)
1015 free(j->chrootdir);
Elly Jonese1749eb2011-10-07 13:54:59 -04001016 free(j);
Elly Jonescd7a9042011-07-22 13:56:51 -04001017}