blob: 1451dcd3cccd0832b518bd219fc77ddfe19f674f [file] [log] [blame]
Elly Jonesdd3e8512012-01-23 15:13:38 -05001/*
2 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Elly Jonescd7a9042011-07-22 13:56:51 -04003 * Use of this source code is governed by a BSD-style license that can be
Will Drewry32ac9f52011-08-18 21:36:27 -05004 * found in the LICENSE file.
5 */
Elly Jonescd7a9042011-07-22 13:56:51 -04006
7#define _BSD_SOURCE
8#define _GNU_SOURCE
Will Drewry32ac9f52011-08-18 21:36:27 -05009#include <ctype.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040010#include <errno.h>
11#include <grp.h>
12#include <inttypes.h>
Will Drewryfe4a3722011-09-16 14:50:50 -050013#include <limits.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040014#include <linux/capability.h>
15#include <linux/securebits.h>
16#include <pwd.h>
17#include <sched.h>
18#include <signal.h>
Will Drewry2f54b6a2011-09-16 13:45:31 -050019#include <stdarg.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040020#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <syscall.h>
24#include <sys/capability.h>
25#include <sys/mount.h>
Will Drewryf89aef52011-09-16 16:48:57 -050026#include <sys/param.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040027#include <sys/prctl.h>
28#include <sys/wait.h>
29#include <syslog.h>
30#include <unistd.h>
31
32#include "libminijail.h"
Will Drewry32ac9f52011-08-18 21:36:27 -050033#include "libsyscalls.h"
Elly Jonescd7a9042011-07-22 13:56:51 -040034#include "libminijail-private.h"
35
/*
 * Fallback values for the seccomp-filter prctl(2) interface, defined only
 * when the system headers do not already provide PR_SET_SECCOMP_FILTER.
 */
#ifndef PR_SET_SECCOMP_FILTER
#define PR_SECCOMP_FILTER_SYSCALL	0
#define PR_SECCOMP_FILTER_EVENT		1
#define PR_GET_SECCOMP_FILTER		35
#define PR_SET_SECCOMP_FILTER		36
#define PR_CLEAR_SECCOMP_FILTER		37
#endif
44
/*
 * die: logs a printf-style message to syslog at LOG_ERR, prefixed with
 * "libminijail: ", then abort()s the process. Never returns.
 */
#define die(_msg, ...) \
	do { \
		syslog(LOG_ERR, "libminijail: " _msg, ## __VA_ARGS__); \
		abort(); \
	} while (0)
Elly Jonescd7a9042011-07-22 13:56:51 -040049
/*
 * pdie: like die(), but appends ": " and strerror(errno) to the message.
 * errno is read at the point of the call.
 */
#define pdie(_msg, ...) \
	die(_msg ": %s", ## __VA_ARGS__, strerror(errno))
/*
 * warn: logs a printf-style message to syslog at LOG_WARNING, prefixed with
 * "libminijail: ". Execution continues afterwards.
 */
#define warn(_msg, ...) \
	syslog(LOG_WARNING, "libminijail: " _msg, ## __VA_ARGS__)
/*
 * One seccomp filter rule. Rules are kept on a circular doubly-linked list
 * whose first element is minijail.filters.
 */
struct seccomp_filter {
	int nr;				/* syscall number the rule applies to */
	char *filter;			/* heap-allocated filter string */
	struct seccomp_filter *next;	/* following rule (wraps to the head) */
	struct seccomp_filter *prev;	/* preceding rule (head's prev is tail) */
};
61
/* One bind mount request; requests form a NULL-terminated singly-linked list. */
struct binding {
	char *src;		/* heap-allocated source path */
	char *dest;		/* heap-allocated destination; starts with '/' */
	int writeable;		/* zero means the bind is remounted read-only */
	struct binding *next;	/* next request, or NULL at the tail */
};
68
/*
 * Complete description of a jail. The struct is copied byte-for-byte by the
 * marshalling code, so member order and sizes must not change; the pointer
 * members are meaningless after such a copy and are rebuilt by
 * minijail_unmarshal().
 */
struct minijail {
	/*
	 * One bit per requested restriction. These are unsigned: a signed
	 * 1-bit field can only portably hold 0 and -1, so storing 1 in it is
	 * implementation-defined.
	 */
	struct {
		unsigned int uid:1;
		unsigned int gid:1;
		unsigned int caps:1;
		unsigned int vfs:1;
		unsigned int pids:1;
		unsigned int seccomp:1;
		unsigned int readonly:1;
		unsigned int usergroups:1;
		unsigned int ptrace:1;
		unsigned int seccomp_filter:1;
		unsigned int chroot:1;
	} flags;
	uid_t uid;		/* uid to switch to when flags.uid is set */
	gid_t gid;		/* gid to switch to when flags.gid is set */
	gid_t usergid;		/* primary gid of |user|, passed to initgroups() */
	char *user;		/* heap-allocated user name, or NULL */
	uint64_t caps;		/* bitmask of capabilities to keep */
	pid_t initpid;		/* NOTE(review): not used in the visible code */
	int filter_count;	/* number of entries on |filters| */
	int binding_count;	/* number of entries on the bindings list */
	char *chrootdir;	/* heap-allocated chroot path, or NULL */
	struct seccomp_filter *filters;	/* circular list of seccomp rules */
	struct binding *bindings_head;	/* first bind mount request */
	struct binding *bindings_tail;	/* last bind mount request */
};
96
Will Drewry6ac91122011-10-21 16:38:58 -050097struct minijail API *minijail_new(void)
Elly Jonese1749eb2011-10-07 13:54:59 -040098{
Elly Jones51a5b6c2011-10-12 19:09:26 -040099 return calloc(1, sizeof(struct minijail));
Elly Jonescd7a9042011-07-22 13:56:51 -0400100}
101
Will Drewry6ac91122011-10-21 16:38:58 -0500102void API minijail_change_uid(struct minijail *j, uid_t uid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400103{
104 if (uid == 0)
105 die("useless change to uid 0");
106 j->uid = uid;
107 j->flags.uid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400108}
109
Will Drewry6ac91122011-10-21 16:38:58 -0500110void API minijail_change_gid(struct minijail *j, gid_t gid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400111{
112 if (gid == 0)
113 die("useless change to gid 0");
114 j->gid = gid;
115 j->flags.gid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400116}
117
Will Drewry6ac91122011-10-21 16:38:58 -0500118int API minijail_change_user(struct minijail *j, const char *user)
Elly Jonese1749eb2011-10-07 13:54:59 -0400119{
120 char *buf = NULL;
121 struct passwd pw;
122 struct passwd *ppw = NULL;
123 ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX);
124 if (sz == -1)
125 sz = 65536; /* your guess is as good as mine... */
Elly Joneseb300c52011-09-22 14:35:43 -0400126
Elly Jonesdd3e8512012-01-23 15:13:38 -0500127 /*
128 * sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400129 * the maximum needed size of the buffer, so we don't have to search.
130 */
131 buf = malloc(sz);
132 if (!buf)
133 return -ENOMEM;
134 getpwnam_r(user, &pw, buf, sz, &ppw);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500135 /*
136 * We're safe to free the buffer here. The strings inside pw point
137 * inside buf, but we don't use any of them; this leaves the pointers
138 * dangling but it's safe. ppw points at pw if getpwnam_r succeeded.
139 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400140 free(buf);
141 if (!ppw)
142 return -errno;
143 minijail_change_uid(j, ppw->pw_uid);
144 j->user = strdup(user);
145 if (!j->user)
146 return -ENOMEM;
147 j->usergid = ppw->pw_gid;
148 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400149}
150
Will Drewry6ac91122011-10-21 16:38:58 -0500151int API minijail_change_group(struct minijail *j, const char *group)
Elly Jonese1749eb2011-10-07 13:54:59 -0400152{
153 char *buf = NULL;
154 struct group gr;
155 struct group *pgr = NULL;
156 ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX);
157 if (sz == -1)
158 sz = 65536; /* and mine is as good as yours, really */
Elly Joneseb300c52011-09-22 14:35:43 -0400159
Elly Jonesdd3e8512012-01-23 15:13:38 -0500160 /*
161 * sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400162 * the maximum needed size of the buffer, so we don't have to search.
163 */
164 buf = malloc(sz);
165 if (!buf)
166 return -ENOMEM;
167 getgrnam_r(group, &gr, buf, sz, &pgr);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500168 /*
169 * We're safe to free the buffer here. The strings inside gr point
170 * inside buf, but we don't use any of them; this leaves the pointers
171 * dangling but it's safe. pgr points at gr if getgrnam_r succeeded.
172 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400173 free(buf);
174 if (!pgr)
175 return -errno;
176 minijail_change_gid(j, pgr->gr_gid);
177 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400178}
179
Will Drewry6ac91122011-10-21 16:38:58 -0500180void API minijail_use_seccomp(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400181{
182 j->flags.seccomp = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400183}
184
Will Drewry6ac91122011-10-21 16:38:58 -0500185void API minijail_use_seccomp_filter(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400186{
187 j->flags.seccomp_filter = 1;
Will Drewry32ac9f52011-08-18 21:36:27 -0500188}
189
Will Drewry6ac91122011-10-21 16:38:58 -0500190void API minijail_use_caps(struct minijail *j, uint64_t capmask)
Elly Jonese1749eb2011-10-07 13:54:59 -0400191{
192 j->caps = capmask;
193 j->flags.caps = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400194}
195
Will Drewry6ac91122011-10-21 16:38:58 -0500196void API minijail_namespace_vfs(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400197{
198 j->flags.vfs = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400199}
200
Will Drewry6ac91122011-10-21 16:38:58 -0500201void API minijail_namespace_pids(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400202{
Elly Jonese58176c2012-01-23 11:46:17 -0500203 j->flags.vfs = 1;
204 j->flags.readonly = 1;
Elly Jonese1749eb2011-10-07 13:54:59 -0400205 j->flags.pids = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400206}
207
Will Drewry6ac91122011-10-21 16:38:58 -0500208void API minijail_remount_readonly(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400209{
210 j->flags.vfs = 1;
211 j->flags.readonly = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400212}
213
Will Drewry6ac91122011-10-21 16:38:58 -0500214void API minijail_inherit_usergroups(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400215{
216 j->flags.usergroups = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400217}
218
Will Drewry6ac91122011-10-21 16:38:58 -0500219void API minijail_disable_ptrace(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400220{
221 j->flags.ptrace = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400222}
223
Will Drewry6ac91122011-10-21 16:38:58 -0500224int API minijail_enter_chroot(struct minijail *j, const char *dir) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400225 if (j->chrootdir)
226 return -EINVAL;
227 j->chrootdir = strdup(dir);
228 if (!j->chrootdir)
229 return -ENOMEM;
230 j->flags.chroot = 1;
231 return 0;
232}
233
Will Drewry6ac91122011-10-21 16:38:58 -0500234int API minijail_bind(struct minijail *j, const char *src, const char *dest,
235 int writeable) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400236 struct binding *b;
237
238 if (*dest != '/')
239 return -EINVAL;
240 b = calloc(1, sizeof(*b));
241 if (!b)
242 return -ENOMEM;
243 b->dest = strdup(dest);
244 if (!b->dest)
245 goto error;
246 b->src = strdup(src);
247 if (!b->src)
248 goto error;
249 b->writeable = writeable;
250
251 syslog(LOG_INFO, "libminijail: bind %s -> %s", src, dest);
252
Elly Jonesdd3e8512012-01-23 15:13:38 -0500253 /*
254 * Force vfs namespacing so the bind mounts don't leak out into the
Elly Jones51a5b6c2011-10-12 19:09:26 -0400255 * containing vfs namespace.
256 */
257 minijail_namespace_vfs(j);
258
259 if (j->bindings_tail)
260 j->bindings_tail->next = b;
261 else
262 j->bindings_head = b;
263 j->bindings_tail = b;
264 j->binding_count++;
265
266 return 0;
267
268error:
269 free(b->src);
270 free(b->dest);
271 free(b);
272 return -ENOMEM;
273}
274
Will Drewry6ac91122011-10-21 16:38:58 -0500275int API minijail_add_seccomp_filter(struct minijail *j, int nr,
276 const char *filter)
Elly Jonese1749eb2011-10-07 13:54:59 -0400277{
278 struct seccomp_filter *sf;
279 if (!filter || nr < 0)
280 return -EINVAL;
Will Drewry32ac9f52011-08-18 21:36:27 -0500281
Elly Jonese1749eb2011-10-07 13:54:59 -0400282 sf = malloc(sizeof(*sf));
283 if (!sf)
284 return -ENOMEM;
285 sf->nr = nr;
286 sf->filter = strndup(filter, MINIJAIL_MAX_SECCOMP_FILTER_LINE);
287 if (!sf->filter) {
288 free(sf);
289 return -ENOMEM;
290 }
Will Drewry32ac9f52011-08-18 21:36:27 -0500291
Elly Jonese1749eb2011-10-07 13:54:59 -0400292 j->filter_count++;
Will Drewryf89aef52011-09-16 16:48:57 -0500293
Elly Jonese1749eb2011-10-07 13:54:59 -0400294 if (!j->filters) {
295 j->filters = sf;
296 sf->next = sf;
297 sf->prev = sf;
298 return 0;
299 }
300 sf->next = j->filters;
301 sf->prev = j->filters->prev;
302 sf->prev->next = sf;
303 j->filters->prev = sf;
304 return 0;
Will Drewry32ac9f52011-08-18 21:36:27 -0500305}
306
Will Drewry6ac91122011-10-21 16:38:58 -0500307int API minijail_lookup_syscall(const char *name)
Elly Jonese1749eb2011-10-07 13:54:59 -0400308{
309 const struct syscall_entry *entry = syscall_table;
310 for (; entry->name && entry->nr >= 0; ++entry)
311 if (!strcmp(entry->name, name))
312 return entry->nr;
313 return -1;
Will Drewry32ac9f52011-08-18 21:36:27 -0500314}
315
/*
 * strip: trims a string in place.
 * @s String to trim; modified
 *
 * Skips leading blanks (space/tab) and cuts off trailing blanks and
 * newlines by writing a terminator. The scan never moves before the first
 * non-blank character, so empty and whitespace-only strings are safe.
 *
 * Returns a pointer into @s at the first non-blank character.
 */
char *strip(char *s)
{
	char *end;

	/* <ctype.h> functions need an unsigned char value. */
	while (*s && isblank((unsigned char)*s))
		s++;

	/* |end| is one past the last kept character; never goes below |s|. */
	end = s + strlen(s);
	while (end > s &&
	       (isblank((unsigned char)end[-1]) || end[-1] == '\n'))
		end--;
	*end = '\0';
	return s;
}
327
Will Drewry6ac91122011-10-21 16:38:58 -0500328void API minijail_parse_seccomp_filters(struct minijail *j, const char *path)
Elly Jonese1749eb2011-10-07 13:54:59 -0400329{
330 FILE *file = fopen(path, "r");
331 char line[MINIJAIL_MAX_SECCOMP_FILTER_LINE];
Ben Chan1d697932011-10-14 10:53:32 -0700332 int count = 0;
Elly Jonese1749eb2011-10-07 13:54:59 -0400333 if (!file)
334 pdie("failed to open seccomp filters file");
Will Drewry32ac9f52011-08-18 21:36:27 -0500335
Elly Jonesdd3e8512012-01-23 15:13:38 -0500336 /*
337 * Format is simple:
Elly Jonese1749eb2011-10-07 13:54:59 -0400338 * syscall_name<COLON><FILTER STRING>[\n|EOF]
339 * #...comment...
340 * <empty line?
341 */
342 while (fgets(line, sizeof(line), file)) {
343 char *filter = line;
344 char *name = strsep(&filter, ":");
345 char *name_end = NULL;
346 int nr = -1;
Ben Chan1d697932011-10-14 10:53:32 -0700347 count++;
Will Drewry32ac9f52011-08-18 21:36:27 -0500348
Ben Chan1d697932011-10-14 10:53:32 -0700349 /* Allow comment lines */
350 if (*name == '#')
351 continue;
Will Drewry32ac9f52011-08-18 21:36:27 -0500352
Elly Jonese1749eb2011-10-07 13:54:59 -0400353 name = strip(name);
Will Drewry32ac9f52011-08-18 21:36:27 -0500354
Elly Jonese1749eb2011-10-07 13:54:59 -0400355 if (!filter) {
356 if (strlen(name))
357 die("invalid filter on line %d", count);
358 /* Allow empty lines */
359 continue;
360 }
Will Drewry32ac9f52011-08-18 21:36:27 -0500361
Elly Jonese1749eb2011-10-07 13:54:59 -0400362 filter = strip(filter);
Will Drewry32ac9f52011-08-18 21:36:27 -0500363
Elly Jonese1749eb2011-10-07 13:54:59 -0400364 /* Take direct syscall numbers */
365 nr = strtol(name, &name_end, 0);
366 /* Or fail-over to using names */
367 if (*name_end != '\0')
368 nr = minijail_lookup_syscall(name);
369 if (nr < 0)
370 die("syscall '%s' unknown", name);
Will Drewry32ac9f52011-08-18 21:36:27 -0500371
Elly Jonese1749eb2011-10-07 13:54:59 -0400372 if (minijail_add_seccomp_filter(j, nr, filter))
373 pdie("failed to add filter for syscall '%s'", name);
374 }
375 fclose(file);
Will Drewry32ac9f52011-08-18 21:36:27 -0500376}
377
/* Cursor used while serializing a jail into a caller-supplied buffer. */
struct marshal_state {
	size_t available;	/* bytes still free at |buf| */
	size_t total;		/* bytes a complete serialization requires */
	char *buf;		/* next write position */
};
383
Will Drewry6ac91122011-10-21 16:38:58 -0500384void marshal_state_init(struct marshal_state *state,
385 char *buf, size_t available)
Elly Jonese1749eb2011-10-07 13:54:59 -0400386{
387 state->available = available;
388 state->buf = buf;
389 state->total = 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500390}
391
Will Drewry6ac91122011-10-21 16:38:58 -0500392void marshal_append(struct marshal_state *state,
393 char *src, size_t length)
Elly Jonese1749eb2011-10-07 13:54:59 -0400394{
395 size_t copy_len = MIN(state->available, length);
Will Drewryf89aef52011-09-16 16:48:57 -0500396
Elly Jonese1749eb2011-10-07 13:54:59 -0400397 /* Up to |available| will be written. */
398 if (copy_len) {
399 memcpy(state->buf, src, copy_len);
400 state->buf += copy_len;
401 state->available -= copy_len;
402 }
403 /* |total| will contain the expected length. */
404 state->total += length;
Will Drewryf89aef52011-09-16 16:48:57 -0500405}
406
Will Drewry6ac91122011-10-21 16:38:58 -0500407void minijail_marshal_helper(struct marshal_state *state,
408 const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400409{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400410 struct binding *b = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -0400411 marshal_append(state, (char *)j, sizeof(*j));
412 if (j->user)
413 marshal_append(state, j->user, strlen(j->user) + 1);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400414 if (j->chrootdir)
415 marshal_append(state, j->chrootdir, strlen(j->chrootdir) + 1);
Elly Jonese1749eb2011-10-07 13:54:59 -0400416 if (j->flags.seccomp_filter && j->filters) {
417 struct seccomp_filter *f = j->filters;
418 do {
419 marshal_append(state, (char *)&f->nr, sizeof(f->nr));
420 marshal_append(state, f->filter, strlen(f->filter) + 1);
421 f = f->next;
422 } while (f != j->filters);
423 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400424 for (b = j->bindings_head; b; b = b->next) {
425 marshal_append(state, b->src, strlen(b->src) + 1);
426 marshal_append(state, b->dest, strlen(b->dest) + 1);
427 marshal_append(state, (char *)&b->writeable, sizeof(b->writeable));
428 }
Will Drewryf89aef52011-09-16 16:48:57 -0500429}
430
Will Drewry6ac91122011-10-21 16:38:58 -0500431size_t API minijail_size(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400432{
433 struct marshal_state state;
434 marshal_state_init(&state, NULL, 0);
435 minijail_marshal_helper(&state, j);
436 return state.total;
Will Drewry2ddaad02011-09-16 11:36:08 -0500437}
438
Elly Jonese1749eb2011-10-07 13:54:59 -0400439int minijail_marshal(const struct minijail *j, char *buf, size_t available)
440{
441 struct marshal_state state;
442 marshal_state_init(&state, buf, available);
443 minijail_marshal_helper(&state, j);
444 return (state.total > available);
Will Drewry2ddaad02011-09-16 11:36:08 -0500445}
446
/* consumebytes: consumes @length bytes from a buffer @buf of length @buflength
 * @length    Number of bytes to consume
 * @buf       Buffer to consume from; advanced past the consumed bytes
 * @buflength Size of @buf; reduced by @length
 *
 * Returns a pointer to the base of the bytes, or NULL (leaving @buf and
 * @buflength untouched) if fewer than @length bytes remain.
 */
void *consumebytes(size_t length, char **buf, size_t *buflength)
{
	char *base;

	if (*buflength < length)
		return NULL;

	base = *buf;
	*buf = base + length;
	*buflength -= length;
	return base;
}
462
/* consumestr: consumes a C string from a buffer @buf of length @buflength
 * @buf       Buffer to consume; advanced past the string and its terminator
 * @buflength Length of buffer; reduced accordingly
 *
 * Returns a pointer to the base of the string, or NULL if no terminator is
 * found within the remaining @buflength bytes.
 */
char *consumestr(char **buf, size_t *buflength)
{
	const char *terminator = memchr(*buf, '\0', *buflength);

	if (terminator == NULL)
		/* There's no null-terminator */
		return NULL;

	/* Consume the characters plus the terminator itself. */
	return consumebytes((size_t)(terminator - *buf) + 1, buf, buflength);
}
476
Elly Jonese1749eb2011-10-07 13:54:59 -0400477int minijail_unmarshal(struct minijail *j, char *serialized, size_t length)
478{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400479 int i;
480 int count;
Will Drewrybee7ba72011-10-21 20:47:01 -0500481 int ret = -EINVAL;
482
Elly Jonese1749eb2011-10-07 13:54:59 -0400483 if (length < sizeof(*j))
Will Drewrybee7ba72011-10-21 20:47:01 -0500484 goto out;
Elly Jonese1749eb2011-10-07 13:54:59 -0400485 memcpy((void *)j, serialized, sizeof(*j));
486 serialized += sizeof(*j);
487 length -= sizeof(*j);
Will Drewryf89aef52011-09-16 16:48:57 -0500488
Will Drewrybee7ba72011-10-21 20:47:01 -0500489 /* Potentially stale pointers not used as signals. */
490 j->bindings_head = NULL;
491 j->bindings_tail = NULL;
492 j->filters = NULL;
493
Elly Jonese1749eb2011-10-07 13:54:59 -0400494 if (j->user) { /* stale pointer */
Elly Jones51a5b6c2011-10-12 19:09:26 -0400495 char *user = consumestr(&serialized, &length);
496 if (!user)
Will Drewrybee7ba72011-10-21 20:47:01 -0500497 goto clear_pointers;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400498 j->user = strdup(user);
Will Drewrybee7ba72011-10-21 20:47:01 -0500499 if (!j->user)
500 goto clear_pointers;
Elly Jonese1749eb2011-10-07 13:54:59 -0400501 }
Will Drewryf89aef52011-09-16 16:48:57 -0500502
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400503 if (j->chrootdir) { /* stale pointer */
504 char *chrootdir = consumestr(&serialized, &length);
505 if (!chrootdir)
Will Drewrybee7ba72011-10-21 20:47:01 -0500506 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400507 j->chrootdir = strdup(chrootdir);
Will Drewrybee7ba72011-10-21 20:47:01 -0500508 if (!j->chrootdir)
509 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400510 }
511
Elly Jonese1749eb2011-10-07 13:54:59 -0400512 if (j->flags.seccomp_filter && j->filter_count) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400513 count = j->filter_count;
Elly Jonese1749eb2011-10-07 13:54:59 -0400514 /* Let add_seccomp_filter recompute the value. */
515 j->filter_count = 0;
Elly Jonese1749eb2011-10-07 13:54:59 -0400516 for (; count > 0; --count) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400517 int *nr = (int *)consumebytes(sizeof(*nr), &serialized,
518 &length);
Elly Jonese1749eb2011-10-07 13:54:59 -0400519 char *filter;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400520 if (!nr)
Will Drewrybee7ba72011-10-21 20:47:01 -0500521 goto bad_filters;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400522 filter = consumestr(&serialized, &length);
523 if (!filter)
Will Drewrybee7ba72011-10-21 20:47:01 -0500524 goto bad_filters;
Elly Jonese1749eb2011-10-07 13:54:59 -0400525 if (minijail_add_seccomp_filter(j, *nr, filter))
Will Drewrybee7ba72011-10-21 20:47:01 -0500526 goto bad_filters;
Elly Jonese1749eb2011-10-07 13:54:59 -0400527 }
528 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400529
530 count = j->binding_count;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400531 j->binding_count = 0;
532 for (i = 0; i < count; ++i) {
533 int *writeable;
534 const char *dest;
535 const char *src = consumestr(&serialized, &length);
536 if (!src)
Will Drewrybee7ba72011-10-21 20:47:01 -0500537 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400538 dest = consumestr(&serialized, &length);
539 if (!dest)
Will Drewrybee7ba72011-10-21 20:47:01 -0500540 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400541 writeable = consumebytes(sizeof(*writeable), &serialized, &length);
542 if (!writeable)
Will Drewrybee7ba72011-10-21 20:47:01 -0500543 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400544 if (minijail_bind(j, src, dest, *writeable))
Will Drewrybee7ba72011-10-21 20:47:01 -0500545 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400546 }
547
Elly Jonese1749eb2011-10-07 13:54:59 -0400548 return 0;
Will Drewrybee7ba72011-10-21 20:47:01 -0500549
550bad_bindings:
551bad_filters:
552 if (j->chrootdir)
553 free(j->chrootdir);
554bad_chrootdir:
555 if (j->user)
556 free(j->user);
557clear_pointers:
558 j->user = NULL;
559 j->chrootdir = NULL;
560out:
561 return ret;
Will Drewry2ddaad02011-09-16 11:36:08 -0500562}
563
Elly Jonese1749eb2011-10-07 13:54:59 -0400564void minijail_preenter(struct minijail *j)
565{
566 /* Strip out options which are minijail_run() only. */
567 j->flags.vfs = 0;
568 j->flags.readonly = 0;
569 j->flags.pids = 0;
Will Drewryfe4a3722011-09-16 14:50:50 -0500570}
571
Elly Jonese1749eb2011-10-07 13:54:59 -0400572void minijail_preexec(struct minijail *j)
573{
574 int vfs = j->flags.vfs;
575 int readonly = j->flags.readonly;
576 if (j->user)
577 free(j->user);
578 j->user = NULL;
579 memset(&j->flags, 0, sizeof(j->flags));
580 /* Now restore anything we meant to keep. */
581 j->flags.vfs = vfs;
582 j->flags.readonly = readonly;
583 /* Note, pidns will already have been used before this call. */
Will Drewry2ddaad02011-09-16 11:36:08 -0500584}
585
Elly Jones51a5b6c2011-10-12 19:09:26 -0400586/* bind_one: Applies bindings from @b for @j, recursing as needed.
587 * @j Minijail these bindings are for
588 * @b Head of list of bindings
589 *
590 * Returns 0 for success.
591 */
Will Drewry6ac91122011-10-21 16:38:58 -0500592int bind_one(const struct minijail *j, struct binding *b) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400593 int ret = 0;
594 char *dest = NULL;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400595 if (ret)
596 return ret;
597 /* dest has a leading "/" */
598 if (asprintf(&dest, "%s%s", j->chrootdir, b->dest) < 0)
599 return -ENOMEM;
Elly Jonesa1059632011-12-15 15:17:07 -0500600 ret = mount(b->src, dest, NULL, MS_BIND, NULL);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400601 if (ret)
602 pdie("bind: %s -> %s", b->src, dest);
Elly Jonesa1059632011-12-15 15:17:07 -0500603 if (!b->writeable) {
604 ret = mount(b->src, dest, NULL,
605 MS_BIND | MS_REMOUNT | MS_RDONLY, NULL);
606 if (ret)
607 pdie("bind ro: %s -> %s", b->src, dest);
608 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400609 free(dest);
610 if (b->next)
611 return bind_one(j, b->next);
612 return ret;
613}
614
Will Drewry6ac91122011-10-21 16:38:58 -0500615int enter_chroot(const struct minijail *j) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400616 int ret;
617 if (j->bindings_head && (ret = bind_one(j, j->bindings_head)))
618 return ret;
619
620 if (chroot(j->chrootdir))
621 return -errno;
622
623 if (chdir("/"))
624 return -errno;
625
626 return 0;
627}
628
/*
 * remount_readonly: replaces /proc with a fresh read-only proc mount
 * (nodev, noexec, nosuid).
 *
 * Returns 0 on success or -errno from umount()/mount().
 */
int remount_readonly(void)
{
	const char *kProcPath = "/proc";
	const unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID;
	int rc;

	/*
	 * Right now, we're holding a reference to our parent's old mount of
	 * /proc in our namespace, which means using MS_REMOUNT here would
	 * mutate our parent's mount as well, even though we're in a VFS
	 * namespace (!). Instead, remove their mount from our namespace
	 * and make our own.
	 */
	rc = umount(kProcPath);
	if (rc != 0)
		return -errno;

	rc = mount("", kProcPath, "proc", kSafeFlags | MS_RDONLY, "");
	if (rc != 0)
		return -errno;

	return 0;
}
646
Will Drewry6ac91122011-10-21 16:38:58 -0500647void drop_caps(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400648{
649 cap_t caps = cap_get_proc();
650 cap_value_t raise_flag[1];
651 unsigned int i;
652 if (!caps)
653 die("can't get process caps");
654 if (cap_clear_flag(caps, CAP_INHERITABLE))
655 die("can't clear inheritable caps");
656 if (cap_clear_flag(caps, CAP_EFFECTIVE))
657 die("can't clear effective caps");
658 if (cap_clear_flag(caps, CAP_PERMITTED))
659 die("can't clear permitted caps");
660 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
661 if (i != CAP_SETPCAP && !(j->caps & (1 << i)))
662 continue;
663 raise_flag[0] = i;
664 if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
665 die("can't add effective cap");
666 if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
667 die("can't add permitted cap");
668 if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
669 die("can't add inheritable cap");
670 }
671 if (cap_set_proc(caps))
672 die("can't apply cleaned capset");
673 cap_free(caps);
674 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
675 if (j->caps & (1 << i))
676 continue;
677 if (prctl(PR_CAPBSET_DROP, i))
678 pdie("prctl(PR_CAPBSET_DROP)");
679 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400680}
681
Will Drewry6ac91122011-10-21 16:38:58 -0500682int setup_seccomp_filters(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400683{
684 const struct seccomp_filter *sf = j->filters;
685 int ret = 0;
686 int broaden = 0;
Will Drewry32ac9f52011-08-18 21:36:27 -0500687
Elly Jonese1749eb2011-10-07 13:54:59 -0400688 /* No filters installed isn't necessarily an error. */
689 if (!sf)
690 return ret;
Will Drewry32ac9f52011-08-18 21:36:27 -0500691
Elly Jonese1749eb2011-10-07 13:54:59 -0400692 do {
693 errno = 0;
694 ret = prctl(PR_SET_SECCOMP_FILTER, PR_SECCOMP_FILTER_SYSCALL,
695 sf->nr, broaden ? "1" : sf->filter);
696 if (ret) {
697 switch (errno) {
698 case ENOSYS:
699 /* TODO(wad) make this a config option */
700 if (broaden)
701 die("CONFIG_SECCOMP_FILTER is not"
702 "supported by your kernel");
703 warn("missing CONFIG_FTRACE_SYSCALLS; relaxing"
704 "the filter for %d", sf->nr);
705 broaden = 1;
706 continue;
707 case E2BIG:
708 warn("seccomp filter too long: %d", sf->nr);
709 pdie("filter too long");
710 case ENOSPC:
711 pdie("too many seccomp filters");
712 case EPERM:
713 warn("syscall filter disallowed for %d",
714 sf->nr);
715 pdie("failed to install seccomp filter");
716 case EINVAL:
717 warn("seccomp filter or call method is"
718 " invalid. %d:'%s'", sf->nr, sf->filter);
719 default:
720 pdie("failed to install seccomp filter");
721 }
722 }
723 sf = sf->next;
724 broaden = 0;
725 } while (sf != j->filters);
726 return ret;
Will Drewry32ac9f52011-08-18 21:36:27 -0500727}
728
Will Drewry6ac91122011-10-21 16:38:58 -0500729void API minijail_enter(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400730{
731 if (j->flags.pids)
732 die("tried to enter a pid-namespaced jail;"
733 "try minijail_run()?");
Elly Jonescd7a9042011-07-22 13:56:51 -0400734
Elly Jonese1749eb2011-10-07 13:54:59 -0400735 if (j->flags.seccomp_filter && setup_seccomp_filters(j))
736 pdie("failed to configure seccomp filters");
Will Drewry32ac9f52011-08-18 21:36:27 -0500737
Elly Jonese1749eb2011-10-07 13:54:59 -0400738 if (j->flags.usergroups && !j->user)
739 die("usergroup inheritance without username");
Elly Jonescd7a9042011-07-22 13:56:51 -0400740
Elly Jonesdd3e8512012-01-23 15:13:38 -0500741 /*
742 * We can't recover from failures if we've dropped privileges partially,
Elly Jonese1749eb2011-10-07 13:54:59 -0400743 * so we don't even try. If any of our operations fail, we abort() the
744 * entire process.
745 */
746 if (j->flags.vfs && unshare(CLONE_NEWNS))
747 pdie("unshare");
Elly Jonescd7a9042011-07-22 13:56:51 -0400748
Elly Jones51a5b6c2011-10-12 19:09:26 -0400749 if (j->flags.chroot && enter_chroot(j))
750 pdie("chroot");
751
Elly Jonese1749eb2011-10-07 13:54:59 -0400752 if (j->flags.readonly && remount_readonly())
753 pdie("remount");
Elly Jonescd7a9042011-07-22 13:56:51 -0400754
Elly Jonese1749eb2011-10-07 13:54:59 -0400755 if (j->flags.caps) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500756 /*
757 * POSIX capabilities are a bit tricky. If we drop our
Elly Jonese1749eb2011-10-07 13:54:59 -0400758 * capability to change uids, our attempt to use setuid()
759 * below will fail. Hang on to root caps across setuid(), then
760 * lock securebits.
761 */
762 if (prctl(PR_SET_KEEPCAPS, 1))
763 pdie("prctl(PR_SET_KEEPCAPS)");
764 if (prctl
765 (PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
766 pdie("prctl(PR_SET_SECUREBITS)");
767 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400768
Elly Jonese1749eb2011-10-07 13:54:59 -0400769 if (j->flags.usergroups) {
770 if (initgroups(j->user, j->usergid))
771 pdie("initgroups");
772 } else {
773 /* Only attempt to clear supplemental groups if we are changing
774 * users. */
775 if ((j->uid || j->gid) && setgroups(0, NULL))
776 pdie("setgroups");
777 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400778
Elly Jonese1749eb2011-10-07 13:54:59 -0400779 if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
780 pdie("setresgid");
Elly Jonescd7a9042011-07-22 13:56:51 -0400781
Elly Jonese1749eb2011-10-07 13:54:59 -0400782 if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
783 pdie("setresuid");
Elly Jonescd7a9042011-07-22 13:56:51 -0400784
Elly Jonese1749eb2011-10-07 13:54:59 -0400785 if (j->flags.caps)
786 drop_caps(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400787
Elly Jonesdd3e8512012-01-23 15:13:38 -0500788 /*
789 * seccomp has to come last since it cuts off all the other
Elly Jonese1749eb2011-10-07 13:54:59 -0400790 * privilege-dropping syscalls :)
791 */
792 if (j->flags.seccomp_filter && prctl(PR_SET_SECCOMP, 13))
793 pdie("prctl(PR_SET_SECCOMP, 13)");
Will Drewry32ac9f52011-08-18 21:36:27 -0500794
Elly Jonese1749eb2011-10-07 13:54:59 -0400795 if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
796 pdie("prctl(PR_SET_SECCOMP)");
Elly Jonescd7a9042011-07-22 13:56:51 -0400797}
798
/* TODO(wad) will visibility affect this variable? */
/*
 * Wait status of the jailed root process, recorded by init() and read by
 * the SIGTERM handler; hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t init_exitstatus = 0;
801
Will Drewry6ac91122011-10-21 16:38:58 -0500802void init_term(int __attribute__ ((unused)) sig)
Elly Jonese1749eb2011-10-07 13:54:59 -0400803{
804 _exit(init_exitstatus);
Elly Jonescd7a9042011-07-22 13:56:51 -0400805}
806
Will Drewry6ac91122011-10-21 16:38:58 -0500807int init(pid_t rootpid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400808{
809 pid_t pid;
810 int status;
811 /* so that we exit with the right status */
812 signal(SIGTERM, init_term);
813 /* TODO(wad) self jail with seccomp_filters here. */
814 while ((pid = wait(&status)) > 0) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500815 /*
816 * This loop will only end when either there are no processes
Elly Jonese1749eb2011-10-07 13:54:59 -0400817 * left inside our pid namespace or we get a signal.
818 */
819 if (pid == rootpid)
820 init_exitstatus = status;
821 }
822 if (!WIFEXITED(init_exitstatus))
823 _exit(MINIJAIL_ERR_INIT);
824 _exit(WEXITSTATUS(init_exitstatus));
Elly Jonescd7a9042011-07-22 13:56:51 -0400825}
826
Will Drewry6ac91122011-10-21 16:38:58 -0500827int API minijail_from_fd(int fd, struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400828{
829 size_t sz = 0;
830 size_t bytes = read(fd, &sz, sizeof(sz));
831 char *buf;
832 int r;
833 if (sizeof(sz) != bytes)
834 return -EINVAL;
835 if (sz > USHRT_MAX) /* Arbitrary sanity check */
836 return -E2BIG;
837 buf = malloc(sz);
838 if (!buf)
839 return -ENOMEM;
840 bytes = read(fd, buf, sz);
841 if (bytes != sz) {
842 free(buf);
843 return -EINVAL;
844 }
845 r = minijail_unmarshal(j, buf, sz);
846 free(buf);
847 return r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500848}
849
Will Drewry6ac91122011-10-21 16:38:58 -0500850int API minijail_to_fd(struct minijail *j, int fd)
Elly Jonese1749eb2011-10-07 13:54:59 -0400851{
852 char *buf;
853 size_t sz = minijail_size(j);
854 ssize_t written;
855 int r;
Elly Jonescd7a9042011-07-22 13:56:51 -0400856
Elly Jonese1749eb2011-10-07 13:54:59 -0400857 if (!sz)
858 return -EINVAL;
859 buf = malloc(sz);
860 r = minijail_marshal(j, buf, sz);
861 if (r) {
862 free(buf);
863 return r;
864 }
865 /* Sends [size][minijail]. */
866 written = write(fd, &sz, sizeof(sz));
867 if (written != sizeof(sz)) {
868 free(buf);
869 return -EFAULT;
870 }
871 written = write(fd, buf, sz);
872 if (written < 0 || (size_t) written != sz) {
873 free(buf);
874 return -EFAULT;
875 }
876 free(buf);
877 return 0;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500878}
Elly Jonescd7a9042011-07-22 13:56:51 -0400879
Will Drewry6ac91122011-10-21 16:38:58 -0500880int setup_preload(void)
Elly Jonese1749eb2011-10-07 13:54:59 -0400881{
882 char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
883 char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
884 if (!newenv)
885 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400886
Elly Jonese1749eb2011-10-07 13:54:59 -0400887 /* Only insert a separating space if we have something to separate... */
888 sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "",
889 PRELOADPATH);
Elly Jonescd7a9042011-07-22 13:56:51 -0400890
Elly Jonese1749eb2011-10-07 13:54:59 -0400891 /* setenv() makes a copy of the string we give it */
892 setenv(kLdPreloadEnvVar, newenv, 1);
893 free(newenv);
894 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400895}
896
Will Drewry6ac91122011-10-21 16:38:58 -0500897int setup_pipe(int fds[2])
Elly Jonese1749eb2011-10-07 13:54:59 -0400898{
899 int r = pipe(fds);
900 char fd_buf[11];
901 if (r)
902 return r;
903 r = snprintf(fd_buf, sizeof(fd_buf), "%d", fds[0]);
904 if (r <= 0)
905 return -EINVAL;
906 setenv(kFdEnvVar, fd_buf, 1);
907 return 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500908}
909
Will Drewry6ac91122011-10-21 16:38:58 -0500910int API minijail_run(struct minijail *j, const char *filename,
911 char *const argv[])
Elly Jonese1749eb2011-10-07 13:54:59 -0400912{
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700913 return minijail_run_pid(j, filename, argv, NULL);
914}
915
916int API minijail_run_pid(struct minijail *j, const char *filename,
917 char *const argv[], pid_t *pchild_pid)
918{
Elly Jonese1749eb2011-10-07 13:54:59 -0400919 unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
920 char *oldenv, *oldenv_copy = NULL;
921 pid_t child_pid;
922 int pipe_fds[2];
923 int ret;
Ben Chan541c7e52011-08-26 14:55:53 -0700924
Elly Jonese1749eb2011-10-07 13:54:59 -0400925 oldenv = getenv(kLdPreloadEnvVar);
926 if (oldenv) {
927 oldenv_copy = strdup(oldenv);
928 if (!oldenv_copy)
929 return -ENOMEM;
930 }
Will Drewryf89aef52011-09-16 16:48:57 -0500931
Elly Jonese1749eb2011-10-07 13:54:59 -0400932 if (setup_preload())
933 return -EFAULT;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500934
Elly Jonesdd3e8512012-01-23 15:13:38 -0500935 /*
936 * Before we fork(2) and execve(2) the child process, we need to open
Elly Jonese1749eb2011-10-07 13:54:59 -0400937 * a pipe(2) to send the minijail configuration over.
938 */
939 if (setup_pipe(pipe_fds))
940 return -EFAULT;
Elly Jonescd7a9042011-07-22 13:56:51 -0400941
Elly Jonese1749eb2011-10-07 13:54:59 -0400942 child_pid = syscall(SYS_clone, pidns | SIGCHLD, NULL);
943 if (child_pid < 0) {
944 free(oldenv_copy);
945 return child_pid;
946 }
Will Drewryf89aef52011-09-16 16:48:57 -0500947
Elly Jonese1749eb2011-10-07 13:54:59 -0400948 if (child_pid) {
949 /* Restore parent's LD_PRELOAD. */
950 if (oldenv_copy) {
951 setenv(kLdPreloadEnvVar, oldenv_copy, 1);
952 free(oldenv_copy);
953 } else {
954 unsetenv(kLdPreloadEnvVar);
955 }
956 unsetenv(kFdEnvVar);
957 j->initpid = child_pid;
958 close(pipe_fds[0]); /* read endpoint */
959 ret = minijail_to_fd(j, pipe_fds[1]);
960 close(pipe_fds[1]); /* write endpoint */
961 if (ret) {
962 kill(j->initpid, SIGKILL);
963 die("failed to send marshalled minijail");
964 }
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700965 if (pchild_pid)
966 *pchild_pid = child_pid;
Elly Jonese1749eb2011-10-07 13:54:59 -0400967 return 0;
968 }
969 free(oldenv_copy);
Ben Chan541c7e52011-08-26 14:55:53 -0700970
Elly Jonese1749eb2011-10-07 13:54:59 -0400971 /* Drop everything that cannot be inherited across execve. */
972 minijail_preexec(j);
973 /* Jail this process and its descendants... */
974 minijail_enter(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400975
Elly Jonese1749eb2011-10-07 13:54:59 -0400976 if (pidns) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500977 /*
978 * pid namespace: this process will become init inside the new
Elly Jonese1749eb2011-10-07 13:54:59 -0400979 * namespace, so fork off a child to actually run the program
980 * (we don't want all programs we might exec to have to know
981 * how to be init).
982 */
983 child_pid = fork();
984 if (child_pid < 0)
985 _exit(child_pid);
986 else if (child_pid > 0)
987 init(child_pid); /* never returns */
988 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400989
Elly Jonesdd3e8512012-01-23 15:13:38 -0500990 /*
991 * If we aren't pid-namespaced:
Elly Jonese1749eb2011-10-07 13:54:59 -0400992 * calling process
993 * -> execve()-ing process
994 * If we are:
995 * calling process
996 * -> init()-ing process
997 * -> execve()-ing process
998 */
999 _exit(execve(filename, argv, environ));
Elly Jonescd7a9042011-07-22 13:56:51 -04001000}
1001
Will Drewry6ac91122011-10-21 16:38:58 -05001002int API minijail_kill(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -04001003{
1004 int st;
1005 if (kill(j->initpid, SIGTERM))
1006 return -errno;
1007 if (waitpid(j->initpid, &st, 0) < 0)
1008 return -errno;
1009 return st;
Elly Jonescd7a9042011-07-22 13:56:51 -04001010}
1011
Will Drewry6ac91122011-10-21 16:38:58 -05001012int API minijail_wait(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -04001013{
1014 int st;
1015 if (waitpid(j->initpid, &st, 0) < 0)
1016 return -errno;
1017 if (!WIFEXITED(st))
1018 return MINIJAIL_ERR_JAIL;
1019 return WEXITSTATUS(st);
Elly Jonescd7a9042011-07-22 13:56:51 -04001020}
1021
Will Drewry6ac91122011-10-21 16:38:58 -05001022void API minijail_destroy(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -04001023{
1024 struct seccomp_filter *f = j->filters;
1025 /* Unlink the tail and head */
1026 if (f)
1027 f->prev->next = NULL;
1028 while (f) {
1029 struct seccomp_filter *next = f->next;
1030 free(f->filter);
1031 free(f);
1032 f = next;
1033 }
Elly Jones51a5b6c2011-10-12 19:09:26 -04001034 while (j->bindings_head) {
1035 struct binding *b = j->bindings_head;
1036 j->bindings_head = j->bindings_head->next;
1037 free(b->dest);
1038 free(b->src);
1039 free(b);
1040 }
1041 j->bindings_tail = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -04001042 if (j->user)
1043 free(j->user);
Will Drewrybee7ba72011-10-21 20:47:01 -05001044 if (j->chrootdir)
1045 free(j->chrootdir);
Elly Jonese1749eb2011-10-07 13:54:59 -04001046 free(j);
Elly Jonescd7a9042011-07-22 13:56:51 -04001047}