blob: 0080c4926ad7d91765a71cd0d4c406a3b4525efc [file] [log] [blame]
Elly Jonesdd3e8512012-01-23 15:13:38 -05001/*
2 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Elly Jonescd7a9042011-07-22 13:56:51 -04003 * Use of this source code is governed by a BSD-style license that can be
Will Drewry32ac9f52011-08-18 21:36:27 -05004 * found in the LICENSE file.
5 */
Elly Jonescd7a9042011-07-22 13:56:51 -04006
7#define _BSD_SOURCE
8#define _GNU_SOURCE
Will Drewry32ac9f52011-08-18 21:36:27 -05009#include <ctype.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040010#include <errno.h>
11#include <grp.h>
12#include <inttypes.h>
Will Drewryfe4a3722011-09-16 14:50:50 -050013#include <limits.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040014#include <linux/capability.h>
15#include <linux/securebits.h>
16#include <pwd.h>
17#include <sched.h>
18#include <signal.h>
Will Drewry2f54b6a2011-09-16 13:45:31 -050019#include <stdarg.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040020#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <syscall.h>
24#include <sys/capability.h>
25#include <sys/mount.h>
Will Drewryf89aef52011-09-16 16:48:57 -050026#include <sys/param.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040027#include <sys/prctl.h>
28#include <sys/wait.h>
29#include <syslog.h>
30#include <unistd.h>
31
32#include "libminijail.h"
Will Drewry32ac9f52011-08-18 21:36:27 -050033#include "libsyscalls.h"
Elly Jonescd7a9042011-07-22 13:56:51 -040034#include "libminijail-private.h"
35
/*
 * Fallback values for the seccomp-filter prctl() interface, used only when
 * the system headers do not already define PR_SET_SECCOMP_FILTER.
 */
#ifndef PR_SET_SECCOMP_FILTER
# define PR_SECCOMP_FILTER_SYSCALL	0
# define PR_SECCOMP_FILTER_EVENT	1
# define PR_GET_SECCOMP_FILTER		35
# define PR_SET_SECCOMP_FILTER		36
# define PR_CLEAR_SECCOMP_FILTER	37
#endif
44
/* Log an error to syslog with a "libminijail: " prefix, then abort(). */
#define die(_msg, ...) \
	do { \
		syslog(LOG_ERR, "libminijail: " _msg, ## __VA_ARGS__); \
		abort(); \
	} while (0)

/* Like die(), but appends ": " followed by strerror(errno). */
#define pdie(_msg, ...) \
	die(_msg ": %s", ## __VA_ARGS__, strerror(errno))

/* Log a warning to syslog with a "libminijail: " prefix and carry on. */
#define warn(_msg, ...) \
	syslog(LOG_WARNING, "libminijail: " _msg, ## __VA_ARGS__)
Elly Jonescd7a9042011-07-22 13:56:51 -040055
/*
 * One seccomp filter rule. Rules are kept on a circular doubly-linked list
 * whose head is stored in struct minijail.
 */
struct seccomp_filter {
	int nr;				/* syscall number the rule applies to */
	char *filter;			/* heap-allocated filter string */
	struct seccomp_filter *next;	/* following rule (wraps to the head) */
	struct seccomp_filter *prev;	/* preceding rule (wraps to the tail) */
};
61
/* One bind mount request, kept on a singly-linked list in struct minijail. */
struct binding {
	char *src;		/* heap-allocated source path */
	char *dest;		/* heap-allocated destination; starts with '/' */
	int writeable;		/* zero means the bind is remounted read-only */
	struct binding *next;	/* next request, or NULL at the tail */
};
68
/*
 * Complete description of a jail: which restrictions to apply (flags) and the
 * parameters each of them needs.
 *
 * The flag bit-fields are unsigned: a signed 1-bit field can only hold 0 and
 * -1, so storing 1 into it is implementation-defined and reads back as -1.
 */
struct minijail {
	struct {
		unsigned int uid:1;		/* change uid to |uid| */
		unsigned int gid:1;		/* change gid to |gid| */
		unsigned int caps:1;		/* restrict capabilities to |caps| */
		unsigned int vfs:1;		/* enter a new mount namespace */
		unsigned int pids:1;		/* pid namespace requested */
		unsigned int seccomp:1;		/* enable seccomp mode 1 */
		unsigned int readonly:1;	/* remount /proc read-only */
		unsigned int usergroups:1;	/* initgroups() for |user| */
		unsigned int ptrace:1;		/* ptrace disabling requested */
		unsigned int seccomp_filter:1;	/* install seccomp filters */
		unsigned int chroot:1;		/* chroot into |chrootdir| */
	} flags;
	uid_t uid;
	gid_t gid;
	gid_t usergid;			/* primary gid of |user| */
	char *user;			/* heap-allocated user name, or NULL */
	uint64_t caps;			/* bitmask of capabilities to keep */
	pid_t initpid;
	int filter_count;		/* number of entries on |filters| */
	int binding_count;		/* number of entries on the bindings list */
	char *chrootdir;		/* heap-allocated chroot path, or NULL */
	struct seccomp_filter *filters;	/* circular list head, or NULL */
	struct binding *bindings_head;
	struct binding *bindings_tail;
};
96
Will Drewry6ac91122011-10-21 16:38:58 -050097struct minijail API *minijail_new(void)
Elly Jonese1749eb2011-10-07 13:54:59 -040098{
Elly Jones51a5b6c2011-10-12 19:09:26 -040099 return calloc(1, sizeof(struct minijail));
Elly Jonescd7a9042011-07-22 13:56:51 -0400100}
101
Will Drewry6ac91122011-10-21 16:38:58 -0500102void API minijail_change_uid(struct minijail *j, uid_t uid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400103{
104 if (uid == 0)
105 die("useless change to uid 0");
106 j->uid = uid;
107 j->flags.uid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400108}
109
Will Drewry6ac91122011-10-21 16:38:58 -0500110void API minijail_change_gid(struct minijail *j, gid_t gid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400111{
112 if (gid == 0)
113 die("useless change to gid 0");
114 j->gid = gid;
115 j->flags.gid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400116}
117
Will Drewry6ac91122011-10-21 16:38:58 -0500118int API minijail_change_user(struct minijail *j, const char *user)
Elly Jonese1749eb2011-10-07 13:54:59 -0400119{
120 char *buf = NULL;
121 struct passwd pw;
122 struct passwd *ppw = NULL;
123 ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX);
124 if (sz == -1)
125 sz = 65536; /* your guess is as good as mine... */
Elly Joneseb300c52011-09-22 14:35:43 -0400126
Elly Jonesdd3e8512012-01-23 15:13:38 -0500127 /*
128 * sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400129 * the maximum needed size of the buffer, so we don't have to search.
130 */
131 buf = malloc(sz);
132 if (!buf)
133 return -ENOMEM;
134 getpwnam_r(user, &pw, buf, sz, &ppw);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500135 /*
136 * We're safe to free the buffer here. The strings inside pw point
137 * inside buf, but we don't use any of them; this leaves the pointers
138 * dangling but it's safe. ppw points at pw if getpwnam_r succeeded.
139 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400140 free(buf);
141 if (!ppw)
142 return -errno;
143 minijail_change_uid(j, ppw->pw_uid);
144 j->user = strdup(user);
145 if (!j->user)
146 return -ENOMEM;
147 j->usergid = ppw->pw_gid;
148 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400149}
150
Will Drewry6ac91122011-10-21 16:38:58 -0500151int API minijail_change_group(struct minijail *j, const char *group)
Elly Jonese1749eb2011-10-07 13:54:59 -0400152{
153 char *buf = NULL;
154 struct group gr;
155 struct group *pgr = NULL;
156 ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX);
157 if (sz == -1)
158 sz = 65536; /* and mine is as good as yours, really */
Elly Joneseb300c52011-09-22 14:35:43 -0400159
Elly Jonesdd3e8512012-01-23 15:13:38 -0500160 /*
161 * sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400162 * the maximum needed size of the buffer, so we don't have to search.
163 */
164 buf = malloc(sz);
165 if (!buf)
166 return -ENOMEM;
167 getgrnam_r(group, &gr, buf, sz, &pgr);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500168 /*
169 * We're safe to free the buffer here. The strings inside gr point
170 * inside buf, but we don't use any of them; this leaves the pointers
171 * dangling but it's safe. pgr points at gr if getgrnam_r succeeded.
172 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400173 free(buf);
174 if (!pgr)
175 return -errno;
176 minijail_change_gid(j, pgr->gr_gid);
177 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400178}
179
Will Drewry6ac91122011-10-21 16:38:58 -0500180void API minijail_use_seccomp(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400181{
182 j->flags.seccomp = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400183}
184
Will Drewry6ac91122011-10-21 16:38:58 -0500185void API minijail_use_seccomp_filter(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400186{
Jorge Lucangeli Obes2343d832012-04-25 21:59:48 -0700187 /* TODO(jorgelo): re-enable this when the seccomp BPF merge is done. */
188 j->flags.seccomp_filter = 0;
189}
190
191/* TODO(jorgelo): remove this when the seccomp BPF merge is done. */
192void API minijail_force_seccomp_filter(struct minijail *j)
193{
Elly Jonese1749eb2011-10-07 13:54:59 -0400194 j->flags.seccomp_filter = 1;
Will Drewry32ac9f52011-08-18 21:36:27 -0500195}
196
Will Drewry6ac91122011-10-21 16:38:58 -0500197void API minijail_use_caps(struct minijail *j, uint64_t capmask)
Elly Jonese1749eb2011-10-07 13:54:59 -0400198{
199 j->caps = capmask;
200 j->flags.caps = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400201}
202
Will Drewry6ac91122011-10-21 16:38:58 -0500203void API minijail_namespace_vfs(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400204{
205 j->flags.vfs = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400206}
207
Will Drewry6ac91122011-10-21 16:38:58 -0500208void API minijail_namespace_pids(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400209{
Elly Jonese58176c2012-01-23 11:46:17 -0500210 j->flags.vfs = 1;
211 j->flags.readonly = 1;
Elly Jonese1749eb2011-10-07 13:54:59 -0400212 j->flags.pids = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400213}
214
Will Drewry6ac91122011-10-21 16:38:58 -0500215void API minijail_remount_readonly(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400216{
217 j->flags.vfs = 1;
218 j->flags.readonly = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400219}
220
Will Drewry6ac91122011-10-21 16:38:58 -0500221void API minijail_inherit_usergroups(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400222{
223 j->flags.usergroups = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400224}
225
Will Drewry6ac91122011-10-21 16:38:58 -0500226void API minijail_disable_ptrace(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400227{
228 j->flags.ptrace = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400229}
230
Will Drewry6ac91122011-10-21 16:38:58 -0500231int API minijail_enter_chroot(struct minijail *j, const char *dir) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400232 if (j->chrootdir)
233 return -EINVAL;
234 j->chrootdir = strdup(dir);
235 if (!j->chrootdir)
236 return -ENOMEM;
237 j->flags.chroot = 1;
238 return 0;
239}
240
Will Drewry6ac91122011-10-21 16:38:58 -0500241int API minijail_bind(struct minijail *j, const char *src, const char *dest,
242 int writeable) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400243 struct binding *b;
244
245 if (*dest != '/')
246 return -EINVAL;
247 b = calloc(1, sizeof(*b));
248 if (!b)
249 return -ENOMEM;
250 b->dest = strdup(dest);
251 if (!b->dest)
252 goto error;
253 b->src = strdup(src);
254 if (!b->src)
255 goto error;
256 b->writeable = writeable;
257
258 syslog(LOG_INFO, "libminijail: bind %s -> %s", src, dest);
259
Elly Jonesdd3e8512012-01-23 15:13:38 -0500260 /*
261 * Force vfs namespacing so the bind mounts don't leak out into the
Elly Jones51a5b6c2011-10-12 19:09:26 -0400262 * containing vfs namespace.
263 */
264 minijail_namespace_vfs(j);
265
266 if (j->bindings_tail)
267 j->bindings_tail->next = b;
268 else
269 j->bindings_head = b;
270 j->bindings_tail = b;
271 j->binding_count++;
272
273 return 0;
274
275error:
276 free(b->src);
277 free(b->dest);
278 free(b);
279 return -ENOMEM;
280}
281
Will Drewry6ac91122011-10-21 16:38:58 -0500282int API minijail_add_seccomp_filter(struct minijail *j, int nr,
283 const char *filter)
Elly Jonese1749eb2011-10-07 13:54:59 -0400284{
285 struct seccomp_filter *sf;
286 if (!filter || nr < 0)
287 return -EINVAL;
Will Drewry32ac9f52011-08-18 21:36:27 -0500288
Elly Jonese1749eb2011-10-07 13:54:59 -0400289 sf = malloc(sizeof(*sf));
290 if (!sf)
291 return -ENOMEM;
292 sf->nr = nr;
293 sf->filter = strndup(filter, MINIJAIL_MAX_SECCOMP_FILTER_LINE);
294 if (!sf->filter) {
295 free(sf);
296 return -ENOMEM;
297 }
Will Drewry32ac9f52011-08-18 21:36:27 -0500298
Elly Jonese1749eb2011-10-07 13:54:59 -0400299 j->filter_count++;
Will Drewryf89aef52011-09-16 16:48:57 -0500300
Elly Jonese1749eb2011-10-07 13:54:59 -0400301 if (!j->filters) {
302 j->filters = sf;
303 sf->next = sf;
304 sf->prev = sf;
305 return 0;
306 }
307 sf->next = j->filters;
308 sf->prev = j->filters->prev;
309 sf->prev->next = sf;
310 j->filters->prev = sf;
311 return 0;
Will Drewry32ac9f52011-08-18 21:36:27 -0500312}
313
Will Drewry6ac91122011-10-21 16:38:58 -0500314int API minijail_lookup_syscall(const char *name)
Elly Jonese1749eb2011-10-07 13:54:59 -0400315{
316 const struct syscall_entry *entry = syscall_table;
317 for (; entry->name && entry->nr >= 0; ++entry)
318 if (!strcmp(entry->name, name))
319 return entry->nr;
320 return -1;
Will Drewry32ac9f52011-08-18 21:36:27 -0500321}
322
/*
 * Trims @s in place: skips leading blanks (space/tab) and cuts trailing
 * blanks and newlines by writing a terminator.
 *
 * Returns a pointer into @s at the first non-blank character. The trimming
 * never reads or writes outside the string, including when what remains
 * after the leading blanks is empty or consists only of blanks/newlines.
 */
char *strip(char *s)
{
	size_t len;

	/* <ctype.h> functions need an unsigned char value. */
	while (*s && isblank((unsigned char)*s))
		s++;

	len = strlen(s);
	while (len > 0 &&
	       (isblank((unsigned char)s[len - 1]) || s[len - 1] == '\n'))
		len--;
	s[len] = '\0';
	return s;
}
334
Will Drewry6ac91122011-10-21 16:38:58 -0500335void API minijail_parse_seccomp_filters(struct minijail *j, const char *path)
Elly Jonese1749eb2011-10-07 13:54:59 -0400336{
337 FILE *file = fopen(path, "r");
338 char line[MINIJAIL_MAX_SECCOMP_FILTER_LINE];
Ben Chan1d697932011-10-14 10:53:32 -0700339 int count = 0;
Elly Jonese1749eb2011-10-07 13:54:59 -0400340 if (!file)
341 pdie("failed to open seccomp filters file");
Will Drewry32ac9f52011-08-18 21:36:27 -0500342
Elly Jonesdd3e8512012-01-23 15:13:38 -0500343 /*
344 * Format is simple:
Elly Jonese1749eb2011-10-07 13:54:59 -0400345 * syscall_name<COLON><FILTER STRING>[\n|EOF]
346 * #...comment...
347 * <empty line?
348 */
349 while (fgets(line, sizeof(line), file)) {
350 char *filter = line;
351 char *name = strsep(&filter, ":");
352 char *name_end = NULL;
353 int nr = -1;
Ben Chan1d697932011-10-14 10:53:32 -0700354 count++;
Will Drewry32ac9f52011-08-18 21:36:27 -0500355
Ben Chan1d697932011-10-14 10:53:32 -0700356 /* Allow comment lines */
357 if (*name == '#')
358 continue;
Will Drewry32ac9f52011-08-18 21:36:27 -0500359
Elly Jonese1749eb2011-10-07 13:54:59 -0400360 name = strip(name);
Will Drewry32ac9f52011-08-18 21:36:27 -0500361
Elly Jonese1749eb2011-10-07 13:54:59 -0400362 if (!filter) {
363 if (strlen(name))
364 die("invalid filter on line %d", count);
365 /* Allow empty lines */
366 continue;
367 }
Will Drewry32ac9f52011-08-18 21:36:27 -0500368
Elly Jonese1749eb2011-10-07 13:54:59 -0400369 filter = strip(filter);
Will Drewry32ac9f52011-08-18 21:36:27 -0500370
Elly Jonese1749eb2011-10-07 13:54:59 -0400371 /* Take direct syscall numbers */
372 nr = strtol(name, &name_end, 0);
373 /* Or fail-over to using names */
374 if (*name_end != '\0')
375 nr = minijail_lookup_syscall(name);
376 if (nr < 0)
377 die("syscall '%s' unknown", name);
Will Drewry32ac9f52011-08-18 21:36:27 -0500378
Elly Jonese1749eb2011-10-07 13:54:59 -0400379 if (minijail_add_seccomp_filter(j, nr, filter))
380 pdie("failed to add filter for syscall '%s'", name);
381 }
382 fclose(file);
Will Drewry32ac9f52011-08-18 21:36:27 -0500383}
384
/* Cursor used while serialising a jail into a caller-supplied buffer. */
struct marshal_state {
	size_t available;	/* bytes still free at |buf| */
	size_t total;		/* bytes the full serialisation needs */
	char *buf;		/* next write position */
};
390
Will Drewry6ac91122011-10-21 16:38:58 -0500391void marshal_state_init(struct marshal_state *state,
392 char *buf, size_t available)
Elly Jonese1749eb2011-10-07 13:54:59 -0400393{
394 state->available = available;
395 state->buf = buf;
396 state->total = 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500397}
398
Will Drewry6ac91122011-10-21 16:38:58 -0500399void marshal_append(struct marshal_state *state,
400 char *src, size_t length)
Elly Jonese1749eb2011-10-07 13:54:59 -0400401{
402 size_t copy_len = MIN(state->available, length);
Will Drewryf89aef52011-09-16 16:48:57 -0500403
Elly Jonese1749eb2011-10-07 13:54:59 -0400404 /* Up to |available| will be written. */
405 if (copy_len) {
406 memcpy(state->buf, src, copy_len);
407 state->buf += copy_len;
408 state->available -= copy_len;
409 }
410 /* |total| will contain the expected length. */
411 state->total += length;
Will Drewryf89aef52011-09-16 16:48:57 -0500412}
413
Will Drewry6ac91122011-10-21 16:38:58 -0500414void minijail_marshal_helper(struct marshal_state *state,
415 const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400416{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400417 struct binding *b = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -0400418 marshal_append(state, (char *)j, sizeof(*j));
419 if (j->user)
420 marshal_append(state, j->user, strlen(j->user) + 1);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400421 if (j->chrootdir)
422 marshal_append(state, j->chrootdir, strlen(j->chrootdir) + 1);
Elly Jonese1749eb2011-10-07 13:54:59 -0400423 if (j->flags.seccomp_filter && j->filters) {
424 struct seccomp_filter *f = j->filters;
425 do {
426 marshal_append(state, (char *)&f->nr, sizeof(f->nr));
427 marshal_append(state, f->filter, strlen(f->filter) + 1);
428 f = f->next;
429 } while (f != j->filters);
430 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400431 for (b = j->bindings_head; b; b = b->next) {
432 marshal_append(state, b->src, strlen(b->src) + 1);
433 marshal_append(state, b->dest, strlen(b->dest) + 1);
434 marshal_append(state, (char *)&b->writeable, sizeof(b->writeable));
435 }
Will Drewryf89aef52011-09-16 16:48:57 -0500436}
437
Will Drewry6ac91122011-10-21 16:38:58 -0500438size_t API minijail_size(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400439{
440 struct marshal_state state;
441 marshal_state_init(&state, NULL, 0);
442 minijail_marshal_helper(&state, j);
443 return state.total;
Will Drewry2ddaad02011-09-16 11:36:08 -0500444}
445
Elly Jonese1749eb2011-10-07 13:54:59 -0400446int minijail_marshal(const struct minijail *j, char *buf, size_t available)
447{
448 struct marshal_state state;
449 marshal_state_init(&state, buf, available);
450 minijail_marshal_helper(&state, j);
451 return (state.total > available);
Will Drewry2ddaad02011-09-16 11:36:08 -0500452}
453
/* consumebytes: takes @length bytes off the front of the buffer at @buf
 * @length    Number of bytes to take
 * @buf       Cursor into the buffer; advanced past the bytes on success
 * @buflength Bytes remaining at @buf; reduced by @length on success
 *
 * Returns a pointer to the first of the bytes, or NULL (leaving @buf and
 * @buflength untouched) when fewer than @length bytes remain.
 */
void *consumebytes(size_t length, char **buf, size_t *buflength)
{
	char *base;

	if (*buflength < length)
		return NULL;

	base = *buf;
	*buf = base + length;
	*buflength -= length;
	return base;
}
469
/* consumestr: takes a NUL-terminated string off the front of the buffer
 * @buf       Cursor into the buffer; advanced past the terminator on success
 * @buflength Bytes remaining at @buf; reduced by the string size on success
 *
 * Returns a pointer to the start of the string, or NULL (leaving @buf and
 * @buflength untouched) when no terminator is found in the remaining bytes.
 */
char *consumestr(char **buf, size_t *buflength)
{
	char *start = *buf;
	char *nul = memchr(start, '\0', *buflength);
	size_t used;

	if (!nul)
		/* There's no null-terminator */
		return NULL;

	/* String bytes plus the terminator itself. */
	used = (size_t)(nul - start) + 1;
	*buf = start + used;
	*buflength -= used;
	return start;
}
483
Elly Jonese1749eb2011-10-07 13:54:59 -0400484int minijail_unmarshal(struct minijail *j, char *serialized, size_t length)
485{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400486 int i;
487 int count;
Will Drewrybee7ba72011-10-21 20:47:01 -0500488 int ret = -EINVAL;
489
Elly Jonese1749eb2011-10-07 13:54:59 -0400490 if (length < sizeof(*j))
Will Drewrybee7ba72011-10-21 20:47:01 -0500491 goto out;
Elly Jonese1749eb2011-10-07 13:54:59 -0400492 memcpy((void *)j, serialized, sizeof(*j));
493 serialized += sizeof(*j);
494 length -= sizeof(*j);
Will Drewryf89aef52011-09-16 16:48:57 -0500495
Will Drewrybee7ba72011-10-21 20:47:01 -0500496 /* Potentially stale pointers not used as signals. */
497 j->bindings_head = NULL;
498 j->bindings_tail = NULL;
499 j->filters = NULL;
500
Elly Jonese1749eb2011-10-07 13:54:59 -0400501 if (j->user) { /* stale pointer */
Elly Jones51a5b6c2011-10-12 19:09:26 -0400502 char *user = consumestr(&serialized, &length);
503 if (!user)
Will Drewrybee7ba72011-10-21 20:47:01 -0500504 goto clear_pointers;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400505 j->user = strdup(user);
Will Drewrybee7ba72011-10-21 20:47:01 -0500506 if (!j->user)
507 goto clear_pointers;
Elly Jonese1749eb2011-10-07 13:54:59 -0400508 }
Will Drewryf89aef52011-09-16 16:48:57 -0500509
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400510 if (j->chrootdir) { /* stale pointer */
511 char *chrootdir = consumestr(&serialized, &length);
512 if (!chrootdir)
Will Drewrybee7ba72011-10-21 20:47:01 -0500513 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400514 j->chrootdir = strdup(chrootdir);
Will Drewrybee7ba72011-10-21 20:47:01 -0500515 if (!j->chrootdir)
516 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400517 }
518
Elly Jonese1749eb2011-10-07 13:54:59 -0400519 if (j->flags.seccomp_filter && j->filter_count) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400520 count = j->filter_count;
Elly Jonese1749eb2011-10-07 13:54:59 -0400521 /* Let add_seccomp_filter recompute the value. */
522 j->filter_count = 0;
Elly Jonese1749eb2011-10-07 13:54:59 -0400523 for (; count > 0; --count) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400524 int *nr = (int *)consumebytes(sizeof(*nr), &serialized,
525 &length);
Elly Jonese1749eb2011-10-07 13:54:59 -0400526 char *filter;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400527 if (!nr)
Will Drewrybee7ba72011-10-21 20:47:01 -0500528 goto bad_filters;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400529 filter = consumestr(&serialized, &length);
530 if (!filter)
Will Drewrybee7ba72011-10-21 20:47:01 -0500531 goto bad_filters;
Elly Jonese1749eb2011-10-07 13:54:59 -0400532 if (minijail_add_seccomp_filter(j, *nr, filter))
Will Drewrybee7ba72011-10-21 20:47:01 -0500533 goto bad_filters;
Elly Jonese1749eb2011-10-07 13:54:59 -0400534 }
535 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400536
537 count = j->binding_count;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400538 j->binding_count = 0;
539 for (i = 0; i < count; ++i) {
540 int *writeable;
541 const char *dest;
542 const char *src = consumestr(&serialized, &length);
543 if (!src)
Will Drewrybee7ba72011-10-21 20:47:01 -0500544 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400545 dest = consumestr(&serialized, &length);
546 if (!dest)
Will Drewrybee7ba72011-10-21 20:47:01 -0500547 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400548 writeable = consumebytes(sizeof(*writeable), &serialized, &length);
549 if (!writeable)
Will Drewrybee7ba72011-10-21 20:47:01 -0500550 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400551 if (minijail_bind(j, src, dest, *writeable))
Will Drewrybee7ba72011-10-21 20:47:01 -0500552 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400553 }
554
Elly Jonese1749eb2011-10-07 13:54:59 -0400555 return 0;
Will Drewrybee7ba72011-10-21 20:47:01 -0500556
557bad_bindings:
558bad_filters:
559 if (j->chrootdir)
560 free(j->chrootdir);
561bad_chrootdir:
562 if (j->user)
563 free(j->user);
564clear_pointers:
565 j->user = NULL;
566 j->chrootdir = NULL;
567out:
568 return ret;
Will Drewry2ddaad02011-09-16 11:36:08 -0500569}
570
Elly Jonese1749eb2011-10-07 13:54:59 -0400571void minijail_preenter(struct minijail *j)
572{
573 /* Strip out options which are minijail_run() only. */
574 j->flags.vfs = 0;
575 j->flags.readonly = 0;
576 j->flags.pids = 0;
Will Drewryfe4a3722011-09-16 14:50:50 -0500577}
578
Elly Jonese1749eb2011-10-07 13:54:59 -0400579void minijail_preexec(struct minijail *j)
580{
581 int vfs = j->flags.vfs;
582 int readonly = j->flags.readonly;
583 if (j->user)
584 free(j->user);
585 j->user = NULL;
586 memset(&j->flags, 0, sizeof(j->flags));
587 /* Now restore anything we meant to keep. */
588 j->flags.vfs = vfs;
589 j->flags.readonly = readonly;
590 /* Note, pidns will already have been used before this call. */
Will Drewry2ddaad02011-09-16 11:36:08 -0500591}
592
Elly Jones51a5b6c2011-10-12 19:09:26 -0400593/* bind_one: Applies bindings from @b for @j, recursing as needed.
594 * @j Minijail these bindings are for
595 * @b Head of list of bindings
596 *
597 * Returns 0 for success.
598 */
Will Drewry6ac91122011-10-21 16:38:58 -0500599int bind_one(const struct minijail *j, struct binding *b) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400600 int ret = 0;
601 char *dest = NULL;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400602 if (ret)
603 return ret;
604 /* dest has a leading "/" */
605 if (asprintf(&dest, "%s%s", j->chrootdir, b->dest) < 0)
606 return -ENOMEM;
Elly Jonesa1059632011-12-15 15:17:07 -0500607 ret = mount(b->src, dest, NULL, MS_BIND, NULL);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400608 if (ret)
609 pdie("bind: %s -> %s", b->src, dest);
Elly Jonesa1059632011-12-15 15:17:07 -0500610 if (!b->writeable) {
611 ret = mount(b->src, dest, NULL,
612 MS_BIND | MS_REMOUNT | MS_RDONLY, NULL);
613 if (ret)
614 pdie("bind ro: %s -> %s", b->src, dest);
615 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400616 free(dest);
617 if (b->next)
618 return bind_one(j, b->next);
619 return ret;
620}
621
Will Drewry6ac91122011-10-21 16:38:58 -0500622int enter_chroot(const struct minijail *j) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400623 int ret;
624 if (j->bindings_head && (ret = bind_one(j, j->bindings_head)))
625 return ret;
626
627 if (chroot(j->chrootdir))
628 return -errno;
629
630 if (chdir("/"))
631 return -errno;
632
633 return 0;
634}
635
/*
 * Replaces the inherited /proc mount with a fresh read-only one mounted
 * nodev, noexec and nosuid.
 *
 * Returns 0 on success or -errno from umount()/mount().
 */
int remount_readonly(void)
{
	const char *proc_path = "/proc";
	const unsigned int safe_flags = MS_NODEV | MS_NOEXEC | MS_NOSUID;

	/*
	 * Right now, we're holding a reference to our parent's old mount of
	 * /proc in our namespace, which means using MS_REMOUNT here would
	 * mutate our parent's mount as well, even though we're in a VFS
	 * namespace (!). Instead, remove their mount from our namespace
	 * and make our own.
	 */
	if (umount(proc_path) != 0)
		return -errno;

	if (mount("", proc_path, "proc", safe_flags | MS_RDONLY, "") != 0)
		return -errno;

	return 0;
}
653
Will Drewry6ac91122011-10-21 16:38:58 -0500654void drop_caps(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400655{
656 cap_t caps = cap_get_proc();
657 cap_value_t raise_flag[1];
658 unsigned int i;
659 if (!caps)
660 die("can't get process caps");
661 if (cap_clear_flag(caps, CAP_INHERITABLE))
662 die("can't clear inheritable caps");
663 if (cap_clear_flag(caps, CAP_EFFECTIVE))
664 die("can't clear effective caps");
665 if (cap_clear_flag(caps, CAP_PERMITTED))
666 die("can't clear permitted caps");
667 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
668 if (i != CAP_SETPCAP && !(j->caps & (1 << i)))
669 continue;
670 raise_flag[0] = i;
671 if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
672 die("can't add effective cap");
673 if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
674 die("can't add permitted cap");
675 if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
676 die("can't add inheritable cap");
677 }
678 if (cap_set_proc(caps))
679 die("can't apply cleaned capset");
680 cap_free(caps);
681 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
682 if (j->caps & (1 << i))
683 continue;
684 if (prctl(PR_CAPBSET_DROP, i))
685 pdie("prctl(PR_CAPBSET_DROP)");
686 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400687}
688
Will Drewry6ac91122011-10-21 16:38:58 -0500689int setup_seccomp_filters(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400690{
691 const struct seccomp_filter *sf = j->filters;
692 int ret = 0;
693 int broaden = 0;
Will Drewry32ac9f52011-08-18 21:36:27 -0500694
Elly Jonese1749eb2011-10-07 13:54:59 -0400695 /* No filters installed isn't necessarily an error. */
696 if (!sf)
697 return ret;
Will Drewry32ac9f52011-08-18 21:36:27 -0500698
Elly Jonese1749eb2011-10-07 13:54:59 -0400699 do {
700 errno = 0;
701 ret = prctl(PR_SET_SECCOMP_FILTER, PR_SECCOMP_FILTER_SYSCALL,
702 sf->nr, broaden ? "1" : sf->filter);
703 if (ret) {
704 switch (errno) {
705 case ENOSYS:
706 /* TODO(wad) make this a config option */
707 if (broaden)
708 die("CONFIG_SECCOMP_FILTER is not"
709 "supported by your kernel");
710 warn("missing CONFIG_FTRACE_SYSCALLS; relaxing"
711 "the filter for %d", sf->nr);
712 broaden = 1;
713 continue;
714 case E2BIG:
715 warn("seccomp filter too long: %d", sf->nr);
716 pdie("filter too long");
717 case ENOSPC:
718 pdie("too many seccomp filters");
719 case EPERM:
720 warn("syscall filter disallowed for %d",
721 sf->nr);
722 pdie("failed to install seccomp filter");
723 case EINVAL:
724 warn("seccomp filter or call method is"
725 " invalid. %d:'%s'", sf->nr, sf->filter);
726 default:
727 pdie("failed to install seccomp filter");
728 }
729 }
730 sf = sf->next;
731 broaden = 0;
732 } while (sf != j->filters);
733 return ret;
Will Drewry32ac9f52011-08-18 21:36:27 -0500734}
735
Will Drewry6ac91122011-10-21 16:38:58 -0500736void API minijail_enter(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400737{
738 if (j->flags.pids)
739 die("tried to enter a pid-namespaced jail;"
740 "try minijail_run()?");
Elly Jonescd7a9042011-07-22 13:56:51 -0400741
Elly Jonese1749eb2011-10-07 13:54:59 -0400742 if (j->flags.seccomp_filter && setup_seccomp_filters(j))
743 pdie("failed to configure seccomp filters");
Will Drewry32ac9f52011-08-18 21:36:27 -0500744
Elly Jonese1749eb2011-10-07 13:54:59 -0400745 if (j->flags.usergroups && !j->user)
746 die("usergroup inheritance without username");
Elly Jonescd7a9042011-07-22 13:56:51 -0400747
Elly Jonesdd3e8512012-01-23 15:13:38 -0500748 /*
749 * We can't recover from failures if we've dropped privileges partially,
Elly Jonese1749eb2011-10-07 13:54:59 -0400750 * so we don't even try. If any of our operations fail, we abort() the
751 * entire process.
752 */
753 if (j->flags.vfs && unshare(CLONE_NEWNS))
754 pdie("unshare");
Elly Jonescd7a9042011-07-22 13:56:51 -0400755
Elly Jones51a5b6c2011-10-12 19:09:26 -0400756 if (j->flags.chroot && enter_chroot(j))
757 pdie("chroot");
758
Elly Jonese1749eb2011-10-07 13:54:59 -0400759 if (j->flags.readonly && remount_readonly())
760 pdie("remount");
Elly Jonescd7a9042011-07-22 13:56:51 -0400761
Elly Jonese1749eb2011-10-07 13:54:59 -0400762 if (j->flags.caps) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500763 /*
764 * POSIX capabilities are a bit tricky. If we drop our
Elly Jonese1749eb2011-10-07 13:54:59 -0400765 * capability to change uids, our attempt to use setuid()
766 * below will fail. Hang on to root caps across setuid(), then
767 * lock securebits.
768 */
769 if (prctl(PR_SET_KEEPCAPS, 1))
770 pdie("prctl(PR_SET_KEEPCAPS)");
771 if (prctl
772 (PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
773 pdie("prctl(PR_SET_SECUREBITS)");
774 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400775
Elly Jonese1749eb2011-10-07 13:54:59 -0400776 if (j->flags.usergroups) {
777 if (initgroups(j->user, j->usergid))
778 pdie("initgroups");
779 } else {
780 /* Only attempt to clear supplemental groups if we are changing
781 * users. */
782 if ((j->uid || j->gid) && setgroups(0, NULL))
783 pdie("setgroups");
784 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400785
Elly Jonese1749eb2011-10-07 13:54:59 -0400786 if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
787 pdie("setresgid");
Elly Jonescd7a9042011-07-22 13:56:51 -0400788
Elly Jonese1749eb2011-10-07 13:54:59 -0400789 if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
790 pdie("setresuid");
Elly Jonescd7a9042011-07-22 13:56:51 -0400791
Elly Jonese1749eb2011-10-07 13:54:59 -0400792 if (j->flags.caps)
793 drop_caps(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400794
Elly Jonesdd3e8512012-01-23 15:13:38 -0500795 /*
796 * seccomp has to come last since it cuts off all the other
Elly Jonese1749eb2011-10-07 13:54:59 -0400797 * privilege-dropping syscalls :)
798 */
799 if (j->flags.seccomp_filter && prctl(PR_SET_SECCOMP, 13))
800 pdie("prctl(PR_SET_SECCOMP, 13)");
Will Drewry32ac9f52011-08-18 21:36:27 -0500801
Elly Jonese1749eb2011-10-07 13:54:59 -0400802 if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
803 pdie("prctl(PR_SET_SECCOMP)");
Elly Jonescd7a9042011-07-22 13:56:51 -0400804}
805
/*
 * Wait status of the jailed root process. It is stored by init() and read
 * by the SIGTERM handler init_term(), so it must be an object that can be
 * accessed safely from a signal handler: volatile sig_atomic_t.
 */
/* TODO(wad) will visibility affect this variable? */
static volatile sig_atomic_t init_exitstatus = 0;
808
Will Drewry6ac91122011-10-21 16:38:58 -0500809void init_term(int __attribute__ ((unused)) sig)
Elly Jonese1749eb2011-10-07 13:54:59 -0400810{
811 _exit(init_exitstatus);
Elly Jonescd7a9042011-07-22 13:56:51 -0400812}
813
Will Drewry6ac91122011-10-21 16:38:58 -0500814int init(pid_t rootpid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400815{
816 pid_t pid;
817 int status;
818 /* so that we exit with the right status */
819 signal(SIGTERM, init_term);
820 /* TODO(wad) self jail with seccomp_filters here. */
821 while ((pid = wait(&status)) > 0) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500822 /*
823 * This loop will only end when either there are no processes
Elly Jonese1749eb2011-10-07 13:54:59 -0400824 * left inside our pid namespace or we get a signal.
825 */
826 if (pid == rootpid)
827 init_exitstatus = status;
828 }
829 if (!WIFEXITED(init_exitstatus))
830 _exit(MINIJAIL_ERR_INIT);
831 _exit(WEXITSTATUS(init_exitstatus));
Elly Jonescd7a9042011-07-22 13:56:51 -0400832}
833
Will Drewry6ac91122011-10-21 16:38:58 -0500834int API minijail_from_fd(int fd, struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400835{
836 size_t sz = 0;
837 size_t bytes = read(fd, &sz, sizeof(sz));
838 char *buf;
839 int r;
840 if (sizeof(sz) != bytes)
841 return -EINVAL;
842 if (sz > USHRT_MAX) /* Arbitrary sanity check */
843 return -E2BIG;
844 buf = malloc(sz);
845 if (!buf)
846 return -ENOMEM;
847 bytes = read(fd, buf, sz);
848 if (bytes != sz) {
849 free(buf);
850 return -EINVAL;
851 }
852 r = minijail_unmarshal(j, buf, sz);
853 free(buf);
854 return r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500855}
856
Will Drewry6ac91122011-10-21 16:38:58 -0500857int API minijail_to_fd(struct minijail *j, int fd)
Elly Jonese1749eb2011-10-07 13:54:59 -0400858{
859 char *buf;
860 size_t sz = minijail_size(j);
861 ssize_t written;
862 int r;
Elly Jonescd7a9042011-07-22 13:56:51 -0400863
Elly Jonese1749eb2011-10-07 13:54:59 -0400864 if (!sz)
865 return -EINVAL;
866 buf = malloc(sz);
867 r = minijail_marshal(j, buf, sz);
868 if (r) {
869 free(buf);
870 return r;
871 }
872 /* Sends [size][minijail]. */
873 written = write(fd, &sz, sizeof(sz));
874 if (written != sizeof(sz)) {
875 free(buf);
876 return -EFAULT;
877 }
878 written = write(fd, buf, sz);
879 if (written < 0 || (size_t) written != sz) {
880 free(buf);
881 return -EFAULT;
882 }
883 free(buf);
884 return 0;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500885}
Elly Jonescd7a9042011-07-22 13:56:51 -0400886
Will Drewry6ac91122011-10-21 16:38:58 -0500887int setup_preload(void)
Elly Jonese1749eb2011-10-07 13:54:59 -0400888{
889 char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
890 char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
891 if (!newenv)
892 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400893
Elly Jonese1749eb2011-10-07 13:54:59 -0400894 /* Only insert a separating space if we have something to separate... */
895 sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "",
896 PRELOADPATH);
Elly Jonescd7a9042011-07-22 13:56:51 -0400897
Elly Jonese1749eb2011-10-07 13:54:59 -0400898 /* setenv() makes a copy of the string we give it */
899 setenv(kLdPreloadEnvVar, newenv, 1);
900 free(newenv);
901 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400902}
903
Will Drewry6ac91122011-10-21 16:38:58 -0500904int setup_pipe(int fds[2])
Elly Jonese1749eb2011-10-07 13:54:59 -0400905{
906 int r = pipe(fds);
907 char fd_buf[11];
908 if (r)
909 return r;
910 r = snprintf(fd_buf, sizeof(fd_buf), "%d", fds[0]);
911 if (r <= 0)
912 return -EINVAL;
913 setenv(kFdEnvVar, fd_buf, 1);
914 return 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500915}
916
Will Drewry6ac91122011-10-21 16:38:58 -0500917int API minijail_run(struct minijail *j, const char *filename,
918 char *const argv[])
Elly Jonese1749eb2011-10-07 13:54:59 -0400919{
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700920 return minijail_run_pid(j, filename, argv, NULL);
921}
922
923int API minijail_run_pid(struct minijail *j, const char *filename,
924 char *const argv[], pid_t *pchild_pid)
925{
Elly Jonese1749eb2011-10-07 13:54:59 -0400926 unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
927 char *oldenv, *oldenv_copy = NULL;
928 pid_t child_pid;
929 int pipe_fds[2];
930 int ret;
Ben Chan541c7e52011-08-26 14:55:53 -0700931
Elly Jonese1749eb2011-10-07 13:54:59 -0400932 oldenv = getenv(kLdPreloadEnvVar);
933 if (oldenv) {
934 oldenv_copy = strdup(oldenv);
935 if (!oldenv_copy)
936 return -ENOMEM;
937 }
Will Drewryf89aef52011-09-16 16:48:57 -0500938
Elly Jonese1749eb2011-10-07 13:54:59 -0400939 if (setup_preload())
940 return -EFAULT;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500941
Elly Jonesdd3e8512012-01-23 15:13:38 -0500942 /*
943 * Before we fork(2) and execve(2) the child process, we need to open
Elly Jonese1749eb2011-10-07 13:54:59 -0400944 * a pipe(2) to send the minijail configuration over.
945 */
946 if (setup_pipe(pipe_fds))
947 return -EFAULT;
Elly Jonescd7a9042011-07-22 13:56:51 -0400948
Elly Jonese1749eb2011-10-07 13:54:59 -0400949 child_pid = syscall(SYS_clone, pidns | SIGCHLD, NULL);
950 if (child_pid < 0) {
951 free(oldenv_copy);
952 return child_pid;
953 }
Will Drewryf89aef52011-09-16 16:48:57 -0500954
Elly Jonese1749eb2011-10-07 13:54:59 -0400955 if (child_pid) {
956 /* Restore parent's LD_PRELOAD. */
957 if (oldenv_copy) {
958 setenv(kLdPreloadEnvVar, oldenv_copy, 1);
959 free(oldenv_copy);
960 } else {
961 unsetenv(kLdPreloadEnvVar);
962 }
963 unsetenv(kFdEnvVar);
964 j->initpid = child_pid;
965 close(pipe_fds[0]); /* read endpoint */
966 ret = minijail_to_fd(j, pipe_fds[1]);
967 close(pipe_fds[1]); /* write endpoint */
968 if (ret) {
969 kill(j->initpid, SIGKILL);
970 die("failed to send marshalled minijail");
971 }
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700972 if (pchild_pid)
973 *pchild_pid = child_pid;
Elly Jonese1749eb2011-10-07 13:54:59 -0400974 return 0;
975 }
976 free(oldenv_copy);
Ben Chan541c7e52011-08-26 14:55:53 -0700977
Elly Jonese1749eb2011-10-07 13:54:59 -0400978 /* Drop everything that cannot be inherited across execve. */
979 minijail_preexec(j);
980 /* Jail this process and its descendants... */
981 minijail_enter(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400982
Elly Jonese1749eb2011-10-07 13:54:59 -0400983 if (pidns) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500984 /*
985 * pid namespace: this process will become init inside the new
Elly Jonese1749eb2011-10-07 13:54:59 -0400986 * namespace, so fork off a child to actually run the program
987 * (we don't want all programs we might exec to have to know
988 * how to be init).
989 */
990 child_pid = fork();
991 if (child_pid < 0)
992 _exit(child_pid);
993 else if (child_pid > 0)
994 init(child_pid); /* never returns */
995 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400996
Elly Jonesdd3e8512012-01-23 15:13:38 -0500997 /*
998 * If we aren't pid-namespaced:
Elly Jonese1749eb2011-10-07 13:54:59 -0400999 * calling process
1000 * -> execve()-ing process
1001 * If we are:
1002 * calling process
1003 * -> init()-ing process
1004 * -> execve()-ing process
1005 */
1006 _exit(execve(filename, argv, environ));
Elly Jonescd7a9042011-07-22 13:56:51 -04001007}
1008
Will Drewry6ac91122011-10-21 16:38:58 -05001009int API minijail_kill(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -04001010{
1011 int st;
1012 if (kill(j->initpid, SIGTERM))
1013 return -errno;
1014 if (waitpid(j->initpid, &st, 0) < 0)
1015 return -errno;
1016 return st;
Elly Jonescd7a9042011-07-22 13:56:51 -04001017}
1018
Will Drewry6ac91122011-10-21 16:38:58 -05001019int API minijail_wait(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -04001020{
1021 int st;
1022 if (waitpid(j->initpid, &st, 0) < 0)
1023 return -errno;
1024 if (!WIFEXITED(st))
1025 return MINIJAIL_ERR_JAIL;
1026 return WEXITSTATUS(st);
Elly Jonescd7a9042011-07-22 13:56:51 -04001027}
1028
Will Drewry6ac91122011-10-21 16:38:58 -05001029void API minijail_destroy(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -04001030{
1031 struct seccomp_filter *f = j->filters;
1032 /* Unlink the tail and head */
1033 if (f)
1034 f->prev->next = NULL;
1035 while (f) {
1036 struct seccomp_filter *next = f->next;
1037 free(f->filter);
1038 free(f);
1039 f = next;
1040 }
Elly Jones51a5b6c2011-10-12 19:09:26 -04001041 while (j->bindings_head) {
1042 struct binding *b = j->bindings_head;
1043 j->bindings_head = j->bindings_head->next;
1044 free(b->dest);
1045 free(b->src);
1046 free(b);
1047 }
1048 j->bindings_tail = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -04001049 if (j->user)
1050 free(j->user);
Will Drewrybee7ba72011-10-21 20:47:01 -05001051 if (j->chrootdir)
1052 free(j->chrootdir);
Elly Jonese1749eb2011-10-07 13:54:59 -04001053 free(j);
Elly Jonescd7a9042011-07-22 13:56:51 -04001054}