blob: 78cfbc10507844d9e3108450a05fe6ededc6e3fa [file] [log] [blame]
Elly Jonesdd3e8512012-01-23 15:13:38 -05001/*
2 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Elly Jonescd7a9042011-07-22 13:56:51 -04003 * Use of this source code is governed by a BSD-style license that can be
Will Drewry32ac9f52011-08-18 21:36:27 -05004 * found in the LICENSE file.
5 */
Elly Jonescd7a9042011-07-22 13:56:51 -04006
7#define _BSD_SOURCE
8#define _GNU_SOURCE
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -08009#include <asm/unistd.h>
Will Drewry32ac9f52011-08-18 21:36:27 -050010#include <ctype.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040011#include <errno.h>
12#include <grp.h>
13#include <inttypes.h>
Will Drewryfe4a3722011-09-16 14:50:50 -050014#include <limits.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040015#include <linux/capability.h>
16#include <linux/securebits.h>
17#include <pwd.h>
18#include <sched.h>
19#include <signal.h>
Will Drewry2f54b6a2011-09-16 13:45:31 -050020#include <stdarg.h>
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080021#include <stddef.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040022#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <syscall.h>
26#include <sys/capability.h>
27#include <sys/mount.h>
Will Drewryf89aef52011-09-16 16:48:57 -050028#include <sys/param.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040029#include <sys/prctl.h>
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080030#include <sys/user.h>
Elly Jonescd7a9042011-07-22 13:56:51 -040031#include <sys/wait.h>
32#include <syslog.h>
33#include <unistd.h>
34
35#include "libminijail.h"
Will Drewry32ac9f52011-08-18 21:36:27 -050036#include "libsyscalls.h"
Elly Jonescd7a9042011-07-22 13:56:51 -040037#include "libminijail-private.h"
38
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -080039#include "syscall_filter.h"
40
/*
 * Fallback values for prctl(2)/seccomp constants, used only when the
 * system headers included above do not define them.
 */
#if !defined(PR_SET_SECCOMP)
# define PR_SET_SECCOMP 22
#endif

/* Constants used for seccomp filtering with BPF programs. */
#if !defined(PR_SET_NO_NEW_PRIVS)
# define PR_SET_NO_NEW_PRIVS 38
#endif
#if !defined(SECCOMP_MODE_FILTER)
# define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
#endif
53
/*
 * die: logs a printf-style message at LOG_ERR, prefixed with
 * "libminijail: ", then abort()s the process.
 */
#define die(_fmt, ...) do { \
	syslog(LOG_ERR, "libminijail: " _fmt, ## __VA_ARGS__); \
	abort(); \
} while (0)
Elly Jonescd7a9042011-07-22 13:56:51 -040058
/* pdie: same as die(), with ": " and strerror(errno) appended to the message. */
#define pdie(_fmt, ...) \
	die(_fmt ": %s", ## __VA_ARGS__, strerror(errno))
Will Drewry32ac9f52011-08-18 21:36:27 -050061
/* warn: logs a printf-style message at LOG_WARNING, prefixed with "libminijail: ". */
#define warn(_fmt, ...) \
	syslog(LOG_WARNING, "libminijail: " _fmt, ## __VA_ARGS__)
Elly Jonescd7a9042011-07-22 13:56:51 -040064
/* One requested bind mount; requests form a singly linked list. */
struct binding {
	char *src;		/* path that gets bind-mounted */
	char *dest;		/* absolute mount point, appended to the chroot dir */
	int writeable;		/* zero: the bind is remounted read-only */
	struct binding *next;	/* following request, NULL at the tail */
};
71
/*
 * Jail configuration.
 *
 * The structure is copied byte-for-byte when a jail is marshalled, so the
 * pointer members are stale on the receiving side until minijail_unmarshal()
 * rebuilds them.
 */
struct minijail {
	/*
	 * One bit per requested restriction. The fields are unsigned: a signed
	 * one-bit field can only hold 0 and -1, so "flag == 1" would be false.
	 */
	struct {
		unsigned int uid:1;		/* change uid to |uid| */
		unsigned int gid:1;		/* change gid to |gid| */
		unsigned int caps:1;		/* restrict capabilities to |caps| */
		unsigned int vfs:1;		/* unshare the mount namespace */
		unsigned int pids:1;		/* run in a new pid namespace */
		unsigned int seccomp:1;		/* enable seccomp mode 1 */
		unsigned int readonly:1;	/* remount /proc read-only */
		unsigned int usergroups:1;	/* initgroups() for |user| */
		unsigned int ptrace:1;		/* set by minijail_disable_ptrace() */
		unsigned int seccomp_filter:1;	/* install |filter_prog| */
		unsigned int chroot:1;		/* chroot into |chrootdir| */
	} flags;
	uid_t uid;
	gid_t gid;
	gid_t usergid;			/* primary gid of |user| */
	char *user;			/* heap copy of the user name, or NULL */
	uint64_t caps;			/* bit i set: keep capability i */
	pid_t initpid;			/* pid of the child created by minijail_run_pid() */
	int filter_len;			/* instruction count of |filter_prog| */
	int binding_count;		/* number of entries in the bindings list */
	char *chrootdir;		/* heap copy of the chroot path, or NULL */
	struct sock_fprog *filter_prog;	/* compiled seccomp BPF program, or NULL */
	struct binding *bindings_head;
	struct binding *bindings_tail;
};
99
Will Drewry6ac91122011-10-21 16:38:58 -0500100struct minijail API *minijail_new(void)
Elly Jonese1749eb2011-10-07 13:54:59 -0400101{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400102 return calloc(1, sizeof(struct minijail));
Elly Jonescd7a9042011-07-22 13:56:51 -0400103}
104
Will Drewry6ac91122011-10-21 16:38:58 -0500105void API minijail_change_uid(struct minijail *j, uid_t uid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400106{
107 if (uid == 0)
108 die("useless change to uid 0");
109 j->uid = uid;
110 j->flags.uid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400111}
112
Will Drewry6ac91122011-10-21 16:38:58 -0500113void API minijail_change_gid(struct minijail *j, gid_t gid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400114{
115 if (gid == 0)
116 die("useless change to gid 0");
117 j->gid = gid;
118 j->flags.gid = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400119}
120
Will Drewry6ac91122011-10-21 16:38:58 -0500121int API minijail_change_user(struct minijail *j, const char *user)
Elly Jonese1749eb2011-10-07 13:54:59 -0400122{
123 char *buf = NULL;
124 struct passwd pw;
125 struct passwd *ppw = NULL;
126 ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX);
127 if (sz == -1)
128 sz = 65536; /* your guess is as good as mine... */
Elly Joneseb300c52011-09-22 14:35:43 -0400129
Elly Jonesdd3e8512012-01-23 15:13:38 -0500130 /*
131 * sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400132 * the maximum needed size of the buffer, so we don't have to search.
133 */
134 buf = malloc(sz);
135 if (!buf)
136 return -ENOMEM;
137 getpwnam_r(user, &pw, buf, sz, &ppw);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500138 /*
139 * We're safe to free the buffer here. The strings inside pw point
140 * inside buf, but we don't use any of them; this leaves the pointers
141 * dangling but it's safe. ppw points at pw if getpwnam_r succeeded.
142 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400143 free(buf);
144 if (!ppw)
145 return -errno;
146 minijail_change_uid(j, ppw->pw_uid);
147 j->user = strdup(user);
148 if (!j->user)
149 return -ENOMEM;
150 j->usergid = ppw->pw_gid;
151 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400152}
153
Will Drewry6ac91122011-10-21 16:38:58 -0500154int API minijail_change_group(struct minijail *j, const char *group)
Elly Jonese1749eb2011-10-07 13:54:59 -0400155{
156 char *buf = NULL;
157 struct group gr;
158 struct group *pgr = NULL;
159 ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX);
160 if (sz == -1)
161 sz = 65536; /* and mine is as good as yours, really */
Elly Joneseb300c52011-09-22 14:35:43 -0400162
Elly Jonesdd3e8512012-01-23 15:13:38 -0500163 /*
164 * sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return
Elly Jonese1749eb2011-10-07 13:54:59 -0400165 * the maximum needed size of the buffer, so we don't have to search.
166 */
167 buf = malloc(sz);
168 if (!buf)
169 return -ENOMEM;
170 getgrnam_r(group, &gr, buf, sz, &pgr);
Elly Jonesdd3e8512012-01-23 15:13:38 -0500171 /*
172 * We're safe to free the buffer here. The strings inside gr point
173 * inside buf, but we don't use any of them; this leaves the pointers
174 * dangling but it's safe. pgr points at gr if getgrnam_r succeeded.
175 */
Elly Jonese1749eb2011-10-07 13:54:59 -0400176 free(buf);
177 if (!pgr)
178 return -errno;
179 minijail_change_gid(j, pgr->gr_gid);
180 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400181}
182
Will Drewry6ac91122011-10-21 16:38:58 -0500183void API minijail_use_seccomp(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400184{
185 j->flags.seccomp = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400186}
187
Will Drewry6ac91122011-10-21 16:38:58 -0500188void API minijail_use_seccomp_filter(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400189{
Jorge Lucangeli Obes2343d832012-04-25 21:59:48 -0700190 /* TODO(jorgelo): re-enable this when the seccomp BPF merge is done. */
191 j->flags.seccomp_filter = 0;
192}
193
194/* TODO(jorgelo): remove this when the seccomp BPF merge is done. */
195void API minijail_force_seccomp_filter(struct minijail *j)
196{
Elly Jonese1749eb2011-10-07 13:54:59 -0400197 j->flags.seccomp_filter = 1;
Will Drewry32ac9f52011-08-18 21:36:27 -0500198}
199
Will Drewry6ac91122011-10-21 16:38:58 -0500200void API minijail_use_caps(struct minijail *j, uint64_t capmask)
Elly Jonese1749eb2011-10-07 13:54:59 -0400201{
202 j->caps = capmask;
203 j->flags.caps = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400204}
205
Will Drewry6ac91122011-10-21 16:38:58 -0500206void API minijail_namespace_vfs(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400207{
208 j->flags.vfs = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400209}
210
Will Drewry6ac91122011-10-21 16:38:58 -0500211void API minijail_namespace_pids(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400212{
Elly Jonese58176c2012-01-23 11:46:17 -0500213 j->flags.vfs = 1;
214 j->flags.readonly = 1;
Elly Jonese1749eb2011-10-07 13:54:59 -0400215 j->flags.pids = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400216}
217
Will Drewry6ac91122011-10-21 16:38:58 -0500218void API minijail_remount_readonly(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400219{
220 j->flags.vfs = 1;
221 j->flags.readonly = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400222}
223
Will Drewry6ac91122011-10-21 16:38:58 -0500224void API minijail_inherit_usergroups(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400225{
226 j->flags.usergroups = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400227}
228
Will Drewry6ac91122011-10-21 16:38:58 -0500229void API minijail_disable_ptrace(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400230{
231 j->flags.ptrace = 1;
Elly Jonescd7a9042011-07-22 13:56:51 -0400232}
233
Will Drewry6ac91122011-10-21 16:38:58 -0500234int API minijail_enter_chroot(struct minijail *j, const char *dir) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400235 if (j->chrootdir)
236 return -EINVAL;
237 j->chrootdir = strdup(dir);
238 if (!j->chrootdir)
239 return -ENOMEM;
240 j->flags.chroot = 1;
241 return 0;
242}
243
Will Drewry6ac91122011-10-21 16:38:58 -0500244int API minijail_bind(struct minijail *j, const char *src, const char *dest,
245 int writeable) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400246 struct binding *b;
247
248 if (*dest != '/')
249 return -EINVAL;
250 b = calloc(1, sizeof(*b));
251 if (!b)
252 return -ENOMEM;
253 b->dest = strdup(dest);
254 if (!b->dest)
255 goto error;
256 b->src = strdup(src);
257 if (!b->src)
258 goto error;
259 b->writeable = writeable;
260
261 syslog(LOG_INFO, "libminijail: bind %s -> %s", src, dest);
262
Elly Jonesdd3e8512012-01-23 15:13:38 -0500263 /*
264 * Force vfs namespacing so the bind mounts don't leak out into the
Elly Jones51a5b6c2011-10-12 19:09:26 -0400265 * containing vfs namespace.
266 */
267 minijail_namespace_vfs(j);
268
269 if (j->bindings_tail)
270 j->bindings_tail->next = b;
271 else
272 j->bindings_head = b;
273 j->bindings_tail = b;
274 j->binding_count++;
275
276 return 0;
277
278error:
279 free(b->src);
280 free(b->dest);
281 free(b);
282 return -ENOMEM;
283}
284
Will Drewry6ac91122011-10-21 16:38:58 -0500285void API minijail_parse_seccomp_filters(struct minijail *j, const char *path)
Elly Jonese1749eb2011-10-07 13:54:59 -0400286{
287 FILE *file = fopen(path, "r");
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800288 if (!file) {
289 pdie("failed to open seccomp filters file '%s'", path);
Elly Jonese1749eb2011-10-07 13:54:59 -0400290 }
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800291
292 struct sock_fprog *fprog = malloc(sizeof(struct sock_fprog));
293 if (compile_filter(file, fprog)) {
294 die("failed to compile seccomp filters BPF program in '%s'", path);
295 }
296
297 j->filter_len = fprog->len;
298 j->filter_prog = fprog;
299
Elly Jonese1749eb2011-10-07 13:54:59 -0400300 fclose(file);
Will Drewry32ac9f52011-08-18 21:36:27 -0500301}
302
/* Cursor over an output buffer while a jail is being serialized. */
struct marshal_state {
	size_t available;	/* bytes still free at |buf| */
	size_t total;		/* bytes requested so far, written or not */
	char *buf;		/* next write position */
};
308
Will Drewry6ac91122011-10-21 16:38:58 -0500309void marshal_state_init(struct marshal_state *state,
310 char *buf, size_t available)
Elly Jonese1749eb2011-10-07 13:54:59 -0400311{
312 state->available = available;
313 state->buf = buf;
314 state->total = 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500315}
316
Will Drewry6ac91122011-10-21 16:38:58 -0500317void marshal_append(struct marshal_state *state,
318 char *src, size_t length)
Elly Jonese1749eb2011-10-07 13:54:59 -0400319{
320 size_t copy_len = MIN(state->available, length);
Will Drewryf89aef52011-09-16 16:48:57 -0500321
Elly Jonese1749eb2011-10-07 13:54:59 -0400322 /* Up to |available| will be written. */
323 if (copy_len) {
324 memcpy(state->buf, src, copy_len);
325 state->buf += copy_len;
326 state->available -= copy_len;
327 }
328 /* |total| will contain the expected length. */
329 state->total += length;
Will Drewryf89aef52011-09-16 16:48:57 -0500330}
331
Will Drewry6ac91122011-10-21 16:38:58 -0500332void minijail_marshal_helper(struct marshal_state *state,
333 const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400334{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400335 struct binding *b = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -0400336 marshal_append(state, (char *)j, sizeof(*j));
337 if (j->user)
338 marshal_append(state, j->user, strlen(j->user) + 1);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400339 if (j->chrootdir)
340 marshal_append(state, j->chrootdir, strlen(j->chrootdir) + 1);
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800341 if (j->flags.seccomp_filter && j->filter_prog) {
342 struct sock_fprog *fp = j->filter_prog;
343 marshal_append(state, (char *)fp->filter,
344 fp->len * sizeof(struct sock_filter));
Elly Jonese1749eb2011-10-07 13:54:59 -0400345 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400346 for (b = j->bindings_head; b; b = b->next) {
347 marshal_append(state, b->src, strlen(b->src) + 1);
348 marshal_append(state, b->dest, strlen(b->dest) + 1);
349 marshal_append(state, (char *)&b->writeable, sizeof(b->writeable));
350 }
Will Drewryf89aef52011-09-16 16:48:57 -0500351}
352
Will Drewry6ac91122011-10-21 16:38:58 -0500353size_t API minijail_size(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400354{
355 struct marshal_state state;
356 marshal_state_init(&state, NULL, 0);
357 minijail_marshal_helper(&state, j);
358 return state.total;
Will Drewry2ddaad02011-09-16 11:36:08 -0500359}
360
Elly Jonese1749eb2011-10-07 13:54:59 -0400361int minijail_marshal(const struct minijail *j, char *buf, size_t available)
362{
363 struct marshal_state state;
364 marshal_state_init(&state, buf, available);
365 minijail_marshal_helper(&state, j);
366 return (state.total > available);
Will Drewry2ddaad02011-09-16 11:36:08 -0500367}
368
/* consumebytes: consumes @length bytes from a buffer @buf of length @buflength
 * @length    Number of bytes to consume
 * @buf       Buffer to consume from; advanced past the consumed bytes
 * @buflength Size of @buf; reduced by @length
 *
 * Returns a pointer to the base of the bytes, or NULL if fewer than @length
 * bytes remain (in which case nothing is modified).
 */
void *consumebytes(size_t length, char **buf, size_t *buflength)
{
	char *base;

	if (*buflength < length)
		return NULL;

	base = *buf;
	*buf = base + length;
	*buflength -= length;
	return base;
}
384
/* consumestr: consumes a C string from a buffer @buf of length @buflength
 * @buf       Buffer to consume; advanced past the string and its terminator
 * @buflength Length of buffer; reduced by the consumed amount
 *
 * Returns a pointer to the base of the string, or NULL if no terminator is
 * found within the buffer (in which case nothing is modified).
 */
char *consumestr(char **buf, size_t *buflength)
{
	char *base = *buf;
	char *nul = memchr(base, '\0', *buflength);
	size_t consumed;

	if (nul == NULL)
		/* There's no null-terminator */
		return NULL;

	/* The terminator lies inside the buffer, so this cannot overrun. */
	consumed = (size_t)(nul - base) + 1;
	*buf = base + consumed;
	*buflength -= consumed;
	return base;
}
398
Elly Jonese1749eb2011-10-07 13:54:59 -0400399int minijail_unmarshal(struct minijail *j, char *serialized, size_t length)
400{
Elly Jones51a5b6c2011-10-12 19:09:26 -0400401 int i;
402 int count;
Will Drewrybee7ba72011-10-21 20:47:01 -0500403 int ret = -EINVAL;
404
Elly Jonese1749eb2011-10-07 13:54:59 -0400405 if (length < sizeof(*j))
Will Drewrybee7ba72011-10-21 20:47:01 -0500406 goto out;
Elly Jonese1749eb2011-10-07 13:54:59 -0400407 memcpy((void *)j, serialized, sizeof(*j));
408 serialized += sizeof(*j);
409 length -= sizeof(*j);
Will Drewryf89aef52011-09-16 16:48:57 -0500410
Will Drewrybee7ba72011-10-21 20:47:01 -0500411 /* Potentially stale pointers not used as signals. */
412 j->bindings_head = NULL;
413 j->bindings_tail = NULL;
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800414 j->filter_prog = NULL;
Will Drewrybee7ba72011-10-21 20:47:01 -0500415
Elly Jonese1749eb2011-10-07 13:54:59 -0400416 if (j->user) { /* stale pointer */
Elly Jones51a5b6c2011-10-12 19:09:26 -0400417 char *user = consumestr(&serialized, &length);
418 if (!user)
Will Drewrybee7ba72011-10-21 20:47:01 -0500419 goto clear_pointers;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400420 j->user = strdup(user);
Will Drewrybee7ba72011-10-21 20:47:01 -0500421 if (!j->user)
422 goto clear_pointers;
Elly Jonese1749eb2011-10-07 13:54:59 -0400423 }
Will Drewryf89aef52011-09-16 16:48:57 -0500424
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400425 if (j->chrootdir) { /* stale pointer */
426 char *chrootdir = consumestr(&serialized, &length);
427 if (!chrootdir)
Will Drewrybee7ba72011-10-21 20:47:01 -0500428 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400429 j->chrootdir = strdup(chrootdir);
Will Drewrybee7ba72011-10-21 20:47:01 -0500430 if (!j->chrootdir)
431 goto bad_chrootdir;
Elly Jonesa8d1e1b2011-10-21 15:38:00 -0400432 }
433
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800434 if (j->flags.seccomp_filter && j->filter_len > 0) {
435 size_t ninstrs = j->filter_len;
436 if (ninstrs > (SIZE_MAX / sizeof(struct sock_filter)) ||
437 ninstrs > USHRT_MAX)
438 goto bad_filters;
439
440 size_t program_len = ninstrs * sizeof(struct sock_filter);
441 void *program = consumebytes(program_len, &serialized, &length);
442 if (!program)
443 goto bad_filters;
444
445 j->filter_prog = malloc(sizeof(struct sock_fprog));
446 j->filter_prog->len = ninstrs;
447 j->filter_prog->filter = malloc(program_len);
448 memcpy(j->filter_prog->filter, program, program_len);
Elly Jonese1749eb2011-10-07 13:54:59 -0400449 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400450
451 count = j->binding_count;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400452 j->binding_count = 0;
453 for (i = 0; i < count; ++i) {
454 int *writeable;
455 const char *dest;
456 const char *src = consumestr(&serialized, &length);
457 if (!src)
Will Drewrybee7ba72011-10-21 20:47:01 -0500458 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400459 dest = consumestr(&serialized, &length);
460 if (!dest)
Will Drewrybee7ba72011-10-21 20:47:01 -0500461 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400462 writeable = consumebytes(sizeof(*writeable), &serialized, &length);
463 if (!writeable)
Will Drewrybee7ba72011-10-21 20:47:01 -0500464 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400465 if (minijail_bind(j, src, dest, *writeable))
Will Drewrybee7ba72011-10-21 20:47:01 -0500466 goto bad_bindings;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400467 }
468
Elly Jonese1749eb2011-10-07 13:54:59 -0400469 return 0;
Will Drewrybee7ba72011-10-21 20:47:01 -0500470
471bad_bindings:
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800472 if (j->flags.seccomp_filter && j->filter_len > 0) {
473 free(j->filter_prog->filter);
474 free(j->filter_prog);
475 }
Will Drewrybee7ba72011-10-21 20:47:01 -0500476bad_filters:
477 if (j->chrootdir)
478 free(j->chrootdir);
479bad_chrootdir:
480 if (j->user)
481 free(j->user);
482clear_pointers:
483 j->user = NULL;
484 j->chrootdir = NULL;
485out:
486 return ret;
Will Drewry2ddaad02011-09-16 11:36:08 -0500487}
488
Elly Jonese1749eb2011-10-07 13:54:59 -0400489void minijail_preenter(struct minijail *j)
490{
491 /* Strip out options which are minijail_run() only. */
492 j->flags.vfs = 0;
493 j->flags.readonly = 0;
494 j->flags.pids = 0;
Will Drewryfe4a3722011-09-16 14:50:50 -0500495}
496
Elly Jonese1749eb2011-10-07 13:54:59 -0400497void minijail_preexec(struct minijail *j)
498{
499 int vfs = j->flags.vfs;
500 int readonly = j->flags.readonly;
501 if (j->user)
502 free(j->user);
503 j->user = NULL;
504 memset(&j->flags, 0, sizeof(j->flags));
505 /* Now restore anything we meant to keep. */
506 j->flags.vfs = vfs;
507 j->flags.readonly = readonly;
508 /* Note, pidns will already have been used before this call. */
Will Drewry2ddaad02011-09-16 11:36:08 -0500509}
510
Elly Jones51a5b6c2011-10-12 19:09:26 -0400511/* bind_one: Applies bindings from @b for @j, recursing as needed.
512 * @j Minijail these bindings are for
513 * @b Head of list of bindings
514 *
515 * Returns 0 for success.
516 */
Will Drewry6ac91122011-10-21 16:38:58 -0500517int bind_one(const struct minijail *j, struct binding *b) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400518 int ret = 0;
519 char *dest = NULL;
Elly Jones51a5b6c2011-10-12 19:09:26 -0400520 if (ret)
521 return ret;
522 /* dest has a leading "/" */
523 if (asprintf(&dest, "%s%s", j->chrootdir, b->dest) < 0)
524 return -ENOMEM;
Elly Jonesa1059632011-12-15 15:17:07 -0500525 ret = mount(b->src, dest, NULL, MS_BIND, NULL);
Elly Jones51a5b6c2011-10-12 19:09:26 -0400526 if (ret)
527 pdie("bind: %s -> %s", b->src, dest);
Elly Jonesa1059632011-12-15 15:17:07 -0500528 if (!b->writeable) {
529 ret = mount(b->src, dest, NULL,
530 MS_BIND | MS_REMOUNT | MS_RDONLY, NULL);
531 if (ret)
532 pdie("bind ro: %s -> %s", b->src, dest);
533 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400534 free(dest);
535 if (b->next)
536 return bind_one(j, b->next);
537 return ret;
538}
539
Will Drewry6ac91122011-10-21 16:38:58 -0500540int enter_chroot(const struct minijail *j) {
Elly Jones51a5b6c2011-10-12 19:09:26 -0400541 int ret;
542 if (j->bindings_head && (ret = bind_one(j, j->bindings_head)))
543 return ret;
544
545 if (chroot(j->chrootdir))
546 return -errno;
547
548 if (chdir("/"))
549 return -errno;
550
551 return 0;
552}
553
/*
 * remount_readonly: replaces the inherited /proc mount with a fresh
 * read-only one (also nodev, noexec and nosuid).
 *
 * Returns 0 on success or -errno from umount(2) or mount(2).
 */
int remount_readonly(void)
{
	static const char proc_path[] = "/proc";
	const unsigned long mount_flags =
	    MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY;

	/*
	 * Right now, we're holding a reference to our parent's old mount of
	 * /proc in our namespace, which means using MS_REMOUNT here would
	 * mutate our parent's mount as well, even though we're in a VFS
	 * namespace (!). Instead, remove their mount from our namespace
	 * and make our own.
	 */
	if (umount(proc_path) != 0)
		return -errno;
	if (mount("", proc_path, "proc", mount_flags, "") != 0)
		return -errno;
	return 0;
}
571
Will Drewry6ac91122011-10-21 16:38:58 -0500572void drop_caps(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400573{
574 cap_t caps = cap_get_proc();
575 cap_value_t raise_flag[1];
576 unsigned int i;
577 if (!caps)
578 die("can't get process caps");
579 if (cap_clear_flag(caps, CAP_INHERITABLE))
580 die("can't clear inheritable caps");
581 if (cap_clear_flag(caps, CAP_EFFECTIVE))
582 die("can't clear effective caps");
583 if (cap_clear_flag(caps, CAP_PERMITTED))
584 die("can't clear permitted caps");
585 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
586 if (i != CAP_SETPCAP && !(j->caps & (1 << i)))
587 continue;
588 raise_flag[0] = i;
589 if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
590 die("can't add effective cap");
591 if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
592 die("can't add permitted cap");
593 if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
594 die("can't add inheritable cap");
595 }
596 if (cap_set_proc(caps))
597 die("can't apply cleaned capset");
598 cap_free(caps);
599 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
600 if (j->caps & (1 << i))
601 continue;
602 if (prctl(PR_CAPBSET_DROP, i))
603 pdie("prctl(PR_CAPBSET_DROP)");
604 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400605}
606
Will Drewry6ac91122011-10-21 16:38:58 -0500607void API minijail_enter(const struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400608{
609 if (j->flags.pids)
610 die("tried to enter a pid-namespaced jail;"
611 "try minijail_run()?");
Elly Jonescd7a9042011-07-22 13:56:51 -0400612
Elly Jonese1749eb2011-10-07 13:54:59 -0400613 if (j->flags.usergroups && !j->user)
614 die("usergroup inheritance without username");
Elly Jonescd7a9042011-07-22 13:56:51 -0400615
Elly Jonesdd3e8512012-01-23 15:13:38 -0500616 /*
617 * We can't recover from failures if we've dropped privileges partially,
Elly Jonese1749eb2011-10-07 13:54:59 -0400618 * so we don't even try. If any of our operations fail, we abort() the
619 * entire process.
620 */
621 if (j->flags.vfs && unshare(CLONE_NEWNS))
622 pdie("unshare");
Elly Jonescd7a9042011-07-22 13:56:51 -0400623
Elly Jones51a5b6c2011-10-12 19:09:26 -0400624 if (j->flags.chroot && enter_chroot(j))
625 pdie("chroot");
626
Elly Jonese1749eb2011-10-07 13:54:59 -0400627 if (j->flags.readonly && remount_readonly())
628 pdie("remount");
Elly Jonescd7a9042011-07-22 13:56:51 -0400629
Elly Jonese1749eb2011-10-07 13:54:59 -0400630 if (j->flags.caps) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500631 /*
632 * POSIX capabilities are a bit tricky. If we drop our
Elly Jonese1749eb2011-10-07 13:54:59 -0400633 * capability to change uids, our attempt to use setuid()
634 * below will fail. Hang on to root caps across setuid(), then
635 * lock securebits.
636 */
637 if (prctl(PR_SET_KEEPCAPS, 1))
638 pdie("prctl(PR_SET_KEEPCAPS)");
639 if (prctl
640 (PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
641 pdie("prctl(PR_SET_SECUREBITS)");
642 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400643
Elly Jonese1749eb2011-10-07 13:54:59 -0400644 if (j->flags.usergroups) {
645 if (initgroups(j->user, j->usergid))
646 pdie("initgroups");
647 } else {
648 /* Only attempt to clear supplemental groups if we are changing
649 * users. */
650 if ((j->uid || j->gid) && setgroups(0, NULL))
651 pdie("setgroups");
652 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400653
Elly Jonese1749eb2011-10-07 13:54:59 -0400654 if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
655 pdie("setresgid");
Elly Jonescd7a9042011-07-22 13:56:51 -0400656
Elly Jonese1749eb2011-10-07 13:54:59 -0400657 if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
658 pdie("setresuid");
Elly Jonescd7a9042011-07-22 13:56:51 -0400659
Elly Jonese1749eb2011-10-07 13:54:59 -0400660 if (j->flags.caps)
661 drop_caps(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400662
Elly Jonesdd3e8512012-01-23 15:13:38 -0500663 /*
664 * seccomp has to come last since it cuts off all the other
Elly Jonese1749eb2011-10-07 13:54:59 -0400665 * privilege-dropping syscalls :)
666 */
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800667 if (j->flags.seccomp_filter) {
668 /* TODO(jorgelo): document call to PR_SET_NO_NEW_PRIVS. */
669 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
670 pdie("prctl(PR_SET_NO_NEW_PRIVS)");
671 }
672 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, j->filter_prog)) {
673 pdie("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER)");
674 }
675 }
Will Drewry32ac9f52011-08-18 21:36:27 -0500676
Elly Jonese1749eb2011-10-07 13:54:59 -0400677 if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
678 pdie("prctl(PR_SET_SECCOMP)");
Elly Jonescd7a9042011-07-22 13:56:51 -0400679}
680
/* TODO(wad) will visibility affect this variable? */
/*
 * Wait status recorded for the jailed root process. It is read from the
 * SIGTERM handler, hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t init_exitstatus = 0;
683
Will Drewry6ac91122011-10-21 16:38:58 -0500684void init_term(int __attribute__ ((unused)) sig)
Elly Jonese1749eb2011-10-07 13:54:59 -0400685{
686 _exit(init_exitstatus);
Elly Jonescd7a9042011-07-22 13:56:51 -0400687}
688
Will Drewry6ac91122011-10-21 16:38:58 -0500689int init(pid_t rootpid)
Elly Jonese1749eb2011-10-07 13:54:59 -0400690{
691 pid_t pid;
692 int status;
693 /* so that we exit with the right status */
694 signal(SIGTERM, init_term);
695 /* TODO(wad) self jail with seccomp_filters here. */
696 while ((pid = wait(&status)) > 0) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500697 /*
698 * This loop will only end when either there are no processes
Elly Jonese1749eb2011-10-07 13:54:59 -0400699 * left inside our pid namespace or we get a signal.
700 */
701 if (pid == rootpid)
702 init_exitstatus = status;
703 }
704 if (!WIFEXITED(init_exitstatus))
705 _exit(MINIJAIL_ERR_INIT);
706 _exit(WEXITSTATUS(init_exitstatus));
Elly Jonescd7a9042011-07-22 13:56:51 -0400707}
708
Will Drewry6ac91122011-10-21 16:38:58 -0500709int API minijail_from_fd(int fd, struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400710{
711 size_t sz = 0;
712 size_t bytes = read(fd, &sz, sizeof(sz));
713 char *buf;
714 int r;
715 if (sizeof(sz) != bytes)
716 return -EINVAL;
717 if (sz > USHRT_MAX) /* Arbitrary sanity check */
718 return -E2BIG;
719 buf = malloc(sz);
720 if (!buf)
721 return -ENOMEM;
722 bytes = read(fd, buf, sz);
723 if (bytes != sz) {
724 free(buf);
725 return -EINVAL;
726 }
727 r = minijail_unmarshal(j, buf, sz);
728 free(buf);
729 return r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500730}
731
Will Drewry6ac91122011-10-21 16:38:58 -0500732int API minijail_to_fd(struct minijail *j, int fd)
Elly Jonese1749eb2011-10-07 13:54:59 -0400733{
734 char *buf;
735 size_t sz = minijail_size(j);
736 ssize_t written;
737 int r;
Elly Jonescd7a9042011-07-22 13:56:51 -0400738
Elly Jonese1749eb2011-10-07 13:54:59 -0400739 if (!sz)
740 return -EINVAL;
741 buf = malloc(sz);
742 r = minijail_marshal(j, buf, sz);
743 if (r) {
744 free(buf);
745 return r;
746 }
747 /* Sends [size][minijail]. */
748 written = write(fd, &sz, sizeof(sz));
749 if (written != sizeof(sz)) {
750 free(buf);
751 return -EFAULT;
752 }
753 written = write(fd, buf, sz);
754 if (written < 0 || (size_t) written != sz) {
755 free(buf);
756 return -EFAULT;
757 }
758 free(buf);
759 return 0;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500760}
Elly Jonescd7a9042011-07-22 13:56:51 -0400761
Will Drewry6ac91122011-10-21 16:38:58 -0500762int setup_preload(void)
Elly Jonese1749eb2011-10-07 13:54:59 -0400763{
764 char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
765 char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
766 if (!newenv)
767 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400768
Elly Jonese1749eb2011-10-07 13:54:59 -0400769 /* Only insert a separating space if we have something to separate... */
770 sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "",
771 PRELOADPATH);
Elly Jonescd7a9042011-07-22 13:56:51 -0400772
Elly Jonese1749eb2011-10-07 13:54:59 -0400773 /* setenv() makes a copy of the string we give it */
774 setenv(kLdPreloadEnvVar, newenv, 1);
775 free(newenv);
776 return 0;
Elly Jonescd7a9042011-07-22 13:56:51 -0400777}
778
Will Drewry6ac91122011-10-21 16:38:58 -0500779int setup_pipe(int fds[2])
Elly Jonese1749eb2011-10-07 13:54:59 -0400780{
781 int r = pipe(fds);
782 char fd_buf[11];
783 if (r)
784 return r;
785 r = snprintf(fd_buf, sizeof(fd_buf), "%d", fds[0]);
786 if (r <= 0)
787 return -EINVAL;
788 setenv(kFdEnvVar, fd_buf, 1);
789 return 0;
Will Drewryf89aef52011-09-16 16:48:57 -0500790}
791
Will Drewry6ac91122011-10-21 16:38:58 -0500792int API minijail_run(struct minijail *j, const char *filename,
793 char *const argv[])
Elly Jonese1749eb2011-10-07 13:54:59 -0400794{
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700795 return minijail_run_pid(j, filename, argv, NULL);
796}
797
798int API minijail_run_pid(struct minijail *j, const char *filename,
799 char *const argv[], pid_t *pchild_pid)
800{
Elly Jonese1749eb2011-10-07 13:54:59 -0400801 unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
802 char *oldenv, *oldenv_copy = NULL;
803 pid_t child_pid;
804 int pipe_fds[2];
805 int ret;
Ben Chan541c7e52011-08-26 14:55:53 -0700806
Elly Jonese1749eb2011-10-07 13:54:59 -0400807 oldenv = getenv(kLdPreloadEnvVar);
808 if (oldenv) {
809 oldenv_copy = strdup(oldenv);
810 if (!oldenv_copy)
811 return -ENOMEM;
812 }
Will Drewryf89aef52011-09-16 16:48:57 -0500813
Elly Jonese1749eb2011-10-07 13:54:59 -0400814 if (setup_preload())
815 return -EFAULT;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500816
Elly Jonesdd3e8512012-01-23 15:13:38 -0500817 /*
818 * Before we fork(2) and execve(2) the child process, we need to open
Elly Jonese1749eb2011-10-07 13:54:59 -0400819 * a pipe(2) to send the minijail configuration over.
820 */
821 if (setup_pipe(pipe_fds))
822 return -EFAULT;
Elly Jonescd7a9042011-07-22 13:56:51 -0400823
Elly Jonese1749eb2011-10-07 13:54:59 -0400824 child_pid = syscall(SYS_clone, pidns | SIGCHLD, NULL);
825 if (child_pid < 0) {
826 free(oldenv_copy);
827 return child_pid;
828 }
Will Drewryf89aef52011-09-16 16:48:57 -0500829
Elly Jonese1749eb2011-10-07 13:54:59 -0400830 if (child_pid) {
831 /* Restore parent's LD_PRELOAD. */
832 if (oldenv_copy) {
833 setenv(kLdPreloadEnvVar, oldenv_copy, 1);
834 free(oldenv_copy);
835 } else {
836 unsetenv(kLdPreloadEnvVar);
837 }
838 unsetenv(kFdEnvVar);
839 j->initpid = child_pid;
840 close(pipe_fds[0]); /* read endpoint */
841 ret = minijail_to_fd(j, pipe_fds[1]);
842 close(pipe_fds[1]); /* write endpoint */
843 if (ret) {
844 kill(j->initpid, SIGKILL);
845 die("failed to send marshalled minijail");
846 }
Jorge Lucangeli Obes9807d032012-04-17 13:36:00 -0700847 if (pchild_pid)
848 *pchild_pid = child_pid;
Elly Jonese1749eb2011-10-07 13:54:59 -0400849 return 0;
850 }
851 free(oldenv_copy);
Ben Chan541c7e52011-08-26 14:55:53 -0700852
Elly Jonese1749eb2011-10-07 13:54:59 -0400853 /* Drop everything that cannot be inherited across execve. */
854 minijail_preexec(j);
855 /* Jail this process and its descendants... */
856 minijail_enter(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400857
Elly Jonese1749eb2011-10-07 13:54:59 -0400858 if (pidns) {
Elly Jonesdd3e8512012-01-23 15:13:38 -0500859 /*
860 * pid namespace: this process will become init inside the new
Elly Jonese1749eb2011-10-07 13:54:59 -0400861 * namespace, so fork off a child to actually run the program
862 * (we don't want all programs we might exec to have to know
863 * how to be init).
864 */
865 child_pid = fork();
866 if (child_pid < 0)
867 _exit(child_pid);
868 else if (child_pid > 0)
869 init(child_pid); /* never returns */
870 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400871
Elly Jonesdd3e8512012-01-23 15:13:38 -0500872 /*
873 * If we aren't pid-namespaced:
Elly Jonese1749eb2011-10-07 13:54:59 -0400874 * calling process
875 * -> execve()-ing process
876 * If we are:
877 * calling process
878 * -> init()-ing process
879 * -> execve()-ing process
880 */
881 _exit(execve(filename, argv, environ));
Elly Jonescd7a9042011-07-22 13:56:51 -0400882}
883
Will Drewry6ac91122011-10-21 16:38:58 -0500884int API minijail_kill(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400885{
886 int st;
887 if (kill(j->initpid, SIGTERM))
888 return -errno;
889 if (waitpid(j->initpid, &st, 0) < 0)
890 return -errno;
891 return st;
Elly Jonescd7a9042011-07-22 13:56:51 -0400892}
893
Will Drewry6ac91122011-10-21 16:38:58 -0500894int API minijail_wait(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400895{
896 int st;
897 if (waitpid(j->initpid, &st, 0) < 0)
898 return -errno;
899 if (!WIFEXITED(st))
900 return MINIJAIL_ERR_JAIL;
901 return WEXITSTATUS(st);
Elly Jonescd7a9042011-07-22 13:56:51 -0400902}
903
Will Drewry6ac91122011-10-21 16:38:58 -0500904void API minijail_destroy(struct minijail *j)
Elly Jonese1749eb2011-10-07 13:54:59 -0400905{
Jorge Lucangeli Obes524c0402012-01-17 11:30:23 -0800906 if (j->flags.seccomp_filter && j->filter_prog) {
907 free(j->filter_prog->filter);
908 free(j->filter_prog);
Elly Jonese1749eb2011-10-07 13:54:59 -0400909 }
Elly Jones51a5b6c2011-10-12 19:09:26 -0400910 while (j->bindings_head) {
911 struct binding *b = j->bindings_head;
912 j->bindings_head = j->bindings_head->next;
913 free(b->dest);
914 free(b->src);
915 free(b);
916 }
917 j->bindings_tail = NULL;
Elly Jonese1749eb2011-10-07 13:54:59 -0400918 if (j->user)
919 free(j->user);
Will Drewrybee7ba72011-10-21 20:47:01 -0500920 if (j->chrootdir)
921 free(j->chrootdir);
Elly Jonese1749eb2011-10-07 13:54:59 -0400922 free(j);
Elly Jonescd7a9042011-07-22 13:56:51 -0400923}