Merge from Chromium at DEPS revision r167172
This commit was generated by merge_to_master.py.
Change-Id: Ib8d56fd5ae39a2d7e8c91dcd76cc6d13f25f2aab
diff --git a/sandbox/linux/sandbox_linux.gypi b/sandbox/linux/sandbox_linux.gypi
new file mode 100644
index 0000000..c02cd31
--- /dev/null
+++ b/sandbox/linux/sandbox_linux.gypi
@@ -0,0 +1,145 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+{
+ 'targets': [
+ # We have two principal targets: sandbox and sandbox_linux_unittests
+ # All other targets are listed as dependencies.
+ # FIXME(jln): for historical reasons, sandbox_linux is the setuid sandbox
+ # and is its own target.
+ {
+ 'target_name': 'sandbox',
+ 'type': 'none',
+ 'dependencies': [
+ 'suid_sandbox_client',
+ ],
+ 'conditions': [
+ # Only compile in the seccomp mode 1 code for the flag combination
+ # where we support it.
+ [ 'OS=="linux" and (target_arch=="ia32" or target_arch=="x64") '
+ 'and toolkit_views==0 and selinux==0', {
+ 'dependencies': [
+ 'linux/seccomp-legacy/seccomp.gyp:seccomp_sandbox',
+ ],
+ }],
+ # Similarly, compile seccomp BPF when we support it
+ [ 'OS=="linux" and (target_arch=="ia32" or target_arch=="x64" '
+ 'or target_arch=="arm")', {
+ 'type': 'static_library',
+ 'dependencies': [
+ 'seccomp_bpf',
+ ],
+ }],
+ ],
+ },
+ {
+ 'target_name': 'sandbox_linux_unittests',
+ 'type': 'executable',
+ 'dependencies': [
+ 'sandbox',
+ '../testing/gtest.gyp:gtest',
+ ],
+ 'sources': [
+ 'tests/main.cc',
+ 'tests/unit_tests.cc',
+ 'tests/unit_tests.h',
+ 'suid/client/setuid_sandbox_client_unittest.cc',
+ ],
+ 'include_dirs': [
+ '../..',
+ ],
+ 'conditions': [
+ [ 'OS=="linux" and (target_arch=="ia32" or target_arch=="x64" '
+ 'or target_arch=="arm")', {
+ 'sources': [
+ 'seccomp-bpf/bpf_tests.h',
+ 'seccomp-bpf/codegen_unittest.cc',
+ 'seccomp-bpf/errorcode_unittest.cc',
+ 'seccomp-bpf/sandbox_bpf_unittest.cc',
+ 'seccomp-bpf/syscall_iterator_unittest.cc',
+ ],
+ }],
+ ],
+ },
+ {
+ 'target_name': 'seccomp_bpf',
+ 'type': 'static_library',
+ 'sources': [
+ 'seccomp-bpf/basicblock.cc',
+ 'seccomp-bpf/basicblock.h',
+ 'seccomp-bpf/codegen.cc',
+ 'seccomp-bpf/codegen.h',
+ 'seccomp-bpf/die.cc',
+ 'seccomp-bpf/die.h',
+ 'seccomp-bpf/errorcode.cc',
+ 'seccomp-bpf/errorcode.h',
+ 'seccomp-bpf/instruction.h',
+ 'seccomp-bpf/sandbox_bpf.cc',
+ 'seccomp-bpf/sandbox_bpf.h',
+ 'seccomp-bpf/syscall_iterator.cc',
+ 'seccomp-bpf/syscall_iterator.h',
+ 'seccomp-bpf/verifier.cc',
+ 'seccomp-bpf/verifier.h',
+ ],
+ 'dependencies': [
+ '../base/base.gyp:base',
+ ],
+ 'include_dirs': [
+ '../..',
+ ],
+ },
+ {
+ # The setuid sandbox, for Linux
+ 'target_name': 'chrome_sandbox',
+ 'type': 'executable',
+ 'sources': [
+ 'suid/common/sandbox.h',
+ 'suid/common/suid_unsafe_environment_variables.h',
+ 'suid/linux_util.c',
+ 'suid/linux_util.h',
+ 'suid/process_util.h',
+ 'suid/process_util_linux.c',
+ 'suid/sandbox.c',
+ ],
+ 'cflags': [
+ # For ULLONG_MAX
+ '-std=gnu99',
+ ],
+ 'include_dirs': [
+ '../..',
+ ],
+ },
+ {
+ 'target_name': 'libc_urandom_override',
+ 'type': 'static_library',
+ 'sources': [
+ 'services/libc_urandom_override.cc',
+ 'services/libc_urandom_override.h',
+ ],
+ 'dependencies': [
+ '../base/base.gyp:base',
+ ],
+ 'include_dirs': [
+ '..',
+ ],
+ },
+ {
+ 'target_name': 'suid_sandbox_client',
+ 'type': 'static_library',
+ 'sources': [
+ 'suid/common/sandbox.h',
+ 'suid/common/suid_unsafe_environment_variables.h',
+ 'suid/client/setuid_sandbox_client.cc',
+ 'suid/client/setuid_sandbox_client.h',
+ ],
+ 'dependencies': [
+ '../base/base.gyp:base',
+ ],
+ 'include_dirs': [
+ '..',
+ ],
+ },
+
+ ],
+}
diff --git a/sandbox/linux/seccomp-bpf/Makefile b/sandbox/linux/seccomp-bpf/Makefile
new file mode 100644
index 0000000..a697198
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/Makefile
@@ -0,0 +1,30 @@
+DEF_CFLAGS = -g -O3 -Wall -Werror -Wextra -Wno-missing-field-initializers -fPIC -I.
+DEF_CPPFLAGS = -D_GNU_SOURCE -DSECCOMP_BPF_STANDALONE -DSECCOMP_BPF_VALGRIND_HACKS -include valgrind/valgrind.h -iquote ../../..
+DEF_LDFLAGS = -g -lpthread
+DEPFLAGS = -MMD -MF .$@.d
+MODS := demo sandbox_bpf die codegen errorcode syscall_iterator util verifier
+OBJS64 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o64/')
+OBJS32 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o32/')
+ALL_OBJS = $(OBJS32) $(OBJS64)
+DEP_FILES = $(wildcard $(foreach f,$(ALL_OBJS),.$(f).d))
+
+.SUFFIXES: .o64 .o32
+
+all: demo32 demo64
+
+clean:
+ $(RM) demo32 demo64
+ $(RM) *.o *.o32 *.o64 .*.d
+ $(RM) core core.* vgcore vgcore.* strace.log*
+
+-include $(DEP_FILES)
+
+demo32: ${OBJS32}
+ ${CXX} -m32 -o $@ $+ ${DEF_LDFLAGS} ${LDFLAGS}
+demo64: ${OBJS64}
+ ${CXX} -m64 -o $@ $+ ${DEF_LDFLAGS} ${LDFLAGS}
+
+.cc.o32:
+ ${CXX} -m32 ${DEF_CFLAGS} ${DEF_CPPFLAGS} ${CFLAGS} ${CPPFLAGS} ${DEPFLAGS} -c -o $@ $<
+.cc.o64:
+ ${CXX} -m64 ${DEF_CFLAGS} ${DEF_CPPFLAGS} ${CFLAGS} ${CPPFLAGS} ${DEPFLAGS} -c -o $@ $<
diff --git a/sandbox/linux/seccomp-bpf/basicblock.cc b/sandbox/linux/seccomp-bpf/basicblock.cc
new file mode 100644
index 0000000..bf27c58
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/basicblock.cc
@@ -0,0 +1,16 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/basicblock.h"
+
+
+namespace playground2 {
+
+BasicBlock::BasicBlock() {
+}
+
+BasicBlock::~BasicBlock() {
+}
+
+} // namespace playground2
diff --git a/sandbox/linux/seccomp-bpf/basicblock.h b/sandbox/linux/seccomp-bpf/basicblock.h
new file mode 100644
index 0000000..1782a80
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/basicblock.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_BASICBLOCK_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_BASICBLOCK_H__
+
+#include <vector>
+
+#include "sandbox/linux/seccomp-bpf/instruction.h"
+
+
+namespace playground2 {
+
+struct BasicBlock {
+ BasicBlock();
+ ~BasicBlock();
+
+ // Our implementation of the code generator uses a "Less" operator to
+ // identify common sequences of basic blocks. This would normally be
+ // really easy to do, but STL requires us to wrap the comparator into
+ // a class. We begrudgingly add some code here that provides this wrapping.
+ template<class T> class Less {
+ public:
+ Less(const T& data, int (*cmp)(const BasicBlock *, const BasicBlock *,
+ const T& data))
+ : data_(data),
+ cmp_(cmp) {
+ }
+
+ bool operator() (const BasicBlock *a, const BasicBlock *b) const {
+ return cmp_(a, b, data_) < 0;
+ }
+
+ private:
+ const T& data_;
+ int (*cmp_)(const BasicBlock *, const BasicBlock *, const T&);
+ };
+
+ // Basic blocks are essentially nothing more than a set of instructions.
+ std::vector<Instruction *> instructions;
+
+ // In order to compute relative branch offsets we need to keep track of
+ // how far our block is away from the very last basic block. The "offset"
+ // is measured in number of BPF instructions.
+ int offset;
+};
+
+} // namespace playground2
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_BASICBLOCK_H__
diff --git a/sandbox/linux/seccomp-bpf/bpf_tests.h b/sandbox/linux/seccomp-bpf/bpf_tests.h
new file mode 100644
index 0000000..8da25f9
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/bpf_tests.h
@@ -0,0 +1,94 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTS_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTS_H__
+
+#include "sandbox/linux/tests/unit_tests.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+
+
+namespace sandbox {
+
+// BPF_TEST() is a special version of SANDBOX_TEST(). It turns into a no-op,
+// if the host does not have kernel support for running BPF filters.
+// Also, it takes advantage of the Die class to avoid calling LOG(FATAL), from
+// inside our tests, as we don't need or even want all the error handling that
+// LOG(FATAL) would do.
+// BPF_TEST() takes a C++ data type as an optional fourth parameter. If
+// present, this sets up a variable that can be accessed as "BPF_AUX". This
+// variable will be passed as an argument to the "policy" function. Policies
+// would typically use it as an argument to Sandbox::Trap(), if they want to
+// communicate data between the BPF_TEST() and a Trap() function.
+#define BPF_TEST(test_case_name, test_name, policy, aux...) \
+ void BPF_TEST_##test_name(sandbox::BpfTests<aux>::AuxType& BPF_AUX); \
+ TEST(test_case_name, test_name) { \
+ sandbox::BpfTests<aux>::TestArgs arg(BPF_TEST_##test_name, policy); \
+ sandbox::BpfTests<aux>::RunTestInProcess( \
+ sandbox::BpfTests<aux>::TestWrapper, &arg);\
+ } \
+ void BPF_TEST_##test_name(sandbox::BpfTests<aux>::AuxType& BPF_AUX)
+
+// Assertions are handled exactly the same as with a normal SANDBOX_TEST()
+#define BPF_ASSERT SANDBOX_ASSERT
+
+
+// The "Aux" type is optional. We use an "empty" type by default, so that if
+// the caller doesn't provide any type, all the BPF_AUX related data compiles
+// to nothing.
+template<class Aux = int[0]>
+class BpfTests : public UnitTests {
+ public:
+ typedef Aux AuxType;
+
+ class TestArgs {
+ public:
+ TestArgs(void (*t)(AuxType&), playground2::Sandbox::EvaluateSyscall p)
+ : test_(t),
+ policy_(p),
+ aux_() {
+ }
+
+ void (*test() const)(AuxType&) { return test_; }
+ playground2::Sandbox::EvaluateSyscall policy() const { return policy_; }
+
+ private:
+ friend class BpfTests;
+
+ void (*test_)(AuxType&);
+ playground2::Sandbox::EvaluateSyscall policy_;
+ AuxType aux_;
+ };
+
+ static void TestWrapper(void *void_arg) {
+ TestArgs *arg = reinterpret_cast<TestArgs *>(void_arg);
+ playground2::Die::EnableSimpleExit();
+ if (playground2::Sandbox::supportsSeccompSandbox(-1) ==
+ playground2::Sandbox::STATUS_AVAILABLE) {
+ // Ensure that the sandbox is actually available at this time
+ int proc_fd;
+ BPF_ASSERT((proc_fd = open("/proc", O_RDONLY|O_DIRECTORY)) >= 0);
+ BPF_ASSERT(playground2::Sandbox::supportsSeccompSandbox(proc_fd) ==
+ playground2::Sandbox::STATUS_AVAILABLE);
+
+ // Initialize and then start the sandbox with our custom policy
+ playground2::Sandbox::setProcFd(proc_fd);
+ playground2::Sandbox::setSandboxPolicy(arg->policy(), &arg->aux_);
+ playground2::Sandbox::startSandbox();
+
+ arg->test()(arg->aux_);
+ } else {
+ // TODO(markus): (crbug.com/141545) Call the compiler and verify the
+ // policy. That's the least we can do, if we don't have kernel support.
+ playground2::Sandbox::setSandboxPolicy(arg->policy(), NULL);
+ }
+ }
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BpfTests);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTS_H__
diff --git a/sandbox/linux/seccomp-bpf/codegen.cc b/sandbox/linux/seccomp-bpf/codegen.cc
new file mode 100644
index 0000000..8b36315
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/codegen.cc
@@ -0,0 +1,657 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/codegen.h"
+
+
+namespace playground2 {
+
+CodeGen::CodeGen()
+ : compiled_(false) {
+}
+
+CodeGen::~CodeGen() {
+ for (Instructions::iterator iter = instructions_.begin();
+ iter != instructions_.end();
+ ++iter) {
+ delete *iter;
+ }
+ for (BasicBlocks::iterator iter = basic_blocks_.begin();
+ iter != basic_blocks_.end();
+ ++iter) {
+ delete *iter;
+ }
+}
+
+void CodeGen::PrintProgram(const Sandbox::Program& program) {
+ for (Sandbox::Program::const_iterator iter = program.begin();
+ iter != program.end();
+ ++iter) {
+ int ip = (int)(iter - program.begin());
+ fprintf(stderr, "%3d) ", ip);
+ switch (BPF_CLASS(iter->code)) {
+ case BPF_LD:
+ if (iter->code == BPF_LD+BPF_W+BPF_ABS) {
+ fprintf(stderr, "LOAD %d\n", (int)iter->k);
+ } else {
+ fprintf(stderr, "LOAD ???\n");
+ }
+ break;
+ case BPF_JMP:
+ if (BPF_OP(iter->code) == BPF_JA) {
+ fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
+ } else {
+ fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
+ BPF_OP(iter->code) == BPF_JSET ? "&" :
+ BPF_OP(iter->code) == BPF_JEQ ? "==" :
+ BPF_OP(iter->code) == BPF_JGE ? ">=" :
+ BPF_OP(iter->code) == BPF_JGT ? ">" : "???",
+ (int)iter->k,
+ ip + iter->jt + 1, ip + iter->jf + 1);
+ }
+ break;
+ case BPF_RET:
+ fprintf(stderr, "RET 0x%x\n", iter->k);
+ break;
+ default:
+ fprintf(stderr, "???\n");
+ break;
+ }
+ }
+ return;
+}
+
+Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
+ Instruction *next) {
+ // We can handle non-jumping instructions and "always" jumps. Both of
+ // them are followed by exactly one "next" instruction.
+ // We allow callers to defer specifying "next", but then they must call
+ // "joinInstructions" later.
+ if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
+ SANDBOX_DIE("Must provide both \"true\" and \"false\" branch "
+ "for a BPF_JMP");
+ }
+ if (next && BPF_CLASS(code) == BPF_RET) {
+ SANDBOX_DIE("Cannot append instructions after a return statement");
+ }
+ if (BPF_CLASS(code) == BPF_JMP) {
+ // "Always" jumps use the "true" branch target, only.
+ Instruction *insn = new Instruction(code, 0, next, NULL);
+ instructions_.push_back(insn);
+ return insn;
+ } else {
+ // Non-jumping instructions do not use any of the branch targets.
+ Instruction *insn = new Instruction(code, k, next);
+ instructions_.push_back(insn);
+ return insn;
+ }
+}
+
+Instruction *CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
+ if (BPF_CLASS(code) != BPF_RET) {
+ SANDBOX_DIE("ErrorCodes can only be used in return expressions");
+ }
+ if (err.error_type_ != ErrorCode::ET_SIMPLE &&
+ err.error_type_ != ErrorCode::ET_TRAP) {
+ SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
+ }
+ return MakeInstruction(code, err.err_);
+}
+
+Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
+ Instruction *jt, Instruction *jf) {
+ // We can handle all conditional jumps. They are followed by both a
+ // "true" and a "false" branch.
+ if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
+ SANDBOX_DIE("Expected a BPF_JMP instruction");
+ }
+ if (!jt && !jf) {
+ // We allow callers to defer specifying exactly one of the branch
+ // targets. It must then be set later by calling "JoinInstructions".
+ SANDBOX_DIE("Branches must jump to a valid instruction");
+ }
+ Instruction *insn = new Instruction(code, k, jt, jf);
+ instructions_.push_back(insn);
+ return insn;
+}
+
+void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) {
+ // Merge two instructions, or set the branch target for an "always" jump.
+ // This function should be called, if the caller didn't initially provide
+ // a value for "next" when creating the instruction.
+ if (BPF_CLASS(head->code) == BPF_JMP) {
+ if (BPF_OP(head->code) == BPF_JA) {
+ if (head->jt_ptr) {
+ SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
+ }
+ head->jt_ptr = tail;
+ } else {
+ if (!head->jt_ptr && head->jf_ptr) {
+ head->jt_ptr = tail;
+ } else if (!head->jf_ptr && head->jt_ptr) {
+ head->jf_ptr = tail;
+ } else {
+ SANDBOX_DIE("Cannot append instructions after a jump");
+ }
+ }
+ } else if (BPF_CLASS(head->code) == BPF_RET) {
+ SANDBOX_DIE("Cannot append instructions after a return statement");
+ } else if (head->next) {
+ SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
+ } else {
+ head->next = tail;
+ }
+ return;
+}
+
+void CodeGen::FindBranchTargets(const Instruction& instructions,
+ BranchTargets *branch_targets) {
+ // Follow all possible paths through the "instructions" graph and compute
+ // a list of branch targets. This will later be needed to compute the
+ // boundaries of basic blocks.
+ // We maintain a set of all instructions that we have previously seen. This
+ // set ultimately converges on all instructions in the program.
+ std::set<const Instruction *> seen_instructions;
+ Instructions stack;
+ for (const Instruction *insn = &instructions; insn; ) {
+ seen_instructions.insert(insn);
+ if (BPF_CLASS(insn->code) == BPF_JMP) {
+ // Found a jump. Increase count of incoming edges for each of the jump
+ // targets.
+ ++(*branch_targets)[insn->jt_ptr];
+ if (BPF_OP(insn->code) != BPF_JA) {
+ ++(*branch_targets)[insn->jf_ptr];
+ stack.push_back(const_cast<Instruction *>(insn));
+ }
+ // Start a recursive descent for depth-first traversal.
+ if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
+ // We haven't seen the "true" branch yet. Traverse it now. We have
+ // already remembered the "false" branch on the stack and will
+ // traverse it later.
+ insn = insn->jt_ptr;
+ continue;
+ } else {
+ // Now try traversing the "false" branch.
+ insn = NULL;
+ }
+ } else {
+ // This is a non-jump instruction, just continue to the next instruction
+ // (if any). It's OK if "insn" becomes NULL when reaching a return
+ // instruction.
+ if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
+ SANDBOX_DIE("Internal compiler error; return instruction must be at "
+ "the end of the BPF program");
+ }
+ if (seen_instructions.find(insn->next) == seen_instructions.end()) {
+ insn = insn->next;
+ } else {
+ // We have seen this instruction before. That could happen if it is
+ // a branch target. No need to continue processing.
+ insn = NULL;
+ }
+ }
+ while (!insn && !stack.empty()) {
+ // We are done processing all the way to a leaf node, backtrack up the
+ // stack to any branches that we haven't processed yet. By definition,
+ // this has to be a "false" branch, as we always process the "true"
+ // branches right away.
+ insn = stack.back();
+ stack.pop_back();
+ if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
+ // We haven't seen the "false" branch yet. So, that's where we'll
+ // go now.
+ insn = insn->jf_ptr;
+ } else {
+ // We have seen both the "true" and the "false" branch, continue
+ // up the stack.
+ if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
+ SANDBOX_DIE("Internal compiler error; cannot find all "
+ "branch targets");
+ }
+ insn = NULL;
+ }
+ }
+ }
+ return;
+}
+
+BasicBlock *CodeGen::MakeBasicBlock(Instruction *head,
+ Instruction *tail) {
+ // Iterate over all the instructions between "head" and "tail" and
+ // insert them into a new basic block.
+ BasicBlock *bb = new BasicBlock;
+ for (;; head = head->next) {
+ bb->instructions.push_back(head);
+ if (head == tail) {
+ break;
+ }
+ if (BPF_CLASS(head->code) == BPF_JMP) {
+ SANDBOX_DIE("Found a jump inside of a basic block");
+ }
+ }
+ basic_blocks_.push_back(bb);
+ return bb;
+}
+
+void CodeGen::AddBasicBlock(Instruction *head,
+ Instruction *tail,
+ const BranchTargets& branch_targets,
+ TargetsToBlocks *basic_blocks,
+ BasicBlock **firstBlock) {
+ // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
+ // has not been set before.
+ BranchTargets::const_iterator iter = branch_targets.find(head);
+ if ((iter == branch_targets.end()) != !*firstBlock ||
+ !*firstBlock != basic_blocks->empty()) {
+ SANDBOX_DIE("Only the very first basic block should have no "
+ "incoming jumps");
+ }
+ BasicBlock *bb = MakeBasicBlock(head, tail);
+ if (!*firstBlock) {
+ *firstBlock = bb;
+ }
+ (*basic_blocks)[head] = bb;
+ return;
+}
+
+BasicBlock *CodeGen::CutGraphIntoBasicBlocks(
+ Instruction *instructions, const BranchTargets& branch_targets,
+ TargetsToBlocks *basic_blocks) {
+ // Textbook implementation of a basic block generator. All basic blocks
+ // start with a branch target and end with either a return statement or
+ // a jump (or are followed by an instruction that forms the beginning of a
+ // new block). Both conditional and "always" jumps are supported.
+ BasicBlock *first_block = NULL;
+ std::set<const Instruction *> seen_instructions;
+ Instructions stack;
+ Instruction *tail = NULL;
+ Instruction *head = instructions;
+ for (Instruction *insn = head; insn; ) {
+ if (seen_instructions.find(insn) != seen_instructions.end()) {
+ // We somehow went in a circle. This should never be possible. Not even
+ // cyclic graphs are supposed to confuse us this much.
+ SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
+ }
+ seen_instructions.insert(insn);
+ if (tail && branch_targets.find(insn) != branch_targets.end()) {
+ // We reached a branch target. Start a new basic block (this means,
+ // flushing the previous basic block first).
+ AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
+ head = insn;
+ }
+ if (BPF_CLASS(insn->code) == BPF_JMP) {
+ // We reached a jump instruction, this completes our current basic
+ // block. Flush it and continue by traversing both the true and the
+ // false branch of the jump. We need to maintain a stack to do so.
+ AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
+ if (BPF_OP(insn->code) != BPF_JA) {
+ stack.push_back(insn->jf_ptr);
+ }
+ insn = insn->jt_ptr;
+
+ // If we are jumping to an instruction that we have previously
+ // processed, we are done with this branch. Continue by backtracking
+ // up the stack.
+ while (seen_instructions.find(insn) != seen_instructions.end()) {
+ backtracking:
+ if (stack.empty()) {
+ // We successfully traversed all reachable instructions.
+ return first_block;
+ } else {
+ // Going up the stack.
+ insn = stack.back();
+ stack.pop_back();
+ }
+ }
+ // Starting a new basic block.
+ tail = NULL;
+ head = insn;
+ } else {
+ // We found a non-jumping instruction, append it to current basic
+ // block.
+ tail = insn;
+ insn = insn->next;
+ if (!insn) {
+ // We reached a return statement, flush the current basic block and
+ // backtrack up the stack.
+ AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
+ goto backtracking;
+ }
+ }
+ }
+ return first_block;
+}
+
+// We define a comparator that inspects the sequence of instructions in our
+// basic block and any blocks referenced by this block. This function can be
+// used in a "less" comparator for the purpose of storing pointers to basic
+// blocks in STL containers; this gives an easy option to use STL to find
+// shared tail sequences of basic blocks.
+static int PointerCompare(const BasicBlock *block1, const BasicBlock *block2,
+ const TargetsToBlocks& blocks) {
+ // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
+ // If we are looking at the exact same block, this is trivial and we don't
+ // need to do a full comparison.
+ if (block1 == block2) {
+ return 0;
+ }
+
+ // We compare the sequence of instructions in both basic blocks.
+ const Instructions& insns1 = block1->instructions;
+ const Instructions& insns2 = block2->instructions;
+ Instructions::const_iterator iter1 = insns1.begin();
+ Instructions::const_iterator iter2 = insns2.begin();
+ for (;; ++iter1, ++iter2) {
+ // If we have reached the end of the sequence of instructions in one or
+ // both basic blocks, we know the relative ordering between the two blocks
+ // and can return.
+ if (iter1 == insns1.end()) {
+ return iter2 == insns2.end() ? 0 : -1;
+ } else if (iter2 == insns2.end()) {
+ return 1;
+ }
+
+ // Compare the individual fields for both instructions.
+ const Instruction& insn1 = **iter1;
+ const Instruction& insn2 = **iter2;
+ if (insn1.code == insn2.code) {
+ if (insn1.k == insn2.k) {
+ // Only conditional jump instructions use the jt_ptr and jf_ptr
+ // fields.
+ if (BPF_CLASS(insn1.code) == BPF_JMP) {
+ if (BPF_OP(insn1.code) != BPF_JA) {
+ // Recursively compare the "true" and "false" branches.
+ // A well-formed BPF program can't have any cycles, so we know
+ // that our recursive algorithm will ultimately terminate.
+ // In the unlikely event that the programmer made a mistake and
+ // went out of the way to give us a cyclic program, we will crash
+ // with a stack overflow. We are OK with that.
+ int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
+ blocks.find(insn2.jt_ptr)->second,
+ blocks);
+ if (c == 0) {
+ c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
+ blocks.find(insn2.jf_ptr)->second,
+ blocks);
+ if (c == 0) {
+ continue;
+ } else {
+ return c;
+ }
+ } else {
+ return c;
+ }
+ } else {
+ int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
+ blocks.find(insn2.jt_ptr)->second,
+ blocks);
+ if (c == 0) {
+ continue;
+ } else {
+ return c;
+ }
+ }
+ } else {
+ continue;
+ }
+ } else {
+ return insn1.k - insn2.k;
+ }
+ } else {
+ return insn1.code - insn2.code;
+ }
+ }
+}
+
+void CodeGen::MergeTails(TargetsToBlocks *blocks) {
+ // We enter all of our basic blocks into a set using the BasicBlock::Less()
+ // comparator. This naturally results in blocks with identical tails of
+ // instructions to map to the same entry in the set. Whenever we discover
+ // that a particular chain of instructions is already in the set, we merge
+ // the basic blocks and update the pointer in the "blocks" map.
+ // N.B. This function merges duplicate blocks in place and returns nothing.
+ // N.B. We don't merge instructions on a granularity that is finer than
+ // a basic block. In practice, this is sufficiently rare that we don't
+ // incur a big cost.
+ // Similarly, we currently don't merge anything other than tails. In
+ // the future, we might decide to revisit this decision and attempt to
+ // merge arbitrary sub-sequences of instructions.
+ BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
+ typedef std::set<BasicBlock *, BasicBlock::Less<TargetsToBlocks> > Set;
+ Set seen_basic_blocks(less);
+ for (TargetsToBlocks::iterator iter = blocks->begin();
+ iter != blocks->end();
+ ++iter) {
+ BasicBlock *bb = iter->second;
+ Set::const_iterator entry = seen_basic_blocks.find(bb);
+ if (entry == seen_basic_blocks.end()) {
+ // This is the first time we see this particular sequence of
+ // instructions. Enter the basic block into the set of known
+ // basic blocks.
+ seen_basic_blocks.insert(bb);
+ } else {
+ // We have previously seen another basic block that defines the same
+ // sequence of instructions. Merge the two blocks and update the
+ // pointer in the "blocks" map.
+ iter->second = *entry;
+ }
+ }
+}
+
+void CodeGen::ComputeIncomingBranches(BasicBlock *block,
+ const TargetsToBlocks& targets_to_blocks,
+ IncomingBranches *incoming_branches) {
+ // We increment the number of incoming branches each time we encounter a
+ // basic block. But we only traverse recursively the very first time we
+ // encounter a new block. This is necessary to make topological sorting
+ // work correctly.
+ if (++(*incoming_branches)[block] == 1) {
+ Instruction *last_insn = block->instructions.back();
+ if (BPF_CLASS(last_insn->code) == BPF_JMP) {
+ ComputeIncomingBranches(
+ targets_to_blocks.find(last_insn->jt_ptr)->second,
+ targets_to_blocks, incoming_branches);
+ if (BPF_OP(last_insn->code) != BPF_JA) {
+ ComputeIncomingBranches(
+ targets_to_blocks.find(last_insn->jf_ptr)->second,
+ targets_to_blocks, incoming_branches);
+ }
+ } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
+ ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
+ targets_to_blocks, incoming_branches);
+ }
+ }
+}
+
+void CodeGen::TopoSortBasicBlocks(BasicBlock *first_block,
+ const TargetsToBlocks& blocks,
+ BasicBlocks *basic_blocks) {
+ // Textbook implementation of a toposort. We keep looking for basic blocks
+ // that don't have any incoming branches (initially, this is just the
+ // "first_block") and add them to the topologically sorted list of
+ // "basic_blocks". As we do so, we remove outgoing branches. This potentially
+ // ends up making our descendants eligible for the sorted list. The
+ // sorting algorithm terminates when there are no more basic blocks that have
+ // no incoming branches. If we didn't move all blocks from the set of
+ // "unordered_blocks" to the sorted list of "basic_blocks", there must have
+ // been a cyclic dependency. This should never happen in a BPF program, as
+ // well-formed BPF programs only ever have forward branches.
+ IncomingBranches unordered_blocks;
+ ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
+
+ std::set<BasicBlock *> heads;
+ for (;;) {
+ // Move block from "unordered_blocks" to "basic_blocks".
+ basic_blocks->push_back(first_block);
+
+ // Inspect last instruction in the basic block. This is typically either a
+ // jump or a return statement. But it could also be a "normal" instruction
+ // that is followed by a jump target.
+ Instruction *last_insn = first_block->instructions.back();
+ if (BPF_CLASS(last_insn->code) == BPF_JMP) {
+ // Remove outgoing branches. This might end up moving our descendants
+ // into set of "head" nodes that no longer have any incoming branches.
+ TargetsToBlocks::const_iterator iter;
+ if (BPF_OP(last_insn->code) != BPF_JA) {
+ iter = blocks.find(last_insn->jf_ptr);
+ if (!--unordered_blocks[iter->second]) {
+ heads.insert(iter->second);
+ }
+ }
+ iter = blocks.find(last_insn->jt_ptr);
+ if (!--unordered_blocks[iter->second]) {
+ first_block = iter->second;
+ continue;
+ }
+ } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
+ // We encountered an instruction that doesn't change code flow. Try to
+ // pick the next "first_block" from "last_insn->next", if possible.
+ TargetsToBlocks::const_iterator iter;
+ iter = blocks.find(last_insn->next);
+ if (!--unordered_blocks[iter->second]) {
+ first_block = iter->second;
+ continue;
+ } else {
+ // Our basic block is supposed to be followed by "last_insn->next",
+ // but dependencies prevent this from happening. Insert a BPF_JA
+ // instruction to correct the code flow.
+ Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, last_insn->next);
+ first_block->instructions.push_back(ja);
+ last_insn->next = ja;
+ }
+ }
+ if (heads.empty()) {
+ if (unordered_blocks.size() != basic_blocks->size()) {
+ SANDBOX_DIE("Internal compiler error; cyclic graph detected");
+ }
+ return;
+ }
+ // Proceed by picking an arbitrary node from the set of basic blocks that
+ // do not have any incoming branches.
+ first_block = *heads.begin();
+ heads.erase(heads.begin());
+ }
+}
+
+void CodeGen::ComputeRelativeJumps(BasicBlocks *basic_blocks,
+ const TargetsToBlocks& targets_to_blocks) {
+ // While we previously used pointers in jt_ptr and jf_ptr to link jump
+ // instructions to their targets, we now convert these jumps to relative
+ // jumps that are suitable for loading the BPF program into the kernel.
+ int offset = 0;
+
+ // Since we just completed a toposort, all jump targets are guaranteed to
+ // go forward. This means, iterating over the basic blocks in reverse makes
+ // it trivial to compute the correct offsets.
+ BasicBlock *bb = NULL;
+ BasicBlock *last_bb = NULL;
+ for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
+ iter != basic_blocks->rend();
+ ++iter) {
+ last_bb = bb;
+ bb = *iter;
+ Instruction *insn = bb->instructions.back();
+ if (BPF_CLASS(insn->code) == BPF_JMP) {
+ // Basic block ended in a jump instruction. We can now compute the
+ // appropriate offsets.
+ if (BPF_OP(insn->code) == BPF_JA) {
+ // "Always" jumps use the 32bit "k" field for the offset, instead
+ // of the 8bit "jt" and "jf" fields.
+ int jmp =
+ offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
+ insn->k = jmp;
+ insn->jt = insn->jf = 0;
+ } else {
+ // The offset computations for conditional jumps are just the same
+ // as for "always" jumps.
+ int jt = offset-targets_to_blocks.find(insn->jt_ptr)->second->offset;
+ int jf = offset-targets_to_blocks.find(insn->jf_ptr)->second->offset;
+
+ // There is an added complication, because conditional relative jumps
+ // can only jump at most 255 instructions forward. If we have to jump
+ // further, insert an extra "always" jump.
+ Instructions::size_type jmp = bb->instructions.size();
+ if (jt > 255 || (jt == 255 && jf > 255)) {
+ Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jt_ptr);
+ bb->instructions.push_back(ja);
+ ja->k = jt;
+ ja->jt = ja->jf = 0;
+
+ // The newly inserted "always" jump, of course, requires us to adjust
+ // the jump targets in the original conditional jump.
+ jt = 0;
+ ++jf;
+ }
+ if (jf > 255) {
+ Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jf_ptr);
+ bb->instructions.insert(bb->instructions.begin() + jmp, ja);
+ ja->k = jf;
+ ja->jt = ja->jf = 0;
+
+ // Again, we have to adjust the jump targets in the original
+ // conditional jump.
+ ++jt;
+ jf = 0;
+ }
+
+ // Now we can finally set the relative jump targets in the conditional
+ // jump instruction. Afterwards, we must no longer access the jt_ptr
+ // and jf_ptr fields.
+ insn->jt = jt;
+ insn->jf = jf;
+ }
+ } else if (BPF_CLASS(insn->code) != BPF_RET &&
+ targets_to_blocks.find(insn->next)->second != last_bb) {
+ SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
+ }
+
+ // Proceed to next basic block.
+ offset += bb->instructions.size();
+ bb->offset = offset;
+ }
+ return;
+}
+
+void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
+ Sandbox::Program *program) {
+ // Our basic blocks have been sorted and relative jump offsets have been
+ // computed. The last remaining step is for all the instructions in our
+ // basic blocks to be concatenated into a BPF program.
+ program->clear();
+ for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
+ bb_iter != basic_blocks.end();
+ ++bb_iter) {
+ const BasicBlock& bb = **bb_iter;
+ for (Instructions::const_iterator insn_iter = bb.instructions.begin();
+ insn_iter != bb.instructions.end();
+ ++insn_iter) {
+ const Instruction& insn = **insn_iter;
+ program->push_back(
+ (struct sock_filter) { insn.code, insn.jt, insn.jf, insn.k });
+ }
+ }
+ return;
+}
+
+void CodeGen::Compile(Instruction *instructions, Sandbox::Program *program) {
+ if (compiled_) {
+ SANDBOX_DIE("Cannot call Compile() multiple times. Create a new code "
+ "generator instead");
+ }
+ compiled_ = true;
+
+ BranchTargets branch_targets;
+ FindBranchTargets(*instructions, &branch_targets);
+ TargetsToBlocks all_blocks;
+ BasicBlock *first_block =
+ CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
+ MergeTails(&all_blocks);
+ BasicBlocks basic_blocks;
+ TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
+ ComputeRelativeJumps(&basic_blocks, all_blocks);
+ ConcatenateBasicBlocks(basic_blocks, program);
+ return;
+}
+
+} // namespace playground2
diff --git a/sandbox/linux/seccomp-bpf/codegen.h b/sandbox/linux/seccomp-bpf/codegen.h
new file mode 100644
index 0000000..b7d1d39
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/codegen.h
@@ -0,0 +1,147 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_CODEGEN_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_CODEGEN_H__
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "sandbox/linux/seccomp-bpf/basicblock.h"
+#include "sandbox/linux/seccomp-bpf/instruction.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+
+
+namespace playground2 {
+
+typedef std::vector<Instruction *> Instructions;
+typedef std::vector<BasicBlock *> BasicBlocks;
+typedef std::map<const Instruction *, int> BranchTargets;
+typedef std::map<const Instruction *, BasicBlock *> TargetsToBlocks;
+typedef std::map<const BasicBlock *, int> IncomingBranches;
+
+// The code generator instantiates a basic compiler that can convert a
+// graph of BPF instructions into a well-formed stream of BPF instructions.
+// Most notably, it ensures that jumps are always forward and don't exceed
+// the limit of 255 instructions imposed by the instruction set.
+//
+// Callers would typically create a new CodeGen object and then use it to
+// build a DAG of Instructions. They'll eventually call Compile() to convert
+// this DAG to a Sandbox::Program.
+//
+// Instructions can be chained at the time when they are created, or they
+// can be joined later by calling JoinInstructions().
+//
+// CodeGen gen;
+// Instruction *dag, *branch;
+// dag =
+// gen.MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+// offsetof(struct arch_seccomp_data, nr),
+// branch =
+// gen.MakeInstruction(BPF_JMP+BPF_EQ+BPF_K, __NR_getpid,
+// Trap(GetPidHandler, NULL), NULL);
+// gen.JoinInstructions(branch,
+// gen.MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)));
+//
+// // Simplified code follows; in practice, it is important to avoid calling
+// // any C++ destructors after starting the sandbox.
+// Sandbox::Program program;
+// gen.Compile(dag, program);
+// const struct sock_fprog prog = {
+// static_cast<unsigned short>(program->size()), &program[0] };
+// prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+//
+class CodeGen {
+ public:
+ CodeGen();
+ ~CodeGen();
+
+ // This is a helper method that can be used for debugging purposes. It is
+ // not normally called.
+ static void PrintProgram(const Sandbox::Program& program);
+
+ // Create a new instruction. Instructions form a DAG. The instruction objects
+ // are owned by the CodeGen object. They do not need to be explicitly
+ // deleted.
+ // For details on the possible parameters refer to <linux/filter.h>
+ Instruction *MakeInstruction(uint16_t code, uint32_t k,
+ Instruction *next = NULL);
+ Instruction *MakeInstruction(uint16_t code, const ErrorCode& err);
+ Instruction *MakeInstruction(uint16_t code, uint32_t k,
+ Instruction *jt, Instruction *jf);
+
+ // Join two (sequences of) instructions. This is useful, if the "next"
+ // parameter had not originally been given in the call to MakeInstruction(),
+ // or if a (conditional) jump still has an unsatisfied target.
+ void JoinInstructions(Instruction *head, Instruction *tail);
+
+ // Compiles the graph of instructions into a BPF program that can be passed
+ // to the kernel. Please note that this function modifies the graph in place
+ // and must therefore only be called once per graph.
+ void Compile(Instruction *instructions, Sandbox::Program *program);
+
+ private:
+ friend class CodeGenUnittestHelper;
+
+ // Find all the instructions that are the target of BPF_JMPs.
+ void FindBranchTargets(const Instruction& instructions,
+ BranchTargets *branch_targets);
+
+ // Combine instructions between "head" and "tail" into a new basic block.
+ // Basic blocks are defined as sequences of instructions whose only branch
+ // target is the very first instruction; furthermore, any BPF_JMP or BPF_RET
+ // instruction must be at the very end of the basic block.
+ BasicBlock *MakeBasicBlock(Instruction *head, Instruction *tail);
+
+ // Creates a basic block and adds it to "basic_blocks"; sets "first_block"
+ // if it is still NULL.
+ void AddBasicBlock(Instruction *head, Instruction *tail,
+ const BranchTargets& branch_targets,
+ TargetsToBlocks *basic_blocks, BasicBlock **first_block);
+
+ // Cuts the DAG of instructions into basic blocks.
+ BasicBlock *CutGraphIntoBasicBlocks(Instruction *instructions,
+ const BranchTargets& branch_targets,
+ TargetsToBlocks *blocks);
+
+ // Find common tail sequences of basic blocks and coalesce them.
+ void MergeTails(TargetsToBlocks *blocks);
+
+ // For each basic block, compute the number of incoming branches.
+ void ComputeIncomingBranches(BasicBlock *block,
+ const TargetsToBlocks& targets_to_blocks,
+ IncomingBranches *incoming_branches);
+
+ // Topologically sort the basic blocks so that all jumps are forward jumps.
+ // This is a requirement for any well-formed BPF program.
+ void TopoSortBasicBlocks(BasicBlock *first_block,
+ const TargetsToBlocks& blocks,
+ BasicBlocks *basic_blocks);
+
+ // Convert jt_ptr_ and jf_ptr_ fields in BPF_JMP instructions to valid
+ // jt_ and jf_ jump offsets. This can result in BPF_JA instructions being
+ // inserted, if we need to jump over more than 256 instructions.
+ void ComputeRelativeJumps(BasicBlocks *basic_blocks,
+ const TargetsToBlocks& targets_to_blocks);
+
+ // Concatenate instructions from all basic blocks into a BPF program that
+ // can be passed to the kernel.
+ void ConcatenateBasicBlocks(const BasicBlocks&, Sandbox::Program *program);
+
+ // We stick all instructions and basic blocks into pools that get destroyed
+ // when the CodeGen object is destroyed. This way, we neither need to worry
+ // about explicitly managing ownership, nor do we need to worry about using
+ // smart pointers in the presence of circular references.
+ Instructions instructions_;
+ BasicBlocks basic_blocks_;
+
+ // Compile() must only ever be called once as it makes destructive changes
+ // to the DAG.
+ bool compiled_;
+};
+
+} // namespace
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_CODEGEN_H__
diff --git a/sandbox/linux/seccomp-bpf/codegen_unittest.cc b/sandbox/linux/seccomp-bpf/codegen_unittest.cc
new file mode 100644
index 0000000..d24bcf2
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/codegen_unittest.cc
@@ -0,0 +1,445 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <set>
+#include <vector>
+
+#include "sandbox/linux/seccomp-bpf/codegen.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+namespace playground2 {
+
+class SandboxUnittestHelper : public Sandbox {
+ public:
+ typedef Sandbox::Program Program;
+};
+
+// We want to access some of the private methods in the code generator. We
+// do so by defining a "friend" that makes these methods public for us.
+class CodeGenUnittestHelper : public CodeGen {
+ public:
+ void FindBranchTargets(const Instruction& instructions,
+ BranchTargets *branch_targets) {
+ CodeGen::FindBranchTargets(instructions, branch_targets);
+ }
+
+ BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns,
+ const BranchTargets& branch_targets,
+ TargetsToBlocks *blocks) {
+ return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks);
+ }
+
+ void MergeTails(TargetsToBlocks *blocks) {
+ CodeGen::MergeTails(blocks);
+ }
+};
+
+enum { NO_FLAGS = 0x0000,
+ HAS_MERGEABLE_TAILS = 0x0001,
+};
+
+Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) {
+ // Create the most basic valid BPF program:
+ // RET ERR_ALLOWED
+ *flags = NO_FLAGS;
+ return codegen->MakeInstruction(BPF_RET+BPF_K,
+ ErrorCode(ErrorCode::ERR_ALLOWED));
+}
+
+Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) {
+ // Create a program with a single branch:
+ // JUMP if eq 42 then $0 else $1
+ // 0: RET EPERM
+ // 1: RET ERR_ALLOWED
+ *flags = NO_FLAGS;
+ return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
+ codegen->MakeInstruction(BPF_RET+BPF_K,
+ ErrorCode(EPERM)),
+ codegen->MakeInstruction(BPF_RET+BPF_K,
+ ErrorCode(ErrorCode::ERR_ALLOWED)));
+}
+
+Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) {
+ // Create a program with a single branch:
+ // JUMP if eq 42 then $0 else $0
+ // 0: RET ERR_ALLOWED
+
+ // N.B.: As the instructions in both sides of the branch are already
+ // the same object, we do not actually have any "mergeable" branches.
+ // This needs to be reflected in our choice of "flags".
+ *flags = NO_FLAGS;
+
+ Instruction *ret =
+ codegen->MakeInstruction(BPF_RET+BPF_K,
+ ErrorCode(ErrorCode::ERR_ALLOWED));
+ return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret);
+}
+
+Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) {
+ // Creates a basic BPF program that we'll use to test some of the code:
+  //    JUMP if eq 42 then $0 else $1     (insn6)
+ // 0: LD 23 (insn5)
+ // 1: JUMP if eq 42 then $2 else $4 (insn4)
+ // 2: JUMP to $3 (insn1)
+ // 3: LD 42 (insn0)
+ // RET ErrorCode(42) (insn2)
+ // 4: LD 42 (insn3)
+ // RET ErrorCode(42) (insn3+)
+ *flags = HAS_MERGEABLE_TAILS;
+
+ Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42);
+ SANDBOX_ASSERT(insn0);
+ SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS);
+ SANDBOX_ASSERT(insn0->k == 42);
+ SANDBOX_ASSERT(insn0->next == NULL);
+
+ Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0);
+ SANDBOX_ASSERT(insn1);
+ SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA);
+ SANDBOX_ASSERT(insn1->jt_ptr == insn0);
+
+ Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42));
+ SANDBOX_ASSERT(insn2);
+ SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K);
+ SANDBOX_ASSERT(insn2->next == NULL);
+
+ // We explicitly duplicate instructions so that MergeTails() can coalesce
+ // them later.
+ Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42,
+ codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)));
+
+ Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
+ insn1, insn3);
+ SANDBOX_ASSERT(insn4);
+ SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K);
+ SANDBOX_ASSERT(insn4->k == 42);
+ SANDBOX_ASSERT(insn4->jt_ptr == insn1);
+ SANDBOX_ASSERT(insn4->jf_ptr == insn3);
+
+ codegen->JoinInstructions(insn0, insn2);
+ SANDBOX_ASSERT(insn0->next == insn2);
+
+ Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+ 23, insn4);
+ SANDBOX_ASSERT(insn5);
+ SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS);
+ SANDBOX_ASSERT(insn5->k == 23);
+ SANDBOX_ASSERT(insn5->next == insn4);
+
+ // Force a basic block that ends in neither a jump instruction nor a return
+ // instruction. It only contains "insn5". This exercises one of the less
+ // common code paths in the topo-sort algorithm.
+ // This also gives us a diamond-shaped pattern in our graph, which stresses
+ // another aspect of the topo-sort algorithm (namely, the ability to
+ // correctly count the incoming branches for subtrees that are not disjunct).
+ Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
+ insn5, insn4);
+
+ return insn6;
+}
+
+void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){
+ Instruction *(*function_table[])(CodeGen *codegen, int *flags) = {
+ SampleProgramOneInstruction,
+ SampleProgramSimpleBranch,
+ SampleProgramAtypicalBranch,
+ SampleProgramComplex,
+ };
+
+ for (size_t i = 0; i < arraysize(function_table); ++i) {
+ CodeGenUnittestHelper codegen;
+ int flags = NO_FLAGS;
+ Instruction *prg = function_table[i](&codegen, &flags);
+ test(&codegen, prg, flags);
+ }
+}
+
+void MakeInstruction(CodeGenUnittestHelper *codegen,
+ Instruction *program, int) {
+ // Nothing to do here
+}
+
+SANDBOX_TEST(CodeGen, MakeInstruction) {
+ ForAllPrograms(MakeInstruction);
+}
+
+void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
+ BranchTargets branch_targets;
+ codegen->FindBranchTargets(*prg, &branch_targets);
+
+ // Verifying the general properties that should be true for every
+ // well-formed BPF program.
+  // Perform a depth-first traversal of the BPF program and verify that all
+ // targets of BPF_JMP instructions are represented in the "branch_targets".
+ // At the same time, compute a set of both the branch targets and all the
+ // instructions in the program.
+ std::vector<Instruction *> stack;
+ std::set<Instruction *> all_instructions;
+ std::set<Instruction *> target_instructions;
+ BranchTargets::const_iterator end = branch_targets.end();
+ for (Instruction *insn = prg;;) {
+ all_instructions.insert(insn);
+ if (BPF_CLASS(insn->code) == BPF_JMP) {
+ target_instructions.insert(insn->jt_ptr);
+ SANDBOX_ASSERT(insn->jt_ptr != NULL);
+ SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end);
+ if (BPF_OP(insn->code) != BPF_JA) {
+ target_instructions.insert(insn->jf_ptr);
+ SANDBOX_ASSERT(insn->jf_ptr != NULL);
+ SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end);
+ stack.push_back(insn->jf_ptr);
+ }
+ insn = insn->jt_ptr;
+ } else if (BPF_CLASS(insn->code) == BPF_RET) {
+ SANDBOX_ASSERT(insn->next == NULL);
+ if (stack.empty()) {
+ break;
+ }
+ insn = stack.back();
+ stack.pop_back();
+ } else {
+ SANDBOX_ASSERT(insn->next != NULL);
+ insn = insn->next;
+ }
+ }
+ SANDBOX_ASSERT(target_instructions.size() == branch_targets.size());
+
+ // We can now subtract the set of the branch targets from the set of all
+ // instructions. This gives us a set with the instructions that nobody
+  // ever jumps to. Verify that they are not included in the
+ // "branch_targets" that FindBranchTargets() computed for us.
+ Instructions non_target_instructions(all_instructions.size() -
+ target_instructions.size());
+ set_difference(all_instructions.begin(), all_instructions.end(),
+ target_instructions.begin(), target_instructions.end(),
+ non_target_instructions.begin());
+ for (Instructions::const_iterator iter = non_target_instructions.begin();
+ iter != non_target_instructions.end();
+ ++iter) {
+ SANDBOX_ASSERT(branch_targets.find(*iter) == end);
+ }
+}
+
+SANDBOX_TEST(CodeGen, FindBranchTargets) {
+ ForAllPrograms(FindBranchTargets);
+}
+
+void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen,
+ Instruction *prg, int) {
+ BranchTargets branch_targets;
+ codegen->FindBranchTargets(*prg, &branch_targets);
+ TargetsToBlocks all_blocks;
+ BasicBlock *first_block =
+ codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
+ SANDBOX_ASSERT(first_block != NULL);
+ SANDBOX_ASSERT(first_block->instructions.size() > 0);
+ Instruction *first_insn = first_block->instructions[0];
+
+ // Basic blocks are supposed to start with a branch target and end with
+ // either a jump or a return instruction. It can also end, if the next
+ // instruction forms the beginning of a new basic block. There should be
+ // no other jumps or return instructions in the middle of a basic block.
+ for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin();
+ bb_iter != all_blocks.end();
+ ++bb_iter) {
+ BasicBlock *bb = bb_iter->second;
+ SANDBOX_ASSERT(bb != NULL);
+ SANDBOX_ASSERT(bb->instructions.size() > 0);
+ Instruction *insn = bb->instructions[0];
+ SANDBOX_ASSERT(insn == first_insn ||
+ branch_targets.find(insn) != branch_targets.end());
+ for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){
+ insn = *insn_iter;
+ if (++insn_iter != bb->instructions.end()) {
+ SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP);
+ SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET);
+ } else {
+ SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP ||
+ BPF_CLASS(insn->code) == BPF_RET ||
+ branch_targets.find(insn->next) !=
+ branch_targets.end());
+ break;
+ }
+ SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end());
+ }
+ }
+}
+
+SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) {
+ ForAllPrograms(CutGraphIntoBasicBlocks);
+}
+
+void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg,
+ int flags) {
+ BranchTargets branch_targets;
+ codegen->FindBranchTargets(*prg, &branch_targets);
+ TargetsToBlocks all_blocks;
+ BasicBlock *first_block =
+ codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
+
+ // The shape of our graph and thus the function of our program should
+ // still be unchanged after we run MergeTails(). We verify this by
+ // serializing the graph and verifying that it is still the same.
+ // We also verify that at least some of the edges changed because of
+ // tail merging.
+ std::string graph[2];
+ std::string edges[2];
+
+ // The loop executes twice. After the first run, we call MergeTails() on
+ // our graph.
+ for (int i = 0;;) {
+ // Traverse the entire program in depth-first order.
+ std::vector<BasicBlock *> stack;
+ for (BasicBlock *bb = first_block;;) {
+ // Serialize the instructions in this basic block. In general, we only
+ // need to serialize "code" and "k"; except for a BPF_JA instruction
+ // where "k" isn't set.
+ // The stream of instructions should be unchanged after MergeTails().
+ for (Instructions::const_iterator iter = bb->instructions.begin();
+ iter != bb->instructions.end();
+ ++iter) {
+ graph[i].append(reinterpret_cast<char *>(&(*iter)->code),
+ sizeof((*iter)->code));
+ if (BPF_CLASS((*iter)->code) != BPF_JMP ||
+ BPF_OP((*iter)->code) != BPF_JA) {
+ graph[i].append(reinterpret_cast<char *>(&(*iter)->k),
+ sizeof((*iter)->k));
+ }
+ }
+
+      // Also serialize the addresses of the basic blocks as we encounter them.
+      // This will change as basic blocks are coalesced by MergeTails().
+ edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb));
+
+ // Depth-first traversal of the graph. We only ever need to look at the
+ // very last instruction in the basic block, as that is the only one that
+ // can change code flow.
+ Instruction *insn = bb->instructions.back();
+ if (BPF_CLASS(insn->code) == BPF_JMP) {
+ // For jump instructions, we need to remember the "false" branch while
+ // traversing the "true" branch. This is not necessary for BPF_JA which
+ // only has a single branch.
+ if (BPF_OP(insn->code) != BPF_JA) {
+ stack.push_back(all_blocks[insn->jf_ptr]);
+ }
+ bb = all_blocks[insn->jt_ptr];
+ } else if (BPF_CLASS(insn->code) == BPF_RET) {
+ // After a BPF_RET, see if we need to back track.
+ if (stack.empty()) {
+ break;
+ }
+ bb = stack.back();
+ stack.pop_back();
+ } else {
+ // For "normal" instructions, just follow to the next basic block.
+ bb = all_blocks[insn->next];
+ }
+ }
+
+ // Our loop runs exactly two times.
+ if (++i > 1) {
+ break;
+ }
+ codegen->MergeTails(&all_blocks);
+ }
+ SANDBOX_ASSERT(graph[0] == graph[1]);
+ if (flags & HAS_MERGEABLE_TAILS) {
+ SANDBOX_ASSERT(edges[0] != edges[1]);
+ } else {
+ SANDBOX_ASSERT(edges[0] == edges[1]);
+ }
+}
+
+SANDBOX_TEST(CodeGen, MergeTails) {
+ ForAllPrograms(MergeTails);
+}
+
+void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
+ // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
+ // detects a problem. Typically, if anything goes wrong, this looks to the
+ // TopoSort algorithm as if there had been cycles in the input data.
+ // This provides a pretty good unittest.
+ // We hand-crafted the program returned by SampleProgram() to exercise
+ // several of the more interesting code-paths. See comments in
+ // SampleProgram() for details.
+ // In addition to relying on the internal consistency checks in the compiler,
+ // we also serialize the graph and the resulting BPF program and compare
+ // them. With the exception of BPF_JA instructions that might have been
+ // inserted, both instruction streams should be equivalent.
+ // As Compile() modifies the instructions, we have to serialize the graph
+ // before calling Compile().
+ std::string source;
+ Instructions source_stack;
+ for (const Instruction *insn = prg, *next; insn; insn = next) {
+ if (BPF_CLASS(insn->code) == BPF_JMP) {
+ if (BPF_OP(insn->code) == BPF_JA) {
+ // Do not serialize BPF_JA instructions (see above).
+ next = insn->jt_ptr;
+ continue;
+ } else {
+ source_stack.push_back(insn->jf_ptr);
+ next = insn->jt_ptr;
+ }
+ } else if (BPF_CLASS(insn->code) == BPF_RET) {
+ if (source_stack.empty()) {
+ next = NULL;
+ } else {
+ next = source_stack.back();
+ source_stack.pop_back();
+ }
+ } else {
+ next = insn->next;
+ }
+ // Only serialize "code" and "k". That's all the information we need to
+ // compare. The rest of the information is encoded in the order of
+ // instructions.
+ source.append(reinterpret_cast<const char *>(&insn->code),
+ sizeof(insn->code));
+ source.append(reinterpret_cast<const char *>(&insn->k),
+ sizeof(insn->k));
+ }
+
+ // Compile the program
+ SandboxUnittestHelper::Program bpf;
+ codegen->Compile(prg, &bpf);
+
+ // Serialize the resulting BPF instructions.
+ std::string assembly;
+ std::vector<int> assembly_stack;
+ for (int idx = 0; idx >= 0;) {
+ SANDBOX_ASSERT(idx < (int)bpf.size());
+ struct sock_filter& insn = bpf[idx];
+ if (BPF_CLASS(insn.code) == BPF_JMP) {
+ if (BPF_OP(insn.code) == BPF_JA) {
+ // Do not serialize BPF_JA instructions (see above).
+ idx += insn.k + 1;
+ continue;
+ } else {
+ assembly_stack.push_back(idx + insn.jf + 1);
+ idx += insn.jt + 1;
+ }
+ } else if (BPF_CLASS(insn.code) == BPF_RET) {
+ if (assembly_stack.empty()) {
+ idx = -1;
+ } else {
+ idx = assembly_stack.back();
+ assembly_stack.pop_back();
+ }
+ } else {
+ ++idx;
+ }
+ // Serialize the same information that we serialized before compilation.
+ assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code));
+ assembly.append(reinterpret_cast<char *>(&insn.k), sizeof(insn.k));
+ }
+ SANDBOX_ASSERT(source == assembly);
+}
+
+SANDBOX_TEST(CodeGen, All) {
+ ForAllPrograms(CompileAndCompare);
+}
+
+} // namespace playground2
diff --git a/sandbox/linux/seccomp-bpf/demo.cc b/sandbox/linux/seccomp-bpf/demo.cc
new file mode 100644
index 0000000..02fd8a0
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/demo.cc
@@ -0,0 +1,414 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/ipc.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/shm.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/util.h"
+
+using playground2::arch_seccomp_data;
+using playground2::ErrorCode;
+using playground2::Sandbox;
+using playground2::Util;
+
+#define ERR EPERM
+
+// We don't expect our sandbox to do anything useful yet. So, we will fail
+// almost immediately. For now, force the code to continue running. The
+// following line should be removed as soon as the sandbox is starting to
+// actually enforce restrictions in a meaningful way:
+#define _exit(x) do { } while (0)
+
+
+// POSIX doesn't define any async-signal safe function for converting
+// an integer to ASCII. We'll have to define our own version.
+// itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the
+// conversion was successful or NULL otherwise. It never writes more than "sz"
+// bytes. Output will be truncated as needed, and a NUL character is always
+// appended.
+static char *itoa_r(int i, char *buf, size_t sz) {
+ // Make sure we can write at least one NUL byte.
+ size_t n = 1;
+ if (n > sz) {
+ return NULL;
+ }
+
+ // Handle negative numbers.
+ char *start = buf;
+ int minint = 0;
+ if (i < 0) {
+ // Make sure we can write the '-' character.
+ if (++n > sz) {
+ *start = '\000';
+ return NULL;
+ }
+ *start++ = '-';
+
+ // Turn our number positive.
+ if (i == -i) {
+ // The lowest-most negative integer needs special treatment.
+ minint = 1;
+ i = -(i + 1);
+ } else {
+ // "Normal" negative numbers are easy.
+ i = -i;
+ }
+ }
+
+ // Loop until we have converted the entire number. Output at least one
+ // character (i.e. '0').
+ char *ptr = start;
+ do {
+ // Make sure there is still enough space left in our output buffer.
+ if (++n > sz) {
+ buf = NULL;
+ goto truncate;
+ }
+
+ // Output the next digit and (if necessary) compensate for the lowest-most
+ // negative integer needing special treatment. This works because, no
+ // matter the bit width of the integer, the lowest-most integer always ends
+ // in 2, 4, 6, or 8.
+ *ptr++ = i%10 + '0' + minint;
+ minint = 0;
+ i /= 10;
+ } while (i);
+ truncate: // Terminate the output with a NUL character.
+ *ptr = '\000';
+
+ // Conversion to ASCII actually resulted in the digits being in reverse
+ // order. We can't easily generate them in forward order, as we can't tell
+ // the number of characters needed until we are done converting.
+ // So, now, we reverse the string (except for the possible "-" sign).
+ while (--ptr > start) {
+ char ch = *ptr;
+ *ptr = *start;
+ *start++ = ch;
+ }
+ return buf;
+}
+
+// This handler gets called, whenever we encounter a system call that we
+// don't recognize explicitly. For the purposes of this program, we just
+// log the system call and then deny it. More elaborate sandbox policies
+// might try to evaluate the system call in user-space, instead.
+// The only notable complication is that this function must be async-signal
+// safe. This restricts the library functions that we can call.
+static intptr_t defaultHandler(const struct arch_seccomp_data& data,
+ void *) {
+ static const char msg0[] = "Disallowed system call #";
+ static const char msg1[] = "\n";
+ char buf[sizeof(msg0) - 1 + 25 + sizeof(msg1)];
+
+ *buf = '\000';
+ strncat(buf, msg0, sizeof(buf));
+
+ char *ptr = strrchr(buf, '\000');
+ itoa_r(data.nr, ptr, sizeof(buf) - (ptr - buf));
+
+ ptr = strrchr(ptr, '\000');
+ strncat(ptr, msg1, sizeof(buf) - (ptr - buf));
+
+ ptr = strrchr(ptr, '\000');
+ if (HANDLE_EINTR(write(2, buf, ptr - buf))) { }
+
+ return -ERR;
+}
+
+static ErrorCode evaluator(int sysno) {
+ switch (sysno) {
+ #if defined(__NR_accept)
+ case __NR_accept: case __NR_accept4:
+#endif
+ case __NR_alarm:
+ case __NR_brk:
+ case __NR_clock_gettime:
+ case __NR_close:
+ case __NR_dup: case __NR_dup2:
+ case __NR_epoll_create: case __NR_epoll_ctl: case __NR_epoll_wait:
+ case __NR_exit: case __NR_exit_group:
+ case __NR_fcntl:
+#if defined(__NR_fcntl64)
+ case __NR_fcntl64:
+#endif
+ case __NR_fdatasync:
+ case __NR_fstat:
+#if defined(__NR_fstat64)
+ case __NR_fstat64:
+#endif
+ case __NR_ftruncate:
+ case __NR_futex:
+ case __NR_getdents: case __NR_getdents64:
+ case __NR_getegid:
+#if defined(__NR_getegid32)
+ case __NR_getegid32:
+#endif
+ case __NR_geteuid:
+#if defined(__NR_geteuid32)
+ case __NR_geteuid32:
+#endif
+ case __NR_getgid:
+#if defined(__NR_getgid32)
+ case __NR_getgid32:
+#endif
+ case __NR_getitimer: case __NR_setitimer:
+#if defined(__NR_getpeername)
+ case __NR_getpeername:
+#endif
+ case __NR_getpid: case __NR_gettid:
+#if defined(__NR_getsockname)
+ case __NR_getsockname:
+#endif
+ case __NR_gettimeofday:
+ case __NR_getuid:
+#if defined(__NR_getuid32)
+ case __NR_getuid32:
+#endif
+#if defined(__NR__llseek)
+ case __NR__llseek:
+#endif
+ case __NR_lseek:
+ case __NR_nanosleep:
+ case __NR_pipe: case __NR_pipe2:
+ case __NR_poll:
+ case __NR_pread64: case __NR_preadv:
+ case __NR_pwrite64: case __NR_pwritev:
+ case __NR_read: case __NR_readv:
+ case __NR_restart_syscall:
+ case __NR_set_robust_list:
+ case __NR_rt_sigaction:
+#if defined(__NR_sigaction)
+ case __NR_sigaction:
+#endif
+#if defined(__NR_signal)
+ case __NR_signal:
+#endif
+ case __NR_rt_sigprocmask:
+#if defined(__NR_sigprocmask)
+ case __NR_sigprocmask:
+#endif
+#if defined(__NR_shutdown)
+ case __NR_shutdown:
+#endif
+ case __NR_rt_sigreturn:
+#if defined(__NR_sigreturn)
+ case __NR_sigreturn:
+#endif
+#if defined(__NR_socketpair)
+ case __NR_socketpair:
+#endif
+ case __NR_time:
+ case __NR_uname:
+ case __NR_write: case __NR_writev:
+ return ErrorCode(ErrorCode::ERR_ALLOWED);
+
+ // The following system calls are temporarily permitted. This must be
+ // tightened later. But we currently don't implement enough of the sandboxing
+ // API to do so.
+ // As is, this sandbox isn't exactly safe :-/
+#if defined(__NR_sendmsg)
+ case __NR_sendmsg: case __NR_sendto:
+ case __NR_recvmsg: case __NR_recvfrom:
+ case __NR_getsockopt: case __NR_setsockopt:
+#elif defined(__NR_socketcall)
+ case __NR_socketcall:
+#endif
+#if defined(__NR_shmat)
+ case __NR_shmat: case __NR_shmctl: case __NR_shmdt: case __NR_shmget:
+#elif defined(__NR_ipc)
+ case __NR_ipc:
+#endif
+#if defined(__NR_mmap2)
+ case __NR_mmap2:
+#else
+ case __NR_mmap:
+#endif
+#if defined(__NR_ugetrlimit)
+ case __NR_ugetrlimit:
+#endif
+ case __NR_getrlimit:
+ case __NR_ioctl:
+ case __NR_prctl:
+ case __NR_clone:
+ case __NR_munmap: case __NR_mprotect: case __NR_madvise:
+ case __NR_remap_file_pages:
+ return ErrorCode(ErrorCode::ERR_ALLOWED);
+
+ // Everything that isn't explicitly allowed is denied.
+ default:
+ return Sandbox::Trap(defaultHandler, NULL);
+ }
+}
+
+static void *threadFnc(void *arg) {
+ return arg;
+}
+
+static void *sendmsgStressThreadFnc(void *arg) {
+ if (arg) { }
+ static const int repetitions = 100;
+ static const int kNumFds = 3;
+ for (int rep = 0; rep < repetitions; ++rep) {
+ int fds[2 + kNumFds];
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
+ perror("socketpair()");
+ _exit(1);
+ }
+ size_t len = 4;
+ char buf[4];
+ if (!Util::sendFds(fds[0], "test", 4, fds[1], fds[1], fds[1], -1) ||
+ !Util::getFds(fds[1], buf, &len, fds+2, fds+3, fds+4, NULL) ||
+ len != 4 ||
+ memcmp(buf, "test", len) ||
+ write(fds[2], "demo", 4) != 4 ||
+ read(fds[0], buf, 4) != 4 ||
+ memcmp(buf, "demo", 4)) {
+ perror("sending/receiving of fds");
+ _exit(1);
+ }
+ for (int i = 0; i < 2+kNumFds; ++i) {
+ if (close(fds[i])) {
+ perror("close");
+ _exit(1);
+ }
+ }
+ }
+ return NULL;
+}
+
+int main(int argc, char *argv[]) {
+ if (argc) { }
+ if (argv) { }
+ int proc_fd = open("/proc", O_RDONLY|O_DIRECTORY);
+ if (Sandbox::supportsSeccompSandbox(proc_fd) !=
+ Sandbox::STATUS_AVAILABLE) {
+ perror("sandbox");
+ _exit(1);
+ }
+ Sandbox::setProcFd(proc_fd);
+ Sandbox::setSandboxPolicy(evaluator, NULL);
+ Sandbox::startSandbox();
+
+ // Check that we can create threads
+ pthread_t thr;
+ if (!pthread_create(&thr, NULL, threadFnc,
+ reinterpret_cast<void *>(0x1234))) {
+ void *ret;
+ pthread_join(thr, &ret);
+ if (ret != reinterpret_cast<void *>(0x1234)) {
+ perror("clone() failed");
+ _exit(1);
+ }
+ } else {
+ perror("clone() failed");
+ _exit(1);
+ }
+
+  // Check that we handle restart_syscall() without dying. This is a little
+ // tricky to trigger. And I can't think of a good way to verify whether it
+ // actually executed.
+ signal(SIGALRM, SIG_IGN);
+ const struct itimerval tv = { { 0, 0 }, { 0, 5*1000 } };
+ const struct timespec tmo = { 0, 100*1000*1000 };
+ setitimer(ITIMER_REAL, &tv, NULL);
+ nanosleep(&tmo, NULL);
+
+ // Check that we can query the size of the stack, but that all other
+ // calls to getrlimit() fail.
+ if (((errno = 0), !getrlimit(RLIMIT_STACK, NULL)) || errno != EFAULT ||
+ ((errno = 0), !getrlimit(RLIMIT_CORE, NULL)) || errno != ERR) {
+ perror("getrlimit()");
+ _exit(1);
+ }
+
+ // Check that we can query TCGETS and TIOCGWINSZ, but no other ioctls().
+ if (((errno = 0), !ioctl(2, TCGETS, NULL)) || errno != EFAULT ||
+ ((errno = 0), !ioctl(2, TIOCGWINSZ, NULL)) || errno != EFAULT ||
+ ((errno = 0), !ioctl(2, TCSETS, NULL)) || errno != ERR) {
+ perror("ioctl()");
+ _exit(1);
+ }
+
+ // Check that prctl() can manipulate the dumpable flag, but nothing else.
+ if (((errno = 0), !prctl(PR_GET_DUMPABLE)) || errno ||
+ ((errno = 0), prctl(PR_SET_DUMPABLE, 1)) || errno ||
+ ((errno = 0), !prctl(PR_SET_SECCOMP, 0)) || errno != ERR) {
+ perror("prctl()");
+ _exit(1);
+ }
+
+ // Check that we can send and receive file handles.
+ int fds[3];
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
+ perror("socketpair()");
+ _exit(1);
+ }
+ size_t len = 4;
+ char buf[4];
+ if (!Util::sendFds(fds[0], "test", 4, fds[1], -1) ||
+ !Util::getFds(fds[1], buf, &len, fds+2, NULL) ||
+ len != 4 ||
+ memcmp(buf, "test", len) ||
+ write(fds[2], "demo", 4) != 4 ||
+ read(fds[0], buf, 4) != 4 ||
+ memcmp(buf, "demo", 4) ||
+ close(fds[0]) ||
+ close(fds[1]) ||
+ close(fds[2])) {
+ perror("sending/receiving of fds");
+ _exit(1);
+ }
+
+ // Check whether SysV IPC works.
+ int shmid;
+ void *addr;
+ if ((shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT|0600)) < 0 ||
+ (addr = shmat(shmid, NULL, 0)) == reinterpret_cast<void *>(-1) ||
+ shmdt(addr) ||
+ shmctl(shmid, IPC_RMID, NULL)) {
+ perror("sysv IPC");
+ _exit(1);
+ }
+
+ // Print a message so that the user can see the sandbox is activated.
+ time_t tm = time(NULL);
+ printf("Sandbox has been started at %s", ctime(&tm));
+
+ // Stress-test the sendmsg() code
+ static const int kSendmsgStressNumThreads = 10;
+ pthread_t sendmsgStressThreads[kSendmsgStressNumThreads];
+ for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
+ if (pthread_create(sendmsgStressThreads + i, NULL,
+ sendmsgStressThreadFnc, NULL)) {
+ perror("pthread_create");
+ _exit(1);
+ }
+ }
+ for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
+ pthread_join(sendmsgStressThreads[i], NULL);
+ }
+
+ return 0;
+}
diff --git a/sandbox/linux/seccomp-bpf/die.cc b/sandbox/linux/seccomp-bpf/die.cc
new file mode 100644
index 0000000..b141424
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/die.cc
@@ -0,0 +1,66 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+
+
+namespace playground2 {
+
+void Die::ExitGroup() {
+  // exit_group() should exit our program. After all, it is defined as a
+  // function that doesn't return. But things can theoretically go wrong.
+  // Especially, since we are dealing with system call filters. Continuing
+  // execution would be very bad in most cases where ExitGroup() gets called.
+  // So, we'll try a few other strategies too.
+  syscall(__NR_exit_group, 1);
+
+  // We have no idea what our run-time environment looks like. So, signal
+  // handlers might or might not do the right thing. Try to reset settings
+  // to a defined state; but we have no way to verify whether we actually
+  // succeeded in doing so. Nonetheless, triggering a fatal signal could help
+  // us terminate.
+  signal(SIGSEGV, SIG_DFL);
+  syscall(__NR_prctl, PR_SET_DUMPABLE, (void *)0, (void *)0, (void *)0);
+  // Deliberate NULL dereference: with SIGSEGV back at its default
+  // disposition this should deliver a fatal signal if exit_group() didn't
+  // terminate us. The "volatile" keeps the compiler from optimizing it away.
+  if (*(volatile char *)0) { }
+
+  // If there is no way for us to ask for the program to exit, the next
+  // best thing we can do is to loop indefinitely. Maybe, somebody will notice
+  // and file a bug...
+  // We in fact retry the system call inside of our loop so that it will
+  // stand out when somebody tries to diagnose the problem by using "strace".
+  for (;;) {
+    syscall(__NR_exit_group, 1);
+  }
+}
+
+// Logs |msg| (attributed to |file|:|line|) and then terminates the process
+// via ExitGroup(), which never returns. When simple_exit_ has been set (see
+// EnableSimpleExit()) or in stand-alone builds, the message goes directly to
+// stderr; otherwise it is routed through Chromium's LOG(FATAL) machinery.
+void Die::SandboxDie(const char *msg, const char *file, int line) {
+  if (simple_exit_) {
+    LogToStderr(msg, file, line);
+  } else {
+  #if defined(SECCOMP_BPF_STANDALONE)
+    Die::LogToStderr(msg, file, line);
+  #else
+    logging::LogMessage(file, line, logging::LOG_FATAL).stream() << msg;
+  #endif
+  }
+  ExitGroup();
+}
+
+// Best-effort write of "file:line:msg\n" to stderr (fd 2). A NULL |msg| is
+// silently ignored. Any write() failure is deliberately ignored as well;
+// there is nowhere else to report it.
+void Die::LogToStderr(const char *msg, const char *file, int line) {
+  if (msg) {
+    // 40 bytes is ample for the decimal representation of an int line number.
+    char buf[40];
+    snprintf(buf, sizeof(buf), "%d", line);
+    std::string s = std::string(file) + ":" + buf + ":" + msg + "\n";
+
+    // No need to loop. Short write()s are unlikely and if they happen we
+    // probably prefer them over a loop that blocks.
+    if (HANDLE_EINTR(write(2, s.c_str(), s.length()))) { }
+  }
+}
+
+bool Die::simple_exit_ = false;
+
+} // namespace
diff --git a/sandbox/linux/seccomp-bpf/die.h b/sandbox/linux/seccomp-bpf/die.h
new file mode 100644
index 0000000..608afde
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/die.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_DIE_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_DIE_H__
+
+namespace playground2 {
+
+class Die {
+ public:
+  // This is the main API for using this file. Prints an error message and
+  // exits with a fatal error.
+  #define SANDBOX_DIE(m) Die::SandboxDie(m, __FILE__, __LINE__)
+
+  // Terminate the program, even if the current sandbox policy prevents some
+  // of the more commonly used functions used for exiting.
+  // Most users would want to call SANDBOX_DIE() instead, as it logs extra
+  // information. But calling ExitGroup() is correct and in some rare cases
+  // preferable. So, we make it part of the public API.
+  static void ExitGroup() __attribute__((noreturn));
+
+  // This method gets called by SANDBOX_DIE(). There is normally no reason
+  // to call it directly unless you are defining your own exiting macro.
+  // |msg| may be NULL, in which case nothing is logged before exiting.
+  static void SandboxDie(const char *msg, const char *file, int line)
+      __attribute__((noreturn));
+
+  // Writes a message to stderr. Used as a fall-back choice, if we don't have
+  // any other way to report an error.
+  static void LogToStderr(const char *msg, const char *file, int line);
+
+  // We generally want to run all exit handlers. This means, on SANDBOX_DIE()
+  // we should be calling LOG(FATAL). But there are some situations where
+  // we just need to print a message and then terminate. This would typically
+  // happen in cases where we consume the error message internally (e.g. in
+  // unit tests or in the supportsSeccompSandbox() method).
+  // Note: there is deliberately no way to turn this back off.
+  static void EnableSimpleExit() { simple_exit_ = true; }
+
+ private:
+  static bool simple_exit_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Die);
+};
+
+} // namespace
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_DIE_H__
diff --git a/sandbox/linux/seccomp-bpf/errorcode.cc b/sandbox/linux/seccomp-bpf/errorcode.cc
new file mode 100644
index 0000000..cc79cb6
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/errorcode.cc
@@ -0,0 +1,103 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+
+
+namespace playground2 {
+
+// Builds a "simple" ErrorCode from a symbolic value: ERR_ALLOWED maps to
+// SECCOMP_RET_ALLOW, and errno values in [ERR_MIN_ERRNO, ERR_MAX_ERRNO]
+// map to SECCOMP_RET_ERRNO + errno. Anything else is a programming error.
+ErrorCode::ErrorCode(int err) {
+  switch (err) {
+    case ERR_ALLOWED:
+      err_ = SECCOMP_RET_ALLOW;
+      error_type_ = ET_SIMPLE;
+      break;
+    // NOTE: "lo ... hi" case ranges are a GCC/Clang extension, not ISO C++.
+    case ERR_MIN_ERRNO ... ERR_MAX_ERRNO:
+      err_ = SECCOMP_RET_ERRNO + err;
+      error_type_ = ET_SIMPLE;
+      break;
+    default:
+      SANDBOX_DIE("Invalid use of ErrorCode object");
+  }
+}
+
+// Wraps a SECCOMP_RET_TRAP handler. |id| is embedded in the low bits of the
+// BPF return value (the SECCOMP_RET_DATA field) so the SIGSYS handler can
+// tell which trap fired. |aux| is opaque user data passed back to |fnc|.
+ErrorCode::ErrorCode(ErrorCode::TrapFnc fnc, const void *aux, uint16_t id)
+    : error_type_(ET_TRAP),
+      fnc_(fnc),
+      aux_(const_cast<void *>(aux)),
+      err_(SECCOMP_RET_TRAP + id) {
+}
+
+// Builds a conditional ErrorCode: "if syscall argument |argno| (viewed with
+// |width|) compares to |value| under |op|, evaluate to *|passed|, otherwise
+// to *|failed|". The caller owns |passed| and |failed| and must keep them
+// alive; err_ stays SECCOMP_RET_INVALID until the condition is compiled.
+ErrorCode::ErrorCode(int argno, ArgType width, Operation op, uint64_t value,
+                     const ErrorCode *passed, const ErrorCode *failed)
+    : error_type_(ET_COND),
+      value_(value),
+      argno_(argno),
+      width_(width),
+      op_(op),
+      passed_(passed),
+      failed_(failed),
+      err_(SECCOMP_RET_INVALID) {
+  if (op < 0 || op >= OP_NUM_OPS) {
+    SANDBOX_DIE("Invalid opcode in BPF sandbox rules");
+  }
+}
+
+// Deep structural equality. Simple and trap codes compare by the packed
+// err_ value alone; conditional codes compare all condition fields and
+// recurse into the passed_/failed_ sub-codes. Dies on ET_INVALID inputs.
+bool ErrorCode::Equals(const ErrorCode& err) const {
+  if (error_type_ == ET_INVALID || err.error_type_ == ET_INVALID) {
+    SANDBOX_DIE("Dereferencing invalid ErrorCode");
+  }
+  if (error_type_ != err.error_type_) {
+    return false;
+  }
+  if (error_type_ == ET_SIMPLE || error_type_ == ET_TRAP) {
+    return err_ == err.err_;
+  } else if (error_type_ == ET_COND) {
+    return value_ == err.value_ &&
+           argno_ == err.argno_ &&
+           width_ == err.width_ &&
+           op_ == err.op_ &&
+           passed_->Equals(*err.passed_) &&
+           failed_->Equals(*err.failed_);
+  } else {
+    SANDBOX_DIE("Corrupted ErrorCode");
+  }
+}
+
+bool ErrorCode::LessThan(const ErrorCode& err) const {
+  // Implementing a "LessThan()" operator allows us to use ErrorCode objects
+  // as keys in STL containers; most notably, it also allows us to put them
+  // into std::set<>. Actual ordering is not important as long as it is
+  // deterministic; we compare field-by-field, most significant first.
+  if (error_type_ == ET_INVALID || err.error_type_ == ET_INVALID) {
+    SANDBOX_DIE("Dereferencing invalid ErrorCode");
+  }
+  if (error_type_ != err.error_type_) {
+    return error_type_ < err.error_type_;
+  } else {
+    if (error_type_ == ET_SIMPLE || error_type_ == ET_TRAP) {
+      return err_ < err.err_;
+    } else if (error_type_ == ET_COND) {
+      if (value_ != err.value_) {
+        return value_ < err.value_;
+      } else if (argno_ != err.argno_) {
+        return argno_ < err.argno_;
+      } else if (width_ != err.width_) {
+        return width_ < err.width_;
+      } else if (op_ != err.op_) {
+        return op_ < err.op_;
+      } else if (!passed_->Equals(*err.passed_)) {
+        return passed_->LessThan(*err.passed_);
+      } else if (!failed_->Equals(*err.failed_)) {
+        return failed_->LessThan(*err.failed_);
+      } else {
+        // All fields equal: neither is less than the other.
+        return false;
+      }
+    } else {
+      SANDBOX_DIE("Corrupted ErrorCode");
+    }
+  }
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp-bpf/errorcode.h b/sandbox/linux/seccomp-bpf/errorcode.h
new file mode 100644
index 0000000..2b941ee
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/errorcode.h
@@ -0,0 +1,133 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_ERRORCODE_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_ERRORCODE_H__
+
+namespace playground2 {
+
+struct arch_seccomp_data;
+
+// This class holds all the possible values that can be returned by a sandbox
+// policy.
+// We can either wrap a symbolic ErrorCode (i.e. ERR_XXX enum values), an
+// errno value (in the range 1..4095), a pointer to a TrapFnc callback
+// handling a SECCOMP_RET_TRAP trap, or a complex constraint.
+// All of the commonly used values are stored in the "err_" field. So, code
+// that is using the ErrorCode class typically operates on a single 32bit
+// field.
+class ErrorCode {
+ public:
+  enum {
+    // Allow this system call.
+    ERR_ALLOWED = 0x0000,
+
+    // Deny the system call with a particular "errno" value.
+    ERR_MIN_ERRNO = 1,
+    ERR_MAX_ERRNO = 4095,
+
+    // This code should never be used directly, it is used internally only.
+    ERR_INVALID = -1,
+  };
+
+  // TrapFnc is a pointer to a function that handles Seccomp traps in
+  // user-space. The seccomp policy can request that a trap handler gets
+  // installed; it does so by returning a suitable ErrorCode() from the
+  // syscallEvaluator. See the ErrorCode() constructor for how to pass in
+  // the function pointer.
+  // Please note that TrapFnc is executed from signal context and must be
+  // async-signal safe:
+  // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
+  typedef intptr_t (*TrapFnc)(const struct arch_seccomp_data& args, void *aux);
+
+  // Width of the system call argument being inspected by a conditional code.
+  enum ArgType {
+    TP_32BIT, TP_64BIT,
+  };
+
+  // Comparison operator for conditional codes. OP_NUM_OPS is a sentinel
+  // used only for range-checking, never a valid operation.
+  enum Operation {
+    OP_EQUAL, OP_GREATER, OP_GREATER_EQUAL, OP_HAS_BITS,
+    OP_NUM_OPS,
+  };
+
+  // We allow the default constructor, as it makes the ErrorCode class
+  // much easier to use. But if we ever encounter an invalid ErrorCode
+  // when compiling a BPF filter, we deliberately generate an invalid
+  // program that will get flagged both by our Verifier class and by
+  // the Linux kernel.
+  ErrorCode() :
+      error_type_(ET_INVALID),
+      err_(SECCOMP_RET_INVALID) {
+  }
+  explicit ErrorCode(int err);
+
+  // For all practical purposes, ErrorCodes are treated as if they were
+  // structs. The copy constructor and assignment operator are trivial and
+  // we do not need to explicitly specify them.
+  // Most notably, it is in fact perfectly OK to directly copy the passed_ and
+  // failed_ field. They only ever get set by our private constructor, and the
+  // callers handle life-cycle management for these objects.
+
+  // Destructor
+  ~ErrorCode() { }
+
+  bool Equals(const ErrorCode& err) const;
+  bool LessThan(const ErrorCode& err) const;
+
+  uint32_t err() const { return err_; }
+
+  // Functor adapter so ErrorCode can be used as a key in ordered STL
+  // containers, e.g. std::set<ErrorCode, ErrorCode::LessThan>.
+  struct LessThan {
+    bool operator()(const ErrorCode& a, const ErrorCode& b) const {
+      return a.LessThan(b);
+    }
+  };
+
+ private:
+  friend class CodeGen;
+  friend class Sandbox;
+  friend class Verifier;
+
+  enum ErrorType {
+    ET_INVALID, ET_SIMPLE, ET_TRAP, ET_COND,
+  };
+
+  // If we are wrapping a callback, we must assign a unique id. This id is
+  // how the kernel tells us which one of our different SECCOMP_RET_TRAP
+  // cases has been triggered.
+  ErrorCode(TrapFnc fnc, const void *aux, uint16_t id);
+
+  // Some system calls require inspection of arguments. This constructor
+  // allows us to specify additional constraints.
+  ErrorCode(int argno, ArgType width, Operation op, uint64_t value,
+            const ErrorCode *passed, const ErrorCode *failed);
+
+  ErrorType error_type_;
+
+  // Anonymous union: only the member group selected by error_type_ is
+  // meaningful (ET_TRAP uses the first struct, ET_COND the second).
+  union {
+    // Fields needed for SECCOMP_RET_TRAP callbacks
+    struct {
+      TrapFnc fnc_;              // Callback function and arg, if trap was
+      void    *aux_;             //   triggered by the kernel's BPF filter.
+    };
+
+    // Fields needed when inspecting additional arguments.
+    struct {
+      uint64_t  value_;          // Value that we are comparing with.
+      int       argno_;          // Syscall arg number that we are inspecting.
+      ArgType   width_;          // Whether we are looking at a 32/64bit value.
+      Operation op_;             // Comparison operation.
+      const ErrorCode *passed_;  // Value to be returned if comparison passed,
+      const ErrorCode *failed_;  //   or if it failed.
+    };
+  };
+
+  // 32bit field used for all possible types of ErrorCode values. This is
+  // the value that uniquely identifies any ErrorCode and it (typically) can
+  // be emitted directly into a BPF filter program.
+  uint32_t err_;
+
+};
+
+} // namespace
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_ERRORCODE_H__
diff --git a/sandbox/linux/seccomp-bpf/errorcode_unittest.cc b/sandbox/linux/seccomp-bpf/errorcode_unittest.cc
new file mode 100644
index 0000000..21f889e
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/errorcode_unittest.cc
@@ -0,0 +1,77 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+using namespace playground2;
+
+namespace {
+
+// Verifies the packed err() value produced by each public construction path:
+// default (invalid), ERR_ALLOWED, an errno value, and a trap.
+SANDBOX_TEST(ErrorCode, ErrnoConstructor) {
+  ErrorCode e0;
+  SANDBOX_ASSERT(e0.err() == SECCOMP_RET_INVALID);
+
+  ErrorCode e1(ErrorCode::ERR_ALLOWED);
+  SANDBOX_ASSERT(e1.err() == SECCOMP_RET_ALLOW);
+
+  ErrorCode e2(EPERM);
+  SANDBOX_ASSERT(e2.err() == SECCOMP_RET_ERRNO + EPERM);
+
+  ErrorCode e3 = Sandbox::Trap(NULL, NULL);
+  SANDBOX_ASSERT((e3.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP);
+}
+
+// Verifies trap id assignment: distinct (fnc, aux) pairs get consecutive
+// SECCOMP_RET_DATA ids, while a repeated pair reuses its existing id.
+SANDBOX_TEST(ErrorCode, Trap) {
+  ErrorCode e0 = Sandbox::Trap(NULL, "a");
+  ErrorCode e1 = Sandbox::Trap(NULL, "b");
+  SANDBOX_ASSERT((e0.err() & SECCOMP_RET_DATA) + 1 ==
+                 (e1.err() & SECCOMP_RET_DATA));
+
+  ErrorCode e2 = Sandbox::Trap(NULL, "a");
+  SANDBOX_ASSERT((e0.err() & SECCOMP_RET_DATA) ==
+                 (e2.err() & SECCOMP_RET_DATA));
+}
+
+// Verifies Equals() across simple, errno and trap codes, including
+// reflexivity and symmetry.
+SANDBOX_TEST(ErrorCode, Equals) {
+  ErrorCode e1(ErrorCode::ERR_ALLOWED);
+  ErrorCode e2(ErrorCode::ERR_ALLOWED);
+  SANDBOX_ASSERT(e1.Equals(e1));
+  SANDBOX_ASSERT(e1.Equals(e2));
+  SANDBOX_ASSERT(e2.Equals(e1));
+
+  ErrorCode e3(EPERM);
+  SANDBOX_ASSERT(!e1.Equals(e3));
+
+  ErrorCode e4 = Sandbox::Trap(NULL, "a");
+  ErrorCode e5 = Sandbox::Trap(NULL, "b");
+  ErrorCode e6 = Sandbox::Trap(NULL, "a");
+  SANDBOX_ASSERT(!e1.Equals(e4));
+  SANDBOX_ASSERT(!e3.Equals(e4));
+  SANDBOX_ASSERT(!e5.Equals(e4));
+  SANDBOX_ASSERT( e6.Equals(e4));
+}
+
+// Verifies that LessThan() imposes a strict weak ordering: irreflexive,
+// consistent with Equals(), and deterministic across code kinds.
+SANDBOX_TEST(ErrorCode, LessThan) {
+  ErrorCode e1(ErrorCode::ERR_ALLOWED);
+  ErrorCode e2(ErrorCode::ERR_ALLOWED);
+  SANDBOX_ASSERT(!e1.LessThan(e1));
+  SANDBOX_ASSERT(!e1.LessThan(e2));
+  SANDBOX_ASSERT(!e2.LessThan(e1));
+
+  ErrorCode e3(EPERM);
+  SANDBOX_ASSERT(!e1.LessThan(e3));
+  SANDBOX_ASSERT( e3.LessThan(e1));
+
+  ErrorCode e4 = Sandbox::Trap(NULL, "a");
+  ErrorCode e5 = Sandbox::Trap(NULL, "b");
+  ErrorCode e6 = Sandbox::Trap(NULL, "a");
+  SANDBOX_ASSERT(e1.LessThan(e4));
+  SANDBOX_ASSERT(e3.LessThan(e4));
+  SANDBOX_ASSERT(e4.LessThan(e5));
+  SANDBOX_ASSERT(!e4.LessThan(e6));
+  SANDBOX_ASSERT(!e6.LessThan(e4));
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp-bpf/instruction.h b/sandbox/linux/seccomp-bpf/instruction.h
new file mode 100644
index 0000000..0fc8123
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/instruction.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_INSTRUCTION_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_INSTRUCTION_H__
+
+#include <stdint.h>
+
+
+namespace playground2 {
+
+// The fields in this structure have the same meaning as the corresponding
+// fields in "struct sock_filter". See <linux/filter.h> for a lot more
+// detail.
+// code -- Opcode of the instruction. This is typically a bitwise
+// combination BPF_XXX values.
+// k -- Operand; BPF instructions take zero or one operands. Operands
+// are 32bit-wide constants, if present. They can be immediate
+// values (if BPF_K is present in "code_"), addresses (if BPF_ABS
+// is present in "code_"), or relative jump offsets (if BPF_JMP
+// and BPF_JA are present in "code_").
+// jt, jf -- all conditional jumps have a 8bit-wide jump offset that allows
+// jumps of up to 256 instructions forward. Conditional jumps are
+// identified by BPF_JMP in "code_", but the lack of BPF_JA.
+// Conditional jumps have a "t"rue and "f"alse branch.
+struct Instruction {
+  // Constructor for an non-jumping instruction or for an unconditional
+  // "always" jump. |n| is the next instruction in program order.
+  Instruction(uint16_t c, uint32_t parm, Instruction *n) :
+    code(c), next(n), k(parm) { }
+
+  // Constructor for a conditional jump instruction, with explicit true
+  // and false branch targets.
+  Instruction(uint16_t c, uint32_t parm, Instruction *jt, Instruction *jf) :
+    code(c), jt_ptr(jt), jf_ptr(jf), k(parm) { }
+
+  uint16_t code;
+  // Anonymous union: which member group is live depends on the code
+  // generation phase (resolved offsets vs. pointers) and on whether the
+  // instruction is a conditional jump.
+  union {
+    // When code generation is complete, we will have computed relative
+    // branch targets that are in the range 0..255.
+    struct {
+      uint8_t jt, jf;
+    };
+
+    // While assembling the BPF program, we use pointers for branch targets.
+    // Once we have computed basic blocks, these pointers will be entered as
+    // keys in a TargetsToBlocks map and should no longer be dereferenced
+    // directly.
+    struct {
+      Instruction *jt_ptr, *jf_ptr;
+    };
+
+    // While assembling the BPF program, non-jumping instructions are linked
+    // by the "next_" pointer. This field is no longer needed when we have
+    // computed basic blocks.
+    Instruction *next;
+  };
+  uint32_t k;
+};
+
+} // namespace
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_INSTRUCTION_H__
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
new file mode 100644
index 0000000..eb03995
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
@@ -0,0 +1,655 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/codegen.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
+#include "sandbox/linux/seccomp-bpf/verifier.h"
+
+namespace {
+
+// Best-effort: writes "Failed to set up stderr: <strerror(errno)>\n" to
+// |out_fd|. Each write() result feeds the condition only so that short or
+// failed writes abort the sequence; the overall outcome is ignored.
+void WriteFailedStderrSetupMessage(int out_fd) {
+  const char* error_string = strerror(errno);
+  static const char msg[] = "Failed to set up stderr: ";
+  if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg)-1)) > 0 && error_string &&
+      HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&
+      HANDLE_EINTR(write(out_fd, "\n", 1))) {
+  }
+}
+
+} // namespace
+
+// The kernel gives us a sandbox, we turn it into a playground :-)
+// This is version 2 of the playground; version 1 was built on top of
+// pre-BPF seccomp mode.
+namespace playground2 {
+
+const int kExpectedExitCode = 100;
+
+// We define a really simple sandbox policy. It is just good enough for us
+// to tell that the sandbox has actually been activated.
+ErrorCode Sandbox::probeEvaluator(int sysnum, void *) {
+  // Minimal probe policy: getpid() is denied with a recognizable errno,
+  // exit_group() is allowed so the probe child can report success, and
+  // everything else fails with EINVAL.
+  if (sysnum == __NR_getpid) {
+    // Return EPERM so that we can check that the filter actually ran.
+    return ErrorCode(EPERM);
+  }
+  if (sysnum == __NR_exit_group) {
+    // Allow exit() with a non-default return code.
+    return ErrorCode(ErrorCode::ERR_ALLOWED);
+  }
+  // Make everything else fail in an easily recognizable way.
+  return ErrorCode(EINVAL);
+}
+
+// Runs inside the probe child under probeEvaluator's policy. If the filter
+// is active, getpid() fails with EPERM and we exit with kExpectedExitCode,
+// which the parent treats as proof that sandboxing works. Falling off the
+// end (any other outcome) makes the parent's check fail.
+void Sandbox::probeProcess(void) {
+  if (syscall(__NR_getpid) < 0 && errno == EPERM) {
+    syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
+  }
+}
+
+// Thin wrapper: returns whether |sysnum| lies in the architecture's valid
+// system call range, as defined by SyscallIterator::IsValid().
+bool Sandbox::isValidSyscallNumber(int sysnum) {
+  return SyscallIterator::IsValid(sysnum);
+}
+
+// Permissive policy: every valid system call is allowed; out-of-range
+// numbers are denied with ENOSYS.
+ErrorCode Sandbox::allowAllEvaluator(int sysnum, void *) {
+  if (isValidSyscallNumber(sysnum)) {
+    return ErrorCode(ErrorCode::ERR_ALLOWED);
+  }
+  return ErrorCode(ENOSYS);
+}
+
+// Runs inside a probe child under allowAllEvaluator's policy. Exercises
+// time(), which glibc may implement via the vsyscall page; if we survive
+// the call, we exit with kExpectedExitCode to signal success to the parent.
+void Sandbox::tryVsyscallProcess(void) {
+  time_t current_time;
+  // time() is implemented as a vsyscall. With an older glibc, with
+  // vsyscall=emulate and some versions of the seccomp BPF patch
+  // we may get SIGKILL-ed. Detect this!
+  // NOTE: "&current_time" was previously mangled to the HTML entity
+  // "&curren;" ("¤t_time") by an encoding mishap, which does not compile.
+  if (time(&current_time) != static_cast<time_t>(-1)) {
+    syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
+  }
+}
+
+// Forks a child, installs |syscallEvaluator| (with opaque |aux|) as the
+// seccomp-BPF policy there, and runs |CodeInSandbox| under it. Returns true
+// iff the child exits with kExpectedExitCode. The child's stderr is
+// redirected into a pipe; on failure, anything the child wrote is re-raised
+// in the parent via SANDBOX_DIE. All signals are blocked across the fork so
+// an attacker cannot perturb the probe with unexpected signals.
+bool Sandbox::RunFunctionInPolicy(void (*CodeInSandbox)(),
+                                  EvaluateSyscall syscallEvaluator,
+                                  void *aux,
+                                  int proc_fd) {
+  // Block all signals before forking a child process. This prevents an
+  // attacker from manipulating our test by sending us an unexpected signal.
+  sigset_t oldMask, newMask;
+  if (sigfillset(&newMask) ||
+      sigprocmask(SIG_BLOCK, &newMask, &oldMask)) {
+    SANDBOX_DIE("sigprocmask() failed");
+  }
+  // Non-blocking so the parent's diagnostic read() below cannot hang;
+  // close-on-exec so the pipe does not leak into any exec'd children.
+  int fds[2];
+  if (pipe2(fds, O_NONBLOCK|O_CLOEXEC)) {
+    SANDBOX_DIE("pipe() failed");
+  }
+
+  // If fd 0, 1 or 2 were closed at startup, the pipe would have landed on
+  // one of them and the dup2-to-stderr dance below would misbehave.
+  if (fds[0] <= 2 || fds[1] <= 2) {
+    SANDBOX_DIE("Process started without standard file descriptors");
+  }
+
+  pid_t pid = fork();
+  if (pid < 0) {
+    // Die if we cannot fork(). We would probably fail a little later
+    // anyway, as the machine is likely very close to running out of
+    // memory.
+    // But what we don't want to do is return "false", as a crafty
+    // attacker might cause fork() to fail at will and could trick us
+    // into running without a sandbox.
+    sigprocmask(SIG_SETMASK, &oldMask, NULL);  // OK, if it fails
+    SANDBOX_DIE("fork() failed unexpectedly");
+  }
+
+  // In the child process
+  if (!pid) {
+    // Test a very simple sandbox policy to verify that we can
+    // successfully turn on sandboxing.
+    Die::EnableSimpleExit();
+
+    if (HANDLE_EINTR(close(fds[0]))) {
+      WriteFailedStderrSetupMessage(fds[1]);
+      SANDBOX_DIE(NULL);
+    }
+    if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {
+      // Stderr could very well be a file descriptor to .xsession-errors, or
+      // another file, which could be backed by a file system that could cause
+      // dup2 to fail while trying to close stderr. It's important that we do
+      // not fail on trying to close stderr.
+      // If dup2 fails here, we will continue normally, this means that our
+      // parent won't cause a fatal failure if something writes to stderr in
+      // this child.
+    }
+    if (HANDLE_EINTR(close(fds[1]))) {
+      // NOTE(review): this writes the diagnostic to fds[1], the very fd
+      // whose close() just failed; presumably intentional since a failed
+      // close may leave the fd usable — confirm.
+      WriteFailedStderrSetupMessage(fds[1]);
+      SANDBOX_DIE(NULL);
+    }
+
+    evaluators_.clear();
+    setSandboxPolicy(syscallEvaluator, aux);
+    setProcFd(proc_fd);
+
+    // By passing "quiet=true" to "startSandboxInternal()" we suppress
+    // messages for expected and benign failures (e.g. if the current
+    // kernel lacks support for BPF filters).
+    startSandboxInternal(true);
+
+    // Run our code in the sandbox.
+    CodeInSandbox();
+
+    // CodeInSandbox() is not supposed to return here.
+    SANDBOX_DIE(NULL);
+  }
+
+  // In the parent process.
+  if (HANDLE_EINTR(close(fds[1]))) {
+    SANDBOX_DIE("close() failed");
+  }
+  if (sigprocmask(SIG_SETMASK, &oldMask, NULL)) {
+    SANDBOX_DIE("sigprocmask() failed");
+  }
+  int status;
+  if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
+    SANDBOX_DIE("waitpid() failed unexpectedly");
+  }
+  bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;
+
+  // If we fail to support sandboxing, there might be an additional
+  // error message. If so, this was an entirely unexpected and fatal
+  // failure. We should report the failure and somebody must fix
+  // things. This is probably a security-critical bug in the sandboxing
+  // code.
+  if (!rc) {
+    char buf[4096];
+    ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1));
+    if (len > 0) {
+      // Strip trailing newlines before re-raising the child's message.
+      while (len > 1 && buf[len-1] == '\n') {
+        --len;
+      }
+      buf[len] = '\000';
+      SANDBOX_DIE(buf);
+    }
+  }
+  if (HANDLE_EINTR(close(fds[0]))) {
+    SANDBOX_DIE("close() failed");
+  }
+
+  return rc;
+}
+
+// Probes the running kernel for seccomp-BPF support by actually enabling a
+// trivial filter in two throw-away child processes: one checks the filter
+// takes effect (probeProcess), the other checks that vsyscalls survive an
+// allow-all policy (tryVsyscallProcess). Both must succeed.
+bool Sandbox::kernelSupportSeccompBPF(int proc_fd) {
+#if defined(SECCOMP_BPF_VALGRIND_HACKS)
+  if (RUNNING_ON_VALGRIND) {
+    // Valgrind doesn't like our run-time test. Disable testing and assume we
+    // always support sandboxing. This feature should only ever be enabled when
+    // debugging.
+    return true;
+  }
+#endif
+
+  return
+    RunFunctionInPolicy(probeProcess, Sandbox::probeEvaluator, 0, proc_fd) &&
+    RunFunctionInPolicy(tryVsyscallProcess, Sandbox::allowAllEvaluator, 0,
+                        proc_fd);
+}
+
+// Returns (and caches in status_) whether a seccomp-BPF sandbox can be
+// started right now. The expensive kernel probe runs at most once; later
+// calls only re-validate the single-threadedness requirement, since that is
+// the one precondition that can change over the process's lifetime.
+Sandbox::SandboxStatus Sandbox::supportsSeccompSandbox(int proc_fd) {
+  // If the sandbox is currently active, we clearly must have support for
+  // sandboxing.
+  if (status_ == STATUS_ENABLED) {
+    return status_;
+  }
+
+  // Even if the sandbox was previously available, something might have
+  // changed in our run-time environment. Check one more time.
+  if (status_ == STATUS_AVAILABLE) {
+    if (!isSingleThreaded(proc_fd)) {
+      status_ = STATUS_UNAVAILABLE;
+    }
+    return status_;
+  }
+
+  if (status_ == STATUS_UNAVAILABLE && isSingleThreaded(proc_fd)) {
+    // All state transitions resulting in STATUS_UNAVAILABLE are immediately
+    // preceded by STATUS_AVAILABLE. Furthermore, these transitions all
+    // happen, if and only if they are triggered by the process being multi-
+    // threaded.
+    // In other words, if a single-threaded process is currently in the
+    // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
+    // actually available.
+    status_ = STATUS_AVAILABLE;
+    return status_;
+  }
+
+  // If we have not previously checked for availability of the sandbox or if
+  // we otherwise don't believe to have a good cached value, we have to
+  // perform a thorough check now.
+  if (status_ == STATUS_UNKNOWN) {
+    status_ = kernelSupportSeccompBPF(proc_fd)
+      ? STATUS_AVAILABLE : STATUS_UNSUPPORTED;
+
+    // As we are performing our tests from a child process, the run-time
+    // environment that is visible to the sandbox is always guaranteed to be
+    // single-threaded. Let's check here whether the caller is single-
+    // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
+    if (status_ == STATUS_AVAILABLE && !isSingleThreaded(proc_fd)) {
+      status_ = STATUS_UNAVAILABLE;
+    }
+  }
+  return status_;
+}
+
+// Records a file descriptor for /proc, used by isSingleThreaded() and
+// closed by startSandboxInternal() before the filter is installed.
+void Sandbox::setProcFd(int proc_fd) {
+  proc_fd_ = proc_fd;
+}
+
+// Actually turns the sandbox on: validates state, verifies the process is
+// still single-threaded, drops the /proc fd, installs the BPF filter, and
+// marks status_ as STATUS_ENABLED. With |quiet| set, expected/benign
+// installation failures die without a message (used by the probe children).
+void Sandbox::startSandboxInternal(bool quiet) {
+  if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
+    SANDBOX_DIE("Trying to start sandbox, even though it is known to be "
+                "unavailable");
+  } else if (status_ == STATUS_ENABLED) {
+    SANDBOX_DIE("Cannot start sandbox recursively. Use multiple calls to "
+                "setSandboxPolicy() to stack policies instead");
+  }
+  if (proc_fd_ < 0) {
+    proc_fd_ = open("/proc", O_RDONLY|O_DIRECTORY);
+  }
+  if (proc_fd_ < 0) {
+    // For now, continue in degraded mode, if we can't access /proc.
+    // In the future, we might want to tighten this requirement.
+  }
+  if (!isSingleThreaded(proc_fd_)) {
+    SANDBOX_DIE("Cannot start sandbox, if process is already multi-threaded");
+  }
+
+  // We no longer need access to any files in /proc. We want to do this
+  // before installing the filters, just in case that our policy denies
+  // close().
+  if (proc_fd_ >= 0) {
+    if (HANDLE_EINTR(close(proc_fd_))) {
+      SANDBOX_DIE("Failed to close file descriptor for /proc");
+    }
+    proc_fd_ = -1;
+  }
+
+  // Install the filters.
+  installFilter(quiet);
+
+  // We are now inside the sandbox.
+  status_ = STATUS_ENABLED;
+}
+
+// Returns true iff the process has exactly one thread, judged by the link
+// count of /proc/self/task: a directory's st_nlink is 2 ("." and its parent
+// entry) plus one per subdirectory, so one thread yields st_nlink == 3.
+// Returns true optimistically if no /proc fd is available, false on any
+// error inspecting /proc.
+bool Sandbox::isSingleThreaded(int proc_fd) {
+  if (proc_fd < 0) {
+    // Cannot determine whether program is single-threaded. Hope for
+    // the best...
+    return true;
+  }
+
+  struct stat sb;
+  int task = -1;
+  if ((task = openat(proc_fd, "self/task", O_RDONLY|O_DIRECTORY)) < 0 ||
+      fstat(task, &sb) != 0 ||
+      sb.st_nlink != 3 ||
+      HANDLE_EINTR(close(task))) {
+    // Clean up the fd if openat() succeeded but a later step failed.
+    if (task >= 0) {
+      if (HANDLE_EINTR(close(task))) { }
+    }
+    return false;
+  }
+  return true;
+}
+
+// Returns true if |code| denies the system call: either a SECCOMP_RET_TRAP
+// action, or a SECCOMP_RET_ERRNO action carrying an errno in the valid
+// [ERR_MIN_ERRNO, ERR_MAX_ERRNO] range.
+bool Sandbox::isDenied(const ErrorCode& code) {
+  return (code.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
+         (code.err() >= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MIN_ERRNO) &&
+          code.err() <= (SECCOMP_RET_ERRNO + ErrorCode::ERR_MAX_ERRNO));
+}
+
+// Dies unless the policy denies every *invalid* system call number: the
+// SyscallIterator(true) constructor enumerates only numbers outside the
+// architecture's valid range, and each of them must map to a denying
+// ErrorCode.
+void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,
+                                 void *aux) {
+  for (SyscallIterator iter(true); !iter.Done(); ) {
+    uint32_t sysnum = iter.Next();
+    if (!isDenied(syscallEvaluator(sysnum, aux))) {
+      SANDBOX_DIE("Policies should deny system calls that are outside the "
+                  "expected range (typically MIN_SYSCALL..MAX_SYSCALL)");
+    }
+  }
+  return;
+}
+
+// Sanity-checks and registers a policy. Policies accumulate in evaluators_
+// (though installFilter() currently only supports exactly one). Must be
+// called before the sandbox is started.
+void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {
+  if (status_ == STATUS_ENABLED) {
+    SANDBOX_DIE("Cannot change policy after sandbox has started");
+  }
+  policySanityChecks(syscallEvaluator, aux);
+  evaluators_.push_back(std::make_pair(syscallEvaluator, aux));
+}
+
+// Compiles the registered policy into a BPF program, verifies it, and hands
+// it to the kernel via prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER). Also
+// installs the SIGSYS handler that services SECCOMP_RET_TRAP. With |quiet|,
+// kernel-rejection errors die without a message.
+void Sandbox::installFilter(bool quiet) {
+  // Verify that the user pushed a policy.
+  if (evaluators_.empty()) {
+  // NOTE: the label lives inside the if-block so the error path below can
+  // "goto" into it; jumping into a block scope is legal here because no
+  // non-trivially-constructed locals are skipped.
+  filter_failed:
+    SANDBOX_DIE("Failed to configure system call filters");
+  }
+
+  // Set new SIGSYS handler
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(sa));
+  sa.sa_sigaction = &sigSys;
+  sa.sa_flags = SA_SIGINFO;
+  if (sigaction(SIGSYS, &sa, NULL) < 0) {
+    goto filter_failed;
+  }
+
+  // Unmask SIGSYS
+  sigset_t mask;
+  if (sigemptyset(&mask) ||
+      sigaddset(&mask, SIGSYS) ||
+      sigprocmask(SIG_UNBLOCK, &mask, NULL)) {
+    goto filter_failed;
+  }
+
+  // We can't handle stacked evaluators, yet. We'll get there eventually
+  // though. Hang tight.
+  if (evaluators_.size() != 1) {
+    SANDBOX_DIE("Not implemented");
+  }
+
+  // Assemble the BPF filter program.
+  CodeGen *gen = new CodeGen();
+  if (!gen) {
+    SANDBOX_DIE("Out of memory");
+  }
+
+  // If the architecture doesn't match SECCOMP_ARCH, disallow the
+  // system call.
+  Instruction *tail;
+  Instruction *head =
+    gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+                         offsetof(struct arch_seccomp_data, arch),
+  gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
+  tail =
+  // Grab the system call number, so that we can implement jump tables.
+  gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+                       offsetof(struct arch_seccomp_data, nr)),
+  gen->MakeInstruction(BPF_RET+BPF_K,
+                       Kill(
+                           "Invalid audit architecture in BPF filter").err_)));
+
+  // On Intel architectures, verify that system call numbers are in the
+  // expected number range. The older i386 and x86-64 APIs clear bit 30
+  // on all system calls. The newer x32 API always sets bit 30.
+#if defined(__i386__) || defined(__x86_64__)
+  Instruction *invalidX32 =
+    gen->MakeInstruction(BPF_RET+BPF_K,
+                         Kill("Illegal mixing of system call ABIs").err_);
+  Instruction *checkX32 =
+#if defined(__x86_64__) && defined(__ILP32__)
+      gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
+#else
+      gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
+#endif
+      gen->JoinInstructions(tail, checkX32);
+      tail = checkX32;
+#endif
+
+
+  {
+    // Evaluate all possible system calls and group their ErrorCodes into
+    // ranges of identical codes.
+    Ranges ranges;
+    findRanges(&ranges);
+
+    // Compile the system call ranges to an optimized BPF jumptable
+    Instruction *jumptable =
+      assembleJumpTable(gen, ranges.begin(), ranges.end());
+
+    // Append jump table to our pre-amble
+    gen->JoinInstructions(tail, jumptable);
+  }
+
+  // Turn the DAG into a vector of instructions.
+  Program *program = new Program();
+  gen->Compile(head, program);
+  delete gen;
+
+  // Make sure compilation resulted in BPF program that executes
+  // correctly. Otherwise, there is an internal error in our BPF compiler.
+  // There is really nothing the caller can do until the bug is fixed.
+#ifndef NDEBUG
+  const char *err = NULL;
+  if (!Verifier::VerifyBPF(*program, evaluators_, &err)) {
+    SANDBOX_DIE(err);
+  }
+#endif
+
+  // We want to be very careful in not imposing any requirements on the
+  // policies that are set with setSandboxPolicy(). This means, as soon as
+  // the sandbox is active, we shouldn't be relying on libraries that could
+  // be making system calls. This, for example, means we should avoid
+  // using the heap and we should avoid using STL functions.
+  // Temporarily copy the contents of the "program" vector into a
+  // stack-allocated array; and then explicitly destroy that object.
+  // This makes sure we don't ex- or implicitly call new/delete after we
+  // installed the BPF filter program in the kernel. Depending on the
+  // system memory allocator that is in effect, these operators can result
+  // in system calls to things like munmap() or brk().
+  // NOTE: this is a variable-length array, a GCC/Clang extension in C++.
+  struct sock_filter bpf[program->size()];
+  const struct sock_fprog prog = {
+    static_cast<unsigned short>(program->size()), bpf };
+  memcpy(bpf, &(*program)[0], sizeof(bpf));
+  delete program;
+
+  // Release memory that is no longer needed
+  evaluators_.clear();
+  errMap_.clear();
+
+#if defined(SECCOMP_BPF_VALGRIND_HACKS)
+  // Valgrind is really not happy about our sandbox. Disable it when running
+  // in Valgrind. This feature is dangerous and should never be enabled by
+  // default. We protect it behind a pre-processor option.
+  if (!RUNNING_ON_VALGRIND)
+#endif
+  {
+    // Install BPF filter program
+    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+      SANDBOX_DIE(quiet ? NULL : "Kernel refuses to enable no-new-privs");
+    } else {
+      if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+        SANDBOX_DIE(quiet ? NULL : "Kernel refuses to turn on BPF filters");
+      }
+    }
+  }
+
+  return;
+}
+
+// Partitions the entire 32-bit system call number space into a list of
+// contiguous ranges that map to the same ErrorCode, as determined by the
+// installed syscall evaluator. The resulting |ranges| vector is later
+// compiled into a binary-search jump table (see assembleJumpTable()).
+void Sandbox::findRanges(Ranges *ranges) {
+  // Please note that "struct seccomp_data" defines system calls as a signed
+  // int32_t, but BPF instructions always operate on unsigned quantities. We
+  // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,
+  // and then verifying that the rest of the number range (both positive and
+  // negative) all return the same ErrorCode.
+  EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;
+  void *aux = evaluators_.begin()->second;
+  uint32_t oldSysnum = 0;
+  ErrorCode oldErr = evaluateSyscall(oldSysnum, aux);
+  // The ErrorCode that every number outside of the valid syscall range must
+  // map to; any deviation indicates a broken policy.
+  ErrorCode invalidErr = evaluateSyscall(MIN_SYSCALL - 1, aux);
+  for (SyscallIterator iter(false); !iter.Done(); ) {
+    uint32_t sysnum = iter.Next();
+    ErrorCode err = evaluateSyscall(static_cast<int>(sysnum), aux);
+    if (!iter.IsValid(sysnum) && !invalidErr.Equals(err)) {
+      // A proper sandbox policy should always treat system calls outside of
+      // the range MIN_SYSCALL..MAX_SYSCALL (i.e. anything that returns
+      // "false" for SyscallIterator::IsValid()) identically. Typically, all
+      // of these system calls would be denied with the same ErrorCode.
+      SANDBOX_DIE("Invalid seccomp policy");
+    }
+    if (!err.Equals(oldErr) || iter.Done()) {
+      // Close the current range: either the ErrorCode changed at |sysnum|,
+      // or the iterator just produced its very last value.
+      ranges->push_back(Range(oldSysnum, sysnum - 1, oldErr));
+      oldSysnum = sysnum;
+      oldErr = err;
+    }
+  }
+}
+
+// Recursively builds a BPF jump table that performs a binary search over
+// the half-open range [start, stop) of syscall Ranges and returns the
+// ErrorCode of the range that contains the system call number.
+Instruction *Sandbox::assembleJumpTable(CodeGen *gen,
+                                        Ranges::const_iterator start,
+                                        Ranges::const_iterator stop) {
+  // We convert the list of system call ranges into jump table that performs
+  // a binary search over the ranges.
+  // As a sanity check, we need to have at least one distinct range for us
+  // to be able to build a jump table.
+  if (stop - start <= 0) {
+    SANDBOX_DIE("Invalid set of system call ranges");
+  } else if (stop - start == 1) {
+    // If we have narrowed things down to a single range object, we can
+    // return from the BPF filter program.
+    return gen->MakeInstruction(BPF_RET+BPF_K, start->err);
+  }
+
+  // Pick the range object that is located at the mid point of our list.
+  // We compare our system call number against the lowest valid system call
+  // number in this range object. If our number is lower, it is outside of
+  // this range object. If it is greater or equal, it might be inside.
+  Ranges::const_iterator mid = start + (stop - start)/2;
+
+  // Sub-divide the list of ranges and continue recursively.
+  Instruction *jf = assembleJumpTable(gen, start, mid);
+  Instruction *jt = assembleJumpTable(gen, mid, stop);
+  return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);
+}
+
+// SIGSYS handler invoked by the kernel whenever the BPF filter returns
+// SECCOMP_RET_TRAP. Runs in signal context and must therefore be
+// async-signal safe. It looks up the TrapFnc registered via Trap() (the
+// kernel passes the 1-based trap id in si_errno) and stores the handler's
+// return value in the CPU register holding the syscall result.
+void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
+  // Various sanity checks to make sure we actually received a signal
+  // triggered by a BPF filter. If something else triggered SIGSYS
+  // (e.g. kill()), there is really nothing we can do with this signal.
+  if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context ||
+      info->si_errno <= 0 ||
+      static_cast<size_t>(info->si_errno) > trapArraySize_) {
+    // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal
+    // safe and can lead to bugs. We should eventually implement a different
+    // logging and reporting mechanism that is safe to be called from
+    // the sigSys() handler.
+    // TODO: If we feel confident that our code otherwise works correctly, we
+    // could actually make an argument that spurious SIGSYS should
+    // just get silently ignored. TBD
+  sigsys_err:
+    SANDBOX_DIE("Unexpected SIGSYS received");
+  }
+
+  // Signal handlers should always preserve "errno". Otherwise, we could
+  // trigger really subtle bugs.
+  int old_errno = errno;
+
+  // Obtain the signal context. This, most notably, gives us access to
+  // all CPU registers at the time of the signal.
+  ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
+
+  // Obtain the siginfo information that is specific to SIGSYS. Unfortunately,
+  // most versions of glibc don't include this information in siginfo_t. So,
+  // we need to explicitly copy it into an arch_sigsys structure.
+  struct arch_sigsys sigsys;
+  memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
+
+  // Some more sanity checks: the kernel-reported instruction pointer,
+  // syscall number, and architecture must match the signal context.
+  if (sigsys.ip != reinterpret_cast<void *>(SECCOMP_IP(ctx)) ||
+      sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) ||
+      sigsys.arch != SECCOMP_ARCH) {
+    goto sigsys_err;
+  }
+
+  // Copy the seccomp-specific data into an arch_seccomp_data structure. This
+  // is what we are showing to TrapFnc callbacks that the system call evaluator
+  // registered with the sandbox.
+  struct arch_seccomp_data data = {
+    sigsys.nr,
+    SECCOMP_ARCH,
+    reinterpret_cast<uint64_t>(sigsys.ip),
+    {
+      static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
+      static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
+      static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
+      static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
+      static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
+      static_cast<uint64_t>(SECCOMP_PARM6(ctx))
+    }
+  };
+
+  // Now call the TrapFnc callback associated with this particular instance
+  // of SECCOMP_RET_TRAP. si_errno carries the 1-based id assigned by Trap().
+  const ErrorCode& err = trapArray_[info->si_errno - 1];
+  intptr_t rc = err.fnc_(data, err.aux_);
+
+  // Update the CPU register that stores the return code of the system call
+  // that we just handled, and restore "errno" to the value that it had
+  // before entering the signal handler.
+  SECCOMP_RESULT(ctx) = static_cast<greg_t>(rc);
+  errno = old_errno;
+
+  return;
+}
+
+// Returns an ErrorCode that makes the kernel raise SIGSYS so that sigSys()
+// invokes |fnc| with |aux|. Identical (fnc, aux) pairs share one trap id.
+ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
+  // Each unique pair of TrapFnc and auxiliary data make up a distinct instance
+  // of a SECCOMP_RET_TRAP.
+  std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux);
+  TrapIds::const_iterator iter = trapIds_.find(key);
+  uint16_t id;
+  if (iter != trapIds_.end()) {
+    // We have seen this pair before. Return the same id that we assigned
+    // earlier.
+    id = iter->second;
+  } else {
+    // This is a new pair. Remember it and assign a new id.
+    // Please note that we have to store traps in memory that doesn't get
+    // deallocated when the program is shutting down. A memory leak is
+    // intentional, because we might otherwise not be able to execute
+    // system calls part way through the program shutting down.
+    if (!traps_) {
+      traps_ = new Traps();
+    }
+    if (traps_->size() >= SECCOMP_RET_DATA) {
+      // In practice, this is pretty much impossible to trigger, as there
+      // are other kernel limitations that restrict overall BPF program sizes.
+      SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
+    }
+    // Trap ids start at 1; sigSys() receives the id in si_errno, which has
+    // to be positive.
+    id = traps_->size() + 1;
+
+    traps_->push_back(ErrorCode(fnc, aux, id));
+    trapIds_[key] = id;
+
+    // We want to access the traps_ vector from our signal handler. But
+    // we are not assured that doing so is async-signal safe. On the other
+    // hand, C++ guarantees that the contents of a vector is stored in a
+    // contiguous C-style array.
+    // So, we look up the address and size of this array outside of the
+    // signal handler, where we can safely do so. This also covers any
+    // reallocation that push_back() may have performed.
+    trapArray_ = &(*traps_)[0];
+    trapArraySize_ = id;
+  }
+
+  ErrorCode err = ErrorCode(fnc, aux, id);
+  return errMap_[err.err()] = err;
+}
+
+// TrapFnc used by Kill(): |aux| carries the message that was passed to
+// Kill(). SANDBOX_DIE() terminates the process, so this never returns.
+intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {
+  SANDBOX_DIE(static_cast<char *>(aux));
+}
+
+// Returns an ErrorCode that, when triggered, kills the program and prints
+// |msg|. |msg| must remain valid for as long as the sandbox is active.
+ErrorCode Sandbox::Kill(const char *msg) {
+  return Trap(bpfFailure, const_cast<char *>(msg));
+}
+
+// Out-of-line definitions of Sandbox's static member variables.
+Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
+int Sandbox::proc_fd_ = -1;
+Sandbox::Evaluators Sandbox::evaluators_;
+Sandbox::ErrMap Sandbox::errMap_;
+Sandbox::Traps *Sandbox::traps_ = NULL;
+Sandbox::TrapIds Sandbox::trapIds_;
+ErrorCode *Sandbox::trapArray_ = NULL;
+size_t Sandbox::trapArraySize_ = 0;
+
+} // namespace
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
new file mode 100644
index 0000000..a50ddb3
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
@@ -0,0 +1,345 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H__
+
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/audit.h>
+#include <linux/filter.h>
+// #include <linux/seccomp.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sched.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/ipc.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <utility>
+#include <vector>
+
+#ifndef SECCOMP_BPF_STANDALONE
+#include "base/basictypes.h"
+#include "base/eintr_wrapper.h"
+#include "base/logging.h"
+#endif
+
+#if defined(SECCOMP_BPF_VALGRIND_HACKS)
+#ifndef SECCOMP_BPF_STANDALONE
+#include "base/third_party/valgrind/valgrind.h"
+#endif
+#endif
+
+
+// The Seccomp2 kernel ABI is not part of older versions of glibc.
+// As we can't break compilation with these versions of the library,
+// we explicitly define all missing symbols.
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+#ifndef IPC_64
+#define IPC_64 0x0100
+#endif
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_DISABLED 0
+#define SECCOMP_MODE_STRICT 1
+#define SECCOMP_MODE_FILTER 2 // Uses a user-supplied filter
+#define SECCOMP_RET_KILL 0x00000000U // Kill the task immediately
+#define SECCOMP_RET_TRAP 0x00030000U // Disallow and force a SIGSYS
+#define SECCOMP_RET_ERRNO 0x00050000U // Returns an errno
+#define SECCOMP_RET_TRACE 0x7ff00000U // Pass to a tracer or disallow
+#define SECCOMP_RET_ALLOW 0x7fff0000U // Allow
+#define SECCOMP_RET_INVALID 0x8f8f8f8fU // Illegal return value
+#define SECCOMP_RET_ACTION 0xffff0000U // Masks for the return value
+#define SECCOMP_RET_DATA 0x0000ffffU // sections
+#endif
+#define SECCOMP_DENY_ERRNO EPERM
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP 1
+#endif
+
+// Impose some reasonable maximum BPF program size. Realistically, the
+// kernel probably has much lower limits. But by limiting to less than
+// 30 bits, we can ease requirements on some of our data types.
+#define SECCOMP_MAX_PROGRAM_SIZE (1<<30)
+
+#if defined(__i386__)
+#define MIN_SYSCALL 0u
+#define MAX_PUBLIC_SYSCALL 1024u
+#define MAX_SYSCALL MAX_PUBLIC_SYSCALL
+#define SECCOMP_ARCH AUDIT_ARCH_I386
+
+#define SECCOMP_REG(_ctx, _reg) ((_ctx)->uc_mcontext.gregs[(_reg)])
+#define SECCOMP_RESULT(_ctx) SECCOMP_REG(_ctx, REG_EAX)
+#define SECCOMP_SYSCALL(_ctx) SECCOMP_REG(_ctx, REG_EAX)
+#define SECCOMP_IP(_ctx) SECCOMP_REG(_ctx, REG_EIP)
+#define SECCOMP_PARM1(_ctx) SECCOMP_REG(_ctx, REG_EBX)
+#define SECCOMP_PARM2(_ctx) SECCOMP_REG(_ctx, REG_ECX)
+#define SECCOMP_PARM3(_ctx) SECCOMP_REG(_ctx, REG_EDX)
+#define SECCOMP_PARM4(_ctx) SECCOMP_REG(_ctx, REG_ESI)
+#define SECCOMP_PARM5(_ctx) SECCOMP_REG(_ctx, REG_EDI)
+#define SECCOMP_PARM6(_ctx) SECCOMP_REG(_ctx, REG_EBP)
+
+#elif defined(__x86_64__)
+#define MIN_SYSCALL 0u
+#define MAX_PUBLIC_SYSCALL 1024u
+#define MAX_SYSCALL MAX_PUBLIC_SYSCALL
+#define SECCOMP_ARCH AUDIT_ARCH_X86_64
+
+#define SECCOMP_REG(_ctx, _reg) ((_ctx)->uc_mcontext.gregs[(_reg)])
+#define SECCOMP_RESULT(_ctx) SECCOMP_REG(_ctx, REG_RAX)
+#define SECCOMP_SYSCALL(_ctx) SECCOMP_REG(_ctx, REG_RAX)
+#define SECCOMP_IP(_ctx) SECCOMP_REG(_ctx, REG_RIP)
+#define SECCOMP_PARM1(_ctx) SECCOMP_REG(_ctx, REG_RDI)
+#define SECCOMP_PARM2(_ctx) SECCOMP_REG(_ctx, REG_RSI)
+#define SECCOMP_PARM3(_ctx) SECCOMP_REG(_ctx, REG_RDX)
+#define SECCOMP_PARM4(_ctx) SECCOMP_REG(_ctx, REG_R10)
+#define SECCOMP_PARM5(_ctx) SECCOMP_REG(_ctx, REG_R8)
+#define SECCOMP_PARM6(_ctx) SECCOMP_REG(_ctx, REG_R9)
+
+#elif defined(__arm__) && (defined(__thumb__) || defined(__ARM_EABI__))
+// ARM EABI includes "ARM private" system calls starting at |__ARM_NR_BASE|,
+// and a "ghost syscall private to the kernel", cmpxchg,
+// at |__ARM_NR_BASE+0x00fff0|.
+// See </arch/arm/include/asm/unistd.h> in the Linux kernel.
+#define MIN_SYSCALL ((unsigned int)__NR_SYSCALL_BASE)
+#define MAX_PUBLIC_SYSCALL (MIN_SYSCALL + 1024u)
+#define MIN_PRIVATE_SYSCALL ((unsigned int)__ARM_NR_BASE)
+#define MAX_PRIVATE_SYSCALL (MIN_PRIVATE_SYSCALL + 16u)
+#define MIN_GHOST_SYSCALL ((unsigned int)__ARM_NR_BASE + 0xfff0u)
+#define MAX_SYSCALL (MIN_GHOST_SYSCALL + 4u)
+// <linux/audit.h> includes <linux/elf-em.h>, which does not define EM_ARM.
+// <linux/elf.h> only includes <asm/elf.h> if we're in the kernel.
+# if !defined(EM_ARM)
+# define EM_ARM 40
+# endif
+#define SECCOMP_ARCH AUDIT_ARCH_ARM
+
+// ARM sigcontext_t is different from i386/x86_64.
+// See </arch/arm/include/asm/sigcontext.h> in the Linux kernel.
+#define SECCOMP_REG(_ctx, _reg) ((_ctx)->uc_mcontext.arm_##_reg)
+// ARM EABI syscall convention.
+#define SECCOMP_RESULT(_ctx) SECCOMP_REG(_ctx, r0)
+#define SECCOMP_SYSCALL(_ctx) SECCOMP_REG(_ctx, r7)
+#define SECCOMP_IP(_ctx) SECCOMP_REG(_ctx, pc)
+#define SECCOMP_PARM1(_ctx) SECCOMP_REG(_ctx, r0)
+#define SECCOMP_PARM2(_ctx) SECCOMP_REG(_ctx, r1)
+#define SECCOMP_PARM3(_ctx) SECCOMP_REG(_ctx, r2)
+#define SECCOMP_PARM4(_ctx) SECCOMP_REG(_ctx, r3)
+#define SECCOMP_PARM5(_ctx) SECCOMP_REG(_ctx, r4)
+#define SECCOMP_PARM6(_ctx) SECCOMP_REG(_ctx, r5)
+
+#else
+#error Unsupported target platform
+
+#endif
+
+#if defined(SECCOMP_BPF_STANDALONE)
+#define arraysize(x) (sizeof(x)/sizeof(*(x)))
+#define HANDLE_EINTR TEMP_FAILURE_RETRY
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+ TypeName(); \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+#endif
+
+#include "sandbox/linux/seccomp-bpf/die.h"
+#include "sandbox/linux/seccomp-bpf/errorcode.h"
+
+namespace playground2 {
+
+// User-space view of the seccomp data for one system call: sigSys() fills
+// it in from the signal context before handing it to TrapFnc callbacks.
+struct arch_seccomp_data {
+  int nr;                        // System call number.
+  uint32_t arch;                 // AUDIT_ARCH_* value (see SECCOMP_ARCH).
+  uint64_t instruction_pointer;  // CPU instruction pointer at the syscall.
+  uint64_t args[6];              // Up to six system call arguments.
+};
+
+// SIGSYS-specific siginfo fields that most glibc versions do not expose;
+// sigSys() memcpy()s them out of info->_sifields.
+struct arch_sigsys {
+  void *ip;           // Instruction pointer at the time of the signal.
+  int nr;             // System call number that triggered SIGSYS.
+  unsigned int arch;  // AUDIT_ARCH_* value.
+};
+
+class CodeGen;
+class SandboxUnittestHelper;
+struct Instruction;
+
+// Static-only class that compiles a syscall policy into a seccomp-BPF
+// filter, installs it in the kernel, and dispatches SIGSYS traps back to
+// user-space handlers.
+class Sandbox {
+ public:
+  enum SandboxStatus {
+    STATUS_UNKNOWN,      // Status prior to calling supportsSeccompSandbox()
+    STATUS_UNSUPPORTED,  // The kernel does not appear to support sandboxing
+    STATUS_UNAVAILABLE,  // Currently unavailable but might work again later
+    STATUS_AVAILABLE,    // Sandboxing is available but not currently active
+    STATUS_ENABLED       // The sandbox is now active
+  };
+
+  // TrapFnc is a pointer to a function that handles Seccomp traps in
+  // user-space. The seccomp policy can request that a trap handler gets
+  // installed; it does so by returning a suitable ErrorCode() from the
+  // syscallEvaluator. See the ErrorCode() constructor for how to pass in
+  // the function pointer.
+  // Please note that TrapFnc is executed from signal context and must be
+  // async-signal safe:
+  // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
+  typedef intptr_t (*TrapFnc)(const struct arch_seccomp_data& args, void *aux);
+
+  // Comparison operators for inspecting system call arguments.
+  // NOTE(review): no consumer of Operation/Constraint is visible in this
+  // file; confirm where they are used before changing them.
+  enum Operation {
+    OP_NOP, OP_EQUAL, OP_NOTEQUAL, OP_LESS,
+    OP_LESS_EQUAL, OP_GREATER, OP_GREATER_EQUAL,
+    OP_HAS_BITS, OP_DOES_NOT_HAVE_BITS
+  };
+
+  // A single comparison against a system call argument and the ErrorCodes
+  // to return depending on the outcome.
+  struct Constraint {
+    bool is32bit;      // presumably restricts the comparison to 32 bits — confirm
+    Operation op;      // Comparison operator to apply.
+    uint32_t value;    // Value the argument is compared against.
+    ErrorCode passed;  // Returned when the comparison holds.
+    ErrorCode failed;  // Returned when the comparison fails.
+  };
+
+  // When calling setSandboxPolicy(), the caller can provide an arbitrary
+  // pointer. This pointer will then be forwarded to the sandbox policy
+  // each time a call is made through an EvaluateSyscall function pointer.
+  // One common use case would be to pass the "aux" pointer as an argument
+  // to Trap() functions.
+  typedef ErrorCode (*EvaluateSyscall)(int sysnum, void *aux);
+  typedef std::vector<std::pair<EvaluateSyscall, void *> >Evaluators;
+
+  // Checks whether a particular system call number is valid on the current
+  // architecture. E.g. on ARM there's a non-contiguous range of private
+  // system calls.
+  static bool isValidSyscallNumber(int sysnum);
+
+  // There are a lot of reasons why the Seccomp sandbox might not be available.
+  // This could be because the kernel does not support Seccomp mode, or it
+  // could be because another sandbox is already active.
+  // "proc_fd" should be a file descriptor for "/proc", or -1 if not
+  // provided by the caller.
+  static SandboxStatus supportsSeccompSandbox(int proc_fd);
+
+  // The sandbox needs to be able to access files in "/proc/self". If this
+  // directory is not accessible when "startSandbox()" gets called, the caller
+  // can provide an already opened file descriptor by calling "setProcFd()".
+  // The sandbox becomes the new owner of this file descriptor and will
+  // eventually close it when "startSandbox()" executes.
+  static void setProcFd(int proc_fd);
+
+  // The system call evaluator function is called with the system
+  // call number. It can decide to allow the system call unconditionally
+  // by returning ERR_ALLOWED; it can deny the system call unconditionally by
+  // returning an appropriate "errno" value; or it can request inspection
+  // of system call argument(s) by returning a suitable ErrorCode.
+  // The "aux" parameter can be used to pass optional data to the system call
+  // evaluator. There are different possible uses for this data, but one of the
+  // use cases would be for the policy to then forward this pointer to a Trap()
+  // handler. In this case, of course, the data that is pointed to must remain
+  // valid for the entire time that Trap() handlers can be called; typically,
+  // this would be the lifetime of the program.
+  static void setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux);
+
+  // We can use ErrorCode to request calling of a trap handler. This method
+  // performs the required wrapping of the callback function into an
+  // ErrorCode object.
+  // The "aux" field can carry a pointer to arbitrary data. See EvaluateSyscall
+  // for a description of how to pass data from setSandboxPolicy() to a Trap()
+  // handler.
+  static ErrorCode Trap(ErrorCode::TrapFnc fnc, const void *aux);
+
+  // Kill the program and print an error message.
+  static ErrorCode Kill(const char *msg);
+
+  // This is the main public entry point. It finds all system calls that
+  // need rewriting, sets up the resources needed by the sandbox, and
+  // enters Seccomp mode.
+  static void startSandbox() { startSandboxInternal(false); }
+
+ private:
+  friend class ErrorCode;
+  friend class CodeGen;
+  friend class SandboxUnittestHelper;
+  friend class Util;
+  friend class Verifier;
+
+  // A compiled BPF program is a vector of BPF machine instructions.
+  typedef std::vector<struct sock_filter> Program;
+
+  // A contiguous range of syscall numbers [from, to] that all map to the
+  // same ErrorCode; produced by findRanges().
+  struct Range {
+    Range(uint32_t f, uint32_t t, const ErrorCode& e) :
+      from(f),
+      to(t),
+      err(e) {
+    }
+    uint32_t from, to;
+    ErrorCode err;
+  };
+  typedef std::vector<Range> Ranges;
+  typedef std::map<uint32_t, ErrorCode> ErrMap;
+  typedef std::vector<ErrorCode> Traps;
+  typedef std::map<std::pair<TrapFnc, const void *>, int> TrapIds;
+
+  // Get a file descriptor pointing to "/proc", if currently available.
+  static int proc_fd() { return proc_fd_; }
+
+  static ErrorCode probeEvaluator(int sysnum, void *) __attribute__((const));
+  static void probeProcess(void);
+  static ErrorCode allowAllEvaluator(int sysnum, void *aux);
+  static void tryVsyscallProcess(void);
+  static bool kernelSupportSeccompBPF(int proc_fd);
+  static bool RunFunctionInPolicy(void (*function)(),
+                                  EvaluateSyscall syscallEvaluator,
+                                  void *aux,
+                                  int proc_fd);
+  static void startSandboxInternal(bool quiet);
+  static bool isSingleThreaded(int proc_fd);
+  static bool isDenied(const ErrorCode& code);
+  static bool disableFilesystem();
+  static void policySanityChecks(EvaluateSyscall syscallEvaluator,
+                                 void *aux);
+  static void installFilter(bool quiet);
+  static void findRanges(Ranges *ranges);
+  static Instruction *assembleJumpTable(CodeGen *gen,
+                                        Ranges::const_iterator start,
+                                        Ranges::const_iterator stop);
+  static void sigSys(int nr, siginfo_t *info, void *void_context);
+  static intptr_t bpfFailure(const struct arch_seccomp_data& data, void *aux);
+  static int getTrapId(TrapFnc fnc, const void *aux);
+
+  static SandboxStatus status_;   // Current sandbox status.
+  static int proc_fd_;            // File descriptor for "/proc", or -1.
+  static Evaluators evaluators_;  // Installed policy; findRanges() only
+                                  // consults the first entry.
+  static ErrMap errMap_;          // Maps raw BPF return values to ErrorCodes.
+  static Traps *traps_;           // Registered Trap() handlers (leaked on
+                                  // purpose; see Trap()).
+  static TrapIds trapIds_;        // Maps (fnc, aux) pairs to trap ids.
+  static ErrorCode *trapArray_;   // Contiguous view of *traps_ for sigSys().
+  static size_t trapArraySize_;   // Number of valid entries in trapArray_.
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Sandbox);
+};
+
+} // namespace
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H__
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
new file mode 100644
index 0000000..8ea23d9
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
@@ -0,0 +1,267 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <ostream>
+
+#include "sandbox/linux/seccomp-bpf/bpf_tests.h"
+#include "sandbox/linux/seccomp-bpf/verifier.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using namespace playground2;
+
+namespace {
+
+const int kExpectedReturnValue = 42;
+
+// This test should execute no matter whether we have kernel support. So,
+// we make it a TEST() instead of a BPF_TEST().
+TEST(SandboxBpf, CallSupports) {
+  // We check that we don't crash, but it's ok if the kernel doesn't
+  // support it.
+  bool seccomp_bpf_supported =
+      Sandbox::supportsSeccompSandbox(-1) == Sandbox::STATUS_AVAILABLE;
+  // We want to log whether or not seccomp BPF is actually supported
+  // since actual test coverage depends on it.
+  RecordProperty("SeccompBPFSupported",
+                 seccomp_bpf_supported ? "true." : "false.");
+  // Also echo the result to stdout so it shows up in plain test logs.
+  std::cout << "Seccomp BPF supported: "
+            << (seccomp_bpf_supported ? "true." : "false.")
+            << "\n";
+}
+
+// Probing for seccomp BPF support must be safe to repeat: calling it twice
+// in the same process should not crash.
+SANDBOX_TEST(SandboxBpf, CallSupportsTwice) {
+  Sandbox::supportsSeccompSandbox(-1);
+  Sandbox::supportsSeccompSandbox(-1);
+}
+
+// BPF_TEST does a lot of the boiler-plate code around setting up a
+// policy and optional passing data between the caller, the policy and
+// any Trap() handlers. This is great for writing short and concise tests,
+// and it helps us avoid accidentally forgetting any of the crucial steps in
+// setting up the sandbox. But it wouldn't hurt to have at least one test
+// that explicitly walks through all these steps.
+
+// Trap handler that emulates getpid(): returns the counter pointed to by
+// |aux| and then increments it, so successive calls yield 0, 1, 2, ...
+intptr_t FakeGetPid(const struct arch_seccomp_data& args, void *aux) {
+  BPF_ASSERT(aux);
+  pid_t *pid_ptr = static_cast<pid_t *>(aux);
+  return (*pid_ptr)++;
+}
+
+// Policy that routes getpid() to the FakeGetPid() trap handler (with |aux|
+// as its counter) and allows every other valid system call.
+ErrorCode VerboseAPITestingPolicy(int sysno, void *aux) {
+  if (!Sandbox::isValidSyscallNumber(sysno)) {
+    return ErrorCode(ENOSYS);
+  } else if (sysno == __NR_getpid) {
+    return Sandbox::Trap(FakeGetPid, aux);
+  } else {
+    return ErrorCode(ErrorCode::ERR_ALLOWED);
+  }
+}
+
+// Explicitly walks through every step of setting up a sandbox (support
+// check, policy installation, start), then verifies that the FakeGetPid()
+// trap handler is called instead of the real getpid().
+SANDBOX_TEST(SandboxBpf, VerboseAPITesting) {
+  if (Sandbox::supportsSeccompSandbox(-1) ==
+      playground2::Sandbox::STATUS_AVAILABLE) {
+    pid_t test_var = 0;
+    playground2::Sandbox::setSandboxPolicy(VerboseAPITestingPolicy, &test_var);
+    playground2::Sandbox::startSandbox();
+
+    BPF_ASSERT(test_var == 0);
+    BPF_ASSERT(syscall(__NR_getpid) == 0);
+    BPF_ASSERT(test_var == 1);
+    BPF_ASSERT(syscall(__NR_getpid) == 1);
+    BPF_ASSERT(test_var == 2);
+
+    // N.B.: Any future call to getpid() would corrupt the stack.
+    //       This is OK. The SANDBOX_TEST() macro is guaranteed to
+    //       only ever call _exit() after the test completes.
+  }
+}
+
+// A simple blacklist test
+
+// Policy that denies nanosleep() with EACCES and allows every other valid
+// system call.
+ErrorCode BlacklistNanosleepPolicy(int sysno, void *) {
+  if (!Sandbox::isValidSyscallNumber(sysno)) {
+    // FIXME: we should really not have to do that in a trivial policy
+    return ErrorCode(ENOSYS);
+  }
+
+  switch (sysno) {
+    case __NR_nanosleep:
+      return ErrorCode(EACCES);
+    default:
+      return ErrorCode(ErrorCode::ERR_ALLOWED);
+  }
+}
+
+BPF_TEST(SandboxBpf, ApplyBasicBlacklistPolicy, BlacklistNanosleepPolicy) {
+  // nanosleep() should be denied with the errno chosen by the policy.
+  // Reset errno first; syscall() only sets it on failure.
+  const struct timespec ts = {0, 0};
+  errno = 0;
+  BPF_ASSERT(syscall(__NR_nanosleep, &ts, NULL) == -1);
+  BPF_ASSERT(errno == EACCES);
+}
+
+// Now do a simple whitelist test
+
+// Policy that allows only getpid() and exit_group(); everything else is
+// denied with ENOMEM.
+ErrorCode WhitelistGetpidPolicy(int sysno, void *) {
+  switch (sysno) {
+    case __NR_getpid:
+    case __NR_exit_group:
+      return ErrorCode(ErrorCode::ERR_ALLOWED);
+    default:
+      return ErrorCode(ENOMEM);
+  }
+}
+
+BPF_TEST(SandboxBpf, ApplyBasicWhitelistPolicy, WhitelistGetpidPolicy) {
+  // getpid() should be allowed
+  errno = 0;
+  BPF_ASSERT(syscall(__NR_getpid) > 0);
+  BPF_ASSERT(errno == 0);
+
+  // getpgid() should be denied. errno is still 0 at this point, since the
+  // previous system call succeeded.
+  BPF_ASSERT(getpgid(0) == -1);
+  BPF_ASSERT(errno == ENOMEM);
+}
+
+// A simple blacklist policy, with a SIGSYS handler
+
+// Trap handler that records its invocation by writing kExpectedReturnValue
+// into |aux| and then fails the system call with ENOMEM.
+intptr_t EnomemHandler(const struct arch_seccomp_data& args, void *aux) {
+  // We also check that the auxiliary data is correct
+  SANDBOX_ASSERT(aux);
+  *(static_cast<int*>(aux)) = kExpectedReturnValue;
+  return -ENOMEM;
+}
+
+// Policy that routes nanosleep() to the EnomemHandler() trap handler and
+// allows every other valid system call.
+ErrorCode BlacklistNanosleepPolicySigsys(int sysno, void *aux) {
+  if (!Sandbox::isValidSyscallNumber(sysno)) {
+    // FIXME: we should really not have to do that in a trivial policy
+    return ErrorCode(ENOSYS);
+  }
+
+  switch (sysno) {
+    case __NR_nanosleep:
+      return Sandbox::Trap(EnomemHandler, aux);
+    default:
+      return ErrorCode(ErrorCode::ERR_ALLOWED);
+  }
+}
+
+BPF_TEST(SandboxBpf, BasicBlacklistWithSigsys,
+         BlacklistNanosleepPolicySigsys, int /* BPF_AUX */) {
+  // getpid() should work properly
+  errno = 0;
+  BPF_ASSERT(syscall(__NR_getpid) > 0);
+  BPF_ASSERT(errno == 0);
+
+  // Our auxiliary data should be overwritten by the signal handler
+  BPF_AUX = -1;
+  const struct timespec ts = {0, 0};
+  BPF_ASSERT(syscall(__NR_nanosleep, &ts, NULL) == -1);
+  BPF_ASSERT(errno == ENOMEM);
+
+  // We expect the signal handler to modify AuxData
+  BPF_ASSERT(BPF_AUX == kExpectedReturnValue);
+}
+
+// A more complex, but synthetic policy. This tests the correctness of the BPF
+// program by iterating through all syscalls and checking for an errno that
+// depends on the syscall number. Unlike the Verifier, this exercises the BPF
+// interpreter in the kernel.
+
+// We try to make sure we exercise optimizations in the BPF compiler. We make
+// sure that the compiler can have an opportunity to coalesce syscalls with
+// contiguous numbers and we also make sure that disjoint sets can return the
+// same errno.
+// Maps a system call number to a deterministic errno in 1..29: contiguous
+// groups of four system call numbers share the same errno (the "& ~3"
+// clears the two low bits), and the "+ 1" guarantees we never return a
+// zero errno.
+int SysnoToRandomErrno(int sysno) {
+  return ((sysno & ~3) >> 2) % 29 + 1;
+}
+
+// Policy that denies almost every system call with the errno computed by
+// SysnoToRandomErrno(), while keeping exit_group() and write() functional.
+ErrorCode SyntheticPolicy(int sysno, void *) {
+  if (!Sandbox::isValidSyscallNumber(sysno)) {
+    // FIXME: we should really not have to do that in a trivial policy
+    return ErrorCode(ENOSYS);
+  }
+
+// TODO(jorgelo): remove this once the new code generator lands.
+#if defined(__arm__)
+  if (sysno > static_cast<int>(MAX_PUBLIC_SYSCALL)) {
+    return ErrorCode(ENOSYS);
+  }
+#endif
+
+  if (sysno == __NR_exit_group || sysno == __NR_write) {
+    // exit_group() is special, we really need it to work.
+    // write() is needed for BPF_ASSERT() to report a useful error message.
+    return ErrorCode(ErrorCode::ERR_ALLOWED);
+  } else {
+    return ErrorCode(SysnoToRandomErrno(sysno));
+  }
+}
+
+BPF_TEST(SandboxBpf, SyntheticPolicy, SyntheticPolicy) {
+  // Ensure that kExpectedReturnValue + syscallnumber + 1 does not overflow
+  // an int.
+  BPF_ASSERT(
+   std::numeric_limits<int>::max() - kExpectedReturnValue - 1 >=
+   static_cast<int>(MAX_PUBLIC_SYSCALL));
+
+  for (int syscall_number =  static_cast<int>(MIN_SYSCALL);
+           syscall_number <= static_cast<int>(MAX_PUBLIC_SYSCALL);
+         ++syscall_number) {
+    if (syscall_number == __NR_exit_group ||
+        syscall_number == __NR_write) {
+      // exit_group() and write() are allowed by the policy; skip them.
+      continue;
+    }
+    errno = 0;
+    BPF_ASSERT(syscall(syscall_number) == -1);
+    BPF_ASSERT(errno == SysnoToRandomErrno(syscall_number));
+  }
+}
+
+#if defined(__arm__)
+// A simple policy that tests whether ARM private system calls are supported
+// by our BPF compiler and by the BPF interpreter in the kernel.
+
+// For ARM private system calls, return an errno equal to their offset from
+// MIN_PRIVATE_SYSCALL plus 1 (to avoid NUL errno).
+int ArmPrivateSysnoToErrno(int sysno) {
+  if (sysno >= static_cast<int>(MIN_PRIVATE_SYSCALL) &&
+      sysno <= static_cast<int>(MAX_PRIVATE_SYSCALL)) {
+    // Offset from MIN_PRIVATE_SYSCALL, shifted by one to avoid errno 0.
+    return (sysno - MIN_PRIVATE_SYSCALL) + 1;
+  } else {
+    // Anything outside the ARM private range is treated as unimplemented.
+    return ENOSYS;
+  }
+}
+
+// Policy that denies the upper part of the ARM private syscall range with
+// the errno computed by ArmPrivateSysnoToErrno(), and allows all other
+// valid system calls.
+ErrorCode ArmPrivatePolicy(int sysno, void *) {
+  if (!Sandbox::isValidSyscallNumber(sysno)) {
+    // FIXME: we should really not have to do that in a trivial policy.
+    return ErrorCode(ENOSYS);
+  }
+
+  // Start from |__ARM_NR_set_tls + 1| so as not to mess with actual
+  // ARM private system calls.
+  if (sysno >= static_cast<int>(__ARM_NR_set_tls + 1) &&
+      sysno <= static_cast<int>(MAX_PRIVATE_SYSCALL)) {
+    return ErrorCode(ArmPrivateSysnoToErrno(sysno));
+  } else {
+    return ErrorCode(ErrorCode::ERR_ALLOWED);
+  }
+}
+
+// Exercise the kernel's BPF interpreter over the covered ARM private range
+// and verify each syscall fails with the errno the policy computed.
+BPF_TEST(SandboxBpf, ArmPrivatePolicy, ArmPrivatePolicy) {
+  for (int syscall_number =  static_cast<int>(__ARM_NR_set_tls + 1);
+           syscall_number <= static_cast<int>(MAX_PRIVATE_SYSCALL);
+         ++syscall_number) {
+    errno = 0;
+    BPF_ASSERT(syscall(syscall_number) == -1);
+    BPF_ASSERT(errno == ArmPrivateSysnoToErrno(syscall_number));
+  }
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp-bpf/syscall_iterator.cc b/sandbox/linux/seccomp-bpf/syscall_iterator.cc
new file mode 100644
index 0000000..583dcf6
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/syscall_iterator.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
+
+namespace playground2 {
+
+// Returns the current value and advances the iterator. Valid syscall
+// ranges are walked one number at a time; invalid gaps are skipped except
+// for their boundary values. With |invalid_only_| set, valid ranges are
+// skipped as well.
+uint32_t SyscallIterator::Next() {
+  if (done_) {
+    return num_;
+  }
+
+  uint32_t val;
+  do {
+    // |num_| has been initialized to 0, which we assume is also MIN_SYSCALL.
+    // This is true for supported architectures (Intel and ARM EABI).
+    CHECK_EQ(MIN_SYSCALL, 0u);
+    val = num_;
+
+    // First we iterate up to MAX_PUBLIC_SYSCALL, which is equal to MAX_SYSCALL
+    // on Intel architectures, but leaves room for private syscalls on ARM.
+    if (num_ <= MAX_PUBLIC_SYSCALL) {
+      if (invalid_only_ && num_ < MAX_PUBLIC_SYSCALL) {
+        num_ = MAX_PUBLIC_SYSCALL;
+      } else {
+        ++num_;
+      }
+#if defined(__arm__)
+    // ARM EABI includes "ARM private" system calls starting at
+    // MIN_PRIVATE_SYSCALL, and a "ghost syscall private to the kernel" at
+    // MIN_GHOST_SYSCALL.
+    } else if (num_ < MIN_PRIVATE_SYSCALL - 1) {
+      num_ = MIN_PRIVATE_SYSCALL - 1;
+    } else if (num_ <= MAX_PRIVATE_SYSCALL) {
+      if (invalid_only_ && num_ < MAX_PRIVATE_SYSCALL) {
+        num_ = MAX_PRIVATE_SYSCALL;
+      } else {
+        ++num_;
+      }
+    } else if (num_ < MIN_GHOST_SYSCALL - 1) {
+      num_ = MIN_GHOST_SYSCALL - 1;
+    } else if (num_ <= MAX_SYSCALL) {
+      if (invalid_only_ && num_ < MAX_SYSCALL) {
+        num_ = MAX_SYSCALL;
+      } else {
+        ++num_;
+      }
+#endif
+    // BPF programs only ever operate on unsigned quantities. So, that's how
+    // we iterate; we return values from 0..0xFFFFFFFFu. But there are places,
+    // where the kernel might interpret system call numbers as signed
+    // quantities, so the boundaries between signed and unsigned values are
+    // potential problem cases. We want to explicitly return these values from
+    // our iterator.
+    } else if (num_ < 0x7FFFFFFFu) {
+      num_ = 0x7FFFFFFFu;
+    } else if (num_ < 0x80000000u) {
+      num_ = 0x80000000u;
+    } else if (num_ < 0xFFFFFFFFu) {
+      num_ = 0xFFFFFFFFu;
+    }
+  } while (invalid_only_ && IsValid(val));
+
+  // 0xFFFFFFFFu is the very last value we ever return; once we have
+  // produced it, the iteration is finished.
+  done_ |= val == 0xFFFFFFFFu;
+  return val;
+}
+
+// Returns true if |num| falls into one of the ranges of system call
+// numbers defined for the current architecture.
+bool SyscallIterator::IsValid(uint32_t num) {
+  // NOTE(review): the local variable presumably avoids a compiler warning
+  // about a comparison that is always true when MIN_SYSCALL is 0u — confirm.
+  uint32_t min_syscall = MIN_SYSCALL;
+  if (num >= min_syscall && num <= MAX_PUBLIC_SYSCALL) {
+    return true;
+  }
+  if (IsArmPrivate(num)) {
+    return true;
+  }
+  return false;
+}
+
+// Returns true for ARM-private and ARM "ghost" syscall numbers; always
+// false on non-ARM (or non-EABI/Thumb) builds.
+bool SyscallIterator::IsArmPrivate(uint32_t num) {
+#if defined(__arm__) && (defined(__thumb__) || defined(__ARM_EABI__))
+  return (num >= MIN_PRIVATE_SYSCALL && num <= MAX_PRIVATE_SYSCALL) ||
+         (num >= MIN_GHOST_SYSCALL && num <= MAX_SYSCALL);
+#else
+  return false;
+#endif
+}
+
+} // namespace
+
diff --git a/sandbox/linux/seccomp-bpf/syscall_iterator.h b/sandbox/linux/seccomp-bpf/syscall_iterator.h
new file mode 100644
index 0000000..39568d8
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/syscall_iterator.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_ITERATOR_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_ITERATOR_H__
+
+#include <stdint.h>
+
+#include <base/logging.h>
+
+namespace playground2 {
+
+// Iterates over the entire system call range from 0..0xFFFFFFFFu. This
+// iterator is aware of how system calls look like and will skip quickly
+// over ranges that can't contain system calls. It iterates more slowly
+// whenever it reaches a range that is potentially problematic, returning
+// the last invalid value before a valid range of system calls, and the
+// first invalid value after a valid range of syscalls. It iterates over
+// individual values whenever it is in the normal range for system calls
+// (typically MIN_SYSCALL..MAX_SYSCALL).
+// If |invalid_only| is true, this iterator will only return invalid
+// syscall numbers, but will still skip quickly over invalid ranges,
+// returning the first invalid value in the range and then skipping
+// to the last invalid value in the range.
+//
+// Example usage:
+// for (SyscallIterator iter(false); !iter.Done(); ) {
+// uint32_t sysnum = iter.Next();
+// // Do something with sysnum.
+// }
+//
+// TODO(markus): Make this a classic C++ iterator.
+class SyscallIterator {
+ public:
+  explicit SyscallIterator(bool invalid_only)
+    : invalid_only_(invalid_only),
+      done_(false),
+      num_(0) {}
+
+  // True once Next() has returned the final value, 0xFFFFFFFFu.
+  bool Done() const { return done_; }
+  // Returns the next system call number in the iteration sequence; see the
+  // class comment above for the exact skipping behavior.
+  uint32_t Next();
+  // True iff |num| is a valid system call number on this architecture.
+  static bool IsValid(uint32_t num);
+
+ private:
+  // True iff |num| is in one of the ARM-EABI private/ghost syscall ranges.
+  static bool IsArmPrivate(uint32_t num);
+
+  bool invalid_only_;  // If set, Next() only returns invalid syscall numbers.
+  bool done_;          // Set when the end of the iteration is reached.
+  uint32_t num_;       // Last value returned by Next().
+
+  DISALLOW_COPY_AND_ASSIGN(SyscallIterator);
+};
+
+} // namespace playground2
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_ITERATOR_H__
+
diff --git a/sandbox/linux/seccomp-bpf/syscall_iterator_unittest.cc b/sandbox/linux/seccomp-bpf/syscall_iterator_unittest.cc
new file mode 100644
index 0000000..26f11ce
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/syscall_iterator_unittest.cc
@@ -0,0 +1,135 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+using namespace playground2;
+
+namespace {
+
+// The iterator must produce a strictly increasing sequence in both modes,
+// and must always finish on 0xFFFFFFFFu.
+SANDBOX_TEST(SyscallIterator, Monotonous) {
+  for (int i = 0; i < 2; ++i) {
+    bool invalid_only = !i; // Testing both |invalid_only| cases.
+    SyscallIterator iter(invalid_only);
+    uint32_t next = iter.Next();
+
+    if (!invalid_only) {
+      // The iterator should start at 0.
+      SANDBOX_ASSERT(next == 0);
+    }
+    for (uint32_t last = next; !iter.Done(); last = next) {
+      next = iter.Next();
+      SANDBOX_ASSERT(last < next);
+    }
+    // The iterator should always return 0xFFFFFFFFu as the last value.
+    SANDBOX_ASSERT(next == 0xFFFFFFFFu);
+  }
+}
+
+// Verifies that every number in the public syscall range is visited
+// individually, i.e. the iterator never skips inside that range.
+SANDBOX_TEST(SyscallIterator, PublicSyscallRange) {
+  SyscallIterator iter(false);
+  uint32_t next = iter.Next();
+
+  // The iterator should cover the public syscall range
+  // MIN_SYSCALL..MAX_PUBLIC_SYSCALL, without skipping syscalls.
+  // We're assuming MIN_SYSCALL == 0 for all architectures,
+  // this is currently valid for Intel and ARM EABI.
+  SANDBOX_ASSERT(MIN_SYSCALL == 0);
+  SANDBOX_ASSERT(next == MIN_SYSCALL);
+  for (uint32_t last = next; next < MAX_PUBLIC_SYSCALL + 1; last = next) {
+    SANDBOX_ASSERT((next = iter.Next()) == last + 1);
+  }
+  SANDBOX_ASSERT(next == MAX_PUBLIC_SYSCALL + 1);
+}
+
+#if defined(__arm__)
+// On ARM, the private syscall range must be iterated one by one, with the
+// iterator entering the range at the last invalid value just before it.
+SANDBOX_TEST(SyscallIterator, ARMPrivateSyscallRange) {
+  SyscallIterator iter(false);
+  uint32_t next = iter.Next();
+  while (next < MIN_PRIVATE_SYSCALL - 1) {
+    next = iter.Next();
+  }
+  // The iterator should cover the ARM private syscall range
+  // without skipping syscalls.
+  SANDBOX_ASSERT(next == MIN_PRIVATE_SYSCALL - 1);
+  for (uint32_t last = next; next < MAX_PRIVATE_SYSCALL + 1; last = next) {
+    SANDBOX_ASSERT((next = iter.Next()) == last + 1);
+  }
+  SANDBOX_ASSERT(next == MAX_PRIVATE_SYSCALL + 1);
+}
+
+// Same as ARMPrivateSyscallRange, but for the kernel-internal "ghost"
+// syscall range MIN_GHOST_SYSCALL..MAX_SYSCALL.
+SANDBOX_TEST(SyscallIterator, ARMHiddenSyscallRange) {
+  SyscallIterator iter(false);
+  uint32_t next = iter.Next();
+  while (next < MIN_GHOST_SYSCALL - 1) {
+    next = iter.Next();
+  }
+  // The iterator should cover the ARM hidden syscall range
+  // without skipping syscalls.
+  SANDBOX_ASSERT(next == MIN_GHOST_SYSCALL - 1);
+  for (uint32_t last = next; next < MAX_SYSCALL + 1; last = next) {
+    SANDBOX_ASSERT((next = iter.Next()) == last + 1);
+  }
+  SANDBOX_ASSERT(next == MAX_SYSCALL + 1);
+}
+#endif
+
+// Past the syscall ranges, the iterator must still return the
+// signed/unsigned boundary values 0x7FFFFFFFu, 0x80000000u and 0xFFFFFFFFu,
+// in both |invalid_only| modes.
+SANDBOX_TEST(SyscallIterator, Invalid) {
+  for (int i = 0; i < 2; ++i) {
+    bool invalid_only = !i; // Testing both |invalid_only| cases.
+    SyscallIterator iter(invalid_only);
+    uint32_t next = iter.Next();
+
+    while (next < MAX_SYSCALL + 1) {
+      next = iter.Next();
+    }
+
+    SANDBOX_ASSERT(next == MAX_SYSCALL + 1);
+    while (next < 0x7FFFFFFFu) {
+      next = iter.Next();
+    }
+
+    // The iterator should return the signed/unsigned corner cases.
+    SANDBOX_ASSERT(next == 0x7FFFFFFFu);
+    next = iter.Next();
+    SANDBOX_ASSERT(next == 0x80000000u);
+    SANDBOX_ASSERT(!iter.Done());
+    next = iter.Next();
+    SANDBOX_ASSERT(iter.Done());
+    SANDBOX_ASSERT(next == 0xFFFFFFFFu);
+  }
+}
+
+// In |invalid_only| mode, the iterator must skip entire valid ranges,
+// returning only the first and last invalid value around each of them.
+SANDBOX_TEST(SyscallIterator, InvalidOnly) {
+  bool invalid_only = true;
+  SyscallIterator iter(invalid_only);
+  uint32_t next = iter.Next();
+  // We're assuming MIN_SYSCALL == 0 for all architectures,
+  // this is currently valid for Intel and ARM EABI.
+  // First invalid syscall should then be |MAX_PUBLIC_SYSCALL + 1|.
+  SANDBOX_ASSERT(MIN_SYSCALL == 0);
+  SANDBOX_ASSERT(next == MAX_PUBLIC_SYSCALL + 1);
+
+#if defined(__arm__)
+  next = iter.Next();
+  // The iterator should skip until the last invalid syscall in this range.
+  SANDBOX_ASSERT(next == MIN_PRIVATE_SYSCALL - 1);
+  while (next <= MAX_PRIVATE_SYSCALL) {
+    next = iter.Next();
+  }
+
+  next = iter.Next();
+  // The iterator should skip until the last invalid syscall in this range.
+  SANDBOX_ASSERT(next == MIN_GHOST_SYSCALL - 1);
+  while (next <= MAX_SYSCALL) {
+    next = iter.Next();
+  }
+  SANDBOX_ASSERT(next == MAX_SYSCALL + 1);
+#endif
+}
+
+} // namespace
+
diff --git a/sandbox/linux/seccomp-bpf/util.cc b/sandbox/linux/seccomp-bpf/util.cc
new file mode 100644
index 0000000..904a169
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/util.cc
@@ -0,0 +1,164 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/util.h"
+
+namespace playground2 {
+
+// Sends the optional payload |buf|/|len| plus a list of file descriptors
+// (varargs, terminated by any negative value) over the socket |transport|,
+// packed into a single SCM_RIGHTS control message. Returns true iff the
+// complete message was sent. At least one descriptor must be passed.
+bool Util::sendFds(int transport, const void *buf, size_t len, ...) {
+  // First pass over the varargs: count the file descriptors.
+  int count = 0;
+  va_list ap;
+  va_start(ap, len);
+  while (va_arg(ap, int) >= 0) {
+    ++count;
+  }
+  va_end(ap);
+  if (!count) {
+    return false;
+  }
+  char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
+  memset(cmsg_buf, 0, sizeof(cmsg_buf));
+  struct iovec iov[2] = { { 0 } };
+  struct msghdr msg = { 0 };
+  // Always send at least one byte of payload ("dummy"), so the receiver
+  // can distinguish an empty message from EOF / an error.
+  int dummy = 0;
+  iov[0].iov_base = &dummy;
+  iov[0].iov_len = sizeof(dummy);
+  if (buf && len > 0) {
+    iov[1].iov_base = const_cast<void *>(buf);
+    iov[1].iov_len = len;
+  }
+  msg.msg_iov = iov;
+  msg.msg_iovlen = (buf && len > 0) ? 2 : 1;
+  msg.msg_control = cmsg_buf;
+  msg.msg_controllen = CMSG_LEN(count*sizeof(int));
+  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_RIGHTS;
+  cmsg->cmsg_len = CMSG_LEN(count*sizeof(int));
+  // Second pass over the varargs: copy the descriptors into the control
+  // message.
+  va_start(ap, len);
+  for (int i = 0, fd; (fd = va_arg(ap, int)) >= 0; ++i) {
+    (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i] = fd;
+  }
+  // Fix: every va_start() must be paired with a va_end(); the second
+  // traversal previously returned without one, which is undefined
+  // behavior per the C/C++ stdarg contract (even if harmless on most ABIs).
+  va_end(ap);
+  return sendmsg(transport, &msg, 0) ==
+    static_cast<ssize_t>(sizeof(dummy) + ((buf && len > 0) ? len : 0));
+}
+
+// Receives a message produced by sendFds() from |transport|: an optional
+// payload into |buf| (up to |*len| bytes; |*len| is updated to the number
+// of payload bytes actually received) and the expected descriptors into
+// the NULL-terminated list of int* varargs. Returns true on success; on
+// failure the fd outputs are left at -1 and errno is set (0 on a short or
+// malformed read without a system error).
+bool Util::getFds(int transport, void *buf, size_t *len, ...) {
+  // Count the fd output slots and pre-set them all to -1.
+  int count = 0;
+  va_list ap;
+  va_start(ap, len);
+  for (int *fd; (fd = va_arg(ap, int *)) != NULL; ++count) {
+    *fd = -1;
+  }
+  va_end(ap);
+  if (!count) {
+    return false;
+  }
+  char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
+  memset(cmsg_buf, 0, sizeof(cmsg_buf));
+  struct iovec iov[2] = { { 0 } };
+  struct msghdr msg = { 0 };
+  // The first int of the message is an error indicator from the sender
+  // (the "dummy" field written by sendFds() on success).
+  int err;
+  iov[0].iov_base = &err;
+  iov[0].iov_len = sizeof(int);
+  if (buf && len && *len > 0) {
+    iov[1].iov_base = buf;
+    iov[1].iov_len = *len;
+  }
+  msg.msg_iov = iov;
+  msg.msg_iovlen = (buf && len && *len > 0) ? 2 : 1;
+  msg.msg_control = cmsg_buf;
+  msg.msg_controllen = CMSG_LEN(count*sizeof(int));
+  ssize_t bytes = recvmsg(transport, &msg, 0);
+  if (len) {
+    *len = bytes > static_cast<int>(sizeof(int)) ? bytes - sizeof(int) : 0;
+  }
+  if (bytes != static_cast<ssize_t>(sizeof(int) + iov[1].iov_len)) {
+    // Short read (or recvmsg() error). errno is cleared when recvmsg()
+    // itself succeeded, so callers don't see a stale system error.
+    if (bytes >= 0) {
+      errno = 0;
+    }
+    return false;
+  }
+  if (err) {
+    // "err" is the first four bytes of the payload. If these are non-zero,
+    // the sender on the other side of the socketpair sent us an errno value.
+    // We don't expect to get any file handles in this case.
+    // NOTE(review): if the peer did attach descriptors anyway, they would
+    // be leaked on this path -- confirm this is acceptable.
+    errno = err;
+    return false;
+  }
+  // Validate the control message before trusting its descriptor payload.
+  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+  if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||
+      !cmsg ||
+      cmsg->cmsg_level != SOL_SOCKET ||
+      cmsg->cmsg_type != SCM_RIGHTS ||
+      cmsg->cmsg_len != CMSG_LEN(count*sizeof(int))) {
+    errno = EBADF;
+    return false;
+  }
+  // Second pass over the varargs: store the received descriptors.
+  va_start(ap, len);
+  for (int *fd, i = 0; (fd = va_arg(ap, int *)) != NULL; ++i) {
+    *fd = (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i];
+  }
+  va_end(ap);
+  return true;
+}
+
+// Closes every open file descriptor of the current process except the ones
+// listed as varargs (terminated by any negative value). Descriptors 0..2
+// are never closed; they are redirected to /dev/null instead so that stdio
+// remains well-defined. Dies via SANDBOX_DIE() if /proc/self/fd cannot be
+// enumerated.
+void Util::closeAllBut(int fd, ...) {
+  int proc_fd;
+  int fdir;
+  if ((proc_fd = Sandbox::proc_fd()) < 0 ||
+      (fdir = openat(proc_fd, "self/fd", O_RDONLY|O_DIRECTORY)) < 0) {
+    SANDBOX_DIE("Cannot access \"/proc/self/fd\"");
+  }
+  int dev_null = open("/dev/null", O_RDWR);
+  DIR *dir = fdopendir(fdir);
+  if (!dir) {
+    // Fix: fdopendir() can fail (e.g. out of memory); previously a NULL
+    // directory handle would have been passed straight into readdir_r().
+    SANDBOX_DIE("Cannot read \"/proc/self/fd\"");
+  }
+  struct dirent de, *res;
+  while (!readdir_r(dir, &de, &res) && res) {
+    if (res->d_name[0] < '0') {
+      // Skips "." and ".." (and any other non-numeric entry).
+      continue;
+    }
+    int i = atoi(res->d_name);
+    if (i >= 0 && i != dirfd(dir) && i != dev_null) {
+      // Scan the keep-list for |i|.
+      va_list ap;
+      va_start(ap, fd);
+      for (int f = fd;; f = va_arg(ap, int)) {
+        if (f < 0) {
+          // |i| is not in the keep-list; dispose of it.
+          if (i <= 2) {
+            // Never ever close 0..2. If we cannot redirect to /dev/null,
+            // then we are better off leaving the standard descriptors open.
+            if (dev_null >= 0) {
+              // Fix: on success dup2() returns the new descriptor (|i|),
+              // not 0. The previous check "if (HANDLE_EINTR(dup2(...)))"
+              // therefore treated a successful redirect of fd 1 or fd 2
+              // as a fatal error.
+              if (HANDLE_EINTR(dup2(dev_null, i)) != i) {
+                SANDBOX_DIE("Cannot dup2()");
+              }
+            }
+          } else {
+            if (HANDLE_EINTR(close(i))) { }  // Best effort; errors ignored.
+          }
+          break;
+        } else if (i == f) {
+          // |i| is explicitly kept open.
+          break;
+        }
+      }
+      va_end(ap);
+    }
+  }
+  closedir(dir);  // Also closes the underlying |fdir| descriptor.
+  if (dev_null >= 0) {
+    if (HANDLE_EINTR(close(dev_null))) { }  // Best effort; errors ignored.
+  }
+  return;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp-bpf/util.h b/sandbox/linux/seccomp-bpf/util.h
new file mode 100644
index 0000000..3e4d41b
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/util.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_UTIL_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_UTIL_H__
+
+namespace playground2 {
+
+// Small collection of static helpers used by the seccomp-BPF sandbox.
+// NOTE(review): this header uses size_t without including <stddef.h>,
+// relying on transitive includes -- confirm all users pull it in first.
+class Util {
+ public:
+  // Sends |buf|/|len| plus a negative-terminated varargs list of file
+  // descriptors over the socket |transport| via SCM_RIGHTS.
+  static bool sendFds(int transport, const void *buf, size_t len, ...);
+  // Counterpart of sendFds(); the varargs are NULL-terminated int*
+  // output slots that receive the transferred descriptors.
+  static bool getFds(int transport, void *buf, size_t *len, ...);
+  // Closes all open descriptors except the negative-terminated varargs
+  // list; fds 0..2 are redirected to /dev/null instead of being closed.
+  static void closeAllBut(int fd, ...);
+};
+
+} // namespace
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_UTIL_H__
diff --git a/sandbox/linux/seccomp-bpf/verifier.cc b/sandbox/linux/seccomp-bpf/verifier.cc
new file mode 100644
index 0000000..40a1aa2
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/verifier.cc
@@ -0,0 +1,168 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
+#include "sandbox/linux/seccomp-bpf/verifier.h"
+
+
+namespace playground2 {
+
+// Runs every system call number produced by SyscallIterator through both
+// the compiled BPF |program| (via EvaluateBPF()) and the policy in
+// |evaluators|, and fails unless the two agree for every input.
+bool Verifier::VerifyBPF(const std::vector<struct sock_filter>& program,
+                         const Sandbox::Evaluators& evaluators,
+                         const char **err) {
+  *err = NULL;
+  // Only a single evaluator (policy + aux pointer) is supported so far.
+  if (evaluators.size() != 1) {
+    *err = "Not implemented";
+    return false;
+  }
+  Sandbox::EvaluateSyscall evaluate_syscall = evaluators.begin()->first;
+  void *aux = evaluators.begin()->second;
+  for (SyscallIterator iter(false); !iter.Done(); ) {
+    uint32_t sysnum = iter.Next();
+    // We ideally want to iterate over the full system call range and values
+    // just above and just below this range. This gives us the full result set
+    // of the "evaluators".
+    // On Intel systems, this can fail in a surprising way, as a cleared bit 30
+    // indicates either i386 or x86-64; and a set bit 30 indicates x32. And
+    // unless we pay attention to setting this bit correctly, an early check in
+    // our BPF program will make us fail with a misleading error code.
+    struct arch_seccomp_data data = { static_cast<int>(sysnum),
+                                      static_cast<uint32_t>(SECCOMP_ARCH) };
+#if defined(__i386__) || defined(__x86_64__)
+#if defined(__x86_64__) && defined(__ILP32__)
+    // On x32, only syscall numbers with bit 30 set are meaningful.
+    if (!(sysnum & 0x40000000u)) {
+      continue;
+    }
+#else
+    // On i386/x86-64, bit 30 would indicate an x32 syscall; skip those.
+    if (sysnum & 0x40000000u) {
+      continue;
+    }
+#endif
+#endif
+    ErrorCode code = evaluate_syscall(sysnum, aux);
+    uint32_t computed_ret = EvaluateBPF(program, data, err);
+    if (*err) {
+      return false;
+    } else if (computed_ret != code.err()) {
+      *err = "Exit code from BPF program doesn't match";
+      return false;
+    }
+  }
+  return true;
+}
+
+// Interprets |program| for one simulated input |data| and returns the BPF
+// filter's result. Supports only the instruction subset emitted by the
+// sandbox's own compiler (BPF_LD, BPF_JMP, BPF_RET). On any malformed
+// instruction, *err is set and 0 is returned.
+uint32_t Verifier::EvaluateBPF(const std::vector<struct sock_filter>& program,
+                               const struct arch_seccomp_data& data,
+                               const char **err) {
+  *err = NULL;
+  if (program.size() < 1 || program.size() >= SECCOMP_MAX_PROGRAM_SIZE) {
+    *err = "Invalid program length";
+    return 0;
+  }
+  // Fetch-decode-execute loop; ++state.ip advances past the current
+  // instruction (jump handlers add their relative offset first).
+  for (State state(program, data); !*err; ++state.ip) {
+    if (state.ip >= program.size()) {
+      *err = "Invalid instruction pointer in BPF program";
+      break;
+    }
+    const struct sock_filter& insn = program[state.ip];
+    switch (BPF_CLASS(insn.code)) {
+    case BPF_LD:
+      Ld(&state, insn, err);
+      break;
+    case BPF_JMP:
+      Jmp(&state, insn, err);
+      break;
+    case BPF_RET:
+      // The only way to terminate normally is through a BPF_RET.
+      return Ret(&state, insn, err);
+    default:
+      *err = "Unexpected instruction in BPF program";
+      break;
+    }
+  }
+  return 0;
+}
+
+// Executes a BPF_LD instruction: loads a 32bit word from the simulated
+// arch_seccomp_data into the accumulator. Only word-sized, absolute,
+// 4-byte-aligned loads within the struct are accepted.
+void Verifier::Ld(State *state, const struct sock_filter& insn,
+                  const char **err) {
+  if (BPF_SIZE(insn.code) != BPF_W ||
+      BPF_MODE(insn.code) != BPF_ABS) {
+    *err = "Invalid BPF_LD instruction";
+    return;
+  }
+  if (insn.k < sizeof(struct arch_seccomp_data) && (insn.k & 3) == 0) {
+    // We only allow loading of properly aligned 32bit quantities.
+    memcpy(&state->accumulator,
+           reinterpret_cast<const char *>(&state->data) + insn.k,
+           4);
+  } else {
+    *err = "Invalid operand in BPF_LD instruction";
+    return;
+  }
+  state->acc_is_valid = true;
+  return;
+}
+
+// Executes a BPF_JMP instruction by adjusting state->ip. Jumps must move
+// strictly forward and stay inside the program; conditional jumps require
+// a previously-loaded accumulator and a constant (BPF_K) operand.
+void Verifier::Jmp(State *state, const struct sock_filter& insn,
+                   const char **err) {
+  if (BPF_OP(insn.code) == BPF_JA) {
+    if (state->ip + insn.k + 1 >= state->program.size() ||
+        state->ip + insn.k + 1 <= state->ip) {
+    // Shared failure exit; the else-branch below jumps here via goto,
+    // which is legal C++ even though the label sits inside this block.
+    compilation_failure:
+      *err = "Invalid BPF_JMP instruction";
+      return;
+    }
+    state->ip += insn.k;
+  } else {
+    if (BPF_SRC(insn.code) != BPF_K ||
+        !state->acc_is_valid ||
+        state->ip + insn.jt + 1 >= state->program.size() ||
+        state->ip + insn.jf + 1 >= state->program.size()) {
+      goto compilation_failure;
+    }
+    switch (BPF_OP(insn.code)) {
+    case BPF_JEQ:
+      if (state->accumulator == insn.k) {
+        state->ip += insn.jt;
+      } else {
+        state->ip += insn.jf;
+      }
+      break;
+    case BPF_JGT:
+      if (state->accumulator > insn.k) {
+        state->ip += insn.jt;
+      } else {
+        state->ip += insn.jf;
+      }
+      break;
+    case BPF_JGE:
+      if (state->accumulator >= insn.k) {
+        state->ip += insn.jt;
+      } else {
+        state->ip += insn.jf;
+      }
+      break;
+    case BPF_JSET:
+      if (state->accumulator & insn.k) {
+        state->ip += insn.jt;
+      } else {
+        state->ip += insn.jf;
+      }
+      break;
+    default:
+      goto compilation_failure;
+    }
+  }
+}
+
+// Executes a BPF_RET instruction: returns the constant operand as the
+// filter's result. Only constant (BPF_K) returns are accepted; register
+// returns set *err and yield 0.
+uint32_t Verifier::Ret(State *, const struct sock_filter& insn,
+                       const char **err) {
+  if (BPF_SRC(insn.code) != BPF_K) {
+    *err = "Invalid BPF_RET instruction";
+    return 0;
+  }
+  return insn.k;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp-bpf/verifier.h b/sandbox/linux/seccomp-bpf/verifier.h
new file mode 100644
index 0000000..505015e
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/verifier.h
@@ -0,0 +1,75 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_VERIFIER_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_VERIFIER_H__
+
+#include <linux/filter.h>
+
+#include <utility>
+#include <vector>
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+
+
+namespace playground2 {
+
+class Verifier {
+ public:
+  // Evaluate the BPF program for all possible inputs and verify that it
+  // computes the correct result. We use the "evaluators" to determine
+  // the full set of possible inputs that we have to iterate over.
+  // Returns success, if the BPF filter accurately reflects the rules
+  // set by the "evaluators".
+  // Upon success, "err" is set to NULL. Upon failure, it contains a static
+  // error message that does not need to be free()'d.
+  static bool VerifyBPF(const std::vector<struct sock_filter>& program,
+                        const Sandbox::Evaluators& evaluators,
+                        const char **err);
+
+  // Evaluate a given BPF program for a particular set of system call
+  // parameters. If evaluation failed for any reason, "err" will be set to
+  // a non-NULL error string. Otherwise, the BPF program's result will be
+  // returned by the function and "err" is NULL.
+  // We do not actually implement the full BPF state machine, but only the
+  // parts that can actually be generated by our BPF compiler. If this code
+  // is used for purposes other than verifying the output of the sandbox's
+  // BPF compiler, we might have to extend this BPF interpreter.
+  static uint32_t EvaluateBPF(const std::vector<struct sock_filter>& program,
+                              const struct arch_seccomp_data& data,
+                              const char **err);
+
+ private:
+  // Interpreter state threaded through Ld()/Jmp()/Ret() by EvaluateBPF().
+  struct State {
+    State(const std::vector<struct sock_filter>& p,
+          const struct arch_seccomp_data& d) :
+      program(p),
+      data(d),
+      ip(0),
+      accumulator(0),
+      acc_is_valid(false) {
+    }
+    const std::vector<struct sock_filter>& program;  // Program being run.
+    const struct arch_seccomp_data& data;  // Simulated syscall input.
+    unsigned int ip;  // Index of the instruction currently executing.
+    uint32_t accumulator;  // The BPF accumulator register ("A").
+    bool acc_is_valid;  // Set once a BPF_LD has loaded the accumulator.
+
+   private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(State);
+  };
+
+  static void Ld (State *state, const struct sock_filter& insn,
+                  const char **err);
+  static void Jmp(State *state, const struct sock_filter& insn,
+                  const char **err);
+  static uint32_t Ret(State *state, const struct sock_filter& insn,
+                      const char **err);
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Verifier);
+};
+
+} // namespace
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_VERIFIER_H__
diff --git a/sandbox/linux/selinux/README b/sandbox/linux/selinux/README
new file mode 100644
index 0000000..f5428a3
--- /dev/null
+++ b/sandbox/linux/selinux/README
@@ -0,0 +1,12 @@
+This contains a basic and seemingly functional policy for Chromium. This policy
+was written on FC12 and might not function on other distributions depending on
+the version of the refpolicy installed.
+
+When building Chromium with the GYP define selinux=1, the seccomp sandbox is
+disabled and the zygote will perform a dynamic transition to chromium_renderer_t
+after forking a renderer. The policy in this directory defines access vectors
+for chromium_renderer_t.
+
+To install:
+ % make -f /usr/share/selinux/devel/Makefile
+ % sudo /usr/sbin/semodule -i chromium-browser.pp
diff --git a/sandbox/linux/selinux/chromium-browser.if b/sandbox/linux/selinux/chromium-browser.if
new file mode 100644
index 0000000..3eb6a30
--- /dev/null
+++ b/sandbox/linux/selinux/chromium-browser.if
@@ -0,0 +1 @@
+## <summary></summary>
diff --git a/sandbox/linux/selinux/chromium-browser.te b/sandbox/linux/selinux/chromium-browser.te
new file mode 100644
index 0000000..ae2f8b7
--- /dev/null
+++ b/sandbox/linux/selinux/chromium-browser.te
@@ -0,0 +1,40 @@
+policy_module(chromium-browser,1.0.0)
+
+gen_require(`
+ type gnome_home_t;
+ type proc_t;
+ type tmpfs_t;
+ type unconfined_t;
+ type urandom_device_t;
+ type user_devpts_t;
+ type user_tmpfs_t;
+')
+
+type chromium_renderer_t;
+domain_base_type(chromium_renderer_t)
+role unconfined_r types chromium_renderer_t;
+
+allow unconfined_t chromium_renderer_t:process { dyntransition };
+
+allow chromium_renderer_t unconfined_t:unix_stream_socket { read write send_msg recv_msg };
+allow unconfined_t chromium_renderer_t:unix_stream_socket { read write send_msg recv_msg };
+
+allow chromium_renderer_t urandom_device_t:chr_file { read };
+allow chromium_renderer_t user_devpts_t:chr_file { write };
+allow chromium_renderer_t self:process { execmem };
+allow chromium_renderer_t self:fifo_file { read write };
+allow chromium_renderer_t self:unix_dgram_socket { read write create send_msg recv_msg sendto };
+allow chromium_renderer_t unconfined_t:unix_dgram_socket { read write send_msg recv_msg };
+allow unconfined_t chromium_renderer_t:unix_dgram_socket { read write send_msg recv_msg };
+allow chromium_renderer_t user_tmpfs_t:file { read write append open getattr };
+allow chromium_renderer_t tmpfs_t:file { read write };
+allow chromium_renderer_t self:shm { create destroy getattr setattr read write associate unix_read unix_write };
+
+# For reading dictionaries out of the user-data-dir
+allow chromium_renderer_t gnome_home_t:file { read getattr };
+
+miscfiles_read_localization(chromium_renderer_t);
+miscfiles_read_fonts(chromium_renderer_t);
+
+# The renderer will attempt to read meminfo
+dontaudit chromium_renderer_t proc_t:file { read };
diff --git a/sandbox/linux/services/arm_linux_syscalls.h b/sandbox/linux/services/arm_linux_syscalls.h
new file mode 100644
index 0000000..dd0826e
--- /dev/null
+++ b/sandbox/linux/services/arm_linux_syscalls.h
@@ -0,0 +1,1377 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Generated from the Linux kernel's calls.S.
+#ifndef SANDBOX_LINUX_SERVICES_ARM_LINUX_SYSCALLS_H_
+#define SANDBOX_LINUX_SERVICES_ARM_LINUX_SYSCALLS_H_
+
+#if !defined(__arm__) || !defined(__ARM_EABI__)
+#error "Including header on wrong architecture"
+#endif
+
+// __NR_SYSCALL_BASE, __ARM_NR_BASE are defined in <asm/unistd.h>.
+#include <asm/unistd.h>
+
+// This syscall list has holes, because ARM EABI makes some syscalls obsolete.
+
+#if !defined(__NR_restart_syscall)
+#define __NR_restart_syscall (__NR_SYSCALL_BASE+0)
+#endif
+
+#if !defined(__NR_exit)
+#define __NR_exit (__NR_SYSCALL_BASE+1)
+#endif
+
+#if !defined(__NR_fork)
+#define __NR_fork (__NR_SYSCALL_BASE+2)
+#endif
+
+#if !defined(__NR_read)
+#define __NR_read (__NR_SYSCALL_BASE+3)
+#endif
+
+#if !defined(__NR_write)
+#define __NR_write (__NR_SYSCALL_BASE+4)
+#endif
+
+#if !defined(__NR_open)
+#define __NR_open (__NR_SYSCALL_BASE+5)
+#endif
+
+#if !defined(__NR_close)
+#define __NR_close (__NR_SYSCALL_BASE+6)
+#endif
+
+#if !defined(__NR_creat)
+#define __NR_creat (__NR_SYSCALL_BASE+8)
+#endif
+
+#if !defined(__NR_link)
+#define __NR_link (__NR_SYSCALL_BASE+9)
+#endif
+
+#if !defined(__NR_unlink)
+#define __NR_unlink (__NR_SYSCALL_BASE+10)
+#endif
+
+#if !defined(__NR_execve)
+#define __NR_execve (__NR_SYSCALL_BASE+11)
+#endif
+
+#if !defined(__NR_chdir)
+#define __NR_chdir (__NR_SYSCALL_BASE+12)
+#endif
+
+#if !defined(__NR_mknod)
+#define __NR_mknod (__NR_SYSCALL_BASE+14)
+#endif
+
+#if !defined(__NR_chmod)
+#define __NR_chmod (__NR_SYSCALL_BASE+15)
+#endif
+
+#if !defined(__NR_lchown)
+#define __NR_lchown (__NR_SYSCALL_BASE+16)
+#endif
+
+#if !defined(__NR_lseek)
+#define __NR_lseek (__NR_SYSCALL_BASE+19)
+#endif
+
+#if !defined(__NR_getpid)
+#define __NR_getpid (__NR_SYSCALL_BASE+20)
+#endif
+
+#if !defined(__NR_mount)
+#define __NR_mount (__NR_SYSCALL_BASE+21)
+#endif
+
+#if !defined(__NR_setuid)
+#define __NR_setuid (__NR_SYSCALL_BASE+23)
+#endif
+
+#if !defined(__NR_getuid)
+#define __NR_getuid (__NR_SYSCALL_BASE+24)
+#endif
+
+#if !defined(__NR_ptrace)
+#define __NR_ptrace (__NR_SYSCALL_BASE+26)
+#endif
+
+#if !defined(__NR_pause)
+#define __NR_pause (__NR_SYSCALL_BASE+29)
+#endif
+
+#if !defined(__NR_access)
+#define __NR_access (__NR_SYSCALL_BASE+33)
+#endif
+
+#if !defined(__NR_nice)
+#define __NR_nice (__NR_SYSCALL_BASE+34)
+#endif
+
+#if !defined(__NR_sync)
+#define __NR_sync (__NR_SYSCALL_BASE+36)
+#endif
+
+#if !defined(__NR_kill)
+#define __NR_kill (__NR_SYSCALL_BASE+37)
+#endif
+
+#if !defined(__NR_rename)
+#define __NR_rename (__NR_SYSCALL_BASE+38)
+#endif
+
+#if !defined(__NR_mkdir)
+#define __NR_mkdir (__NR_SYSCALL_BASE+39)
+#endif
+
+#if !defined(__NR_rmdir)
+#define __NR_rmdir (__NR_SYSCALL_BASE+40)
+#endif
+
+#if !defined(__NR_dup)
+#define __NR_dup (__NR_SYSCALL_BASE+41)
+#endif
+
+#if !defined(__NR_pipe)
+#define __NR_pipe (__NR_SYSCALL_BASE+42)
+#endif
+
+#if !defined(__NR_times)
+#define __NR_times (__NR_SYSCALL_BASE+43)
+#endif
+
+#if !defined(__NR_brk)
+#define __NR_brk (__NR_SYSCALL_BASE+45)
+#endif
+
+#if !defined(__NR_setgid)
+#define __NR_setgid (__NR_SYSCALL_BASE+46)
+#endif
+
+#if !defined(__NR_getgid)
+#define __NR_getgid (__NR_SYSCALL_BASE+47)
+#endif
+
+#if !defined(__NR_geteuid)
+#define __NR_geteuid (__NR_SYSCALL_BASE+49)
+#endif
+
+#if !defined(__NR_getegid)
+#define __NR_getegid (__NR_SYSCALL_BASE+50)
+#endif
+
+#if !defined(__NR_acct)
+#define __NR_acct (__NR_SYSCALL_BASE+51)
+#endif
+
+#if !defined(__NR_umount2)
+#define __NR_umount2 (__NR_SYSCALL_BASE+52)
+#endif
+
+#if !defined(__NR_ioctl)
+#define __NR_ioctl (__NR_SYSCALL_BASE+54)
+#endif
+
+#if !defined(__NR_fcntl)
+#define __NR_fcntl (__NR_SYSCALL_BASE+55)
+#endif
+
+#if !defined(__NR_setpgid)
+#define __NR_setpgid (__NR_SYSCALL_BASE+57)
+#endif
+
+#if !defined(__NR_umask)
+#define __NR_umask (__NR_SYSCALL_BASE+60)
+#endif
+
+#if !defined(__NR_chroot)
+#define __NR_chroot (__NR_SYSCALL_BASE+61)
+#endif
+
+#if !defined(__NR_ustat)
+#define __NR_ustat (__NR_SYSCALL_BASE+62)
+#endif
+
+#if !defined(__NR_dup2)
+#define __NR_dup2 (__NR_SYSCALL_BASE+63)
+#endif
+
+#if !defined(__NR_getppid)
+#define __NR_getppid (__NR_SYSCALL_BASE+64)
+#endif
+
+#if !defined(__NR_getpgrp)
+#define __NR_getpgrp (__NR_SYSCALL_BASE+65)
+#endif
+
+#if !defined(__NR_setsid)
+#define __NR_setsid (__NR_SYSCALL_BASE+66)
+#endif
+
+#if !defined(__NR_sigaction)
+#define __NR_sigaction (__NR_SYSCALL_BASE+67)
+#endif
+
+#if !defined(__NR_setreuid)
+#define __NR_setreuid (__NR_SYSCALL_BASE+70)
+#endif
+
+#if !defined(__NR_setregid)
+#define __NR_setregid (__NR_SYSCALL_BASE+71)
+#endif
+
+#if !defined(__NR_sigsuspend)
+#define __NR_sigsuspend (__NR_SYSCALL_BASE+72)
+#endif
+
+#if !defined(__NR_sigpending)
+#define __NR_sigpending (__NR_SYSCALL_BASE+73)
+#endif
+
+#if !defined(__NR_sethostname)
+#define __NR_sethostname (__NR_SYSCALL_BASE+74)
+#endif
+
+#if !defined(__NR_setrlimit)
+#define __NR_setrlimit (__NR_SYSCALL_BASE+75)
+#endif
+
+#if !defined(__NR_getrusage)
+#define __NR_getrusage (__NR_SYSCALL_BASE+77)
+#endif
+
+#if !defined(__NR_gettimeofday)
+#define __NR_gettimeofday (__NR_SYSCALL_BASE+78)
+#endif
+
+#if !defined(__NR_settimeofday)
+#define __NR_settimeofday (__NR_SYSCALL_BASE+79)
+#endif
+
+#if !defined(__NR_getgroups)
+#define __NR_getgroups (__NR_SYSCALL_BASE+80)
+#endif
+
+#if !defined(__NR_setgroups)
+#define __NR_setgroups (__NR_SYSCALL_BASE+81)
+#endif
+
+#if !defined(__NR_symlink)
+#define __NR_symlink (__NR_SYSCALL_BASE+83)
+#endif
+
+#if !defined(__NR_readlink)
+#define __NR_readlink (__NR_SYSCALL_BASE+85)
+#endif
+
+#if !defined(__NR_uselib)
+#define __NR_uselib (__NR_SYSCALL_BASE+86)
+#endif
+
+#if !defined(__NR_swapon)
+#define __NR_swapon (__NR_SYSCALL_BASE+87)
+#endif
+
+#if !defined(__NR_reboot)
+#define __NR_reboot (__NR_SYSCALL_BASE+88)
+#endif
+
+#if !defined(__NR_munmap)
+#define __NR_munmap (__NR_SYSCALL_BASE+91)
+#endif
+
+#if !defined(__NR_truncate)
+#define __NR_truncate (__NR_SYSCALL_BASE+92)
+#endif
+
+#if !defined(__NR_ftruncate)
+#define __NR_ftruncate (__NR_SYSCALL_BASE+93)
+#endif
+
+#if !defined(__NR_fchmod)
+#define __NR_fchmod (__NR_SYSCALL_BASE+94)
+#endif
+
+#if !defined(__NR_fchown)
+#define __NR_fchown (__NR_SYSCALL_BASE+95)
+#endif
+
+#if !defined(__NR_getpriority)
+#define __NR_getpriority (__NR_SYSCALL_BASE+96)
+#endif
+
+#if !defined(__NR_setpriority)
+#define __NR_setpriority (__NR_SYSCALL_BASE+97)
+#endif
+
+#if !defined(__NR_statfs)
+#define __NR_statfs (__NR_SYSCALL_BASE+99)
+#endif
+
+#if !defined(__NR_fstatfs)
+#define __NR_fstatfs (__NR_SYSCALL_BASE+100)
+#endif
+
+#if !defined(__NR_syslog)
+#define __NR_syslog (__NR_SYSCALL_BASE+103)
+#endif
+
+#if !defined(__NR_setitimer)
+#define __NR_setitimer (__NR_SYSCALL_BASE+104)
+#endif
+
+#if !defined(__NR_getitimer)
+#define __NR_getitimer (__NR_SYSCALL_BASE+105)
+#endif
+
+#if !defined(__NR_stat)
+#define __NR_stat (__NR_SYSCALL_BASE+106)
+#endif
+
+#if !defined(__NR_lstat)
+#define __NR_lstat (__NR_SYSCALL_BASE+107)
+#endif
+
+#if !defined(__NR_fstat)
+#define __NR_fstat (__NR_SYSCALL_BASE+108)
+#endif
+
+#if !defined(__NR_vhangup)
+#define __NR_vhangup (__NR_SYSCALL_BASE+111)
+#endif
+
+#if !defined(__NR_wait4)
+#define __NR_wait4 (__NR_SYSCALL_BASE+114)
+#endif
+
+#if !defined(__NR_swapoff)
+#define __NR_swapoff (__NR_SYSCALL_BASE+115)
+#endif
+
+#if !defined(__NR_sysinfo)
+#define __NR_sysinfo (__NR_SYSCALL_BASE+116)
+#endif
+
+#if !defined(__NR_fsync)
+#define __NR_fsync (__NR_SYSCALL_BASE+118)
+#endif
+
+#if !defined(__NR_sigreturn)
+#define __NR_sigreturn (__NR_SYSCALL_BASE+119)
+#endif
+
+#if !defined(__NR_clone)
+#define __NR_clone (__NR_SYSCALL_BASE+120)
+#endif
+
+#if !defined(__NR_setdomainname)
+#define __NR_setdomainname (__NR_SYSCALL_BASE+121)
+#endif
+
+#if !defined(__NR_uname)
+#define __NR_uname (__NR_SYSCALL_BASE+122)
+#endif
+
+#if !defined(__NR_adjtimex)
+#define __NR_adjtimex (__NR_SYSCALL_BASE+124)
+#endif
+
+#if !defined(__NR_mprotect)
+#define __NR_mprotect (__NR_SYSCALL_BASE+125)
+#endif
+
+#if !defined(__NR_sigprocmask)
+#define __NR_sigprocmask (__NR_SYSCALL_BASE+126)
+#endif
+
+#if !defined(__NR_init_module)
+#define __NR_init_module (__NR_SYSCALL_BASE+128)
+#endif
+
+#if !defined(__NR_delete_module)
+#define __NR_delete_module (__NR_SYSCALL_BASE+129)
+#endif
+
+#if !defined(__NR_quotactl)
+#define __NR_quotactl (__NR_SYSCALL_BASE+131)
+#endif
+
+#if !defined(__NR_getpgid)
+#define __NR_getpgid (__NR_SYSCALL_BASE+132)
+#endif
+
+#if !defined(__NR_fchdir)
+#define __NR_fchdir (__NR_SYSCALL_BASE+133)
+#endif
+
+#if !defined(__NR_bdflush)
+#define __NR_bdflush (__NR_SYSCALL_BASE+134)
+#endif
+
+#if !defined(__NR_sysfs)
+#define __NR_sysfs (__NR_SYSCALL_BASE+135)
+#endif
+
+#if !defined(__NR_personality)
+#define __NR_personality (__NR_SYSCALL_BASE+136)
+#endif
+
+#if !defined(__NR_setfsuid)
+#define __NR_setfsuid (__NR_SYSCALL_BASE+138)
+#endif
+
+#if !defined(__NR_setfsgid)
+#define __NR_setfsgid (__NR_SYSCALL_BASE+139)
+#endif
+
+#if !defined(__NR__llseek)
+#define __NR__llseek (__NR_SYSCALL_BASE+140)
+#endif
+
+#if !defined(__NR_getdents)
+#define __NR_getdents (__NR_SYSCALL_BASE+141)
+#endif
+
+#if !defined(__NR__newselect)
+#define __NR__newselect (__NR_SYSCALL_BASE+142)
+#endif
+
+#if !defined(__NR_flock)
+#define __NR_flock (__NR_SYSCALL_BASE+143)
+#endif
+
+#if !defined(__NR_msync)
+#define __NR_msync (__NR_SYSCALL_BASE+144)
+#endif
+
+#if !defined(__NR_readv)
+#define __NR_readv (__NR_SYSCALL_BASE+145)
+#endif
+
+#if !defined(__NR_writev)
+#define __NR_writev (__NR_SYSCALL_BASE+146)
+#endif
+
+#if !defined(__NR_getsid)
+#define __NR_getsid (__NR_SYSCALL_BASE+147)
+#endif
+
+#if !defined(__NR_fdatasync)
+#define __NR_fdatasync (__NR_SYSCALL_BASE+148)
+#endif
+
+#if !defined(__NR__sysctl)
+#define __NR__sysctl (__NR_SYSCALL_BASE+149)
+#endif
+
+#if !defined(__NR_mlock)
+#define __NR_mlock (__NR_SYSCALL_BASE+150)
+#endif
+
+#if !defined(__NR_munlock)
+#define __NR_munlock (__NR_SYSCALL_BASE+151)
+#endif
+
+#if !defined(__NR_mlockall)
+#define __NR_mlockall (__NR_SYSCALL_BASE+152)
+#endif
+
+#if !defined(__NR_munlockall)
+#define __NR_munlockall (__NR_SYSCALL_BASE+153)
+#endif
+
+#if !defined(__NR_sched_setparam)
+#define __NR_sched_setparam (__NR_SYSCALL_BASE+154)
+#endif
+
+#if !defined(__NR_sched_getparam)
+#define __NR_sched_getparam (__NR_SYSCALL_BASE+155)
+#endif
+
+#if !defined(__NR_sched_setscheduler)
+#define __NR_sched_setscheduler (__NR_SYSCALL_BASE+156)
+#endif
+
+#if !defined(__NR_sched_getscheduler)
+#define __NR_sched_getscheduler (__NR_SYSCALL_BASE+157)
+#endif
+
+#if !defined(__NR_sched_yield)
+#define __NR_sched_yield (__NR_SYSCALL_BASE+158)
+#endif
+
+#if !defined(__NR_sched_get_priority_max)
+#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE+159)
+#endif
+
+#if !defined(__NR_sched_get_priority_min)
+#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE+160)
+#endif
+
+#if !defined(__NR_sched_rr_get_interval)
+#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE+161)
+#endif
+
+#if !defined(__NR_nanosleep)
+#define __NR_nanosleep (__NR_SYSCALL_BASE+162)
+#endif
+
+#if !defined(__NR_mremap)
+#define __NR_mremap (__NR_SYSCALL_BASE+163)
+#endif
+
+#if !defined(__NR_setresuid)
+#define __NR_setresuid (__NR_SYSCALL_BASE+164)
+#endif
+
+#if !defined(__NR_getresuid)
+#define __NR_getresuid (__NR_SYSCALL_BASE+165)
+#endif
+
+#if !defined(__NR_poll)
+#define __NR_poll (__NR_SYSCALL_BASE+168)
+#endif
+
+#if !defined(__NR_nfsservctl)
+#define __NR_nfsservctl (__NR_SYSCALL_BASE+169)
+#endif
+
+#if !defined(__NR_setresgid)
+#define __NR_setresgid (__NR_SYSCALL_BASE+170)
+#endif
+
+#if !defined(__NR_getresgid)
+#define __NR_getresgid (__NR_SYSCALL_BASE+171)
+#endif
+
+#if !defined(__NR_prctl)
+#define __NR_prctl (__NR_SYSCALL_BASE+172)
+#endif
+
+#if !defined(__NR_rt_sigreturn)
+#define __NR_rt_sigreturn (__NR_SYSCALL_BASE+173)
+#endif
+
+#if !defined(__NR_rt_sigaction)
+#define __NR_rt_sigaction (__NR_SYSCALL_BASE+174)
+#endif
+
+#if !defined(__NR_rt_sigprocmask)
+#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE+175)
+#endif
+
+#if !defined(__NR_rt_sigpending)
+#define __NR_rt_sigpending (__NR_SYSCALL_BASE+176)
+#endif
+
+#if !defined(__NR_rt_sigtimedwait)
+#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE+177)
+#endif
+
+#if !defined(__NR_rt_sigqueueinfo)
+#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE+178)
+#endif
+
+#if !defined(__NR_rt_sigsuspend)
+#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE+179)
+#endif
+
+#if !defined(__NR_pread64)
+#define __NR_pread64 (__NR_SYSCALL_BASE+180)
+#endif
+
+#if !defined(__NR_pwrite64)
+#define __NR_pwrite64 (__NR_SYSCALL_BASE+181)
+#endif
+
+#if !defined(__NR_chown)
+#define __NR_chown (__NR_SYSCALL_BASE+182)
+#endif
+
+#if !defined(__NR_getcwd)
+#define __NR_getcwd (__NR_SYSCALL_BASE+183)
+#endif
+
+#if !defined(__NR_capget)
+#define __NR_capget (__NR_SYSCALL_BASE+184)
+#endif
+
+#if !defined(__NR_capset)
+#define __NR_capset (__NR_SYSCALL_BASE+185)
+#endif
+
+#if !defined(__NR_sigaltstack)
+#define __NR_sigaltstack (__NR_SYSCALL_BASE+186)
+#endif
+
+#if !defined(__NR_sendfile)
+#define __NR_sendfile (__NR_SYSCALL_BASE+187)
+#endif
+
+#if !defined(__NR_vfork)
+#define __NR_vfork (__NR_SYSCALL_BASE+190)
+#endif
+
+#if !defined(__NR_ugetrlimit)
+#define __NR_ugetrlimit (__NR_SYSCALL_BASE+191)
+#endif
+
+#if !defined(__NR_mmap2)
+#define __NR_mmap2 (__NR_SYSCALL_BASE+192)
+#endif
+
+#if !defined(__NR_truncate64)
+#define __NR_truncate64 (__NR_SYSCALL_BASE+193)
+#endif
+
+#if !defined(__NR_ftruncate64)
+#define __NR_ftruncate64 (__NR_SYSCALL_BASE+194)
+#endif
+
+#if !defined(__NR_stat64)
+#define __NR_stat64 (__NR_SYSCALL_BASE+195)
+#endif
+
+#if !defined(__NR_lstat64)
+#define __NR_lstat64 (__NR_SYSCALL_BASE+196)
+#endif
+
+#if !defined(__NR_fstat64)
+#define __NR_fstat64 (__NR_SYSCALL_BASE+197)
+#endif
+
+#if !defined(__NR_lchown32)
+#define __NR_lchown32 (__NR_SYSCALL_BASE+198)
+#endif
+
+#if !defined(__NR_getuid32)
+#define __NR_getuid32 (__NR_SYSCALL_BASE+199)
+#endif
+
+#if !defined(__NR_getgid32)
+#define __NR_getgid32 (__NR_SYSCALL_BASE+200)
+#endif
+
+#if !defined(__NR_geteuid32)
+#define __NR_geteuid32 (__NR_SYSCALL_BASE+201)
+#endif
+
+#if !defined(__NR_getegid32)
+#define __NR_getegid32 (__NR_SYSCALL_BASE+202)
+#endif
+
+#if !defined(__NR_setreuid32)
+#define __NR_setreuid32 (__NR_SYSCALL_BASE+203)
+#endif
+
+#if !defined(__NR_setregid32)
+#define __NR_setregid32 (__NR_SYSCALL_BASE+204)
+#endif
+
+#if !defined(__NR_getgroups32)
+#define __NR_getgroups32 (__NR_SYSCALL_BASE+205)
+#endif
+
+#if !defined(__NR_setgroups32)
+#define __NR_setgroups32 (__NR_SYSCALL_BASE+206)
+#endif
+
+#if !defined(__NR_fchown32)
+#define __NR_fchown32 (__NR_SYSCALL_BASE+207)
+#endif
+
+#if !defined(__NR_setresuid32)
+#define __NR_setresuid32 (__NR_SYSCALL_BASE+208)
+#endif
+
+#if !defined(__NR_getresuid32)
+#define __NR_getresuid32 (__NR_SYSCALL_BASE+209)
+#endif
+
+#if !defined(__NR_setresgid32)
+#define __NR_setresgid32 (__NR_SYSCALL_BASE+210)
+#endif
+
+#if !defined(__NR_getresgid32)
+#define __NR_getresgid32 (__NR_SYSCALL_BASE+211)
+#endif
+
+#if !defined(__NR_chown32)
+#define __NR_chown32 (__NR_SYSCALL_BASE+212)
+#endif
+
+#if !defined(__NR_setuid32)
+#define __NR_setuid32 (__NR_SYSCALL_BASE+213)
+#endif
+
+#if !defined(__NR_setgid32)
+#define __NR_setgid32 (__NR_SYSCALL_BASE+214)
+#endif
+
+#if !defined(__NR_setfsuid32)
+#define __NR_setfsuid32 (__NR_SYSCALL_BASE+215)
+#endif
+
+#if !defined(__NR_setfsgid32)
+#define __NR_setfsgid32 (__NR_SYSCALL_BASE+216)
+#endif
+
+#if !defined(__NR_getdents64)
+#define __NR_getdents64 (__NR_SYSCALL_BASE+217)
+#endif
+
+#if !defined(__NR_pivot_root)
+#define __NR_pivot_root (__NR_SYSCALL_BASE+218)
+#endif
+
+#if !defined(__NR_mincore)
+#define __NR_mincore (__NR_SYSCALL_BASE+219)
+#endif
+
+#if !defined(__NR_madvise)
+#define __NR_madvise (__NR_SYSCALL_BASE+220)
+#endif
+
+#if !defined(__NR_fcntl64)
+#define __NR_fcntl64 (__NR_SYSCALL_BASE+221)
+#endif
+
+#if !defined(__NR_gettid)
+#define __NR_gettid (__NR_SYSCALL_BASE+224)
+#endif
+
+#if !defined(__NR_readahead)
+#define __NR_readahead (__NR_SYSCALL_BASE+225)
+#endif
+
+#if !defined(__NR_setxattr)
+#define __NR_setxattr (__NR_SYSCALL_BASE+226)
+#endif
+
+#if !defined(__NR_lsetxattr)
+#define __NR_lsetxattr (__NR_SYSCALL_BASE+227)
+#endif
+
+#if !defined(__NR_fsetxattr)
+#define __NR_fsetxattr (__NR_SYSCALL_BASE+228)
+#endif
+
+#if !defined(__NR_getxattr)
+#define __NR_getxattr (__NR_SYSCALL_BASE+229)
+#endif
+
+#if !defined(__NR_lgetxattr)
+#define __NR_lgetxattr (__NR_SYSCALL_BASE+230)
+#endif
+
+#if !defined(__NR_fgetxattr)
+#define __NR_fgetxattr (__NR_SYSCALL_BASE+231)
+#endif
+
+#if !defined(__NR_listxattr)
+#define __NR_listxattr (__NR_SYSCALL_BASE+232)
+#endif
+
+#if !defined(__NR_llistxattr)
+#define __NR_llistxattr (__NR_SYSCALL_BASE+233)
+#endif
+
+#if !defined(__NR_flistxattr)
+#define __NR_flistxattr (__NR_SYSCALL_BASE+234)
+#endif
+
+#if !defined(__NR_removexattr)
+#define __NR_removexattr (__NR_SYSCALL_BASE+235)
+#endif
+
+#if !defined(__NR_lremovexattr)
+#define __NR_lremovexattr (__NR_SYSCALL_BASE+236)
+#endif
+
+#if !defined(__NR_fremovexattr)
+#define __NR_fremovexattr (__NR_SYSCALL_BASE+237)
+#endif
+
+#if !defined(__NR_tkill)
+#define __NR_tkill (__NR_SYSCALL_BASE+238)
+#endif
+
+#if !defined(__NR_sendfile64)
+#define __NR_sendfile64 (__NR_SYSCALL_BASE+239)
+#endif
+
+#if !defined(__NR_futex)
+#define __NR_futex (__NR_SYSCALL_BASE+240)
+#endif
+
+#if !defined(__NR_sched_setaffinity)
+#define __NR_sched_setaffinity (__NR_SYSCALL_BASE+241)
+#endif
+
+#if !defined(__NR_sched_getaffinity)
+#define __NR_sched_getaffinity (__NR_SYSCALL_BASE+242)
+#endif
+
+#if !defined(__NR_io_setup)
+#define __NR_io_setup (__NR_SYSCALL_BASE+243)
+#endif
+
+#if !defined(__NR_io_destroy)
+#define __NR_io_destroy (__NR_SYSCALL_BASE+244)
+#endif
+
+#if !defined(__NR_io_getevents)
+#define __NR_io_getevents (__NR_SYSCALL_BASE+245)
+#endif
+
+#if !defined(__NR_io_submit)
+#define __NR_io_submit (__NR_SYSCALL_BASE+246)
+#endif
+
+#if !defined(__NR_io_cancel)
+#define __NR_io_cancel (__NR_SYSCALL_BASE+247)
+#endif
+
+#if !defined(__NR_exit_group)
+#define __NR_exit_group (__NR_SYSCALL_BASE+248)
+#endif
+
+#if !defined(__NR_lookup_dcookie)
+#define __NR_lookup_dcookie (__NR_SYSCALL_BASE+249)
+#endif
+
+#if !defined(__NR_epoll_create)
+#define __NR_epoll_create (__NR_SYSCALL_BASE+250)
+#endif
+
+#if !defined(__NR_epoll_ctl)
+#define __NR_epoll_ctl (__NR_SYSCALL_BASE+251)
+#endif
+
+#if !defined(__NR_epoll_wait)
+#define __NR_epoll_wait (__NR_SYSCALL_BASE+252)
+#endif
+
+#if !defined(__NR_remap_file_pages)
+#define __NR_remap_file_pages (__NR_SYSCALL_BASE+253)
+#endif
+
+#if !defined(__NR_set_tid_address)
+#define __NR_set_tid_address (__NR_SYSCALL_BASE+256)
+#endif
+
+#if !defined(__NR_timer_create)
+#define __NR_timer_create (__NR_SYSCALL_BASE+257)
+#endif
+
+#if !defined(__NR_timer_settime)
+#define __NR_timer_settime (__NR_SYSCALL_BASE+258)
+#endif
+
+#if !defined(__NR_timer_gettime)
+#define __NR_timer_gettime (__NR_SYSCALL_BASE+259)
+#endif
+
+#if !defined(__NR_timer_getoverrun)
+#define __NR_timer_getoverrun (__NR_SYSCALL_BASE+260)
+#endif
+
+#if !defined(__NR_timer_delete)
+#define __NR_timer_delete (__NR_SYSCALL_BASE+261)
+#endif
+
+#if !defined(__NR_clock_settime)
+#define __NR_clock_settime (__NR_SYSCALL_BASE+262)
+#endif
+
+#if !defined(__NR_clock_gettime)
+#define __NR_clock_gettime (__NR_SYSCALL_BASE+263)
+#endif
+
+#if !defined(__NR_clock_getres)
+#define __NR_clock_getres (__NR_SYSCALL_BASE+264)
+#endif
+
+#if !defined(__NR_clock_nanosleep)
+#define __NR_clock_nanosleep (__NR_SYSCALL_BASE+265)
+#endif
+
+#if !defined(__NR_statfs64)
+#define __NR_statfs64 (__NR_SYSCALL_BASE+266)
+#endif
+
+#if !defined(__NR_fstatfs64)
+#define __NR_fstatfs64 (__NR_SYSCALL_BASE+267)
+#endif
+
+#if !defined(__NR_tgkill)
+#define __NR_tgkill (__NR_SYSCALL_BASE+268)
+#endif
+
+#if !defined(__NR_utimes)
+#define __NR_utimes (__NR_SYSCALL_BASE+269)
+#endif
+
+#if !defined(__NR_arm_fadvise64_64)
+#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE+270)
+#endif
+
+#if !defined(__NR_pciconfig_iobase)
+#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE+271)
+#endif
+
+#if !defined(__NR_pciconfig_read)
+#define __NR_pciconfig_read (__NR_SYSCALL_BASE+272)
+#endif
+
+#if !defined(__NR_pciconfig_write)
+#define __NR_pciconfig_write (__NR_SYSCALL_BASE+273)
+#endif
+
+#if !defined(__NR_mq_open)
+#define __NR_mq_open (__NR_SYSCALL_BASE+274)
+#endif
+
+#if !defined(__NR_mq_unlink)
+#define __NR_mq_unlink (__NR_SYSCALL_BASE+275)
+#endif
+
+#if !defined(__NR_mq_timedsend)
+#define __NR_mq_timedsend (__NR_SYSCALL_BASE+276)
+#endif
+
+#if !defined(__NR_mq_timedreceive)
+#define __NR_mq_timedreceive (__NR_SYSCALL_BASE+277)
+#endif
+
+#if !defined(__NR_mq_notify)
+#define __NR_mq_notify (__NR_SYSCALL_BASE+278)
+#endif
+
+#if !defined(__NR_mq_getsetattr)
+#define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279)
+#endif
+
+#if !defined(__NR_waitid)
+#define __NR_waitid (__NR_SYSCALL_BASE+280)
+#endif
+
+#if !defined(__NR_socket)
+#define __NR_socket (__NR_SYSCALL_BASE+281)
+#endif
+
+#if !defined(__NR_bind)
+#define __NR_bind (__NR_SYSCALL_BASE+282)
+#endif
+
+#if !defined(__NR_connect)
+#define __NR_connect (__NR_SYSCALL_BASE+283)
+#endif
+
+#if !defined(__NR_listen)
+#define __NR_listen (__NR_SYSCALL_BASE+284)
+#endif
+
+#if !defined(__NR_accept)
+#define __NR_accept (__NR_SYSCALL_BASE+285)
+#endif
+
+#if !defined(__NR_getsockname)
+#define __NR_getsockname (__NR_SYSCALL_BASE+286)
+#endif
+
+#if !defined(__NR_getpeername)
+#define __NR_getpeername (__NR_SYSCALL_BASE+287)
+#endif
+
+#if !defined(__NR_socketpair)
+#define __NR_socketpair (__NR_SYSCALL_BASE+288)
+#endif
+
+#if !defined(__NR_send)
+#define __NR_send (__NR_SYSCALL_BASE+289)
+#endif
+
+#if !defined(__NR_sendto)
+#define __NR_sendto (__NR_SYSCALL_BASE+290)
+#endif
+
+#if !defined(__NR_recv)
+#define __NR_recv (__NR_SYSCALL_BASE+291)
+#endif
+
+#if !defined(__NR_recvfrom)
+#define __NR_recvfrom (__NR_SYSCALL_BASE+292)
+#endif
+
+#if !defined(__NR_shutdown)
+#define __NR_shutdown (__NR_SYSCALL_BASE+293)
+#endif
+
+#if !defined(__NR_setsockopt)
+#define __NR_setsockopt (__NR_SYSCALL_BASE+294)
+#endif
+
+#if !defined(__NR_getsockopt)
+#define __NR_getsockopt (__NR_SYSCALL_BASE+295)
+#endif
+
+#if !defined(__NR_sendmsg)
+#define __NR_sendmsg (__NR_SYSCALL_BASE+296)
+#endif
+
+#if !defined(__NR_recvmsg)
+#define __NR_recvmsg (__NR_SYSCALL_BASE+297)
+#endif
+
+#if !defined(__NR_semop)
+#define __NR_semop (__NR_SYSCALL_BASE+298)
+#endif
+
+#if !defined(__NR_semget)
+#define __NR_semget (__NR_SYSCALL_BASE+299)
+#endif
+
+#if !defined(__NR_semctl)
+#define __NR_semctl (__NR_SYSCALL_BASE+300)
+#endif
+
+#if !defined(__NR_msgsnd)
+#define __NR_msgsnd (__NR_SYSCALL_BASE+301)
+#endif
+
+#if !defined(__NR_msgrcv)
+#define __NR_msgrcv (__NR_SYSCALL_BASE+302)
+#endif
+
+#if !defined(__NR_msgget)
+#define __NR_msgget (__NR_SYSCALL_BASE+303)
+#endif
+
+#if !defined(__NR_msgctl)
+#define __NR_msgctl (__NR_SYSCALL_BASE+304)
+#endif
+
+#if !defined(__NR_shmat)
+#define __NR_shmat (__NR_SYSCALL_BASE+305)
+#endif
+
+#if !defined(__NR_shmdt)
+#define __NR_shmdt (__NR_SYSCALL_BASE+306)
+#endif
+
+#if !defined(__NR_shmget)
+#define __NR_shmget (__NR_SYSCALL_BASE+307)
+#endif
+
+#if !defined(__NR_shmctl)
+#define __NR_shmctl (__NR_SYSCALL_BASE+308)
+#endif
+
+#if !defined(__NR_add_key)
+#define __NR_add_key (__NR_SYSCALL_BASE+309)
+#endif
+
+#if !defined(__NR_request_key)
+#define __NR_request_key (__NR_SYSCALL_BASE+310)
+#endif
+
+#if !defined(__NR_keyctl)
+#define __NR_keyctl (__NR_SYSCALL_BASE+311)
+#endif
+
+#if !defined(__NR_semtimedop)
+#define __NR_semtimedop (__NR_SYSCALL_BASE+312)
+#endif
+
+#if !defined(__NR_vserver)
+#define __NR_vserver (__NR_SYSCALL_BASE+313)
+#endif
+
+#if !defined(__NR_ioprio_set)
+#define __NR_ioprio_set (__NR_SYSCALL_BASE+314)
+#endif
+
+#if !defined(__NR_ioprio_get)
+#define __NR_ioprio_get (__NR_SYSCALL_BASE+315)
+#endif
+
+#if !defined(__NR_inotify_init)
+#define __NR_inotify_init (__NR_SYSCALL_BASE+316)
+#endif
+
+#if !defined(__NR_inotify_add_watch)
+#define __NR_inotify_add_watch (__NR_SYSCALL_BASE+317)
+#endif
+
+#if !defined(__NR_inotify_rm_watch)
+#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE+318)
+#endif
+
+#if !defined(__NR_mbind)
+#define __NR_mbind (__NR_SYSCALL_BASE+319)
+#endif
+
+#if !defined(__NR_get_mempolicy)
+#define __NR_get_mempolicy (__NR_SYSCALL_BASE+320)
+#endif
+
+#if !defined(__NR_set_mempolicy)
+#define __NR_set_mempolicy (__NR_SYSCALL_BASE+321)
+#endif
+
+#if !defined(__NR_openat)
+#define __NR_openat (__NR_SYSCALL_BASE+322)
+#endif
+
+#if !defined(__NR_mkdirat)
+#define __NR_mkdirat (__NR_SYSCALL_BASE+323)
+#endif
+
+#if !defined(__NR_mknodat)
+#define __NR_mknodat (__NR_SYSCALL_BASE+324)
+#endif
+
+#if !defined(__NR_fchownat)
+#define __NR_fchownat (__NR_SYSCALL_BASE+325)
+#endif
+
+#if !defined(__NR_futimesat)
+#define __NR_futimesat (__NR_SYSCALL_BASE+326)
+#endif
+
+#if !defined(__NR_fstatat64)
+#define __NR_fstatat64 (__NR_SYSCALL_BASE+327)
+#endif
+
+#if !defined(__NR_unlinkat)
+#define __NR_unlinkat (__NR_SYSCALL_BASE+328)
+#endif
+
+#if !defined(__NR_renameat)
+#define __NR_renameat (__NR_SYSCALL_BASE+329)
+#endif
+
+#if !defined(__NR_linkat)
+#define __NR_linkat (__NR_SYSCALL_BASE+330)
+#endif
+
+#if !defined(__NR_symlinkat)
+#define __NR_symlinkat (__NR_SYSCALL_BASE+331)
+#endif
+
+#if !defined(__NR_readlinkat)
+#define __NR_readlinkat (__NR_SYSCALL_BASE+332)
+#endif
+
+#if !defined(__NR_fchmodat)
+#define __NR_fchmodat (__NR_SYSCALL_BASE+333)
+#endif
+
+#if !defined(__NR_faccessat)
+#define __NR_faccessat (__NR_SYSCALL_BASE+334)
+#endif
+
+#if !defined(__NR_pselect6)
+#define __NR_pselect6 (__NR_SYSCALL_BASE+335)
+#endif
+
+#if !defined(__NR_ppoll)
+#define __NR_ppoll (__NR_SYSCALL_BASE+336)
+#endif
+
+#if !defined(__NR_unshare)
+#define __NR_unshare (__NR_SYSCALL_BASE+337)
+#endif
+
+#if !defined(__NR_set_robust_list)
+#define __NR_set_robust_list (__NR_SYSCALL_BASE+338)
+#endif
+
+#if !defined(__NR_get_robust_list)
+#define __NR_get_robust_list (__NR_SYSCALL_BASE+339)
+#endif
+
+#if !defined(__NR_splice)
+#define __NR_splice (__NR_SYSCALL_BASE+340)
+#endif
+
+#if !defined(__NR_arm_sync_file_range)
+#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE+341)
+#endif
+
+#if !defined(__NR_tee)
+#define __NR_tee (__NR_SYSCALL_BASE+342)
+#endif
+
+#if !defined(__NR_vmsplice)
+#define __NR_vmsplice (__NR_SYSCALL_BASE+343)
+#endif
+
+#if !defined(__NR_move_pages)
+#define __NR_move_pages (__NR_SYSCALL_BASE+344)
+#endif
+
+#if !defined(__NR_getcpu)
+#define __NR_getcpu (__NR_SYSCALL_BASE+345)
+#endif
+
+#if !defined(__NR_epoll_pwait)
+#define __NR_epoll_pwait (__NR_SYSCALL_BASE+346)
+#endif
+
+#if !defined(__NR_kexec_load)
+#define __NR_kexec_load (__NR_SYSCALL_BASE+347)
+#endif
+
+#if !defined(__NR_utimensat)
+#define __NR_utimensat (__NR_SYSCALL_BASE+348)
+#endif
+
+#if !defined(__NR_signalfd)
+#define __NR_signalfd (__NR_SYSCALL_BASE+349)
+#endif
+
+#if !defined(__NR_timerfd_create)
+#define __NR_timerfd_create (__NR_SYSCALL_BASE+350)
+#endif
+
+#if !defined(__NR_eventfd)
+#define __NR_eventfd (__NR_SYSCALL_BASE+351)
+#endif
+
+#if !defined(__NR_fallocate)
+#define __NR_fallocate (__NR_SYSCALL_BASE+352)
+#endif
+
+#if !defined(__NR_timerfd_settime)
+#define __NR_timerfd_settime (__NR_SYSCALL_BASE+353)
+#endif
+
+#if !defined(__NR_timerfd_gettime)
+#define __NR_timerfd_gettime (__NR_SYSCALL_BASE+354)
+#endif
+
+#if !defined(__NR_signalfd4)
+#define __NR_signalfd4 (__NR_SYSCALL_BASE+355)
+#endif
+
+#if !defined(__NR_eventfd2)
+#define __NR_eventfd2 (__NR_SYSCALL_BASE+356)
+#endif
+
+#if !defined(__NR_epoll_create1)
+#define __NR_epoll_create1 (__NR_SYSCALL_BASE+357)
+#endif
+
+#if !defined(__NR_dup3)
+#define __NR_dup3 (__NR_SYSCALL_BASE+358)
+#endif
+
+#if !defined(__NR_pipe2)
+#define __NR_pipe2 (__NR_SYSCALL_BASE+359)
+#endif
+
+#if !defined(__NR_inotify_init1)
+#define __NR_inotify_init1 (__NR_SYSCALL_BASE+360)
+#endif
+
+#if !defined(__NR_preadv)
+#define __NR_preadv (__NR_SYSCALL_BASE+361)
+#endif
+
+#if !defined(__NR_pwritev)
+#define __NR_pwritev (__NR_SYSCALL_BASE+362)
+#endif
+
+#if !defined(__NR_rt_tgsigqueueinfo)
+#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363)
+#endif
+
+#if !defined(__NR_perf_event_open)
+#define __NR_perf_event_open (__NR_SYSCALL_BASE+364)
+#endif
+
+#if !defined(__NR_recvmmsg)
+#define __NR_recvmmsg (__NR_SYSCALL_BASE+365)
+#endif
+
+#if !defined(__NR_accept4)
+#define __NR_accept4 (__NR_SYSCALL_BASE+366)
+#endif
+
+#if !defined(__NR_fanotify_init)
+#define __NR_fanotify_init (__NR_SYSCALL_BASE+367)
+#endif
+
+#if !defined(__NR_fanotify_mark)
+#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368)
+#endif
+
+#if !defined(__NR_prlimit64)
+#define __NR_prlimit64 (__NR_SYSCALL_BASE+369)
+#endif
+
+#if !defined(__NR_name_to_handle_at)
+#define __NR_name_to_handle_at (__NR_SYSCALL_BASE+370)
+#endif
+
+#if !defined(__NR_open_by_handle_at)
+#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371)
+#endif
+
+#if !defined(__NR_clock_adjtime)
+#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372)
+#endif
+
+#if !defined(__NR_syncfs)
+#define __NR_syncfs (__NR_SYSCALL_BASE+373)
+#endif
+
+#if !defined(__NR_sendmmsg)
+#define __NR_sendmmsg (__NR_SYSCALL_BASE+374)
+#endif
+
+#if !defined(__NR_setns)
+#define __NR_setns (__NR_SYSCALL_BASE+375)
+#endif
+
+#if !defined(__NR_process_vm_readv)
+#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376)
+#endif
+
+#if !defined(__NR_process_vm_writev)
+#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377)
+#endif
+
+// ARM private syscalls.
+#if !defined(__ARM_NR_breakpoint)
+#define __ARM_NR_breakpoint (__ARM_NR_BASE+1)
+#endif
+
+#if !defined(__ARM_NR_cacheflush)
+#define __ARM_NR_cacheflush (__ARM_NR_BASE+2)
+#endif
+
+#if !defined(__ARM_NR_usr26)
+#define __ARM_NR_usr26 (__ARM_NR_BASE+3)
+#endif
+
+#if !defined(__ARM_NR_usr32)
+#define __ARM_NR_usr32 (__ARM_NR_BASE+4)
+#endif
+
+#if !defined(__ARM_NR_set_tls)
+#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
+#endif
+
+// ARM kernel private syscall.
+#if !defined(__ARM_NR_cmpxchg)
+#define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0)
+#endif
+
+#endif // SANDBOX_LINUX_SERVICES_ARM_LINUX_SYSCALLS_H_
+
diff --git a/sandbox/linux/services/libc_urandom_override.cc b/sandbox/linux/services/libc_urandom_override.cc
new file mode 100644
index 0000000..ee34045
--- /dev/null
+++ b/sandbox/linux/services/libc_urandom_override.cc
@@ -0,0 +1,167 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/libc_urandom_override.h"
+
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "base/eintr_wrapper.h"
+#include "base/logging.h"
+#include "base/rand_util.h"
+
+// Note: this file is used by the zygote and nacl_helper.
+
+namespace sandbox {
+
+// With SELinux we can carve out a precise sandbox, so we don't have to play
+// with intercepting libc calls.
+#if !defined(CHROMIUM_SELINUX)
+
+static bool g_override_urandom = false;
+
+void InitLibcUrandomOverrides() {
+ // Make sure /dev/urandom is open.
+ base::GetUrandomFD();
+ g_override_urandom = true;
+}
+
+// TODO(sergeyu): Currently this code doesn't work properly under ASAN
+// - it crashes content_unittests. Make sure it works properly and
+// enable it here. http://crbug.com/123263
+#if !defined(ADDRESS_SANITIZER)
+
+static const char kUrandomDevPath[] = "/dev/urandom";
+
+typedef FILE* (*FopenFunction)(const char* path, const char* mode);
+typedef int (*XstatFunction)(int version, const char *path, struct stat *buf);
+typedef int (*Xstat64Function)(int version, const char *path,
+ struct stat64 *buf);
+
+static pthread_once_t g_libc_file_io_funcs_guard = PTHREAD_ONCE_INIT;
+static FopenFunction g_libc_fopen;
+static FopenFunction g_libc_fopen64;
+static XstatFunction g_libc_xstat;
+static Xstat64Function g_libc_xstat64;
+
+static void InitLibcFileIOFunctions() {
+ g_libc_fopen = reinterpret_cast<FopenFunction>(
+ dlsym(RTLD_NEXT, "fopen"));
+ g_libc_fopen64 = reinterpret_cast<FopenFunction>(
+ dlsym(RTLD_NEXT, "fopen64"));
+
+ if (!g_libc_fopen) {
+ LOG(FATAL) << "Failed to get fopen() from libc.";
+ } else if (!g_libc_fopen64) {
+#if !defined(OS_OPENBSD) && !defined(OS_FREEBSD)
+ LOG(WARNING) << "Failed to get fopen64() from libc. Using fopen() instead.";
+#endif // !defined(OS_OPENBSD) && !defined(OS_FREEBSD)
+ g_libc_fopen64 = g_libc_fopen;
+ }
+
+ // TODO(sergeyu): This works only on systems with glibc. Fix it to
+ // work properly on other systems if necessary.
+ g_libc_xstat = reinterpret_cast<XstatFunction>(
+ dlsym(RTLD_NEXT, "__xstat"));
+ g_libc_xstat64 = reinterpret_cast<Xstat64Function>(
+ dlsym(RTLD_NEXT, "__xstat64"));
+
+ if (!g_libc_xstat) {
+ LOG(FATAL) << "Failed to get __xstat() from libc.";
+ }
+ if (!g_libc_xstat64) {
+ LOG(WARNING) << "Failed to get __xstat64() from libc.";
+ }
+}
+
+// fopen() and fopen64() are intercepted here so that NSS can open
+// /dev/urandom to seed its random number generator. NSS is used by
+// remoting in the sandbox.
+
+// fopen() call may be redirected to fopen64() in stdio.h using
+// __REDIRECT(), which sets asm name for fopen() to "fopen64". This
+// means that we cannot override fopen() directly here. Instead the
+// code below defines fopen_override() function with asm name
+// "fopen", so that all references to fopen() will resolve to this
+// function.
+__attribute__ ((__visibility__("default")))
+FILE* fopen_override(const char* path, const char* mode) __asm__ ("fopen");
+
+__attribute__ ((__visibility__("default")))
+FILE* fopen_override(const char* path, const char* mode) {
+ if (g_override_urandom && strcmp(path, kUrandomDevPath) == 0) {
+ int fd = HANDLE_EINTR(dup(base::GetUrandomFD()));
+ if (fd < 0) {
+ PLOG(ERROR) << "dup() failed.";
+ return NULL;
+ }
+ return fdopen(fd, mode);
+ } else {
+ CHECK_EQ(0, pthread_once(&g_libc_file_io_funcs_guard,
+ InitLibcFileIOFunctions));
+ return g_libc_fopen(path, mode);
+ }
+}
+
+__attribute__ ((__visibility__("default")))
+FILE* fopen64(const char* path, const char* mode) {
+ if (g_override_urandom && strcmp(path, kUrandomDevPath) == 0) {
+ int fd = HANDLE_EINTR(dup(base::GetUrandomFD()));
+ if (fd < 0) {
+ PLOG(ERROR) << "dup() failed.";
+ return NULL;
+ }
+ return fdopen(fd, mode);
+ } else {
+ CHECK_EQ(0, pthread_once(&g_libc_file_io_funcs_guard,
+ InitLibcFileIOFunctions));
+ return g_libc_fopen64(path, mode);
+ }
+}
+
+// stat() is subject to the same problem as fopen(), so we have to use
+// the same trick to override it.
+__attribute__ ((__visibility__("default")))
+int xstat_override(int version,
+ const char *path,
+ struct stat *buf) __asm__ ("__xstat");
+
+__attribute__ ((__visibility__("default")))
+int xstat_override(int version, const char *path, struct stat *buf) {
+ if (g_override_urandom && strcmp(path, kUrandomDevPath) == 0) {
+ int result = __fxstat(version, base::GetUrandomFD(), buf);
+ return result;
+ } else {
+ CHECK_EQ(0, pthread_once(&g_libc_file_io_funcs_guard,
+ InitLibcFileIOFunctions));
+ return g_libc_xstat(version, path, buf);
+ }
+}
+
+__attribute__ ((__visibility__("default")))
+int xstat64_override(int version,
+ const char *path,
+ struct stat64 *buf) __asm__ ("__xstat64");
+
+__attribute__ ((__visibility__("default")))
+int xstat64_override(int version, const char *path, struct stat64 *buf) {
+ if (g_override_urandom && strcmp(path, kUrandomDevPath) == 0) {
+ int result = __fxstat64(version, base::GetUrandomFD(), buf);
+ return result;
+ } else {
+ CHECK_EQ(0, pthread_once(&g_libc_file_io_funcs_guard,
+ InitLibcFileIOFunctions));
+ CHECK(g_libc_xstat64);
+ return g_libc_xstat64(version, path, buf);
+ }
+}
+
+#endif // !ADDRESS_SANITIZER
+
+#endif // !CHROMIUM_SELINUX
+
+}  // namespace sandbox
diff --git a/sandbox/linux/services/libc_urandom_override.h b/sandbox/linux/services/libc_urandom_override.h
new file mode 100644
index 0000000..1990313
--- /dev/null
+++ b/sandbox/linux/services/libc_urandom_override.h
@@ -0,0 +1,18 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_LIBC_URANDOM_OVERRIDE_H_
+#define SANDBOX_LINUX_SERVICES_LIBC_URANDOM_OVERRIDE_H_
+
+namespace sandbox {
+
+#if !defined(CHROMIUM_SELINUX)
+
+void InitLibcUrandomOverrides();
+
+#endif // !CHROMIUM_SELINUX
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_LIBC_URANDOM_OVERRIDE_H_
diff --git a/sandbox/linux/services/linux_syscalls.h b/sandbox/linux/services/linux_syscalls.h
new file mode 100644
index 0000000..77c1be8
--- /dev/null
+++ b/sandbox/linux/services/linux_syscalls.h
@@ -0,0 +1,25 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This header will be kept up to date so that we can compile system-call
+// policies even when system headers are old.
+// System call numbers are accessible through __NR_syscall_name.
+
+#ifndef SANDBOX_LINUX_SERVICES_LINUX_SYSCALLS_H_
+#define SANDBOX_LINUX_SERVICES_LINUX_SYSCALLS_H_
+
+#if defined(__x86_64__)
+#include "sandbox/linux/services/x86_64_linux_syscalls.h"
+#endif
+
+#if defined(__i386__)
+#include "sandbox/linux/services/x86_32_linux_syscalls.h"
+#endif
+
+#if defined(__arm__) && defined(__ARM_EABI__)
+#include "sandbox/linux/services/arm_linux_syscalls.h"
+#endif
+
+#endif // SANDBOX_LINUX_SERVICES_LINUX_SYSCALLS_H_
+
diff --git a/sandbox/linux/services/x86_32_linux_syscalls.h b/sandbox/linux/services/x86_32_linux_syscalls.h
new file mode 100644
index 0000000..ca8ccbd
--- /dev/null
+++ b/sandbox/linux/services/x86_32_linux_syscalls.h
@@ -0,0 +1,1398 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Generated from the Linux kernel's syscall_32.tbl.
+#ifndef SANDBOX_LINUX_SERVICES_X86_32_LINUX_SYSCALLS_H_
+#define SANDBOX_LINUX_SERVICES_X86_32_LINUX_SYSCALLS_H_
+
+#if !defined(__i386__)
+#error "Including header on wrong architecture"
+#endif
+
+#if !defined(__NR_restart_syscall)
+#define __NR_restart_syscall 0
+#endif
+
+#if !defined(__NR_exit)
+#define __NR_exit 1
+#endif
+
+#if !defined(__NR_fork)
+#define __NR_fork 2
+#endif
+
+#if !defined(__NR_read)
+#define __NR_read 3
+#endif
+
+#if !defined(__NR_write)
+#define __NR_write 4
+#endif
+
+#if !defined(__NR_open)
+#define __NR_open 5
+#endif
+
+#if !defined(__NR_close)
+#define __NR_close 6
+#endif
+
+#if !defined(__NR_waitpid)
+#define __NR_waitpid 7
+#endif
+
+#if !defined(__NR_creat)
+#define __NR_creat 8
+#endif
+
+#if !defined(__NR_link)
+#define __NR_link 9
+#endif
+
+#if !defined(__NR_unlink)
+#define __NR_unlink 10
+#endif
+
+#if !defined(__NR_execve)
+#define __NR_execve 11
+#endif
+
+#if !defined(__NR_chdir)
+#define __NR_chdir 12
+#endif
+
+#if !defined(__NR_time)
+#define __NR_time 13
+#endif
+
+#if !defined(__NR_mknod)
+#define __NR_mknod 14
+#endif
+
+#if !defined(__NR_chmod)
+#define __NR_chmod 15
+#endif
+
+#if !defined(__NR_lchown)
+#define __NR_lchown 16
+#endif
+
+#if !defined(__NR_break)
+#define __NR_break 17
+#endif
+
+#if !defined(__NR_oldstat)
+#define __NR_oldstat 18
+#endif
+
+#if !defined(__NR_lseek)
+#define __NR_lseek 19
+#endif
+
+#if !defined(__NR_getpid)
+#define __NR_getpid 20
+#endif
+
+#if !defined(__NR_mount)
+#define __NR_mount 21
+#endif
+
+#if !defined(__NR_umount)
+#define __NR_umount 22
+#endif
+
+#if !defined(__NR_setuid)
+#define __NR_setuid 23
+#endif
+
+#if !defined(__NR_getuid)
+#define __NR_getuid 24
+#endif
+
+#if !defined(__NR_stime)
+#define __NR_stime 25
+#endif
+
+#if !defined(__NR_ptrace)
+#define __NR_ptrace 26
+#endif
+
+#if !defined(__NR_alarm)
+#define __NR_alarm 27
+#endif
+
+#if !defined(__NR_oldfstat)
+#define __NR_oldfstat 28
+#endif
+
+#if !defined(__NR_pause)
+#define __NR_pause 29
+#endif
+
+#if !defined(__NR_utime)
+#define __NR_utime 30
+#endif
+
+#if !defined(__NR_stty)
+#define __NR_stty 31
+#endif
+
+#if !defined(__NR_gtty)
+#define __NR_gtty 32
+#endif
+
+#if !defined(__NR_access)
+#define __NR_access 33
+#endif
+
+#if !defined(__NR_nice)
+#define __NR_nice 34
+#endif
+
+#if !defined(__NR_ftime)
+#define __NR_ftime 35
+#endif
+
+#if !defined(__NR_sync)
+#define __NR_sync 36
+#endif
+
+#if !defined(__NR_kill)
+#define __NR_kill 37
+#endif
+
+#if !defined(__NR_rename)
+#define __NR_rename 38
+#endif
+
+#if !defined(__NR_mkdir)
+#define __NR_mkdir 39
+#endif
+
+#if !defined(__NR_rmdir)
+#define __NR_rmdir 40
+#endif
+
+#if !defined(__NR_dup)
+#define __NR_dup 41
+#endif
+
+#if !defined(__NR_pipe)
+#define __NR_pipe 42
+#endif
+
+#if !defined(__NR_times)
+#define __NR_times 43
+#endif
+
+#if !defined(__NR_prof)
+#define __NR_prof 44
+#endif
+
+#if !defined(__NR_brk)
+#define __NR_brk 45
+#endif
+
+#if !defined(__NR_setgid)
+#define __NR_setgid 46
+#endif
+
+#if !defined(__NR_getgid)
+#define __NR_getgid 47
+#endif
+
+#if !defined(__NR_signal)
+#define __NR_signal 48
+#endif
+
+#if !defined(__NR_geteuid)
+#define __NR_geteuid 49
+#endif
+
+#if !defined(__NR_getegid)
+#define __NR_getegid 50
+#endif
+
+#if !defined(__NR_acct)
+#define __NR_acct 51
+#endif
+
+#if !defined(__NR_umount2)
+#define __NR_umount2 52
+#endif
+
+#if !defined(__NR_lock)
+#define __NR_lock 53
+#endif
+
+#if !defined(__NR_ioctl)
+#define __NR_ioctl 54
+#endif
+
+#if !defined(__NR_fcntl)
+#define __NR_fcntl 55
+#endif
+
+#if !defined(__NR_mpx)
+#define __NR_mpx 56
+#endif
+
+#if !defined(__NR_setpgid)
+#define __NR_setpgid 57
+#endif
+
+#if !defined(__NR_ulimit)
+#define __NR_ulimit 58
+#endif
+
+#if !defined(__NR_oldolduname)
+#define __NR_oldolduname 59
+#endif
+
+#if !defined(__NR_umask)
+#define __NR_umask 60
+#endif
+
+#if !defined(__NR_chroot)
+#define __NR_chroot 61
+#endif
+
+#if !defined(__NR_ustat)
+#define __NR_ustat 62
+#endif
+
+#if !defined(__NR_dup2)
+#define __NR_dup2 63
+#endif
+
+#if !defined(__NR_getppid)
+#define __NR_getppid 64
+#endif
+
+#if !defined(__NR_getpgrp)
+#define __NR_getpgrp 65
+#endif
+
+#if !defined(__NR_setsid)
+#define __NR_setsid 66
+#endif
+
+#if !defined(__NR_sigaction)
+#define __NR_sigaction 67
+#endif
+
+#if !defined(__NR_sgetmask)
+#define __NR_sgetmask 68
+#endif
+
+#if !defined(__NR_ssetmask)
+#define __NR_ssetmask 69
+#endif
+
+#if !defined(__NR_setreuid)
+#define __NR_setreuid 70
+#endif
+
+#if !defined(__NR_setregid)
+#define __NR_setregid 71
+#endif
+
+#if !defined(__NR_sigsuspend)
+#define __NR_sigsuspend 72
+#endif
+
+#if !defined(__NR_sigpending)
+#define __NR_sigpending 73
+#endif
+
+#if !defined(__NR_sethostname)
+#define __NR_sethostname 74
+#endif
+
+#if !defined(__NR_setrlimit)
+#define __NR_setrlimit 75
+#endif
+
+#if !defined(__NR_getrlimit)
+#define __NR_getrlimit 76
+#endif
+
+#if !defined(__NR_getrusage)
+#define __NR_getrusage 77
+#endif
+
+#if !defined(__NR_gettimeofday)
+#define __NR_gettimeofday 78
+#endif
+
+#if !defined(__NR_settimeofday)
+#define __NR_settimeofday 79
+#endif
+
+#if !defined(__NR_getgroups)
+#define __NR_getgroups 80
+#endif
+
+#if !defined(__NR_setgroups)
+#define __NR_setgroups 81
+#endif
+
+#if !defined(__NR_select)
+#define __NR_select 82
+#endif
+
+#if !defined(__NR_symlink)
+#define __NR_symlink 83
+#endif
+
+#if !defined(__NR_oldlstat)
+#define __NR_oldlstat 84
+#endif
+
+#if !defined(__NR_readlink)
+#define __NR_readlink 85
+#endif
+
+#if !defined(__NR_uselib)
+#define __NR_uselib 86
+#endif
+
+#if !defined(__NR_swapon)
+#define __NR_swapon 87
+#endif
+
+#if !defined(__NR_reboot)
+#define __NR_reboot 88
+#endif
+
+#if !defined(__NR_readdir)
+#define __NR_readdir 89
+#endif
+
+#if !defined(__NR_mmap)
+#define __NR_mmap 90
+#endif
+
+#if !defined(__NR_munmap)
+#define __NR_munmap 91
+#endif
+
+#if !defined(__NR_truncate)
+#define __NR_truncate 92
+#endif
+
+#if !defined(__NR_ftruncate)
+#define __NR_ftruncate 93
+#endif
+
+#if !defined(__NR_fchmod)
+#define __NR_fchmod 94
+#endif
+
+#if !defined(__NR_fchown)
+#define __NR_fchown 95
+#endif
+
+#if !defined(__NR_getpriority)
+#define __NR_getpriority 96
+#endif
+
+#if !defined(__NR_setpriority)
+#define __NR_setpriority 97
+#endif
+
+#if !defined(__NR_profil)
+#define __NR_profil 98
+#endif
+
+#if !defined(__NR_statfs)
+#define __NR_statfs 99
+#endif
+
+#if !defined(__NR_fstatfs)
+#define __NR_fstatfs 100
+#endif
+
+#if !defined(__NR_ioperm)
+#define __NR_ioperm 101
+#endif
+
+#if !defined(__NR_socketcall)
+#define __NR_socketcall 102
+#endif
+
+#if !defined(__NR_syslog)
+#define __NR_syslog 103
+#endif
+
+#if !defined(__NR_setitimer)
+#define __NR_setitimer 104
+#endif
+
+#if !defined(__NR_getitimer)
+#define __NR_getitimer 105
+#endif
+
+#if !defined(__NR_stat)
+#define __NR_stat 106
+#endif
+
+#if !defined(__NR_lstat)
+#define __NR_lstat 107
+#endif
+
+#if !defined(__NR_fstat)
+#define __NR_fstat 108
+#endif
+
+#if !defined(__NR_olduname)
+#define __NR_olduname 109
+#endif
+
+#if !defined(__NR_iopl)
+#define __NR_iopl 110
+#endif
+
+#if !defined(__NR_vhangup)
+#define __NR_vhangup 111
+#endif
+
+#if !defined(__NR_idle)
+#define __NR_idle 112
+#endif
+
+#if !defined(__NR_vm86old)
+#define __NR_vm86old 113
+#endif
+
+#if !defined(__NR_wait4)
+#define __NR_wait4 114
+#endif
+
+#if !defined(__NR_swapoff)
+#define __NR_swapoff 115
+#endif
+
+#if !defined(__NR_sysinfo)
+#define __NR_sysinfo 116
+#endif
+
+#if !defined(__NR_ipc)
+#define __NR_ipc 117
+#endif
+
+#if !defined(__NR_fsync)
+#define __NR_fsync 118
+#endif
+
+#if !defined(__NR_sigreturn)
+#define __NR_sigreturn 119
+#endif
+
+#if !defined(__NR_clone)
+#define __NR_clone 120
+#endif
+
+#if !defined(__NR_setdomainname)
+#define __NR_setdomainname 121
+#endif
+
+#if !defined(__NR_uname)
+#define __NR_uname 122
+#endif
+
+#if !defined(__NR_modify_ldt)
+#define __NR_modify_ldt 123
+#endif
+
+#if !defined(__NR_adjtimex)
+#define __NR_adjtimex 124
+#endif
+
+#if !defined(__NR_mprotect)
+#define __NR_mprotect 125
+#endif
+
+#if !defined(__NR_sigprocmask)
+#define __NR_sigprocmask 126
+#endif
+
+#if !defined(__NR_create_module)
+#define __NR_create_module 127
+#endif
+
+#if !defined(__NR_init_module)
+#define __NR_init_module 128
+#endif
+
+#if !defined(__NR_delete_module)
+#define __NR_delete_module 129
+#endif
+
+#if !defined(__NR_get_kernel_syms)
+#define __NR_get_kernel_syms 130
+#endif
+
+#if !defined(__NR_quotactl)
+#define __NR_quotactl 131
+#endif
+
+#if !defined(__NR_getpgid)
+#define __NR_getpgid 132
+#endif
+
+#if !defined(__NR_fchdir)
+#define __NR_fchdir 133
+#endif
+
+#if !defined(__NR_bdflush)
+#define __NR_bdflush 134
+#endif
+
+#if !defined(__NR_sysfs)
+#define __NR_sysfs 135
+#endif
+
+#if !defined(__NR_personality)
+#define __NR_personality 136
+#endif
+
+#if !defined(__NR_afs_syscall)
+#define __NR_afs_syscall 137
+#endif
+
+#if !defined(__NR_setfsuid)
+#define __NR_setfsuid 138
+#endif
+
+#if !defined(__NR_setfsgid)
+#define __NR_setfsgid 139
+#endif
+
+#if !defined(__NR__llseek)
+#define __NR__llseek 140
+#endif
+
+#if !defined(__NR_getdents)
+#define __NR_getdents 141
+#endif
+
+#if !defined(__NR__newselect)
+#define __NR__newselect 142
+#endif
+
+#if !defined(__NR_flock)
+#define __NR_flock 143
+#endif
+
+#if !defined(__NR_msync)
+#define __NR_msync 144
+#endif
+
+#if !defined(__NR_readv)
+#define __NR_readv 145
+#endif
+
+#if !defined(__NR_writev)
+#define __NR_writev 146
+#endif
+
+#if !defined(__NR_getsid)
+#define __NR_getsid 147
+#endif
+
+#if !defined(__NR_fdatasync)
+#define __NR_fdatasync 148
+#endif
+
+#if !defined(__NR__sysctl)
+#define __NR__sysctl 149
+#endif
+
+#if !defined(__NR_mlock)
+#define __NR_mlock 150
+#endif
+
+#if !defined(__NR_munlock)
+#define __NR_munlock 151
+#endif
+
+#if !defined(__NR_mlockall)
+#define __NR_mlockall 152
+#endif
+
+#if !defined(__NR_munlockall)
+#define __NR_munlockall 153
+#endif
+
+#if !defined(__NR_sched_setparam)
+#define __NR_sched_setparam 154
+#endif
+
+#if !defined(__NR_sched_getparam)
+#define __NR_sched_getparam 155
+#endif
+
+#if !defined(__NR_sched_setscheduler)
+#define __NR_sched_setscheduler 156
+#endif
+
+#if !defined(__NR_sched_getscheduler)
+#define __NR_sched_getscheduler 157
+#endif
+
+#if !defined(__NR_sched_yield)
+#define __NR_sched_yield 158
+#endif
+
+#if !defined(__NR_sched_get_priority_max)
+#define __NR_sched_get_priority_max 159
+#endif
+
+#if !defined(__NR_sched_get_priority_min)
+#define __NR_sched_get_priority_min 160
+#endif
+
+#if !defined(__NR_sched_rr_get_interval)
+#define __NR_sched_rr_get_interval 161
+#endif
+
+#if !defined(__NR_nanosleep)
+#define __NR_nanosleep 162
+#endif
+
+#if !defined(__NR_mremap)
+#define __NR_mremap 163
+#endif
+
+#if !defined(__NR_setresuid)
+#define __NR_setresuid 164
+#endif
+
+#if !defined(__NR_getresuid)
+#define __NR_getresuid 165
+#endif
+
+#if !defined(__NR_vm86)
+#define __NR_vm86 166
+#endif
+
+#if !defined(__NR_query_module)
+#define __NR_query_module 167
+#endif
+
+#if !defined(__NR_poll)
+#define __NR_poll 168
+#endif
+
+#if !defined(__NR_nfsservctl)
+#define __NR_nfsservctl 169
+#endif
+
+#if !defined(__NR_setresgid)
+#define __NR_setresgid 170
+#endif
+
+#if !defined(__NR_getresgid)
+#define __NR_getresgid 171
+#endif
+
+#if !defined(__NR_prctl)
+#define __NR_prctl 172
+#endif
+
+#if !defined(__NR_rt_sigreturn)
+#define __NR_rt_sigreturn 173
+#endif
+
+#if !defined(__NR_rt_sigaction)
+#define __NR_rt_sigaction 174
+#endif
+
+#if !defined(__NR_rt_sigprocmask)
+#define __NR_rt_sigprocmask 175
+#endif
+
+#if !defined(__NR_rt_sigpending)
+#define __NR_rt_sigpending 176
+#endif
+
+#if !defined(__NR_rt_sigtimedwait)
+#define __NR_rt_sigtimedwait 177
+#endif
+
+#if !defined(__NR_rt_sigqueueinfo)
+#define __NR_rt_sigqueueinfo 178
+#endif
+
+#if !defined(__NR_rt_sigsuspend)
+#define __NR_rt_sigsuspend 179
+#endif
+
+#if !defined(__NR_pread64)
+#define __NR_pread64 180
+#endif
+
+#if !defined(__NR_pwrite64)
+#define __NR_pwrite64 181
+#endif
+
+#if !defined(__NR_chown)
+#define __NR_chown 182
+#endif
+
+#if !defined(__NR_getcwd)
+#define __NR_getcwd 183
+#endif
+
+#if !defined(__NR_capget)
+#define __NR_capget 184
+#endif
+
+#if !defined(__NR_capset)
+#define __NR_capset 185
+#endif
+
+#if !defined(__NR_sigaltstack)
+#define __NR_sigaltstack 186
+#endif
+
+#if !defined(__NR_sendfile)
+#define __NR_sendfile 187
+#endif
+
+#if !defined(__NR_getpmsg)
+#define __NR_getpmsg 188
+#endif
+
+#if !defined(__NR_putpmsg)
+#define __NR_putpmsg 189
+#endif
+
+#if !defined(__NR_vfork)
+#define __NR_vfork 190
+#endif
+
+#if !defined(__NR_ugetrlimit)
+#define __NR_ugetrlimit 191
+#endif
+
+#if !defined(__NR_mmap2)
+#define __NR_mmap2 192
+#endif
+
+#if !defined(__NR_truncate64)
+#define __NR_truncate64 193
+#endif
+
+#if !defined(__NR_ftruncate64)
+#define __NR_ftruncate64 194
+#endif
+
+#if !defined(__NR_stat64)
+#define __NR_stat64 195
+#endif
+
+#if !defined(__NR_lstat64)
+#define __NR_lstat64 196
+#endif
+
+#if !defined(__NR_fstat64)
+#define __NR_fstat64 197
+#endif
+
+#if !defined(__NR_lchown32)
+#define __NR_lchown32 198
+#endif
+
+#if !defined(__NR_getuid32)
+#define __NR_getuid32 199
+#endif
+
+#if !defined(__NR_getgid32)
+#define __NR_getgid32 200
+#endif
+
+#if !defined(__NR_geteuid32)
+#define __NR_geteuid32 201
+#endif
+
+#if !defined(__NR_getegid32)
+#define __NR_getegid32 202
+#endif
+
+#if !defined(__NR_setreuid32)
+#define __NR_setreuid32 203
+#endif
+
+#if !defined(__NR_setregid32)
+#define __NR_setregid32 204
+#endif
+
+#if !defined(__NR_getgroups32)
+#define __NR_getgroups32 205
+#endif
+
+#if !defined(__NR_setgroups32)
+#define __NR_setgroups32 206
+#endif
+
+#if !defined(__NR_fchown32)
+#define __NR_fchown32 207
+#endif
+
+#if !defined(__NR_setresuid32)
+#define __NR_setresuid32 208
+#endif
+
+#if !defined(__NR_getresuid32)
+#define __NR_getresuid32 209
+#endif
+
+#if !defined(__NR_setresgid32)
+#define __NR_setresgid32 210
+#endif
+
+#if !defined(__NR_getresgid32)
+#define __NR_getresgid32 211
+#endif
+
+#if !defined(__NR_chown32)
+#define __NR_chown32 212
+#endif
+
+#if !defined(__NR_setuid32)
+#define __NR_setuid32 213
+#endif
+
+#if !defined(__NR_setgid32)
+#define __NR_setgid32 214
+#endif
+
+#if !defined(__NR_setfsuid32)
+#define __NR_setfsuid32 215
+#endif
+
+#if !defined(__NR_setfsgid32)
+#define __NR_setfsgid32 216
+#endif
+
+#if !defined(__NR_pivot_root)
+#define __NR_pivot_root 217
+#endif
+
+#if !defined(__NR_mincore)
+#define __NR_mincore 218
+#endif
+
+#if !defined(__NR_madvise)
+#define __NR_madvise 219
+#endif
+
+#if !defined(__NR_getdents64)
+#define __NR_getdents64 220
+#endif
+
+#if !defined(__NR_fcntl64)
+#define __NR_fcntl64 221
+#endif
+
+#if !defined(__NR_gettid)
+#define __NR_gettid 224
+#endif
+
+#if !defined(__NR_readahead)
+#define __NR_readahead 225
+#endif
+
+#if !defined(__NR_setxattr)
+#define __NR_setxattr 226
+#endif
+
+#if !defined(__NR_lsetxattr)
+#define __NR_lsetxattr 227
+#endif
+
+#if !defined(__NR_fsetxattr)
+#define __NR_fsetxattr 228
+#endif
+
+#if !defined(__NR_getxattr)
+#define __NR_getxattr 229
+#endif
+
+#if !defined(__NR_lgetxattr)
+#define __NR_lgetxattr 230
+#endif
+
+#if !defined(__NR_fgetxattr)
+#define __NR_fgetxattr 231
+#endif
+
+#if !defined(__NR_listxattr)
+#define __NR_listxattr 232
+#endif
+
+#if !defined(__NR_llistxattr)
+#define __NR_llistxattr 233
+#endif
+
+#if !defined(__NR_flistxattr)
+#define __NR_flistxattr 234
+#endif
+
+#if !defined(__NR_removexattr)
+#define __NR_removexattr 235
+#endif
+
+#if !defined(__NR_lremovexattr)
+#define __NR_lremovexattr 236
+#endif
+
+#if !defined(__NR_fremovexattr)
+#define __NR_fremovexattr 237
+#endif
+
+#if !defined(__NR_tkill)
+#define __NR_tkill 238
+#endif
+
+#if !defined(__NR_sendfile64)
+#define __NR_sendfile64 239
+#endif
+
+#if !defined(__NR_futex)
+#define __NR_futex 240
+#endif
+
+#if !defined(__NR_sched_setaffinity)
+#define __NR_sched_setaffinity 241
+#endif
+
+#if !defined(__NR_sched_getaffinity)
+#define __NR_sched_getaffinity 242
+#endif
+
+#if !defined(__NR_set_thread_area)
+#define __NR_set_thread_area 243
+#endif
+
+#if !defined(__NR_get_thread_area)
+#define __NR_get_thread_area 244
+#endif
+
+#if !defined(__NR_io_setup)
+#define __NR_io_setup 245
+#endif
+
+#if !defined(__NR_io_destroy)
+#define __NR_io_destroy 246
+#endif
+
+#if !defined(__NR_io_getevents)
+#define __NR_io_getevents 247
+#endif
+
+#if !defined(__NR_io_submit)
+#define __NR_io_submit 248
+#endif
+
+#if !defined(__NR_io_cancel)
+#define __NR_io_cancel 249
+#endif
+
+#if !defined(__NR_fadvise64)
+#define __NR_fadvise64 250
+#endif
+
+#if !defined(__NR_exit_group)
+#define __NR_exit_group 252
+#endif
+
+#if !defined(__NR_lookup_dcookie)
+#define __NR_lookup_dcookie 253
+#endif
+
+#if !defined(__NR_epoll_create)
+#define __NR_epoll_create 254
+#endif
+
+#if !defined(__NR_epoll_ctl)
+#define __NR_epoll_ctl 255
+#endif
+
+#if !defined(__NR_epoll_wait)
+#define __NR_epoll_wait 256
+#endif
+
+#if !defined(__NR_remap_file_pages)
+#define __NR_remap_file_pages 257
+#endif
+
+#if !defined(__NR_set_tid_address)
+#define __NR_set_tid_address 258
+#endif
+
+#if !defined(__NR_timer_create)
+#define __NR_timer_create 259
+#endif
+
+#if !defined(__NR_timer_settime)
+#define __NR_timer_settime 260
+#endif
+
+#if !defined(__NR_timer_gettime)
+#define __NR_timer_gettime 261
+#endif
+
+#if !defined(__NR_timer_getoverrun)
+#define __NR_timer_getoverrun 262
+#endif
+
+#if !defined(__NR_timer_delete)
+#define __NR_timer_delete 263
+#endif
+
+#if !defined(__NR_clock_settime)
+#define __NR_clock_settime 264
+#endif
+
+#if !defined(__NR_clock_gettime)
+#define __NR_clock_gettime 265
+#endif
+
+#if !defined(__NR_clock_getres)
+#define __NR_clock_getres 266
+#endif
+
+#if !defined(__NR_clock_nanosleep)
+#define __NR_clock_nanosleep 267
+#endif
+
+#if !defined(__NR_statfs64)
+#define __NR_statfs64 268
+#endif
+
+#if !defined(__NR_fstatfs64)
+#define __NR_fstatfs64 269
+#endif
+
+#if !defined(__NR_tgkill)
+#define __NR_tgkill 270
+#endif
+
+#if !defined(__NR_utimes)
+#define __NR_utimes 271
+#endif
+
+#if !defined(__NR_fadvise64_64)
+#define __NR_fadvise64_64 272
+#endif
+
+#if !defined(__NR_vserver)
+#define __NR_vserver 273
+#endif
+
+#if !defined(__NR_mbind)
+#define __NR_mbind 274
+#endif
+
+#if !defined(__NR_get_mempolicy)
+#define __NR_get_mempolicy 275
+#endif
+
+#if !defined(__NR_set_mempolicy)
+#define __NR_set_mempolicy 276
+#endif
+
+#if !defined(__NR_mq_open)
+#define __NR_mq_open 277
+#endif
+
+#if !defined(__NR_mq_unlink)
+#define __NR_mq_unlink 278
+#endif
+
+#if !defined(__NR_mq_timedsend)
+#define __NR_mq_timedsend 279
+#endif
+
+#if !defined(__NR_mq_timedreceive)
+#define __NR_mq_timedreceive 280
+#endif
+
+#if !defined(__NR_mq_notify)
+#define __NR_mq_notify 281
+#endif
+
+#if !defined(__NR_mq_getsetattr)
+#define __NR_mq_getsetattr 282
+#endif
+
+#if !defined(__NR_kexec_load)
+#define __NR_kexec_load 283
+#endif
+
+#if !defined(__NR_waitid)
+#define __NR_waitid 284
+#endif
+
+#if !defined(__NR_add_key)
+#define __NR_add_key 286
+#endif
+
+#if !defined(__NR_request_key)
+#define __NR_request_key 287
+#endif
+
+#if !defined(__NR_keyctl)
+#define __NR_keyctl 288
+#endif
+
+#if !defined(__NR_ioprio_set)
+#define __NR_ioprio_set 289
+#endif
+
+#if !defined(__NR_ioprio_get)
+#define __NR_ioprio_get 290
+#endif
+
+#if !defined(__NR_inotify_init)
+#define __NR_inotify_init 291
+#endif
+
+#if !defined(__NR_inotify_add_watch)
+#define __NR_inotify_add_watch 292
+#endif
+
+#if !defined(__NR_inotify_rm_watch)
+#define __NR_inotify_rm_watch 293
+#endif
+
+#if !defined(__NR_migrate_pages)
+#define __NR_migrate_pages 294
+#endif
+
+#if !defined(__NR_openat)
+#define __NR_openat 295
+#endif
+
+#if !defined(__NR_mkdirat)
+#define __NR_mkdirat 296
+#endif
+
+#if !defined(__NR_mknodat)
+#define __NR_mknodat 297
+#endif
+
+#if !defined(__NR_fchownat)
+#define __NR_fchownat 298
+#endif
+
+#if !defined(__NR_futimesat)
+#define __NR_futimesat 299
+#endif
+
+#if !defined(__NR_fstatat64)
+#define __NR_fstatat64 300
+#endif
+
+#if !defined(__NR_unlinkat)
+#define __NR_unlinkat 301
+#endif
+
+#if !defined(__NR_renameat)
+#define __NR_renameat 302
+#endif
+
+#if !defined(__NR_linkat)
+#define __NR_linkat 303
+#endif
+
+#if !defined(__NR_symlinkat)
+#define __NR_symlinkat 304
+#endif
+
+#if !defined(__NR_readlinkat)
+#define __NR_readlinkat 305
+#endif
+
+#if !defined(__NR_fchmodat)
+#define __NR_fchmodat 306
+#endif
+
+#if !defined(__NR_faccessat)
+#define __NR_faccessat 307
+#endif
+
+#if !defined(__NR_pselect6)
+#define __NR_pselect6 308
+#endif
+
+#if !defined(__NR_ppoll)
+#define __NR_ppoll 309
+#endif
+
+#if !defined(__NR_unshare)
+#define __NR_unshare 310
+#endif
+
+#if !defined(__NR_set_robust_list)
+#define __NR_set_robust_list 311
+#endif
+
+#if !defined(__NR_get_robust_list)
+#define __NR_get_robust_list 312
+#endif
+
+#if !defined(__NR_splice)
+#define __NR_splice 313
+#endif
+
+#if !defined(__NR_sync_file_range)
+#define __NR_sync_file_range 314
+#endif
+
+#if !defined(__NR_tee)
+#define __NR_tee 315
+#endif
+
+#if !defined(__NR_vmsplice)
+#define __NR_vmsplice 316
+#endif
+
+#if !defined(__NR_move_pages)
+#define __NR_move_pages 317
+#endif
+
+#if !defined(__NR_getcpu)
+#define __NR_getcpu 318
+#endif
+
+#if !defined(__NR_epoll_pwait)
+#define __NR_epoll_pwait 319
+#endif
+
+#if !defined(__NR_utimensat)
+#define __NR_utimensat 320
+#endif
+
+#if !defined(__NR_signalfd)
+#define __NR_signalfd 321
+#endif
+
+#if !defined(__NR_timerfd_create)
+#define __NR_timerfd_create 322
+#endif
+
+#if !defined(__NR_eventfd)
+#define __NR_eventfd 323
+#endif
+
+#if !defined(__NR_fallocate)
+#define __NR_fallocate 324
+#endif
+
+#if !defined(__NR_timerfd_settime)
+#define __NR_timerfd_settime 325
+#endif
+
+#if !defined(__NR_timerfd_gettime)
+#define __NR_timerfd_gettime 326
+#endif
+
+#if !defined(__NR_signalfd4)
+#define __NR_signalfd4 327
+#endif
+
+#if !defined(__NR_eventfd2)
+#define __NR_eventfd2 328
+#endif
+
+#if !defined(__NR_epoll_create1)
+#define __NR_epoll_create1 329
+#endif
+
+#if !defined(__NR_dup3)
+#define __NR_dup3 330
+#endif
+
+#if !defined(__NR_pipe2)
+#define __NR_pipe2 331
+#endif
+
+#if !defined(__NR_inotify_init1)
+#define __NR_inotify_init1 332
+#endif
+
+#if !defined(__NR_preadv)
+#define __NR_preadv 333
+#endif
+
+#if !defined(__NR_pwritev)
+#define __NR_pwritev 334
+#endif
+
+#if !defined(__NR_rt_tgsigqueueinfo)
+#define __NR_rt_tgsigqueueinfo 335
+#endif
+
+#if !defined(__NR_perf_event_open)
+#define __NR_perf_event_open 336
+#endif
+
+#if !defined(__NR_recvmmsg)
+#define __NR_recvmmsg 337
+#endif
+
+#if !defined(__NR_fanotify_init)
+#define __NR_fanotify_init 338
+#endif
+
+#if !defined(__NR_fanotify_mark)
+#define __NR_fanotify_mark 339
+#endif
+
+#if !defined(__NR_prlimit64)
+#define __NR_prlimit64 340
+#endif
+
+#if !defined(__NR_name_to_handle_at)
+#define __NR_name_to_handle_at 341
+#endif
+
+#if !defined(__NR_open_by_handle_at)
+#define __NR_open_by_handle_at 342
+#endif
+
+#if !defined(__NR_clock_adjtime)
+#define __NR_clock_adjtime 343
+#endif
+
+#if !defined(__NR_syncfs)
+#define __NR_syncfs 344
+#endif
+
+#if !defined(__NR_sendmmsg)
+#define __NR_sendmmsg 345
+#endif
+
+#if !defined(__NR_setns)
+#define __NR_setns 346
+#endif
+
+#if !defined(__NR_process_vm_readv)
+#define __NR_process_vm_readv 347
+#endif
+
+#if !defined(__NR_process_vm_writev)
+#define __NR_process_vm_writev 348
+#endif
+
+#if !defined(__NR_kcmp)
+#define __NR_kcmp 349
+#endif
+
+#endif // SANDBOX_LINUX_SERVICES_X86_32_LINUX_SYSCALLS_H_
+
diff --git a/sandbox/linux/services/x86_64_linux_syscalls.h b/sandbox/linux/services/x86_64_linux_syscalls.h
new file mode 100644
index 0000000..bd30c55
--- /dev/null
+++ b/sandbox/linux/services/x86_64_linux_syscalls.h
@@ -0,0 +1,1266 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Generated from the Linux kernel's syscall_64.tbl.
+#ifndef SANDBOX_LINUX_SERVICES_X86_64_LINUX_SYSCALLS_H_
+#define SANDBOX_LINUX_SERVICES_X86_64_LINUX_SYSCALLS_H_
+
+#if !defined(__x86_64__)
+#error "Including header on wrong architecture"
+#endif
+
+#if !defined(__NR_read)
+#define __NR_read 0
+#endif
+
+#if !defined(__NR_write)
+#define __NR_write 1
+#endif
+
+#if !defined(__NR_open)
+#define __NR_open 2
+#endif
+
+#if !defined(__NR_close)
+#define __NR_close 3
+#endif
+
+#if !defined(__NR_stat)
+#define __NR_stat 4
+#endif
+
+#if !defined(__NR_fstat)
+#define __NR_fstat 5
+#endif
+
+#if !defined(__NR_lstat)
+#define __NR_lstat 6
+#endif
+
+#if !defined(__NR_poll)
+#define __NR_poll 7
+#endif
+
+#if !defined(__NR_lseek)
+#define __NR_lseek 8
+#endif
+
+#if !defined(__NR_mmap)
+#define __NR_mmap 9
+#endif
+
+#if !defined(__NR_mprotect)
+#define __NR_mprotect 10
+#endif
+
+#if !defined(__NR_munmap)
+#define __NR_munmap 11
+#endif
+
+#if !defined(__NR_brk)
+#define __NR_brk 12
+#endif
+
+#if !defined(__NR_rt_sigaction)
+#define __NR_rt_sigaction 13
+#endif
+
+#if !defined(__NR_rt_sigprocmask)
+#define __NR_rt_sigprocmask 14
+#endif
+
+#if !defined(__NR_rt_sigreturn)
+#define __NR_rt_sigreturn 15
+#endif
+
+#if !defined(__NR_ioctl)
+#define __NR_ioctl 16
+#endif
+
+#if !defined(__NR_pread64)
+#define __NR_pread64 17
+#endif
+
+#if !defined(__NR_pwrite64)
+#define __NR_pwrite64 18
+#endif
+
+#if !defined(__NR_readv)
+#define __NR_readv 19
+#endif
+
+#if !defined(__NR_writev)
+#define __NR_writev 20
+#endif
+
+#if !defined(__NR_access)
+#define __NR_access 21
+#endif
+
+#if !defined(__NR_pipe)
+#define __NR_pipe 22
+#endif
+
+#if !defined(__NR_select)
+#define __NR_select 23
+#endif
+
+#if !defined(__NR_sched_yield)
+#define __NR_sched_yield 24
+#endif
+
+#if !defined(__NR_mremap)
+#define __NR_mremap 25
+#endif
+
+#if !defined(__NR_msync)
+#define __NR_msync 26
+#endif
+
+#if !defined(__NR_mincore)
+#define __NR_mincore 27
+#endif
+
+#if !defined(__NR_madvise)
+#define __NR_madvise 28
+#endif
+
+#if !defined(__NR_shmget)
+#define __NR_shmget 29
+#endif
+
+#if !defined(__NR_shmat)
+#define __NR_shmat 30
+#endif
+
+#if !defined(__NR_shmctl)
+#define __NR_shmctl 31
+#endif
+
+#if !defined(__NR_dup)
+#define __NR_dup 32
+#endif
+
+#if !defined(__NR_dup2)
+#define __NR_dup2 33
+#endif
+
+#if !defined(__NR_pause)
+#define __NR_pause 34
+#endif
+
+#if !defined(__NR_nanosleep)
+#define __NR_nanosleep 35
+#endif
+
+#if !defined(__NR_getitimer)
+#define __NR_getitimer 36
+#endif
+
+#if !defined(__NR_alarm)
+#define __NR_alarm 37
+#endif
+
+#if !defined(__NR_setitimer)
+#define __NR_setitimer 38
+#endif
+
+#if !defined(__NR_getpid)
+#define __NR_getpid 39
+#endif
+
+#if !defined(__NR_sendfile)
+#define __NR_sendfile 40
+#endif
+
+#if !defined(__NR_socket)
+#define __NR_socket 41
+#endif
+
+#if !defined(__NR_connect)
+#define __NR_connect 42
+#endif
+
+#if !defined(__NR_accept)
+#define __NR_accept 43
+#endif
+
+#if !defined(__NR_sendto)
+#define __NR_sendto 44
+#endif
+
+#if !defined(__NR_recvfrom)
+#define __NR_recvfrom 45
+#endif
+
+#if !defined(__NR_sendmsg)
+#define __NR_sendmsg 46
+#endif
+
+#if !defined(__NR_recvmsg)
+#define __NR_recvmsg 47
+#endif
+
+#if !defined(__NR_shutdown)
+#define __NR_shutdown 48
+#endif
+
+#if !defined(__NR_bind)
+#define __NR_bind 49
+#endif
+
+#if !defined(__NR_listen)
+#define __NR_listen 50
+#endif
+
+#if !defined(__NR_getsockname)
+#define __NR_getsockname 51
+#endif
+
+#if !defined(__NR_getpeername)
+#define __NR_getpeername 52
+#endif
+
+#if !defined(__NR_socketpair)
+#define __NR_socketpair 53
+#endif
+
+#if !defined(__NR_setsockopt)
+#define __NR_setsockopt 54
+#endif
+
+#if !defined(__NR_getsockopt)
+#define __NR_getsockopt 55
+#endif
+
+#if !defined(__NR_clone)
+#define __NR_clone 56
+#endif
+
+#if !defined(__NR_fork)
+#define __NR_fork 57
+#endif
+
+#if !defined(__NR_vfork)
+#define __NR_vfork 58
+#endif
+
+#if !defined(__NR_execve)
+#define __NR_execve 59
+#endif
+
+#if !defined(__NR_exit)
+#define __NR_exit 60
+#endif
+
+#if !defined(__NR_wait4)
+#define __NR_wait4 61
+#endif
+
+#if !defined(__NR_kill)
+#define __NR_kill 62
+#endif
+
+#if !defined(__NR_uname)
+#define __NR_uname 63
+#endif
+
+#if !defined(__NR_semget)
+#define __NR_semget 64
+#endif
+
+#if !defined(__NR_semop)
+#define __NR_semop 65
+#endif
+
+#if !defined(__NR_semctl)
+#define __NR_semctl 66
+#endif
+
+#if !defined(__NR_shmdt)
+#define __NR_shmdt 67
+#endif
+
+#if !defined(__NR_msgget)
+#define __NR_msgget 68
+#endif
+
+#if !defined(__NR_msgsnd)
+#define __NR_msgsnd 69
+#endif
+
+#if !defined(__NR_msgrcv)
+#define __NR_msgrcv 70
+#endif
+
+#if !defined(__NR_msgctl)
+#define __NR_msgctl 71
+#endif
+
+#if !defined(__NR_fcntl)
+#define __NR_fcntl 72
+#endif
+
+#if !defined(__NR_flock)
+#define __NR_flock 73
+#endif
+
+#if !defined(__NR_fsync)
+#define __NR_fsync 74
+#endif
+
+#if !defined(__NR_fdatasync)
+#define __NR_fdatasync 75
+#endif
+
+#if !defined(__NR_truncate)
+#define __NR_truncate 76
+#endif
+
+#if !defined(__NR_ftruncate)
+#define __NR_ftruncate 77
+#endif
+
+#if !defined(__NR_getdents)
+#define __NR_getdents 78
+#endif
+
+#if !defined(__NR_getcwd)
+#define __NR_getcwd 79
+#endif
+
+#if !defined(__NR_chdir)
+#define __NR_chdir 80
+#endif
+
+#if !defined(__NR_fchdir)
+#define __NR_fchdir 81
+#endif
+
+#if !defined(__NR_rename)
+#define __NR_rename 82
+#endif
+
+#if !defined(__NR_mkdir)
+#define __NR_mkdir 83
+#endif
+
+#if !defined(__NR_rmdir)
+#define __NR_rmdir 84
+#endif
+
+#if !defined(__NR_creat)
+#define __NR_creat 85
+#endif
+
+#if !defined(__NR_link)
+#define __NR_link 86
+#endif
+
+#if !defined(__NR_unlink)
+#define __NR_unlink 87
+#endif
+
+#if !defined(__NR_symlink)
+#define __NR_symlink 88
+#endif
+
+#if !defined(__NR_readlink)
+#define __NR_readlink 89
+#endif
+
+#if !defined(__NR_chmod)
+#define __NR_chmod 90
+#endif
+
+#if !defined(__NR_fchmod)
+#define __NR_fchmod 91
+#endif
+
+#if !defined(__NR_chown)
+#define __NR_chown 92
+#endif
+
+#if !defined(__NR_fchown)
+#define __NR_fchown 93
+#endif
+
+#if !defined(__NR_lchown)
+#define __NR_lchown 94
+#endif
+
+#if !defined(__NR_umask)
+#define __NR_umask 95
+#endif
+
+#if !defined(__NR_gettimeofday)
+#define __NR_gettimeofday 96
+#endif
+
+#if !defined(__NR_getrlimit)
+#define __NR_getrlimit 97
+#endif
+
+#if !defined(__NR_getrusage)
+#define __NR_getrusage 98
+#endif
+
+#if !defined(__NR_sysinfo)
+#define __NR_sysinfo 99
+#endif
+
+#if !defined(__NR_times)
+#define __NR_times 100
+#endif
+
+#if !defined(__NR_ptrace)
+#define __NR_ptrace 101
+#endif
+
+#if !defined(__NR_getuid)
+#define __NR_getuid 102
+#endif
+
+#if !defined(__NR_syslog)
+#define __NR_syslog 103
+#endif
+
+#if !defined(__NR_getgid)
+#define __NR_getgid 104
+#endif
+
+#if !defined(__NR_setuid)
+#define __NR_setuid 105
+#endif
+
+#if !defined(__NR_setgid)
+#define __NR_setgid 106
+#endif
+
+#if !defined(__NR_geteuid)
+#define __NR_geteuid 107
+#endif
+
+#if !defined(__NR_getegid)
+#define __NR_getegid 108
+#endif
+
+#if !defined(__NR_setpgid)
+#define __NR_setpgid 109
+#endif
+
+#if !defined(__NR_getppid)
+#define __NR_getppid 110
+#endif
+
+#if !defined(__NR_getpgrp)
+#define __NR_getpgrp 111
+#endif
+
+#if !defined(__NR_setsid)
+#define __NR_setsid 112
+#endif
+
+#if !defined(__NR_setreuid)
+#define __NR_setreuid 113
+#endif
+
+#if !defined(__NR_setregid)
+#define __NR_setregid 114
+#endif
+
+#if !defined(__NR_getgroups)
+#define __NR_getgroups 115
+#endif
+
+#if !defined(__NR_setgroups)
+#define __NR_setgroups 116
+#endif
+
+#if !defined(__NR_setresuid)
+#define __NR_setresuid 117
+#endif
+
+#if !defined(__NR_getresuid)
+#define __NR_getresuid 118
+#endif
+
+#if !defined(__NR_setresgid)
+#define __NR_setresgid 119
+#endif
+
+#if !defined(__NR_getresgid)
+#define __NR_getresgid 120
+#endif
+
+#if !defined(__NR_getpgid)
+#define __NR_getpgid 121
+#endif
+
+#if !defined(__NR_setfsuid)
+#define __NR_setfsuid 122
+#endif
+
+#if !defined(__NR_setfsgid)
+#define __NR_setfsgid 123
+#endif
+
+#if !defined(__NR_getsid)
+#define __NR_getsid 124
+#endif
+
+#if !defined(__NR_capget)
+#define __NR_capget 125
+#endif
+
+#if !defined(__NR_capset)
+#define __NR_capset 126
+#endif
+
+#if !defined(__NR_rt_sigpending)
+#define __NR_rt_sigpending 127
+#endif
+
+#if !defined(__NR_rt_sigtimedwait)
+#define __NR_rt_sigtimedwait 128
+#endif
+
+#if !defined(__NR_rt_sigqueueinfo)
+#define __NR_rt_sigqueueinfo 129
+#endif
+
+#if !defined(__NR_rt_sigsuspend)
+#define __NR_rt_sigsuspend 130
+#endif
+
+#if !defined(__NR_sigaltstack)
+#define __NR_sigaltstack 131
+#endif
+
+#if !defined(__NR_utime)
+#define __NR_utime 132
+#endif
+
+#if !defined(__NR_mknod)
+#define __NR_mknod 133
+#endif
+
+#if !defined(__NR_uselib)
+#define __NR_uselib 134
+#endif
+
+#if !defined(__NR_personality)
+#define __NR_personality 135
+#endif
+
+#if !defined(__NR_ustat)
+#define __NR_ustat 136
+#endif
+
+#if !defined(__NR_statfs)
+#define __NR_statfs 137
+#endif
+
+#if !defined(__NR_fstatfs)
+#define __NR_fstatfs 138
+#endif
+
+#if !defined(__NR_sysfs)
+#define __NR_sysfs 139
+#endif
+
+#if !defined(__NR_getpriority)
+#define __NR_getpriority 140
+#endif
+
+#if !defined(__NR_setpriority)
+#define __NR_setpriority 141
+#endif
+
+#if !defined(__NR_sched_setparam)
+#define __NR_sched_setparam 142
+#endif
+
+#if !defined(__NR_sched_getparam)
+#define __NR_sched_getparam 143
+#endif
+
+#if !defined(__NR_sched_setscheduler)
+#define __NR_sched_setscheduler 144
+#endif
+
+#if !defined(__NR_sched_getscheduler)
+#define __NR_sched_getscheduler 145
+#endif
+
+#if !defined(__NR_sched_get_priority_max)
+#define __NR_sched_get_priority_max 146
+#endif
+
+#if !defined(__NR_sched_get_priority_min)
+#define __NR_sched_get_priority_min 147
+#endif
+
+#if !defined(__NR_sched_rr_get_interval)
+#define __NR_sched_rr_get_interval 148
+#endif
+
+#if !defined(__NR_mlock)
+#define __NR_mlock 149
+#endif
+
+#if !defined(__NR_munlock)
+#define __NR_munlock 150
+#endif
+
+#if !defined(__NR_mlockall)
+#define __NR_mlockall 151
+#endif
+
+#if !defined(__NR_munlockall)
+#define __NR_munlockall 152
+#endif
+
+#if !defined(__NR_vhangup)
+#define __NR_vhangup 153
+#endif
+
+#if !defined(__NR_modify_ldt)
+#define __NR_modify_ldt 154
+#endif
+
+#if !defined(__NR_pivot_root)
+#define __NR_pivot_root 155
+#endif
+
+#if !defined(__NR__sysctl)
+#define __NR__sysctl 156
+#endif
+
+#if !defined(__NR_prctl)
+#define __NR_prctl 157
+#endif
+
+#if !defined(__NR_arch_prctl)
+#define __NR_arch_prctl 158
+#endif
+
+#if !defined(__NR_adjtimex)
+#define __NR_adjtimex 159
+#endif
+
+#if !defined(__NR_setrlimit)
+#define __NR_setrlimit 160
+#endif
+
+#if !defined(__NR_chroot)
+#define __NR_chroot 161
+#endif
+
+#if !defined(__NR_sync)
+#define __NR_sync 162
+#endif
+
+#if !defined(__NR_acct)
+#define __NR_acct 163
+#endif
+
+#if !defined(__NR_settimeofday)
+#define __NR_settimeofday 164
+#endif
+
+#if !defined(__NR_mount)
+#define __NR_mount 165
+#endif
+
+#if !defined(__NR_umount2)
+#define __NR_umount2 166
+#endif
+
+#if !defined(__NR_swapon)
+#define __NR_swapon 167
+#endif
+
+#if !defined(__NR_swapoff)
+#define __NR_swapoff 168
+#endif
+
+#if !defined(__NR_reboot)
+#define __NR_reboot 169
+#endif
+
+#if !defined(__NR_sethostname)
+#define __NR_sethostname 170
+#endif
+
+#if !defined(__NR_setdomainname)
+#define __NR_setdomainname 171
+#endif
+
+#if !defined(__NR_iopl)
+#define __NR_iopl 172
+#endif
+
+#if !defined(__NR_ioperm)
+#define __NR_ioperm 173
+#endif
+
+#if !defined(__NR_create_module)
+#define __NR_create_module 174
+#endif
+
+#if !defined(__NR_init_module)
+#define __NR_init_module 175
+#endif
+
+#if !defined(__NR_delete_module)
+#define __NR_delete_module 176
+#endif
+
+#if !defined(__NR_get_kernel_syms)
+#define __NR_get_kernel_syms 177
+#endif
+
+#if !defined(__NR_query_module)
+#define __NR_query_module 178
+#endif
+
+#if !defined(__NR_quotactl)
+#define __NR_quotactl 179
+#endif
+
+#if !defined(__NR_nfsservctl)
+#define __NR_nfsservctl 180
+#endif
+
+#if !defined(__NR_getpmsg)
+#define __NR_getpmsg 181
+#endif
+
+#if !defined(__NR_putpmsg)
+#define __NR_putpmsg 182
+#endif
+
+#if !defined(__NR_afs_syscall)
+#define __NR_afs_syscall 183
+#endif
+
+#if !defined(__NR_tuxcall)
+#define __NR_tuxcall 184
+#endif
+
+#if !defined(__NR_security)
+#define __NR_security 185
+#endif
+
+#if !defined(__NR_gettid)
+#define __NR_gettid 186
+#endif
+
+#if !defined(__NR_readahead)
+#define __NR_readahead 187
+#endif
+
+#if !defined(__NR_setxattr)
+#define __NR_setxattr 188
+#endif
+
+#if !defined(__NR_lsetxattr)
+#define __NR_lsetxattr 189
+#endif
+
+#if !defined(__NR_fsetxattr)
+#define __NR_fsetxattr 190
+#endif
+
+#if !defined(__NR_getxattr)
+#define __NR_getxattr 191
+#endif
+
+#if !defined(__NR_lgetxattr)
+#define __NR_lgetxattr 192
+#endif
+
+#if !defined(__NR_fgetxattr)
+#define __NR_fgetxattr 193
+#endif
+
+#if !defined(__NR_listxattr)
+#define __NR_listxattr 194
+#endif
+
+#if !defined(__NR_llistxattr)
+#define __NR_llistxattr 195
+#endif
+
+#if !defined(__NR_flistxattr)
+#define __NR_flistxattr 196
+#endif
+
+#if !defined(__NR_removexattr)
+#define __NR_removexattr 197
+#endif
+
+#if !defined(__NR_lremovexattr)
+#define __NR_lremovexattr 198
+#endif
+
+#if !defined(__NR_fremovexattr)
+#define __NR_fremovexattr 199
+#endif
+
+#if !defined(__NR_tkill)
+#define __NR_tkill 200
+#endif
+
+#if !defined(__NR_time)
+#define __NR_time 201
+#endif
+
+#if !defined(__NR_futex)
+#define __NR_futex 202
+#endif
+
+#if !defined(__NR_sched_setaffinity)
+#define __NR_sched_setaffinity 203
+#endif
+
+#if !defined(__NR_sched_getaffinity)
+#define __NR_sched_getaffinity 204
+#endif
+
+#if !defined(__NR_set_thread_area)
+#define __NR_set_thread_area 205
+#endif
+
+#if !defined(__NR_io_setup)
+#define __NR_io_setup 206
+#endif
+
+#if !defined(__NR_io_destroy)
+#define __NR_io_destroy 207
+#endif
+
+#if !defined(__NR_io_getevents)
+#define __NR_io_getevents 208
+#endif
+
+#if !defined(__NR_io_submit)
+#define __NR_io_submit 209
+#endif
+
+#if !defined(__NR_io_cancel)
+#define __NR_io_cancel 210
+#endif
+
+#if !defined(__NR_get_thread_area)
+#define __NR_get_thread_area 211
+#endif
+
+#if !defined(__NR_lookup_dcookie)
+#define __NR_lookup_dcookie 212
+#endif
+
+#if !defined(__NR_epoll_create)
+#define __NR_epoll_create 213
+#endif
+
+#if !defined(__NR_epoll_ctl_old)
+#define __NR_epoll_ctl_old 214
+#endif
+
+#if !defined(__NR_epoll_wait_old)
+#define __NR_epoll_wait_old 215
+#endif
+
+#if !defined(__NR_remap_file_pages)
+#define __NR_remap_file_pages 216
+#endif
+
+#if !defined(__NR_getdents64)
+#define __NR_getdents64 217
+#endif
+
+#if !defined(__NR_set_tid_address)
+#define __NR_set_tid_address 218
+#endif
+
+#if !defined(__NR_restart_syscall)
+#define __NR_restart_syscall 219
+#endif
+
+#if !defined(__NR_semtimedop)
+#define __NR_semtimedop 220
+#endif
+
+#if !defined(__NR_fadvise64)
+#define __NR_fadvise64 221
+#endif
+
+#if !defined(__NR_timer_create)
+#define __NR_timer_create 222
+#endif
+
+#if !defined(__NR_timer_settime)
+#define __NR_timer_settime 223
+#endif
+
+#if !defined(__NR_timer_gettime)
+#define __NR_timer_gettime 224
+#endif
+
+#if !defined(__NR_timer_getoverrun)
+#define __NR_timer_getoverrun 225
+#endif
+
+#if !defined(__NR_timer_delete)
+#define __NR_timer_delete 226
+#endif
+
+#if !defined(__NR_clock_settime)
+#define __NR_clock_settime 227
+#endif
+
+#if !defined(__NR_clock_gettime)
+#define __NR_clock_gettime 228
+#endif
+
+#if !defined(__NR_clock_getres)
+#define __NR_clock_getres 229
+#endif
+
+#if !defined(__NR_clock_nanosleep)
+#define __NR_clock_nanosleep 230
+#endif
+
+#if !defined(__NR_exit_group)
+#define __NR_exit_group 231
+#endif
+
+#if !defined(__NR_epoll_wait)
+#define __NR_epoll_wait 232
+#endif
+
+#if !defined(__NR_epoll_ctl)
+#define __NR_epoll_ctl 233
+#endif
+
+#if !defined(__NR_tgkill)
+#define __NR_tgkill 234
+#endif
+
+#if !defined(__NR_utimes)
+#define __NR_utimes 235
+#endif
+
+#if !defined(__NR_vserver)
+#define __NR_vserver 236
+#endif
+
+#if !defined(__NR_mbind)
+#define __NR_mbind 237
+#endif
+
+#if !defined(__NR_set_mempolicy)
+#define __NR_set_mempolicy 238
+#endif
+
+#if !defined(__NR_get_mempolicy)
+#define __NR_get_mempolicy 239
+#endif
+
+#if !defined(__NR_mq_open)
+#define __NR_mq_open 240
+#endif
+
+#if !defined(__NR_mq_unlink)
+#define __NR_mq_unlink 241
+#endif
+
+#if !defined(__NR_mq_timedsend)
+#define __NR_mq_timedsend 242
+#endif
+
+#if !defined(__NR_mq_timedreceive)
+#define __NR_mq_timedreceive 243
+#endif
+
+#if !defined(__NR_mq_notify)
+#define __NR_mq_notify 244
+#endif
+
+#if !defined(__NR_mq_getsetattr)
+#define __NR_mq_getsetattr 245
+#endif
+
+#if !defined(__NR_kexec_load)
+#define __NR_kexec_load 246
+#endif
+
+#if !defined(__NR_waitid)
+#define __NR_waitid 247
+#endif
+
+#if !defined(__NR_add_key)
+#define __NR_add_key 248
+#endif
+
+#if !defined(__NR_request_key)
+#define __NR_request_key 249
+#endif
+
+#if !defined(__NR_keyctl)
+#define __NR_keyctl 250
+#endif
+
+#if !defined(__NR_ioprio_set)
+#define __NR_ioprio_set 251
+#endif
+
+#if !defined(__NR_ioprio_get)
+#define __NR_ioprio_get 252
+#endif
+
+#if !defined(__NR_inotify_init)
+#define __NR_inotify_init 253
+#endif
+
+#if !defined(__NR_inotify_add_watch)
+#define __NR_inotify_add_watch 254
+#endif
+
+#if !defined(__NR_inotify_rm_watch)
+#define __NR_inotify_rm_watch 255
+#endif
+
+#if !defined(__NR_migrate_pages)
+#define __NR_migrate_pages 256
+#endif
+
+#if !defined(__NR_openat)
+#define __NR_openat 257
+#endif
+
+#if !defined(__NR_mkdirat)
+#define __NR_mkdirat 258
+#endif
+
+#if !defined(__NR_mknodat)
+#define __NR_mknodat 259
+#endif
+
+#if !defined(__NR_fchownat)
+#define __NR_fchownat 260
+#endif
+
+#if !defined(__NR_futimesat)
+#define __NR_futimesat 261
+#endif
+
+#if !defined(__NR_newfstatat)
+#define __NR_newfstatat 262
+#endif
+
+#if !defined(__NR_unlinkat)
+#define __NR_unlinkat 263
+#endif
+
+#if !defined(__NR_renameat)
+#define __NR_renameat 264
+#endif
+
+#if !defined(__NR_linkat)
+#define __NR_linkat 265
+#endif
+
+#if !defined(__NR_symlinkat)
+#define __NR_symlinkat 266
+#endif
+
+#if !defined(__NR_readlinkat)
+#define __NR_readlinkat 267
+#endif
+
+#if !defined(__NR_fchmodat)
+#define __NR_fchmodat 268
+#endif
+
+#if !defined(__NR_faccessat)
+#define __NR_faccessat 269
+#endif
+
+#if !defined(__NR_pselect6)
+#define __NR_pselect6 270
+#endif
+
+#if !defined(__NR_ppoll)
+#define __NR_ppoll 271
+#endif
+
+#if !defined(__NR_unshare)
+#define __NR_unshare 272
+#endif
+
+#if !defined(__NR_set_robust_list)
+#define __NR_set_robust_list 273
+#endif
+
+#if !defined(__NR_get_robust_list)
+#define __NR_get_robust_list 274
+#endif
+
+#if !defined(__NR_splice)
+#define __NR_splice 275
+#endif
+
+#if !defined(__NR_tee)
+#define __NR_tee 276
+#endif
+
+#if !defined(__NR_sync_file_range)
+#define __NR_sync_file_range 277
+#endif
+
+#if !defined(__NR_vmsplice)
+#define __NR_vmsplice 278
+#endif
+
+#if !defined(__NR_move_pages)
+#define __NR_move_pages 279
+#endif
+
+#if !defined(__NR_utimensat)
+#define __NR_utimensat 280
+#endif
+
+#if !defined(__NR_epoll_pwait)
+#define __NR_epoll_pwait 281
+#endif
+
+#if !defined(__NR_signalfd)
+#define __NR_signalfd 282
+#endif
+
+#if !defined(__NR_timerfd_create)
+#define __NR_timerfd_create 283
+#endif
+
+#if !defined(__NR_eventfd)
+#define __NR_eventfd 284
+#endif
+
+#if !defined(__NR_fallocate)
+#define __NR_fallocate 285
+#endif
+
+#if !defined(__NR_timerfd_settime)
+#define __NR_timerfd_settime 286
+#endif
+
+#if !defined(__NR_timerfd_gettime)
+#define __NR_timerfd_gettime 287
+#endif
+
+#if !defined(__NR_accept4)
+#define __NR_accept4 288
+#endif
+
+#if !defined(__NR_signalfd4)
+#define __NR_signalfd4 289
+#endif
+
+#if !defined(__NR_eventfd2)
+#define __NR_eventfd2 290
+#endif
+
+#if !defined(__NR_epoll_create1)
+#define __NR_epoll_create1 291
+#endif
+
+#if !defined(__NR_dup3)
+#define __NR_dup3 292
+#endif
+
+#if !defined(__NR_pipe2)
+#define __NR_pipe2 293
+#endif
+
+#if !defined(__NR_inotify_init1)
+#define __NR_inotify_init1 294
+#endif
+
+#if !defined(__NR_preadv)
+#define __NR_preadv 295
+#endif
+
+#if !defined(__NR_pwritev)
+#define __NR_pwritev 296
+#endif
+
+#if !defined(__NR_rt_tgsigqueueinfo)
+#define __NR_rt_tgsigqueueinfo 297
+#endif
+
+#if !defined(__NR_perf_event_open)
+#define __NR_perf_event_open 298
+#endif
+
+#if !defined(__NR_recvmmsg)
+#define __NR_recvmmsg 299
+#endif
+
+#if !defined(__NR_fanotify_init)
+#define __NR_fanotify_init 300
+#endif
+
+#if !defined(__NR_fanotify_mark)
+#define __NR_fanotify_mark 301
+#endif
+
+#if !defined(__NR_prlimit64)
+#define __NR_prlimit64 302
+#endif
+
+#if !defined(__NR_name_to_handle_at)
+#define __NR_name_to_handle_at 303
+#endif
+
+#if !defined(__NR_open_by_handle_at)
+#define __NR_open_by_handle_at 304
+#endif
+
+#if !defined(__NR_clock_adjtime)
+#define __NR_clock_adjtime 305
+#endif
+
+#if !defined(__NR_syncfs)
+#define __NR_syncfs 306
+#endif
+
+#if !defined(__NR_sendmmsg)
+#define __NR_sendmmsg 307
+#endif
+
+#if !defined(__NR_setns)
+#define __NR_setns 308
+#endif
+
+#if !defined(__NR_getcpu)
+#define __NR_getcpu 309
+#endif
+
+#if !defined(__NR_process_vm_readv)
+#define __NR_process_vm_readv 310
+#endif
+
+#if !defined(__NR_process_vm_writev)
+#define __NR_process_vm_writev 311
+#endif
+
+#if !defined(__NR_kcmp)
+#define __NR_kcmp 312
+#endif
+
+#endif // SANDBOX_LINUX_SERVICES_X86_64_LINUX_SYSCALLS_H_
+
diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.cc b/sandbox/linux/suid/client/setuid_sandbox_client.cc
new file mode 100644
index 0000000..45d700b
--- /dev/null
+++ b/sandbox/linux/suid/client/setuid_sandbox_client.cc
@@ -0,0 +1,179 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "base/eintr_wrapper.h"
+#include "base/environment.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/string_number_conversions.h"
+
+#include "sandbox/linux/suid/common/sandbox.h"
+#include "sandbox/linux/suid/common/suid_unsafe_environment_variables.h"
+#include "setuid_sandbox_client.h"
+
+namespace {
+
+// Set an environment variable that reflects the API version we expect from the
+// setuid sandbox. Old versions of the sandbox will ignore this.
+void SetSandboxAPIEnvironmentVariable(base::Environment* env) {
+ env->SetVar(sandbox::kSandboxEnvironmentApiRequest,
+ base::IntToString(sandbox::kSUIDSandboxApiNumber));
+}
+
+// Wrapper around a shared C function.
+// Returns the "saved" environment variable name corresponding to |env_var|
+// in a new string or NULL.
+std::string* CreateSavedVariableName(const char* env_var) {
+ char* const saved_env_var = SandboxSavedEnvironmentVariable(env_var);
+ if (!saved_env_var)
+ return NULL;
+ std::string* saved_env_var_copy = new std::string(saved_env_var);
+ // SandboxSavedEnvironmentVariable is the C function that we wrap; it uses
+ // malloc() to allocate memory.
+ free(saved_env_var);
+ return saved_env_var_copy;
+}
+
+// The ELF loader will clear many environment variables so we save them to
+// different names here so that the SUID sandbox can resolve them for the
+// renderer.
+void SaveSUIDUnsafeEnvironmentVariables(base::Environment* env) {
+ for (unsigned i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
+ const char* env_var = kSUIDUnsafeEnvironmentVariables[i];
+ // Get the saved environment variable corresponding to |env_var|.
+ scoped_ptr<std::string> saved_env_var(CreateSavedVariableName(env_var));
+ if (saved_env_var == NULL)
+ continue;
+
+ std::string value;
+ if (env->GetVar(env_var, &value))
+ env->SetVar(saved_env_var->c_str(), value);
+ else
+ env->UnSetVar(saved_env_var->c_str());
+ }
+}
+
+int GetHelperApi(base::Environment* env) {
+ std::string api_string;
+ int api_number = 0; // Assume API version 0 if no environment was found.
+ if (env->GetVar(sandbox::kSandboxEnvironmentApiProvides, &api_string) &&
+ !base::StringToInt(api_string, &api_number)) {
+ // It's an error if we could not convert the API number.
+ api_number = -1;
+ }
+ return api_number;
+}
+
+// Convert |var_name| from the environment |env| to an int.
+// Return -1 if the variable does not exist or the value cannot be converted.
+int EnvToInt(base::Environment* env, const char* var_name) {
+ std::string var_string;
+ int var_value = -1;
+ if (env->GetVar(var_name, &var_string) &&
+ !base::StringToInt(var_string, &var_value)) {
+ var_value = -1;
+ }
+ return var_value;
+}
+
+pid_t GetHelperPID(base::Environment* env) {
+ return EnvToInt(env, sandbox::kSandboxHelperPidEnvironmentVarName);
+}
+
+// Get the IPC file descriptor used to communicate with the setuid helper.
+int GetIPCDescriptor(base::Environment* env) {
+ return EnvToInt(env, sandbox::kSandboxDescriptorEnvironmentVarName);
+}
+
+} // namespace
+
+namespace sandbox {
+
+SetuidSandboxClient* SetuidSandboxClient::Create() {
+ base::Environment* environment(base::Environment::Create());
+ SetuidSandboxClient* sandbox_client(new(SetuidSandboxClient));
+
+ CHECK(environment);
+ sandbox_client->env_ = environment;
+ return sandbox_client;
+}
+
+SetuidSandboxClient::SetuidSandboxClient()
+ : env_(NULL),
+ sandboxed_(false) {
+}
+
+SetuidSandboxClient::~SetuidSandboxClient() {
+ delete env_;
+}
+
+bool SetuidSandboxClient::ChrootMe() {
+ int ipc_fd = GetIPCDescriptor(env_);
+
+ if (ipc_fd < 0) {
+ LOG(ERROR) << "Failed to obtain the sandbox IPC descriptor";
+ return false;
+ }
+
+ if (HANDLE_EINTR(write(ipc_fd, &kMsgChrootMe, 1)) != 1) {
+ PLOG(ERROR) << "Failed to write to chroot pipe";
+ return false;
+ }
+
+ // We need to reap the chroot helper process in any event.
+ pid_t helper_pid = GetHelperPID(env_);
+ // If helper_pid is -1 we wait for any child.
+ if (waitpid(helper_pid, NULL, 0) < 0) {
+ PLOG(ERROR) << "Failed to wait for setuid helper to die";
+ return false;
+ }
+
+ char reply;
+ if (HANDLE_EINTR(read(ipc_fd, &reply, 1)) != 1) {
+ PLOG(ERROR) << "Failed to read from chroot pipe";
+ return false;
+ }
+
+ if (reply != kMsgChrootSuccessful) {
+ LOG(ERROR) << "Error code reply from chroot helper";
+ return false;
+ }
+
+ // We now consider ourselves "fully sandboxed" as far as the
+ // setuid sandbox is concerned.
+ sandboxed_ = true;
+ return true;
+}
+
+bool SetuidSandboxClient::IsSuidSandboxUpToDate() const {
+ return GetHelperApi(env_) == kSUIDSandboxApiNumber;
+}
+
+bool SetuidSandboxClient::IsSuidSandboxChild() const {
+ return GetIPCDescriptor(env_) >= 0;
+}
+
+bool SetuidSandboxClient::IsInNewPIDNamespace() const {
+ return env_->HasVar(kSandboxPIDNSEnvironmentVarName);
+}
+
+bool SetuidSandboxClient::IsInNewNETNamespace() const {
+ return env_->HasVar(kSandboxNETNSEnvironmentVarName);
+}
+
+bool SetuidSandboxClient::IsSandboxed() const {
+ return sandboxed_;
+}
+
+void SetuidSandboxClient::SetupLaunchEnvironment() {
+ SaveSUIDUnsafeEnvironmentVariables(env_);
+ SetSandboxAPIEnvironmentVariable(env_);
+}
+
+} // namespace sandbox
+
diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.h b/sandbox/linux/suid/client/setuid_sandbox_client.h
new file mode 100644
index 0000000..a9f6536
--- /dev/null
+++ b/sandbox/linux/suid/client/setuid_sandbox_client.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SUID_SETUID_SANDBOX_CLIENT_H_
+#define SANDBOX_LINUX_SUID_SETUID_SANDBOX_CLIENT_H_
+
+#include "base/basictypes.h"
+
+namespace base { class Environment; }
+
+namespace sandbox {
+
+// Helper class to use the setuid sandbox. This class is to be used both
+// before launching the setuid helper and after being executed through the
+// setuid helper.
+//
+// A typical use would be:
+// 1. The browser calls SetupLaunchEnvironment()
+// 2. The browser launches a renderer through the setuid sandbox.
+// 3. The renderer requests being chroot-ed through ChrootMe() and
+// requests other sandboxing status via the status functions.
+class SetuidSandboxClient {
+ public:
+ // All instantiation should go through this factory method.
+ static class SetuidSandboxClient* Create();
+ ~SetuidSandboxClient();
+
+ // Ask the setuid helper over the setuid sandbox IPC channel to chroot() us
+ // to an empty directory.
+ // Will only work if we have been launched through the setuid helper.
+ bool ChrootMe();
+
+ // Did we get launched through an up-to-date setuid binary?
+ bool IsSuidSandboxUpToDate() const;
+ // Did we get launched through the setuid helper?
+ bool IsSuidSandboxChild() const;
+ // Did the setuid helper create a new PID namespace?
+ bool IsInNewPIDNamespace() const;
+ // Did the setuid helper create a new network namespace?
+ bool IsInNewNETNamespace() const;
+ // Are we done and fully sandboxed?
+ bool IsSandboxed() const;
+
+ // Set-up the environment. This should be done prior to launching the setuid
+ // helper.
+ void SetupLaunchEnvironment();
+
+ private:
+ // Holds the environment. Will never be NULL.
+ base::Environment* env_;
+ bool sandboxed_;
+ DISALLOW_IMPLICIT_CONSTRUCTORS(SetuidSandboxClient);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SUID_SETUID_SANDBOX_CLIENT_H_
+
diff --git a/sandbox/linux/suid/client/setuid_sandbox_client_unittest.cc b/sandbox/linux/suid/client/setuid_sandbox_client_unittest.cc
new file mode 100644
index 0000000..293f423
--- /dev/null
+++ b/sandbox/linux/suid/client/setuid_sandbox_client_unittest.cc
@@ -0,0 +1,94 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/environment.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/string_number_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+#include "sandbox/linux/suid/common/sandbox.h"
+#include "setuid_sandbox_client.h"
+
+namespace sandbox {
+
+TEST(SetuidSandboxClient, SetupLaunchEnvironment) {
+ const char kTestValue[] = "This is a test";
+ scoped_ptr<base::Environment> env(base::Environment::Create());
+ EXPECT_TRUE(env != NULL);
+
+ std::string saved_ld_preload;
+ bool environment_had_ld_preload;
+ // First, back-up the real LD_PRELOAD if any.
+ environment_had_ld_preload = env->GetVar("LD_PRELOAD", &saved_ld_preload);
+ // Setup environment variables to save or not save.
+ EXPECT_TRUE(env->SetVar("LD_PRELOAD", kTestValue));
+ EXPECT_TRUE(env->UnSetVar("LD_ORIGIN_PATH"));
+
+ scoped_ptr<SetuidSandboxClient>
+ sandbox_client(SetuidSandboxClient::Create());
+ EXPECT_TRUE(sandbox_client != NULL);
+
+ // Make sure the environment is clean.
+ EXPECT_TRUE(env->UnSetVar(kSandboxEnvironmentApiRequest));
+ EXPECT_TRUE(env->UnSetVar(kSandboxEnvironmentApiProvides));
+
+ sandbox_client->SetupLaunchEnvironment();
+
+ // Check if the requested API environment was set.
+ std::string api_request;
+ EXPECT_TRUE(env->GetVar(kSandboxEnvironmentApiRequest, &api_request));
+ int api_request_num;
+ EXPECT_TRUE(base::StringToInt(api_request, &api_request_num));
+ EXPECT_EQ(api_request_num, kSUIDSandboxApiNumber);
+
+ // Now check if LD_PRELOAD was saved to SANDBOX_LD_PRELOAD.
+ std::string sandbox_ld_preload;
+ EXPECT_TRUE(env->GetVar("SANDBOX_LD_PRELOAD", &sandbox_ld_preload));
+ EXPECT_EQ(sandbox_ld_preload, kTestValue);
+
+ // Check that LD_ORIGIN_PATH was not saved.
+ EXPECT_FALSE(env->HasVar("SANDBOX_LD_ORIGIN_PATH"));
+
+ // We should not forget to restore LD_PRELOAD at the end, or this environment
+ // variable will affect the next running tests!
+ if (environment_had_ld_preload) {
+ EXPECT_TRUE(env->SetVar("LD_PRELOAD", saved_ld_preload));
+ } else {
+ EXPECT_TRUE(env->UnSetVar("LD_PRELOAD"));
+ }
+}
+
+TEST(SetuidSandboxClient, SandboxedClientAPI) {
+ scoped_ptr<base::Environment> env(base::Environment::Create());
+ EXPECT_TRUE(env != NULL);
+
+ scoped_ptr<SetuidSandboxClient>
+ sandbox_client(SetuidSandboxClient::Create());
+ EXPECT_TRUE(sandbox_client != NULL);
+
+ // Set-up a fake environment as if we went through the setuid sandbox.
+ EXPECT_TRUE(env->SetVar(kSandboxEnvironmentApiProvides,
+ base::IntToString(kSUIDSandboxApiNumber)));
+ EXPECT_TRUE(env->SetVar(kSandboxDescriptorEnvironmentVarName, "1"));
+ EXPECT_TRUE(env->SetVar(kSandboxPIDNSEnvironmentVarName, "1"));
+ EXPECT_TRUE(env->UnSetVar(kSandboxNETNSEnvironmentVarName));
+
+ // Check the API.
+ EXPECT_TRUE(sandbox_client->IsSuidSandboxUpToDate());
+ EXPECT_TRUE(sandbox_client->IsSuidSandboxChild());
+ EXPECT_TRUE(sandbox_client->IsInNewPIDNamespace());
+ EXPECT_FALSE(sandbox_client->IsInNewNETNamespace());
+
+ // Forge an incorrect API version and check.
+ EXPECT_TRUE(env->SetVar(kSandboxEnvironmentApiProvides,
+ base::IntToString(kSUIDSandboxApiNumber + 1)));
+ EXPECT_FALSE(sandbox_client->IsSuidSandboxUpToDate());
+ // We didn't go through the actual sandboxing mechanism as it is
+ // very hard in a unit test.
+ EXPECT_FALSE(sandbox_client->IsSandboxed());
+}
+
+} // namespace sandbox
+
diff --git a/sandbox/linux/suid/common/sandbox.h b/sandbox/linux/suid/common/sandbox.h
new file mode 100644
index 0000000..aad4ff8
--- /dev/null
+++ b/sandbox/linux/suid/common/sandbox.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SUID_SANDBOX_H_
+#define SANDBOX_LINUX_SUID_SANDBOX_H_
+
+#if defined(__cplusplus)
+namespace sandbox {
+#endif
+
+// These are command line switches that may be used by other programs
+// (e.g. Chrome) to construct a command line for the sandbox.
+static const char kAdjustOOMScoreSwitch[] = "--adjust-oom-score";
+#if defined(OS_CHROMEOS)
+static const char kAdjustLowMemMarginSwitch[] = "--adjust-low-mem";
+#endif
+
+static const char kSandboxDescriptorEnvironmentVarName[] = "SBX_D";
+static const char kSandboxHelperPidEnvironmentVarName[] = "SBX_HELPER_PID";
+
+static const long kSUIDSandboxApiNumber = 1;
+static const char kSandboxEnvironmentApiRequest[] = "SBX_CHROME_API_RQ";
+static const char kSandboxEnvironmentApiProvides[] = "SBX_CHROME_API_PRV";
+
+// This number must be kept in sync with common/zygote_commands_linux.h
+static const int kZygoteIdFd = 7;
+
+// These are the magic byte values which the sandboxed process uses to request
+// that it be chrooted.
+static const char kMsgChrootMe = 'C';
+static const char kMsgChrootSuccessful = 'O';
+
+// These are set if we have respectively switched to a new PID or NET namespace
+// by going through the setuid binary helper.
+static const char kSandboxPIDNSEnvironmentVarName[] = "SBX_PID_NS";
+static const char kSandboxNETNSEnvironmentVarName[] = "SBX_NET_NS";
+
+#if defined(__cplusplus)
+} // namespace sandbox
+#endif
+
+#endif // SANDBOX_LINUX_SUID_SANDBOX_H_
diff --git a/sandbox/linux/suid/common/suid_unsafe_environment_variables.h b/sandbox/linux/suid/common/suid_unsafe_environment_variables.h
new file mode 100644
index 0000000..ee4db76
--- /dev/null
+++ b/sandbox/linux/suid/common/suid_unsafe_environment_variables.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This is a list of environment variables which the ELF loader unsets when
+// loading a SUID binary. Because they are unset rather than just ignored, they
+// aren't passed to child processes of SUID processes either.
+//
+// We need to save these environment variables before running a SUID sandbox
+// and restore them before running child processes (but after dropping root).
+//
+// List gathered from glibc sources (00ebd7ed58df389a78e41dece058048725cb585e):
+// sysdeps/unix/sysv/linux/i386/dl-librecon.h
+// sysdeps/generic/unsecvars.h
+
+#ifndef SANDBOX_LINUX_SUID_SUID_UNSAFE_ENVIRONMENT_VARIABLES_H_
+#define SANDBOX_LINUX_SUID_SUID_UNSAFE_ENVIRONMENT_VARIABLES_H_
+
+#if defined(__cplusplus)
+#include <limits>
+#define SIZE_MAX std::numeric_limits<size_t>::max()
+#endif
+
+#include <stdlib.h> // malloc
+#include <string.h> // memcpy
+
+static const char* kSUIDUnsafeEnvironmentVariables[] = {
+ "LD_AOUT_LIBRARY_PATH",
+ "LD_AOUT_PRELOAD",
+ "GCONV_PATH",
+ "GETCONF_DIR",
+ "HOSTALIASES",
+ "LD_AUDIT",
+ "LD_DEBUG",
+ "LD_DEBUG_OUTPUT",
+ "LD_DYNAMIC_WEAK",
+ "LD_LIBRARY_PATH",
+ "LD_ORIGIN_PATH",
+ "LD_PRELOAD",
+ "LD_PROFILE",
+ "LD_SHOW_AUXV",
+ "LD_USE_LOAD_BIAS",
+ "LOCALDOMAIN",
+ "LOCPATH",
+ "MALLOC_TRACE",
+ "NIS_PATH",
+ "NLSPATH",
+ "RESOLV_HOST_CONF",
+ "RES_OPTIONS",
+ "TMPDIR",
+ "TZDIR",
+ NULL,
+};
+
+// Return a malloc allocated string containing the 'saved' environment variable
+// name for a given environment variable.
+static inline char* SandboxSavedEnvironmentVariable(const char* envvar) {
+ const size_t envvar_len = strlen(envvar);
+
+ if (envvar_len > SIZE_MAX - 1 -8)
+ return NULL;
+
+ const size_t saved_envvarlen = envvar_len + 1 /* NUL terminator */ +
+ 8 /* strlen("SANDBOX_") */;
+ char* const saved_envvar = (char*) malloc(saved_envvarlen);
+ if (!saved_envvar)
+ return NULL;
+
+ memcpy(saved_envvar, "SANDBOX_", 8);
+ memcpy(saved_envvar + 8, envvar, envvar_len);
+ saved_envvar[8 + envvar_len] = 0;
+
+ return saved_envvar;
+}
+
+#if defined(__cplusplus)
+#undef SIZE_MAX
+#endif
+
+#endif // SANDBOX_LINUX_SUID_SUID_UNSAFE_ENVIRONMENT_VARIABLES_H_
diff --git a/sandbox/linux/suid/linux_util.c b/sandbox/linux/suid/linux_util.c
new file mode 100644
index 0000000..c5af0d0
--- /dev/null
+++ b/sandbox/linux/suid/linux_util.c
@@ -0,0 +1,112 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The following is duplicated from base/linux_utils.cc.
+// We shouldn't link against C++ code in a setuid binary.
+
+#include "linux_util.h"
+
+#include <dirent.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// expected prefix of the target of the /proc/self/fd/%d link for a socket
+static const char kSocketLinkPrefix[] = "socket:[";
+
+// Parse a symlink in /proc/pid/fd/$x and return the inode number of the
+// socket.
+// inode_out: (output) set to the inode number on success
+// path: e.g. /proc/1234/fd/5 (must be a UNIX domain socket descriptor)
+static bool ProcPathGetInode(ino_t* inode_out, const char* path) {
+ char buf[256];
+ const ssize_t n = readlink(path, buf, sizeof(buf) - 1);
+ if (n == -1)
+ return false;
+ buf[n] = 0;
+
+ if (memcmp(kSocketLinkPrefix, buf, sizeof(kSocketLinkPrefix) - 1))
+ return false;
+
+ char *endptr;
+ const unsigned long long int inode_ul =
+ strtoull(buf + sizeof(kSocketLinkPrefix) - 1, &endptr, 10);
+ if (*endptr != ']')
+ return false;
+
+ if (inode_ul == ULLONG_MAX)
+ return false;
+
+ *inode_out = inode_ul;
+ return true;
+}
+
+bool FindProcessHoldingSocket(pid_t* pid_out, ino_t socket_inode) {
+ bool already_found = false;
+
+ DIR* proc = opendir("/proc");
+ if (!proc)
+ return false;
+
+ const uid_t uid = getuid();
+ struct dirent* dent;
+ while ((dent = readdir(proc))) {
+ char *endptr;
+ const unsigned long int pid_ul = strtoul(dent->d_name, &endptr, 10);
+ if (pid_ul == ULONG_MAX || *endptr)
+ continue;
+
+ // We have this setuid code here because the zygote and its children have
+ // /proc/$pid/fd owned by root. While scanning through /proc, we add this
+ // extra check so users cannot accidentally gain information about other
+ // users' processes. To determine process ownership, we use the property
+ // that if user foo owns process N, then /proc/N is owned by foo.
+ {
+ char buf[256];
+ struct stat statbuf;
+ snprintf(buf, sizeof(buf), "/proc/%lu", pid_ul);
+ if (stat(buf, &statbuf) < 0)
+ continue;
+ if (uid != statbuf.st_uid)
+ continue;
+ }
+
+ char buf[256];
+ snprintf(buf, sizeof(buf), "/proc/%lu/fd", pid_ul);
+ DIR* fd = opendir(buf);
+ if (!fd)
+ continue;
+
+ while ((dent = readdir(fd))) {
+ int printed = snprintf(buf, sizeof(buf), "/proc/%lu/fd/%s", pid_ul,
+ dent->d_name);
+ if (printed < 0 || printed >= (int)(sizeof(buf) - 1)) {
+ continue;
+ }
+
+ ino_t fd_inode;
+ if (ProcPathGetInode(&fd_inode, buf)) {
+ if (fd_inode == socket_inode) {
+ if (already_found) {
+ closedir(fd);
+ closedir(proc);
+ return false;
+ }
+
+ already_found = true;
+ *pid_out = pid_ul;
+ break;
+ }
+ }
+ }
+ closedir(fd);
+ }
+ closedir(proc);
+
+ return already_found;
+}
diff --git a/sandbox/linux/suid/linux_util.h b/sandbox/linux/suid/linux_util.h
new file mode 100644
index 0000000..d064252
--- /dev/null
+++ b/sandbox/linux/suid/linux_util.h
@@ -0,0 +1,21 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The following is duplicated from base/linux_utils.h.
+// We shouldn't link against C++ code in a setuid binary.
+
+#ifndef SANDBOX_LINUX_SUID_LINUX_UTIL_H_
+#define SANDBOX_LINUX_SUID_LINUX_UTIL_H_
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+static const char kFindInodeSwitch[] = "--find-inode";
+static const char kSuidSandboxGetApiSwitch[] = "--get-api";
+
+// Find the process which holds the given socket, named by inode number. If
+// multiple processes hold the socket, this function returns false.
+bool FindProcessHoldingSocket(pid_t* pid_out, ino_t socket_inode);
+
+#endif // SANDBOX_LINUX_SUID_LINUX_UTIL_H_
diff --git a/sandbox/linux/suid/process_util.h b/sandbox/linux/suid/process_util.h
new file mode 100644
index 0000000..f6b4c31
--- /dev/null
+++ b/sandbox/linux/suid/process_util.h
@@ -0,0 +1,32 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The following is duplicated from base/process_utils.h.
+// We shouldn't link against C++ code in a setuid binary.
+
+#ifndef SANDBOX_LINUX_SUID_PROCESS_UTIL_H_
+#define SANDBOX_LINUX_SUID_PROCESS_UTIL_H_
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "base/base_export.h"
+
+// This adjusts /proc/process/oom_score_adj so the Linux OOM killer
+// will prefer certain process types over others. The range for the
+// adjustment is [-1000, 1000], with [0, 1000] being user accessible.
+//
+// If the Linux system isn't new enough to use oom_score_adj, then we
+// try to set the older oom_adj value instead, scaling the score to
+// the required range of [0, 15]. This may result in some aliasing of
+// values, of course.
+BASE_EXPORT bool AdjustOOMScore(pid_t process, int score);
+
+// This adjusts /sys/kernel/mm/chromeos-low_mem/margin so that
+// the kernel notifies us that we are low on memory when less than
+// |margin_mb| megabytes are available. Setting |margin_mb| to -1
+// turns off low memory notification.
+BASE_EXPORT bool AdjustLowMemoryMargin(int64_t margin_mb);
+
+#endif // SANDBOX_LINUX_SUID_PROCESS_UTIL_H_
diff --git a/sandbox/linux/suid/process_util_linux.c b/sandbox/linux/suid/process_util_linux.c
new file mode 100644
index 0000000..5e6b33b
--- /dev/null
+++ b/sandbox/linux/suid/process_util_linux.c
@@ -0,0 +1,106 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The following is the C version of code from base/process_util_linux.cc.
+// We shouldn't link against C++ code in a setuid binary.
+
+#define _GNU_SOURCE // needed for O_DIRECTORY
+
+#include "process_util.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Ranges for the current (oom_score_adj) and previous (oom_adj)
+// flavors of OOM score.
+static const int kMaxOomScore = 1000;
+static const int kMaxOldOomScore = 15;
+
+// Kernel pseudo-file that allows setting of the low memory margin.
+static const char kLowMemMarginFile[] =
+ "/sys/kernel/mm/chromeos-low_mem/margin";
+
+// NOTE: This is not the only version of this function in the source:
+// the base library (in process_util_linux.cc) also has its own C++ version.
+bool AdjustOOMScore(pid_t process, int score) {
+ if (score < 0 || score > kMaxOomScore)
+ return false;
+
+ char oom_adj[27]; // "/proc/" + log_10(2**64) + "\0"
+ // 6 + 20 + 1 = 27
+ snprintf(oom_adj, sizeof(oom_adj), "/proc/%" PRIdMAX, (intmax_t)process);
+
+ const int dirfd = open(oom_adj, O_RDONLY | O_DIRECTORY);
+ if (dirfd < 0)
+ return false;
+
+ struct stat statbuf;
+ if (fstat(dirfd, &statbuf) < 0) {
+ close(dirfd);
+ return false;
+ }
+ if (getuid() != statbuf.st_uid) {
+ close(dirfd);
+ return false;
+ }
+
+ int fd = openat(dirfd, "oom_score_adj", O_WRONLY);
+ if (fd < 0) {
+ // We failed to open oom_score_adj, so let's try for the older
+ // oom_adj file instead.
+ fd = openat(dirfd, "oom_adj", O_WRONLY);
+ if (fd < 0) {
+ // Nope, that doesn't work either.
+ return false;
+ } else {
+ // If we're using the old oom_adj file, the allowed range is now
+ // [0, kMaxOldOomScore], so we scale the score. This may result in some
+ // aliasing of values, of course.
+ score = score * kMaxOldOomScore / kMaxOomScore;
+ }
+ }
+ close(dirfd);
+
+ char buf[11]; // 0 <= |score| <= kMaxOomScore; using log_10(2**32) + 1 size
+ snprintf(buf, sizeof(buf), "%d", score);
+ size_t len = strlen(buf);
+
+ ssize_t bytes_written = write(fd, buf, len);
+ close(fd);
+ return (bytes_written == len);
+}
+
+bool AdjustLowMemoryMargin(int64_t margin_mb) {
+ int file_descriptor = open(kLowMemMarginFile, O_WRONLY);
+ if (file_descriptor < 0)
+ return false;
+
+ // Only allow those values which are reasonable, to prevent mischief.
+ char value[21];
+ switch (margin_mb) {
+ case -1L:
+ snprintf(value, sizeof(value), "off");
+ break;
+ case 0L:
+ case 25L:
+ case 50L:
+ case 100L:
+ case 200L:
+ snprintf(value, sizeof(value), "%lld", (long long int)margin_mb);
+ break;
+ default:
+ return false;
+ }
+
+ bool success = (write(file_descriptor, value, strlen(value)) >= 0);
+ close(file_descriptor);
+ return success;
+}
diff --git a/sandbox/linux/suid/sandbox.c b/sandbox/linux/suid/sandbox.c
new file mode 100644
index 0000000..32435a7
--- /dev/null
+++ b/sandbox/linux/suid/sandbox.c
@@ -0,0 +1,482 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// http://code.google.com/p/chromium/wiki/LinuxSUIDSandbox
+
+#include "common/sandbox.h"
+
+#define _GNU_SOURCE
+#include <asm/unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "linux_util.h"
+#include "process_util.h"
+#include "common/suid_unsafe_environment_variables.h"
+
+#if !defined(CLONE_NEWPID)
+#define CLONE_NEWPID 0x20000000
+#endif
+#if !defined(CLONE_NEWNET)
+#define CLONE_NEWNET 0x40000000
+#endif
+
+static bool DropRoot();
+
+#define HANDLE_EINTR(x) TEMP_FAILURE_RETRY(x)
+
// Prints |msg| (printf-style) followed by the strerror() text for the errno
// value that was current on entry, then terminates the process. errno is
// captured up front because vfprintf() may itself clobber it.
static void FatalError(const char *msg, ...)
    __attribute__((noreturn, format(printf, 1, 2)));

static void FatalError(const char *msg, ...) {
  // Save errno before any stdio call can overwrite it.
  const int saved_errno = errno;
  va_list ap;
  va_start(ap, msg);

  vfprintf(stderr, msg, ap);
  fprintf(stderr, ": %s\n", strerror(saved_errno));
  fflush(stderr);
  va_end(ap);
  _exit(1);
}
+
+// We will chroot() to the helper's /proc/self directory. Anything there will
+// not exist anymore if we make sure to wait() for the helper.
+//
+// /proc/self/fdinfo or /proc/self/fd are especially safe and will be empty
+// even if the helper survives as a zombie.
+//
+// There is very little reason to use fdinfo/ instead of fd/ but we are
+// paranoid. fdinfo/ only exists since 2.6.22 so we allow fallback to fd/
+#define SAFE_DIR "/proc/self/fdinfo"
+#define SAFE_DIR2 "/proc/self/fd"
+
+static bool SpawnChrootHelper() {
+ int sv[2];
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+ perror("socketpair");
+ return false;
+ }
+
+ char *safedir = NULL;
+ struct stat sdir_stat;
+ if (!stat(SAFE_DIR, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
+ safedir = SAFE_DIR;
+ else
+ if (!stat(SAFE_DIR2, &sdir_stat) && S_ISDIR(sdir_stat.st_mode))
+ safedir = SAFE_DIR2;
+ else {
+ fprintf(stderr, "Could not find %s\n", SAFE_DIR2);
+ return false;
+ }
+
+ const pid_t pid = syscall(
+ __NR_clone, CLONE_FS | SIGCHLD, 0, 0, 0);
+
+ if (pid == -1) {
+ perror("clone");
+ close(sv[0]);
+ close(sv[1]);
+ return false;
+ }
+
+ if (pid == 0) {
+ // We share our files structure with an untrusted process. As a security in
+ // depth measure, we make sure that we can't open anything by mistake.
+ // TODO(agl): drop CAP_SYS_RESOURCE / use SECURE_NOROOT
+
+ const struct rlimit nofile = {0, 0};
+ if (setrlimit(RLIMIT_NOFILE, &nofile))
+ FatalError("Setting RLIMIT_NOFILE");
+
+ if (close(sv[1]))
+ FatalError("close");
+
+ // wait for message
+ char msg;
+ ssize_t bytes;
+ do {
+ bytes = read(sv[0], &msg, 1);
+ } while (bytes == -1 && errno == EINTR);
+
+ if (bytes == 0)
+ _exit(0);
+ if (bytes != 1)
+ FatalError("read");
+
+ // do chrooting
+ if (msg != kMsgChrootMe)
+ FatalError("Unknown message from sandboxed process");
+
+ // sanity check
+ if (chdir(safedir))
+ FatalError("Cannot chdir into /proc/ directory");
+
+ if (chroot(safedir))
+ FatalError("Cannot chroot into /proc/ directory");
+
+ if (chdir("/"))
+ FatalError("Cannot chdir to / after chroot");
+
+ const char reply = kMsgChrootSuccessful;
+ do {
+ bytes = write(sv[0], &reply, 1);
+ } while (bytes == -1 && errno == EINTR);
+
+ if (bytes != 1)
+ FatalError("Writing reply");
+
+ _exit(0);
+ // We now become a zombie. /proc/self/fd(info) is now an empty dir and we
+ // are chrooted there.
+ // Our (unprivileged) parent should not even be able to open "." or "/"
+ // since they would need to pass the ptrace() check. If our parent wait()
+ // for us, our root directory will completely disappear.
+ }
+
+ if (close(sv[0])) {
+ close(sv[1]);
+ perror("close");
+ return false;
+ }
+
+ // In the parent process, we install an environment variable containing the
+ // number of the file descriptor.
+ char desc_str[64];
+ int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]);
+ if (printed < 0 || printed >= (int)sizeof(desc_str)) {
+ fprintf(stderr, "Failed to snprintf\n");
+ return false;
+ }
+
+ if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) {
+ perror("setenv");
+ close(sv[1]);
+ return false;
+ }
+
+ // We also install an environment variable containing the pid of the child
+ char helper_pid_str[64];
+ printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid);
+ if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) {
+ fprintf(stderr, "Failed to snprintf\n");
+ return false;
+ }
+
+ if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) {
+ perror("setenv");
+ close(sv[1]);
+ return false;
+ }
+
+ return true;
+}
+
+// Block until child_pid exits, then exit. Try to preserve the exit code.
+static void WaitForChildAndExit(pid_t child_pid) {
+ int exit_code = -1;
+ siginfo_t reaped_child_info;
+
+ int wait_ret =
+ HANDLE_EINTR(waitid(P_PID, child_pid, &reaped_child_info, WEXITED));
+
+ if (!wait_ret && reaped_child_info.si_pid == child_pid) {
+ if (reaped_child_info.si_code == CLD_EXITED) {
+ exit_code = reaped_child_info.si_status;
+ } else {
+ // Exit with code 0 if the child got signaled.
+ exit_code = 0;
+ }
+ }
+ _exit(exit_code);
+}
+
// Re-creates the current process inside new namespaces via clone(), trying
// PID+NET first and falling back to PID only (EINVAL means the kernel lacks
// the flag). The clone parent drops root, closes kZygoteIdFd, signals the
// child over a socketpair, and then blocks in WaitForChildAndExit() -- it
// never returns to the caller. The clone child (or the original process, if
// no namespace flag is supported) returns true and continues main().
// Returns false only if clone() failed for a reason other than EINVAL.
static bool MoveToNewNamespaces() {
  // These are the sets of flags which we'll try, in order.
  const int kCloneExtraFlags[] = {
    CLONE_NEWPID | CLONE_NEWNET,
    CLONE_NEWPID,
  };

  // We need to close kZygoteIdFd before the child can continue. We use this
  // socketpair to tell the child when to continue.
  int sync_fds[2];
  if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
    FatalError("Failed to create a socketpair");
  }

  for (size_t i = 0;
       i < sizeof(kCloneExtraFlags) / sizeof(kCloneExtraFlags[0]);
       i++) {
    pid_t pid = syscall(__NR_clone, SIGCHLD | kCloneExtraFlags[i], 0, 0, 0);

    if (pid > 0) {
      // Clone parent: drop privileges, synchronize with the child, then wait
      // for it forever (WaitForChildAndExit calls _exit()).
      if (!DropRoot()) {
        FatalError("Could not drop privileges");
      } else {
        if (close(sync_fds[0]) || shutdown(sync_fds[1], SHUT_RD))
          FatalError("Could not close socketpair");
        // The kZygoteIdFd needs to be closed in the parent before
        // Zygote gets started.
        if (close(kZygoteIdFd))
          FatalError("close");
        // Tell our child to continue.
        if (HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) != 1)
          FatalError("send");
        if (close(sync_fds[1]))
          FatalError("close");
        // We want to keep a full process tree and we don't want our children
        // to be reparented to (the outer PID namespace) init. So we wait for
        // the child.
        WaitForChildAndExit(pid);
      }
      // NOTREACHED
      FatalError("Not reached");
    }

    if (pid == 0) {
      // Clone child: close our end of the sync channel and wait for the
      // parent's go-ahead before touching anything.
      if (close(sync_fds[1]) || shutdown(sync_fds[0], SHUT_WR))
        FatalError("Could not close socketpair");

      // Wait for the parent to confirm it closed kZygoteIdFd before we
      // continue.
      char should_continue;
      if (HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)) != 1)
        FatalError("Read on socketpair");
      if (close(sync_fds[0]))
        FatalError("close");

      // Record which namespaces we actually obtained so the sandboxed
      // process can tell.
      if (kCloneExtraFlags[i] & CLONE_NEWPID) {
        setenv(kSandboxPIDNSEnvironmentVarName, "", 1 /* overwrite */);
      } else {
        unsetenv(kSandboxPIDNSEnvironmentVarName);
      }

      if (kCloneExtraFlags[i] & CLONE_NEWNET) {
        setenv(kSandboxNETNSEnvironmentVarName, "", 1 /* overwrite */);
      } else {
        unsetenv(kSandboxNETNSEnvironmentVarName);
      }

      break;
    }

    // EINVAL: this kernel doesn't support the requested flag combination;
    // retry with the next, less demanding set.
    if (errno != EINVAL) {
      perror("Failed to move to new PID namespace");
      return false;
    }
  }

  // If the system doesn't support NEWPID then we carry on anyway.
  return true;
}
+
// Permanently gives up elevated privileges: clears (and verifies) the
// process "dumpable" flag, then resets real/effective/saved gid and uid to
// the real (invoking user's) ids. Returns true only if every step succeeded.
static bool DropRoot() {
  if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) {
    perror("prctl(PR_SET_DUMPABLE)");
    return false;
  }
  // Paranoia: confirm the flag actually took effect.
  if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) {
    perror("Still dumpable after prctl(PR_SET_DUMPABLE)");
    return false;
  }

  gid_t real_gid, effective_gid, saved_gid;
  if (getresgid(&real_gid, &effective_gid, &saved_gid)) {
    perror("getresgid");
    return false;
  }
  // Drop the group first, while we are still privileged enough to do so.
  if (setresgid(real_gid, real_gid, real_gid)) {
    perror("setresgid");
    return false;
  }

  uid_t real_uid, effective_uid, saved_uid;
  if (getresuid(&real_uid, &effective_uid, &saved_uid)) {
    perror("getresuid");
    return false;
  }
  if (setresuid(real_uid, real_uid, real_uid)) {
    perror("setresuid");
    return false;
  }

  return true;
}
+
+static bool SetupChildEnvironment() {
+ unsigned i;
+
+ // ld.so may have cleared several environment variables because we are SUID.
+ // However, the child process might need them so zygote_host_linux.cc saves a
+ // copy in SANDBOX_$x. This is safe because we have dropped root by this
+ // point, so we can only exec a binary with the permissions of the user who
+ // ran us in the first place.
+
+ for (i = 0; kSUIDUnsafeEnvironmentVariables[i]; ++i) {
+ const char* const envvar = kSUIDUnsafeEnvironmentVariables[i];
+ char* const saved_envvar = SandboxSavedEnvironmentVariable(envvar);
+ if (!saved_envvar)
+ return false;
+
+ const char* const value = getenv(saved_envvar);
+ if (value) {
+ setenv(envvar, value, 1 /* overwrite */);
+ unsetenv(saved_envvar);
+ }
+
+ free(saved_envvar);
+ }
+
+ return true;
+}
+
+bool CheckAndExportApiVersion() {
+ // Check the environment to see if a specific API version was requested.
+ // assume version 0 if none.
+ long api_number = -1;
+ char *api_string = getenv(kSandboxEnvironmentApiRequest);
+ if (!api_string) {
+ api_number = 0;
+ } else {
+ errno = 0;
+ char* endptr = NULL;
+ api_number = strtol(api_string, &endptr, 10);
+ if (!endptr || *endptr || errno != 0)
+ return false;
+ }
+
+ // Warn only for now.
+ if (api_number != kSUIDSandboxApiNumber) {
+ fprintf(stderr, "The setuid sandbox provides API version %ld, "
+ "but you need %ld\n"
+ "Please read "
+ "https://code.google.com/p/chromium/wiki/LinuxSUIDSandboxDevelopment."
+ "\n\n",
+ kSUIDSandboxApiNumber,
+ api_number);
+ }
+
+ // Export our version so that the sandboxed process can verify it did not
+ // use an old sandbox.
+ char version_string[64];
+ snprintf(version_string, sizeof(version_string), "%ld",
+ kSUIDSandboxApiNumber);
+ if (setenv(kSandboxEnvironmentApiProvides, version_string, 1)) {
+ perror("setenv");
+ return false;
+ }
+
+ return true;
+}
+
// Entry point of the setuid sandbox binary. Depending on argv it either
// reports its API version (--get-api), acts as a root helper for the browser
// (find-inode / OOM-score / low-memory-margin adjustments on processes the
// unprivileged user could not touch), or performs the normal sandboxing
// path: namespaces, chroot helper, privilege drop, then exec of the target.
int main(int argc, char **argv) {
  if (argc <= 1) {
    // argc can legitimately be 0 when invoked via exec with an empty argv.
    if (argc <= 0) {
      return 1;
    }

    fprintf(stderr, "Usage: %s <renderer process> <args...>\n", argv[0]);
    return 1;
  }

  // Allow someone to query our API version
  if (argc == 2 && 0 == strcmp(argv[1], kSuidSandboxGetApiSwitch)) {
    printf("%ld\n", kSUIDSandboxApiNumber);
    return 0;
  }

  // In the SUID sandbox, if we succeed in calling MoveToNewNamespaces()
  // below, then the zygote and all the renderers are in an alternate PID
  // namespace and do not know their real PIDs. As such, they report the wrong
  // PIDs to the task manager.
  //
  // To fix this, when the zygote spawns a new renderer, it gives the renderer
  // a dummy socket, which has a unique inode number. Then it asks the sandbox
  // host to find the PID of the process holding that fd by searching /proc.
  //
  // Since the zygote and renderers are all spawned by this setuid executable,
  // their entries in /proc are owned by root and only readable by root. In
  // order to search /proc for the fd we want, this setuid executable has to
  // double as a helper and perform the search. The code block below does this
  // when you call it with --find-inode INODE_NUMBER.
  if (argc == 3 && (0 == strcmp(argv[1], kFindInodeSwitch))) {
    pid_t pid;
    char* endptr = NULL;
    errno = 0;
    // NOTE(review): the strtoull() result is truncated to ino_t before the
    // ULLONG_MAX overflow check; if ino_t is narrower than unsigned long
    // long the check can never fire — confirm 64-bit ino_t on all builds.
    ino_t inode = strtoull(argv[2], &endptr, 10);
    if (inode == ULLONG_MAX || !endptr || *endptr || errno != 0)
      return 1;
    if (!FindProcessHoldingSocket(&pid, inode))
      return 1;
    printf("%d\n", pid);
    return 0;
  }
  // Likewise, we cannot adjust /proc/pid/oom_adj for sandboxed renderers
  // because those files are owned by root. So we need another helper here.
  if (argc == 4 && (0 == strcmp(argv[1], kAdjustOOMScoreSwitch))) {
    char* endptr = NULL;
    long score;
    errno = 0;
    unsigned long pid_ul = strtoul(argv[2], &endptr, 10);
    if (pid_ul == ULONG_MAX || !endptr || *endptr || errno != 0)
      return 1;
    pid_t pid = pid_ul;
    endptr = NULL;
    errno = 0;
    score = strtol(argv[3], &endptr, 10);
    if (score == LONG_MAX || score == LONG_MIN ||
        !endptr || *endptr || errno != 0)
      return 1;
    // NOTE(review): this returns AdjustOOMScore()'s bool directly, i.e. exit
    // status 1 on success and 0 on failure — the inverse of the usual shell
    // convention. Confirm what callers expect before changing it.
    return AdjustOOMScore(pid, score);
  }
#if defined(OS_CHROMEOS)
  if (argc == 3 && (0 == strcmp(argv[1], kAdjustLowMemMarginSwitch))) {
    char* endptr = NULL;
    errno = 0;
    unsigned long margin_mb = strtoul(argv[2], &endptr, 10);
    if (!endptr || *endptr || errno != 0)
      return 1;
    // NOTE(review): same inverted exit status as the OOM helper above. Also,
    // |margin_mb| is parsed as unsigned, so the special -1 ("off") value
    // cannot be requested through this switch — confirm that is intended.
    return AdjustLowMemoryMargin(margin_mb);
  }
#endif

  // Protect the core setuid sandbox functionality with an API version.
  if (!CheckAndExportApiVersion()) {
    return 1;
  }

  // Normal sandboxing path. MoveToNewNamespaces() returns in the namespaced
  // child; the parent stays behind to wait for it.
  if (!MoveToNewNamespaces())
    return 1;
  if (!SpawnChrootHelper())
    return 1;
  if (!DropRoot())
    return 1;
  if (!SetupChildEnvironment())
    return 1;

  // Only reached on execv failure.
  execv(argv[1], &argv[1]);
  FatalError("execv failed");

  return 1;
}
diff --git a/sandbox/linux/tests/main.cc b/sandbox/linux/tests/main.cc
new file mode 100644
index 0000000..4412645
--- /dev/null
+++ b/sandbox/linux/tests/main.cc
@@ -0,0 +1,16 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+
// Test driver for sandbox_linux_unittests.
int main(int argc, char *argv[]) {
  testing::InitGoogleTest(&argc, argv);
  // Always go through re-execution for death tests.
  // This makes gtest only marginally slower for us and has the
  // additional side effect of getting rid of gtest warnings about fork()
  // safety.
  // NOTE(review): set after InitGoogleTest(), so this presumably also
  // overrides any --gtest_death_test_style flag given on the command line —
  // confirm that is intended.
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  return RUN_ALL_TESTS();
}
diff --git a/sandbox/linux/tests/unit_tests.cc b/sandbox/linux/tests/unit_tests.cc
new file mode 100644
index 0000000..105c45b
--- /dev/null
+++ b/sandbox/linux/tests/unit_tests.cc
@@ -0,0 +1,77 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdio.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+
+#include "base/file_util.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+namespace sandbox {
+
+static const int kExpectedValue = 42;
+
// Forks, runs |test(arg)| in the child, and converts the child's fate into
// gtest assertions in the parent: the child must exit normally with
// kExpectedValue and must not have written anything to stderr.
void UnitTests::RunTestInProcess(UnitTests::Test test, void *arg) {
  // Runs a test in a sub-process. This is necessary for most of the code
  // in the BPF sandbox, as it potentially makes global state changes and as
  // it also tends to raise fatal errors, if the code has been used in an
  // insecure manner.
  int fds[2];
  ASSERT_EQ(0, pipe(fds));

  pid_t pid;
  ASSERT_LE(0, (pid = fork()));
  if (!pid) {
    // In child process
    // Redirect stderr to our pipe. This way, we can capture all error
    // messages, if we decide we want to do so in our tests.
    SANDBOX_ASSERT(dup2(fds[1], 2) == 2);
    SANDBOX_ASSERT(!close(fds[0]));
    SANDBOX_ASSERT(!close(fds[1]));

    // Disable core files. They are not very useful for our individual test
    // cases.
    struct rlimit no_core = { 0 };
    setrlimit(RLIMIT_CORE, &no_core);

    test(arg);
    // Exiting with this magic value tells the parent that |test| ran to
    // completion rather than crashing or exiting early.
    _exit(kExpectedValue);
  }

  // In the parent: close the write end, then drain everything the child
  // wrote to stderr so it can be attached to any failure message below.
  (void)HANDLE_EINTR(close(fds[1]));
  std::vector<char> msg;
  ssize_t rc;
  do {
    const unsigned int kCapacity = 256;
    size_t len = msg.size();
    msg.resize(len + kCapacity);
    rc = HANDLE_EINTR(read(fds[0], &msg[len], kCapacity));
    // Trim back down to the bytes actually read (rc is -1 on error).
    msg.resize(len + std::max(rc, static_cast<ssize_t>(0)));
  } while (rc > 0);
  std::string details;
  if (!msg.empty()) {
    details = "Actual test failure: " + std::string(msg.begin(), msg.end());
  }
  (void)HANDLE_EINTR(close(fds[0]));

  // Reap the child and translate its exit status into assertions.
  // NOTE(review): waitpid()/HANDLE_EINTR rely on transitive includes from
  // base/file_util.h — confirm <sys/wait.h> is reachable on all builds.
  int status = 0;
  int waitpid_returned = HANDLE_EINTR(waitpid(pid, &status, 0));
  ASSERT_EQ(pid, waitpid_returned) << details;
  bool subprocess_terminated_normally = WIFEXITED(status);
  ASSERT_TRUE(subprocess_terminated_normally) << details;
  int subprocess_exit_status = WEXITSTATUS(status);
  ASSERT_EQ(kExpectedValue, subprocess_exit_status) << details;
  bool subprocess_exited_but_printed_messages = !msg.empty();
  EXPECT_FALSE(subprocess_exited_but_printed_messages) << details;
}
+
+void UnitTests::AssertionFailure(const char *expr, const char *file,
+ int line) {
+ fprintf(stderr, "%s:%d:%s", file, line, expr);
+ fflush(stderr);
+ _exit(1);
+}
+
+}  // namespace sandbox
diff --git a/sandbox/linux/tests/unit_tests.h b/sandbox/linux/tests/unit_tests.h
new file mode 100644
index 0000000..d6b4761
--- /dev/null
+++ b/sandbox/linux/tests/unit_tests.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_TESTS_UNIT_TESTS_H__
+#define SANDBOX_LINUX_TESTS_UNIT_TESTS_H__
+
+#include "base/basictypes.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
// Define a new test case that runs inside of a death test. This is necessary,
// as most of our tests by definition make global and irreversible changes to
// the system (i.e. they install a sandbox). GTest provides death tests as a
// tool to isolate global changes from the rest of the tests.
//
// The body following the macro invocation becomes a free function
// TEST_<test_name>(void*), which UnitTests::RunTestInProcess() executes in a
// forked sub-process.
#define SANDBOX_TEST(test_case_name, test_name) \
  void TEST_##test_name(void *); \
  TEST(test_case_name, test_name) { \
    sandbox::UnitTests::RunTestInProcess(TEST_##test_name, NULL); \
  } \
  void TEST_##test_name(void *)

// Simple assertion macro that is compatible with running inside of a death
// test. We unfortunately cannot use any of the GTest macros.
// On failure, UnitTests::AssertionFailure() prints "file:line:expression"
// and terminates the sub-process.
#define SANDBOX_STR(x) #x
#define SANDBOX_ASSERT(expr) \
  ((expr) \
   ? static_cast<void>(0) \
   : sandbox::UnitTests::AssertionFailure(SANDBOX_STR(expr), \
                                          __FILE__, __LINE__))
+
// Static-only helper class backing the SANDBOX_TEST() and SANDBOX_ASSERT()
// macros; it is never instantiated.
class UnitTests {
 public:
  // Signature of a test body: receives the opaque |arg| that was passed to
  // RunTestInProcess().
  typedef void (*Test)(void *);

  // Runs a test inside a short-lived process. Do not call this function
  // directly. It is automatically invoked by SANDBOX_TEST(). Most sandboxing
  // functions make global irreversible changes to the execution environment
  // and must therefore execute in their own isolated process.
  static void RunTestInProcess(Test test, void *arg);

  // Report a useful error message and terminate the current SANDBOX_TEST().
  // Calling this function from outside a SANDBOX_TEST() is unlikely to do
  // anything useful.
  static void AssertionFailure(const char *expr, const char *file, int line);

 private:
  DISALLOW_IMPLICIT_CONSTRUCTORS(UnitTests);
};
+
+}  // namespace sandbox
+
+#endif // SANDBOX_LINUX_TESTS_UNIT_TESTS_H__