| //===-- msandr.cc ---------------------------------------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file is a part of MemorySanitizer. |
| // |
| // DynamoRio client for MemorySanitizer. |
| // |
| // MemorySanitizer requires that all program code is instrumented. Any memory |
| // store that can turn an uninitialized value into an initialized value must be |
| // observed by the tool, otherwise we risk reporting a false UMR. |
| // |
| // This also includes any libraries that the program depends on. |
| // |
| // In the case when rebuilding all program dependencies with MemorySanitizer is |
| // problematic, an experimental MSanDR tool (the code you are currently looking |
| // at) can be used. It is a DynamoRio-based tool that uses dynamic |
| // instrumentation to |
| // * Unpoison all memory stores. |
| // * Unpoison TLS slots used by MemorySanitizer to pass function arguments and |
| // return value shadow on anything that looks like a function call or a return |
| // from a function. |
| // |
| // This tool does not detect the use of uninitialized values in uninstrumented |
| // libraries. It merely gets rid of false positives by marking all data that |
| // passes through uninstrumented code as fully initialized. |
| //===----------------------------------------------------------------------===// |
| |
| #include <dr_api.h> |
| #include <drutil.h> |
| #include <drmgr.h> |
| #include <drsyscall.h> |
| |
| #include <sys/mman.h> |
| #include <sys/syscall.h> /* for SYS_mmap */ |
| |
| #include <algorithm> |
| #include <string> |
| #include <set> |
| #include <vector> |
| #include <string.h> |
| |
// Bit-mask predicates.
//   TESTALL(mask, var): true when every bit set in `mask` is also set in `var`.
//   TESTANY(mask, var): true when at least one bit of `mask` is set in `var`.
#define TESTALL(mask, var) ((mask) == ((mask) & (var)))
#define TESTANY(mask, var) (0 != ((mask) & (var)))
| |
// Runtime assertion helpers.
// CHECK_IMPL evaluates `cond` exactly once. When it is false, the text of the
// expression and the given source location are printed with dr_printf() and
// dr_abort() is called. The do/while(0) wrapper makes the macro behave like a
// single statement.
// TODO: stacktrace
#define CHECK_IMPL(cond, src_file, src_line) \
  do { \
    if (cond) \
      break; \
    dr_printf("Check failed: `%s`\nat %s:%d\n", #cond, src_file, src_line); \
    dr_abort(); \
  } while (0)

// CHECK(condition) is CHECK_IMPL bound to the location of the call site.
#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
| |
// Compile-time log level used throughout this file: 0 is silent, values above
// 0 enable progress messages, values above 1 additionally enable per-block
// dumps. Defaults to 0; may be overridden from the build command line
// (e.g. -DVERBOSITY=2).
#ifndef VERBOSITY
#define VERBOSITY 0
#endif
| |
| namespace { |
| |
| class ModuleData { |
| public: |
| ModuleData(); |
| ModuleData(const module_data_t *info); |
| // Yes, we want default copy, assign, and dtor semantics. |
| |
| public: |
| app_pc start_; |
| app_pc end_; |
| // Full path to the module. |
| std::string path_; |
| module_handle_t handle_; |
| bool should_instrument_; |
| bool executed_; |
| }; |
| |
| std::string g_app_path; |
| |
| int msan_retval_tls_offset; |
| int msan_param_tls_offset; |
| |
| // A vector of loaded modules sorted by module bounds. We lookup the current PC |
| // in here from the bb event. This is better than an rb tree because the lookup |
| // is faster and the bb event occurs far more than the module load event. |
| std::vector<ModuleData> g_module_list; |
| |
| ModuleData::ModuleData() |
| : start_(NULL), end_(NULL), path_(""), handle_(NULL), |
| should_instrument_(false), executed_(false) { |
| } |
| |
| ModuleData::ModuleData(const module_data_t *info) |
| : start_(info->start), end_(info->end), path_(info->full_path), |
| handle_(info->handle), |
| // We'll check the black/white lists later and adjust this. |
| should_instrument_(true), executed_(false) { |
| } |
| |
// Entry points of the MSan runtime. They are resolved from the application
// binary by InitializeMSanCallbacks() and are null until then.
typedef int (*MsanTlsOffsetFn)();
typedef void (*MsanUnpoisonFn)(void *base, size_t size);
typedef bool (*MsanIsInLoaderFn)();

MsanTlsOffsetFn __msan_get_retval_tls_offset;
MsanTlsOffsetFn __msan_get_param_tls_offset;
MsanUnpoisonFn __msan_unpoison;
MsanIsInLoaderFn __msan_is_in_loader;
| |
| static generic_func_t LookupCallback(module_data_t *app, const char *name) { |
| generic_func_t callback = dr_get_proc_address(app->handle, name); |
| if (callback == NULL) { |
| dr_printf("Couldn't find `%s` in %s\n", name, app->full_path); |
| CHECK(callback); |
| } |
| return callback; |
| } |
| |
| void InitializeMSanCallbacks() { |
| module_data_t *app = dr_lookup_module_by_name(dr_get_application_name()); |
| if (!app) { |
| dr_printf("%s - oops, dr_lookup_module_by_name failed!\n", |
| dr_get_application_name()); |
| CHECK(app); |
| } |
| g_app_path = app->full_path; |
| |
| __msan_get_retval_tls_offset = (int (*)()) |
| LookupCallback(app, "__msan_get_retval_tls_offset"); |
| __msan_get_param_tls_offset = (int (*)()) |
| LookupCallback(app, "__msan_get_param_tls_offset"); |
| __msan_unpoison = (void(*)(void *, size_t)) |
| LookupCallback(app, "__msan_unpoison"); |
| __msan_is_in_loader = (bool (*)()) |
| LookupCallback(app, "__msan_is_in_loader"); |
| |
| dr_free_module_data(app); |
| } |
| |
| // FIXME: Handle absolute addresses and PC-relative addresses. |
| // FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have |
| // a zero base anyway. |
| bool OperandIsInteresting(opnd_t opnd) { |
| return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS && |
| opnd_get_segment(opnd) != DR_SEG_GS); |
| } |
| |
| bool WantToInstrument(instr_t *instr) { |
| // TODO: skip push instructions? |
| switch (instr_get_opcode(instr)) { |
| // FIXME: support the instructions excluded below: |
| case OP_rep_cmps: |
| // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx |
| return false; |
| } |
| |
| // Labels appear due to drutil_expand_rep_string() |
| if (instr_is_label(instr)) |
| return false; |
| |
| CHECK(instr_ok_to_mangle(instr) == true); |
| |
| if (instr_writes_memory(instr)) { |
| for (int d = 0; d < instr_num_dsts(instr); d++) { |
| opnd_t op = instr_get_dst(instr, d); |
| if (OperandIsInteresting(op)) |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
// Shorthands for inserting a meta instruction in front of `at`. Both expect an
// instruction list named `bb` to be in scope at the point of use.
//   PRE(at, what):  `what` is the tail of an INSTR_CREATE_* macro invocation,
//                   e.g. PRE(instr, mov_st(drcontext, dst, src));
//   PREF(at, what): `what` is an already created instruction.
// The macros deliberately do not end in a semicolon: the caller supplies it,
// which keeps them usable as a single statement (e.g. in an unbraced if/else).
#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what)
#define PREF(at, what) instrlist_meta_preinsert(bb, at, what)
| |
| void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op, |
| bool is_write) { |
| bool need_to_restore_eflags = false; |
| uint flags = instr_get_arith_flags(instr); |
| // TODO: do something smarter with flags and spills in general? |
| // For example, spill them only once for a sequence of instrumented |
| // instructions that don't change/read flags. |
| |
| if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) { |
| if (VERBOSITY > 1) |
| dr_printf("Spilling eflags...\n"); |
| need_to_restore_eflags = true; |
| // TODO: Maybe sometimes don't need to 'seto'. |
| // TODO: Maybe sometimes don't want to spill XAX here? |
| // TODO: No need to spill XAX here if XAX is not used in the BB. |
| dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| dr_save_arith_flags_to_xax(drcontext, bb, instr); |
| dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); |
| dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| } |
| |
| #if 0 |
| dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n", |
| opnd_is_memory_reference(op), opnd_is_base_disp(op), |
| opnd_is_base_disp(op) ? opnd_get_index(op) : -1, |
| opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op), |
| opnd_is_base_disp(op) ? opnd_get_disp(op) : -1); |
| #endif |
| |
| reg_id_t R1; |
| bool address_in_R1 = false; |
| if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL && |
| opnd_get_disp(op) == 0) { |
| // If this is a simple access with no offset or index, we can just use the |
| // base for R1. |
| address_in_R1 = true; |
| R1 = opnd_get_base(op); |
| } else { |
| // Otherwise, we need to compute the addr into R1. |
| // TODO: reuse some spare register? e.g. r15 on x64 |
| // TODO: might be used as a non-mem-ref register? |
| R1 = DR_REG_XAX; |
| } |
| CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong. |
| |
| // Pick R2 that's not R1 or used by the operand. It's OK if the instr uses |
| // R2 elsewhere, since we'll restore it before instr. |
| reg_id_t GPR_TO_USE_FOR_R2[] = { |
| DR_REG_XAX, DR_REG_XBX, DR_REG_XCX, DR_REG_XDX |
| // Don't forget to update the +4 below if you add anything else! |
| }; |
| std::set<reg_id_t> unused_registers(GPR_TO_USE_FOR_R2, GPR_TO_USE_FOR_R2 + 4); |
| unused_registers.erase(R1); |
| for (int j = 0; j < opnd_num_regs_used(op); j++) { |
| unused_registers.erase(opnd_get_reg_used(op, j)); |
| } |
| |
| CHECK(unused_registers.size() > 0); |
| reg_id_t R2 = *unused_registers.begin(); |
| CHECK(R1 != R2); |
| |
| // Save the current values of R1 and R2. |
| dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); |
| // TODO: Something smarter than spilling a "fixed" register R2? |
| dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); |
| |
| if (!address_in_R1) |
| CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2)); |
| PRE(instr, mov_imm(drcontext, opnd_create_reg(R2), |
| OPND_CREATE_INT64(0xffffbfffffffffff))); |
| PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2))); |
| // There is no mov_st of a 64-bit immediate, so... |
| opnd_size_t op_size = opnd_get_size(op); |
| CHECK(op_size != OPSZ_NA); |
| uint access_size = opnd_size_in_bytes(op_size); |
| if (access_size <= 4) { |
| PRE(instr, |
| mov_st(drcontext, opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), |
| opnd_create_immed_int((ptr_int_t) 0, op_size))); |
| } else { |
| // FIXME: tail? |
| for (uint ofs = 0; ofs < access_size; ofs += 4) { |
| PRE(instr, |
| mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), OPND_CREATE_INT32(0))); |
| } |
| } |
| |
| // Restore the registers and flags. |
| dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); |
| dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); |
| |
| if (need_to_restore_eflags) { |
| if (VERBOSITY > 1) |
| dr_printf("Restoring eflags\n"); |
| // TODO: Check if it's reverse to the dr_restore_reg above and optimize. |
| dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); |
| dr_restore_arith_flags_from_xax(drcontext, bb, instr); |
| dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| } |
| |
| // The original instruction is left untouched. The above instrumentation is just |
| // a prefix. |
| } |
| |
| void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) { |
| dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| |
| // Clobbers nothing except xax. |
| bool res = |
| dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); |
| CHECK(res); |
| |
| // TODO: unpoison more bytes? |
| PRE(instr, |
| mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset), |
| OPND_CREATE_INT32(0))); |
| |
| dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| |
| // The original instruction is left untouched. The above instrumentation is just |
| // a prefix. |
| } |
| |
| void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb, |
| instr_t *instr) { |
| dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| |
| // Clobbers nothing except xax. |
| bool res = |
| dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); |
| CHECK(res); |
| |
| // TODO: unpoison more bytes? |
| for (int i = 0; i < 6; ++i) { |
| PRE(instr, |
| mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset + |
| i * sizeof(void *)), |
| OPND_CREATE_INT32(0))); |
| } |
| |
| dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); |
| |
| // The original instruction is left untouched. The above instrumentation is just |
| // a prefix. |
| } |
| |
| // For use with binary search. Modules shouldn't overlap, so we shouldn't have |
| // to look at end_. If that can happen, we won't support such an application. |
| bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) { |
| return left.start_ < right.start_; |
| } |
| |
| // Look up the module containing PC. Should be relatively fast, as its called |
| // for each bb instrumentation. |
| ModuleData *LookupModuleByPC(app_pc pc) { |
| ModuleData fake_mod_data; |
| fake_mod_data.start_ = pc; |
| std::vector<ModuleData>::iterator it = |
| lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data, |
| ModuleDataCompareStart); |
| // if (it == g_module_list.end()) |
| // return NULL; |
| if (it == g_module_list.end() || pc < it->start_) |
| --it; |
| CHECK(it->start_ <= pc); |
| if (pc >= it->end_) { |
| // We're past the end of this module. We shouldn't be in the next module, |
| // or lower_bound lied to us. |
| ++it; |
| CHECK(it == g_module_list.end() || pc < it->start_); |
| return NULL; |
| } |
| |
| // OK, we found the module. |
| return &*it; |
| } |
| |
// Policy for code that LookupModuleByPC() cannot attribute to any module:
// it is always instrumented.
bool ShouldInstrumentNonModuleCode() {
  const bool instrument_non_module_code = true;
  return instrument_non_module_code;
}
| |
| bool ShouldInstrumentModule(ModuleData *mod_data) { |
| // TODO(rnk): Flags for blacklist would get wired in here. |
| generic_func_t p = |
| dr_get_proc_address(mod_data->handle_, "__msan_track_origins"); |
| return !p; |
| } |
| |
| bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) { |
| ModuleData *mod_data = LookupModuleByPC(pc); |
| if (pmod_data) |
| *pmod_data = mod_data; |
| if (mod_data != NULL) { |
| // This module is on a blacklist. |
| if (!mod_data->should_instrument_) { |
| return false; |
| } |
| } else if (!ShouldInstrumentNonModuleCode()) { |
| return false; |
| } |
| return true; |
| } |
| |
| // TODO(rnk): Make sure we instrument after __msan_init. |
| dr_emit_flags_t |
| event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) { |
| app_pc pc = dr_fragment_app_pc(tag); |
| |
| if (ShouldInstrumentPc(pc, NULL)) |
| CHECK(drutil_expand_rep_string(drcontext, bb)); |
| |
| return DR_EMIT_PERSISTABLE; |
| } |
| |
| dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) { |
| app_pc pc = dr_fragment_app_pc(tag); |
| ModuleData *mod_data; |
| |
| if (!ShouldInstrumentPc(pc, &mod_data)) |
| return DR_EMIT_PERSISTABLE; |
| |
| if (VERBOSITY > 1) |
| dr_printf("============================================================\n"); |
| if (VERBOSITY > 0) { |
| std::string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>"); |
| if (mod_data && !mod_data->executed_) { |
| mod_data->executed_ = true; // Nevermind this race. |
| dr_printf("Executing from new module: %s\n", mod_path.c_str()); |
| } |
| dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc, |
| mod_path.c_str(), translating ? "true" : "false"); |
| if (mod_data) { |
| // Match standard sanitizer trace format for free symbols. |
| // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) |
| dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(), |
| pc - mod_data->start_); |
| } |
| } |
| if (VERBOSITY > 1) { |
| instrlist_disassemble(drcontext, pc, bb, STDOUT); |
| instr_t *instr; |
| for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) { |
| dr_printf("opcode: %d\n", instr_get_opcode(instr)); |
| } |
| } |
| |
| for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) { |
| int opcode = instr_get_opcode(i); |
| if (opcode == OP_ret || opcode == OP_ret_far) { |
| InstrumentReturn(drcontext, bb, i); |
| continue; |
| } |
| |
| // These instructions hopefully cover all cases where control is transferred |
| // to a function in a different module (we only care about calls into |
| // compiler-instrumented modules). |
| // * call_ind is used for normal indirect calls. |
| // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT |
| // stub includes a jump to an address from GOT). |
| if (opcode == OP_call_ind || opcode == OP_call_far_ind || |
| opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) { |
| InstrumentIndirectBranch(drcontext, bb, i); |
| continue; |
| } |
| |
| if (!WantToInstrument(i)) |
| continue; |
| |
| if (VERBOSITY > 1) { |
| app_pc orig_pc = dr_fragment_app_pc(tag); |
| uint flags = instr_get_arith_flags(i); |
| dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n", |
| instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags); |
| } |
| |
| if (instr_writes_memory(i)) { |
| // Instrument memory writes |
| // bool instrumented_anything = false; |
| for (int d = 0; d < instr_num_dsts(i); d++) { |
| opnd_t op = instr_get_dst(i, d); |
| if (!OperandIsInteresting(op)) |
| continue; |
| |
| // CHECK(!instrumented_anything); |
| // instrumented_anything = true; |
| InstrumentMops(drcontext, bb, i, op, true); |
| break; // only instrumenting the first dst |
| } |
| } |
| } |
| |
| // TODO: optimize away redundant restore-spill pairs? |
| |
| if (VERBOSITY > 1) { |
| pc = dr_fragment_app_pc(tag); |
| dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc); |
| instrlist_disassemble(drcontext, pc, bb, STDOUT); |
| } |
| return DR_EMIT_PERSISTABLE; |
| } |
| |
| void event_module_load(void *drcontext, const module_data_t *info, |
| bool loaded) { |
| // Insert the module into the list while maintaining the ordering. |
| ModuleData mod_data(info); |
| std::vector<ModuleData>::iterator it = |
| upper_bound(g_module_list.begin(), g_module_list.end(), mod_data, |
| ModuleDataCompareStart); |
| it = g_module_list.insert(it, mod_data); |
| // Check if we should instrument this module. |
| it->should_instrument_ = ShouldInstrumentModule(&*it); |
| dr_module_set_should_instrument(info->handle, it->should_instrument_); |
| |
| if (VERBOSITY > 0) |
| dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n", |
| info->full_path, info->start, info->end, |
| it->should_instrument_ ? "on" : "off"); |
| } |
| |
| void event_module_unload(void *drcontext, const module_data_t *info) { |
| if (VERBOSITY > 0) |
| dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path, |
| info->start, info->end); |
| |
| // Remove the module from the list. |
| ModuleData mod_data(info); |
| std::vector<ModuleData>::iterator it = |
| lower_bound(g_module_list.begin(), g_module_list.end(), mod_data, |
| ModuleDataCompareStart); |
| // It's a bug if we didn't actually find the module. |
| CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ && |
| it->end_ == mod_data.end_ && it->path_ == mod_data.path_); |
| g_module_list.erase(it); |
| } |
| |
| void event_exit() { |
| // Clean up so DR doesn't tell us we're leaking memory. |
| drsys_exit(); |
| drutil_exit(); |
| drmgr_exit(); |
| |
| if (VERBOSITY > 0) |
| dr_printf("==DRMSAN== DONE\n"); |
| } |
| |
// Syscall filter event: returns true for every syscall number, so all
// syscalls reach the pre/post syscall events.
bool event_filter_syscall(void *drcontext, int sysnum) {
  // FIXME: only intercept syscalls with memory effects.
  const bool intercept = true; /* intercept everything */
  return intercept;
}
| |
| bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) { |
| CHECK(arg->valid); |
| |
| if (arg->pre) |
| return true; |
| if (!TESTANY(DRSYS_PARAM_OUT, arg->mode)) |
| return true; |
| |
| size_t sz = arg->size; |
| |
| if (sz > 0xFFFFFFFF) { |
| drmf_status_t res; |
| drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; |
| const char *name; |
| res = drsys_syscall_name(syscall, &name); |
| CHECK(res == DRMF_SUCCESS); |
| |
| dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!" |
| " Clipping to %llu.\n", |
| name, arg->ordinal, (unsigned long long) sz, |
| (unsigned long long)(sz & 0xFFFFFFFF)); |
| } |
| |
| if (VERBOSITY > 0) { |
| drmf_status_t res; |
| drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; |
| const char *name; |
| res = drsys_syscall_name(syscall, &name); |
| dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n", |
| name, arg->ordinal, arg->start_addr, |
| (char *)arg->start_addr + sz); |
| } |
| |
| // We don't switch to the app context because __msan_unpoison() doesn't need |
| // TLS segments. |
| __msan_unpoison(arg->start_addr, sz); |
| |
| return true; /* keep going */ |
| } |
| |
| bool event_pre_syscall(void *drcontext, int sysnum) { |
| drsys_syscall_t *syscall; |
| drsys_sysnum_t sysnum_full; |
| bool known; |
| drsys_param_type_t ret_type; |
| drmf_status_t res; |
| const char *name; |
| |
| res = drsys_cur_syscall(drcontext, &syscall); |
| CHECK(res == DRMF_SUCCESS); |
| |
| res = drsys_syscall_number(syscall, &sysnum_full); |
| CHECK(res == DRMF_SUCCESS); |
| CHECK(sysnum == sysnum_full.number); |
| |
| res = drsys_syscall_is_known(syscall, &known); |
| CHECK(res == DRMF_SUCCESS); |
| |
| res = drsys_syscall_name(syscall, &name); |
| CHECK(res == DRMF_SUCCESS); |
| |
| res = drsys_syscall_return_type(syscall, &ret_type); |
| CHECK(res == DRMF_SUCCESS); |
| CHECK(ret_type != DRSYS_TYPE_INVALID); |
| CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN); |
| |
| res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL); |
| CHECK(res == DRMF_SUCCESS); |
| |
| return true; |
| } |
| |
| static bool IsInLoader(void *drcontext) { |
| // TODO: This segment swap is inefficient. DR should just let us query the |
| // app segment base, which it has. Alternatively, if we disable |
| // -mangle_app_seg, then we won't need the swap. |
| bool need_swap = !dr_using_app_state(drcontext); |
| if (need_swap) |
| dr_switch_to_app_state(drcontext); |
| bool is_in_loader = __msan_is_in_loader(); |
| if (need_swap) |
| dr_switch_to_dr_state(drcontext); |
| return is_in_loader; |
| } |
| |
| void event_post_syscall(void *drcontext, int sysnum) { |
| drsys_syscall_t *syscall; |
| drsys_sysnum_t sysnum_full; |
| bool success = false; |
| drmf_status_t res; |
| |
| res = drsys_cur_syscall(drcontext, &syscall); |
| CHECK(res == DRMF_SUCCESS); |
| |
| res = drsys_syscall_number(syscall, &sysnum_full); |
| CHECK(res == DRMF_SUCCESS); |
| CHECK(sysnum == sysnum_full.number); |
| |
| res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext), |
| &success); |
| CHECK(res == DRMF_SUCCESS); |
| |
| if (success) { |
| res = |
| drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall); |
| CHECK(res == DRMF_SUCCESS); |
| } |
| |
| // Our normal mmap interceptor can't intercept calls from the loader itself. |
| // This means we don't clear the shadow for calls to dlopen. For now, we |
| // solve this by intercepting mmap from ld.so here, but ideally we'd have a |
| // solution that doesn't rely on msandr. |
| // |
| // Be careful not to intercept maps done by the msan rtl. Otherwise we end up |
| // unpoisoning vast regions of memory and OOMing. |
| // TODO: __msan_unpoison() could "flush" large regions of memory like tsan |
| // does instead of doing a large memset. However, we need the memory to be |
| // zeroed, where as tsan does not, so plain madvise is not enough. |
| if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) { |
| if (IsInLoader(drcontext)) { |
| app_pc base = (app_pc)dr_syscall_get_result(drcontext); |
| ptr_uint_t size; |
| drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size); |
| CHECK(res == DRMF_SUCCESS); |
| if (VERBOSITY > 0) |
| dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size); |
| // We don't switch to the app context because __msan_unpoison() doesn't |
| // need TLS segments. |
| __msan_unpoison(base, size); |
| } |
| } |
| } |
| |
| } // namespace |
| |
| DR_EXPORT void dr_init(client_id_t id) { |
| drmf_status_t res; |
| |
| drmgr_init(); |
| drutil_init(); |
| |
| std::string app_name = dr_get_application_name(); |
| // This blacklist will still run these apps through DR's code cache. On the |
| // other hand, we are able to follow children of these apps. |
| // FIXME: Once DR has detach, we could just detach here. Alternatively, |
| // if DR had a fork or exec hook to let us decide there, that would be nice. |
| // FIXME: make the blacklist cmd-adjustable. |
| if (app_name == "python" || app_name == "python2.7" || app_name == "bash" || |
| app_name == "sh" || app_name == "true" || app_name == "exit" || |
| app_name == "yes" || app_name == "echo") |
| return; |
| |
| drsys_options_t ops; |
| memset(&ops, 0, sizeof(ops)); |
| ops.struct_size = sizeof(ops); |
| ops.analyze_unknown_syscalls = false; |
| |
| res = drsys_init(id, &ops); |
| CHECK(res == DRMF_SUCCESS); |
| |
| dr_register_filter_syscall_event(event_filter_syscall); |
| drmgr_register_pre_syscall_event(event_pre_syscall); |
| drmgr_register_post_syscall_event(event_post_syscall); |
| res = drsys_filter_all_syscalls(); |
| CHECK(res == DRMF_SUCCESS); |
| |
| InitializeMSanCallbacks(); |
| |
| // FIXME: the shadow is initialized earlier when DR calls one of our wrapper |
| // functions. This may change one day. |
| // TODO: make this more robust. |
| |
| void *drcontext = dr_get_current_drcontext(); |
| |
| dr_switch_to_app_state(drcontext); |
| msan_retval_tls_offset = __msan_get_retval_tls_offset(); |
| msan_param_tls_offset = __msan_get_param_tls_offset(); |
| dr_switch_to_dr_state(drcontext); |
| if (VERBOSITY > 0) { |
| dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset); |
| dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset); |
| } |
| |
| // Standard DR events. |
| dr_register_exit_event(event_exit); |
| |
| drmgr_priority_t priority = { |
| sizeof(priority), /* size of struct */ |
| "msandr", /* name of our operation */ |
| NULL, /* optional name of operation we should precede */ |
| NULL, /* optional name of operation we should follow */ |
| 0 |
| }; /* numeric priority */ |
| |
| drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority); |
| drmgr_register_bb_instru2instru_event(event_basic_block, &priority); |
| drmgr_register_module_load_event(event_module_load); |
| drmgr_register_module_unload_event(event_module_unload); |
| if (VERBOSITY > 0) |
| dr_printf("==MSANDR== Starting!\n"); |
| } |