blob: 6c9d7c675acae9c91de70d828c71d3e92eb5e248 [file] [log] [blame]
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +00001//===-- msandr.cc ---------------------------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a part of MemorySanitizer.
11//
12// DynamoRio client for MemorySanitizer.
13//
14// MemorySanitizer requires that all program code is instrumented. Any memory
15// store that can turn an uninitialized value into an initialized value must be
16// observed by the tool, otherwise we risk reporting a false UMR.
17//
18// This also includes any libraries that the program depends on.
19//
20// In the case when rebuilding all program dependencies with MemorySanitizer is
21// problematic, an experimental MSanDR tool (the code you are currently looking
22// at) can be used. It is a DynamoRio-based tool that uses dynamic
23// instrumentation to
24// * Unpoison all memory stores.
25// * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
26// return value shadow on anything that looks like a function call or a return
27// from a function.
28//
29// This tool does not detect the use of uninitialized values in uninstrumented
30// libraries. It merely gets rid of false positives by marking all data that
31// passes through uninstrumented code as fully initialized.
32//===----------------------------------------------------------------------===//
33
34#include <dr_api.h>
35#include <drutil.h>
36#include <drmgr.h>
37#include <drsyscall.h>
38
39#include <sys/mman.h>
Reid Kleckner0f92deb2013-03-11 18:07:42 +000040#include <sys/syscall.h> /* for SYS_mmap */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +000041
42#include <algorithm>
43#include <string>
44#include <set>
45#include <vector>
46#include <string.h>
47
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +000048#define TESTALL(mask, var) (((mask) & (var)) == (mask))
49#define TESTANY(mask, var) (((mask) & (var)) != 0)
50
51#define CHECK_IMPL(condition, file, line) \
52 do { \
53 if (!(condition)) { \
54 dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \
55 dr_abort(); \
56 } \
57 } while (0) // TODO: stacktrace
58
59#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
60
61#define VERBOSITY 0
62
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +000063// XXX: it seems setting macro in CMakeLists.txt does not work,
64// so manually set it here now.
65
66// Building msandr client for running in DynamoRIO hybrid mode,
67// which allows some module running natively.
68// TODO: turn it on by default when hybrid is stable enough
69// #define MSANDR_NATIVE_EXEC
70
71// Building msandr client for standalone test that does not need to
72// run with msan build executables. Disable by default.
73// #define MSANDR_STANDALONE_TEST
74
75#define NUM_TLS_RETVAL 1
76#define NUM_TLS_PARAM 6
77
78#ifdef MSANDR_STANDALONE_TEST
// For testing purposes, we map app to shadow memory at [0x100000, 0x200000).
80// Normally, the app starts at 0x400000:
81// 00400000-004e0000 r-xp 00000000 fc:00 524343 /bin/bash
82// so there should be no problem.
83# define SHADOW_MEMORY_BASE ((void *)0x100000)
84# define SHADOW_MEMORY_SIZE (0x100000)
85# define SHADOW_MEMORY_MASK (SHADOW_MEMORY_SIZE - 4 /* to avoid overflow */)
86#else
87// shadow memory range [0x200000000000, 0x400000000000)
88// assuming no app memory below 0x200000000000
89# define SHADOW_MEMORY_MASK 0x3fffffffffffULL
90#endif /* MSANDR_STANDALONE_TEST */
91
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +000092namespace {
93
Evgeniy Stepanov88732a32013-10-25 14:57:02 +000094std::string g_app_path;
95
96int msan_retval_tls_offset;
97int msan_param_tls_offset;
98
99#ifndef MSANDR_NATIVE_EXEC
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000100class ModuleData {
101public:
102 ModuleData();
103 ModuleData(const module_data_t *info);
104 // Yes, we want default copy, assign, and dtor semantics.
105
106public:
107 app_pc start_;
108 app_pc end_;
109 // Full path to the module.
Evgeniy Stepanov2ecccf82013-03-22 08:47:42 +0000110 std::string path_;
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000111 module_handle_t handle_;
112 bool should_instrument_;
113 bool executed_;
114};
115
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000116// A vector of loaded modules sorted by module bounds. We lookup the current PC
117// in here from the bb event. This is better than an rb tree because the lookup
118// is faster and the bb event occurs far more than the module load event.
119std::vector<ModuleData> g_module_list;
120
121ModuleData::ModuleData()
122 : start_(NULL), end_(NULL), path_(""), handle_(NULL),
123 should_instrument_(false), executed_(false) {
124}
125
126ModuleData::ModuleData(const module_data_t *info)
127 : start_(info->start), end_(info->end), path_(info->full_path),
128 handle_(info->handle),
129 // We'll check the black/white lists later and adjust this.
130 should_instrument_(true), executed_(false) {
131}
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000132#endif /* !MSANDR_NATIVE_EXEC */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000133
134int(*__msan_get_retval_tls_offset)();
135int(*__msan_get_param_tls_offset)();
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000136void (*__msan_unpoison)(void *base, size_t size);
137bool (*__msan_is_in_loader)();
138
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000139#ifdef MSANDR_STANDALONE_TEST
140uint mock_msan_retval_tls_offset;
141uint mock_msan_param_tls_offset;
142static int mock_msan_get_retval_tls_offset() {
143 return (int)mock_msan_retval_tls_offset;
144}
145
146static int mock_msan_get_param_tls_offset() {
147 return (int)mock_msan_param_tls_offset;
148}
149
150static void mock_msan_unpoison(void *base, size_t size) {
151 /* do nothing */
152}
153
154static bool mock_msan_is_in_loader() {
155 return false;
156}
157#endif /* MSANDR_STANDALONE_TEST */
158
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000159static generic_func_t LookupCallback(module_data_t *app, const char *name) {
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000160#ifdef MSANDR_STANDALONE_TEST
161 if (strcmp("__msan_get_retval_tls_offset", name) == 0) {
162 return (generic_func_t)mock_msan_get_retval_tls_offset;
163 } else if (strcmp("__msan_get_param_tls_offset", name) == 0) {
164 return (generic_func_t)mock_msan_get_param_tls_offset;
165 } else if (strcmp("__msan_unpoison", name) == 0) {
166 return (generic_func_t)mock_msan_unpoison;
167 } else if (strcmp("__msan_is_in_loader", name) == 0) {
168 return (generic_func_t)mock_msan_is_in_loader;
169 }
170 CHECK(false);
171 return NULL;
172#else /* !MSANDR_STANDALONE_TEST */
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000173 generic_func_t callback = dr_get_proc_address(app->handle, name);
174 if (callback == NULL) {
175 dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
176 CHECK(callback);
177 }
178 return callback;
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000179#endif /* !MSANDR_STANDALONE_TEST */
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000180}
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000181
182void InitializeMSanCallbacks() {
183 module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
184 if (!app) {
185 dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
186 dr_get_application_name());
187 CHECK(app);
188 }
189 g_app_path = app->full_path;
190
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000191 __msan_get_retval_tls_offset = (int (*)())
192 LookupCallback(app, "__msan_get_retval_tls_offset");
193 __msan_get_param_tls_offset = (int (*)())
194 LookupCallback(app, "__msan_get_param_tls_offset");
195 __msan_unpoison = (void(*)(void *, size_t))
196 LookupCallback(app, "__msan_unpoison");
197 __msan_is_in_loader = (bool (*)())
198 LookupCallback(app, "__msan_is_in_loader");
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000199
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000200 dr_free_module_data(app);
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000201}
202
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000203// FIXME: Handle absolute addresses and PC-relative addresses.
204// FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have
205// a zero base anyway.
206bool OperandIsInteresting(opnd_t opnd) {
207 return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS &&
208 opnd_get_segment(opnd) != DR_SEG_GS);
209}
210
211bool WantToInstrument(instr_t *instr) {
212 // TODO: skip push instructions?
213 switch (instr_get_opcode(instr)) {
214 // FIXME: support the instructions excluded below:
215 case OP_rep_cmps:
216 // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
217 return false;
218 }
219
220 // Labels appear due to drutil_expand_rep_string()
221 if (instr_is_label(instr))
222 return false;
223
224 CHECK(instr_ok_to_mangle(instr) == true);
225
226 if (instr_writes_memory(instr)) {
227 for (int d = 0; d < instr_num_dsts(instr); d++) {
228 opnd_t op = instr_get_dst(instr, d);
229 if (OperandIsInteresting(op))
230 return true;
231 }
232 }
233
234 return false;
235}
236
237#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
238#define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
239
240void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op,
241 bool is_write) {
242 bool need_to_restore_eflags = false;
243 uint flags = instr_get_arith_flags(instr);
244 // TODO: do something smarter with flags and spills in general?
245 // For example, spill them only once for a sequence of instrumented
246 // instructions that don't change/read flags.
247
248 if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) {
249 if (VERBOSITY > 1)
250 dr_printf("Spilling eflags...\n");
251 need_to_restore_eflags = true;
252 // TODO: Maybe sometimes don't need to 'seto'.
253 // TODO: Maybe sometimes don't want to spill XAX here?
254 // TODO: No need to spill XAX here if XAX is not used in the BB.
255 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
256 dr_save_arith_flags_to_xax(drcontext, bb, instr);
257 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
258 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
259 }
260
261#if 0
262 dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
263 opnd_is_memory_reference(op), opnd_is_base_disp(op),
264 opnd_is_base_disp(op) ? opnd_get_index(op) : -1,
265 opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op),
266 opnd_is_base_disp(op) ? opnd_get_disp(op) : -1);
267#endif
268
269 reg_id_t R1;
270 bool address_in_R1 = false;
271 if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL &&
272 opnd_get_disp(op) == 0) {
273 // If this is a simple access with no offset or index, we can just use the
274 // base for R1.
275 address_in_R1 = true;
276 R1 = opnd_get_base(op);
277 } else {
278 // Otherwise, we need to compute the addr into R1.
279 // TODO: reuse some spare register? e.g. r15 on x64
280 // TODO: might be used as a non-mem-ref register?
281 R1 = DR_REG_XAX;
282 }
283 CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong.
284
Evgeniy Stepanova0475712013-10-29 22:19:25 +0000285 // Pick R2 from R8 to R15.
286 // It's OK if the instr uses R2 elsewhere, since we'll restore it before instr.
287 reg_id_t R2;
288 for (R2 = DR_REG_R8; R2 <= DR_REG_R15; R2++) {
289 if (!opnd_uses_reg(op, R2))
290 break;
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000291 }
Evgeniy Stepanova0475712013-10-29 22:19:25 +0000292 CHECK((R2 <= DR_REG_R15) && R1 != R2);
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000293
294 // Save the current values of R1 and R2.
295 dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
296 // TODO: Something smarter than spilling a "fixed" register R2?
297 dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
298
299 if (!address_in_R1)
300 CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2));
301 PRE(instr, mov_imm(drcontext, opnd_create_reg(R2),
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000302 OPND_CREATE_INT64(SHADOW_MEMORY_MASK)));
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000303 PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2)));
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000304#ifdef MSANDR_STANDALONE_TEST
305 PRE(instr, add(drcontext, opnd_create_reg(R1),
306 OPND_CREATE_INT32(SHADOW_MEMORY_BASE)));
307#endif
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000308 // There is no mov_st of a 64-bit immediate, so...
309 opnd_size_t op_size = opnd_get_size(op);
310 CHECK(op_size != OPSZ_NA);
311 uint access_size = opnd_size_in_bytes(op_size);
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000312 if (access_size <= 4 || op_size == OPSZ_PTR /* x64 support sign extension */) {
Evgeniy Stepanov9afa4522013-10-30 17:44:22 +0000313 instr_t *label = INSTR_CREATE_label(drcontext);
314 opnd_t immed;
315 if (op_size == OPSZ_PTR || op_size == OPSZ_4)
316 immed = OPND_CREATE_INT32(0);
317 else
318 immed = opnd_create_immed_int((ptr_int_t) 0, op_size);
319 // we check if target is 0 before write to reduce unnecessary memory stores.
320 PRE(instr, cmp(drcontext,
321 opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
322 immed));
323 PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label)));
324 PRE(instr, mov_st(drcontext,
325 opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
326 immed));
327 PREF(instr, label);
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000328 } else {
329 // FIXME: tail?
330 for (uint ofs = 0; ofs < access_size; ofs += 4) {
Evgeniy Stepanov9afa4522013-10-30 17:44:22 +0000331 instr_t *label = INSTR_CREATE_label(drcontext);
332 opnd_t immed = OPND_CREATE_INT32(0);
333 PRE(instr, cmp(drcontext, OPND_CREATE_MEM32(R1, ofs), immed));
334 PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label)));
335 PRE(instr, mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), immed));
336 PREF(instr, label)
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000337 }
338 }
339
340 // Restore the registers and flags.
341 dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
342 dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
343
Evgeniy Stepanova0475712013-10-29 22:19:25 +0000344 // TODO: move aflags save/restore to per instr instead of per opnd
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000345 if (need_to_restore_eflags) {
346 if (VERBOSITY > 1)
347 dr_printf("Restoring eflags\n");
348 // TODO: Check if it's reverse to the dr_restore_reg above and optimize.
349 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
350 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
351 dr_restore_arith_flags_from_xax(drcontext, bb, instr);
352 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
353 }
354
355 // The original instruction is left untouched. The above instrumentation is just
356 // a prefix.
357}
358
359void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) {
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000360#ifdef MSANDR_STANDALONE_TEST
361 PRE(instr,
362 mov_st(drcontext,
363 opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */,
364 DR_REG_NULL, DR_REG_NULL,
365 0, msan_retval_tls_offset,
366 OPSZ_PTR),
367 OPND_CREATE_INT32(0)));
368#else /* !MSANDR_STANDALONE_TEST */
369 /* XXX: the code below only works if -mangle_app_seg and -private_loader,
370 * which is turned of for optimized native exec
371 */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000372 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
373
374 // Clobbers nothing except xax.
375 bool res =
376 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
377 CHECK(res);
378
379 // TODO: unpoison more bytes?
380 PRE(instr,
381 mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset),
382 OPND_CREATE_INT32(0)));
383
384 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
385
386 // The original instruction is left untouched. The above instrumentation is just
387 // a prefix.
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000388#endif /* !MSANDR_STANDALONE_TEST */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000389}
390
391void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb,
392 instr_t *instr) {
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000393#ifdef MSANDR_STANDALONE_TEST
394 for (int i = 0; i < NUM_TLS_PARAM; ++i) {
395 PRE(instr,
396 mov_st(drcontext,
397 opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */,
398 DR_REG_NULL, DR_REG_NULL,
399 0,
400 msan_param_tls_offset +
401 i * sizeof(void *),
402 OPSZ_PTR),
403 OPND_CREATE_INT32(0)));
404 }
405#else /* !MSANDR_STANDALONE_TEST */
406 /* XXX: the code below only works if -mangle_app_seg and -private_loader,
407 * which is turned off for optimized native exec
408 */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000409 dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
410
411 // Clobbers nothing except xax.
412 bool res =
413 dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
414 CHECK(res);
415
416 // TODO: unpoison more bytes?
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000417 for (int i = 0; i < NUM_TLS_PARAM; ++i) {
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000418 PRE(instr,
419 mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset +
420 i * sizeof(void *)),
421 OPND_CREATE_INT32(0)));
422 }
423
424 dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
425
426 // The original instruction is left untouched. The above instrumentation is just
427 // a prefix.
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000428#endif /* !MSANDR_STANDALONE_TEST */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000429}
430
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000431#ifndef MSANDR_NATIVE_EXEC
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000432// For use with binary search. Modules shouldn't overlap, so we shouldn't have
433// to look at end_. If that can happen, we won't support such an application.
434bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) {
435 return left.start_ < right.start_;
436}
437
438// Look up the module containing PC. Should be relatively fast, as its called
439// for each bb instrumentation.
440ModuleData *LookupModuleByPC(app_pc pc) {
441 ModuleData fake_mod_data;
442 fake_mod_data.start_ = pc;
443 std::vector<ModuleData>::iterator it =
444 lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data,
445 ModuleDataCompareStart);
446 // if (it == g_module_list.end())
447 // return NULL;
448 if (it == g_module_list.end() || pc < it->start_)
449 --it;
450 CHECK(it->start_ <= pc);
451 if (pc >= it->end_) {
452 // We're past the end of this module. We shouldn't be in the next module,
453 // or lower_bound lied to us.
454 ++it;
455 CHECK(it == g_module_list.end() || pc < it->start_);
456 return NULL;
457 }
458
459 // OK, we found the module.
460 return &*it;
461}
462
// Policy for code that belongs to no known module: always instrument it.
bool ShouldInstrumentNonModuleCode() {
  return true;
}
464
465bool ShouldInstrumentModule(ModuleData *mod_data) {
466 // TODO(rnk): Flags for blacklist would get wired in here.
467 generic_func_t p =
468 dr_get_proc_address(mod_data->handle_, "__msan_track_origins");
469 return !p;
470}
471
472bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) {
473 ModuleData *mod_data = LookupModuleByPC(pc);
474 if (pmod_data)
475 *pmod_data = mod_data;
476 if (mod_data != NULL) {
477 // This module is on a blacklist.
478 if (!mod_data->should_instrument_) {
479 return false;
480 }
481 } else if (!ShouldInstrumentNonModuleCode()) {
482 return false;
483 }
484 return true;
485}
#endif /* !MSANDR_NATIVE_EXEC */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000487
488// TODO(rnk): Make sure we instrument after __msan_init.
489dr_emit_flags_t
490event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb,
491 bool for_trace, bool translating) {
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000492#ifndef MSANDR_NATIVE_EXEC
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000493 app_pc pc = dr_fragment_app_pc(tag);
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000494 if (ShouldInstrumentPc(pc, NULL))
495 CHECK(drutil_expand_rep_string(drcontext, bb));
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000496#else /* MSANDR_NATIVE_EXEC */
497 CHECK(drutil_expand_rep_string(drcontext, bb));
498#endif /* MSANDR_NATIVE_EXEC */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000499 return DR_EMIT_PERSISTABLE;
500}
501
502dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
503 bool for_trace, bool translating) {
504 app_pc pc = dr_fragment_app_pc(tag);
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000505#ifndef MSANDR_NATIVE_EXEC
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000506 ModuleData *mod_data;
507
508 if (!ShouldInstrumentPc(pc, &mod_data))
509 return DR_EMIT_PERSISTABLE;
510
511 if (VERBOSITY > 1)
512 dr_printf("============================================================\n");
513 if (VERBOSITY > 0) {
Evgeniy Stepanov2ecccf82013-03-22 08:47:42 +0000514 std::string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>");
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000515 if (mod_data && !mod_data->executed_) {
516 mod_data->executed_ = true; // Nevermind this race.
517 dr_printf("Executing from new module: %s\n", mod_path.c_str());
518 }
519 dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc,
520 mod_path.c_str(), translating ? "true" : "false");
521 if (mod_data) {
522 // Match standard sanitizer trace format for free symbols.
523 // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
524 dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(),
525 pc - mod_data->start_);
526 }
527 }
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000528#endif /* !MSANDR_NATIVE_EXEC */
529
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000530 if (VERBOSITY > 1) {
531 instrlist_disassemble(drcontext, pc, bb, STDOUT);
532 instr_t *instr;
533 for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) {
534 dr_printf("opcode: %d\n", instr_get_opcode(instr));
535 }
536 }
537
538 for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) {
539 int opcode = instr_get_opcode(i);
540 if (opcode == OP_ret || opcode == OP_ret_far) {
541 InstrumentReturn(drcontext, bb, i);
542 continue;
543 }
544
545 // These instructions hopefully cover all cases where control is transferred
546 // to a function in a different module (we only care about calls into
547 // compiler-instrumented modules).
548 // * call_ind is used for normal indirect calls.
549 // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
550 // stub includes a jump to an address from GOT).
551 if (opcode == OP_call_ind || opcode == OP_call_far_ind ||
552 opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) {
553 InstrumentIndirectBranch(drcontext, bb, i);
554 continue;
555 }
556
557 if (!WantToInstrument(i))
558 continue;
559
560 if (VERBOSITY > 1) {
561 app_pc orig_pc = dr_fragment_app_pc(tag);
562 uint flags = instr_get_arith_flags(i);
563 dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
564 instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags);
565 }
566
567 if (instr_writes_memory(i)) {
568 // Instrument memory writes
569 // bool instrumented_anything = false;
570 for (int d = 0; d < instr_num_dsts(i); d++) {
571 opnd_t op = instr_get_dst(i, d);
572 if (!OperandIsInteresting(op))
573 continue;
574
575 // CHECK(!instrumented_anything);
576 // instrumented_anything = true;
577 InstrumentMops(drcontext, bb, i, op, true);
578 break; // only instrumenting the first dst
579 }
580 }
581 }
582
583// TODO: optimize away redundant restore-spill pairs?
584
585 if (VERBOSITY > 1) {
586 pc = dr_fragment_app_pc(tag);
587 dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc);
588 instrlist_disassemble(drcontext, pc, bb, STDOUT);
589 }
590 return DR_EMIT_PERSISTABLE;
591}
592
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000593#ifndef MSANDR_NATIVE_EXEC
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000594void event_module_load(void *drcontext, const module_data_t *info,
595 bool loaded) {
596 // Insert the module into the list while maintaining the ordering.
597 ModuleData mod_data(info);
598 std::vector<ModuleData>::iterator it =
599 upper_bound(g_module_list.begin(), g_module_list.end(), mod_data,
600 ModuleDataCompareStart);
601 it = g_module_list.insert(it, mod_data);
602 // Check if we should instrument this module.
603 it->should_instrument_ = ShouldInstrumentModule(&*it);
604 dr_module_set_should_instrument(info->handle, it->should_instrument_);
605
606 if (VERBOSITY > 0)
607 dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
608 info->full_path, info->start, info->end,
609 it->should_instrument_ ? "on" : "off");
610}
611
612void event_module_unload(void *drcontext, const module_data_t *info) {
613 if (VERBOSITY > 0)
614 dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path,
615 info->start, info->end);
616
617 // Remove the module from the list.
618 ModuleData mod_data(info);
619 std::vector<ModuleData>::iterator it =
620 lower_bound(g_module_list.begin(), g_module_list.end(), mod_data,
621 ModuleDataCompareStart);
622 // It's a bug if we didn't actually find the module.
623 CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ &&
624 it->end_ == mod_data.end_ && it->path_ == mod_data.path_);
625 g_module_list.erase(it);
626}
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000627#endif /* !MSANDR_NATIVE_EXEC */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000628
629void event_exit() {
Reid Kleckner7a53bca2013-03-13 13:59:09 +0000630 // Clean up so DR doesn't tell us we're leaking memory.
631 drsys_exit();
632 drutil_exit();
633 drmgr_exit();
634
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000635#ifdef MSANDR_STANDALONE_TEST
636 /* free tls */
637 bool res;
638 res = dr_raw_tls_cfree(msan_retval_tls_offset, NUM_TLS_RETVAL);
639 CHECK(res);
640 res = dr_raw_tls_cfree(msan_param_tls_offset, NUM_TLS_PARAM);
641 CHECK(res);
642 /* we do not bother to free the shadow memory */
643#endif /* !MSANDR_STANDALONE_TEST */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000644 if (VERBOSITY > 0)
645 dr_printf("==DRMSAN== DONE\n");
646}
647
// Syscall filter: asks for every system call to be intercepted, whatever the
// context or syscall number.
// FIXME: only intercept syscalls with memory effects.
bool event_filter_syscall(void *drcontext, int sysnum) {
  (void)drcontext;
  (void)sysnum;
  return true;
}
652
653bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
654 CHECK(arg->valid);
655
656 if (arg->pre)
657 return true;
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000658 if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000659 return true;
660
661 size_t sz = arg->size;
662
663 if (sz > 0xFFFFFFFF) {
664 drmf_status_t res;
665 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
666 const char *name;
667 res = drsys_syscall_name(syscall, &name);
668 CHECK(res == DRMF_SUCCESS);
669
670 dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
671 " Clipping to %llu.\n",
672 name, arg->ordinal, (unsigned long long) sz,
673 (unsigned long long)(sz & 0xFFFFFFFF));
674 }
675
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000676 if (VERBOSITY > 0) {
677 drmf_status_t res;
678 drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
679 const char *name;
680 res = drsys_syscall_name(syscall, &name);
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000681 CHECK(res == DRMF_SUCCESS);
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000682 dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
683 name, arg->ordinal, arg->start_addr,
684 (char *)arg->start_addr + sz);
685 }
686
687 // We don't switch to the app context because __msan_unpoison() doesn't need
688 // TLS segments.
689 __msan_unpoison(arg->start_addr, sz);
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000690
691 return true; /* keep going */
692}
693
694bool event_pre_syscall(void *drcontext, int sysnum) {
695 drsys_syscall_t *syscall;
696 drsys_sysnum_t sysnum_full;
697 bool known;
698 drsys_param_type_t ret_type;
699 drmf_status_t res;
700 const char *name;
701
702 res = drsys_cur_syscall(drcontext, &syscall);
703 CHECK(res == DRMF_SUCCESS);
704
705 res = drsys_syscall_number(syscall, &sysnum_full);
706 CHECK(res == DRMF_SUCCESS);
707 CHECK(sysnum == sysnum_full.number);
708
709 res = drsys_syscall_is_known(syscall, &known);
710 CHECK(res == DRMF_SUCCESS);
711
712 res = drsys_syscall_name(syscall, &name);
713 CHECK(res == DRMF_SUCCESS);
714
715 res = drsys_syscall_return_type(syscall, &ret_type);
716 CHECK(res == DRMF_SUCCESS);
717 CHECK(ret_type != DRSYS_TYPE_INVALID);
718 CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN);
719
720 res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL);
721 CHECK(res == DRMF_SUCCESS);
722
723 return true;
724}
725
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000726static bool IsInLoader(void *drcontext) {
727 // TODO: This segment swap is inefficient. DR should just let us query the
728 // app segment base, which it has. Alternatively, if we disable
729 // -mangle_app_seg, then we won't need the swap.
730 bool need_swap = !dr_using_app_state(drcontext);
731 if (need_swap)
732 dr_switch_to_app_state(drcontext);
733 bool is_in_loader = __msan_is_in_loader();
734 if (need_swap)
735 dr_switch_to_dr_state(drcontext);
736 return is_in_loader;
737}
738
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000739void event_post_syscall(void *drcontext, int sysnum) {
740 drsys_syscall_t *syscall;
741 drsys_sysnum_t sysnum_full;
742 bool success = false;
743 drmf_status_t res;
744
745 res = drsys_cur_syscall(drcontext, &syscall);
746 CHECK(res == DRMF_SUCCESS);
747
748 res = drsys_syscall_number(syscall, &sysnum_full);
749 CHECK(res == DRMF_SUCCESS);
750 CHECK(sysnum == sysnum_full.number);
751
752 res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext),
753 &success);
754 CHECK(res == DRMF_SUCCESS);
755
756 if (success) {
757 res =
758 drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
759 CHECK(res == DRMF_SUCCESS);
760 }
Reid Kleckner0f92deb2013-03-11 18:07:42 +0000761
762 // Our normal mmap interceptor can't intercept calls from the loader itself.
763 // This means we don't clear the shadow for calls to dlopen. For now, we
764 // solve this by intercepting mmap from ld.so here, but ideally we'd have a
765 // solution that doesn't rely on msandr.
766 //
767 // Be careful not to intercept maps done by the msan rtl. Otherwise we end up
768 // unpoisoning vast regions of memory and OOMing.
769 // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
770 // does instead of doing a large memset. However, we need the memory to be
771 // zeroed, where as tsan does not, so plain madvise is not enough.
772 if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
773 if (IsInLoader(drcontext)) {
774 app_pc base = (app_pc)dr_syscall_get_result(drcontext);
775 ptr_uint_t size;
776 drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size);
777 CHECK(res == DRMF_SUCCESS);
778 if (VERBOSITY > 0)
779 dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
780 // We don't switch to the app context because __msan_unpoison() doesn't
781 // need TLS segments.
782 __msan_unpoison(base, size);
783 }
784 }
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000785}
786
787} // namespace
788
789DR_EXPORT void dr_init(client_id_t id) {
790 drmf_status_t res;
791
792 drmgr_init();
793 drutil_init();
794
Evgeniy Stepanov2ecccf82013-03-22 08:47:42 +0000795 std::string app_name = dr_get_application_name();
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000796 // This blacklist will still run these apps through DR's code cache. On the
797 // other hand, we are able to follow children of these apps.
798 // FIXME: Once DR has detach, we could just detach here. Alternatively,
799 // if DR had a fork or exec hook to let us decide there, that would be nice.
800 // FIXME: make the blacklist cmd-adjustable.
801 if (app_name == "python" || app_name == "python2.7" || app_name == "bash" ||
802 app_name == "sh" || app_name == "true" || app_name == "exit" ||
803 app_name == "yes" || app_name == "echo")
804 return;
805
806 drsys_options_t ops;
807 memset(&ops, 0, sizeof(ops));
808 ops.struct_size = sizeof(ops);
809 ops.analyze_unknown_syscalls = false;
810
811 res = drsys_init(id, &ops);
812 CHECK(res == DRMF_SUCCESS);
813
814 dr_register_filter_syscall_event(event_filter_syscall);
815 drmgr_register_pre_syscall_event(event_pre_syscall);
816 drmgr_register_post_syscall_event(event_post_syscall);
817 res = drsys_filter_all_syscalls();
818 CHECK(res == DRMF_SUCCESS);
819
Evgeniy Stepanov4b26afd2013-10-29 19:44:47 +0000820#ifdef MSANDR_STANDALONE_TEST
821 reg_id_t reg_seg;
822 /* alloc tls */
823 if (!dr_raw_tls_calloc(&reg_seg, &mock_msan_retval_tls_offset, NUM_TLS_RETVAL, 0))
824 CHECK(false);
825 CHECK(reg_seg == DR_SEG_GS /* x64 only! */);
826 if (!dr_raw_tls_calloc(&reg_seg, &mock_msan_param_tls_offset, NUM_TLS_PARAM, 0))
827 CHECK(false);
828 CHECK(reg_seg == DR_SEG_GS /* x64 only! */);
829 /* alloc shadow memory */
830 if (mmap(SHADOW_MEMORY_BASE, SHADOW_MEMORY_SIZE, PROT_READ|PROT_WRITE,
831 MAP_PRIVATE | MAP_ANON, -1, 0) != SHADOW_MEMORY_BASE) {
832 CHECK(false);
833 }
834#endif /* MSANDR_STANDALONE_TEST */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000835 InitializeMSanCallbacks();
836
837 // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
838 // functions. This may change one day.
839 // TODO: make this more robust.
840
841 void *drcontext = dr_get_current_drcontext();
842
843 dr_switch_to_app_state(drcontext);
844 msan_retval_tls_offset = __msan_get_retval_tls_offset();
845 msan_param_tls_offset = __msan_get_param_tls_offset();
846 dr_switch_to_dr_state(drcontext);
847 if (VERBOSITY > 0) {
848 dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset);
849 dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset);
850 }
851
852 // Standard DR events.
853 dr_register_exit_event(event_exit);
854
855 drmgr_priority_t priority = {
856 sizeof(priority), /* size of struct */
857 "msandr", /* name of our operation */
858 NULL, /* optional name of operation we should precede */
859 NULL, /* optional name of operation we should follow */
860 0
861 }; /* numeric priority */
862
863 drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority);
864 drmgr_register_bb_instru2instru_event(event_basic_block, &priority);
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000865#ifndef MSANDR_NATIVE_EXEC
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000866 drmgr_register_module_load_event(event_module_load);
867 drmgr_register_module_unload_event(event_module_unload);
Evgeniy Stepanov88732a32013-10-25 14:57:02 +0000868#endif /* MSANDR_NATIVE_EXEC */
Evgeniy Stepanov79b2d172013-02-22 09:34:19 +0000869 if (VERBOSITY > 0)
870 dr_printf("==MSANDR== Starting!\n");
871}