blob: 5609e91cd1486e383ce33630c5e2ea838d830286 [file] [log] [blame]
//===-- sanitizer_symbolizer.cc -------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is shared between AddressSanitizer and ThreadSanitizer
// run-time libraries. See sanitizer_symbolizer.h for details.
//===----------------------------------------------------------------------===//
13
Alexey Samsonov6e0c3a42012-06-07 08:52:56 +000014#include "sanitizer_common.h"
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000015#include "sanitizer_placement_new.h"
Alexey Samsonovfa82b082012-06-15 14:00:25 +000016#include "sanitizer_procmaps.h"
Alexey Samsonov2f7d8262012-06-01 06:11:13 +000017#include "sanitizer_symbolizer.h"
Alexey Samsonov2f7d8262012-06-01 06:11:13 +000018
19namespace __sanitizer {
20
21void AddressInfo::Clear() {
Alexey Samsonov6e0c3a42012-06-07 08:52:56 +000022 InternalFree(module);
23 InternalFree(function);
24 InternalFree(file);
Alexey Samsonovfa82b082012-06-15 14:00:25 +000025 internal_memset(this, 0, sizeof(AddressInfo));
Alexey Samsonov2f7d8262012-06-01 06:11:13 +000026}
27
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000028LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
Alexey Samsonova68633f2012-07-03 08:24:14 +000029 full_name_ = internal_strdup(module_name);
Alexey Samsonov75983dd2012-07-19 07:51:20 +000030 base_address_ = base_address;
Alexey Samsonova68633f2012-07-03 08:24:14 +000031 n_ranges_ = 0;
Alexey Samsonova68633f2012-07-03 08:24:14 +000032}
33
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000034void LoadedModule::addAddressRange(uptr beg, uptr end) {
Alexey Samsonova68633f2012-07-03 08:24:14 +000035 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
36 ranges_[n_ranges_].beg = beg;
37 ranges_[n_ranges_].end = end;
Alexey Samsonova68633f2012-07-03 08:24:14 +000038 n_ranges_++;
39}
40
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000041bool LoadedModule::containsAddress(uptr address) const {
Alexey Samsonova68633f2012-07-03 08:24:14 +000042 for (uptr i = 0; i < n_ranges_; i++) {
43 if (ranges_[i].beg <= address && address < ranges_[i].end)
44 return true;
45 }
46 return false;
47}
48
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000049// Extracts the prefix of "str" that consists of any characters not
50// present in "delims" string, and copies this prefix to "result", allocating
51// space for it.
52// Returns a pointer to "str" after skipping extracted prefix and first
53// delimiter char.
54static const char *ExtractToken(const char *str, const char *delims,
55 char **result) {
56 uptr prefix_len = internal_strcspn(str, delims);
57 *result = (char*)InternalAlloc(prefix_len + 1);
58 internal_memcpy(*result, str, prefix_len);
59 (*result)[prefix_len] = '\0';
60 const char *prefix_end = str + prefix_len;
61 if (*prefix_end != '\0') prefix_end++;
62 return prefix_end;
Alexey Samsonova68633f2012-07-03 08:24:14 +000063}
64
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000065// Same as ExtractToken, but converts extracted token to integer.
66static const char *ExtractInt(const char *str, const char *delims,
67 int *result) {
68 char *buff;
69 const char *ret = ExtractToken(str, delims, &buff);
70 if (buff != 0) {
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +000071 *result = (int)internal_atoll(buff);
72 }
73 InternalFree(buff);
74 return ret;
75}
76
77static const char *ExtractUptr(const char *str, const char *delims,
78 uptr *result) {
79 char *buff;
80 const char *ret = ExtractToken(str, delims, &buff);
81 if (buff != 0) {
82 *result = (uptr)internal_atoll(buff);
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000083 }
84 InternalFree(buff);
85 return ret;
Alexey Samsonova68633f2012-07-03 08:24:14 +000086}
Alexey Samsonov2f7d8262012-06-01 06:11:13 +000087
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000088// ExternalSymbolizer encapsulates communication between the tool and
89// external symbolizer program, running in a different subprocess,
90// For now we assume the following protocol:
91// For each request of the form
92// <module_name> <module_offset>
93// passed to STDIN, external symbolizer prints to STDOUT response:
94// <function_name>
95// <file_name>:<line_number>:<column_number>
Alexey Samsonov38e853d2012-09-04 15:34:43 +000096// <function_name>
97// <file_name>:<line_number>:<column_number>
98// ...
Alexey Samsonov9c6e5302012-08-23 07:32:06 +000099// <empty line>
100class ExternalSymbolizer {
101 public:
102 ExternalSymbolizer(const char *path, int input_fd, int output_fd)
103 : path_(path),
104 input_fd_(input_fd),
105 output_fd_(output_fd),
106 times_restarted_(0) {
107 CHECK(path_);
108 CHECK_NE(input_fd_, kInvalidFd);
109 CHECK_NE(output_fd_, kInvalidFd);
110 }
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000111
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000112 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000113 CHECK(module_name);
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000114 internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n",
115 is_data ? "DATA " : "", module_name, module_offset);
116 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000117 return 0;
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000118 if (!readFromSymbolizer(buffer_, kBufferSize))
119 return 0;
120 return buffer_;
121 }
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000122
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000123 bool Restart() {
124 if (times_restarted_ >= kMaxTimesRestarted) return false;
125 times_restarted_++;
126 internal_close(input_fd_);
127 internal_close(output_fd_);
128 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
129 }
130
131 private:
132 bool readFromSymbolizer(char *buffer, uptr max_length) {
133 if (max_length == 0)
134 return true;
135 uptr read_len = 0;
136 while (true) {
137 uptr just_read = internal_read(input_fd_, buffer + read_len,
138 max_length - read_len);
139 // We can't read 0 bytes, as we don't expect external symbolizer to close
140 // its stdout.
141 if (just_read == 0 || just_read == (uptr)-1) {
142 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
143 return false;
144 }
145 read_len += just_read;
146 // Empty line marks the end of symbolizer output.
147 if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
148 buffer[read_len - 2] == '\n') {
149 break;
150 }
151 }
152 return true;
153 }
154
155 bool writeToSymbolizer(const char *buffer, uptr length) {
156 if (length == 0)
157 return true;
158 uptr write_len = internal_write(output_fd_, buffer, length);
159 if (write_len == 0 || write_len == (uptr)-1) {
160 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
161 return false;
162 }
163 return true;
164 }
165
166 const char *path_;
167 int input_fd_;
168 int output_fd_;
169
170 static const uptr kBufferSize = 16 * 1024;
171 char buffer_[kBufferSize];
172
173 static const uptr kMaxTimesRestarted = 5;
174 uptr times_restarted_;
175};
176
177static LowLevelAllocator symbolizer_allocator; // Linker initialized.
178
Alexey Samsonov11008192013-01-24 13:42:45 +0000179#if SANITIZER_SUPPORTS_WEAK_HOOKS
180extern "C" {
181SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
182bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
183 char *Buffer, int MaxLength);
184SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
185bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
186 char *Buffer, int MaxLength);
187} // extern "C"
Alexey Samsonov11008192013-01-24 13:42:45 +0000188
189class InternalSymbolizer {
190 public:
191 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
192 static InternalSymbolizer *get() {
Alexey Samsonov11008192013-01-24 13:42:45 +0000193 if (__sanitizer_symbolize_code != 0 &&
194 __sanitizer_symbolize_data != 0) {
195 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
196 return new(mem) InternalSymbolizer();
197 }
Alexey Samsonov11008192013-01-24 13:42:45 +0000198 return 0;
199 }
200 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
Alexey Samsonov11008192013-01-24 13:42:45 +0000201 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
202 : __sanitizer_symbolize_code;
203 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
204 return buffer_;
Alexey Samsonov11008192013-01-24 13:42:45 +0000205 return 0;
206 }
207
208 private:
209 InternalSymbolizer() { }
210
211 static const int kBufferSize = 16 * 1024;
212 char buffer_[kBufferSize];
213};
Alexey Samsonov78855142013-01-24 14:01:59 +0000214#else // SANITIZER_SUPPORTS_WEAK_HOOKS
215
216class InternalSymbolizer {
217 public:
218 static InternalSymbolizer *get() { return 0; }
219 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
220 return 0;
221 }
222};
223
224#endif // SANITIZER_SUPPORTS_WEAK_HOOKS
Alexey Samsonov11008192013-01-24 13:42:45 +0000225
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000226class Symbolizer {
227 public:
228 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
229 if (max_frames == 0)
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000230 return 0;
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000231 LoadedModule *module = FindModuleForAddress(addr);
232 if (module == 0)
233 return 0;
234 const char *module_name = module->full_name();
235 uptr module_offset = addr - module->base_address();
236 const char *str = SendCommand(false, module_name, module_offset);
237 if (str == 0) {
238 // External symbolizer was not initialized or failed. Fill only data
239 // about module name and offset.
240 AddressInfo *info = &frames[0];
241 info->Clear();
242 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
243 return 1;
244 }
245 uptr frame_id = 0;
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000246 for (frame_id = 0; frame_id < max_frames; frame_id++) {
247 AddressInfo *info = &frames[frame_id];
248 char *function_name = 0;
249 str = ExtractToken(str, "\n", &function_name);
250 CHECK(function_name);
251 if (function_name[0] == '\0') {
252 // There are no more frames.
253 break;
254 }
255 info->Clear();
256 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
257 info->function = function_name;
258 // Parse <file>:<line>:<column> buffer.
259 char *file_line_info = 0;
260 str = ExtractToken(str, "\n", &file_line_info);
261 CHECK(file_line_info);
262 const char *line_info = ExtractToken(file_line_info, ":", &info->file);
263 line_info = ExtractInt(line_info, ":", &info->line);
264 line_info = ExtractInt(line_info, "", &info->column);
265 InternalFree(file_line_info);
266
267 // Functions and filenames can be "??", in which case we write 0
268 // to address info to mark that names are unknown.
269 if (0 == internal_strcmp(info->function, "??")) {
270 InternalFree(info->function);
271 info->function = 0;
272 }
273 if (0 == internal_strcmp(info->file, "??")) {
274 InternalFree(info->file);
275 info->file = 0;
276 }
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000277 }
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000278 if (frame_id == 0) {
279 // Make sure we return at least one frame.
280 AddressInfo *info = &frames[0];
281 info->Clear();
282 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
283 frame_id = 1;
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000284 }
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000285 return frame_id;
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000286 }
Alexey Samsonov38e853d2012-09-04 15:34:43 +0000287
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000288 bool SymbolizeData(uptr addr, DataInfo *info) {
Dmitry Vyukov433c2192012-12-03 11:45:34 +0000289 LoadedModule *module = FindModuleForAddress(addr);
290 if (module == 0)
291 return false;
292 const char *module_name = module->full_name();
293 uptr module_offset = addr - module->base_address();
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000294 internal_memset(info, 0, sizeof(*info));
295 info->address = addr;
296 info->module = internal_strdup(module_name);
297 info->module_offset = module_offset;
298 const char *str = SendCommand(true, module_name, module_offset);
299 if (str == 0)
300 return true;
301 str = ExtractToken(str, "\n", &info->name);
302 str = ExtractUptr(str, " ", &info->start);
303 str = ExtractUptr(str, "\n", &info->size);
304 info->start += module->base_address();
Dmitry Vyukov433c2192012-12-03 11:45:34 +0000305 return true;
306 }
307
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000308 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
309 int input_fd, output_fd;
310 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
311 return false;
Alexey Samsonov2acf5562012-08-31 11:07:52 +0000312 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000313 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
314 input_fd, output_fd);
315 return true;
316 }
Alexey Samsonovfa82b082012-06-15 14:00:25 +0000317
Dmitry Vyukov90a24672013-01-29 09:35:14 +0000318 bool IsSymbolizerAvailable() {
319 if (internal_symbolizer_ == 0)
320 internal_symbolizer_ = InternalSymbolizer::get();
321 return internal_symbolizer_ || external_symbolizer_;
322 }
323
Alexey Samsonovfa82b082012-06-15 14:00:25 +0000324 private:
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000325 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
Alexey Samsonov11008192013-01-24 13:42:45 +0000326 // First, try to use internal symbolizer.
Alexey Samsonov405056c2013-02-26 13:40:51 +0000327 if (!IsSymbolizerAvailable()) {
328 return 0;
Alexey Samsonov11008192013-01-24 13:42:45 +0000329 }
330 if (internal_symbolizer_) {
331 return internal_symbolizer_->SendCommand(is_data, module_name,
332 module_offset);
333 }
334 // Otherwise, fall back to external symbolizer.
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000335 if (external_symbolizer_ == 0) {
336 ReportExternalSymbolizerError(
337 "WARNING: Trying to symbolize code, but external "
338 "symbolizer is not initialized!\n");
339 return 0;
340 }
341 for (;;) {
342 char *reply = external_symbolizer_->SendCommand(is_data, module_name,
343 module_offset);
344 if (reply)
345 return reply;
346 // Try to restart symbolizer subprocess. If we don't succeed, forget
347 // about it and don't try to use it later.
348 if (!external_symbolizer_->Restart()) {
349 ReportExternalSymbolizerError(
350 "WARNING: Failed to use and restart external symbolizer!\n");
351 external_symbolizer_ = 0;
352 return 0;
353 }
354 }
355 }
356
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000357 LoadedModule *FindModuleForAddress(uptr address) {
Alexey Samsonova68633f2012-07-03 08:24:14 +0000358 if (modules_ == 0) {
Alexey Samsonov2acf5562012-08-31 11:07:52 +0000359 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
360 kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
Alexey Samsonova68633f2012-07-03 08:24:14 +0000361 CHECK(modules_);
362 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
Alexey Samsonov405056c2013-02-26 13:40:51 +0000363 // FIXME: Return this check when GetListOfModules is implemented on Mac.
364 // CHECK_GT(n_modules_, 0);
Alexey Samsonova68633f2012-07-03 08:24:14 +0000365 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
366 }
367 for (uptr i = 0; i < n_modules_; i++) {
368 if (modules_[i].containsAddress(address)) {
369 return &modules_[i];
370 }
371 }
372 return 0;
373 }
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000374 void ReportExternalSymbolizerError(const char *msg) {
375 // Don't use atomics here for now, as SymbolizeCode can't be called
376 // from multiple threads anyway.
377 static bool reported;
378 if (!reported) {
379 Report(msg);
380 reported = true;
381 }
382 }
383
Alexey Samsonove98723f2012-10-17 13:12:23 +0000384 // 16K loaded modules should be enough for everyone.
385 static const uptr kMaxNumberOfModuleContexts = 1 << 14;
Alexey Samsonov98ea5072012-08-31 11:43:01 +0000386 LoadedModule *modules_; // Array of module descriptions is leaked.
Alexey Samsonova68633f2012-07-03 08:24:14 +0000387 uptr n_modules_;
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000388
Alexey Samsonov2acf5562012-08-31 11:07:52 +0000389 ExternalSymbolizer *external_symbolizer_; // Leaked.
Alexey Samsonov11008192013-01-24 13:42:45 +0000390 InternalSymbolizer *internal_symbolizer_; // Leaked.
Alexey Samsonovfa82b082012-06-15 14:00:25 +0000391};
392
Alexey Samsonova68633f2012-07-03 08:24:14 +0000393static Symbolizer symbolizer; // Linker initialized.
Alexey Samsonovfa82b082012-06-15 14:00:25 +0000394
395uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
396 return symbolizer.SymbolizeCode(address, frames, max_frames);
Alexey Samsonov2f7d8262012-06-01 06:11:13 +0000397}
398
Dmitry Vyukov5a1f2332013-01-11 07:23:51 +0000399bool SymbolizeData(uptr address, DataInfo *info) {
400 return symbolizer.SymbolizeData(address, info);
Dmitry Vyukov433c2192012-12-03 11:45:34 +0000401}
402
Alexey Samsonov9c6e5302012-08-23 07:32:06 +0000403bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
404 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
405}
406
Dmitry Vyukov90a24672013-01-29 09:35:14 +0000407bool IsSymbolizerAvailable() {
408 return symbolizer.IsSymbolizerAvailable();
409}
410
Alexey Samsonov2f7d8262012-06-01 06:11:13 +0000411} // namespace __sanitizer