blob: 4123014b863925707a8285779f534f83d0712095 [file] [log] [blame]
The Android Open Source Project52d4c302009-03-03 19:29:09 -08001// Copyright 2006 The Android Open Source Project
2
3#ifndef TRACE_READER_H
4#define TRACE_READER_H
5
6#include <string.h>
7#include <inttypes.h>
8#include <elf.h>
9#include <assert.h>
10#include <cxxabi.h>
11#include "read_elf.h"
12#include "trace_reader_base.h"
13#include "hash_table.h"
14
// Default mix-in for TraceReader symbols.  TraceReader<T>::symbol_entry
// inherits from T, so this empty struct is what a symbol carries when the
// client has no extra per-symbol data to attach.
struct TraceReaderEmptyStruct {};
17
18template <class T = TraceReaderEmptyStruct>
19class TraceReader : public TraceReaderBase {
20 public:
21
22 struct region_entry;
23 typedef struct symbol_entry : public T {
24 typedef region_entry region_type;
25
26 // Define flag values
27 static const uint32_t kIsPlt = 0x01;
28 static const uint32_t kIsVectorStart = 0x02;
29 static const uint32_t kIsVectorTable = (kIsPlt | kIsVectorStart);
30 static const uint32_t kIsInterpreter = 0x04;
31 static const uint32_t kIsMethod = 0x08;
32
33 uint32_t addr;
34
35 // This may hold the name of the interpreted method instead of
36 // the name of the native function if the native function is a
37 // virtual machine interpreter.
38 const char *name;
39
40 // The symbol for the virtual machine interpreter, or NULL
41 symbol_entry *vm_sym;
42 region_type *region;
43 uint32_t flags;
44 } symbol_type;
45
46 typedef struct region_entry {
47 // Define flag values
48 static const uint32_t kIsKernelRegion = 0x01;
49 static const uint32_t kSharedSymbols = 0x02;
50 static const uint32_t kIsLibraryRegion = 0x04;
51 static const uint32_t kIsUserMappedRegion = 0x08;
52
53 region_entry() : refs(0), path(NULL), vstart(0), vend(0), base_addr(0),
54 file_offset(0), flags(0), nsymbols(0), symbols(NULL) {}
55
56 symbol_type *LookupFunctionByName(char *name) {
57 // Just do a linear search
58 for (int ii = 0; ii < nsymbols; ++ii) {
59 if (strcmp(symbols[ii].name, name) == 0)
60 return &symbols[ii];
61 }
62 return NULL;
63 }
64
65 int refs; // reference count
66 char *path;
67 uint32_t vstart;
68 uint32_t vend;
69 uint32_t base_addr;
70 uint32_t file_offset;
71 uint32_t flags;
72 int nsymbols;
73 symbol_type *symbols;
74 } region_type;
75
76 typedef typename HashTable<region_type*>::entry_type hash_entry_type;
77
78 class ProcessState {
79 public:
80
81 // The "regions" array below is a pointer to array of pointers to
82 // regions. The size of the pointer array is kInitialNumRegions,
83 // but grows if needed. There is a separate region for each mmap
84 // call which includes shared libraries as well as .dex and .jar
85 // files. In addition, there is a region for the main executable
86 // for this process, as well as a few regions for the kernel.
87 //
88 // If a child process is a clone of a parent process, the
89 // regions array is unused. Instead, the "addr_manager" pointer is
90 // used to find the process that is the address space manager for
91 // both the parent and child processes.
92 static const int kInitialNumRegions = 10;
93
94 static const int kMaxMethodStackSize = 1000;
95
96 // Define values for the ProcessState flag bits
97 static const int kCalledExec = 0x01;
98 static const int kCalledExit = 0x02;
99 static const int kIsClone = 0x04;
100 static const int kHasKernelRegion = 0x08;
101 static const int kHasFirstMmap = 0x10;
102
103 ProcessState() {
104 cpu_time = 0;
105 tgid = 0;
106 pid = 0;
107 parent_pid = 0;
108 exit_val = 0;
109 flags = 0;
110 argc = 0;
111 argv = NULL;
112 name = NULL;
113 nregions = 0;
114 max_regions = 0;
115 // Don't allocate space yet until we know if we are a clone.
116 regions = NULL;
117 parent = NULL;
118 addr_manager = this;
119 next = NULL;
120 current_method_sym = NULL;
121 method_stack_top = 0;
122 }
123
124 ~ProcessState() {
125 delete[] name;
126 if ((flags & kIsClone) != 0) {
127 return;
128 }
129
130 // Free the regions. We must be careful not to free the symbols
131 // within each region because the symbols are sometimes shared
132 // between multiple regions. The TraceReader class has a hash
133 // table containing all the unique regions and it will free the
134 // region symbols in its destructor. We need to free only the
135 // regions and the array of region pointers.
136 //
137 // Each region is also reference-counted. The count is zero
138 // if no other processes are sharing this region.
139 for (int ii = 0; ii < nregions; ii++) {
140 if (regions[ii]->refs > 0) {
141 regions[ii]->refs -= 1;
142 continue;
143 }
144
145 delete regions[ii];
146 }
147
148 delete[] regions;
149
150 for (int ii = 0; ii < argc; ++ii)
151 delete[] argv[ii];
152 delete[] argv;
153 }
154
155 // Dumps the stack contents to standard output. For debugging.
156 void DumpStack();
157
158 uint64_t cpu_time;
159 uint64_t start_time;
160 uint64_t end_time;
161 int tgid;
162 int pid;
163 int parent_pid;
164 int exit_val;
165 uint32_t flags;
166 int argc;
167 char **argv;
168 char *name;
169 int nregions; // num regions in use
170 int max_regions; // max regions allocated
171 region_type **regions;
172 ProcessState *parent;
173 ProcessState *addr_manager; // the address space manager process
174 ProcessState *next;
175 int method_stack_top;
176 uint32_t method_stack[kMaxMethodStackSize];
177 symbol_type *current_method_sym;
178 };
179
180 TraceReader();
181 ~TraceReader();
182
183 void ReadKernelSymbols(const char *kernel_file);
184 void CopyKernelRegion(ProcessState *pstate);
185 void ClearRegions(ProcessState *pstate);
186 void CopyRegions(ProcessState *parent, ProcessState *child);
187 symbol_type *LookupFunction(int pid, uint32_t addr, uint64_t time);
188 symbol_type *GetSymbols(int *num_syms);
189 ProcessState *GetCurrentProcess() { return current_; }
190 ProcessState *GetProcesses(int *num_procs);
191 ProcessState *GetNextProcess();
192 char *GetProcessName(int pid);
193 void SetRoot(const char *root) { root_ = root; }
194 void SetDemangle(bool demangle) { demangle_ = demangle; }
195 bool ReadMethodSymbol(MethodRec *method_record,
196 symbol_type **psym,
197 ProcessState **pproc);
198
199 protected:
200 virtual int FindCurrentPid(uint64_t time);
201
202 private:
203
204 static const int kNumPids = 32768;
205 static const uint32_t kIncludeLocalSymbols = 0x1;
206
207 void AddPredefinedRegion(region_type *region, const char *path,
208 uint32_t vstart, uint32_t vend,
209 uint32_t base);
210 void InitRegionSymbols(region_type *region, int nsymbols);
211 void AddRegionSymbol(region_type *region, int idx,
212 uint32_t addr, const char *name,
213 uint32_t flags);
214 void AddPredefinedRegions(ProcessState *pstate);
215 void demangle_names(int nfuncs, symbol_type *functions);
216 bool ReadElfSymbols(region_type *region, uint32_t flags);
217 void AddRegion(ProcessState *pstate, region_type *region);
218 region_type *FindRegion(uint32_t addr, int nregions,
219 region_type **regions);
220 symbol_type *FindFunction(uint32_t addr, int nsyms,
221 symbol_type *symbols, bool exact_match);
222 symbol_type *FindCurrentMethod(int pid, uint64_t time);
223 void PopulateSymbolsFromDexFile(const DexFileList *dexfile,
224 region_type *region);
225 void HandlePidEvent(PidEvent *event);
226 void HandleMethodRecord(ProcessState *pstate,
227 MethodRec *method_rec);
228
229 int cached_pid_;
230 symbol_type *cached_func_;
231 symbol_type unknown_;
232 int next_pid_;
233
234 PidEvent next_pid_event_;
235 ProcessState *processes_[kNumPids];
236 ProcessState *current_;
237 MethodRec next_method_;
238 uint64_t function_start_time_;
239 const char *root_;
240 HashTable<region_type*> *hash_;
241 bool demangle_;
242};
243
244template<class T>
245TraceReader<T>::TraceReader()
246{
247 static PidEvent event_no_action;
248
249 cached_pid_ = -1;
250 cached_func_ = NULL;
251
252 memset(&unknown_, 0, sizeof(symbol_type));
253 unknown_.name = "(unknown)";
254 next_pid_ = 0;
255
256 memset(&event_no_action, 0, sizeof(PidEvent));
257 event_no_action.rec_type = kPidNoAction;
258 next_pid_event_ = event_no_action;
259 for (int ii = 1; ii < kNumPids; ++ii)
260 processes_[ii] = NULL;
261 current_ = new ProcessState;
262 processes_[0] = current_;
263 next_method_.time = 0;
264 next_method_.addr = 0;
265 next_method_.flags = 0;
266 function_start_time_ = 0;
267 root_ = "";
268 hash_ = new HashTable<region_type*>(512);
269 AddPredefinedRegions(current_);
270 demangle_ = true;
271}
272
273template<class T>
274TraceReader<T>::~TraceReader()
275{
276 hash_entry_type *ptr;
277 for (ptr = hash_->GetFirst(); ptr; ptr = hash_->GetNext()) {
278 region_type *region = ptr->value;
279 int nsymbols = region->nsymbols;
280 for (int ii = 0; ii < nsymbols; ii++) {
281 delete[] region->symbols[ii].name;
282 }
283 delete[] region->symbols;
284 delete[] region->path;
285
286 // Do not delete the region itself here. Each region
287 // is reference-counted and deleted by the ProcessState
288 // object that owns it.
289 }
290 delete hash_;
291
292 // Delete the ProcessState objects after the region symbols in
293 // the hash table above so that we still have valid region pointers
294 // when deleting the region symbols.
295 for (int ii = 0; ii < kNumPids; ++ii) {
296 delete processes_[ii];
297 }
298}
299
300// This function is used by the qsort() routine to sort symbols
301// into increasing address order.
302template<class T>
303int cmp_symbol_addr(const void *a, const void *b) {
304 typedef typename TraceReader<T>::symbol_type stype;
305
306 const stype *syma = static_cast<stype const *>(a);
307 const stype *symb = static_cast<stype const *>(b);
308 uint32_t addr1 = syma->addr;
309 uint32_t addr2 = symb->addr;
310 if (addr1 < addr2)
311 return -1;
312 if (addr1 > addr2)
313 return 1;
314
315 // The addresses are the same, sort the symbols into
316 // increasing alphabetical order. But put symbols that
317 // that start with "_" last.
318 if (syma->name[0] == '_' || symb->name[0] == '_') {
319 // Count the number of leading underscores and sort the
320 // symbol with the most underscores last.
321 int aCount = 0;
322 while (syma->name[aCount] == '_')
323 aCount += 1;
324 int bCount = 0;
325 while (symb->name[bCount] == '_')
326 bCount += 1;
327 if (aCount < bCount) {
328 return -1;
329 }
330 if (aCount > bCount) {
331 return 1;
332 }
333 // If the symbols have the same number of underscores, then
334 // fall through and sort by the whole name.
335 }
336 return strcmp(syma->name, symb->name);
337}
338
339// This function is used by the qsort() routine to sort region entries
340// into increasing address order.
341template<class T>
342int cmp_region_addr(const void *a, const void *b) {
343 typedef typename TraceReader<T>::region_type rtype;
344
345 const rtype *ma = *static_cast<rtype* const *>(a);
346 const rtype *mb = *static_cast<rtype* const *>(b);
347 uint32_t addr1 = ma->vstart;
348 uint32_t addr2 = mb->vstart;
349 if (addr1 < addr2)
350 return -1;
351 if (addr1 == addr2)
352 return 0;
353 return 1;
354}
355
356// This routine returns a new array containing all the symbols.
357template<class T>
358typename TraceReader<T>::symbol_type*
359TraceReader<T>::GetSymbols(int *num_syms)
360{
361 // Count the symbols
362 int nsyms = 0;
363 for (hash_entry_type *ptr = hash_->GetFirst(); ptr; ptr = hash_->GetNext()) {
364 region_type *region = ptr->value;
365 nsyms += region->nsymbols;
366 }
367 *num_syms = nsyms;
368
369 // Allocate space
370 symbol_type *syms = new symbol_type[nsyms];
371 symbol_type *next_sym = syms;
372
373 // Copy the symbols
374 for (hash_entry_type *ptr = hash_->GetFirst(); ptr; ptr = hash_->GetNext()) {
375 region_type *region = ptr->value;
376 memcpy(next_sym, region->symbols, region->nsymbols * sizeof(symbol_type));
377 next_sym += region->nsymbols;
378 }
379
380 return syms;
381}
382
383// This routine returns all the valid processes.
384template<class T>
385typename TraceReader<T>::ProcessState*
386TraceReader<T>::GetProcesses(int *num_procs)
387{
388 // Count the valid processes
389 int nprocs = 0;
390 for (int ii = 0; ii < kNumPids; ++ii) {
391 if (processes_[ii])
392 nprocs += 1;
393 }
394
395 // Allocate a new array to hold the valid processes.
396 ProcessState *procs = new ProcessState[nprocs];
397
398 // Copy the processes to the new array.
399 ProcessState *pstate = procs;
400 for (int ii = 0; ii < kNumPids; ++ii) {
401 if (processes_[ii])
402 memcpy(pstate++, processes_[ii], sizeof(ProcessState));
403 }
404
405 *num_procs = nprocs;
406 return procs;
407}
408
409// This routine returns the next valid process, or NULL if there are no
410// more valid processes.
411template<class T>
412typename TraceReader<T>::ProcessState*
413TraceReader<T>::GetNextProcess()
414{
415 while (next_pid_ < kNumPids) {
416 if (processes_[next_pid_])
417 return processes_[next_pid_++];
418 next_pid_ += 1;
419 }
420 next_pid_ = 0;
421 return NULL;
422}
423
424template<class T>
425char* TraceReader<T>::GetProcessName(int pid)
426{
427 if (pid < 0 || pid >= kNumPids || processes_[pid] == NULL)
428 return "(unknown)";
429 return processes_[pid]->name;
430}
431
432template<class T>
433void TraceReader<T>::AddPredefinedRegion(region_type *region, const char *path,
434 uint32_t vstart, uint32_t vend,
435 uint32_t base)
436{
437 // Copy the path to make it easy to delete later.
438 int len = strlen(path);
439 region->path = new char[len + 1];
440 strcpy(region->path, path);
441 region->vstart = vstart;
442 region->vend = vend;
443 region->base_addr = base;
444 region->flags = region_type::kIsKernelRegion;
445}
446
447template<class T>
448void TraceReader<T>::InitRegionSymbols(region_type *region, int nsymbols)
449{
450 region->nsymbols = nsymbols;
451 region->symbols = new symbol_type[nsymbols];
452 memset(region->symbols, 0, nsymbols * sizeof(symbol_type));
453}
454
455template<class T>
456void TraceReader<T>::AddRegionSymbol(region_type *region, int idx,
457 uint32_t addr, const char *name,
458 uint32_t flags)
459{
460 region->symbols[idx].addr = addr;
461 region->symbols[idx].name = Strdup(name);
462 region->symbols[idx].vm_sym = NULL;
463 region->symbols[idx].region = region;
464 region->symbols[idx].flags = flags;
465}
466
467template<class T>
468void TraceReader<T>::AddPredefinedRegions(ProcessState *pstate)
469{
470 region_type *region = new region_type;
471 AddPredefinedRegion(region, "(bootloader)", 0, 0x14, 0);
472 InitRegionSymbols(region, 2);
473 AddRegionSymbol(region, 0, 0, "(bootloader_start)", 0);
474 AddRegionSymbol(region, 1, 0x14, "(bootloader_end)", 0);
475 AddRegion(pstate, region);
476 hash_->Update(region->path, region);
477
478 region = new region_type;
479 AddPredefinedRegion(region, "(exception vectors)", 0xffff0000, 0xffff0500,
480 0xffff0000);
481 InitRegionSymbols(region, 2);
482 AddRegionSymbol(region, 0, 0x0, "(vector_start)",
483 symbol_type::kIsVectorStart);
484 AddRegionSymbol(region, 1, 0x500, "(vector_end)", 0);
485 AddRegion(pstate, region);
486 hash_->Update(region->path, region);
487
488 region = new region_type;
489 AddPredefinedRegion(region, "(atomic ops)", 0xffff0f80, 0xffff1000,
490 0xffff0f80);
491 // Mark this region as also being mapped in user-space.
492 // This isn't used anywhere in this code but client code can test for
493 // this flag and decide whether to treat this as kernel or user code.
494 region->flags |= region_type::kIsUserMappedRegion;
495
496 InitRegionSymbols(region, 4);
497 AddRegionSymbol(region, 0, 0x0, "(kuser_atomic_inc)", 0);
498 AddRegionSymbol(region, 1, 0x20, "(kuser_atomic_dec)", 0);
499 AddRegionSymbol(region, 2, 0x40, "(kuser_cmpxchg)", 0);
500 AddRegionSymbol(region, 3, 0x80, "(kuser_end)", 0);
501 AddRegion(pstate, region);
502 hash_->Update(region->path, region);
503}
504
505template<class T>
506void TraceReader<T>::ReadKernelSymbols(const char *kernel_file)
507{
508 region_type *region = new region_type;
509 // Copy the path to make it easy to delete later.
510 int len = strlen(kernel_file);
511 region->path = new char[len + 1];
512 strcpy(region->path, kernel_file);
513 region->flags = region_type::kIsKernelRegion;
514 ReadElfSymbols(region, kIncludeLocalSymbols);
515 region->vend = 0xffff0000;
516 AddRegion(processes_[0], region);
517 processes_[0]->flags |= ProcessState::kHasKernelRegion;
518 hash_->Update(region->path, region);
519}
520
521template<class T>
522void TraceReader<T>::demangle_names(int nfuncs, symbol_type *functions)
523{
524 char *demangled;
525 int status;
526
527 for (int ii = 0; ii < nfuncs; ++ii) {
528 demangled = NULL;
529 int len = strlen(functions[ii].name);
530
531 // If we don't check for "len > 1" then the demangler will incorrectly
532 // expand 1-letter function names. For example, "b" becomes "bool",
533 // "c" becomes "char" and "d" becomes "double". Also check that the
534 // first character is an underscore. Otherwise, on some strings
535 // the demangler will try to read past the end of the string (because
536 // the string is not really a C++ mangled name) and valgrind will
537 // complain.
538 if (demangle_ && len > 1 && functions[ii].name[0] == '_') {
539 demangled = abi::__cxa_demangle(functions[ii].name, 0, NULL,
540 &status);
541 }
542
543 if (demangled != NULL) {
544 delete[] functions[ii].name;
545 functions[ii].name = Strdup(demangled);
546 free(demangled);
547 }
548 }
549}
550
551// Adds the symbols from the given ELF file to the given process.
552// Returns false if the file was not an ELF file or if there was an
553// error trying to read the sections of the ELF file.
554template<class T>
555bool TraceReader<T>::ReadElfSymbols(region_type *region, uint32_t flags)
556{
557 static char full_path[4096];
558 Elf32_Shdr *symtab, *symstr;
559 Elf32_Ehdr *hdr;
560 Elf32_Shdr *shdr;
561
562 full_path[0] = 0;
563 if (root_ && strcmp(root_, "/")) {
564 strcpy(full_path, root_);
565 }
566 strcat(full_path, region->path);
567 FILE *fobj = fopen(full_path, "r");
568 if(fobj == NULL) {
569 EmptyRegion:
570 // we need to create an (unknown) symbol with address 0, otherwise some
571 // other parts of the trace reader will simply crash when dealing with
572 // an empty region
573 region->vstart = 0;
574 region->nsymbols = 1;
575 region->symbols = new symbol_type[1];
576 memset(region->symbols, 0, sizeof(symbol_type));
577
578 region->symbols[0].addr = 0;
579 region->symbols[0].name = Strdup("(unknown)");
580 region->symbols[0].vm_sym = NULL;
581 region->symbols[0].region = region;
582 region->symbols[0].flags = 0;
583
584 if (fobj != NULL)
585 fclose(fobj);
586 return false;
587 }
588
589 hdr = ReadElfHeader(fobj);
590 if (hdr == NULL) {
591 fprintf(stderr, "Cannot read ELF header from '%s'\n", full_path);
592 goto EmptyRegion;
593 }
594
595 shdr = ReadSectionHeaders(hdr, fobj);
596 if(shdr == NULL) {
597 fprintf(stderr, "Can't read section headers from executable\n");
598 goto EmptyRegion;
599 }
600 char *section_names = ReadStringTable(hdr, shdr, fobj);
601
602 // Get the symbol table section
603 symtab = FindSymbolTableSection(hdr, shdr, section_names);
604 if (symtab == NULL || symtab->sh_size == 0) {
605 fprintf(stderr, "Can't read symbol table from '%s'\n", full_path);
606 goto EmptyRegion;
607 }
608
609 // Get the symbol string table section
610 symstr = FindSymbolStringTableSection(hdr, shdr, section_names);
611 if (symstr == NULL || symstr->sh_size == 0) {
612 fprintf(stderr, "Can't read symbol string table from '%s'\n", full_path);
613 goto EmptyRegion;
614 }
615
616 // Load the symbol string table data
617 char *symbol_names = new char[symstr->sh_size];
618 ReadSection(symstr, symbol_names, fobj);
619
620 int num_entries = symtab->sh_size / symtab->sh_entsize;
621 Elf32_Sym *elf_symbols = new Elf32_Sym[num_entries];
622 ReadSection(symtab, elf_symbols, fobj);
623 AdjustElfSymbols(hdr, elf_symbols, num_entries);
624#if 0
625 printf("size: %d, ent_size: %d, num_entries: %d\n",
626 symtab->sh_size, symtab->sh_entsize, num_entries);
627#endif
628 int nfuncs = 0;
629
630 // Allocate space for all of the symbols for now. We will
631 // reallocate space for just the function symbols after we
632 // know how many there are. Also, make sure there is room
633 // for some extra symbols, including the text section names.
634 int num_alloc = num_entries + hdr->e_shnum + 1;
635 symbol_type *func_symbols = new symbol_type[num_alloc];
636 memset(func_symbols, 0, num_alloc * sizeof(symbol_type));
637
638 // If this is the shared library for a virtual machine, then
639 // set the IsInterpreter flag for all symbols in that shared library.
640 // This will allow us to replace the symbol names with the name of
641 // the currently executing method on the virtual machine.
642 int symbol_flags = 0;
643 char *cp = strrchr(region->path, '/');
644 if (cp != NULL) {
645 // Move past the '/'
646 cp += 1;
647 } else {
648 // There was no '/', so use the whole path
649 cp = region->path;
650 }
651 if (strcmp(cp, "libdvm.so") == 0) {
652 symbol_flags = symbol_type::kIsInterpreter;
653 }
654
655 bool zero_found = false;
656 for (int ii = 1; ii < num_entries; ++ii) {
657 int idx = elf_symbols[ii].st_name;
658
659 // If the symbol does not have a name, or if the name starts with a
660 // dollar sign ($), then skip it.
661 if (idx == 0 || symbol_names[idx] == 0 || symbol_names[idx] == '$')
662 continue;
663
664 // If the section index is not executable, then skip it.
665 uint32_t section = elf_symbols[ii].st_shndx;
666 if (section == 0 || section >= hdr->e_shnum)
667 continue;
668 if ((shdr[section].sh_flags & SHF_EXECINSTR) == 0)
669 continue;
670
671 uint8_t sym_type = ELF32_ST_TYPE(elf_symbols[ii].st_info);
672 uint8_t sym_bind = ELF32_ST_BIND(elf_symbols[ii].st_info);
673
674 // Allow the caller to decide if we want local non-function
675 // symbols to be included. We currently include these symbols
676 // only for the kernel, where it is useful because the kernel
677 // has lots of assembly language labels that have meaningful names.
678 if ((flags & kIncludeLocalSymbols) == 0 && sym_bind == STB_LOCAL
679 && sym_type != STT_FUNC) {
680 continue;
681 }
682#if 0
683 printf("%08x %x %x %s\n",
684 elf_symbols[ii].st_value,
685 sym_bind,
686 sym_type,
687 &symbol_names[idx]);
688#endif
689 if (sym_type != STT_FUNC && sym_type != STT_NOTYPE)
690 continue;
691
692 if (elf_symbols[ii].st_value == 0)
693 zero_found = true;
694
695 // The address of thumb functions seem to have the low bit set,
696 // even though the instructions are really at an even address.
697 uint32_t addr = elf_symbols[ii].st_value & ~0x1;
698 func_symbols[nfuncs].addr = addr;
699 func_symbols[nfuncs].name = Strdup(&symbol_names[idx]);
700 func_symbols[nfuncs].flags = symbol_flags;
701
702 nfuncs += 1;
703 }
704
705 // Add a [0, "(unknown)"] symbol pair if there is not already a
706 // symbol with the address zero. We don't need to reallocate space
707 // because we already have more than we need.
708 if (!zero_found) {
709 func_symbols[nfuncs].addr = 0;
710 func_symbols[nfuncs].name = Strdup("(0 unknown)");
711 nfuncs += 1;
712 }
713
714 // Add another entry at the end
715 func_symbols[nfuncs].addr = 0xffffffff;
716 func_symbols[nfuncs].name = Strdup("(end)");
717 nfuncs += 1;
718
719 // Add in the names of the text sections, but only if there
720 // are no symbols with that address already.
721 for (int section = 0; section < hdr->e_shnum; ++section) {
722 if ((shdr[section].sh_flags & SHF_EXECINSTR) == 0)
723 continue;
724
725 uint32_t addr = shdr[section].sh_addr;
726 // Search for a symbol with a matching address. The symbols aren't
727 // sorted yet so we just search the whole list.
728 int ii;
729 for (ii = 0; ii < nfuncs; ++ii) {
730 if (addr == func_symbols[ii].addr)
731 break;
732 }
733 if (ii == nfuncs) {
734 // Symbol at address "addr" does not exist, so add the text
735 // section name. This will usually add the ".plt" section
736 // (procedure linkage table).
737 int idx = shdr[section].sh_name;
738 func_symbols[nfuncs].addr = addr;
739 func_symbols[nfuncs].name = Strdup(&section_names[idx]);
740 if (strcmp(func_symbols[nfuncs].name, ".plt") == 0) {
741 func_symbols[nfuncs].flags |= symbol_type::kIsPlt;
742 // Change the name of the symbol to include the
743 // name of the library. Otherwise we will have lots
744 // of ".plt" symbols.
745 int len = strlen(region->path);
746 len += strlen(":.plt");
747 char *name = new char[len + 1];
748 strcpy(name, region->path);
749 strcat(name, ":.plt");
750 delete[] func_symbols[nfuncs].name;
751 func_symbols[nfuncs].name = name;
752
753 // Check if this is part of the virtual machine interpreter
754 char *cp = strrchr(region->path, '/');
755 if (cp != NULL) {
756 // Move past the '/'
757 cp += 1;
758 } else {
759 // There was no '/', so use the whole path
760 cp = region->path;
761 }
762 if (strcmp(cp, "libdvm.so") == 0) {
763 func_symbols[nfuncs].flags |= symbol_type::kIsInterpreter;
764 }
765 }
766 nfuncs += 1;
767 }
768 }
769
770 // Allocate just the space we need now that we know exactly
771 // how many symbols we have.
772 symbol_type *functions = new symbol_type[nfuncs];
773
774 // Copy the symbols to the functions array
775 memcpy(functions, func_symbols, nfuncs * sizeof(symbol_type));
776 delete[] func_symbols;
777
778 // Assign the region pointers
779 for (int ii = 0; ii < nfuncs; ++ii) {
780 functions[ii].region = region;
781 }
782
783 // Sort the symbols into increasing address order
784 qsort(functions, nfuncs, sizeof(symbol_type), cmp_symbol_addr<T>);
785
786 // If there are multiple symbols with the same address, then remove
787 // the duplicates. First, count the number of duplicates.
788 uint32_t prev_addr = ~0;
789 int num_duplicates = 0;
790 for (int ii = 0; ii < nfuncs; ++ii) {
791 if (prev_addr == functions[ii].addr)
792 num_duplicates += 1;
793 prev_addr = functions[ii].addr;
794 }
795
796 if (num_duplicates > 0) {
797 int num_uniq = nfuncs - num_duplicates;
798
799 // Allocate space for the unique functions
800 symbol_type *uniq_functions = new symbol_type[num_uniq];
801
802 // Copy the unique functions
803 prev_addr = ~0;
804 int next_uniq = 0;
805 for (int ii = 0; ii < nfuncs; ++ii) {
806 if (prev_addr == functions[ii].addr) {
807 delete[] functions[ii].name;
808 continue;
809 }
810 memcpy(&uniq_functions[next_uniq++], &functions[ii],
811 sizeof(symbol_type));
812 prev_addr = functions[ii].addr;
813 }
814 assert(next_uniq == num_uniq);
815
816 delete[] functions;
817 functions = uniq_functions;
818 nfuncs = num_uniq;
819 }
820
821 // Finally, demangle all of the symbol names
822 demangle_names(nfuncs, functions);
823
824 uint32_t min_addr = 0;
825 if (!zero_found)
826 min_addr = functions[1].addr;
827 if (region->vstart == 0)
828 region->vstart = min_addr;
829 region->nsymbols = nfuncs;
830 region->symbols = functions;
831
832#if 0
833 printf("%s num symbols: %d min_addr: 0x%x\n", region->path, nfuncs, min_addr);
834 for (int ii = 0; ii < nfuncs; ++ii) {
835 printf("0x%08x %s\n", functions[ii].addr, functions[ii].name);
836 }
837#endif
838 delete[] elf_symbols;
839 delete[] symbol_names;
840 delete[] section_names;
841 delete[] shdr;
842 delete hdr;
843 fclose(fobj);
844
845 return true;
846}
847
848template<class T>
849void TraceReader<T>::CopyKernelRegion(ProcessState *pstate)
850{
851 ProcessState *manager = pstate->addr_manager;
852 if (manager->flags & ProcessState::kHasKernelRegion)
853 return;
854
855 int nregions = processes_[0]->nregions;
856 region_type **regions = processes_[0]->regions;
857 for (int ii = 0; ii < nregions; ii++) {
858 if (regions[ii]->flags & region_type::kIsKernelRegion) {
859 AddRegion(manager, regions[ii]);
860 regions[ii]->refs += 1;
861 }
862 }
863 manager->flags |= ProcessState::kHasKernelRegion;
864}
865
866template<class T>
867void TraceReader<T>::ClearRegions(ProcessState *pstate)
868{
869 assert(pstate->pid != 0);
870 int nregions = pstate->nregions;
871 region_type **regions = pstate->regions;
872
873 // Decrement the reference count on all the regions
874 for (int ii = 0; ii < nregions; ii++) {
875 if (regions[ii]->refs > 0) {
876 regions[ii]->refs -= 1;
877 continue;
878 }
879
880 delete regions[ii];
881 }
882 delete[] pstate->regions;
883 pstate->regions = NULL;
884 pstate->nregions = 0;
885 pstate->max_regions = 0;
886 pstate->addr_manager = pstate;
887 pstate->flags &= ~ProcessState::kIsClone;
888 pstate->flags &= ~ProcessState::kHasKernelRegion;
889 CopyKernelRegion(pstate);
890}
891
892template<class T>
893void TraceReader<T>::AddRegion(ProcessState *pstate, region_type *region)
894{
895 ProcessState *manager = pstate->addr_manager;
896 if (manager->regions == NULL) {
897 manager->max_regions = ProcessState::kInitialNumRegions;
898 manager->regions = new region_type*[manager->max_regions];
899 manager->nregions = 0;
900 }
901
902 // Check if we need to grow the array
903 int nregions = manager->nregions;
904 int max_regions = manager->max_regions;
905 if (nregions >= max_regions) {
906 max_regions <<= 1;
907 manager->max_regions = max_regions;
908 region_type **regions = new region_type*[max_regions];
909 for (int ii = 0; ii < nregions; ii++) {
910 regions[ii] = manager->regions[ii];
911 }
912 delete[] manager->regions;
913 manager->regions = regions;
914 }
915
916 // Add the new region to the end of the array and resort
917 manager->regions[nregions] = region;
918 nregions += 1;
919 manager->nregions = nregions;
920
921 // Resort the regions into increasing start address
922 qsort(manager->regions, nregions, sizeof(region_type*), cmp_region_addr<T>);
923}
924
925template<class T>
926void TraceReader<T>::CopyRegions(ProcessState *parent, ProcessState *child)
927{
928 // Copy the parent's address space
929 ProcessState *manager = parent->addr_manager;
930 int nregions = manager->nregions;
931 child->nregions = nregions;
932 child->max_regions = manager->max_regions;
933 region_type **regions = new region_type*[manager->max_regions];
934 child->regions = regions;
935 memcpy(regions, manager->regions, nregions * sizeof(region_type*));
936
937 // Increment the reference count on all the regions
938 for (int ii = 0; ii < nregions; ii++) {
939 regions[ii]->refs += 1;
940 }
941}
942
943template<class T>
944typename TraceReader<T>::region_type *
945TraceReader<T>::FindRegion(uint32_t addr, int nregions, region_type **regions)
946{
947 int high = nregions;
948 int low = -1;
949 while (low + 1 < high) {
950 int middle = (high + low) / 2;
951 uint32_t middle_addr = regions[middle]->vstart;
952 if (middle_addr == addr)
953 return regions[middle];
954 if (middle_addr > addr)
955 high = middle;
956 else
957 low = middle;
958 }
959
960 // If we get here then we did not find an exact address match. So use
961 // the closest region address that is less than the given address.
962 if (low < 0)
963 low = 0;
964 return regions[low];
965}
966
967template<class T>
968typename TraceReader<T>::symbol_type *
969TraceReader<T>::FindFunction(uint32_t addr, int nsyms, symbol_type *symbols,
970 bool exact_match)
971{
972 int high = nsyms;
973 int low = -1;
974 while (low + 1 < high) {
975 int middle = (high + low) / 2;
976 uint32_t middle_addr = symbols[middle].addr;
977 if (middle_addr == addr)
978 return &symbols[middle];
979 if (middle_addr > addr)
980 high = middle;
981 else
982 low = middle;
983 }
984
985 // If we get here then we did not find an exact address match. So use
986 // the closest function address that is less than the given address.
987 // We added a symbol with address zero so if there is no known
988 // function containing the given address, then we will return the
989 // "(unknown)" symbol.
990 if (low >= 0 && !exact_match)
991 return &symbols[low];
992 return NULL;
993}
994
995template<class T>
996typename TraceReader<T>::symbol_type *
997TraceReader<T>::LookupFunction(int pid, uint32_t addr, uint64_t time)
998{
999 // Check if the previous match is still a good match.
1000 if (cached_pid_ == pid) {
1001 uint32_t vstart = cached_func_->region->vstart;
1002 uint32_t vend = cached_func_->region->vend;
1003 if (addr >= vstart && addr < vend) {
1004 uint32_t sym_addr = addr - cached_func_->region->base_addr;
1005 if (sym_addr >= cached_func_->addr
1006 && sym_addr < (cached_func_ + 1)->addr) {
1007 // If this function is the virtual machine interpreter, then
1008 // read the method trace to find the "real" method name based
1009 // on the current time and pid.
1010 if (cached_func_->flags & symbol_type::kIsInterpreter) {
1011 symbol_type *sym = FindCurrentMethod(pid, time);
1012 if (sym != NULL) {
1013 sym->vm_sym = cached_func_;
1014 return sym;
1015 }
1016 }
1017 return cached_func_;
1018 }
1019 }
1020 }
1021
1022 ProcessState *pstate = processes_[pid];
1023 if (pstate == NULL) {
1024 // There is no process state for the specified pid.
1025 // This should never happen.
1026 cached_pid_ = -1;
1027 cached_func_ = NULL;
1028 return NULL;
1029 }
1030 ProcessState *manager = pstate->addr_manager;
1031 cached_pid_ = pid;
1032 region_type *region = FindRegion(addr, manager->nregions, manager->regions);
1033 uint32_t sym_addr = addr - region->base_addr;
1034
1035 cached_func_ = FindFunction(sym_addr, region->nsymbols, region->symbols,
1036 false /* no exact match */);
1037 if (cached_func_ != NULL) {
1038 cached_func_->region = region;
1039
1040 // If this function is the virtual machine interpreter, then
1041 // read the method trace to find the "real" method name based
1042 // on the current time and pid.
1043 if (cached_func_->flags & symbol_type::kIsInterpreter) {
1044 symbol_type *sym = FindCurrentMethod(pid, time);
1045 if (sym != NULL) {
1046 sym->vm_sym = cached_func_;
1047 return sym;
1048 }
1049 }
1050 }
1051
1052 return cached_func_;
1053}
1054
1055template <class T>
1056void TraceReader<T>::HandlePidEvent(PidEvent *event)
1057{
1058 switch (event->rec_type) {
1059 case kPidFork:
1060 case kPidClone:
1061 // event->pid is the process id of the child
1062 if (event->pid >= kNumPids) {
1063 fprintf(stderr, "Error: pid (%d) too large\n", event->pid);
1064 exit(1);
1065 }
1066 // Create a new ProcessState struct for the child
1067 // and link it in at the front of the list for that
1068 // pid.
1069 {
1070 ProcessState *child = new ProcessState;
1071 processes_[event->pid] = child;
1072 child->pid = event->pid;
1073 child->tgid = event->tgid;
1074
1075 // Link the new child at the front of the list (only needed if
1076 // pids wrap around, which will probably never happen when
1077 // tracing because it would take so long).
1078 child->next = processes_[event->pid];
1079 child->parent_pid = current_->pid;
1080 child->parent = current_;
1081 child->start_time = event->time;
1082 child->name = Strdup(current_->name);
1083 if (event->rec_type == kPidFork) {
1084 CopyRegions(current_, child);
1085 } else {
1086 // Share the parent's address space
1087 child->flags |= ProcessState::kIsClone;
1088
1089 // The address space manager for the clone is the same
1090 // as the address space manager for the parent. This works
1091 // even if the child later clones itself.
1092 child->addr_manager = current_->addr_manager;
1093 }
1094 }
1095 break;
1096 case kPidSwitch:
1097 // event->pid is the process id of the process we are
1098 // switching to.
1099 {
1100 uint64_t elapsed = event->time - function_start_time_;
1101 function_start_time_ = event->time;
1102 current_->cpu_time += elapsed;
1103 }
1104 if (current_->flags & ProcessState::kCalledExit)
1105 current_->end_time = event->time;
1106
1107 if (event->pid >= kNumPids) {
1108 fprintf(stderr, "Error: pid (%d) too large\n", event->pid);
1109 exit(1);
1110 }
1111
1112 // If the process we are switching to does not exist, then
1113 // create one. This can happen because the tracing code does
1114 // not start tracing from the very beginning of the kernel.
1115 current_ = processes_[event->pid];
1116 if (current_ == NULL) {
1117 current_ = new ProcessState;
1118 processes_[event->pid] = current_;
1119 current_->pid = event->pid;
1120 current_->start_time = event->time;
1121 CopyKernelRegion(current_);
1122 }
1123#if 0
1124 {
1125 printf("switching to p%d\n", current_->pid);
1126 ProcessState *manager = current_->addr_manager;
1127 for (int ii = 0; ii < manager->nregions; ++ii) {
1128 printf(" %08x - %08x offset: %d nsyms: %4d %s\n",
1129 manager->regions[ii]->vstart,
1130 manager->regions[ii]->vend,
1131 manager->regions[ii]->file_offset,
1132 manager->regions[ii]->nsymbols,
1133 manager->regions[ii]->path);
1134 }
1135 }
1136#endif
1137 break;
1138 case kPidExit:
1139 current_->exit_val = event->pid;
1140 current_->flags |= ProcessState::kCalledExit;
1141 break;
1142 case kPidMmap:
1143 {
1144 region_type *region;
1145 region_type *existing_region = hash_->Find(event->path);
1146 if (existing_region == NULL || existing_region->vstart != event->vstart) {
1147 // Create a new region and add it to the current process'
1148 // address space.
1149 region = new region_type;
1150
1151 // The event->path is allocated by ReadPidEvent() and owned
1152 // by us.
1153 region->path = event->path;
1154 region->vstart = event->vstart;
1155 region->vend = event->vend;
1156 region->file_offset = event->offset;
1157 if (existing_region == NULL) {
1158 DexFileList *dexfile = dex_hash_->Find(event->path);
1159 if (dexfile != NULL) {
1160 PopulateSymbolsFromDexFile(dexfile, region);
1161 } else {
1162 ReadElfSymbols(region, 0);
1163 }
1164 hash_->Update(region->path, region);
1165 } else {
1166 region->nsymbols = existing_region->nsymbols;
1167 region->symbols = existing_region->symbols;
1168 region->path = existing_region->path;
1169 delete[] event->path;
1170 region->flags |= region_type::kSharedSymbols;
1171 }
1172
1173 // The base_addr is subtracted from an address before the
1174 // symbol name lookup and is either zero or event->vstart.
1175 // HACK: Determine if base_addr is non-zero by looking at the
1176 // second symbol address (skip the first symbol because that is
1177 // the special symbol "(unknown)" with an address of zero).
1178 if (region->nsymbols > 2 && region->symbols[1].addr < event->vstart)
1179 region->base_addr = event->vstart;
1180
1181 // Treat all mmapped regions after the first as "libraries".
1182 // Profiling tools can test for this property.
1183 if (current_->flags & ProcessState::kHasFirstMmap)
1184 region->flags |= region_type::kIsLibraryRegion;
1185 else
1186 current_->flags |= ProcessState::kHasFirstMmap;
1187#if 0
1188 printf("%s vstart: 0x%x vend: 0x%x offset: 0x%x\n",
1189 region->path, region->vstart, region->vend, region->file_offset);
1190#endif
1191 } else {
1192 region = existing_region;
1193 region->refs += 1;
1194 delete[] event->path;
1195 }
1196 AddRegion(current_, region);
1197 }
1198 break;
1199 case kPidExec:
1200 if (current_->argc > 0) {
1201 for (int ii = 0; ii < current_->argc; ii++) {
1202 delete[] current_->argv[ii];
1203 }
1204 delete[] current_->argv;
1205 }
1206 delete[] current_->name;
1207
1208 current_->argc = event->argc;
1209 current_->argv = event->argv;
1210 current_->name = Strdup(current_->argv[0]);
1211 current_->flags |= ProcessState::kCalledExec;
1212 ClearRegions(current_);
1213 break;
1214 case kPidName:
1215 case kPidKthreadName:
1216 {
1217 ProcessState *pstate = processes_[event->pid];
1218 if (pstate == NULL) {
1219 pstate = new ProcessState;
1220 if (event->rec_type == kPidKthreadName) {
1221 pstate->tgid = event->tgid;
1222 }
1223 pstate->pid = event->pid;
1224 pstate->start_time = event->time;
1225 processes_[event->pid] = pstate;
1226 CopyKernelRegion(pstate);
1227 } else {
1228 delete[] pstate->name;
1229 }
1230 pstate->name = event->path;
1231 }
1232 break;
1233 case kPidNoAction:
1234 break;
1235 case kPidSymbolAdd:
1236 delete[] event->path;
1237 break;
1238 case kPidSymbolRemove:
1239 break;
1240 }
1241}
1242
1243// Finds the current pid for the given time. This routine reads the pid
1244// trace file and assumes that the "time" parameter is monotonically
1245// increasing.
1246template <class T>
1247int TraceReader<T>::FindCurrentPid(uint64_t time)
1248{
1249 if (time < next_pid_event_.time)
1250 return current_->pid;
1251
1252 while (1) {
1253 HandlePidEvent(&next_pid_event_);
1254
1255 if (internal_pid_reader_->ReadPidEvent(&next_pid_event_)) {
1256 next_pid_event_.time = ~0ull;
1257 break;
1258 }
1259 if (next_pid_event_.time > time)
1260 break;
1261 }
1262 return current_->pid;
1263}
1264
1265template <class T>
1266void TraceReader<T>::ProcessState::DumpStack()
1267{
1268 for (int ii = 0; ii < method_stack_top; ii++) {
1269 printf("%2d: 0x%08x\n", ii, method_stack[ii]);
1270 }
1271}
1272
1273template <class T>
1274void TraceReader<T>::HandleMethodRecord(ProcessState *pstate,
1275 MethodRec *method_rec)
1276{
1277 uint32_t addr;
1278 int top = pstate->method_stack_top;
1279 if (method_rec->flags == kMethodEnter) {
1280 // Push this method on the stack
1281 if (top >= pstate->kMaxMethodStackSize) {
1282 fprintf(stderr, "Stack overflow at time %llu\n", method_rec->time);
1283 exit(1);
1284 }
1285 pstate->method_stack[top] = method_rec->addr;
1286 pstate->method_stack_top = top + 1;
1287 addr = method_rec->addr;
1288 } else {
1289 if (top <= 0) {
1290 // If the stack underflows, then set the current method to NULL.
1291 pstate->current_method_sym = NULL;
1292 return;
1293 }
1294 top -= 1;
1295 addr = pstate->method_stack[top];
1296 if (addr != method_rec->addr) {
1297 fprintf(stderr,
1298 "Stack method (0x%x) at index %d does not match trace record (0x%x) at time %llu\n",
1299 addr, top, method_rec->addr, method_rec->time);
1300 for (int ii = 0; ii <= top; ii++) {
1301 fprintf(stderr, " %d: 0x%x\n", ii, pstate->method_stack[ii]);
1302 }
1303 exit(1);
1304 }
1305
1306 pstate->method_stack_top = top;
1307 if (top == 0) {
1308 // When we empty the stack, set the current method to NULL
1309 pstate->current_method_sym = NULL;
1310 return;
1311 }
1312 addr = pstate->method_stack[top - 1];
1313 }
1314 ProcessState *manager = pstate->addr_manager;
1315 region_type *region = FindRegion(addr, manager->nregions, manager->regions);
1316 uint32_t sym_addr = addr - region->base_addr;
1317 symbol_type *sym = FindFunction(sym_addr, region->nsymbols,
1318 region->symbols, true /* exact match */);
1319
1320 pstate->current_method_sym = sym;
1321 if (sym != NULL) {
1322 sym->region = region;
1323 }
1324}
1325
1326template <class T>
1327typename TraceReader<T>::symbol_type*
1328TraceReader<T>::FindCurrentMethod(int pid, uint64_t time)
1329{
1330 ProcessState *procState = processes_[pid];
1331
1332 if (time < next_method_.time) {
1333 return procState->current_method_sym;
1334 }
1335
1336 while (1) {
1337 if (next_method_.time != 0) {
1338 // We may have to process methods from a different pid so use
1339 // a local variable here so that we don't overwrite procState.
1340 ProcessState *pState = processes_[next_method_.pid];
1341 HandleMethodRecord(pState, &next_method_);
1342 }
1343
1344 if (internal_method_reader_->ReadMethod(&next_method_)) {
1345 next_method_.time = ~0ull;
1346 break;
1347 }
1348 if (next_method_.time > time)
1349 break;
1350 }
1351 return procState->current_method_sym;
1352}
1353
1354template <class T>
1355void TraceReader<T>::PopulateSymbolsFromDexFile(const DexFileList *dexfile,
1356 region_type *region)
1357
1358{
1359 int nsymbols = dexfile->nsymbols;
1360 DexSym *dexsyms = dexfile->symbols;
1361 region->nsymbols = nsymbols + 1;
1362 symbol_type *symbols = new symbol_type[nsymbols + 1];
1363 memset(symbols, 0, (nsymbols + 1) * sizeof(symbol_type));
1364 region->symbols = symbols;
1365 for (int ii = 0; ii < nsymbols; ii++) {
1366 symbols[ii].addr = dexsyms[ii].addr;
1367 symbols[ii].name = Strdup(dexsyms[ii].name);
1368 symbols[ii].vm_sym = NULL;
1369 symbols[ii].region = region;
1370 symbols[ii].flags = symbol_type::kIsMethod;
1371 }
1372
1373 // Add an entry at the end with an address of 0xffffffff. This
1374 // is required for LookupFunction() to work.
1375 symbol_type *symbol = &symbols[nsymbols];
1376 symbol->addr = 0xffffffff;
1377 symbol->name = Strdup("(end)");
1378 symbol->vm_sym = NULL;
1379 symbol->region = region;
1380 symbol->flags = symbol_type::kIsMethod;
1381}
1382
1383template <class T>
1384bool TraceReader<T>::ReadMethodSymbol(MethodRec *method_record,
1385 symbol_type **psym,
1386 ProcessState **pproc)
1387{
1388 if (internal_method_reader_->ReadMethod(&next_method_)) {
1389 return true;
1390 }
1391
1392 // Copy the whole MethodRec struct
1393 *method_record = next_method_;
1394
1395 uint64_t time = next_method_.time;
1396
1397 // Read the pid trace file up to this point to make sure the
1398 // process state is valid.
1399 FindCurrentPid(time);
1400
1401 ProcessState *pstate = processes_[next_method_.pid];
1402 *pproc = pstate;
1403 HandleMethodRecord(pstate, &next_method_);
1404 *psym = pstate->current_method_sym;
1405 return false;
1406}
1407
1408#endif /* TRACE_READER_H */