// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <atlbase.h>
#include <dia2.h>
#include <ImageHlp.h>
#include <limits.h>
#include <stdio.h>
#include <wchar.h>

#include "common/windows/string_utils-inl.h"

#include "common/windows/pdb_source_line_writer.h"
#include "common/windows/guid_string.h"
brynercb91a2f2006-08-25 21:14:45 +000039
// Fallback definition for a flag that DbgHelp.h may not provide. It is one
// of the options accepted by IDiaSymbol::get_undecoratedNameEx; see that
// method's documentation.
#if !defined(UNDNAME_NO_ECSU)
#define UNDNAME_NO_ECSU 0x8000  // Suppresses enum/class/struct/union.
#endif  // !defined(UNDNAME_NO_ECSU)
45
mmentovaie5dc6082007-02-14 19:51:05 +000046namespace google_breakpad {
brynercb91a2f2006-08-25 21:14:45 +000047
ted.mielczarekd35f1132010-10-05 19:39:23 +000048using std::vector;
49
50// A helper class to scope a PLOADED_IMAGE.
51class AutoImage {
52 public:
53 explicit AutoImage(PLOADED_IMAGE img) : img_(img) {}
54 ~AutoImage() {
55 if (img_)
56 ImageUnload(img_);
57 }
58
59 operator PLOADED_IMAGE() { return img_; }
60 PLOADED_IMAGE operator->() { return img_; }
61
62 private:
63 PLOADED_IMAGE img_;
64};
65
brynercb91a2f2006-08-25 21:14:45 +000066PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) {
67}
68
69PDBSourceLineWriter::~PDBSourceLineWriter() {
70}
71
bryner8b1645d2006-10-16 17:27:03 +000072bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) {
brynercb91a2f2006-08-25 21:14:45 +000073 Close();
74
75 if (FAILED(CoInitialize(NULL))) {
76 fprintf(stderr, "CoInitialize failed\n");
77 return false;
78 }
79
80 CComPtr<IDiaDataSource> data_source;
81 if (FAILED(data_source.CoCreateInstance(CLSID_DiaSource))) {
vitalybuka@chromium.org04023b12011-04-26 20:52:48 +000082 const int kGuidSize = 64;
83 wchar_t classid[kGuidSize] = {0};
84 StringFromGUID2(CLSID_DiaSource, classid, kGuidSize);
85 // vc80 uses bce36434-2c24-499e-bf49-8bd99b0eeb68.
86 // vc90 uses 4C41678E-887B-4365-A09E-925D28DB33C2.
87 fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed "
88 "(msdia*.dll unregistered?)\n", classid);
brynercb91a2f2006-08-25 21:14:45 +000089 return false;
90 }
91
bryner8b1645d2006-10-16 17:27:03 +000092 switch (format) {
93 case PDB_FILE:
94 if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
95 fprintf(stderr, "loadDataFromPdb failed\n");
96 return false;
97 }
98 break;
99 case EXE_FILE:
100 if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
101 fprintf(stderr, "loadDataForExe failed\n");
102 return false;
103 }
ted.mielczarekd35f1132010-10-05 19:39:23 +0000104 code_file_ = file;
bryner8b1645d2006-10-16 17:27:03 +0000105 break;
mmentovai80866e72006-11-06 19:34:19 +0000106 case ANY_FILE:
107 if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
108 if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
109 fprintf(stderr, "loadDataForPdb and loadDataFromExe failed\n");
110 return false;
111 }
ted.mielczarekd35f1132010-10-05 19:39:23 +0000112 code_file_ = file;
mmentovai80866e72006-11-06 19:34:19 +0000113 }
114 break;
bryner8b1645d2006-10-16 17:27:03 +0000115 default:
116 fprintf(stderr, "Unknown file format\n");
117 return false;
brynercb91a2f2006-08-25 21:14:45 +0000118 }
119
120 if (FAILED(data_source->openSession(&session_))) {
121 fprintf(stderr, "openSession failed\n");
122 }
123
124 return true;
125}
126
127bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) {
128 // The line number format is:
129 // <rva> <line number> <source file id>
130 CComPtr<IDiaLineNumber> line;
131 ULONG count;
132
133 while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
134 DWORD rva;
135 if (FAILED(line->get_relativeVirtualAddress(&rva))) {
136 fprintf(stderr, "failed to get line rva\n");
137 return false;
138 }
139
mmentovaicb9fd5b2006-09-08 18:03:56 +0000140 DWORD length;
141 if (FAILED(line->get_length(&length))) {
142 fprintf(stderr, "failed to get line code length\n");
143 return false;
144 }
145
ted.mielczarek87306482009-11-23 14:50:55 +0000146 DWORD dia_source_id;
147 if (FAILED(line->get_sourceFileId(&dia_source_id))) {
brynercb91a2f2006-08-25 21:14:45 +0000148 fprintf(stderr, "failed to get line source file id\n");
149 return false;
150 }
ted.mielczarek87306482009-11-23 14:50:55 +0000151 // duplicate file names are coalesced to share one ID
152 DWORD source_id = GetRealFileID(dia_source_id);
brynercb91a2f2006-08-25 21:14:45 +0000153
154 DWORD line_num;
155 if (FAILED(line->get_lineNumber(&line_num))) {
156 fprintf(stderr, "failed to get line number\n");
157 return false;
158 }
159
mmentovaicb9fd5b2006-09-08 18:03:56 +0000160 fprintf(output_, "%x %x %d %d\n", rva, length, line_num, source_id);
brynercb91a2f2006-08-25 21:14:45 +0000161 line.Release();
162 }
163 return true;
164}
165
ted.mielczarek927cc8f2009-11-20 18:24:41 +0000166bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function,
167 IDiaSymbol *block) {
brynercb91a2f2006-08-25 21:14:45 +0000168 // The function format is:
mmentovai246f4062006-10-20 01:46:38 +0000169 // FUNC <address> <length> <param_stack_size> <function>
170 DWORD rva;
ted.mielczarek927cc8f2009-11-20 18:24:41 +0000171 if (FAILED(block->get_relativeVirtualAddress(&rva))) {
mmentovai246f4062006-10-20 01:46:38 +0000172 fprintf(stderr, "couldn't get rva\n");
brynercb91a2f2006-08-25 21:14:45 +0000173 return false;
174 }
175
176 ULONGLONG length;
ted.mielczarek927cc8f2009-11-20 18:24:41 +0000177 if (FAILED(block->get_length(&length))) {
brynercb91a2f2006-08-25 21:14:45 +0000178 fprintf(stderr, "failed to get function length\n");
179 return false;
180 }
181
mmentovai6bc866c2007-03-22 21:10:30 +0000182 if (length == 0) {
183 // Silently ignore zero-length functions, which can infrequently pop up.
184 return true;
185 }
186
mmentovai246f4062006-10-20 01:46:38 +0000187 CComBSTR name;
188 int stack_param_size;
189 if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
brynercb91a2f2006-08-25 21:14:45 +0000190 return false;
191 }
192
mmentovai246f4062006-10-20 01:46:38 +0000193 // If the decorated name didn't give the parameter size, try to
194 // calculate it.
195 if (stack_param_size < 0) {
196 stack_param_size = GetFunctionStackParamSize(function);
197 }
198
mmentovai73cd14b2006-11-15 22:24:42 +0000199 fprintf(output_, "FUNC %x %" WIN_STRING_FORMAT_LL "x %x %ws\n",
mmentovai246f4062006-10-20 01:46:38 +0000200 rva, length, stack_param_size, name);
201
brynercb91a2f2006-08-25 21:14:45 +0000202 CComPtr<IDiaEnumLineNumbers> lines;
203 if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
204 return false;
205 }
206
brynercb91a2f2006-08-25 21:14:45 +0000207 if (!PrintLines(lines)) {
208 return false;
209 }
210 return true;
211}
212
213bool PDBSourceLineWriter::PrintSourceFiles() {
214 CComPtr<IDiaSymbol> global;
215 if (FAILED(session_->get_globalScope(&global))) {
216 fprintf(stderr, "get_globalScope failed\n");
217 return false;
218 }
219
220 CComPtr<IDiaEnumSymbols> compilands;
221 if (FAILED(global->findChildren(SymTagCompiland, NULL,
222 nsNone, &compilands))) {
223 fprintf(stderr, "findChildren failed\n");
224 return false;
225 }
226
227 CComPtr<IDiaSymbol> compiland;
228 ULONG count;
229 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
230 CComPtr<IDiaEnumSourceFiles> source_files;
231 if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
232 return false;
233 }
234 CComPtr<IDiaSourceFile> file;
235 while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
236 DWORD file_id;
237 if (FAILED(file->get_uniqueId(&file_id))) {
238 return false;
239 }
240
241 CComBSTR file_name;
242 if (FAILED(file->get_fileName(&file_name))) {
243 return false;
244 }
245
ted.mielczarek87306482009-11-23 14:50:55 +0000246 wstring file_name_string(file_name);
247 if (!FileIDIsCached(file_name_string)) {
248 // this is a new file name, cache it and output a FILE line.
249 CacheFileID(file_name_string, file_id);
250 fwprintf(output_, L"FILE %d %s\n", file_id, file_name);
251 } else {
252 // this file name has already been seen, just save this
253 // ID for later lookup.
254 StoreDuplicateFileID(file_name_string, file_id);
255 }
brynercb91a2f2006-08-25 21:14:45 +0000256 file.Release();
257 }
258 compiland.Release();
259 }
260 return true;
261}
262
263bool PDBSourceLineWriter::PrintFunctions() {
264 CComPtr<IDiaEnumSymbolsByAddr> symbols;
265 if (FAILED(session_->getSymbolsByAddr(&symbols))) {
266 fprintf(stderr, "failed to get symbol enumerator\n");
267 return false;
268 }
269
270 CComPtr<IDiaSymbol> symbol;
271 if (FAILED(symbols->symbolByAddr(1, 0, &symbol))) {
272 fprintf(stderr, "failed to enumerate symbols\n");
273 return false;
274 }
275
276 DWORD rva_last = 0;
277 if (FAILED(symbol->get_relativeVirtualAddress(&rva_last))) {
278 fprintf(stderr, "failed to get symbol rva\n");
mmentovai246f4062006-10-20 01:46:38 +0000279 return false;
brynercb91a2f2006-08-25 21:14:45 +0000280 }
281
282 ULONG count;
283 do {
284 DWORD tag;
285 if (FAILED(symbol->get_symTag(&tag))) {
286 fprintf(stderr, "failed to get symbol tag\n");
287 return false;
288 }
mmentovai246f4062006-10-20 01:46:38 +0000289
290 // For a given function, DIA seems to give either a symbol with
291 // SymTagFunction or SymTagPublicSymbol, but not both. This means
292 // that PDBSourceLineWriter will output either a FUNC or PUBLIC line,
293 // but not both.
brynercb91a2f2006-08-25 21:14:45 +0000294 if (tag == SymTagFunction) {
ted.mielczarek927cc8f2009-11-20 18:24:41 +0000295 if (!PrintFunction(symbol, symbol)) {
brynercb91a2f2006-08-25 21:14:45 +0000296 return false;
297 }
mmentovai246f4062006-10-20 01:46:38 +0000298 } else if (tag == SymTagPublicSymbol) {
299 if (!PrintCodePublicSymbol(symbol)) {
300 return false;
301 }
brynercb91a2f2006-08-25 21:14:45 +0000302 }
303 symbol.Release();
304 } while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1);
305
ted.mielczarek927cc8f2009-11-20 18:24:41 +0000306 // When building with PGO, the compiler can split functions into
307 // "hot" and "cold" blocks, and move the "cold" blocks out to separate
308 // pages, so the function can be noncontiguous. To find these blocks,
309 // we have to iterate over all the compilands, and then find blocks
310 // that are children of them. We can then find the lexical parents
311 // of those blocks and print out an extra FUNC line for blocks
312 // that are not contained in their parent functions.
313 CComPtr<IDiaSymbol> global;
314 if (FAILED(session_->get_globalScope(&global))) {
315 fprintf(stderr, "get_globalScope failed\n");
316 return false;
317 }
318
319 CComPtr<IDiaEnumSymbols> compilands;
320 if (FAILED(global->findChildren(SymTagCompiland, NULL,
321 nsNone, &compilands))) {
322 fprintf(stderr, "findChildren failed on the global\n");
323 return false;
324 }
325
326 CComPtr<IDiaSymbol> compiland;
327 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
328 CComPtr<IDiaEnumSymbols> blocks;
329 if (FAILED(compiland->findChildren(SymTagBlock, NULL,
330 nsNone, &blocks))) {
331 fprintf(stderr, "findChildren failed on a compiland\n");
332 return false;
333 }
334
335 CComPtr<IDiaSymbol> block;
336 while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) {
337 // find this block's lexical parent function
338 CComPtr<IDiaSymbol> parent;
339 DWORD tag;
340 if (SUCCEEDED(block->get_lexicalParent(&parent)) &&
341 SUCCEEDED(parent->get_symTag(&tag)) &&
342 tag == SymTagFunction) {
343 // now get the block's offset and the function's offset and size,
344 // and determine if the block is outside of the function
345 DWORD func_rva, block_rva;
346 ULONGLONG func_length;
347 if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) &&
348 SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) &&
349 SUCCEEDED(parent->get_length(&func_length))) {
350 if (block_rva < func_rva || block_rva > (func_rva + func_length)) {
351 if (!PrintFunction(parent, block)) {
352 return false;
353 }
354 }
355 }
356 }
357 parent.Release();
358 block.Release();
359 }
360 blocks.Release();
361 compiland.Release();
362 }
363
brynercb91a2f2006-08-25 21:14:45 +0000364 return true;
365}
366
mmentovaifc1c78e2006-09-28 21:09:37 +0000367bool PDBSourceLineWriter::PrintFrameData() {
368 // It would be nice if it were possible to output frame data alongside the
369 // associated function, as is done with line numbers, but the DIA API
370 // doesn't make it possible to get the frame data in that way.
371
372 CComPtr<IDiaEnumTables> tables;
373 if (FAILED(session_->getEnumTables(&tables)))
374 return false;
375
376 // Pick up the first table that supports IDiaEnumFrameData.
377 CComPtr<IDiaEnumFrameData> frame_data_enum;
378 CComPtr<IDiaTable> table;
379 ULONG count;
380 while (!frame_data_enum &&
381 SUCCEEDED(tables->Next(1, &table, &count)) &&
mmentovai246f4062006-10-20 01:46:38 +0000382 count == 1) {
mmentovaifc1c78e2006-09-28 21:09:37 +0000383 table->QueryInterface(_uuidof(IDiaEnumFrameData),
384 reinterpret_cast<void**>(&frame_data_enum));
385 table.Release();
386 }
387 if (!frame_data_enum)
388 return false;
389
mmentovai4365e2f2006-11-21 16:58:36 +0000390 DWORD last_type = -1;
391 DWORD last_rva = -1;
392 DWORD last_code_size = 0;
393 DWORD last_prolog_size = -1;
394
mmentovaifc1c78e2006-09-28 21:09:37 +0000395 CComPtr<IDiaFrameData> frame_data;
396 while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
397 count == 1) {
398 DWORD type;
399 if (FAILED(frame_data->get_type(&type)))
400 return false;
401
402 DWORD rva;
403 if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
404 return false;
405
406 DWORD code_size;
407 if (FAILED(frame_data->get_lengthBlock(&code_size)))
408 return false;
409
410 DWORD prolog_size;
411 if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
412 return false;
413
414 // epliog_size is always 0.
415 DWORD epilog_size = 0;
416
mmentovai246f4062006-10-20 01:46:38 +0000417 // parameter_size is the size of parameters passed on the stack. If any
418 // parameters are not passed on the stack (such as in registers), their
419 // sizes will not be included in parameter_size.
mmentovaifc1c78e2006-09-28 21:09:37 +0000420 DWORD parameter_size;
421 if (FAILED(frame_data->get_lengthParams(&parameter_size)))
422 return false;
423
424 DWORD saved_register_size;
425 if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
426 return false;
427
428 DWORD local_size;
429 if (FAILED(frame_data->get_lengthLocals(&local_size)))
430 return false;
431
mmentovai246f4062006-10-20 01:46:38 +0000432 // get_maxStack can return S_FALSE, just use 0 in that case.
433 DWORD max_stack_size = 0;
mmentovaifc1c78e2006-09-28 21:09:37 +0000434 if (FAILED(frame_data->get_maxStack(&max_stack_size)))
435 return false;
436
mmentovai246f4062006-10-20 01:46:38 +0000437 // get_programString can return S_FALSE, indicating that there is no
438 // program string. In that case, check whether %ebp is used.
439 HRESULT program_string_result;
440 CComBSTR program_string;
441 if (FAILED(program_string_result = frame_data->get_program(
442 &program_string))) {
mmentovaifc1c78e2006-09-28 21:09:37 +0000443 return false;
mmentovai246f4062006-10-20 01:46:38 +0000444 }
mmentovaifc1c78e2006-09-28 21:09:37 +0000445
mmentovai246f4062006-10-20 01:46:38 +0000446 // get_allocatesBasePointer can return S_FALSE, treat that as though
447 // %ebp is not used.
448 BOOL allocates_base_pointer = FALSE;
449 if (program_string_result != S_OK) {
450 if (FAILED(frame_data->get_allocatesBasePointer(
451 &allocates_base_pointer))) {
452 return false;
453 }
454 }
455
mmentovai4365e2f2006-11-21 16:58:36 +0000456 // Only print out a line if type, rva, code_size, or prolog_size have
457 // changed from the last line. It is surprisingly common (especially in
458 // system library PDBs) for DIA to return a series of identical
459 // IDiaFrameData objects. For kernel32.pdb from Windows XP SP2 on x86,
460 // this check reduces the size of the dumped symbol file by a third.
461 if (type != last_type || rva != last_rva || code_size != last_code_size ||
462 prolog_size != last_prolog_size) {
463 fprintf(output_, "STACK WIN %x %x %x %x %x %x %x %x %x %d ",
464 type, rva, code_size, prolog_size, epilog_size,
465 parameter_size, saved_register_size, local_size, max_stack_size,
466 program_string_result == S_OK);
467 if (program_string_result == S_OK) {
468 fprintf(output_, "%ws\n", program_string);
469 } else {
470 fprintf(output_, "%d\n", allocates_base_pointer);
471 }
472
473 last_type = type;
474 last_rva = rva;
475 last_code_size = code_size;
476 last_prolog_size = prolog_size;
mmentovai246f4062006-10-20 01:46:38 +0000477 }
mmentovaifc1c78e2006-09-28 21:09:37 +0000478
479 frame_data.Release();
480 }
481
482 return true;
483}
484
mmentovai246f4062006-10-20 01:46:38 +0000485bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol) {
486 BOOL is_code;
487 if (FAILED(symbol->get_code(&is_code))) {
488 return false;
489 }
490 if (!is_code) {
491 return true;
492 }
493
494 DWORD rva;
495 if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
496 return false;
497 }
498
499 CComBSTR name;
500 int stack_param_size;
501 if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
502 return false;
503 }
504
505 fprintf(output_, "PUBLIC %x %x %ws\n", rva,
506 stack_param_size > 0 ? stack_param_size : 0, name);
507 return true;
508}
509
mmentovai80866e72006-11-06 19:34:19 +0000510bool PDBSourceLineWriter::PrintPDBInfo() {
mmentovai93fa3752006-12-06 20:18:26 +0000511 PDBModuleInfo info;
512 if (!GetModuleInfo(&info)) {
mmentovai80866e72006-11-06 19:34:19 +0000513 return false;
514 }
515
mmentovai4365e2f2006-11-21 16:58:36 +0000516 // Hard-code "windows" for the OS because that's the only thing that makes
517 // sense for PDB files. (This might not be strictly correct for Windows CE
518 // support, but we don't care about that at the moment.)
mmentovai93fa3752006-12-06 20:18:26 +0000519 fprintf(output_, "MODULE windows %ws %ws %ws\n",
520 info.cpu.c_str(), info.debug_identifier.c_str(),
521 info.debug_file.c_str());
mmentovai80866e72006-11-06 19:34:19 +0000522
523 return true;
524}
525
ted.mielczarekd35f1132010-10-05 19:39:23 +0000526bool PDBSourceLineWriter::PrintPEInfo() {
527 PEModuleInfo info;
528 if (!GetPEInfo(&info)) {
529 return false;
530 }
531
532 fprintf(output_, "INFO CODE_ID %ws %ws\n",
533 info.code_identifier.c_str(),
534 info.code_file.c_str());
535 return true;
536}
537
// wcstol_positive_strict is sort of like wcstol, but much stricter.  |string|
// should point to a null-terminated string containing only decimal digits.
// If the string is non-empty, consists entirely of digits, has no leading
// zeroes (unless it is exactly "0"), and its value fits in an int, |*result|
// is set to the value and this function returns true.  Otherwise this
// function returns false and leaves |*result| untouched.  This is an
// alternative to the strtol, atoi, and scanf families, which are not as
// strict about input and in some cases don't provide a good way for the
// caller to determine if a conversion was successful.
static bool wcstol_positive_strict(const wchar_t *string, int *result) {
  // An empty string contains no digits and so is not a number.
  if (!string || *string == L'\0') {
    return false;
  }

  int value = 0;
  for (const wchar_t *c = string; *c != L'\0'; ++c) {
    if (*c < L'0' || *c > L'9') {
      return false;
    }
    const int digit = static_cast<int>(*c - L'0');

    // Reject the digit before accumulating it if value * 10 + digit would
    // exceed INT_MAX; signed overflow must never actually occur.
    if (value > (INT_MAX - digit) / 10) {
      return false;
    }
    value = value * 10 + digit;

    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != L'\0') {
      return false;
    }
  }

  *result = value;
  return true;
}
574
ted.mielczarekd35f1132010-10-05 19:39:23 +0000575bool PDBSourceLineWriter::FindPEFile() {
576 CComPtr<IDiaSymbol> global;
577 if (FAILED(session_->get_globalScope(&global))) {
578 fprintf(stderr, "get_globalScope failed\n");
579 return false;
580 }
581
582 CComBSTR symbols_file;
583 if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) {
584 wstring file(symbols_file);
585
586 // Look for an EXE or DLL file.
587 const wchar_t *extensions[] = { L"exe", L"dll" };
588 for (int i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) {
589 size_t dot_pos = file.find_last_of(L".");
590 if (dot_pos != wstring::npos) {
591 file.replace(dot_pos + 1, wstring::npos, extensions[i]);
592 // Check if this file exists.
593 if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) {
594 code_file_ = file;
595 return true;
596 }
597 }
598 }
599 }
600
601 return false;
602}
603
mmentovai246f4062006-10-20 01:46:38 +0000604// static
605bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol *function,
606 BSTR *name,
607 int *stack_param_size) {
608 *stack_param_size = -1;
609 const DWORD undecorate_options = UNDNAME_NO_MS_KEYWORDS |
610 UNDNAME_NO_FUNCTION_RETURNS |
611 UNDNAME_NO_ALLOCATION_MODEL |
612 UNDNAME_NO_ALLOCATION_LANGUAGE |
613 UNDNAME_NO_THISTYPE |
614 UNDNAME_NO_ACCESS_SPECIFIERS |
615 UNDNAME_NO_THROW_SIGNATURES |
616 UNDNAME_NO_MEMBER_TYPE |
617 UNDNAME_NO_RETURN_UDT_MODEL |
618 UNDNAME_NO_ECSU;
619
620 // Use get_undecoratedNameEx to get readable C++ names with arguments.
621 if (function->get_undecoratedNameEx(undecorate_options, name) != S_OK) {
622 if (function->get_name(name) != S_OK) {
623 fprintf(stderr, "failed to get function name\n");
624 return false;
625 }
626 // If a name comes from get_name because no undecorated form existed,
627 // it's already formatted properly to be used as output. Don't do any
628 // additional processing.
mmentovai73cd14b2006-11-15 22:24:42 +0000629 //
630 // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's.
631 // This will result in calling get_name for some C++ symbols, so
632 // all of the parameter and return type information may not be included in
633 // the name string.
mmentovai246f4062006-10-20 01:46:38 +0000634 } else {
635 // C++ uses a bogus "void" argument for functions and methods that don't
636 // take any parameters. Take it out of the undecorated name because it's
637 // ugly and unnecessary.
638 const wchar_t *replace_string = L"(void)";
639 const size_t replace_length = wcslen(replace_string);
640 const wchar_t *replacement_string = L"()";
641 size_t length = wcslen(*name);
642 if (length >= replace_length) {
643 wchar_t *name_end = *name + length - replace_length;
644 if (wcscmp(name_end, replace_string) == 0) {
mmentovai73cd14b2006-11-15 22:24:42 +0000645 WindowsStringUtils::safe_wcscpy(name_end, replace_length,
646 replacement_string);
mmentovai246f4062006-10-20 01:46:38 +0000647 length = wcslen(*name);
648 }
649 }
650
651 // Undecorate names used for stdcall and fastcall. These names prefix
652 // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
653 // with '@' followed by the number of bytes of parameters, in decimal.
654 // If such a name is found, take note of the size and undecorate it.
655 // Only do this for names that aren't C++, which is determined based on
656 // whether the undecorated name contains any ':' or '(' characters.
657 if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
658 (*name[0] == '_' || *name[0] == '@')) {
659 wchar_t *last_at = wcsrchr(*name + 1, '@');
660 if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
661 // If this function adheres to the fastcall convention, it accepts up
662 // to the first 8 bytes of parameters in registers (%ecx and %edx).
663 // We're only interested in the stack space used for parameters, so
664 // so subtract 8 and don't let the size go below 0.
665 if (*name[0] == '@') {
666 if (*stack_param_size > 8) {
667 *stack_param_size -= 8;
668 } else {
669 *stack_param_size = 0;
670 }
671 }
672
673 // Undecorate the name by moving it one character to the left in its
674 // buffer, and terminating it where the last '@' had been.
mmentovai73cd14b2006-11-15 22:24:42 +0000675 WindowsStringUtils::safe_wcsncpy(*name, length,
676 *name + 1, last_at - *name - 1);
677 } else if (*name[0] == '_') {
mmentovai246f4062006-10-20 01:46:38 +0000678 // This symbol's name is encoded according to the cdecl rules. The
679 // name doesn't end in a '@' character followed by a decimal positive
mmentovai2fc823f2006-10-20 19:50:01 +0000680 // integer, so it's not a stdcall name. Strip off the leading
mmentovai246f4062006-10-20 01:46:38 +0000681 // underscore.
mmentovai73cd14b2006-11-15 22:24:42 +0000682 WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length);
mmentovai246f4062006-10-20 01:46:38 +0000683 }
684 }
685 }
686
687 return true;
688}
689
690// static
691int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol *function) {
692 // This implementation is highly x86-specific.
693
694 // Gather the symbols corresponding to data.
695 CComPtr<IDiaEnumSymbols> data_children;
696 if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
697 &data_children))) {
698 return 0;
699 }
700
701 // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
702 // highest_end is one greater than the highest offset (i.e. base + length).
703 // Stack parameters are assumed to be contiguous, because in reality, they
704 // are.
705 int lowest_base = INT_MAX;
706 int highest_end = INT_MIN;
707
708 CComPtr<IDiaSymbol> child;
709 DWORD count;
710 while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
711 // If any operation fails at this point, just proceed to the next child.
712 // Use the next_child label instead of continue because child needs to
713 // be released before it's reused. Declare constructable/destructable
714 // types early to avoid gotos that cross initializations.
715 CComPtr<IDiaSymbol> child_type;
716
717 // DataIsObjectPtr is only used for |this|. Because |this| can be passed
718 // as a stack parameter, look for it in addition to traditional
719 // parameters.
720 DWORD child_kind;
721 if (FAILED(child->get_dataKind(&child_kind)) ||
722 (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
723 goto next_child;
724 }
725
726 // Only concentrate on register-relative parameters. Parameters may also
727 // be enregistered (passed directly in a register), but those don't
728 // consume any stack space, so they're not of interest.
729 DWORD child_location_type;
730 if (FAILED(child->get_locationType(&child_location_type)) ||
731 child_location_type != LocIsRegRel) {
732 goto next_child;
733 }
734
735 // Of register-relative parameters, the only ones that make any sense are
736 // %ebp- or %esp-relative. Note that MSVC's debugging information always
737 // gives parameters as %ebp-relative even when a function doesn't use a
738 // traditional frame pointer and stack parameters are accessed relative to
739 // %esp, so just look for %ebp-relative parameters. If you wanted to
740 // access parameters, you'd probably want to treat these %ebp-relative
741 // offsets as if they were relative to %esp before a function's prolog
742 // executed.
743 DWORD child_register;
744 if (FAILED(child->get_registerId(&child_register)) ||
745 child_register != CV_REG_EBP) {
746 goto next_child;
747 }
748
749 LONG child_register_offset;
750 if (FAILED(child->get_offset(&child_register_offset))) {
751 goto next_child;
752 }
753
mmentovai329e6a92007-04-25 15:17:19 +0000754 // IDiaSymbol::get_type can succeed but still pass back a NULL value.
755 if (FAILED(child->get_type(&child_type)) || !child_type) {
mmentovai246f4062006-10-20 01:46:38 +0000756 goto next_child;
757 }
758
759 ULONGLONG child_length;
760 if (FAILED(child_type->get_length(&child_length))) {
761 goto next_child;
762 }
763
764 int child_end = child_register_offset + static_cast<ULONG>(child_length);
765 if (child_register_offset < lowest_base) {
766 lowest_base = child_register_offset;
767 }
768 if (child_end > highest_end) {
769 highest_end = child_end;
770 }
771
772next_child:
773 child.Release();
774 }
775
776 int param_size = 0;
777 // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
778 // possible address to find a stack parameter before executing a function's
779 // prolog (see above). Some optimizations cause parameter offsets to be
780 // lower than 4, but we're not concerned with those because we're only
781 // looking for parameters contained in addresses higher than where the
782 // return address is stored.
783 if (lowest_base < 4) {
784 lowest_base = 4;
785 }
786 if (highest_end > lowest_base) {
787 // All stack parameters are pushed as at least 4-byte quantities. If the
788 // last type was narrower than 4 bytes, promote it. This assumes that all
789 // parameters' offsets are 4-byte-aligned, which is always the case. Only
790 // worry about the last type, because we're not summing the type sizes,
791 // just looking at the lowest and highest offsets.
792 int remainder = highest_end % 4;
793 if (remainder) {
794 highest_end += 4 - remainder;
795 }
796
797 param_size = highest_end - lowest_base;
798 }
799
800 return param_size;
801}
802
brynercb91a2f2006-08-25 21:14:45 +0000803bool PDBSourceLineWriter::WriteMap(FILE *map_file) {
brynercb91a2f2006-08-25 21:14:45 +0000804 output_ = map_file;
mmentovai80866e72006-11-06 19:34:19 +0000805
ted.mielczarekd35f1132010-10-05 19:39:23 +0000806 bool ret = PrintPDBInfo();
ted.mielczarekd35f1132010-10-05 19:39:23 +0000807 // This is not a critical piece of the symbol file.
ted.mielczarekcff9fdb2011-02-15 13:39:08 +0000808 PrintPEInfo();
ted.mielczarekd35f1132010-10-05 19:39:23 +0000809 ret = ret &&
810 PrintSourceFiles() &&
811 PrintFunctions() &&
812 PrintFrameData();
brynercb91a2f2006-08-25 21:14:45 +0000813
814 output_ = NULL;
815 return ret;
816}
817
818void PDBSourceLineWriter::Close() {
819 session_.Release();
820}
821
mmentovai93fa3752006-12-06 20:18:26 +0000822bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo *info) {
823 if (!info) {
824 return false;
mmentovai80866e72006-11-06 19:34:19 +0000825 }
mmentovai80866e72006-11-06 19:34:19 +0000826
mmentovai93fa3752006-12-06 20:18:26 +0000827 info->debug_file.clear();
828 info->debug_identifier.clear();
829 info->cpu.clear();
mmentovai80866e72006-11-06 19:34:19 +0000830
bryner8b1645d2006-10-16 17:27:03 +0000831 CComPtr<IDiaSymbol> global;
832 if (FAILED(session_->get_globalScope(&global))) {
mmentovai80866e72006-11-06 19:34:19 +0000833 return false;
bryner8b1645d2006-10-16 17:27:03 +0000834 }
835
ted.mielczarek6c7d6412010-08-31 15:08:49 +0000836 DWORD machine_type;
837 // get_machineType can return S_FALSE.
838 if (global->get_machineType(&machine_type) == S_OK) {
839 // The documentation claims that get_machineType returns a value from
840 // the CV_CPU_TYPE_e enumeration, but that's not the case.
841 // Instead, it returns one of the IMAGE_FILE_MACHINE values as
842 // defined here:
843 // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx
844 switch (machine_type) {
845 case IMAGE_FILE_MACHINE_I386:
846 info->cpu = L"x86";
847 break;
848 case IMAGE_FILE_MACHINE_AMD64:
849 info->cpu = L"x86_64";
850 break;
851 default:
852 info->cpu = L"unknown";
853 break;
854 }
mmentovai93fa3752006-12-06 20:18:26 +0000855 } else {
856 // Unexpected, but handle gracefully.
857 info->cpu = L"unknown";
858 }
859
860 // DWORD* and int* are not compatible. This is clean and avoids a cast.
861 DWORD age;
862 if (FAILED(global->get_age(&age))) {
863 return false;
mmentovai4365e2f2006-11-21 16:58:36 +0000864 }
865
866 bool uses_guid;
867 if (!UsesGUID(&uses_guid)) {
mmentovai80866e72006-11-06 19:34:19 +0000868 return false;
bryner8b1645d2006-10-16 17:27:03 +0000869 }
mmentovai4365e2f2006-11-21 16:58:36 +0000870
871 if (uses_guid) {
mmentovai93fa3752006-12-06 20:18:26 +0000872 GUID guid;
873 if (FAILED(global->get_guid(&guid))) {
mmentovai4365e2f2006-11-21 16:58:36 +0000874 return false;
875 }
876
mmentovaic7b6c112006-12-12 21:52:56 +0000877 // Use the same format that the MS symbol server uses in filesystem
878 // hierarchies.
mmentovai93fa3752006-12-06 20:18:26 +0000879 wchar_t age_string[9];
mmentovaiaec44482007-03-30 20:30:08 +0000880 swprintf(age_string, sizeof(age_string) / sizeof(age_string[0]),
881 L"%x", age);
mmentovai30fc9ce2007-04-04 16:13:55 +0000882
883 // remove when VC++7.1 is no longer supported
884 age_string[sizeof(age_string) / sizeof(age_string[0]) - 1] = L'\0';
mmentovai93fa3752006-12-06 20:18:26 +0000885
886 info->debug_identifier = GUIDString::GUIDToSymbolServerWString(&guid);
887 info->debug_identifier.append(age_string);
mmentovai4365e2f2006-11-21 16:58:36 +0000888 } else {
889 DWORD signature;
890 if (FAILED(global->get_signature(&signature))) {
891 return false;
892 }
893
mmentovaic7b6c112006-12-12 21:52:56 +0000894 // Use the same format that the MS symbol server uses in filesystem
895 // hierarchies.
mmentovai93fa3752006-12-06 20:18:26 +0000896 wchar_t identifier_string[17];
mmentovaiaec44482007-03-30 20:30:08 +0000897 swprintf(identifier_string,
898 sizeof(identifier_string) / sizeof(identifier_string[0]),
899 L"%08X%x", signature, age);
mmentovai30fc9ce2007-04-04 16:13:55 +0000900
901 // remove when VC++7.1 is no longer supported
902 identifier_string[sizeof(identifier_string) /
903 sizeof(identifier_string[0]) - 1] = L'\0';
904
mmentovai93fa3752006-12-06 20:18:26 +0000905 info->debug_identifier = identifier_string;
mmentovai4365e2f2006-11-21 16:58:36 +0000906 }
bryner8b1645d2006-10-16 17:27:03 +0000907
mmentovai93fa3752006-12-06 20:18:26 +0000908 CComBSTR debug_file_string;
909 if (FAILED(global->get_symbolsFileName(&debug_file_string))) {
mmentovai80866e72006-11-06 19:34:19 +0000910 return false;
911 }
mmentovai93fa3752006-12-06 20:18:26 +0000912 info->debug_file =
913 WindowsStringUtils::GetBaseName(wstring(debug_file_string));
mmentovai80866e72006-11-06 19:34:19 +0000914
915 return true;
bryner8b1645d2006-10-16 17:27:03 +0000916}
917
ted.mielczarekd35f1132010-10-05 19:39:23 +0000918bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo *info) {
919 if (!info) {
920 return false;
921 }
922
923 if (code_file_.empty() && !FindPEFile()) {
924 fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
925 return false;
926 }
927
928 // Convert wchar to native charset because ImageLoad only takes
929 // a PSTR as input.
930 string code_file;
931 if (!WindowsStringUtils::safe_wcstombs(code_file_, &code_file)) {
932 return false;
933 }
934
935 AutoImage img(ImageLoad((PSTR)code_file.c_str(), NULL));
936 if (!img) {
937 fprintf(stderr, "Failed to open PE file: %s\n", code_file.c_str());
938 return false;
939 }
940
941 info->code_file = WindowsStringUtils::GetBaseName(code_file_);
942
943 // The date and time that the file was created by the linker.
944 DWORD TimeDateStamp = img->FileHeader->FileHeader.TimeDateStamp;
945 // The size of the file in bytes, including all headers.
946 DWORD SizeOfImage = 0;
947 PIMAGE_OPTIONAL_HEADER64 opt =
948 &((PIMAGE_NT_HEADERS64)img->FileHeader)->OptionalHeader;
949 if (opt->Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
950 // 64-bit PE file.
951 SizeOfImage = opt->SizeOfImage;
952 }
953 else {
954 // 32-bit PE file.
955 SizeOfImage = img->FileHeader->OptionalHeader.SizeOfImage;
956 }
957 wchar_t code_identifier[32];
958 swprintf(code_identifier,
959 sizeof(code_identifier) / sizeof(code_identifier[0]),
960 L"%08X%X", TimeDateStamp, SizeOfImage);
961 info->code_identifier = code_identifier;
962
963 return true;
964}
965
mmentovai4365e2f2006-11-21 16:58:36 +0000966bool PDBSourceLineWriter::UsesGUID(bool *uses_guid) {
967 if (!uses_guid)
968 return false;
969
970 CComPtr<IDiaSymbol> global;
971 if (FAILED(session_->get_globalScope(&global)))
972 return false;
973
974 GUID guid;
975 if (FAILED(global->get_guid(&guid)))
976 return false;
977
978 DWORD signature;
979 if (FAILED(global->get_signature(&signature)))
980 return false;
981
982 // There are two possibilities for guid: either it's a real 128-bit GUID
983 // as identified in a code module by a new-style CodeView record, or it's
984 // a 32-bit signature (timestamp) as identified by an old-style record.
985 // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h.
986 //
987 // Because DIA doesn't provide a way to directly determine whether a module
988 // uses a GUID or a 32-bit signature, this code checks whether the first 32
989 // bits of guid are the same as the signature, and if the rest of guid is
990 // zero. If so, then with a pretty high degree of certainty, there's an
991 // old-style CodeView record in use. This method will only falsely find an
992 // an old-style CodeView record if a real 128-bit GUID has its first 32
993 // bits set the same as the module's signature (timestamp) and the rest of
994 // the GUID is set to 0. This is highly unlikely.
995
996 GUID signature_guid = {signature}; // 0-initializes other members
997 *uses_guid = !IsEqualGUID(guid, signature_guid);
998 return true;
999}
1000
mmentovaie5dc6082007-02-14 19:51:05 +00001001} // namespace google_breakpad