PageRenderTime 80ms CodeModel.GetById 13ms app.highlight 58ms RepoModel.GetById 1ms app.codeStats 1ms

/thirdparty/breakpad/common/windows/pdb_source_line_writer.cc

http://github.com/tomahawk-player/tomahawk
C++ | 1001 lines | 676 code | 132 blank | 193 comment | 189 complexity | 901460afb69fb1c997d30ed44f246f59 MD5 | raw file
   1// Copyright (c) 2006, Google Inc.
   2// All rights reserved.
   3//
   4// Redistribution and use in source and binary forms, with or without
   5// modification, are permitted provided that the following conditions are
   6// met:
   7//
   8//     * Redistributions of source code must retain the above copyright
   9// notice, this list of conditions and the following disclaimer.
  10//     * Redistributions in binary form must reproduce the above
  11// copyright notice, this list of conditions and the following disclaimer
  12// in the documentation and/or other materials provided with the
  13// distribution.
  14//     * Neither the name of Google Inc. nor the names of its
  15// contributors may be used to endorse or promote products derived from
  16// this software without specific prior written permission.
  17//
  18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30#include <atlbase.h>
  31#include <dia2.h>
  32#include <ImageHlp.h>
  33#include <stdio.h>
  34
  35#include "common/windows/string_utils-inl.h"
  36
  37#include "common/windows/pdb_source_line_writer.h"
  38#include "common/windows/guid_string.h"
  39
  40// This constant may be missing from DbgHelp.h.  See the documentation for
  41// IDiaSymbol::get_undecoratedNameEx.
  42#ifndef UNDNAME_NO_ECSU
  43#define UNDNAME_NO_ECSU 0x8000  // Suppresses enum/class/struct/union.
  44#endif  // UNDNAME_NO_ECSU
  45
  46namespace google_breakpad {
  47
  48using std::vector;
  49
  50// A helper class to scope a PLOADED_IMAGE.
  51class AutoImage {
  52 public:
  53  explicit AutoImage(PLOADED_IMAGE img) : img_(img) {}
  54  ~AutoImage() {
  55    if (img_)
  56      ImageUnload(img_);
  57  }
  58
  59  operator PLOADED_IMAGE() { return img_; }
  60  PLOADED_IMAGE operator->() { return img_; }
  61
  62 private:
  63  PLOADED_IMAGE img_;
  64};
  65
  66PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) {
  67}
  68
  69PDBSourceLineWriter::~PDBSourceLineWriter() {
  70}
  71
  72bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) {
  73  Close();
  74
  75  if (FAILED(CoInitialize(NULL))) {
  76    fprintf(stderr, "CoInitialize failed\n");
  77    return false;
  78  }
  79
  80  CComPtr<IDiaDataSource> data_source;
  81  if (FAILED(data_source.CoCreateInstance(CLSID_DiaSource))) {
  82    const int kGuidSize = 64;
  83    wchar_t classid[kGuidSize] = {0};
  84    StringFromGUID2(CLSID_DiaSource, classid, kGuidSize);
  85    // vc80 uses bce36434-2c24-499e-bf49-8bd99b0eeb68.
  86    // vc90 uses 4C41678E-887B-4365-A09E-925D28DB33C2.
  87    fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed "
  88            "(msdia*.dll unregistered?)\n", classid);
  89    return false;
  90  }
  91
  92  switch (format) {
  93    case PDB_FILE:
  94      if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
  95        fprintf(stderr, "loadDataFromPdb failed for %ws\n", file.c_str());
  96        return false;
  97      }
  98      break;
  99    case EXE_FILE:
 100      if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
 101        fprintf(stderr, "loadDataForExe failed for %ws\n", file.c_str());
 102        return false;
 103      }
 104      code_file_ = file;
 105      break;
 106    case ANY_FILE:
 107      if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
 108        if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
 109          fprintf(stderr, "loadDataForPdb and loadDataFromExe failed for %ws\n", file.c_str());
 110          return false;
 111        }
 112	code_file_ = file;
 113      }
 114      break;
 115    default:
 116      fprintf(stderr, "Unknown file format\n");
 117      return false;
 118  }
 119
 120  if (FAILED(data_source->openSession(&session_))) {
 121    fprintf(stderr, "openSession failed\n");
 122  }
 123
 124  return true;
 125}
 126
 127bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) {
 128  // The line number format is:
 129  // <rva> <line number> <source file id>
 130  CComPtr<IDiaLineNumber> line;
 131  ULONG count;
 132
 133  while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
 134    DWORD rva;
 135    if (FAILED(line->get_relativeVirtualAddress(&rva))) {
 136      fprintf(stderr, "failed to get line rva\n");
 137      return false;
 138    }
 139
 140    DWORD length;
 141    if (FAILED(line->get_length(&length))) {
 142      fprintf(stderr, "failed to get line code length\n");
 143      return false;
 144    }
 145
 146    DWORD dia_source_id;
 147    if (FAILED(line->get_sourceFileId(&dia_source_id))) {
 148      fprintf(stderr, "failed to get line source file id\n");
 149      return false;
 150    }
 151    // duplicate file names are coalesced to share one ID
 152    DWORD source_id = GetRealFileID(dia_source_id);
 153
 154    DWORD line_num;
 155    if (FAILED(line->get_lineNumber(&line_num))) {
 156      fprintf(stderr, "failed to get line number\n");
 157      return false;
 158    }
 159
 160    fprintf(output_, "%x %x %d %d\n", rva, length, line_num, source_id);
 161    line.Release();
 162  }
 163  return true;
 164}
 165
 166bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function,
 167                                        IDiaSymbol *block) {
 168  // The function format is:
 169  // FUNC <address> <length> <param_stack_size> <function>
 170  DWORD rva;
 171  if (FAILED(block->get_relativeVirtualAddress(&rva))) {
 172    fprintf(stderr, "couldn't get rva\n");
 173    return false;
 174  }
 175
 176  ULONGLONG length;
 177  if (FAILED(block->get_length(&length))) {
 178    fprintf(stderr, "failed to get function length\n");
 179    return false;
 180  }
 181
 182  if (length == 0) {
 183    // Silently ignore zero-length functions, which can infrequently pop up.
 184    return true;
 185  }
 186
 187  CComBSTR name;
 188  int stack_param_size;
 189  if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
 190    return false;
 191  }
 192
 193  // If the decorated name didn't give the parameter size, try to
 194  // calculate it.
 195  if (stack_param_size < 0) {
 196    stack_param_size = GetFunctionStackParamSize(function);
 197  }
 198
 199  fprintf(output_, "FUNC %x %" WIN_STRING_FORMAT_LL "x %x %ws\n",
 200          rva, length, stack_param_size, name);
 201
 202  CComPtr<IDiaEnumLineNumbers> lines;
 203  if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
 204    return false;
 205  }
 206
 207  if (!PrintLines(lines)) {
 208    return false;
 209  }
 210  return true;
 211}
 212
 213bool PDBSourceLineWriter::PrintSourceFiles() {
 214  CComPtr<IDiaSymbol> global;
 215  if (FAILED(session_->get_globalScope(&global))) {
 216    fprintf(stderr, "get_globalScope failed\n");
 217    return false;
 218  }
 219
 220  CComPtr<IDiaEnumSymbols> compilands;
 221  if (FAILED(global->findChildren(SymTagCompiland, NULL,
 222                                  nsNone, &compilands))) {
 223    fprintf(stderr, "findChildren failed\n");
 224    return false;
 225  }
 226
 227  CComPtr<IDiaSymbol> compiland;
 228  ULONG count;
 229  while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
 230    CComPtr<IDiaEnumSourceFiles> source_files;
 231    if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
 232      return false;
 233    }
 234    CComPtr<IDiaSourceFile> file;
 235    while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
 236      DWORD file_id;
 237      if (FAILED(file->get_uniqueId(&file_id))) {
 238        return false;
 239      }
 240
 241      CComBSTR file_name;
 242      if (FAILED(file->get_fileName(&file_name))) {
 243        return false;
 244      }
 245
 246      wstring file_name_string(file_name);
 247      if (!FileIDIsCached(file_name_string)) {
 248        // this is a new file name, cache it and output a FILE line.
 249        CacheFileID(file_name_string, file_id);
 250        fwprintf(output_, L"FILE %d %s\n", file_id, file_name);
 251      } else {
 252        // this file name has already been seen, just save this
 253        // ID for later lookup.
 254        StoreDuplicateFileID(file_name_string, file_id);
 255      }
 256      file.Release();
 257    }
 258    compiland.Release();
 259  }
 260  return true;
 261}
 262
 263bool PDBSourceLineWriter::PrintFunctions() {
 264  CComPtr<IDiaEnumSymbolsByAddr> symbols;
 265  if (FAILED(session_->getSymbolsByAddr(&symbols))) {
 266    fprintf(stderr, "failed to get symbol enumerator\n");
 267    return false;
 268  }
 269
 270  CComPtr<IDiaSymbol> symbol;
 271  if (FAILED(symbols->symbolByAddr(1, 0, &symbol))) {
 272    fprintf(stderr, "failed to enumerate symbols\n");
 273    return false;
 274  }
 275
 276  DWORD rva_last = 0;
 277  if (FAILED(symbol->get_relativeVirtualAddress(&rva_last))) {
 278    fprintf(stderr, "failed to get symbol rva\n");
 279    return false;
 280  }
 281
 282  ULONG count;
 283  do {
 284    DWORD tag;
 285    if (FAILED(symbol->get_symTag(&tag))) {
 286      fprintf(stderr, "failed to get symbol tag\n");
 287      return false;
 288    }
 289
 290    // For a given function, DIA seems to give either a symbol with
 291    // SymTagFunction or SymTagPublicSymbol, but not both.  This means
 292    // that PDBSourceLineWriter will output either a FUNC or PUBLIC line,
 293    // but not both.
 294    if (tag == SymTagFunction) {
 295      if (!PrintFunction(symbol, symbol)) {
 296        return false;
 297      }
 298    } else if (tag == SymTagPublicSymbol) {
 299      if (!PrintCodePublicSymbol(symbol)) {
 300        return false;
 301      }
 302    }
 303    symbol.Release();
 304  } while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1);
 305
 306  // When building with PGO, the compiler can split functions into
 307  // "hot" and "cold" blocks, and move the "cold" blocks out to separate
 308  // pages, so the function can be noncontiguous. To find these blocks,
 309  // we have to iterate over all the compilands, and then find blocks
 310  // that are children of them. We can then find the lexical parents
 311  // of those blocks and print out an extra FUNC line for blocks
 312  // that are not contained in their parent functions.
 313  CComPtr<IDiaSymbol> global;
 314  if (FAILED(session_->get_globalScope(&global))) {
 315    fprintf(stderr, "get_globalScope failed\n");
 316    return false;
 317  }
 318
 319  CComPtr<IDiaEnumSymbols> compilands;
 320  if (FAILED(global->findChildren(SymTagCompiland, NULL,
 321                                  nsNone, &compilands))) {
 322    fprintf(stderr, "findChildren failed on the global\n");
 323    return false;
 324  }
 325
 326  CComPtr<IDiaSymbol> compiland;
 327  while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
 328    CComPtr<IDiaEnumSymbols> blocks;
 329    if (FAILED(compiland->findChildren(SymTagBlock, NULL,
 330                                       nsNone, &blocks))) {
 331      fprintf(stderr, "findChildren failed on a compiland\n");
 332      return false;
 333    }
 334
 335    CComPtr<IDiaSymbol> block;
 336    while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) {
 337      // find this block's lexical parent function
 338      CComPtr<IDiaSymbol> parent;
 339      DWORD tag;
 340      if (SUCCEEDED(block->get_lexicalParent(&parent)) &&
 341          SUCCEEDED(parent->get_symTag(&tag)) &&
 342          tag == SymTagFunction) {
 343        // now get the block's offset and the function's offset and size,
 344        // and determine if the block is outside of the function
 345        DWORD func_rva, block_rva;
 346        ULONGLONG func_length;
 347        if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) &&
 348            SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) &&
 349            SUCCEEDED(parent->get_length(&func_length))) {
 350          if (block_rva < func_rva || block_rva > (func_rva + func_length)) {
 351            if (!PrintFunction(parent, block)) {
 352              return false;
 353            }
 354          }
 355        }
 356      }
 357      parent.Release();
 358      block.Release();
 359    }
 360    blocks.Release();
 361    compiland.Release();
 362  }
 363
 364  return true;
 365}
 366
 367bool PDBSourceLineWriter::PrintFrameData() {
 368  // It would be nice if it were possible to output frame data alongside the
 369  // associated function, as is done with line numbers, but the DIA API
 370  // doesn't make it possible to get the frame data in that way.
 371
 372  CComPtr<IDiaEnumTables> tables;
 373  if (FAILED(session_->getEnumTables(&tables)))
 374    return false;
 375
 376  // Pick up the first table that supports IDiaEnumFrameData.
 377  CComPtr<IDiaEnumFrameData> frame_data_enum;
 378  CComPtr<IDiaTable> table;
 379  ULONG count;
 380  while (!frame_data_enum &&
 381         SUCCEEDED(tables->Next(1, &table, &count)) &&
 382         count == 1) {
 383    table->QueryInterface(_uuidof(IDiaEnumFrameData),
 384                          reinterpret_cast<void**>(&frame_data_enum));
 385    table.Release();
 386  }
 387  if (!frame_data_enum)
 388    return false;
 389
 390  DWORD last_type = -1;
 391  DWORD last_rva = -1;
 392  DWORD last_code_size = 0;
 393  DWORD last_prolog_size = -1;
 394
 395  CComPtr<IDiaFrameData> frame_data;
 396  while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
 397         count == 1) {
 398    DWORD type;
 399    if (FAILED(frame_data->get_type(&type)))
 400      return false;
 401
 402    DWORD rva;
 403    if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
 404      return false;
 405
 406    DWORD code_size;
 407    if (FAILED(frame_data->get_lengthBlock(&code_size)))
 408      return false;
 409
 410    DWORD prolog_size;
 411    if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
 412      return false;
 413
 414    // epliog_size is always 0.
 415    DWORD epilog_size = 0;
 416
 417    // parameter_size is the size of parameters passed on the stack.  If any
 418    // parameters are not passed on the stack (such as in registers), their
 419    // sizes will not be included in parameter_size.
 420    DWORD parameter_size;
 421    if (FAILED(frame_data->get_lengthParams(&parameter_size)))
 422      return false;
 423
 424    DWORD saved_register_size;
 425    if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
 426      return false;
 427
 428    DWORD local_size;
 429    if (FAILED(frame_data->get_lengthLocals(&local_size)))
 430      return false;
 431
 432    // get_maxStack can return S_FALSE, just use 0 in that case.
 433    DWORD max_stack_size = 0;
 434    if (FAILED(frame_data->get_maxStack(&max_stack_size)))
 435      return false;
 436
 437    // get_programString can return S_FALSE, indicating that there is no
 438    // program string.  In that case, check whether %ebp is used.
 439    HRESULT program_string_result;
 440    CComBSTR program_string;
 441    if (FAILED(program_string_result = frame_data->get_program(
 442        &program_string))) {
 443      return false;
 444    }
 445
 446    // get_allocatesBasePointer can return S_FALSE, treat that as though
 447    // %ebp is not used.
 448    BOOL allocates_base_pointer = FALSE;
 449    if (program_string_result != S_OK) {
 450      if (FAILED(frame_data->get_allocatesBasePointer(
 451          &allocates_base_pointer))) {
 452        return false;
 453      }
 454    }
 455
 456    // Only print out a line if type, rva, code_size, or prolog_size have
 457    // changed from the last line.  It is surprisingly common (especially in
 458    // system library PDBs) for DIA to return a series of identical
 459    // IDiaFrameData objects.  For kernel32.pdb from Windows XP SP2 on x86,
 460    // this check reduces the size of the dumped symbol file by a third.
 461    if (type != last_type || rva != last_rva || code_size != last_code_size ||
 462        prolog_size != last_prolog_size) {
 463      fprintf(output_, "STACK WIN %x %x %x %x %x %x %x %x %x %d ",
 464              type, rva, code_size, prolog_size, epilog_size,
 465              parameter_size, saved_register_size, local_size, max_stack_size,
 466              program_string_result == S_OK);
 467      if (program_string_result == S_OK) {
 468        fprintf(output_, "%ws\n", program_string);
 469      } else {
 470        fprintf(output_, "%d\n", allocates_base_pointer);
 471      }
 472
 473      last_type = type;
 474      last_rva = rva;
 475      last_code_size = code_size;
 476      last_prolog_size = prolog_size;
 477    }
 478
 479    frame_data.Release();
 480  }
 481
 482  return true;
 483}
 484
 485bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol) {
 486  BOOL is_code;
 487  if (FAILED(symbol->get_code(&is_code))) {
 488    return false;
 489  }
 490  if (!is_code) {
 491    return true;
 492  }
 493
 494  DWORD rva;
 495  if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
 496    return false;
 497  }
 498
 499  CComBSTR name;
 500  int stack_param_size;
 501  if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
 502    return false;
 503  }
 504
 505  fprintf(output_, "PUBLIC %x %x %ws\n", rva,
 506          stack_param_size > 0 ? stack_param_size : 0, name);
 507  return true;
 508}
 509
 510bool PDBSourceLineWriter::PrintPDBInfo() {
 511  PDBModuleInfo info;
 512  if (!GetModuleInfo(&info)) {
 513    return false;
 514  }
 515
 516  // Hard-code "windows" for the OS because that's the only thing that makes
 517  // sense for PDB files.  (This might not be strictly correct for Windows CE
 518  // support, but we don't care about that at the moment.)
 519  fprintf(output_, "MODULE windows %ws %ws %ws\n",
 520          info.cpu.c_str(), info.debug_identifier.c_str(),
 521          info.debug_file.c_str());
 522
 523  return true;
 524}
 525
 526bool PDBSourceLineWriter::PrintPEInfo() {
 527  PEModuleInfo info;
 528  if (!GetPEInfo(&info)) {
 529    return false;
 530  }
 531
 532  fprintf(output_, "INFO CODE_ID %ws %ws\n",
 533	  info.code_identifier.c_str(),
 534	  info.code_file.c_str());
 535  return true;
 536}
 537
 538// wcstol_positive_strict is sort of like wcstol, but much stricter.  string
 539// should be a buffer pointing to a null-terminated string containing only
 540// decimal digits.  If the entire string can be converted to an integer
 541// without overflowing, and there are no non-digit characters before the
 542// result is set to the value and this function returns true.  Otherwise,
 543// this function returns false.  This is an alternative to the strtol, atoi,
 544// and scanf families, which are not as strict about input and in some cases
 545// don't provide a good way for the caller to determine if a conversion was
 546// successful.
 547static bool wcstol_positive_strict(wchar_t *string, int *result) {
 548  int value = 0;
 549  for (wchar_t *c = string; *c != '\0'; ++c) {
 550    int last_value = value;
 551    value *= 10;
 552    // Detect overflow.
 553    if (value / 10 != last_value || value < 0) {
 554      return false;
 555    }
 556    if (*c < '0' || *c > '9') {
 557      return false;
 558    }
 559    unsigned int c_value = *c - '0';
 560    last_value = value;
 561    value += c_value;
 562    // Detect overflow.
 563    if (value < last_value) {
 564      return false;
 565    }
 566    // Forbid leading zeroes unless the string is just "0".
 567    if (value == 0 && *(c+1) != '\0') {
 568      return false;
 569    }
 570  }
 571  *result = value;
 572  return true;
 573}
 574
 575bool PDBSourceLineWriter::FindPEFile() {
 576  CComPtr<IDiaSymbol> global;
 577  if (FAILED(session_->get_globalScope(&global))) {
 578    fprintf(stderr, "get_globalScope failed\n");
 579    return false;
 580  }
 581
 582  CComBSTR symbols_file;
 583  if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) {
 584    wstring file(symbols_file);
 585    
 586    // Look for an EXE or DLL file.
 587    const wchar_t *extensions[] = { L"exe", L"dll" };
 588    for (int i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) {
 589      size_t dot_pos = file.find_last_of(L".");
 590      if (dot_pos != wstring::npos) {
 591	file.replace(dot_pos + 1, wstring::npos, extensions[i]);
 592	// Check if this file exists.
 593	if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) {
 594	  code_file_ = file;
 595	  return true;
 596	}
 597      }
 598    }
 599  }
 600
 601  return false;
 602}
 603
 604// static
 605bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol *function,
 606                                                BSTR *name,
 607                                                int *stack_param_size) {
 608  *stack_param_size = -1;
 609  const DWORD undecorate_options = UNDNAME_NO_MS_KEYWORDS |
 610                                   UNDNAME_NO_FUNCTION_RETURNS |
 611                                   UNDNAME_NO_ALLOCATION_MODEL |
 612                                   UNDNAME_NO_ALLOCATION_LANGUAGE |
 613                                   UNDNAME_NO_THISTYPE |
 614                                   UNDNAME_NO_ACCESS_SPECIFIERS |
 615                                   UNDNAME_NO_THROW_SIGNATURES |
 616                                   UNDNAME_NO_MEMBER_TYPE |
 617                                   UNDNAME_NO_RETURN_UDT_MODEL |
 618                                   UNDNAME_NO_ECSU;
 619
 620  // Use get_undecoratedNameEx to get readable C++ names with arguments.
 621  if (function->get_undecoratedNameEx(undecorate_options, name) != S_OK) {
 622    if (function->get_name(name) != S_OK) {
 623      fprintf(stderr, "failed to get function name\n");
 624      return false;
 625    }
 626    // If a name comes from get_name because no undecorated form existed,
 627    // it's already formatted properly to be used as output.  Don't do any
 628    // additional processing.
 629    //
 630    // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's.
 631    // This will result in calling get_name for some C++ symbols, so
 632    // all of the parameter and return type information may not be included in
 633    // the name string.
 634  } else {
 635    // C++ uses a bogus "void" argument for functions and methods that don't
 636    // take any parameters.  Take it out of the undecorated name because it's
 637    // ugly and unnecessary.
 638    const wchar_t *replace_string = L"(void)";
 639    const size_t replace_length = wcslen(replace_string);
 640    const wchar_t *replacement_string = L"()";
 641    size_t length = wcslen(*name);
 642    if (length >= replace_length) {
 643      wchar_t *name_end = *name + length - replace_length;
 644      if (wcscmp(name_end, replace_string) == 0) {
 645        WindowsStringUtils::safe_wcscpy(name_end, replace_length,
 646                                        replacement_string);
 647        length = wcslen(*name);
 648      }
 649    }
 650
 651    // Undecorate names used for stdcall and fastcall.  These names prefix
 652    // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
 653    // with '@' followed by the number of bytes of parameters, in decimal.
 654    // If such a name is found, take note of the size and undecorate it.
 655    // Only do this for names that aren't C++, which is determined based on
 656    // whether the undecorated name contains any ':' or '(' characters.
 657    if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
 658        (*name[0] == '_' || *name[0] == '@')) {
 659      wchar_t *last_at = wcsrchr(*name + 1, '@');
 660      if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
 661        // If this function adheres to the fastcall convention, it accepts up
 662        // to the first 8 bytes of parameters in registers (%ecx and %edx).
 663        // We're only interested in the stack space used for parameters, so
 664        // so subtract 8 and don't let the size go below 0.
 665        if (*name[0] == '@') {
 666          if (*stack_param_size > 8) {
 667            *stack_param_size -= 8;
 668          } else {
 669            *stack_param_size = 0;
 670          }
 671        }
 672
 673        // Undecorate the name by moving it one character to the left in its
 674        // buffer, and terminating it where the last '@' had been.
 675        WindowsStringUtils::safe_wcsncpy(*name, length,
 676                                         *name + 1, last_at - *name - 1);
 677     } else if (*name[0] == '_') {
 678        // This symbol's name is encoded according to the cdecl rules.  The
 679        // name doesn't end in a '@' character followed by a decimal positive
 680        // integer, so it's not a stdcall name.  Strip off the leading
 681        // underscore.
 682        WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length);
 683      }
 684    }
 685  }
 686
 687  return true;
 688}
 689
 690// static
 691int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol *function) {
 692  // This implementation is highly x86-specific.
 693
 694  // Gather the symbols corresponding to data.
 695  CComPtr<IDiaEnumSymbols> data_children;
 696  if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
 697                                    &data_children))) {
 698    return 0;
 699  }
 700
 701  // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
 702  // highest_end is one greater than the highest offset (i.e. base + length).
 703  // Stack parameters are assumed to be contiguous, because in reality, they
 704  // are.
 705  int lowest_base = INT_MAX;
 706  int highest_end = INT_MIN;
 707
 708  CComPtr<IDiaSymbol> child;
 709  DWORD count;
 710  while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
 711    // If any operation fails at this point, just proceed to the next child.
 712    // Use the next_child label instead of continue because child needs to
 713    // be released before it's reused.  Declare constructable/destructable
 714    // types early to avoid gotos that cross initializations.
 715    CComPtr<IDiaSymbol> child_type;
 716
 717    // DataIsObjectPtr is only used for |this|.  Because |this| can be passed
 718    // as a stack parameter, look for it in addition to traditional
 719    // parameters.
 720    DWORD child_kind;
 721    if (FAILED(child->get_dataKind(&child_kind)) ||
 722        (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
 723      goto next_child;
 724    }
 725
 726    // Only concentrate on register-relative parameters.  Parameters may also
 727    // be enregistered (passed directly in a register), but those don't
 728    // consume any stack space, so they're not of interest.
 729    DWORD child_location_type;
 730    if (FAILED(child->get_locationType(&child_location_type)) ||
 731        child_location_type != LocIsRegRel) {
 732      goto next_child;
 733    }
 734
 735    // Of register-relative parameters, the only ones that make any sense are
 736    // %ebp- or %esp-relative.  Note that MSVC's debugging information always
 737    // gives parameters as %ebp-relative even when a function doesn't use a
 738    // traditional frame pointer and stack parameters are accessed relative to
 739    // %esp, so just look for %ebp-relative parameters.  If you wanted to
 740    // access parameters, you'd probably want to treat these %ebp-relative
 741    // offsets as if they were relative to %esp before a function's prolog
 742    // executed.
 743    DWORD child_register;
 744    if (FAILED(child->get_registerId(&child_register)) ||
 745        child_register != CV_REG_EBP) {
 746      goto next_child;
 747    }
 748
 749    LONG child_register_offset;
 750    if (FAILED(child->get_offset(&child_register_offset))) {
 751      goto next_child;
 752    }
 753
 754    // IDiaSymbol::get_type can succeed but still pass back a NULL value.
 755    if (FAILED(child->get_type(&child_type)) || !child_type) {
 756      goto next_child;
 757    }
 758
 759    ULONGLONG child_length;
 760    if (FAILED(child_type->get_length(&child_length))) {
 761      goto next_child;
 762    }
 763
 764    int child_end = child_register_offset + static_cast<ULONG>(child_length);
 765    if (child_register_offset < lowest_base) {
 766      lowest_base = child_register_offset;
 767    }
 768    if (child_end > highest_end) {
 769      highest_end = child_end;
 770    }
 771
 772next_child:
 773    child.Release();
 774  }
 775
 776  int param_size = 0;
 777  // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
 778  // possible address to find a stack parameter before executing a function's
 779  // prolog (see above).  Some optimizations cause parameter offsets to be
 780  // lower than 4, but we're not concerned with those because we're only
 781  // looking for parameters contained in addresses higher than where the
 782  // return address is stored.
 783  if (lowest_base < 4) {
 784    lowest_base = 4;
 785  }
 786  if (highest_end > lowest_base) {
 787    // All stack parameters are pushed as at least 4-byte quantities.  If the
 788    // last type was narrower than 4 bytes, promote it.  This assumes that all
 789    // parameters' offsets are 4-byte-aligned, which is always the case.  Only
 790    // worry about the last type, because we're not summing the type sizes,
 791    // just looking at the lowest and highest offsets.
 792    int remainder = highest_end % 4;
 793    if (remainder) {
 794      highest_end += 4 - remainder;
 795    }
 796
 797    param_size = highest_end - lowest_base;
 798  }
 799
 800  return param_size;
 801}
 802
 803bool PDBSourceLineWriter::WriteMap(FILE *map_file) {
 804  output_ = map_file;
 805
 806  bool ret = PrintPDBInfo();
 807  // This is not a critical piece of the symbol file.
 808  PrintPEInfo();
 809  ret = ret &&
 810    PrintSourceFiles() && 
 811    PrintFunctions() &&
 812    PrintFrameData();
 813
 814  output_ = NULL;
 815  return ret;
 816}
 817
 818void PDBSourceLineWriter::Close() {
 819  session_.Release();
 820}
 821
 822bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo *info) {
 823  if (!info) {
 824    return false;
 825  }
 826
 827  info->debug_file.clear();
 828  info->debug_identifier.clear();
 829  info->cpu.clear();
 830
 831  CComPtr<IDiaSymbol> global;
 832  if (FAILED(session_->get_globalScope(&global))) {
 833    return false;
 834  }
 835
 836  DWORD machine_type;
 837  // get_machineType can return S_FALSE.
 838  if (global->get_machineType(&machine_type) == S_OK) {
 839    // The documentation claims that get_machineType returns a value from
 840    // the CV_CPU_TYPE_e enumeration, but that's not the case.
 841    // Instead, it returns one of the IMAGE_FILE_MACHINE values as
 842    // defined here:
 843    // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx
 844    switch (machine_type) {
 845      case IMAGE_FILE_MACHINE_I386:
 846        info->cpu = L"x86";
 847        break;
 848      case IMAGE_FILE_MACHINE_AMD64:
 849        info->cpu = L"x86_64";
 850        break;
 851      default:
 852        info->cpu = L"unknown";
 853        break;
 854    }
 855  } else {
 856    // Unexpected, but handle gracefully.
 857    info->cpu = L"unknown";
 858  }
 859
 860  // DWORD* and int* are not compatible.  This is clean and avoids a cast.
 861  DWORD age;
 862  if (FAILED(global->get_age(&age))) {
 863    return false;
 864  }
 865
 866  bool uses_guid;
 867  if (!UsesGUID(&uses_guid)) {
 868    return false;
 869  }
 870
 871  if (uses_guid) {
 872    GUID guid;
 873    if (FAILED(global->get_guid(&guid))) {
 874      return false;
 875    }
 876
 877    // Use the same format that the MS symbol server uses in filesystem
 878    // hierarchies.
 879    wchar_t age_string[9];
 880    swprintf(age_string, sizeof(age_string) / sizeof(age_string[0]),
 881             L"%x", age);
 882
 883    // remove when VC++7.1 is no longer supported
 884    age_string[sizeof(age_string) / sizeof(age_string[0]) - 1] = L'\0';
 885
 886    info->debug_identifier = GUIDString::GUIDToSymbolServerWString(&guid);
 887    info->debug_identifier.append(age_string);
 888  } else {
 889    DWORD signature;
 890    if (FAILED(global->get_signature(&signature))) {
 891      return false;
 892    }
 893
 894    // Use the same format that the MS symbol server uses in filesystem
 895    // hierarchies.
 896    wchar_t identifier_string[17];
 897    swprintf(identifier_string,
 898             sizeof(identifier_string) / sizeof(identifier_string[0]),
 899             L"%08X%x", signature, age);
 900
 901    // remove when VC++7.1 is no longer supported
 902    identifier_string[sizeof(identifier_string) /
 903                      sizeof(identifier_string[0]) - 1] = L'\0';
 904
 905    info->debug_identifier = identifier_string;
 906  }
 907
 908  CComBSTR debug_file_string;
 909  if (FAILED(global->get_symbolsFileName(&debug_file_string))) {
 910    return false;
 911  }
 912  info->debug_file =
 913      WindowsStringUtils::GetBaseName(wstring(debug_file_string));
 914
 915  return true;
 916}
 917
 918bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo *info) {
 919  if (!info) {
 920    return false;
 921  }
 922
 923  if (code_file_.empty() && !FindPEFile()) {
 924    fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
 925    return false;
 926  }
 927
 928  // Convert wchar to native charset because ImageLoad only takes
 929  // a PSTR as input.
 930  string code_file;
 931  if (!WindowsStringUtils::safe_wcstombs(code_file_, &code_file)) {
 932    return false;
 933  }
 934
 935  AutoImage img(ImageLoad((PSTR)code_file.c_str(), NULL));
 936  if (!img) {
 937    fprintf(stderr, "Failed to open PE file: %s\n", code_file.c_str());
 938    return false;
 939  }
 940
 941  info->code_file = WindowsStringUtils::GetBaseName(code_file_);
 942
 943  // The date and time that the file was created by the linker.
 944  DWORD TimeDateStamp = img->FileHeader->FileHeader.TimeDateStamp;
 945  // The size of the file in bytes, including all headers.
 946  DWORD SizeOfImage = 0;
 947  PIMAGE_OPTIONAL_HEADER64 opt =
 948    &((PIMAGE_NT_HEADERS64)img->FileHeader)->OptionalHeader;
 949  if (opt->Magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
 950    // 64-bit PE file.
 951    SizeOfImage = opt->SizeOfImage;
 952  }
 953  else {
 954    // 32-bit PE file.
 955    SizeOfImage = img->FileHeader->OptionalHeader.SizeOfImage;
 956  }
 957  wchar_t code_identifier[32];
 958  swprintf(code_identifier,
 959	   sizeof(code_identifier) / sizeof(code_identifier[0]),
 960	   L"%08X%X", TimeDateStamp, SizeOfImage);
 961  info->code_identifier = code_identifier;
 962
 963  return true;
 964}
 965
 966bool PDBSourceLineWriter::UsesGUID(bool *uses_guid) {
 967  if (!uses_guid)
 968    return false;
 969
 970  CComPtr<IDiaSymbol> global;
 971  if (FAILED(session_->get_globalScope(&global)))
 972    return false;
 973
 974  GUID guid;
 975  if (FAILED(global->get_guid(&guid)))
 976    return false;
 977
 978  DWORD signature;
 979  if (FAILED(global->get_signature(&signature)))
 980    return false;
 981
 982  // There are two possibilities for guid: either it's a real 128-bit GUID
 983  // as identified in a code module by a new-style CodeView record, or it's
 984  // a 32-bit signature (timestamp) as identified by an old-style record.
 985  // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h.
 986  //
 987  // Because DIA doesn't provide a way to directly determine whether a module
 988  // uses a GUID or a 32-bit signature, this code checks whether the first 32
 989  // bits of guid are the same as the signature, and if the rest of guid is
 990  // zero.  If so, then with a pretty high degree of certainty, there's an
 991  // old-style CodeView record in use.  This method will only falsely find an
 992  // an old-style CodeView record if a real 128-bit GUID has its first 32
 993  // bits set the same as the module's signature (timestamp) and the rest of
 994  // the GUID is set to 0.  This is highly unlikely.
 995
 996  GUID signature_guid = {signature};  // 0-initializes other members
 997  *uses_guid = !IsEqualGUID(guid, signature_guid);
 998  return true;
 999}
1000
1001}  // namespace google_breakpad