PageRenderTime 33ms CodeModel.GetById 2ms app.highlight 26ms RepoModel.GetById 1ms app.codeStats 1ms

/thirdparty/breakpad/processor/basic_source_line_resolver.cc

http://github.com/tomahawk-player/tomahawk
C++ | 451 lines | 273 code | 61 blank | 117 comment | 76 complexity | 66f3cefe891ea2e7388290ba9ce624a9 MD5 | raw file
  1// Copyright (c) 2010 Google Inc.
  2// All rights reserved.
  3//
  4// Redistribution and use in source and binary forms, with or without
  5// modification, are permitted provided that the following conditions are
  6// met:
  7//
  8//     * Redistributions of source code must retain the above copyright
  9// notice, this list of conditions and the following disclaimer.
 10//     * Redistributions in binary form must reproduce the above
 11// copyright notice, this list of conditions and the following disclaimer
 12// in the documentation and/or other materials provided with the
 13// distribution.
 14//     * Neither the name of Google Inc. nor the names of its
 15// contributors may be used to endorse or promote products derived from
 16// this software without specific prior written permission.
 17//
 18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29//
 30// basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
 31//
 32// See basic_source_line_resolver.h and basic_source_line_resolver_types.h
 33// for documentation.
 34
 35
 36#include <stdio.h>
 37#include <string.h>
 38#include <sys/types.h>
 39#include <sys/stat.h>
 40#include <unistd.h>
 41
 42#include <map>
 43#include <utility>
 44#include <vector>
 45
 46#include "google_breakpad/processor/basic_source_line_resolver.h"
 47#include "processor/basic_source_line_resolver_types.h"
 48#include "processor/module_factory.h"
 49
 50#include "processor/tokenize.h"
 51
 52using std::map;
 53using std::vector;
 54using std::make_pair;
 55
 56namespace google_breakpad {
 57
 58static const char *kWhitespace = " \r\n";
 59
 60BasicSourceLineResolver::BasicSourceLineResolver() :
 61    SourceLineResolverBase(new BasicModuleFactory) { }
 62
 63bool BasicSourceLineResolver::Module::LoadMapFromMemory(char *memory_buffer) {
 64  linked_ptr<Function> cur_func;
 65  int line_number = 0;
 66  char *save_ptr;
 67  size_t map_buffer_length = strlen(memory_buffer);
 68
 69  // If the length is 0, we can still pretend we have a symbol file. This is
 70  // for scenarios that want to test symbol lookup, but don't necessarily care
 71  // if certain modules do not have any information, like system libraries.
 72  if (map_buffer_length == 0) {
 73    return true;
 74  }
 75
 76  if (memory_buffer[map_buffer_length - 1] == '\n') {
 77    memory_buffer[map_buffer_length - 1] = '\0';
 78  }
 79
 80  char *buffer;
 81  buffer = strtok_r(memory_buffer, "\r\n", &save_ptr);
 82
 83  while (buffer != NULL) {
 84    ++line_number;
 85
 86    if (strncmp(buffer, "FILE ", 5) == 0) {
 87      if (!ParseFile(buffer)) {
 88        BPLOG(ERROR) << "ParseFile on buffer failed at " <<
 89            ":" << line_number;
 90        return false;
 91      }
 92    } else if (strncmp(buffer, "STACK ", 6) == 0) {
 93      if (!ParseStackInfo(buffer)) {
 94        BPLOG(ERROR) << "ParseStackInfo failed at " <<
 95            ":" << line_number;
 96        return false;
 97      }
 98    } else if (strncmp(buffer, "FUNC ", 5) == 0) {
 99      cur_func.reset(ParseFunction(buffer));
100      if (!cur_func.get()) {
101        BPLOG(ERROR) << "ParseFunction failed at " <<
102            ":" << line_number;
103        return false;
104      }
105      // StoreRange will fail if the function has an invalid address or size.
106      // We'll silently ignore this, the function and any corresponding lines
107      // will be destroyed when cur_func is released.
108      functions_.StoreRange(cur_func->address, cur_func->size, cur_func);
109    } else if (strncmp(buffer, "PUBLIC ", 7) == 0) {
110      // Clear cur_func: public symbols don't contain line number information.
111      cur_func.reset();
112
113      if (!ParsePublicSymbol(buffer)) {
114        BPLOG(ERROR) << "ParsePublicSymbol failed at " <<
115            ":" << line_number;
116        return false;
117      }
118    } else if (strncmp(buffer, "MODULE ", 7) == 0) {
119      // Ignore these.  They're not of any use to BasicSourceLineResolver,
120      // which is fed modules by a SymbolSupplier.  These lines are present to
121      // aid other tools in properly placing symbol files so that they can
122      // be accessed by a SymbolSupplier.
123      //
124      // MODULE <guid> <age> <filename>
125    } else if (strncmp(buffer, "INFO ", 5) == 0) {
126      // Ignore these as well, they're similarly just for housekeeping.
127      //
128      // INFO CODE_ID <code id> <filename>
129    } else {
130      if (!cur_func.get()) {
131        BPLOG(ERROR) << "Found source line data without a function at " <<
132            ":" << line_number;
133        return false;
134      }
135      Line *line = ParseLine(buffer);
136      if (!line) {
137        BPLOG(ERROR) << "ParseLine failed at " << line_number << " for " <<
138            buffer;
139        return false;
140      }
141      cur_func->lines.StoreRange(line->address, line->size,
142                                 linked_ptr<Line>(line));
143    }
144    buffer = strtok_r(NULL, "\r\n", &save_ptr);
145  }
146  return true;
147}
148
149void BasicSourceLineResolver::Module::LookupAddress(StackFrame *frame) const {
150  MemAddr address = frame->instruction - frame->module->base_address();
151
152  // First, look for a FUNC record that covers address. Use
153  // RetrieveNearestRange instead of RetrieveRange so that, if there
154  // is no such function, we can use the next function to bound the
155  // extent of the PUBLIC symbol we find, below. This does mean we
156  // need to check that address indeed falls within the function we
157  // find; do the range comparison in an overflow-friendly way.
158  linked_ptr<Function> func;
159  linked_ptr<PublicSymbol> public_symbol;
160  MemAddr function_base;
161  MemAddr function_size;
162  MemAddr public_address;
163  if (functions_.RetrieveNearestRange(address, &func,
164                                      &function_base, &function_size) &&
165      address >= function_base && address - function_base < function_size) {
166    frame->function_name = func->name;
167    frame->function_base = frame->module->base_address() + function_base;
168
169    linked_ptr<Line> line;
170    MemAddr line_base;
171    if (func->lines.RetrieveRange(address, &line, &line_base, NULL)) {
172      FileMap::const_iterator it = files_.find(line->source_file_id);
173      if (it != files_.end()) {
174        frame->source_file_name = files_.find(line->source_file_id)->second;
175      }
176      frame->source_line = line->line;
177      frame->source_line_base = frame->module->base_address() + line_base;
178    }
179  } else if (public_symbols_.Retrieve(address,
180                                      &public_symbol, &public_address) &&
181             (!func.get() || public_address > function_base)) {
182    frame->function_name = public_symbol->name;
183    frame->function_base = frame->module->base_address() + public_address;
184  }
185}
186
187WindowsFrameInfo *BasicSourceLineResolver::Module::FindWindowsFrameInfo(
188    const StackFrame *frame) const {
189  MemAddr address = frame->instruction - frame->module->base_address();
190  scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo());
191
192  // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and
193  // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order.
194  // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that
195  // includes its own program string.
196  // WindowsFrameInfo::STACK_INFO_FPO is the older type
197  // corresponding to the FPO_DATA struct. See stackwalker_x86.cc.
198  linked_ptr<WindowsFrameInfo> frame_info;
199  if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA]
200       .RetrieveRange(address, &frame_info))
201      || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO]
202          .RetrieveRange(address, &frame_info))) {
203    result->CopyFrom(*frame_info.get());
204    return result.release();
205  }
206
207  // Even without a relevant STACK line, many functions contain
208  // information about how much space their parameters consume on the
209  // stack. Use RetrieveNearestRange instead of RetrieveRange, so that
210  // we can use the function to bound the extent of the PUBLIC symbol,
211  // below. However, this does mean we need to check that ADDRESS
212  // falls within the retrieved function's range; do the range
213  // comparison in an overflow-friendly way.
214  linked_ptr<Function> function;
215  MemAddr function_base, function_size;
216  if (functions_.RetrieveNearestRange(address, &function,
217                                      &function_base, &function_size) &&
218      address >= function_base && address - function_base < function_size) {
219    result->parameter_size = function->parameter_size;
220    result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE;
221    return result.release();
222  }
223
224  // PUBLIC symbols might have a parameter size. Use the function we
225  // found above to limit the range the public symbol covers.
226  linked_ptr<PublicSymbol> public_symbol;
227  MemAddr public_address;
228  if (public_symbols_.Retrieve(address, &public_symbol, &public_address) &&
229      (!function.get() || public_address > function_base)) {
230    result->parameter_size = public_symbol->parameter_size;
231  }
232
233  return NULL;
234}
235
236CFIFrameInfo *BasicSourceLineResolver::Module::FindCFIFrameInfo(
237    const StackFrame *frame) const {
238  MemAddr address = frame->instruction - frame->module->base_address();
239  MemAddr initial_base, initial_size;
240  string initial_rules;
241
242  // Find the initial rule whose range covers this address. That
243  // provides an initial set of register recovery rules. Then, walk
244  // forward from the initial rule's starting address to frame's
245  // instruction address, applying delta rules.
246  if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules,
247                                        &initial_base, &initial_size)) {
248    return NULL;
249  }
250
251  // Create a frame info structure, and populate it with the rules from
252  // the STACK CFI INIT record.
253  scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo());
254  if (!ParseCFIRuleSet(initial_rules, rules.get()))
255    return NULL;
256
257  // Find the first delta rule that falls within the initial rule's range.
258  map<MemAddr, string>::const_iterator delta =
259    cfi_delta_rules_.lower_bound(initial_base);
260
261  // Apply delta rules up to and including the frame's address.
262  while (delta != cfi_delta_rules_.end() && delta->first <= address) {
263    ParseCFIRuleSet(delta->second, rules.get());
264    delta++;
265  }
266
267  return rules.release();
268}
269
270bool BasicSourceLineResolver::Module::ParseFile(char *file_line) {
271  // FILE <id> <filename>
272  file_line += 5;  // skip prefix
273
274  vector<char*> tokens;
275  if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
276    return false;
277  }
278
279  int index = atoi(tokens[0]);
280  if (index < 0) {
281    return false;
282  }
283
284  char *filename = tokens[1];
285  if (!filename) {
286    return false;
287  }
288
289  files_.insert(make_pair(index, string(filename)));
290  return true;
291}
292
293BasicSourceLineResolver::Function*
294BasicSourceLineResolver::Module::ParseFunction(char *function_line) {
295  // FUNC <address> <size> <stack_param_size> <name>
296  function_line += 5;  // skip prefix
297
298  vector<char*> tokens;
299  if (!Tokenize(function_line, kWhitespace, 4, &tokens)) {
300    return NULL;
301  }
302
303  u_int64_t address    = strtoull(tokens[0], NULL, 16);
304  u_int64_t size       = strtoull(tokens[1], NULL, 16);
305  int stack_param_size = strtoull(tokens[2], NULL, 16);
306  char *name           = tokens[3];
307
308  return new Function(name, address, size, stack_param_size);
309}
310
311BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
312    char *line_line) {
313  // <address> <line number> <source file id>
314  vector<char*> tokens;
315  if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
316    return NULL;
317  }
318
319  u_int64_t address = strtoull(tokens[0], NULL, 16);
320  u_int64_t size    = strtoull(tokens[1], NULL, 16);
321  int line_number   = atoi(tokens[2]);
322  int source_file   = atoi(tokens[3]);
323  if (line_number <= 0) {
324    return NULL;
325  }
326
327  return new Line(address, size, source_file, line_number);
328}
329
330bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
331  // PUBLIC <address> <stack_param_size> <name>
332
333  // Skip "PUBLIC " prefix.
334  public_line += 7;
335
336  vector<char*> tokens;
337  if (!Tokenize(public_line, kWhitespace, 3, &tokens)) {
338    return false;
339  }
340
341  u_int64_t address    = strtoull(tokens[0], NULL, 16);
342  int stack_param_size = strtoull(tokens[1], NULL, 16);
343  char *name           = tokens[2];
344
345  // A few public symbols show up with an address of 0.  This has been seen
346  // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
347  // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1.  They would conflict
348  // with one another if they were allowed into the public_symbols_ map,
349  // but since the address is obviously invalid, gracefully accept them
350  // as input without putting them into the map.
351  if (address == 0) {
352    return true;
353  }
354
355  linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
356                                                   stack_param_size));
357  return public_symbols_.Store(address, symbol);
358}
359
360bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) {
361  // Skip "STACK " prefix.
362  stack_info_line += 6;
363
364  // Find the token indicating what sort of stack frame walking
365  // information this is.
366  while (*stack_info_line == ' ')
367    stack_info_line++;
368  const char *platform = stack_info_line;
369  while (!strchr(kWhitespace, *stack_info_line))
370    stack_info_line++;
371  *stack_info_line++ = '\0';
372
373  // MSVC stack frame info.
374  if (strcmp(platform, "WIN") == 0) {
375    int type = 0;
376    u_int64_t rva, code_size;
377    linked_ptr<WindowsFrameInfo>
378      stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line,
379                                                         type,
380                                                         rva,
381                                                         code_size));
382    if (stack_frame_info == NULL)
383      return false;
384
385    // TODO(mmentovai): I wanted to use StoreRange's return value as this
386    // method's return value, but MSVC infrequently outputs stack info that
387    // violates the containment rules.  This happens with a section of code
388    // in strncpy_s in test_app.cc (testdata/minidump2).  There, problem looks
389    // like this:
390    //   STACK WIN 4 4242 1a a 0 ...  (STACK WIN 4 base size prolog 0 ...)
391    //   STACK WIN 4 4243 2e 9 0 ...
392    // ContainedRangeMap treats these two blocks as conflicting.  In reality,
393    // when the prolog lengths are taken into account, the actual code of
394    // these blocks doesn't conflict.  However, we can't take the prolog lengths
395    // into account directly here because we'd wind up with a different set
396    // of range conflicts when MSVC outputs stack info like this:
397    //   STACK WIN 4 1040 73 33 0 ...
398    //   STACK WIN 4 105a 59 19 0 ...
399    // because in both of these entries, the beginning of the code after the
400    // prolog is at 0x1073, and the last byte of contained code is at 0x10b2.
401    // Perhaps we could get away with storing ranges by rva + prolog_size
402    // if ContainedRangeMap were modified to allow replacement of
403    // already-stored values.
404
405    windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info);
406    return true;
407  } else if (strcmp(platform, "CFI") == 0) {
408    // DWARF CFI stack frame info
409    return ParseCFIFrameInfo(stack_info_line);
410  } else {
411    // Something unrecognized.
412    return false;
413  }
414}
415
416bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
417    char *stack_info_line) {
418  char *cursor;
419
420  // Is this an INIT record or a delta record?
421  char *init_or_address = strtok_r(stack_info_line, " \r\n", &cursor);
422  if (!init_or_address)
423    return false;
424
425  if (strcmp(init_or_address, "INIT") == 0) {
426    // This record has the form "STACK INIT <address> <size> <rules...>".
427    char *address_field = strtok_r(NULL, " \r\n", &cursor);
428    if (!address_field) return false;
429
430    char *size_field = strtok_r(NULL, " \r\n", &cursor);
431    if (!size_field) return false;
432
433    char *initial_rules = strtok_r(NULL, "\r\n", &cursor);
434    if (!initial_rules) return false;
435
436    MemAddr address = strtoul(address_field, NULL, 16);
437    MemAddr size    = strtoul(size_field,    NULL, 16);
438    cfi_initial_rules_.StoreRange(address, size, initial_rules);
439    return true;
440  }
441
442  // This record has the form "STACK <address> <rules...>".
443  char *address_field = init_or_address;
444  char *delta_rules = strtok_r(NULL, "\r\n", &cursor);
445  if (!delta_rules) return false;
446  MemAddr address = strtoul(address_field, NULL, 16);
447  cfi_delta_rules_[address] = delta_rules;
448  return true;
449}
450
451}  // namespace google_breakpad