/thirdparty/breakpad/processor/basic_source_line_resolver.cc

http://github.com/tomahawk-player/tomahawk · C++ · 451 lines · 273 code · 61 blank · 117 comment · 76 complexity · 66f3cefe891ea2e7388290ba9ce624a9 MD5 · raw file

  1. // Copyright (c) 2010 Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. //
  30. // basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
  31. //
  32. // See basic_source_line_resolver.h and basic_source_line_resolver_types.h
  33. // for documentation.
  34. #include <stdio.h>
  35. #include <string.h>
  36. #include <sys/types.h>
  37. #include <sys/stat.h>
  38. #include <unistd.h>
  39. #include <map>
  40. #include <utility>
  41. #include <vector>
  42. #include "google_breakpad/processor/basic_source_line_resolver.h"
  43. #include "processor/basic_source_line_resolver_types.h"
  44. #include "processor/module_factory.h"
  45. #include "processor/tokenize.h"
  46. using std::map;
  47. using std::vector;
  48. using std::make_pair;
  49. namespace google_breakpad {
  // Separator characters used when splitting symbol-file records into fields:
  // space, carriage return and line feed. Declared as a constant array (rather
  // than a re-assignable pointer) so the separator set cannot be re-pointed at
  // run time.
  static const char kWhitespace[] = " \r\n";
  51. BasicSourceLineResolver::BasicSourceLineResolver() :
  52. SourceLineResolverBase(new BasicModuleFactory) { }
  53. bool BasicSourceLineResolver::Module::LoadMapFromMemory(char *memory_buffer) {
  54. linked_ptr<Function> cur_func;
  55. int line_number = 0;
  56. char *save_ptr;
  57. size_t map_buffer_length = strlen(memory_buffer);
  58. // If the length is 0, we can still pretend we have a symbol file. This is
  59. // for scenarios that want to test symbol lookup, but don't necessarily care
  60. // if certain modules do not have any information, like system libraries.
  61. if (map_buffer_length == 0) {
  62. return true;
  63. }
  64. if (memory_buffer[map_buffer_length - 1] == '\n') {
  65. memory_buffer[map_buffer_length - 1] = '\0';
  66. }
  67. char *buffer;
  68. buffer = strtok_r(memory_buffer, "\r\n", &save_ptr);
  69. while (buffer != NULL) {
  70. ++line_number;
  71. if (strncmp(buffer, "FILE ", 5) == 0) {
  72. if (!ParseFile(buffer)) {
  73. BPLOG(ERROR) << "ParseFile on buffer failed at " <<
  74. ":" << line_number;
  75. return false;
  76. }
  77. } else if (strncmp(buffer, "STACK ", 6) == 0) {
  78. if (!ParseStackInfo(buffer)) {
  79. BPLOG(ERROR) << "ParseStackInfo failed at " <<
  80. ":" << line_number;
  81. return false;
  82. }
  83. } else if (strncmp(buffer, "FUNC ", 5) == 0) {
  84. cur_func.reset(ParseFunction(buffer));
  85. if (!cur_func.get()) {
  86. BPLOG(ERROR) << "ParseFunction failed at " <<
  87. ":" << line_number;
  88. return false;
  89. }
  90. // StoreRange will fail if the function has an invalid address or size.
  91. // We'll silently ignore this, the function and any corresponding lines
  92. // will be destroyed when cur_func is released.
  93. functions_.StoreRange(cur_func->address, cur_func->size, cur_func);
  94. } else if (strncmp(buffer, "PUBLIC ", 7) == 0) {
  95. // Clear cur_func: public symbols don't contain line number information.
  96. cur_func.reset();
  97. if (!ParsePublicSymbol(buffer)) {
  98. BPLOG(ERROR) << "ParsePublicSymbol failed at " <<
  99. ":" << line_number;
  100. return false;
  101. }
  102. } else if (strncmp(buffer, "MODULE ", 7) == 0) {
  103. // Ignore these. They're not of any use to BasicSourceLineResolver,
  104. // which is fed modules by a SymbolSupplier. These lines are present to
  105. // aid other tools in properly placing symbol files so that they can
  106. // be accessed by a SymbolSupplier.
  107. //
  108. // MODULE <guid> <age> <filename>
  109. } else if (strncmp(buffer, "INFO ", 5) == 0) {
  110. // Ignore these as well, they're similarly just for housekeeping.
  111. //
  112. // INFO CODE_ID <code id> <filename>
  113. } else {
  114. if (!cur_func.get()) {
  115. BPLOG(ERROR) << "Found source line data without a function at " <<
  116. ":" << line_number;
  117. return false;
  118. }
  119. Line *line = ParseLine(buffer);
  120. if (!line) {
  121. BPLOG(ERROR) << "ParseLine failed at " << line_number << " for " <<
  122. buffer;
  123. return false;
  124. }
  125. cur_func->lines.StoreRange(line->address, line->size,
  126. linked_ptr<Line>(line));
  127. }
  128. buffer = strtok_r(NULL, "\r\n", &save_ptr);
  129. }
  130. return true;
  131. }
  132. void BasicSourceLineResolver::Module::LookupAddress(StackFrame *frame) const {
  133. MemAddr address = frame->instruction - frame->module->base_address();
  134. // First, look for a FUNC record that covers address. Use
  135. // RetrieveNearestRange instead of RetrieveRange so that, if there
  136. // is no such function, we can use the next function to bound the
  137. // extent of the PUBLIC symbol we find, below. This does mean we
  138. // need to check that address indeed falls within the function we
  139. // find; do the range comparison in an overflow-friendly way.
  140. linked_ptr<Function> func;
  141. linked_ptr<PublicSymbol> public_symbol;
  142. MemAddr function_base;
  143. MemAddr function_size;
  144. MemAddr public_address;
  145. if (functions_.RetrieveNearestRange(address, &func,
  146. &function_base, &function_size) &&
  147. address >= function_base && address - function_base < function_size) {
  148. frame->function_name = func->name;
  149. frame->function_base = frame->module->base_address() + function_base;
  150. linked_ptr<Line> line;
  151. MemAddr line_base;
  152. if (func->lines.RetrieveRange(address, &line, &line_base, NULL)) {
  153. FileMap::const_iterator it = files_.find(line->source_file_id);
  154. if (it != files_.end()) {
  155. frame->source_file_name = files_.find(line->source_file_id)->second;
  156. }
  157. frame->source_line = line->line;
  158. frame->source_line_base = frame->module->base_address() + line_base;
  159. }
  160. } else if (public_symbols_.Retrieve(address,
  161. &public_symbol, &public_address) &&
  162. (!func.get() || public_address > function_base)) {
  163. frame->function_name = public_symbol->name;
  164. frame->function_base = frame->module->base_address() + public_address;
  165. }
  166. }
  167. WindowsFrameInfo *BasicSourceLineResolver::Module::FindWindowsFrameInfo(
  168. const StackFrame *frame) const {
  169. MemAddr address = frame->instruction - frame->module->base_address();
  170. scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo());
  171. // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and
  172. // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order.
  173. // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that
  174. // includes its own program string.
  175. // WindowsFrameInfo::STACK_INFO_FPO is the older type
  176. // corresponding to the FPO_DATA struct. See stackwalker_x86.cc.
  177. linked_ptr<WindowsFrameInfo> frame_info;
  178. if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA]
  179. .RetrieveRange(address, &frame_info))
  180. || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO]
  181. .RetrieveRange(address, &frame_info))) {
  182. result->CopyFrom(*frame_info.get());
  183. return result.release();
  184. }
  185. // Even without a relevant STACK line, many functions contain
  186. // information about how much space their parameters consume on the
  187. // stack. Use RetrieveNearestRange instead of RetrieveRange, so that
  188. // we can use the function to bound the extent of the PUBLIC symbol,
  189. // below. However, this does mean we need to check that ADDRESS
  190. // falls within the retrieved function's range; do the range
  191. // comparison in an overflow-friendly way.
  192. linked_ptr<Function> function;
  193. MemAddr function_base, function_size;
  194. if (functions_.RetrieveNearestRange(address, &function,
  195. &function_base, &function_size) &&
  196. address >= function_base && address - function_base < function_size) {
  197. result->parameter_size = function->parameter_size;
  198. result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE;
  199. return result.release();
  200. }
  201. // PUBLIC symbols might have a parameter size. Use the function we
  202. // found above to limit the range the public symbol covers.
  203. linked_ptr<PublicSymbol> public_symbol;
  204. MemAddr public_address;
  205. if (public_symbols_.Retrieve(address, &public_symbol, &public_address) &&
  206. (!function.get() || public_address > function_base)) {
  207. result->parameter_size = public_symbol->parameter_size;
  208. }
  209. return NULL;
  210. }
  211. CFIFrameInfo *BasicSourceLineResolver::Module::FindCFIFrameInfo(
  212. const StackFrame *frame) const {
  213. MemAddr address = frame->instruction - frame->module->base_address();
  214. MemAddr initial_base, initial_size;
  215. string initial_rules;
  216. // Find the initial rule whose range covers this address. That
  217. // provides an initial set of register recovery rules. Then, walk
  218. // forward from the initial rule's starting address to frame's
  219. // instruction address, applying delta rules.
  220. if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules,
  221. &initial_base, &initial_size)) {
  222. return NULL;
  223. }
  224. // Create a frame info structure, and populate it with the rules from
  225. // the STACK CFI INIT record.
  226. scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo());
  227. if (!ParseCFIRuleSet(initial_rules, rules.get()))
  228. return NULL;
  229. // Find the first delta rule that falls within the initial rule's range.
  230. map<MemAddr, string>::const_iterator delta =
  231. cfi_delta_rules_.lower_bound(initial_base);
  232. // Apply delta rules up to and including the frame's address.
  233. while (delta != cfi_delta_rules_.end() && delta->first <= address) {
  234. ParseCFIRuleSet(delta->second, rules.get());
  235. delta++;
  236. }
  237. return rules.release();
  238. }
  239. bool BasicSourceLineResolver::Module::ParseFile(char *file_line) {
  240. // FILE <id> <filename>
  241. file_line += 5; // skip prefix
  242. vector<char*> tokens;
  243. if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
  244. return false;
  245. }
  246. int index = atoi(tokens[0]);
  247. if (index < 0) {
  248. return false;
  249. }
  250. char *filename = tokens[1];
  251. if (!filename) {
  252. return false;
  253. }
  254. files_.insert(make_pair(index, string(filename)));
  255. return true;
  256. }
  257. BasicSourceLineResolver::Function*
  258. BasicSourceLineResolver::Module::ParseFunction(char *function_line) {
  259. // FUNC <address> <size> <stack_param_size> <name>
  260. function_line += 5; // skip prefix
  261. vector<char*> tokens;
  262. if (!Tokenize(function_line, kWhitespace, 4, &tokens)) {
  263. return NULL;
  264. }
  265. u_int64_t address = strtoull(tokens[0], NULL, 16);
  266. u_int64_t size = strtoull(tokens[1], NULL, 16);
  267. int stack_param_size = strtoull(tokens[2], NULL, 16);
  268. char *name = tokens[3];
  269. return new Function(name, address, size, stack_param_size);
  270. }
  271. BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
  272. char *line_line) {
  273. // <address> <line number> <source file id>
  274. vector<char*> tokens;
  275. if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
  276. return NULL;
  277. }
  278. u_int64_t address = strtoull(tokens[0], NULL, 16);
  279. u_int64_t size = strtoull(tokens[1], NULL, 16);
  280. int line_number = atoi(tokens[2]);
  281. int source_file = atoi(tokens[3]);
  282. if (line_number <= 0) {
  283. return NULL;
  284. }
  285. return new Line(address, size, source_file, line_number);
  286. }
  287. bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
  288. // PUBLIC <address> <stack_param_size> <name>
  289. // Skip "PUBLIC " prefix.
  290. public_line += 7;
  291. vector<char*> tokens;
  292. if (!Tokenize(public_line, kWhitespace, 3, &tokens)) {
  293. return false;
  294. }
  295. u_int64_t address = strtoull(tokens[0], NULL, 16);
  296. int stack_param_size = strtoull(tokens[1], NULL, 16);
  297. char *name = tokens[2];
  298. // A few public symbols show up with an address of 0. This has been seen
  299. // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
  300. // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict
  301. // with one another if they were allowed into the public_symbols_ map,
  302. // but since the address is obviously invalid, gracefully accept them
  303. // as input without putting them into the map.
  304. if (address == 0) {
  305. return true;
  306. }
  307. linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
  308. stack_param_size));
  309. return public_symbols_.Store(address, symbol);
  310. }
  311. bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) {
  312. // Skip "STACK " prefix.
  313. stack_info_line += 6;
  314. // Find the token indicating what sort of stack frame walking
  315. // information this is.
  316. while (*stack_info_line == ' ')
  317. stack_info_line++;
  318. const char *platform = stack_info_line;
  319. while (!strchr(kWhitespace, *stack_info_line))
  320. stack_info_line++;
  321. *stack_info_line++ = '\0';
  322. // MSVC stack frame info.
  323. if (strcmp(platform, "WIN") == 0) {
  324. int type = 0;
  325. u_int64_t rva, code_size;
  326. linked_ptr<WindowsFrameInfo>
  327. stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line,
  328. type,
  329. rva,
  330. code_size));
  331. if (stack_frame_info == NULL)
  332. return false;
  333. // TODO(mmentovai): I wanted to use StoreRange's return value as this
  334. // method's return value, but MSVC infrequently outputs stack info that
  335. // violates the containment rules. This happens with a section of code
  336. // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks
  337. // like this:
  338. // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...)
  339. // STACK WIN 4 4243 2e 9 0 ...
  340. // ContainedRangeMap treats these two blocks as conflicting. In reality,
  341. // when the prolog lengths are taken into account, the actual code of
  342. // these blocks doesn't conflict. However, we can't take the prolog lengths
  343. // into account directly here because we'd wind up with a different set
  344. // of range conflicts when MSVC outputs stack info like this:
  345. // STACK WIN 4 1040 73 33 0 ...
  346. // STACK WIN 4 105a 59 19 0 ...
  347. // because in both of these entries, the beginning of the code after the
  348. // prolog is at 0x1073, and the last byte of contained code is at 0x10b2.
  349. // Perhaps we could get away with storing ranges by rva + prolog_size
  350. // if ContainedRangeMap were modified to allow replacement of
  351. // already-stored values.
  352. windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info);
  353. return true;
  354. } else if (strcmp(platform, "CFI") == 0) {
  355. // DWARF CFI stack frame info
  356. return ParseCFIFrameInfo(stack_info_line);
  357. } else {
  358. // Something unrecognized.
  359. return false;
  360. }
  361. }
  362. bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
  363. char *stack_info_line) {
  364. char *cursor;
  365. // Is this an INIT record or a delta record?
  366. char *init_or_address = strtok_r(stack_info_line, " \r\n", &cursor);
  367. if (!init_or_address)
  368. return false;
  369. if (strcmp(init_or_address, "INIT") == 0) {
  370. // This record has the form "STACK INIT <address> <size> <rules...>".
  371. char *address_field = strtok_r(NULL, " \r\n", &cursor);
  372. if (!address_field) return false;
  373. char *size_field = strtok_r(NULL, " \r\n", &cursor);
  374. if (!size_field) return false;
  375. char *initial_rules = strtok_r(NULL, "\r\n", &cursor);
  376. if (!initial_rules) return false;
  377. MemAddr address = strtoul(address_field, NULL, 16);
  378. MemAddr size = strtoul(size_field, NULL, 16);
  379. cfi_initial_rules_.StoreRange(address, size, initial_rules);
  380. return true;
  381. }
  382. // This record has the form "STACK <address> <rules...>".
  383. char *address_field = init_or_address;
  384. char *delta_rules = strtok_r(NULL, "\r\n", &cursor);
  385. if (!delta_rules) return false;
  386. MemAddr address = strtoul(address_field, NULL, 16);
  387. cfi_delta_rules_[address] = delta_rules;
  388. return true;
  389. }
  390. } // namespace google_breakpad