/thirdparty/breakpad/common/stabs_reader.h

http://github.com/tomahawk-player/tomahawk · C++ Header · 325 lines · 89 code · 53 blank · 183 comment · 0 complexity · af743b7c3133b2c521108a736526443f MD5 · raw file

  1. // -*- mode: c++ -*-
  2. // Copyright (c) 2010 Google Inc. All Rights Reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
  30. // stabs_reader.h: Define StabsReader, a parser for STABS debugging
  31. // information. A description of the STABS debugging format can be
  32. // found at:
  33. //
  34. // http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
  35. //
  36. // The comments here assume you understand the format.
  37. //
  38. // This parser can handle big-endian and little-endian data, and the symbol
  39. // values may be either 32 or 64 bits long. It handles both STABS in
  40. // sections (as used on Linux) and STABS appearing directly in an
  41. // a.out-like symbol table (as used in Darwin OS X Mach-O files).
  42. #ifndef COMMON_STABS_READER_H__
  43. #define COMMON_STABS_READER_H__
  44. #include <stddef.h>
  45. #include <stdint.h>
  46. #ifdef HAVE_CONFIG_H
  47. #include <config.h>
  48. #endif
  49. #ifdef HAVE_A_OUT_H
  50. #include <a.out.h>
  51. #endif
  52. #ifdef HAVE_MACH_O_NLIST_H
  53. #include <mach-o/nlist.h>
  54. #endif
  55. #include <string>
  56. #include <vector>
  57. #include "common/byte_cursor.h"
  58. namespace google_breakpad {
  59. class StabsHandler;
  60. class StabsReader {
  61. public:
  62. // Create a reader for the STABS debug information whose .stab section is
  63. // being traversed by ITERATOR, and whose .stabstr section is referred to
  64. // by STRINGS. The reader will call the member functions of HANDLER to
  65. // report the information it finds, when the reader's 'Process' member
  66. // function is called.
  67. //
  68. // BIG_ENDIAN should be true if the entries in the .stab section are in
  69. // big-endian form, or false if they are in little-endian form.
  70. //
  71. // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
  72. // field in each entry in bytes.
  73. //
  74. // UNITIZED should be true if the STABS data is stored in units with
  75. // N_UNDF headers. This is usually the case for STABS stored in sections,
  76. // like .stab/.stabstr, and usually not the case for STABS stored in the
  77. // actual symbol table; UNITIZED should be true when parsing Linux stabs,
  78. // false when parsing Mac OS X STABS. For details, see:
  79. // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
  80. //
  81. // Note that, in ELF, the .stabstr section should be found using the
  82. // 'sh_link' field of the .stab section header, not by name.
  83. StabsReader(const uint8_t *stab, size_t stab_size,
  84. const uint8_t *stabstr, size_t stabstr_size,
  85. bool big_endian, size_t value_size, bool unitized,
  86. StabsHandler *handler);
  87. // Process the STABS data, calling the handler's member functions to
  88. // report what we find. While the handler functions return true,
  89. // continue to process until we reach the end of the section. If we
  90. // processed the entire section and all handlers returned true,
  91. // return true. If any handler returned false, return false.
  92. //
  93. // This is only meant to be called once per StabsReader instance;
  94. // resuming a prior processing pass that stopped abruptly isn't supported.
  95. bool Process();
  96. private:
  97. // An class for walking arrays of STABS entries. This isolates the main
  98. // STABS reader from the exact format (size; endianness) of the entries
  99. // themselves.
  100. class EntryIterator {
  101. public:
  102. // The contents of a STABS entry, adjusted for the host's endianness,
  103. // word size, 'struct nlist' layout, and so on.
  104. struct Entry {
  105. // True if this iterator has reached the end of the entry array. When
  106. // this is set, the other members of this structure are not valid.
  107. bool at_end;
  108. // The number of this entry within the list.
  109. size_t index;
  110. // The current entry's name offset. This is the offset within the
  111. // current compilation unit's strings, as establish by the N_UNDF entries.
  112. size_t name_offset;
  113. // The current entry's type, 'other' field, descriptor, and value.
  114. unsigned char type;
  115. unsigned char other;
  116. short descriptor;
  117. uint64_t value;
  118. };
  119. // Create a EntryIterator walking the entries in BUFFER. Treat the
  120. // entries as big-endian if BIG_ENDIAN is true, as little-endian
  121. // otherwise. Assume each entry has a 'value' field whose size is
  122. // VALUE_SIZE.
  123. //
  124. // This would not be terribly clean to extend to other format variations,
  125. // but it's enough to handle Linux and Mac, and we'd like STABS to die
  126. // anyway.
  127. //
  128. // For the record: on Linux, STABS entry values are always 32 bits,
  129. // regardless of the architecture address size (don't ask me why); on
  130. // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
  131. // size for a Linux ELF .stab section varies according to the ELF class
  132. // from 12 to 20 even as the actual entries remain unchanged.
  133. EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size);
  134. // Move to the next entry. This function's behavior is undefined if
  135. // at_end() is true when it is called.
  136. EntryIterator &operator++() { Fetch(); entry_.index++; return *this; }
  137. // Dereferencing this iterator produces a reference to an Entry structure
  138. // that holds the current entry's values. The entry is owned by this
  139. // EntryIterator, and will be invalidated at the next call to operator++.
  140. const Entry &operator*() const { return entry_; }
  141. const Entry *operator->() const { return &entry_; }
  142. private:
  143. // Read the STABS entry at cursor_, and set entry_ appropriately.
  144. void Fetch();
  145. // The size of entries' value field, in bytes.
  146. size_t value_size_;
  147. // A byte cursor traversing buffer_.
  148. ByteCursor cursor_;
  149. // Values for the entry this iterator refers to.
  150. Entry entry_;
  151. };
  152. // A source line, saved to be reported later.
  153. struct Line {
  154. uint64_t address;
  155. const char *filename;
  156. int number;
  157. };
  158. // Return the name of the current symbol.
  159. const char *SymbolString();
  160. // Process a compilation unit starting at symbol_. Return true
  161. // to continue processing, or false to abort.
  162. bool ProcessCompilationUnit();
  163. // Process a function in current_source_file_ starting at symbol_.
  164. // Return true to continue processing, or false to abort.
  165. bool ProcessFunction();
  166. // Process an exported function symbol.
  167. // Return true to continue processing, or false to abort.
  168. bool ProcessExtern();
  169. // The STABS entries being parsed.
  170. ByteBuffer entries_;
  171. // The string section to which the entries refer.
  172. ByteBuffer strings_;
  173. // The iterator walking the STABS entries.
  174. EntryIterator iterator_;
  175. // True if the data is "unitized"; see the explanation in the comment for
  176. // StabsReader::StabsReader.
  177. bool unitized_;
  178. StabsHandler *handler_;
  179. // The offset of the current compilation unit's strings within stabstr_.
  180. size_t string_offset_;
  181. // The value string_offset_ should have for the next compilation unit,
  182. // as established by N_UNDF entries.
  183. size_t next_cu_string_offset_;
  184. // The current source file name.
  185. const char *current_source_file_;
  186. // Mac OS X STABS place SLINE records before functions; we accumulate a
  187. // vector of these until we see the FUN record, and then report them
  188. // after the StartFunction call.
  189. std::vector<Line> queued_lines_;
  190. };
  // Consumer-provided callback structure for the STABS reader.  Clients
  // of the STABS reader provide an instance of this structure.  The
  // reader then invokes the member functions of that instance to report
  // the information it finds.
  //
  // The default definitions of the bool-returning member functions do
  // nothing, and return true so processing will continue.  Warning is pure
  // virtual and has no default: every concrete handler must define it.
  class StabsHandler {
   public:
    StabsHandler() { }
    virtual ~StabsHandler() { }

    // Some general notes about the handler callback functions:
    //
    // - Processing proceeds until the end of the .stab section, or until
    //   one of these functions returns false.
    //
    // - The addresses given are as reported in the STABS info, without
    //   regard for whether the module may be loaded at different
    //   addresses at different times (a shared library, say).  When
    //   processing STABS from an ELF shared library, the addresses given
    //   all assume the library is loaded at its nominal load address.
    //   They are *not* offsets from the nominal load address.  If you
    //   want offsets, you must subtract off the library's nominal load
    //   address.
    //
    // - The arguments to these functions named FILENAME are all
    //   references to strings stored in the .stabstr section.  Because
    //   both the Linux and Solaris linkers factor out duplicate strings
    //   from the .stabstr section, the consumer can assume that if two
    //   FILENAME values are different addresses, they represent different
    //   file names.
    //
    //   Thus, it's safe to use (say) std::map<const char *, ...>, which
    //   does string address comparisons, not string content comparisons.
    //   Since all the strings are in same array of characters --- the
    //   .stabstr section --- comparing their addresses produces
    //   predictable, if not lexicographically meaningful, results.

    // Begin processing a compilation unit whose main source file is
    // named FILENAME, and whose base address is ADDRESS.  If
    // BUILD_DIRECTORY is non-NULL, it is the name of the build
    // directory in which the compilation occurred.
    // Return true to continue processing, or false to stop.
    virtual bool StartCompilationUnit(const char *filename, uint64_t address,
                                      const char *build_directory) {
      return true;
    }

    // Finish processing the compilation unit.  If ADDRESS is non-zero,
    // it is the ending address of the compilation unit.  If ADDRESS is
    // zero, then the compilation unit's ending address is not
    // available, and the consumer must infer it by other means.
    // Return true to continue processing, or false to stop.
    virtual bool EndCompilationUnit(uint64_t address) { return true; }

    // Begin processing a function named NAME, whose starting address is
    // ADDRESS.  This function belongs to the compilation unit that was
    // most recently started but not ended.
    //
    // Note that, unlike filenames, NAME is not a pointer into the
    // .stabstr section; this is because the name as it appears in the
    // STABS data is followed by type information.  The value passed to
    // StartFunction is the function name alone.
    //
    // In languages that use name mangling, like C++, NAME is mangled.
    // Return true to continue processing, or false to stop.
    virtual bool StartFunction(const std::string &name, uint64_t address) {
      return true;
    }

    // Finish processing the function.  If ADDRESS is non-zero, it is
    // the ending address for the function.  If ADDRESS is zero, then
    // the function's ending address is not available, and the consumer
    // must infer it by other means.
    // Return true to continue processing, or false to stop.
    virtual bool EndFunction(uint64_t address) { return true; }

    // Report that the code at ADDRESS is attributable to line NUMBER of
    // the source file named FILENAME.  The caller must infer the ending
    // address of the line.
    // Return true to continue processing, or false to stop.
    virtual bool Line(uint64_t address, const char *filename, int number) {
      return true;
    }

    // Report that an exported function NAME is present at ADDRESS.
    // The size of the function is unknown.
    // Return true to continue processing, or false to stop.
    virtual bool Extern(const std::string &name, uint64_t address) {
      return true;
    }

    // Report a warning.  FORMAT is a printf-like format string,
    // specifying how to format the subsequent arguments.  There is no
    // default definition; derived classes must provide one.
    virtual void Warning(const char *format, ...) = 0;
  };
  271. } // namespace google_breakpad
  272. #endif // COMMON_STABS_READER_H__