PageRenderTime 35ms CodeModel.GetById 21ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/common/stabs_reader.h

http://github.com/tomahawk-player/tomahawk
C++ Header | 325 lines | 89 code | 53 blank | 183 comment | 0 complexity | af743b7c3133b2c521108a736526443f MD5 | raw file
  1// -*- mode: c++ -*-
  2
  3// Copyright (c) 2010 Google Inc. All Rights Reserved.
  4//
  5// Redistribution and use in source and binary forms, with or without
  6// modification, are permitted provided that the following conditions are
  7// met:
  8//
  9//     * Redistributions of source code must retain the above copyright
 10// notice, this list of conditions and the following disclaimer.
 11//     * Redistributions in binary form must reproduce the above
 12// copyright notice, this list of conditions and the following disclaimer
 13// in the documentation and/or other materials provided with the
 14// distribution.
 15//     * Neither the name of Google Inc. nor the names of its
 16// contributors may be used to endorse or promote products derived from
 17// this software without specific prior written permission.
 18//
 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30
 31// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 32
// stabs_reader.h: Define StabsReader, a parser for STABS debugging
// information. A description of the STABS debugging format can be
// found at:
//
//    http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
//
// The comments here assume you understand the format.
//
// This parser can handle big-endian and little-endian data, and the symbol
// values may be either 32 or 64 bits long. It handles both STABS in
// sections (as used on Linux) and STABS appearing directly in an
// a.out-like symbol table (as used in Darwin OS X Mach-O files).

 46#ifndef COMMON_STABS_READER_H__
 47#define COMMON_STABS_READER_H__
 48
 49#include <stddef.h>
 50#include <stdint.h>
 51
 52#ifdef HAVE_CONFIG_H
 53#include <config.h>
 54#endif
 55
 56#ifdef HAVE_A_OUT_H
 57#include <a.out.h>
 58#endif
 59#ifdef HAVE_MACH_O_NLIST_H
 60#include <mach-o/nlist.h>
 61#endif
 62
 63#include <string>
 64#include <vector>
 65
 66#include "common/byte_cursor.h"
 67
 68namespace google_breakpad {
 69
 70class StabsHandler;
 71
 72class StabsReader {
 73 public:
 74  // Create a reader for the STABS debug information whose .stab section is
 75  // being traversed by ITERATOR, and whose .stabstr section is referred to
 76  // by STRINGS. The reader will call the member functions of HANDLER to
 77  // report the information it finds, when the reader's 'Process' member
 78  // function is called.
 79  //
 80  // BIG_ENDIAN should be true if the entries in the .stab section are in
 81  // big-endian form, or false if they are in little-endian form.
 82  //
 83  // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
 84  // field in each entry in bytes.
 85  //
 86  // UNITIZED should be true if the STABS data is stored in units with
 87  // N_UNDF headers. This is usually the case for STABS stored in sections,
 88  // like .stab/.stabstr, and usually not the case for STABS stored in the
 89  // actual symbol table; UNITIZED should be true when parsing Linux stabs,
 90  // false when parsing Mac OS X STABS. For details, see:
 91  // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
 92  // 
 93  // Note that, in ELF, the .stabstr section should be found using the
 94  // 'sh_link' field of the .stab section header, not by name.
 95  StabsReader(const uint8_t *stab,    size_t stab_size,
 96              const uint8_t *stabstr, size_t stabstr_size,
 97              bool big_endian, size_t value_size, bool unitized,
 98              StabsHandler *handler);
 99
100  // Process the STABS data, calling the handler's member functions to
101  // report what we find.  While the handler functions return true,
102  // continue to process until we reach the end of the section.  If we
103  // processed the entire section and all handlers returned true,
104  // return true.  If any handler returned false, return false.
105  // 
106  // This is only meant to be called once per StabsReader instance;
107  // resuming a prior processing pass that stopped abruptly isn't supported.
108  bool Process();
109
110 private:
111
112  // An class for walking arrays of STABS entries. This isolates the main
113  // STABS reader from the exact format (size; endianness) of the entries
114  // themselves.
115  class EntryIterator {
116   public:
117    // The contents of a STABS entry, adjusted for the host's endianness,
118    // word size, 'struct nlist' layout, and so on.
119    struct Entry {
120      // True if this iterator has reached the end of the entry array. When
121      // this is set, the other members of this structure are not valid.
122      bool at_end;
123
124      // The number of this entry within the list.
125      size_t index;
126
127      // The current entry's name offset. This is the offset within the
128      // current compilation unit's strings, as establish by the N_UNDF entries.
129      size_t name_offset;
130
131      // The current entry's type, 'other' field, descriptor, and value.
132      unsigned char type;
133      unsigned char other;
134      short descriptor;
135      uint64_t value;
136    };
137
138    // Create a EntryIterator walking the entries in BUFFER. Treat the
139    // entries as big-endian if BIG_ENDIAN is true, as little-endian
140    // otherwise. Assume each entry has a 'value' field whose size is
141    // VALUE_SIZE.
142    //
143    // This would not be terribly clean to extend to other format variations,
144    // but it's enough to handle Linux and Mac, and we'd like STABS to die
145    // anyway.
146    //
147    // For the record: on Linux, STABS entry values are always 32 bits,
148    // regardless of the architecture address size (don't ask me why); on
149    // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
150    // size for a Linux ELF .stab section varies according to the ELF class
151    // from 12 to 20 even as the actual entries remain unchanged.
152    EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size);
153
154    // Move to the next entry. This function's behavior is undefined if
155    // at_end() is true when it is called.
156    EntryIterator &operator++() { Fetch(); entry_.index++; return *this; }
157
158    // Dereferencing this iterator produces a reference to an Entry structure
159    // that holds the current entry's values. The entry is owned by this
160    // EntryIterator, and will be invalidated at the next call to operator++.
161    const Entry &operator*() const { return entry_; }
162    const Entry *operator->() const { return &entry_; }
163
164   private:
165    // Read the STABS entry at cursor_, and set entry_ appropriately.
166    void Fetch();
167
168    // The size of entries' value field, in bytes.
169    size_t value_size_;
170
171    // A byte cursor traversing buffer_.
172    ByteCursor cursor_;
173
174    // Values for the entry this iterator refers to.
175    Entry entry_;
176  };
177
178  // A source line, saved to be reported later.
179  struct Line {
180    uint64_t address;
181    const char *filename;
182    int number;
183  };
184
185  // Return the name of the current symbol.
186  const char *SymbolString();
187
188  // Process a compilation unit starting at symbol_.  Return true
189  // to continue processing, or false to abort.
190  bool ProcessCompilationUnit();
191
192  // Process a function in current_source_file_ starting at symbol_.
193  // Return true to continue processing, or false to abort.
194  bool ProcessFunction();
195
196  // Process an exported function symbol.
197  // Return true to continue processing, or false to abort.
198  bool ProcessExtern();
199
200  // The STABS entries being parsed.
201  ByteBuffer entries_;
202
203  // The string section to which the entries refer.
204  ByteBuffer strings_;
205
206  // The iterator walking the STABS entries.
207  EntryIterator iterator_;
208
209  // True if the data is "unitized"; see the explanation in the comment for
210  // StabsReader::StabsReader.
211  bool unitized_;
212
213  StabsHandler *handler_;
214
215  // The offset of the current compilation unit's strings within stabstr_.
216  size_t string_offset_;
217
218  // The value string_offset_ should have for the next compilation unit,
219  // as established by N_UNDF entries.
220  size_t next_cu_string_offset_;
221
222  // The current source file name.
223  const char *current_source_file_;
224
225  // Mac OS X STABS place SLINE records before functions; we accumulate a
226  // vector of these until we see the FUN record, and then report them
227  // after the StartFunction call.
228  std::vector<Line> queued_lines_;
229};
230
// Consumer-provided callback structure for the STABS reader.  Clients
// of the STABS reader provide an instance of this structure.  The
// reader then invokes the member functions of that instance to report
// the information it finds.
//
// The default definitions of the reporting member functions do nothing,
// and return true so processing will continue.  The one exception is
// Warning, which is pure virtual: every consumer must implement it.
class StabsHandler {
 public:
  StabsHandler() { }
  virtual ~StabsHandler() { }

  // Some general notes about the handler callback functions:

  // Processing proceeds until the end of the .stab section, or until
  // one of these functions returns false.

  // The addresses given are as reported in the STABS info, without
  // regard for whether the module may be loaded at different
  // addresses at different times (a shared library, say).  When
  // processing STABS from an ELF shared library, the addresses given
  // all assume the library is loaded at its nominal load address.
  // They are *not* offsets from the nominal load address.  If you
  // want offsets, you must subtract off the library's nominal load
  // address.

  // The arguments to these functions named FILENAME are all
  // references to strings stored in the .stabstr section.  Because
  // both the Linux and Solaris linkers factor out duplicate strings
  // from the .stabstr section, the consumer can assume that if two
  // FILENAME values are different addresses, they represent different
  // file names.
  //
  // Thus, it's safe to use (say) std::map<const char *, ...>, which does
  // string address comparisons, not string content comparisons.
  // Since all the strings are in same array of characters --- the
  // .stabstr section --- comparing their addresses produces
  // predictable, if not lexicographically meaningful, results.

  // Begin processing a compilation unit whose main source file is
  // named FILENAME, and whose base address is ADDRESS.  If
  // BUILD_DIRECTORY is non-NULL, it is the name of the build
  // directory in which the compilation occurred.
  // Return true to continue processing, or false to stop.
  virtual bool StartCompilationUnit(const char *filename, uint64_t address,
                                    const char *build_directory) {
    return true;
  }

  // Finish processing the compilation unit.  If ADDRESS is non-zero,
  // it is the ending address of the compilation unit.  If ADDRESS is
  // zero, then the compilation unit's ending address is not
  // available, and the consumer must infer it by other means.
  // Return true to continue processing, or false to stop.
  virtual bool EndCompilationUnit(uint64_t address) { return true; }

  // Begin processing a function named NAME, whose starting address is
  // ADDRESS.  This function belongs to the compilation unit that was
  // most recently started but not ended.
  //
  // Note that, unlike filenames, NAME is not a pointer into the
  // .stabstr section; this is because the name as it appears in the
  // STABS data is followed by type information.  The value passed to
  // StartFunction is the function name alone.
  //
  // In languages that use name mangling, like C++, NAME is mangled.
  // Return true to continue processing, or false to stop.
  virtual bool StartFunction(const std::string &name, uint64_t address) {
    return true;
  }

  // Finish processing the function.  If ADDRESS is non-zero, it is
  // the ending address for the function.  If ADDRESS is zero, then
  // the function's ending address is not available, and the consumer
  // must infer it by other means.
  // Return true to continue processing, or false to stop.
  virtual bool EndFunction(uint64_t address) { return true; }

  // Report that the code at ADDRESS is attributable to line NUMBER of
  // the source file named FILENAME.  The caller must infer the ending
  // address of the line.
  // Return true to continue processing, or false to stop.
  virtual bool Line(uint64_t address, const char *filename, int number) {
    return true;
  }

  // Report that an exported function NAME is present at ADDRESS.
  // The size of the function is unknown.
  // Return true to continue processing, or false to stop.
  virtual bool Extern(const std::string &name, uint64_t address) {
    return true;
  }

  // Report a warning.  FORMAT is a printf-like format string,
  // specifying how to format the subsequent arguments.
  // This has no default definition; derived classes must provide one.
  virtual void Warning(const char *format, ...) = 0;
};
322
323} // namespace google_breakpad
324
325#endif  // COMMON_STABS_READER_H__