PageRenderTime 22ms CodeModel.GetById 9ms app.highlight 9ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/common/dwarf/bytereader.h

http://github.com/tomahawk-player/tomahawk
C++ Header | 310 lines | 50 code | 37 blank | 223 comment | 0 complexity | ad1e12ff041a46a1192b160abc71e7c5 MD5 | raw file
  1// -*- mode: C++ -*-
  2
  3// Copyright (c) 2010 Google Inc. All Rights Reserved.
  4//
  5// Redistribution and use in source and binary forms, with or without
  6// modification, are permitted provided that the following conditions are
  7// met:
  8//
  9//     * Redistributions of source code must retain the above copyright
 10// notice, this list of conditions and the following disclaimer.
 11//     * Redistributions in binary form must reproduce the above
 12// copyright notice, this list of conditions and the following disclaimer
 13// in the documentation and/or other materials provided with the
 14// distribution.
 15//     * Neither the name of Google Inc. nor the names of its
 16// contributors may be used to endorse or promote products derived from
 17// this software without specific prior written permission.
 18//
 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30
 31#ifndef COMMON_DWARF_BYTEREADER_H__
 32#define COMMON_DWARF_BYTEREADER_H__
 33
 34#include <string>
 35#include "common/dwarf/types.h"
 36#include "common/dwarf/dwarf2enums.h"
 37
 38namespace dwarf2reader {
 39
 40// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN
 41// because it conflicts with a macro
 42enum Endianness {
 43  ENDIANNESS_BIG,
 44  ENDIANNESS_LITTLE
 45};
 46
 47// A ByteReader knows how to read single- and multi-byte values of
 48// various endiannesses, sizes, and encodings, as used in DWARF
 49// debugging information and Linux C++ exception handling data.
 50class ByteReader {
 51 public:
 52  // Construct a ByteReader capable of reading one-, two-, four-, and
 53  // eight-byte values according to ENDIANNESS, absolute machine-sized
 54  // addresses, DWARF-style "initial length" values, signed and
 55  // unsigned LEB128 numbers, and Linux C++ exception handling data's
 56  // encoded pointers.
 57  explicit ByteReader(enum Endianness endianness);
 58  virtual ~ByteReader();
 59
 60  // Read a single byte from BUFFER and return it as an unsigned 8 bit
 61  // number.
 62  uint8 ReadOneByte(const char* buffer) const;
 63
 64  // Read two bytes from BUFFER and return them as an unsigned 16 bit
 65  // number, using this ByteReader's endianness.
 66  uint16 ReadTwoBytes(const char* buffer) const;
 67
 68  // Read four bytes from BUFFER and return them as an unsigned 32 bit
 69  // number, using this ByteReader's endianness. This function returns
 70  // a uint64 so that it is compatible with ReadAddress and
 71  // ReadOffset. The number it returns will never be outside the range
 72  // of an unsigned 32 bit integer.
 73  uint64 ReadFourBytes(const char* buffer) const;
 74
 75  // Read eight bytes from BUFFER and return them as an unsigned 64
 76  // bit number, using this ByteReader's endianness.
 77  uint64 ReadEightBytes(const char* buffer) const;
 78
 79  // Read an unsigned LEB128 (Little Endian Base 128) number from
 80  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
 81  // the number of bytes read.
 82  //
 83  // The unsigned LEB128 representation of an integer N is a variable
 84  // number of bytes:
 85  //
 86  // - If N is between 0 and 0x7f, then its unsigned LEB128
 87  //   representation is a single byte whose value is N.
 88  //
 89  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
 90  //   0x80, followed by the unsigned LEB128 representation of N /
 91  //   128, rounded towards negative infinity.
 92  //
 93  // In other words, we break VALUE into groups of seven bits, put
 94  // them in little-endian order, and then write them as eight-bit
 95  // bytes with the high bit on all but the last.
 96  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
 97
 98  // Read a signed LEB128 number from BUFFER and return it as an
 99  // signed 64 bit integer. Set LEN to the number of bytes read.
100  //
101  // The signed LEB128 representation of an integer N is a variable
102  // number of bytes:
103  //
104  // - If N is between -0x40 and 0x3f, then its signed LEB128
105  //   representation is a single byte whose value is N in two's
106  //   complement.
107  //
108  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
109  //   0x80, followed by the signed LEB128 representation of N / 128,
110  //   rounded towards negative infinity.
111  //
112  // In other words, we break VALUE into groups of seven bits, put
113  // them in little-endian order, and then write them as eight-bit
114  // bytes with the high bit on all but the last.
115  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
116
117  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
118  // must be either 4 or 8. (DWARF allows addresses to be any number of
119  // bytes in length from 1 to 255, but we only support 32- and 64-bit
120  // addresses at the moment.) You must call this before using the
121  // ReadAddress member function.
122  //
123  // For data in a .debug_info section, or something that .debug_info
124  // refers to like line number or macro data, the compilation unit
125  // header's address_size field indicates the address size to use. Call
126  // frame information doesn't indicate its address size (a shortcoming of
127  // the spec); you must supply the appropriate size based on the
128  // architecture of the target machine.
129  void SetAddressSize(uint8 size);
130
131  // Return the current address size, in bytes. This is either 4,
132  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
133  uint8 AddressSize() const { return address_size_; }
134
135  // Read an address from BUFFER and return it as an unsigned 64 bit
136  // integer, respecting this ByteReader's endianness and address size. You
137  // must call SetAddressSize before calling this function.
138  uint64 ReadAddress(const char* buffer) const;
139
140  // DWARF actually defines two slightly different formats: 32-bit DWARF
141  // and 64-bit DWARF. This is *not* related to the size of registers or
142  // addresses on the target machine; it refers only to the size of section
143  // offsets and data lengths appearing in the DWARF data. One only needs
144  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
145  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
146  // debugging data itself is very large.
147  //
148  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
149  // compilation unit and call frame information entry begins with an
150  // "initial length" field, which, in addition to giving the length of the
151  // data, also indicates the size of section offsets and lengths appearing
152  // in that data. The ReadInitialLength member function, below, reads an
153  // initial length and sets the ByteReader's offset size as a side effect.
154  // Thus, in the normal process of reading DWARF data, the appropriate
155  // offset size is set automatically. So, you should only need to call
156  // SetOffsetSize if you are using the same ByteReader to jump from the
157  // midst of one block of DWARF data into another.
158
159  // Read a DWARF "initial length" field from START, and return it as
160  // an unsigned 64 bit integer, respecting this ByteReader's
161  // endianness. Set *LEN to the length of the initial length in
162  // bytes, either four or twelve. As a side effect, set this
163  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
164  // initial length) or 8 (if we see a 64-bit DWARF initial length).
165  //
166  // A DWARF initial length is either:
167  //
168  // - a byte count stored as an unsigned 32-bit value less than
169  //   0xffffff00, indicating that the data whose length is being
170  //   measured uses the 32-bit DWARF format, or
171  //
172  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
173  //   indicating that the data whose length is being measured uses
174  //   the 64-bit DWARF format.
175  uint64 ReadInitialLength(const char* start, size_t* len);
176
177  // Read an offset from BUFFER and return it as an unsigned 64 bit
178  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
179  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
180  // long. You must call ReadInitialLength or SetOffsetSize before calling
181  // this function; see the comments above for details.
182  uint64 ReadOffset(const char* buffer) const;
183
184  // Return the current offset size, in bytes.
185  // A return value of 4 indicates that we are reading 32-bit DWARF.
186  // A return value of 8 indicates that we are reading 64-bit DWARF.
187  uint8 OffsetSize() const { return offset_size_; }
188
189  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
190  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
191  // Usually, you should not call this function yourself; instead, let a
192  // call to ReadInitialLength establish the data's offset size
193  // automatically.
194  void SetOffsetSize(uint8 size);
195
196  // The Linux C++ ABI uses a variant of DWARF call frame information
197  // for exception handling. This data is included in the program's
198  // address space as the ".eh_frame" section, and intepreted at
199  // runtime to walk the stack, find exception handlers, and run
200  // cleanup code. The format is mostly the same as DWARF CFI, with
201  // some adjustments made to provide the additional
202  // exception-handling data, and to make the data easier to work with
203  // in memory --- for example, to allow it to be placed in read-only
204  // memory even when describing position-independent code.
205  //
206  // In particular, exception handling data can select a number of
207  // different encodings for pointers that appear in the data, as
208  // described by the DwarfPointerEncoding enum. There are actually
209  // four axes(!) to the encoding:
210  //
211  // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
212  //   the DWARF LEB128 encoding.
213  //
214  // - The pointer's signedness: pointers can be signed or unsigned.
215  //
216  // - The pointer's base address: the data stored in the exception
217  //   handling data can be the actual address (that is, an absolute
218  //   pointer), or relative to one of a number of different base
219  //   addreses --- including that of the encoded pointer itself, for
220  //   a form of "pc-relative" addressing.
221  //
222  // - The pointer may be indirect: it may be the address where the
223  //   true pointer is stored. (This is used to refer to things via
224  //   global offset table entries, program linkage table entries, or
225  //   other tricks used in position-independent code.)
226  //
227  // There are also two options that fall outside that matrix
228  // altogether: the pointer may be omitted, or it may have padding to
229  // align it on an appropriate address boundary. (That last option
230  // may seem like it should be just another axis, but it is not.)
231
232  // Indicate that the exception handling data is loaded starting at
233  // SECTION_BASE, and that the start of its buffer in our own memory
234  // is BUFFER_BASE. This allows us to find the address that a given
235  // byte in our buffer would have when loaded into the program the
236  // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
237  void SetCFIDataBase(uint64 section_base, const char *buffer_base);
238
239  // Indicate that the base address of the program's ".text" section
240  // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
241  void SetTextBase(uint64 text_base);
242
243  // Indicate that the base address for DW_EH_PE_datarel pointers is
244  // DATA_BASE. The proper value depends on the ABI; it is usually the
245  // address of the global offset table, held in a designated register in
246  // position-independent code. You will need to look at the startup code
247  // for the target system to be sure. I tried; my eyes bled.
248  void SetDataBase(uint64 data_base);
249
250  // Indicate that the base address for the FDE we are processing is
251  // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
252  // pointers. (This encoding does not seem to be used by the GNU
253  // toolchain.)
254  void SetFunctionBase(uint64 function_base);
255
256  // Indicate that we are no longer processing any FDE, so any use of
257  // a DW_EH_PE_funcrel encoding is an error.
258  void ClearFunctionBase();
259
260  // Return true if ENCODING is a valid pointer encoding.
261  bool ValidEncoding(DwarfPointerEncoding encoding) const;
262
263  // Return true if we have all the information we need to read a
264  // pointer that uses ENCODING. This checks that the appropriate
265  // SetFooBase function for ENCODING has been called.
266  bool UsableEncoding(DwarfPointerEncoding encoding) const;
267
268  // Read an encoded pointer from BUFFER using ENCODING; return the
269  // absolute address it represents, and set *LEN to the pointer's
270  // length in bytes, including any padding for aligned pointers.
271  //
272  // This function calls 'abort' if ENCODING is invalid or refers to a
273  // base address this reader hasn't been given, so you should check
274  // with ValidEncoding and UsableEncoding first if you would rather
275  // die in a more helpful way.
276  uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding,
277                            size_t *len) const;
278
279 private:
280
281  // Function pointer type for our address and offset readers.
282  typedef uint64 (ByteReader::*AddressReader)(const char*) const;
283
284  // Read an offset from BUFFER and return it as an unsigned 64 bit
285  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
286  // generally depending on the amount of DWARF2/3 info present.
287  // This function pointer gets set by SetOffsetSize.
288  AddressReader offset_reader_;
289
290  // Read an address from BUFFER and return it as an unsigned 64 bit
291  // integer.  DWARF2/3 allow addresses to be any size from 0-255
292  // bytes currently.  Internally we support 4 and 8 byte addresses,
293  // and will CHECK on anything else.
294  // This function pointer gets set by SetAddressSize.
295  AddressReader address_reader_;
296
297  Endianness endian_;
298  uint8 address_size_;
299  uint8 offset_size_;
300
301  // Base addresses for Linux C++ exception handling data's encoded pointers.
302  bool have_section_base_, have_text_base_, have_data_base_;
303  bool have_function_base_;
304  uint64 section_base_, text_base_, data_base_, function_base_;
305  const char *buffer_base_;
306};
307
308}  // namespace dwarf2reader
309
310#endif  // COMMON_DWARF_BYTEREADER_H__