/thirdparty/breakpad/common/dwarf/bytereader.h

http://github.com/tomahawk-player/tomahawk · C++ Header · 310 lines · 50 code · 37 blank · 223 comment · 0 complexity · ad1e12ff041a46a1192b160abc71e7c5 MD5 · raw file

  1. // -*- mode: C++ -*-
  2. // Copyright (c) 2010 Google Inc. All Rights Reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. #ifndef COMMON_DWARF_BYTEREADER_H__
  30. #define COMMON_DWARF_BYTEREADER_H__
  31. #include <string>
  32. #include "common/dwarf/types.h"
  33. #include "common/dwarf/dwarf2enums.h"
  34. namespace dwarf2reader {
  // Byte order of the multi-byte values a ByteReader decodes.
  //
  // We can't use the obvious names LITTLE_ENDIAN and BIG_ENDIAN because
  // they conflict with macros of the same name.
  enum Endianness {
    ENDIANNESS_BIG,    // Most significant byte is stored first.
    ENDIANNESS_LITTLE  // Least significant byte is stored first.
  };
  41. // A ByteReader knows how to read single- and multi-byte values of
  42. // various endiannesses, sizes, and encodings, as used in DWARF
  43. // debugging information and Linux C++ exception handling data.
  44. class ByteReader {
  45. public:
  46. // Construct a ByteReader capable of reading one-, two-, four-, and
  47. // eight-byte values according to ENDIANNESS, absolute machine-sized
  48. // addresses, DWARF-style "initial length" values, signed and
  49. // unsigned LEB128 numbers, and Linux C++ exception handling data's
  50. // encoded pointers.
  51. explicit ByteReader(enum Endianness endianness);
  52. virtual ~ByteReader();
  53. // Read a single byte from BUFFER and return it as an unsigned 8 bit
  54. // number.
  55. uint8 ReadOneByte(const char* buffer) const;
  56. // Read two bytes from BUFFER and return them as an unsigned 16 bit
  57. // number, using this ByteReader's endianness.
  58. uint16 ReadTwoBytes(const char* buffer) const;
  59. // Read four bytes from BUFFER and return them as an unsigned 32 bit
  60. // number, using this ByteReader's endianness. This function returns
  61. // a uint64 so that it is compatible with ReadAddress and
  62. // ReadOffset. The number it returns will never be outside the range
  63. // of an unsigned 32 bit integer.
  64. uint64 ReadFourBytes(const char* buffer) const;
  65. // Read eight bytes from BUFFER and return them as an unsigned 64
  66. // bit number, using this ByteReader's endianness.
  67. uint64 ReadEightBytes(const char* buffer) const;
  68. // Read an unsigned LEB128 (Little Endian Base 128) number from
  69. // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
  70. // the number of bytes read.
  71. //
  72. // The unsigned LEB128 representation of an integer N is a variable
  73. // number of bytes:
  74. //
  75. // - If N is between 0 and 0x7f, then its unsigned LEB128
  76. // representation is a single byte whose value is N.
  77. //
  78. // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
  79. // 0x80, followed by the unsigned LEB128 representation of N /
  80. // 128, rounded towards negative infinity.
  81. //
  82. // In other words, we break VALUE into groups of seven bits, put
  83. // them in little-endian order, and then write them as eight-bit
  84. // bytes with the high bit on all but the last.
  85. uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
  86. // Read a signed LEB128 number from BUFFER and return it as an
  87. // signed 64 bit integer. Set LEN to the number of bytes read.
  88. //
  89. // The signed LEB128 representation of an integer N is a variable
  90. // number of bytes:
  91. //
  92. // - If N is between -0x40 and 0x3f, then its signed LEB128
  93. // representation is a single byte whose value is N in two's
  94. // complement.
  95. //
  96. // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
  97. // 0x80, followed by the signed LEB128 representation of N / 128,
  98. // rounded towards negative infinity.
  99. //
  100. // In other words, we break VALUE into groups of seven bits, put
  101. // them in little-endian order, and then write them as eight-bit
  102. // bytes with the high bit on all but the last.
  103. int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
  104. // Indicate that addresses on this architecture are SIZE bytes long. SIZE
  105. // must be either 4 or 8. (DWARF allows addresses to be any number of
  106. // bytes in length from 1 to 255, but we only support 32- and 64-bit
  107. // addresses at the moment.) You must call this before using the
  108. // ReadAddress member function.
  109. //
  110. // For data in a .debug_info section, or something that .debug_info
  111. // refers to like line number or macro data, the compilation unit
  112. // header's address_size field indicates the address size to use. Call
  113. // frame information doesn't indicate its address size (a shortcoming of
  114. // the spec); you must supply the appropriate size based on the
  115. // architecture of the target machine.
  116. void SetAddressSize(uint8 size);
  117. // Return the current address size, in bytes. This is either 4,
  118. // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
  119. uint8 AddressSize() const { return address_size_; }
  120. // Read an address from BUFFER and return it as an unsigned 64 bit
  121. // integer, respecting this ByteReader's endianness and address size. You
  122. // must call SetAddressSize before calling this function.
  123. uint64 ReadAddress(const char* buffer) const;
  124. // DWARF actually defines two slightly different formats: 32-bit DWARF
  125. // and 64-bit DWARF. This is *not* related to the size of registers or
  126. // addresses on the target machine; it refers only to the size of section
  127. // offsets and data lengths appearing in the DWARF data. One only needs
  128. // 64-bit DWARF when the debugging data itself is larger than 4GiB.
  129. // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
  130. // debugging data itself is very large.
  131. //
  132. // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
  133. // compilation unit and call frame information entry begins with an
  134. // "initial length" field, which, in addition to giving the length of the
  135. // data, also indicates the size of section offsets and lengths appearing
  136. // in that data. The ReadInitialLength member function, below, reads an
  137. // initial length and sets the ByteReader's offset size as a side effect.
  138. // Thus, in the normal process of reading DWARF data, the appropriate
  139. // offset size is set automatically. So, you should only need to call
  140. // SetOffsetSize if you are using the same ByteReader to jump from the
  141. // midst of one block of DWARF data into another.
  142. // Read a DWARF "initial length" field from START, and return it as
  143. // an unsigned 64 bit integer, respecting this ByteReader's
  144. // endianness. Set *LEN to the length of the initial length in
  145. // bytes, either four or twelve. As a side effect, set this
  146. // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
  147. // initial length) or 8 (if we see a 64-bit DWARF initial length).
  148. //
  149. // A DWARF initial length is either:
  150. //
  151. // - a byte count stored as an unsigned 32-bit value less than
  152. // 0xffffff00, indicating that the data whose length is being
  153. // measured uses the 32-bit DWARF format, or
  154. //
  155. // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
  156. // indicating that the data whose length is being measured uses
  157. // the 64-bit DWARF format.
  158. uint64 ReadInitialLength(const char* start, size_t* len);
  159. // Read an offset from BUFFER and return it as an unsigned 64 bit
  160. // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
  161. // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
  162. // long. You must call ReadInitialLength or SetOffsetSize before calling
  163. // this function; see the comments above for details.
  164. uint64 ReadOffset(const char* buffer) const;
  165. // Return the current offset size, in bytes.
  166. // A return value of 4 indicates that we are reading 32-bit DWARF.
  167. // A return value of 8 indicates that we are reading 64-bit DWARF.
  168. uint8 OffsetSize() const { return offset_size_; }
  169. // Indicate that section offsets and lengths are SIZE bytes long. SIZE
  170. // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
  171. // Usually, you should not call this function yourself; instead, let a
  172. // call to ReadInitialLength establish the data's offset size
  173. // automatically.
  174. void SetOffsetSize(uint8 size);
  175. // The Linux C++ ABI uses a variant of DWARF call frame information
  176. // for exception handling. This data is included in the program's
  177. // address space as the ".eh_frame" section, and intepreted at
  178. // runtime to walk the stack, find exception handlers, and run
  179. // cleanup code. The format is mostly the same as DWARF CFI, with
  180. // some adjustments made to provide the additional
  181. // exception-handling data, and to make the data easier to work with
  182. // in memory --- for example, to allow it to be placed in read-only
  183. // memory even when describing position-independent code.
  184. //
  185. // In particular, exception handling data can select a number of
  186. // different encodings for pointers that appear in the data, as
  187. // described by the DwarfPointerEncoding enum. There are actually
  188. // four axes(!) to the encoding:
  189. //
  190. // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
  191. // the DWARF LEB128 encoding.
  192. //
  193. // - The pointer's signedness: pointers can be signed or unsigned.
  194. //
  195. // - The pointer's base address: the data stored in the exception
  196. // handling data can be the actual address (that is, an absolute
  197. // pointer), or relative to one of a number of different base
  198. // addreses --- including that of the encoded pointer itself, for
  199. // a form of "pc-relative" addressing.
  200. //
  201. // - The pointer may be indirect: it may be the address where the
  202. // true pointer is stored. (This is used to refer to things via
  203. // global offset table entries, program linkage table entries, or
  204. // other tricks used in position-independent code.)
  205. //
  206. // There are also two options that fall outside that matrix
  207. // altogether: the pointer may be omitted, or it may have padding to
  208. // align it on an appropriate address boundary. (That last option
  209. // may seem like it should be just another axis, but it is not.)
  210. // Indicate that the exception handling data is loaded starting at
  211. // SECTION_BASE, and that the start of its buffer in our own memory
  212. // is BUFFER_BASE. This allows us to find the address that a given
  213. // byte in our buffer would have when loaded into the program the
  214. // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
  215. void SetCFIDataBase(uint64 section_base, const char *buffer_base);
  216. // Indicate that the base address of the program's ".text" section
  217. // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
  218. void SetTextBase(uint64 text_base);
  219. // Indicate that the base address for DW_EH_PE_datarel pointers is
  220. // DATA_BASE. The proper value depends on the ABI; it is usually the
  221. // address of the global offset table, held in a designated register in
  222. // position-independent code. You will need to look at the startup code
  223. // for the target system to be sure. I tried; my eyes bled.
  224. void SetDataBase(uint64 data_base);
  225. // Indicate that the base address for the FDE we are processing is
  226. // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
  227. // pointers. (This encoding does not seem to be used by the GNU
  228. // toolchain.)
  229. void SetFunctionBase(uint64 function_base);
  230. // Indicate that we are no longer processing any FDE, so any use of
  231. // a DW_EH_PE_funcrel encoding is an error.
  232. void ClearFunctionBase();
  233. // Return true if ENCODING is a valid pointer encoding.
  234. bool ValidEncoding(DwarfPointerEncoding encoding) const;
  235. // Return true if we have all the information we need to read a
  236. // pointer that uses ENCODING. This checks that the appropriate
  237. // SetFooBase function for ENCODING has been called.
  238. bool UsableEncoding(DwarfPointerEncoding encoding) const;
  239. // Read an encoded pointer from BUFFER using ENCODING; return the
  240. // absolute address it represents, and set *LEN to the pointer's
  241. // length in bytes, including any padding for aligned pointers.
  242. //
  243. // This function calls 'abort' if ENCODING is invalid or refers to a
  244. // base address this reader hasn't been given, so you should check
  245. // with ValidEncoding and UsableEncoding first if you would rather
  246. // die in a more helpful way.
  247. uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding,
  248. size_t *len) const;
  249. private:
  250. // Function pointer type for our address and offset readers.
  251. typedef uint64 (ByteReader::*AddressReader)(const char*) const;
  252. // Read an offset from BUFFER and return it as an unsigned 64 bit
  253. // integer. DWARF2/3 define offsets as either 4 or 8 bytes,
  254. // generally depending on the amount of DWARF2/3 info present.
  255. // This function pointer gets set by SetOffsetSize.
  256. AddressReader offset_reader_;
  257. // Read an address from BUFFER and return it as an unsigned 64 bit
  258. // integer. DWARF2/3 allow addresses to be any size from 0-255
  259. // bytes currently. Internally we support 4 and 8 byte addresses,
  260. // and will CHECK on anything else.
  261. // This function pointer gets set by SetAddressSize.
  262. AddressReader address_reader_;
  263. Endianness endian_;
  264. uint8 address_size_;
  265. uint8 offset_size_;
  266. // Base addresses for Linux C++ exception handling data's encoded pointers.
  267. bool have_section_base_, have_text_base_, have_data_base_;
  268. bool have_function_base_;
  269. uint64 section_base_, text_base_, data_base_, function_base_;
  270. const char *buffer_base_;
  271. };
  272. } // namespace dwarf2reader
  273. #endif // COMMON_DWARF_BYTEREADER_H__