PageRenderTime 39ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/common/mac/macho_reader.h

http://github.com/tomahawk-player/tomahawk
C++ Header | 459 lines | 159 code | 85 blank | 215 comment | 1 complexity | 4187fac9b845f46fbf7ea9e43b0af3c9 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause, GPL-3.0, GPL-2.0
  1. // -*- mode: C++ -*-
  2. // Copyright (c) 2010, Google Inc.
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
  31. // macho_reader.h: A class for parsing Mach-O files.
  32. #ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
  33. #define BREAKPAD_COMMON_MAC_MACHO_READER_H_
  34. #include <mach-o/loader.h>
  35. #include <mach-o/fat.h>
  36. #include <stdint.h>
  37. #include <stdlib.h>
  38. #include <unistd.h>
  39. #include <map>
  40. #include <string>
  41. #include <vector>
  42. #include "common/byte_cursor.h"
  43. namespace google_breakpad {
  44. namespace mach_o {
  45. using std::map;
  46. using std::string;
  47. using std::vector;
  48. // The Mac headers don't specify particular types for these groups of
  49. // constants, but defining them here provides some documentation
  50. // value. We also give them the same width as the fields in which
  51. // they appear, which makes them a bit easier to use with ByteCursors.
  52. typedef uint32_t Magic;
  53. typedef uint32_t FileType;
  54. typedef uint32_t FileFlags;
  55. typedef uint32_t LoadCommandType;
  56. typedef uint32_t SegmentFlags;
  57. typedef uint32_t SectionFlags;
  58. // A parser for fat binary files, used to store universal binaries.
  59. // When applied to a (non-fat) Mach-O file, this behaves as if the
  60. // file were a fat file containing a single object file.
  61. class FatReader {
  62. public:
  63. // A class for reporting errors found while parsing fat binary files. The
  64. // default definitions of these methods print messages to stderr.
  65. class Reporter {
  66. public:
  67. // Create a reporter that attributes problems to |filename|.
  68. explicit Reporter(const string &filename) : filename_(filename) { }
  69. virtual ~Reporter() { }
  70. // The data does not begin with a fat binary or Mach-O magic number.
  71. // This is a fatal error.
  72. virtual void BadHeader();
  73. // The Mach-O fat binary file ends abruptly, without enough space
  74. // to contain an object file it claims is present.
  75. virtual void MisplacedObjectFile();
  76. // The file ends abruptly: either it is not large enough to hold a
  77. // complete header, or the header implies that contents are present
  78. // beyond the actual end of the file.
  79. virtual void TooShort();
  80. private:
  81. // The filename to which the reader should attribute problems.
  82. string filename_;
  83. };
  84. // Create a fat binary file reader that uses |reporter| to report problems.
  85. explicit FatReader(Reporter *reporter) : reporter_(reporter) { }
  86. // Read the |size| bytes at |buffer| as a fat binary file. On success,
  87. // return true; on failure, report the problem to reporter_ and return
  88. // false.
  89. //
  90. // If the data is a plain Mach-O file, rather than a fat binary file,
  91. // then the reader behaves as if it had found a fat binary file whose
  92. // single object file is the Mach-O file.
  93. bool Read(const uint8_t *buffer, size_t size);
  94. // Return an array of 'struct fat_arch' structures describing the
  95. // object files present in this fat binary file. Set |size| to the
  96. // number of elements in the array.
  97. //
  98. // Assuming Read returned true, the entries are validated: it is
  99. // safe to assume that the offsets and sizes in each 'struct
  100. // fat_arch' refer to subranges of the bytes passed to Read.
  101. //
  102. // If there are no object files in this fat binary, then this
  103. // function can return NULL.
  104. //
  105. // The array is owned by this FatReader instance; it will be freed when
  106. // this FatReader is destroyed.
  107. //
  108. // This function returns a C-style array instead of a vector to make it
  109. // possible to use the result with OS X functions like NXFindBestFatArch,
  110. // so that the symbol dumper will behave consistently with other OS X
  111. // utilities that work with fat binaries.
  112. const struct fat_arch *object_files(size_t *count) const {
  113. *count = object_files_.size();
  114. if (object_files_.size() > 0)
  115. return &object_files_[0];
  116. return NULL;
  117. }
  118. private:
  119. // We use this to report problems parsing the file's contents. (WEAK)
  120. Reporter *reporter_;
  121. // The contents of the fat binary or Mach-O file we're parsing. We do not
  122. // own the storage it refers to.
  123. ByteBuffer buffer_;
  124. // The magic number of this binary, in host byte order.
  125. Magic magic_;
  126. // The list of object files in this binary.
  127. // object_files_.size() == fat_header.nfat_arch
  128. vector<struct fat_arch> object_files_;
  129. };
  130. // A segment in a Mach-O file. All these fields have been byte-swapped as
  131. // appropriate for use by the executing architecture.
  132. struct Segment {
  133. // The ByteBuffers below point into the bytes passed to the Reader that
  134. // created this Segment.
  135. ByteBuffer section_list; // This segment's section list.
  136. ByteBuffer contents; // This segment's contents.
  137. // This segment's name.
  138. string name;
  139. // The address at which this segment should be loaded in memory. If
  140. // bits_64 is false, only the bottom 32 bits of this value are valid.
  141. uint64_t vmaddr;
  142. // The size of this segment when loaded into memory. This may be larger
  143. // than contents.Size(), in which case the extra area will be
  144. // initialized with zeros. If bits_64 is false, only the bottom 32 bits
  145. // of this value are valid.
  146. uint64_t vmsize;
  147. // The maximum and initial VM protection of this segment's contents.
  148. uint32_t maxprot;
  149. uint32_t initprot;
  150. // The number of sections in section_list.
  151. uint32_t nsects;
  152. // Flags describing this segment, from SegmentFlags.
  153. uint32_t flags;
  154. // True if this is a 64-bit section; false if it is a 32-bit section.
  155. bool bits_64;
  156. };
  157. // A section in a Mach-O file. All these fields have been byte-swapped as
  158. // appropriate for use by the executing architecture.
  159. struct Section {
  160. // This section's contents. This points into the bytes passed to the
  161. // Reader that created this Section.
  162. ByteBuffer contents;
  163. // This section's name.
  164. string section_name; // section[_64].sectname
  165. // The name of the segment this section belongs to.
  166. string segment_name; // section[_64].segname
  167. // The address at which this section's contents should be loaded in
  168. // memory. If bits_64 is false, only the bottom 32 bits of this value
  169. // are valid.
  170. uint64_t address;
  171. // The contents of this section should be loaded into memory at an
  172. // address which is a multiple of (two raised to this power).
  173. uint32_t align;
  174. // Flags from SectionFlags describing the section's contents.
  175. uint32_t flags;
  176. // We don't support reading relocations yet.
  177. // True if this is a 64-bit section; false if it is a 32-bit section.
  178. bool bits_64;
  179. };
  180. // A map from section names to Sections.
  181. typedef map<string, Section> SectionMap;
  182. // A reader for a Mach-O file.
  183. //
  184. // This does not handle fat binaries; see FatReader above. FatReader
  185. // provides a friendly interface for parsing data that could be either a
  186. // fat binary or a Mach-O file.
  187. class Reader {
  188. public:
  189. // A class for reporting errors found while parsing Mach-O files. The
  190. // default definitions of these member functions print messages to
  191. // stderr.
  192. class Reporter {
  193. public:
  194. // Create a reporter that attributes problems to |filename|.
  195. explicit Reporter(const string &filename) : filename_(filename) { }
  196. virtual ~Reporter() { }
  197. // Reporter functions for fatal errors return void; the reader will
  198. // definitely return an error to its caller after calling them
  199. // The data does not begin with a Mach-O magic number, or the magic
  200. // number does not match the expected value for the cpu architecture.
  201. // This is a fatal error.
  202. virtual void BadHeader();
  203. // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
  204. // does not match the expected CPU architecture
  205. // (|expected_cpu_type|, |expected_cpu_subtype|).
  206. virtual void CPUTypeMismatch(cpu_type_t cpu_type,
  207. cpu_subtype_t cpu_subtype,
  208. cpu_type_t expected_cpu_type,
  209. cpu_subtype_t expected_cpu_subtype);
  210. // The file ends abruptly: either it is not large enough to hold a
  211. // complete header, or the header implies that contents are present
  212. // beyond the actual end of the file.
  213. virtual void HeaderTruncated();
  214. // The file's load command region, as given in the Mach-O header, is
  215. // too large for the file.
  216. virtual void LoadCommandRegionTruncated();
  217. // The file's Mach-O header claims the file contains |claimed| load
  218. // commands, but the I'th load command, of type |type|, extends beyond
  219. // the end of the load command region, as given by the Mach-O header.
  220. // If |type| is zero, the command's type was unreadable.
  221. virtual void LoadCommandsOverrun(size_t claimed, size_t i,
  222. LoadCommandType type);
  223. // The contents of the |i|'th load command, of type |type|, extend beyond
  224. // the size given in the load command's header.
  225. virtual void LoadCommandTooShort(size_t i, LoadCommandType type);
  226. // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
  227. // |name| is too short to hold the sections that its header says it does.
  228. // (This more specific than LoadCommandTooShort.)
  229. virtual void SectionsMissing(const string &name);
  230. // The segment named |name| claims that its contents lie beyond the end
  231. // of the file.
  232. virtual void MisplacedSegmentData(const string &name);
  233. // The section named |section| in the segment named |segment| claims that
  234. // its contents do not lie entirely within the segment.
  235. virtual void MisplacedSectionData(const string &section,
  236. const string &segment);
  237. // The LC_SYMTAB command claims that symbol table contents are located
  238. // beyond the end of the file.
  239. virtual void MisplacedSymbolTable();
  240. // An attempt was made to read a Mach-O file of the unsupported
  241. // CPU architecture |cpu_type|.
  242. virtual void UnsupportedCPUType(cpu_type_t cpu_type);
  243. private:
  244. string filename_;
  245. };
  246. // A handler for sections parsed from a segment. The WalkSegmentSections
  247. // member function accepts an instance of this class, and applies it to
  248. // each section defined in a given segment.
  249. class SectionHandler {
  250. public:
  251. virtual ~SectionHandler() { }
  252. // Called to report that the segment's section list contains |section|.
  253. // This should return true if the iteration should continue, or false
  254. // if it should stop.
  255. virtual bool HandleSection(const Section &section) = 0;
  256. };
  257. // A handler for the load commands in a Mach-O file.
  258. class LoadCommandHandler {
  259. public:
  260. LoadCommandHandler() { }
  261. virtual ~LoadCommandHandler() { }
  262. // When called from WalkLoadCommands, the following handler functions
  263. // should return true if they wish to continue iterating over the load
  264. // command list, or false if they wish to stop iterating.
  265. //
  266. // When called from LoadCommandIterator::Handle or Reader::Handle,
  267. // these functions' return values are simply passed through to Handle's
  268. // caller.
  269. //
  270. // The definitions provided by this base class simply return true; the
  271. // default is to silently ignore sections whose member functions the
  272. // subclass doesn't override.
  273. // COMMAND is load command we don't recognize. We provide only the
  274. // command type and a ByteBuffer enclosing the command's data (If we
  275. // cannot parse the command type or its size, we call
  276. // reporter_->IncompleteLoadCommand instead.)
  277. virtual bool UnknownCommand(LoadCommandType type,
  278. const ByteBuffer &contents) {
  279. return true;
  280. }
  281. // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
  282. // with the properties given in |segment|.
  283. virtual bool SegmentCommand(const Segment &segment) {
  284. return true;
  285. }
  286. // The load command is LC_SYMTAB. |entries| holds the array of nlist
  287. // entries, and |names| holds the strings the entries refer to.
  288. virtual bool SymtabCommand(const ByteBuffer &entries,
  289. const ByteBuffer &names) {
  290. return true;
  291. }
  292. // Add handler functions for more load commands here as needed.
  293. };
  294. // Create a Mach-O file reader that reports problems to |reporter|.
  295. explicit Reader(Reporter *reporter)
  296. : reporter_(reporter) { }
  297. // Read the given data as a Mach-O file. The reader retains pointers
  298. // into the data passed, so the data should live as long as the reader
  299. // does. On success, return true; on failure, return false.
  300. //
  301. // At most one of these functions should be invoked once on each Reader
  302. // instance.
  303. bool Read(const uint8_t *buffer,
  304. size_t size,
  305. cpu_type_t expected_cpu_type,
  306. cpu_subtype_t expected_cpu_subtype);
  307. bool Read(const ByteBuffer &buffer,
  308. cpu_type_t expected_cpu_type,
  309. cpu_subtype_t expected_cpu_subtype) {
  310. return Read(buffer.start,
  311. buffer.Size(),
  312. expected_cpu_type,
  313. expected_cpu_subtype);
  314. }
  315. // Return this file's characteristics, as found in the Mach-O header.
  316. cpu_type_t cpu_type() const { return cpu_type_; }
  317. cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
  318. FileType file_type() const { return file_type_; }
  319. FileFlags flags() const { return flags_; }
  320. // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
  321. // Mach-O file.
  322. bool bits_64() const { return bits_64_; }
  323. // Return true if this is a big-endian Mach-O file, false if it is
  324. // little-endian.
  325. bool big_endian() const { return big_endian_; }
  326. // Apply |handler| to each load command in this Mach-O file, stopping when
  327. // a handler function returns false. If we encounter a malformed load
  328. // command, report it via reporter_ and return false. Return true if all
  329. // load commands were parseable and all handlers returned true.
  330. bool WalkLoadCommands(LoadCommandHandler *handler) const;
  331. // Set |segment| to describe the segment named |name|, if present. If
  332. // found, |segment|'s byte buffers refer to a subregion of the bytes
  333. // passed to Read. If we find the section, return true; otherwise,
  334. // return false.
  335. bool FindSegment(const string &name, Segment *segment) const;
  336. // Apply |handler| to each section defined in |segment|. If |handler| returns
  337. // false, stop iterating and return false. If all calls to |handler| return
  338. // true and we reach the end of the section list, return true.
  339. bool WalkSegmentSections(const Segment &segment, SectionHandler *handler)
  340. const;
  341. // Clear |section_map| and then populate it with a map of the sections
  342. // in |segment|, from section names to Section structures.
  343. // Each Section's contents refer to bytes in |segment|'s contents.
  344. // On success, return true; if a problem occurs, report it and return false.
  345. bool MapSegmentSections(const Segment &segment, SectionMap *section_map)
  346. const;
  347. private:
  348. // Used internally.
  349. class SegmentFinder;
  350. class SectionMapper;
  351. // We use this to report problems parsing the file's contents. (WEAK)
  352. Reporter *reporter_;
  353. // The contents of the Mach-O file we're parsing. We do not own the
  354. // storage it refers to.
  355. ByteBuffer buffer_;
  356. // True if this file is big-endian.
  357. bool big_endian_;
  358. // True if this file is a 64-bit Mach-O file.
  359. bool bits_64_;
  360. // This file's cpu type and subtype.
  361. cpu_type_t cpu_type_; // mach_header[_64].cputype
  362. cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype
  363. // This file's type.
  364. FileType file_type_; // mach_header[_64].filetype
  365. // The region of buffer_ occupied by load commands.
  366. ByteBuffer load_commands_;
  367. // The number of load commands in load_commands_.
  368. uint32_t load_command_count_; // mach_header[_64].ncmds
  369. // This file's header flags.
  370. FileFlags flags_;
  371. };
  372. } // namespace mach_o
  373. } // namespace google_breakpad
  374. #endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_