/thirdparty/breakpad/common/mac/macho_reader.cc

http://github.com/tomahawk-player/tomahawk · C++ · 530 lines · 379 code · 59 blank · 92 comment · 70 complexity · e6e9d5c64d41457eefe1e0e64c1b4e17 MD5 · raw file

  1. // Copyright (c) 2010, Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
  30. // macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
  31. // google_breakpad::Mach_O::Reader. See macho_reader.h for details.
  32. #include "common/mac/macho_reader.h"
  33. #include <assert.h>
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. // Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
  37. #if !defined(CPU_TYPE_ARM)
  38. #define CPU_TYPE_ARM 12
  39. #endif
  40. namespace google_breakpad {
  41. namespace mach_o {
  42. // If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
  43. // arguments, so you can't place expressions that do necessary work in
  44. // the argument of an assert. Nor can you assign the result of the
  45. // expression to a variable and assert that the variable's value is
  46. // true: you'll get unused variable warnings when NDEBUG is #defined.
  47. //
  48. // ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
  49. // the result is true if NDEBUG is not #defined.
  50. #if defined(NDEBUG)
  51. #define ASSERT_ALWAYS_EVAL(x) (x)
  52. #else
  53. #define ASSERT_ALWAYS_EVAL(x) assert(x)
  54. #endif
  55. void FatReader::Reporter::BadHeader() {
  56. fprintf(stderr, "%s: file is neither a fat binary file"
  57. " nor a Mach-O object file\n", filename_.c_str());
  58. }
  59. void FatReader::Reporter::TooShort() {
  60. fprintf(stderr, "%s: file too short for the data it claims to contain\n",
  61. filename_.c_str());
  62. }
  63. void FatReader::Reporter::MisplacedObjectFile() {
  64. fprintf(stderr, "%s: file too short for the object files it claims"
  65. " to contain\n", filename_.c_str());
  66. }
  67. bool FatReader::Read(const uint8_t *buffer, size_t size) {
  68. buffer_.start = buffer;
  69. buffer_.end = buffer + size;
  70. ByteCursor cursor(&buffer_);
  71. // Fat binaries always use big-endian, so read the magic number in
  72. // that endianness. To recognize Mach-O magic numbers, which can use
  73. // either endianness, check for both the proper and reversed forms
  74. // of the magic numbers.
  75. cursor.set_big_endian(true);
  76. if (cursor >> magic_) {
  77. if (magic_ == FAT_MAGIC) {
  78. // How many object files does this fat binary contain?
  79. uint32_t object_files_count;
  80. if (!(cursor >> object_files_count)) { // nfat_arch
  81. reporter_->TooShort();
  82. return false;
  83. }
  84. // Read the list of object files.
  85. object_files_.resize(object_files_count);
  86. for (size_t i = 0; i < object_files_count; i++) {
  87. struct fat_arch *objfile = &object_files_[i];
  88. // Read this object file entry, byte-swapping as appropriate.
  89. cursor >> objfile->cputype
  90. >> objfile->cpusubtype
  91. >> objfile->offset
  92. >> objfile->size
  93. >> objfile->align;
  94. if (!cursor) {
  95. reporter_->TooShort();
  96. return false;
  97. }
  98. // Does the file actually have the bytes this entry refers to?
  99. size_t fat_size = buffer_.Size();
  100. if (objfile->offset > fat_size ||
  101. objfile->size > fat_size - objfile->offset) {
  102. reporter_->MisplacedObjectFile();
  103. return false;
  104. }
  105. }
  106. return true;
  107. } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
  108. magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
  109. // If this is a little-endian Mach-O file, fix the cursor's endianness.
  110. if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
  111. cursor.set_big_endian(false);
  112. // Record the entire file as a single entry in the object file list.
  113. object_files_.resize(1);
  114. // Get the cpu type and subtype from the Mach-O header.
  115. if (!(cursor >> object_files_[0].cputype
  116. >> object_files_[0].cpusubtype)) {
  117. reporter_->TooShort();
  118. return false;
  119. }
  120. object_files_[0].offset = 0;
  121. object_files_[0].size = static_cast<uint32_t>(buffer_.Size());
  122. // This alignment is correct for 32 and 64-bit x86 and ppc.
  123. // See get_align in the lipo source for other architectures:
  124. // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
  125. object_files_[0].align = 12; // 2^12 == 4096
  126. return true;
  127. }
  128. }
  129. reporter_->BadHeader();
  130. return false;
  131. }
  132. void Reader::Reporter::BadHeader() {
  133. fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
  134. }
  135. void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
  136. cpu_subtype_t cpu_subtype,
  137. cpu_type_t expected_cpu_type,
  138. cpu_subtype_t expected_cpu_subtype) {
  139. fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
  140. " type %d, subtype %d\n",
  141. filename_.c_str(), cpu_type, cpu_subtype,
  142. expected_cpu_type, expected_cpu_subtype);
  143. }
  144. void Reader::Reporter::HeaderTruncated() {
  145. fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
  146. filename_.c_str());
  147. }
  148. void Reader::Reporter::LoadCommandRegionTruncated() {
  149. fprintf(stderr, "%s: file too short to hold load command region"
  150. " given in Mach-O header\n", filename_.c_str());
  151. }
  152. void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
  153. LoadCommandType type) {
  154. fprintf(stderr, "%s: file's header claims there are %ld"
  155. " load commands, but load command #%ld",
  156. filename_.c_str(), claimed, i);
  157. if (type) fprintf(stderr, ", of type %d,", type);
  158. fprintf(stderr, " extends beyond the end of the load command region\n");
  159. }
  160. void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
  161. fprintf(stderr, "%s: the contents of load command #%ld, of type %d,"
  162. " extend beyond the size given in the load command's header\n",
  163. filename_.c_str(), i, type);
  164. }
  165. void Reader::Reporter::SectionsMissing(const string &name) {
  166. fprintf(stderr, "%s: the load command for segment '%s'"
  167. " is too short to hold the section headers it claims to have\n",
  168. filename_.c_str(), name.c_str());
  169. }
  170. void Reader::Reporter::MisplacedSegmentData(const string &name) {
  171. fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
  172. " the end of the file\n", filename_.c_str(), name.c_str());
  173. }
  174. void Reader::Reporter::MisplacedSectionData(const string &section,
  175. const string &segment) {
  176. fprintf(stderr, "%s: the section '%s' in segment '%s'"
  177. " claims its contents lie outside the segment's contents\n",
  178. filename_.c_str(), section.c_str(), segment.c_str());
  179. }
  180. void Reader::Reporter::MisplacedSymbolTable() {
  181. fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
  182. " table's contents are located beyond the end of the file\n",
  183. filename_.c_str());
  184. }
  185. void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
  186. fprintf(stderr, "%s: CPU type %d is not supported\n",
  187. filename_.c_str(), cpu_type);
  188. }
  189. bool Reader::Read(const uint8_t *buffer,
  190. size_t size,
  191. cpu_type_t expected_cpu_type,
  192. cpu_subtype_t expected_cpu_subtype) {
  193. assert(!buffer_.start);
  194. buffer_.start = buffer;
  195. buffer_.end = buffer + size;
  196. ByteCursor cursor(&buffer_, true);
  197. uint32_t magic;
  198. if (!(cursor >> magic)) {
  199. reporter_->HeaderTruncated();
  200. return false;
  201. }
  202. if (expected_cpu_type != CPU_TYPE_ANY) {
  203. uint32_t expected_magic;
  204. // validate that magic matches the expected cpu type
  205. switch (expected_cpu_type) {
  206. case CPU_TYPE_ARM:
  207. case CPU_TYPE_I386:
  208. expected_magic = MH_CIGAM;
  209. break;
  210. case CPU_TYPE_POWERPC:
  211. expected_magic = MH_MAGIC;
  212. break;
  213. case CPU_TYPE_X86_64:
  214. expected_magic = MH_CIGAM_64;
  215. break;
  216. case CPU_TYPE_POWERPC64:
  217. expected_magic = MH_MAGIC_64;
  218. break;
  219. default:
  220. reporter_->UnsupportedCPUType(expected_cpu_type);
  221. return false;
  222. }
  223. if (expected_magic != magic) {
  224. reporter_->BadHeader();
  225. return false;
  226. }
  227. }
  228. // Since the byte cursor is in big-endian mode, a reversed magic number
  229. // always indicates a little-endian file, regardless of our own endianness.
  230. switch (magic) {
  231. case MH_MAGIC: big_endian_ = true; bits_64_ = false; break;
  232. case MH_CIGAM: big_endian_ = false; bits_64_ = false; break;
  233. case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break;
  234. case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break;
  235. default:
  236. reporter_->BadHeader();
  237. return false;
  238. }
  239. cursor.set_big_endian(big_endian_);
  240. uint32_t commands_size, reserved;
  241. cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
  242. >> commands_size >> flags_;
  243. if (bits_64_)
  244. cursor >> reserved;
  245. if (!cursor) {
  246. reporter_->HeaderTruncated();
  247. return false;
  248. }
  249. if (expected_cpu_type != CPU_TYPE_ANY &&
  250. (expected_cpu_type != cpu_type_ ||
  251. expected_cpu_subtype != cpu_subtype_)) {
  252. reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
  253. expected_cpu_type, expected_cpu_subtype);
  254. return false;
  255. }
  256. cursor
  257. .PointTo(&load_commands_.start, commands_size)
  258. .PointTo(&load_commands_.end, 0);
  259. if (!cursor) {
  260. reporter_->LoadCommandRegionTruncated();
  261. return false;
  262. }
  263. return true;
  264. }
  265. bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
  266. ByteCursor list_cursor(&load_commands_, big_endian_);
  267. for (size_t index = 0; index < load_command_count_; ++index) {
  268. // command refers to this load command alone, so that cursor will
  269. // refuse to read past the load command's end. But since we haven't
  270. // read the size yet, let command initially refer to the entire
  271. // remainder of the load command series.
  272. ByteBuffer command(list_cursor.here(), list_cursor.Available());
  273. ByteCursor cursor(&command, big_endian_);
  274. // Read the command type and size --- fields common to all commands.
  275. uint32_t type, size;
  276. if (!(cursor >> type)) {
  277. reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
  278. return false;
  279. }
  280. if (!(cursor >> size) || size > command.Size()) {
  281. reporter_->LoadCommandsOverrun(load_command_count_, index, type);
  282. return false;
  283. }
  284. // Now that we've read the length, restrict command's range to this
  285. // load command only.
  286. command.end = command.start + size;
  287. switch (type) {
  288. case LC_SEGMENT:
  289. case LC_SEGMENT_64: {
  290. Segment segment;
  291. segment.bits_64 = (type == LC_SEGMENT_64);
  292. size_t word_size = segment.bits_64 ? 8 : 4;
  293. cursor.CString(&segment.name, 16);
  294. size_t file_offset, file_size;
  295. cursor
  296. .Read(word_size, false, &segment.vmaddr)
  297. .Read(word_size, false, &segment.vmsize)
  298. .Read(word_size, false, &file_offset)
  299. .Read(word_size, false, &file_size);
  300. cursor >> segment.maxprot
  301. >> segment.initprot
  302. >> segment.nsects
  303. >> segment.flags;
  304. if (!cursor) {
  305. reporter_->LoadCommandTooShort(index, type);
  306. return false;
  307. }
  308. if (file_offset > buffer_.Size() ||
  309. file_size > buffer_.Size() - file_offset) {
  310. reporter_->MisplacedSegmentData(segment.name);
  311. return false;
  312. }
  313. // Mach-O files in .dSYM bundles have the contents of the loaded
  314. // segments removed, and their file offsets and file sizes zeroed
  315. // out. To help us handle this special case properly, give such
  316. // segments' contents NULL starting and ending pointers.
  317. if (file_offset == 0 && file_size == 0) {
  318. segment.contents.start = segment.contents.end = NULL;
  319. } else {
  320. segment.contents.start = buffer_.start + file_offset;
  321. segment.contents.end = segment.contents.start + file_size;
  322. }
  323. // The section list occupies the remainder of this load command's space.
  324. segment.section_list.start = cursor.here();
  325. segment.section_list.end = command.end;
  326. if (!handler->SegmentCommand(segment))
  327. return false;
  328. break;
  329. }
  330. case LC_SYMTAB: {
  331. uint32_t symoff, nsyms, stroff, strsize;
  332. cursor >> symoff >> nsyms >> stroff >> strsize;
  333. if (!cursor) {
  334. reporter_->LoadCommandTooShort(index, type);
  335. return false;
  336. }
  337. // How big are the entries in the symbol table?
  338. // sizeof(struct nlist_64) : sizeof(struct nlist),
  339. // but be paranoid about alignment vs. target architecture.
  340. size_t symbol_size = bits_64_ ? 16 : 12;
  341. // How big is the entire symbol array?
  342. size_t symbols_size = nsyms * symbol_size;
  343. if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
  344. stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
  345. reporter_->MisplacedSymbolTable();
  346. return false;
  347. }
  348. ByteBuffer entries(buffer_.start + symoff, symbols_size);
  349. ByteBuffer names(buffer_.start + stroff, strsize);
  350. if (!handler->SymtabCommand(entries, names))
  351. return false;
  352. break;
  353. }
  354. default: {
  355. if (!handler->UnknownCommand(type, command))
  356. return false;
  357. break;
  358. }
  359. }
  360. list_cursor.set_here(command.end);
  361. }
  362. return true;
  363. }
  364. // A load command handler that looks for a segment of a given name.
  365. class Reader::SegmentFinder : public LoadCommandHandler {
  366. public:
  367. // Create a load command handler that looks for a segment named NAME,
  368. // and sets SEGMENT to describe it if found.
  369. SegmentFinder(const string &name, Segment *segment)
  370. : name_(name), segment_(segment), found_() { }
  371. // Return true if the traversal found the segment, false otherwise.
  372. bool found() const { return found_; }
  373. bool SegmentCommand(const Segment &segment) {
  374. if (segment.name == name_) {
  375. *segment_ = segment;
  376. found_ = true;
  377. return false;
  378. }
  379. return true;
  380. }
  381. private:
  382. // The name of the segment our creator is looking for.
  383. const string &name_;
  384. // Where we should store the segment if found. (WEAK)
  385. Segment *segment_;
  386. // True if we found the segment.
  387. bool found_;
  388. };
  389. bool Reader::FindSegment(const string &name, Segment *segment) const {
  390. SegmentFinder finder(name, segment);
  391. WalkLoadCommands(&finder);
  392. return finder.found();
  393. }
  394. bool Reader::WalkSegmentSections(const Segment &segment,
  395. SectionHandler *handler) const {
  396. size_t word_size = segment.bits_64 ? 8 : 4;
  397. ByteCursor cursor(&segment.section_list, big_endian_);
  398. for (size_t i = 0; i < segment.nsects; i++) {
  399. Section section;
  400. section.bits_64 = segment.bits_64;
  401. uint64_t size;
  402. uint32_t offset, dummy32;
  403. cursor
  404. .CString(&section.section_name, 16)
  405. .CString(&section.segment_name, 16)
  406. .Read(word_size, false, &section.address)
  407. .Read(word_size, false, &size)
  408. >> offset
  409. >> section.align
  410. >> dummy32
  411. >> dummy32
  412. >> section.flags
  413. >> dummy32
  414. >> dummy32;
  415. if (section.bits_64)
  416. cursor >> dummy32;
  417. if (!cursor) {
  418. reporter_->SectionsMissing(segment.name);
  419. return false;
  420. }
  421. if ((section.flags & SECTION_TYPE) == S_ZEROFILL) {
  422. // Zero-fill sections have a size, but no contents.
  423. section.contents.start = section.contents.end = NULL;
  424. } else if (segment.contents.start == NULL &&
  425. segment.contents.end == NULL) {
  426. // Mach-O files in .dSYM bundles have the contents of the loaded
  427. // segments removed, and their file offsets and file sizes zeroed
  428. // out. However, the sections within those segments still have
  429. // non-zero sizes. There's no reason to call MisplacedSectionData in
  430. // this case; the caller may just need the section's load
  431. // address. But do set the contents' limits to NULL, for safety.
  432. section.contents.start = section.contents.end = NULL;
  433. } else {
  434. if (offset < size_t(segment.contents.start - buffer_.start) ||
  435. offset > size_t(segment.contents.end - buffer_.start) ||
  436. size > size_t(segment.contents.end - buffer_.start - offset)) {
  437. reporter_->MisplacedSectionData(section.section_name,
  438. section.segment_name);
  439. return false;
  440. }
  441. section.contents.start = buffer_.start + offset;
  442. section.contents.end = section.contents.start + size;
  443. }
  444. if (!handler->HandleSection(section))
  445. return false;
  446. }
  447. return true;
  448. }
  449. // A SectionHandler that builds a SectionMap for the sections within a
  450. // given segment.
  451. class Reader::SectionMapper: public SectionHandler {
  452. public:
  453. // Create a SectionHandler that populates MAP with an entry for
  454. // each section it is given.
  455. SectionMapper(SectionMap *map) : map_(map) { }
  456. bool HandleSection(const Section &section) {
  457. (*map_)[section.section_name] = section;
  458. return true;
  459. }
  460. private:
  461. // The map under construction. (WEAK)
  462. SectionMap *map_;
  463. };
  464. bool Reader::MapSegmentSections(const Segment &segment,
  465. SectionMap *section_map) const {
  466. section_map->clear();
  467. SectionMapper mapper(section_map);
  468. return WalkSegmentSections(segment, &mapper);
  469. }
  470. } // namespace mach_o
  471. } // namespace google_breakpad