PageRenderTime 53ms CodeModel.GetById 17ms app.highlight 31ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/common/mac/macho_reader.cc

http://github.com/tomahawk-player/tomahawk
C++ | 530 lines | 379 code | 59 blank | 92 comment | 70 complexity | e6e9d5c64d41457eefe1e0e64c1b4e17 MD5 | raw file
  1// Copyright (c) 2010, Google Inc.
  2// All rights reserved.
  3//
  4// Redistribution and use in source and binary forms, with or without
  5// modification, are permitted provided that the following conditions are
  6// met:
  7//
  8//     * Redistributions of source code must retain the above copyright
  9// notice, this list of conditions and the following disclaimer.
 10//     * Redistributions in binary form must reproduce the above
 11// copyright notice, this list of conditions and the following disclaimer
 12// in the documentation and/or other materials provided with the
 13// distribution.
 14//     * Neither the name of Google Inc. nor the names of its
 15// contributors may be used to endorse or promote products derived from
 16// this software without specific prior written permission.
 17//
 18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29
 30// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 31
 32// macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
 33// google_breakpad::Mach_O::Reader. See macho_reader.h for details.
 34
 35#include "common/mac/macho_reader.h"
 36
 37#include <assert.h>
 38#include <stdio.h>
 39#include <stdlib.h>
 40
 41// Unfortunately, CPU_TYPE_ARM is not defined for 10.4.
 42#if !defined(CPU_TYPE_ARM)
 43#define CPU_TYPE_ARM 12
 44#endif
 45
 46namespace google_breakpad {
 47namespace mach_o {
 48
 49// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
 50// arguments, so you can't place expressions that do necessary work in
 51// the argument of an assert. Nor can you assign the result of the
 52// expression to a variable and assert that the variable's value is
 53// true: you'll get unused variable warnings when NDEBUG is #defined.
 54//
 55// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
 56// the result is true if NDEBUG is not #defined.
 57#if defined(NDEBUG)
 58#define ASSERT_ALWAYS_EVAL(x) (x)
 59#else
 60#define ASSERT_ALWAYS_EVAL(x) assert(x)
 61#endif
 62
 63void FatReader::Reporter::BadHeader() {
 64  fprintf(stderr, "%s: file is neither a fat binary file"
 65          " nor a Mach-O object file\n", filename_.c_str());
 66}
 67
 68void FatReader::Reporter::TooShort() {
 69  fprintf(stderr, "%s: file too short for the data it claims to contain\n",
 70          filename_.c_str());
 71}
 72
 73void FatReader::Reporter::MisplacedObjectFile() {
 74  fprintf(stderr, "%s: file too short for the object files it claims"
 75          " to contain\n", filename_.c_str());
 76}
 77
 78bool FatReader::Read(const uint8_t *buffer, size_t size) {
 79  buffer_.start = buffer;
 80  buffer_.end = buffer + size;
 81  ByteCursor cursor(&buffer_);
 82
 83  // Fat binaries always use big-endian, so read the magic number in
 84  // that endianness. To recognize Mach-O magic numbers, which can use
 85  // either endianness, check for both the proper and reversed forms
 86  // of the magic numbers.
 87  cursor.set_big_endian(true);
 88  if (cursor >> magic_) {
 89    if (magic_ == FAT_MAGIC) {
 90      // How many object files does this fat binary contain?
 91      uint32_t object_files_count;
 92      if (!(cursor >> object_files_count)) {  // nfat_arch
 93        reporter_->TooShort();
 94        return false;
 95      }
 96
 97      // Read the list of object files.
 98      object_files_.resize(object_files_count);
 99      for (size_t i = 0; i < object_files_count; i++) {
100        struct fat_arch *objfile = &object_files_[i];
101
102        // Read this object file entry, byte-swapping as appropriate.
103        cursor >> objfile->cputype
104               >> objfile->cpusubtype
105               >> objfile->offset
106               >> objfile->size
107               >> objfile->align;
108        if (!cursor) {
109          reporter_->TooShort();
110          return false;
111        }
112        // Does the file actually have the bytes this entry refers to?
113        size_t fat_size = buffer_.Size();
114        if (objfile->offset > fat_size ||
115            objfile->size > fat_size - objfile->offset) {
116          reporter_->MisplacedObjectFile();
117          return false;
118        }
119      }
120
121      return true;
122    } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
123               magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
124      // If this is a little-endian Mach-O file, fix the cursor's endianness.
125      if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
126        cursor.set_big_endian(false);
127      // Record the entire file as a single entry in the object file list.
128      object_files_.resize(1);
129
130      // Get the cpu type and subtype from the Mach-O header.
131      if (!(cursor >> object_files_[0].cputype
132                   >> object_files_[0].cpusubtype)) {
133        reporter_->TooShort();
134        return false;
135      }
136
137      object_files_[0].offset = 0;
138      object_files_[0].size = static_cast<uint32_t>(buffer_.Size());
139      // This alignment is correct for 32 and 64-bit x86 and ppc.
140      // See get_align in the lipo source for other architectures:
141      // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
142      object_files_[0].align = 12;  // 2^12 == 4096
143      
144      return true;
145    }
146  }
147  
148  reporter_->BadHeader();
149  return false;
150}
151
152void Reader::Reporter::BadHeader() {
153  fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
154}
155
156void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
157                                       cpu_subtype_t cpu_subtype,
158                                       cpu_type_t expected_cpu_type,
159                                       cpu_subtype_t expected_cpu_subtype) {
160  fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
161          " type %d, subtype %d\n",
162          filename_.c_str(), cpu_type, cpu_subtype,
163          expected_cpu_type, expected_cpu_subtype);
164}
165
166void Reader::Reporter::HeaderTruncated() {
167  fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
168          filename_.c_str());
169}
170
171void Reader::Reporter::LoadCommandRegionTruncated() {
172  fprintf(stderr, "%s: file too short to hold load command region"
173          " given in Mach-O header\n", filename_.c_str());
174}
175
176void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
177                                           LoadCommandType type) {
178  fprintf(stderr, "%s: file's header claims there are %ld"
179          " load commands, but load command #%ld",
180          filename_.c_str(), claimed, i);
181  if (type) fprintf(stderr, ", of type %d,", type);
182  fprintf(stderr, " extends beyond the end of the load command region\n");
183}
184
185void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
186  fprintf(stderr, "%s: the contents of load command #%ld, of type %d,"
187          " extend beyond the size given in the load command's header\n",
188          filename_.c_str(), i, type);
189}
190
191void Reader::Reporter::SectionsMissing(const string &name) {
192  fprintf(stderr, "%s: the load command for segment '%s'"
193          " is too short to hold the section headers it claims to have\n",
194          filename_.c_str(), name.c_str());
195}
196
197void Reader::Reporter::MisplacedSegmentData(const string &name) {
198  fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
199          " the end of the file\n", filename_.c_str(), name.c_str());
200}
201
202void Reader::Reporter::MisplacedSectionData(const string &section,
203                                            const string &segment) {
204  fprintf(stderr, "%s: the section '%s' in segment '%s'"
205          " claims its contents lie outside the segment's contents\n",
206          filename_.c_str(), section.c_str(), segment.c_str());
207}
208
209void Reader::Reporter::MisplacedSymbolTable() {
210  fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
211          " table's contents are located beyond the end of the file\n",
212          filename_.c_str());
213}
214
215void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
216  fprintf(stderr, "%s: CPU type %d is not supported\n",
217          filename_.c_str(), cpu_type);
218}
219
220bool Reader::Read(const uint8_t *buffer,
221                  size_t size,
222                  cpu_type_t expected_cpu_type,
223                  cpu_subtype_t expected_cpu_subtype) {
224  assert(!buffer_.start);
225  buffer_.start = buffer;
226  buffer_.end = buffer + size;
227  ByteCursor cursor(&buffer_, true);
228  uint32_t magic;
229  if (!(cursor >> magic)) {
230    reporter_->HeaderTruncated();
231    return false;
232  }
233
234  if (expected_cpu_type != CPU_TYPE_ANY) {
235    uint32_t expected_magic;
236    // validate that magic matches the expected cpu type
237    switch (expected_cpu_type) {
238      case CPU_TYPE_ARM:
239      case CPU_TYPE_I386:
240        expected_magic = MH_CIGAM;
241        break;
242      case CPU_TYPE_POWERPC:
243        expected_magic = MH_MAGIC;
244        break;
245      case CPU_TYPE_X86_64:
246        expected_magic = MH_CIGAM_64;
247        break;
248      case CPU_TYPE_POWERPC64:
249        expected_magic = MH_MAGIC_64;
250        break;
251      default:
252        reporter_->UnsupportedCPUType(expected_cpu_type);
253        return false;
254    }
255
256    if (expected_magic != magic) {
257      reporter_->BadHeader();
258      return false;
259    }
260  }
261
262  // Since the byte cursor is in big-endian mode, a reversed magic number
263  // always indicates a little-endian file, regardless of our own endianness.
264  switch (magic) {
265    case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
266    case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
267    case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
268    case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
269    default:
270      reporter_->BadHeader();
271      return false;
272  }
273  cursor.set_big_endian(big_endian_);
274  uint32_t commands_size, reserved;
275  cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
276         >> commands_size >> flags_;
277  if (bits_64_)
278    cursor >> reserved;
279  if (!cursor) {
280    reporter_->HeaderTruncated();
281    return false;
282  }
283
284  if (expected_cpu_type != CPU_TYPE_ANY &&
285      (expected_cpu_type != cpu_type_ ||
286       expected_cpu_subtype != cpu_subtype_)) {
287    reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
288                              expected_cpu_type, expected_cpu_subtype);
289    return false;
290  }
291
292  cursor
293      .PointTo(&load_commands_.start, commands_size)
294      .PointTo(&load_commands_.end, 0);
295  if (!cursor) {
296    reporter_->LoadCommandRegionTruncated();
297    return false;
298  }
299
300  return true;
301}
302
303bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
304  ByteCursor list_cursor(&load_commands_, big_endian_);
305
306  for (size_t index = 0; index < load_command_count_; ++index) {
307    // command refers to this load command alone, so that cursor will
308    // refuse to read past the load command's end. But since we haven't
309    // read the size yet, let command initially refer to the entire
310    // remainder of the load command series.
311    ByteBuffer command(list_cursor.here(), list_cursor.Available());
312    ByteCursor cursor(&command, big_endian_);
313    
314    // Read the command type and size --- fields common to all commands.
315    uint32_t type, size;
316    if (!(cursor >> type)) {
317      reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
318      return false;
319    }
320    if (!(cursor >> size) || size > command.Size()) {
321      reporter_->LoadCommandsOverrun(load_command_count_, index, type);
322      return false;
323    }
324
325    // Now that we've read the length, restrict command's range to this
326    // load command only.
327    command.end = command.start + size;
328
329    switch (type) {
330      case LC_SEGMENT:
331      case LC_SEGMENT_64: {
332        Segment segment;
333        segment.bits_64 = (type == LC_SEGMENT_64);
334        size_t word_size = segment.bits_64 ? 8 : 4;
335        cursor.CString(&segment.name, 16);
336        size_t file_offset, file_size;
337        cursor
338            .Read(word_size, false, &segment.vmaddr)
339            .Read(word_size, false, &segment.vmsize)
340            .Read(word_size, false, &file_offset)
341            .Read(word_size, false, &file_size);
342        cursor >> segment.maxprot
343               >> segment.initprot
344               >> segment.nsects
345               >> segment.flags;
346        if (!cursor) {
347          reporter_->LoadCommandTooShort(index, type);
348          return false;
349        }
350        if (file_offset > buffer_.Size() ||
351            file_size > buffer_.Size() - file_offset) {
352          reporter_->MisplacedSegmentData(segment.name);
353          return false;
354        }
355        // Mach-O files in .dSYM bundles have the contents of the loaded
356        // segments removed, and their file offsets and file sizes zeroed
357        // out. To help us handle this special case properly, give such
358        // segments' contents NULL starting and ending pointers.
359        if (file_offset == 0 && file_size == 0) {
360          segment.contents.start = segment.contents.end = NULL;
361        } else {
362          segment.contents.start = buffer_.start + file_offset;
363          segment.contents.end = segment.contents.start + file_size;
364        }
365        // The section list occupies the remainder of this load command's space.
366        segment.section_list.start = cursor.here();
367        segment.section_list.end = command.end;
368
369        if (!handler->SegmentCommand(segment))
370          return false;
371        break;
372      }
373
374      case LC_SYMTAB: {
375        uint32_t symoff, nsyms, stroff, strsize;
376        cursor >> symoff >> nsyms >> stroff >> strsize;
377        if (!cursor) {
378          reporter_->LoadCommandTooShort(index, type);
379          return false;
380        }
381        // How big are the entries in the symbol table?
382        // sizeof(struct nlist_64) : sizeof(struct nlist),
383        // but be paranoid about alignment vs. target architecture.
384        size_t symbol_size = bits_64_ ? 16 : 12;
385        // How big is the entire symbol array?
386        size_t symbols_size = nsyms * symbol_size;
387        if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
388            stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
389          reporter_->MisplacedSymbolTable();
390          return false;
391        }
392        ByteBuffer entries(buffer_.start + symoff, symbols_size);
393        ByteBuffer names(buffer_.start + stroff, strsize);
394        if (!handler->SymtabCommand(entries, names))
395          return false;
396        break;
397      }
398      
399      default: {
400        if (!handler->UnknownCommand(type, command))
401          return false;
402        break;
403      }
404    }
405
406    list_cursor.set_here(command.end);
407  }
408
409  return true;
410}
411
412// A load command handler that looks for a segment of a given name.
413class Reader::SegmentFinder : public LoadCommandHandler {
414 public:
415  // Create a load command handler that looks for a segment named NAME,
416  // and sets SEGMENT to describe it if found.
417  SegmentFinder(const string &name, Segment *segment) 
418      : name_(name), segment_(segment), found_() { }
419
420  // Return true if the traversal found the segment, false otherwise.
421  bool found() const { return found_; }
422
423  bool SegmentCommand(const Segment &segment) {
424    if (segment.name == name_) {
425      *segment_ = segment;
426      found_ = true;
427      return false;
428    }
429    return true;
430  }
431
432 private:
433  // The name of the segment our creator is looking for.
434  const string &name_;
435
436  // Where we should store the segment if found. (WEAK)
437  Segment *segment_;
438
439  // True if we found the segment.
440  bool found_;
441};
442
443bool Reader::FindSegment(const string &name, Segment *segment) const {
444  SegmentFinder finder(name, segment);
445  WalkLoadCommands(&finder);
446  return finder.found();
447}
448
449bool Reader::WalkSegmentSections(const Segment &segment,
450                                 SectionHandler *handler) const {
451  size_t word_size = segment.bits_64 ? 8 : 4;
452  ByteCursor cursor(&segment.section_list, big_endian_);
453
454  for (size_t i = 0; i < segment.nsects; i++) {
455    Section section;
456    section.bits_64 = segment.bits_64;
457    uint64_t size;
458    uint32_t offset, dummy32;
459    cursor
460        .CString(&section.section_name, 16)
461        .CString(&section.segment_name, 16)
462        .Read(word_size, false, &section.address)
463        .Read(word_size, false, &size)
464        >> offset
465        >> section.align
466        >> dummy32
467        >> dummy32
468        >> section.flags
469        >> dummy32
470        >> dummy32;
471    if (section.bits_64)
472      cursor >> dummy32;
473    if (!cursor) {
474      reporter_->SectionsMissing(segment.name);
475      return false;
476    }
477    if ((section.flags & SECTION_TYPE) == S_ZEROFILL) {
478      // Zero-fill sections have a size, but no contents.
479      section.contents.start = section.contents.end = NULL;
480    } else if (segment.contents.start == NULL && 
481               segment.contents.end == NULL) {
482      // Mach-O files in .dSYM bundles have the contents of the loaded
483      // segments removed, and their file offsets and file sizes zeroed
484      // out.  However, the sections within those segments still have
485      // non-zero sizes.  There's no reason to call MisplacedSectionData in
486      // this case; the caller may just need the section's load
487      // address. But do set the contents' limits to NULL, for safety.
488      section.contents.start = section.contents.end = NULL;
489    } else {
490      if (offset < size_t(segment.contents.start - buffer_.start) ||
491          offset > size_t(segment.contents.end - buffer_.start) ||
492          size > size_t(segment.contents.end - buffer_.start - offset)) {
493        reporter_->MisplacedSectionData(section.section_name,
494                                        section.segment_name);
495        return false;
496      }
497      section.contents.start = buffer_.start + offset;
498      section.contents.end = section.contents.start + size;
499    }
500    if (!handler->HandleSection(section))
501      return false;
502  }
503  return true;
504}
505
506// A SectionHandler that builds a SectionMap for the sections within a
507// given segment.
508class Reader::SectionMapper: public SectionHandler {
509 public:
510  // Create a SectionHandler that populates MAP with an entry for
511  // each section it is given.
512  SectionMapper(SectionMap *map) : map_(map) { }
513  bool HandleSection(const Section &section) {
514    (*map_)[section.section_name] = section;
515    return true;
516  }
517 private:
518  // The map under construction. (WEAK)
519  SectionMap *map_;
520};
521
522bool Reader::MapSegmentSections(const Segment &segment,
523                                SectionMap *section_map) const {
524  section_map->clear();
525  SectionMapper mapper(section_map);
526  return WalkSegmentSections(segment, &mapper);
527}
528
529}  // namespace mach_o
530}  // namespace google_breakpad