PageRenderTime 66ms CodeModel.GetById 8ms app.highlight 51ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/common/linux/dump_symbols.cc

http://github.com/tomahawk-player/tomahawk
C++ | 826 lines | 601 code | 81 blank | 144 comment | 91 complexity | c0b62a4995393a1dd7da0b564094571b MD5 | raw file
  1// Copyright (c) 2011 Google Inc.
  2// All rights reserved.
  3//
  4// Redistribution and use in source and binary forms, with or without
  5// modification, are permitted provided that the following conditions are
  6// met:
  7//
  8//     * Redistributions of source code must retain the above copyright
  9// notice, this list of conditions and the following disclaimer.
 10//     * Redistributions in binary form must reproduce the above
 11// copyright notice, this list of conditions and the following disclaimer
 12// in the documentation and/or other materials provided with the
 13// distribution.
 14//     * Neither the name of Google Inc. nor the names of its
 15// contributors may be used to endorse or promote products derived from
 16// this software without specific prior written permission.
 17//
 18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29
 30// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
 31
 32// dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
 33// Find all the debugging info in a file and dump it as a Breakpad symbol file.
 34
 35#include "common/linux/dump_symbols.h"
 36
 37#include <assert.h>
 38#include <elf.h>
 39#include <errno.h>
 40#include <fcntl.h>
 41#include <link.h>
 42#include <stdio.h>
 43#include <stdlib.h>
 44#include <string.h>
 45#include <sys/mman.h>
 46#include <sys/stat.h>
 47#include <unistd.h>
 48
 49#include <iostream>
 50#include <set>
 51#include <string>
 52#include <utility>
 53#include <vector>
 54
 55#include "common/dwarf/bytereader-inl.h"
 56#include "common/dwarf/dwarf2diehandler.h"
 57#include "common/dwarf_cfi_to_module.h"
 58#include "common/dwarf_cu_to_module.h"
 59#include "common/dwarf_line_to_module.h"
 60#include "common/linux/elf_symbols_to_module.h"
 61#include "common/linux/file_id.h"
 62#include "common/module.h"
 63#include "common/stabs_reader.h"
 64#include "common/stabs_to_module.h"
 65
 66// This namespace contains helper functions.
 67namespace {
 68
 69using google_breakpad::DwarfCFIToModule;
 70using google_breakpad::DwarfCUToModule;
 71using google_breakpad::DwarfLineToModule;
 72using google_breakpad::Module;
 73using google_breakpad::StabsToModule;
 74
 75//
 76// FDWrapper
 77//
 78// Wrapper class to make sure opened file is closed.
 79//
 80class FDWrapper {
 81 public:
 82  explicit FDWrapper(int fd) :
 83    fd_(fd) {}
 84  ~FDWrapper() {
 85    if (fd_ != -1)
 86      close(fd_);
 87  }
 88  int get() {
 89    return fd_;
 90  }
 91  int release() {
 92    int fd = fd_;
 93    fd_ = -1;
 94    return fd;
 95  }
 96 private:
 97  int fd_;
 98};
 99
//
// MmapWrapper
//
// Owns a memory-mapped region and unmaps it on destruction. A wrapper
// that was never given a mapping with set(), or whose mapping was handed
// back with release(), does nothing when destroyed.
//
class MmapWrapper {
 public:
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}
  ~MmapWrapper() {
    // Callers declare the wrapper before attempting the mapping, so it is
    // legitimate to get here without set() ever having been called (for
    // example when the file could not be opened). Only unmap what we own.
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }
  // Take ownership of the MAPPED_SIZE bytes mapped at MAPPED_ADDRESS.
  void set(void *mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }
  // Give up ownership of the mapping without unmapping it. Only valid
  // after set() has been called.
  void release() {
    assert(is_set_);
    base_ = NULL;
    size_ = 0;
  }

 private:
  // A copy would unmap the same region twice, so the copy operations are
  // declared here and intentionally never defined.
  MmapWrapper(const MmapWrapper &);
  void operator=(const MmapWrapper &);

  bool is_set_;   // True once set() has been called.
  void *base_;    // Start of the owned mapping, or NULL if none.
  size_t size_;   // Length in bytes of the owned mapping.
};
131
132
// Rewrite the file offsets stored in the ELF image mapped at OBJ_BASE as
// absolute addresses by adding the mapping's base to them. This covers
// the program header table offset, the section header table offset and
// every section's sh_offset, so later code can cast those fields
// directly to pointers.
static void FixAddress(void *obj_base) {
  ElfW(Ehdr) *const ehdr = static_cast<ElfW(Ehdr) *>(obj_base);
  const ElfW(Addr) map_start = reinterpret_cast<ElfW(Addr)>(obj_base);

  ehdr->e_phoff += map_start;
  ehdr->e_shoff += map_start;

  // e_shoff is an address by now, so it can be used as the table pointer.
  ElfW(Shdr) *const table = reinterpret_cast<ElfW(Shdr) *>(ehdr->e_shoff);
  int index = 0;
  while (index < ehdr->e_shnum) {
    table[index].sh_offset += map_start;
    ++index;
  }
}
144
// Return the preferred loading address described by the NHEADER program
// headers at PROGRAM_HEADERS: the virtual address of the first PT_LOAD
// segment whose file offset is zero. If there is no such segment,
// return 0.
static ElfW(Addr) GetLoadingAddress(const ElfW(Phdr) *program_headers,
                                    int nheader) {
  int index = 0;
  while (index < nheader) {
    const ElfW(Phdr) *candidate = program_headers + index;
    // For an executable this is the segment that maps the file's start.
    if (candidate->p_offset == 0 && candidate->p_type == PT_LOAD)
      return candidate->p_vaddr;
    ++index;
  }
  // No segment starts at file offset zero.
  return 0;
}
158
// Return true if ELF_HEADER begins with the ELF magic number. Only the
// first SELFMAG bytes are examined.
static bool IsValidElf(const ElfW(Ehdr) *elf_header) {
  const int difference = memcmp(elf_header->e_ident, ELFMAG, SELFMAG);
  return difference == 0;
}
162
// Search the NSECTION section headers at SECTIONS for the first one
// called NAME, reading names from the string table described by
// SECTION_NAMES. The sh_offset fields must already hold addresses.
// Return the matching header, or NULL if NAME is empty, if no section
// has that name, or if the first match is of type SHT_NOBITS (a message
// is printed in that last case).
static const ElfW(Shdr) *FindSectionByName(const char *name,
                                           const ElfW(Shdr) *sections,
                                           const ElfW(Shdr) *section_names,
                                           int nsection) {
  assert(name != NULL);
  assert(sections != NULL);
  assert(nsection > 0);

  const int wanted_len = strlen(name);
  if (wanted_len == 0)
    return NULL;

  // One past the last byte of the name table; a candidate name must leave
  // room for NAME plus its terminator before this point, so the
  // comparison below cannot run off the end of the table.
  const char *const table_end = reinterpret_cast<char *>(
      section_names->sh_offset + section_names->sh_size);

  for (int index = 0; index < nsection; ++index) {
    const ElfW(Shdr) *candidate = sections + index;
    const char *candidate_name = reinterpret_cast<char *>(
        section_names->sh_offset + candidate->sh_name);
    if (table_end - candidate_name < wanted_len + 1)
      continue;
    if (strcmp(name, candidate_name) != 0)
      continue;
    if (candidate->sh_type != SHT_NOBITS)
      return candidate;
    // The section occupies no bytes in the file, so there is nothing to
    // read from it.
    fprintf(stderr,
            "Section %s found, but ignored because type=SHT_NOBITS.\n",
            name);
    return NULL;
  }
  return NULL;
}
196
197static bool LoadStabs(const ElfW(Ehdr) *elf_header,
198                      const ElfW(Shdr) *stab_section,
199                      const ElfW(Shdr) *stabstr_section,
200                      const bool big_endian,
201                      Module *module) {
202  // A callback object to handle data from the STABS reader.
203  StabsToModule handler(module);
204  // Find the addresses of the STABS data, and create a STABS reader object.
205  // On Linux, STABS entries always have 32-bit values, regardless of the
206  // address size of the architecture whose code they're describing, and
207  // the strings are always "unitized".
208  uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
209  uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
210  google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
211                                      stabstr, stabstr_section->sh_size,
212                                      big_endian, 4, true, &handler);
213  // Read the STABS data, and do post-processing.
214  if (!reader.Process())
215    return false;
216  handler.Finalize();
217  return true;
218}
219
220// A line-to-module loader that accepts line number info parsed by
221// dwarf2reader::LineInfo and populates a Module and a line vector
222// with the results.
223class DumperLineToModule: public DwarfCUToModule::LineToModuleFunctor {
224 public:
225  // Create a line-to-module converter using BYTE_READER.
226  explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
227      : byte_reader_(byte_reader) { }
228  void operator()(const char *program, uint64 length,
229                  Module *module, std::vector<Module::Line> *lines) {
230    DwarfLineToModule handler(module, lines);
231    dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
232    parser.Start();
233  }
234 private:
235  dwarf2reader::ByteReader *byte_reader_;
236};
237
238static bool LoadDwarf(const std::string &dwarf_filename,
239                      const ElfW(Ehdr) *elf_header,
240                      const bool big_endian,
241                      Module *module) {
242  const dwarf2reader::Endianness endianness = big_endian ?
243      dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
244  dwarf2reader::ByteReader byte_reader(endianness);
245
246  // Construct a context for this file.
247  DwarfCUToModule::FileContext file_context(dwarf_filename, module);
248
249  // Build a map of the ELF file's sections.
250  const ElfW(Shdr) *sections
251      = reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
252  int num_sections = elf_header->e_shnum;
253  const ElfW(Shdr) *section_names = sections + elf_header->e_shstrndx;
254  for (int i = 0; i < num_sections; i++) {
255    const ElfW(Shdr) *section = &sections[i];
256    std::string name = reinterpret_cast<const char *>(section_names->sh_offset +
257                                                      section->sh_name);
258    const char *contents = reinterpret_cast<const char *>(section->sh_offset);
259    uint64 length = section->sh_size;
260    file_context.section_map[name] = std::make_pair(contents, length);
261  }
262
263  // Parse all the compilation units in the .debug_info section.
264  DumperLineToModule line_to_module(&byte_reader);
265  std::pair<const char *, uint64> debug_info_section
266      = file_context.section_map[".debug_info"];
267  // We should never have been called if the file doesn't have a
268  // .debug_info section.
269  assert(debug_info_section.first);
270  uint64 debug_info_length = debug_info_section.second;
271  for (uint64 offset = 0; offset < debug_info_length;) {
272    // Make a handler for the root DIE that populates MODULE with the
273    // data we find.
274    DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
275    DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
276    // Make a Dwarf2Handler that drives our DIEHandler.
277    dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
278    // Make a DWARF parser for the compilation unit at OFFSET.
279    dwarf2reader::CompilationUnit reader(file_context.section_map,
280                                         offset,
281                                         &byte_reader,
282                                         &die_dispatcher);
283    // Process the entire compilation unit; get the offset of the next.
284    offset += reader.Start();
285  }
286  return true;
287}
288
289// Fill REGISTER_NAMES with the register names appropriate to the
290// machine architecture given in HEADER, indexed by the register
291// numbers used in DWARF call frame information. Return true on
292// success, or false if we don't recognize HEADER's machine
293// architecture.
294static bool DwarfCFIRegisterNames(const ElfW(Ehdr) *elf_header,
295                                  std::vector<std::string> *register_names) {
296  switch (elf_header->e_machine) {
297    case EM_386:
298      *register_names = DwarfCFIToModule::RegisterNames::I386();
299      return true;
300    case EM_ARM:
301      *register_names = DwarfCFIToModule::RegisterNames::ARM();
302      return true;
303    case EM_X86_64:
304      *register_names = DwarfCFIToModule::RegisterNames::X86_64();
305      return true;
306    default:
307      return false;
308  }
309}
310
311static bool LoadDwarfCFI(const std::string &dwarf_filename,
312                         const ElfW(Ehdr) *elf_header,
313                         const char *section_name,
314                         const ElfW(Shdr) *section,
315                         const bool eh_frame,
316                         const ElfW(Shdr) *got_section,
317                         const ElfW(Shdr) *text_section,
318                         const bool big_endian,
319                         Module *module) {
320  // Find the appropriate set of register names for this file's
321  // architecture.
322  std::vector<std::string> register_names;
323  if (!DwarfCFIRegisterNames(elf_header, &register_names)) {
324    fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
325            " cannot convert DWARF call frame information\n",
326            dwarf_filename.c_str(), elf_header->e_machine);
327    return false;
328  }
329
330  const dwarf2reader::Endianness endianness = big_endian ?
331      dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
332
333  // Find the call frame information and its size.
334  const char *cfi = reinterpret_cast<const char *>(section->sh_offset);
335  size_t cfi_size = section->sh_size;
336
337  // Plug together the parser, handler, and their entourages.
338  DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
339  DwarfCFIToModule handler(module, register_names, &module_reporter);
340  dwarf2reader::ByteReader byte_reader(endianness);
341  // Since we're using the ElfW macro, we're not actually capable of
342  // processing both ELF32 and ELF64 files with the same program; that
343  // would take a bit more work. But this will work out well enough.
344  if (elf_header->e_ident[EI_CLASS] == ELFCLASS32)
345    byte_reader.SetAddressSize(4);
346  else if (elf_header->e_ident[EI_CLASS] == ELFCLASS64)
347    byte_reader.SetAddressSize(8);
348  else {
349    fprintf(stderr, "%s: bad file class in ELF header: %d\n",
350            dwarf_filename.c_str(), elf_header->e_ident[EI_CLASS]);
351    return false;
352  }
353  // Provide the base addresses for .eh_frame encoded pointers, if
354  // possible.
355  byte_reader.SetCFIDataBase(section->sh_addr, cfi);
356  if (got_section)
357    byte_reader.SetDataBase(got_section->sh_addr);
358  if (text_section)
359    byte_reader.SetTextBase(text_section->sh_addr);
360
361  dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
362                                                       section_name);
363  dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
364                                     &byte_reader, &handler, &dwarf_reporter,
365                                     eh_frame);
366  parser.Start();
367  return true;
368}
369
370bool LoadELF(const std::string &obj_file, MmapWrapper* map_wrapper,
371             ElfW(Ehdr) **elf_header) {
372  int obj_fd = open(obj_file.c_str(), O_RDONLY);
373  if (obj_fd < 0) {
374    fprintf(stderr, "Failed to open ELF file '%s': %s\n",
375            obj_file.c_str(), strerror(errno));
376    return false;
377  }
378  FDWrapper obj_fd_wrapper(obj_fd);
379  struct stat st;
380  if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
381    fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
382            obj_file.c_str(), strerror(errno));
383    return false;
384  }
385  void *obj_base = mmap(NULL, st.st_size,
386                        PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
387  if (obj_base == MAP_FAILED) {
388    fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
389            obj_file.c_str(), strerror(errno));
390    return false;
391  }
392  map_wrapper->set(obj_base, st.st_size);
393  *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base);
394  if (!IsValidElf(*elf_header)) {
395    fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
396    return false;
397  }
398  return true;
399}
400
// Report the byte order recorded in ELF_HEADER's identification bytes:
// set *BIG_ENDIAN to false for ELFDATA2LSB or to true for ELFDATA2MSB,
// and return true. For any other encoding, print a message, leave
// *BIG_ENDIAN untouched and return false.
bool ElfEndianness(const ElfW(Ehdr) *elf_header, bool *big_endian) {
  const unsigned char encoding = elf_header->e_ident[EI_DATA];
  switch (encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
  return false;
}
416
417// Read the .gnu_debuglink and get the debug file name. If anything goes
418// wrong, return an empty string.
419static std::string ReadDebugLink(const ElfW(Shdr) *debuglink_section,
420                                 const std::string &obj_file,
421                                 const std::string &debug_dir) {
422  char *debuglink = reinterpret_cast<char *>(debuglink_section->sh_offset);
423  size_t debuglink_len = strlen(debuglink) + 5;  // '\0' + CRC32.
424  debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round to nearest 4 bytes.
425
426  // Sanity check.
427  if (debuglink_len != debuglink_section->sh_size) {
428    fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
429            "%zx %zx\n", debuglink_len, debuglink_section->sh_size);
430    return "";
431  }
432
433  std::string debuglink_path = debug_dir + "/" + debuglink;
434  int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
435  if (debuglink_fd < 0) {
436    fprintf(stderr, "Failed to open debug ELF file '%s' for '%s': %s\n",
437            debuglink_path.c_str(), obj_file.c_str(), strerror(errno));
438    return "";
439  }
440  FDWrapper debuglink_fd_wrapper(debuglink_fd);
441  // TODO(thestig) check the CRC-32 at the end of the .gnu_debuglink
442  // section.
443
444  return debuglink_path;
445}
446
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case we have to
// follow the .gnu_debuglink section and load debug information from a
// different file.
//
class LoadSymbolsInfo {
 public:
  // DBG_DIR is copied, so the caller's string need not outlive this
  // object.
  explicit LoadSymbolsInfo(const std::string &dbg_dir) :
    debug_dir_(dbg_dir),
    has_loading_addr_(false),
    loading_addr_(0) {}

  // Record that SECTION has been loaded. Loading the same section a
  // second time (for instance from two different files) only produces a
  // warning.
  void LoadedSection(const std::string &section) {
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // We expect the ELF file and linked debug file to have the same preferred
  // loading address. The first call records ADDR and FILENAME; every later
  // call checks its ADDR against the recorded one, and on a mismatch
  // prints a message and asserts.
  void set_loading_addr(ElfW(Addr) addr, const std::string &filename) {
    if (!has_loading_addr_) {
      // Mark the address as recorded, otherwise later calls would keep
      // overwriting it and the comparison below could never run.
      has_loading_addr_ = true;
      loading_addr_ = addr;
      loaded_file_ = filename;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::string &debug_dir() const {
    return debug_dir_;
  }

  std::string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(std::string file) {
    debuglink_file_ = file;
  }

 private:
  const std::string debug_dir_;  // Directory with the debug ELF file.

  std::string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  ElfW(Addr) loading_addr_;  // Saves the preferred loading address from the
                             // first call to LoadSymbols().

  std::string loaded_file_;  // Name of the file loaded from the first call to
                             // LoadSymbols().

  std::set<std::string> loaded_sections_;  // Tracks the Loaded ELF sections
                                           // between calls to LoadSymbols().
};
517
518static bool LoadSymbols(const std::string &obj_file,
519                        const bool big_endian,
520                        ElfW(Ehdr) *elf_header,
521                        const bool read_gnu_debug_link,
522                        LoadSymbolsInfo *info,
523                        Module *module) {
524  // Translate all offsets in section headers into address.
525  FixAddress(elf_header);
526  ElfW(Addr) loading_addr = GetLoadingAddress(
527      reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff),
528      elf_header->e_phnum);
529  module->SetLoadAddress(loading_addr);
530  info->set_loading_addr(loading_addr, obj_file);
531
532  const ElfW(Shdr) *sections =
533      reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
534  const ElfW(Shdr) *section_names = sections + elf_header->e_shstrndx;
535  bool found_debug_info_section = false;
536  bool found_usable_info = false;
537
538  // Look for STABS debugging information, and load it if present.
539  const ElfW(Shdr) *stab_section
540      = FindSectionByName(".stab", sections, section_names,
541                          elf_header->e_shnum);
542  if (stab_section) {
543    const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections;
544    if (stabstr_section) {
545      found_debug_info_section = true;
546      found_usable_info = true;
547      info->LoadedSection(".stab");
548      if (!LoadStabs(elf_header, stab_section, stabstr_section, big_endian,
549                     module)) {
550        fprintf(stderr, "%s: \".stab\" section found, but failed to load STABS"
551                " debugging information\n", obj_file.c_str());
552      }
553    }
554  }
555
556  // Look for DWARF debugging information, and load it if present.
557  const ElfW(Shdr) *dwarf_section
558      = FindSectionByName(".debug_info", sections, section_names,
559                          elf_header->e_shnum);
560  if (dwarf_section) {
561    found_debug_info_section = true;
562    found_usable_info = true;
563    info->LoadedSection(".debug_info");
564    if (!LoadDwarf(obj_file, elf_header, big_endian, module))
565      fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
566              "DWARF debugging information\n", obj_file.c_str());
567  }
568
569  // Dwarf Call Frame Information (CFI) is actually independent from
570  // the other DWARF debugging information, and can be used alone.
571  const ElfW(Shdr) *dwarf_cfi_section =
572      FindSectionByName(".debug_frame", sections, section_names,
573                          elf_header->e_shnum);
574  if (dwarf_cfi_section) {
575    // Ignore the return value of this function; even without call frame
576    // information, the other debugging information could be perfectly
577    // useful.
578    info->LoadedSection(".debug_frame");
579    bool result =
580      LoadDwarfCFI(obj_file, elf_header, ".debug_frame",
581                   dwarf_cfi_section, false, 0, 0, big_endian, module);
582    found_usable_info = found_usable_info || result;
583  }
584
585  // Linux C++ exception handling information can also provide
586  // unwinding data.
587  const ElfW(Shdr) *eh_frame_section =
588      FindSectionByName(".eh_frame", sections, section_names,
589                        elf_header->e_shnum);
590  if (eh_frame_section) {
591    // Pointers in .eh_frame data may be relative to the base addresses of
592    // certain sections. Provide those sections if present.
593    const ElfW(Shdr) *got_section =
594      FindSectionByName(".got", sections, section_names, elf_header->e_shnum);
595    const ElfW(Shdr) *text_section =
596      FindSectionByName(".text", sections, section_names,
597                        elf_header->e_shnum);
598    info->LoadedSection(".eh_frame");
599    // As above, ignore the return value of this function.
600    bool result =
601      LoadDwarfCFI(obj_file, elf_header, ".eh_frame", eh_frame_section, true,
602                   got_section, text_section, big_endian, module);
603    found_usable_info = found_usable_info || result;
604  }
605
606  if (!found_debug_info_section) {
607    fprintf(stderr, "%s: file contains no debugging information"
608            " (no \".stab\" or \".debug_info\" sections)\n",
609            obj_file.c_str());
610
611    // Failed, but maybe we can find a .gnu_debuglink section?
612    if (read_gnu_debug_link) {
613      const ElfW(Shdr) *gnu_debuglink_section
614          = FindSectionByName(".gnu_debuglink", sections, section_names,
615                              elf_header->e_shnum);
616      if (gnu_debuglink_section) {
617        if (!info->debug_dir().empty()) {
618          std::string debuglink_file =
619              ReadDebugLink(gnu_debuglink_section, obj_file, info->debug_dir());
620          info->set_debuglink_file(debuglink_file);
621        } else {
622          fprintf(stderr, ".gnu_debuglink section found in '%s', "
623                  "but no debug path specified.\n", obj_file.c_str());
624        }
625      } else {
626        fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
627                obj_file.c_str());
628      }
629    } else {
630      // The caller doesn't want to consult .gnu_debuglink.
631      // See if there are export symbols available.
632      const ElfW(Shdr) *dynsym_section =
633        FindSectionByName(".dynsym", sections, section_names,
634                          elf_header->e_shnum);
635      const ElfW(Shdr) *dynstr_section =
636        FindSectionByName(".dynstr", sections, section_names,
637                          elf_header->e_shnum);
638      if (dynsym_section && dynstr_section) {
639        info->LoadedSection(".dynsym");
640        fprintf(stderr, "Have .dynsym + .dynstr\n");
641
642        uint8_t* dynsyms =
643          reinterpret_cast<uint8_t*>(dynsym_section->sh_offset);
644        uint8_t* dynstrs =
645          reinterpret_cast<uint8_t*>(dynstr_section->sh_offset);
646        bool result =
647          ELFSymbolsToModule(dynsyms,
648                             dynsym_section->sh_size,
649                             dynstrs,
650                             dynstr_section->sh_size,
651                             big_endian,
652                             // This could change to something more useful
653                             // when support for dumping cross-architecture
654                             // symbols is finished.
655                             sizeof(ElfW(Addr)),
656                             module);
657        found_usable_info = found_usable_info || result;
658      }
659
660      // Return true if some usable information was found, since
661      // the caller doesn't want to use .gnu_debuglink.
662      return found_usable_info;
663    }
664
665    // No debug info was found, let the user try again with .gnu_debuglink
666    // if present.
667    return false;
668  }
669
670  return true;
671}
672
// Map the machine type recorded in ELF_HEADER to the architecture name
// used in Breakpad symbol files. Return NULL for a machine type that is
// not in the table below.
const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) {
  static const struct {
    ElfW(Half) machine;
    const char *name;
  } kKnownArchitectures[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };
  const size_t known_count =
      sizeof(kKnownArchitectures) / sizeof(kKnownArchitectures[0]);

  const ElfW(Half) wanted = elf_header->e_machine;
  for (size_t index = 0; index < known_count; ++index) {
    if (kKnownArchitectures[index].machine == wanted)
      return kKnownArchitectures[index].name;
  }
  return NULL;
}
690
691// Format the Elf file identifier in IDENTIFIER as a UUID with the
692// dashes removed.
693std::string FormatIdentifier(unsigned char identifier[16]) {
694  char identifier_str[40];
695  google_breakpad::FileID::ConvertIdentifierToString(
696      identifier,
697      identifier_str,
698      sizeof(identifier_str));
699  std::string id_no_dash;
700  for (int i = 0; identifier_str[i] != '\0'; ++i)
701    if (identifier_str[i] != '-')
702      id_no_dash += identifier_str[i];
703  // Add an extra "0" by the end.  PDB files on Windows have an 'age'
704  // number appended to the end of the file identifier; this isn't
705  // really used or necessary on other platforms, but let's preserve
706  // the pattern.
707  id_no_dash += '0';
708  return id_no_dash;
709}
710
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes. A name that
// ends in a slash therefore yields an empty string.
std::string BaseFileName(const std::string &filename) {
  // Searching the string directly needs no temporary C copy of the name
  // (whose allocation could fail) and does not depend on which variant
  // of basename() the included headers happen to select.
  const std::string::size_type last_slash = filename.rfind('/');
  if (last_slash == std::string::npos)
    return filename;
  return filename.substr(last_slash + 1);
}
720
721}  // namespace
722
723namespace google_breakpad {
724
725// Not explicitly exported, but not static so it can be used in unit tests.
726// Ideally obj_file would be const, but internally this code does write
727// to some ELF header fields to make its work simpler.
728bool WriteSymbolFileInternal(uint8_t* obj_file,
729                             const std::string &obj_filename,
730                             const std::string &debug_dir,
731                             bool cfi,
732                             std::ostream &sym_stream) {
733  ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_file);
734
735  if (!IsValidElf(elf_header)) {
736    fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
737    return false;
738  }
739
740  unsigned char identifier[16];
741  if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header,
742                                                                identifier)) {
743    fprintf(stderr, "%s: unable to generate file identifier\n",
744            obj_filename.c_str());
745    return false;
746  }
747
748  const char *architecture = ElfArchitecture(elf_header);
749  if (!architecture) {
750    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
751            obj_filename.c_str(), elf_header->e_machine);
752    return false;
753  }
754
755  // Figure out what endianness this file is.
756  bool big_endian;
757  if (!ElfEndianness(elf_header, &big_endian))
758    return false;
759
760  std::string name = BaseFileName(obj_filename);
761  std::string os = "Linux";
762  std::string id = FormatIdentifier(identifier);
763
764  LoadSymbolsInfo info(debug_dir);
765  Module module(name, os, architecture, id);
766  if (!LoadSymbols(obj_filename, big_endian, elf_header, !debug_dir.empty(),
767                   &info, &module)) {
768    const std::string debuglink_file = info.debuglink_file();
769    if (debuglink_file.empty())
770      return false;
771
772    // Load debuglink ELF file.
773    fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
774    MmapWrapper debug_map_wrapper;
775    ElfW(Ehdr) *debug_elf_header = NULL;
776    if (!LoadELF(debuglink_file, &debug_map_wrapper, &debug_elf_header))
777      return false;
778    // Sanity checks to make sure everything matches up.
779    const char *debug_architecture = ElfArchitecture(debug_elf_header);
780    if (!debug_architecture) {
781      fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
782              debuglink_file.c_str(), debug_elf_header->e_machine);
783      return false;
784    }
785    if (strcmp(architecture, debug_architecture)) {
786      fprintf(stderr, "%s with ELF machine architecture %s does not match "
787              "%s with ELF architecture %s\n",
788              debuglink_file.c_str(), debug_architecture,
789              obj_filename.c_str(), architecture);
790      return false;
791    }
792
793    bool debug_big_endian;
794    if (!ElfEndianness(debug_elf_header, &debug_big_endian))
795      return false;
796    if (debug_big_endian != big_endian) {
797      fprintf(stderr, "%s and %s does not match in endianness\n",
798              obj_filename.c_str(), debuglink_file.c_str());
799      return false;
800    }
801
802    if (!LoadSymbols(debuglink_file, debug_big_endian, debug_elf_header,
803                     false, &info, &module)) {
804      return false;
805    }
806  }
807  if (!module.Write(sym_stream, cfi))
808    return false;
809
810  return true;
811}
812
813bool WriteSymbolFile(const std::string &obj_file,
814                     const std::string &debug_dir,
815                     bool cfi,
816                     std::ostream &sym_stream) {
817  MmapWrapper map_wrapper;
818  ElfW(Ehdr) *elf_header = NULL;
819  if (!LoadELF(obj_file, &map_wrapper, &elf_header))
820    return false;
821
822  return WriteSymbolFileInternal(reinterpret_cast<uint8_t*>(elf_header),
823                                 obj_file, debug_dir, cfi, sym_stream);
824}
825
826}  // namespace google_breakpad