/thirdparty/breakpad/common/linux/dump_symbols.cc

http://github.com/tomahawk-player/tomahawk · C++ · 826 lines · 601 code · 81 blank · 144 comment · 91 complexity · c0b62a4995393a1dd7da0b564094571b MD5 · raw file

  1. // Copyright (c) 2011 Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
  30. // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
  31. // Find all the debugging info in a file and dump it as a Breakpad symbol file.
  32. #include "common/linux/dump_symbols.h"
  33. #include <assert.h>
  34. #include <elf.h>
  35. #include <errno.h>
  36. #include <fcntl.h>
  37. #include <link.h>
  38. #include <stdio.h>
  39. #include <stdlib.h>
  40. #include <string.h>
  41. #include <sys/mman.h>
  42. #include <sys/stat.h>
  43. #include <unistd.h>
  44. #include <iostream>
  45. #include <set>
  46. #include <string>
  47. #include <utility>
  48. #include <vector>
  49. #include "common/dwarf/bytereader-inl.h"
  50. #include "common/dwarf/dwarf2diehandler.h"
  51. #include "common/dwarf_cfi_to_module.h"
  52. #include "common/dwarf_cu_to_module.h"
  53. #include "common/dwarf_line_to_module.h"
  54. #include "common/linux/elf_symbols_to_module.h"
  55. #include "common/linux/file_id.h"
  56. #include "common/module.h"
  57. #include "common/stabs_reader.h"
  58. #include "common/stabs_to_module.h"
  59. // This namespace contains helper functions.
  60. namespace {
  61. using google_breakpad::DwarfCFIToModule;
  62. using google_breakpad::DwarfCUToModule;
  63. using google_breakpad::DwarfLineToModule;
  64. using google_breakpad::Module;
  65. using google_breakpad::StabsToModule;
  //
  // FDWrapper
  //
  // Owns a file descriptor: the descriptor is closed when the wrapper is
  // destroyed, unless ownership was given up first with release().
  //
  class FDWrapper {
   public:
    // Takes ownership of FD. A value of -1 means "owns nothing".
    explicit FDWrapper(int fd) : fd_(fd) {}

    ~FDWrapper() {
      if (fd_ != -1)
        close(fd_);
    }

    // Returns the wrapped descriptor; ownership stays with the wrapper.
    int get() const {
      return fd_;
    }

    // Hands the descriptor back to the caller. Afterwards the wrapper holds
    // -1 and its destructor closes nothing.
    int release() {
      const int released_fd = fd_;
      fd_ = -1;
      return released_fd;
    }

   private:
    // A copy would make two wrappers close the same descriptor, so the copy
    // operations are declared here and deliberately never defined.
    FDWrapper(const FDWrapper &);
    FDWrapper &operator=(const FDWrapper &);

    int fd_;
  };
  //
  // MmapWrapper
  //
  // Remembers one mapped region and unmaps it on destruction. A wrapper on
  // which set() was never called (for example because mapping the file
  // failed) owns nothing and is destroyed without side effects.
  //
  class MmapWrapper {
   public:
    MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

    ~MmapWrapper() {
      // Only touch base_/size_ once set() has stored real values in them.
      if (is_set_ && base_ != NULL) {
        assert(size_ > 0);
        munmap(base_, size_);
      }
    }

    // Records the region starting at MAPPED_ADDRESS, MAPPED_SIZE bytes long,
    // as the one to unmap on destruction.
    void set(void *mapped_address, size_t mapped_size) {
      is_set_ = true;
      base_ = mapped_address;
      size_ = mapped_size;
    }

    // Forgets the recorded region without unmapping it. Must only be called
    // after set().
    void release() {
      assert(is_set_);
      base_ = NULL;
      size_ = 0;
    }

   private:
    // Copying would unmap the same region twice; declared, never defined.
    MmapWrapper(const MmapWrapper &);
    MmapWrapper &operator=(const MmapWrapper &);

    bool is_set_;
    void *base_;
    size_t size_;
  };
  // Turn the file offsets stored in a mapped ELF image into absolute
  // addresses by adding the address the image is mapped at. This rewrites,
  // in place, the header's e_phoff and e_shoff and every section header's
  // sh_offset, so later code can cast those fields straight to pointers.
  static void FixAddress(void *obj_base) {
    ElfW(Ehdr) *header = static_cast<ElfW(Ehdr) *>(obj_base);
    const ElfW(Addr) image_start = reinterpret_cast<ElfW(Addr)>(obj_base);

    header->e_phoff += image_start;
    header->e_shoff += image_start;

    // e_shoff is an address by now, so it can be used as the table pointer.
    ElfW(Shdr) *entry = reinterpret_cast<ElfW(Shdr) *>(header->e_shoff);
    ElfW(Shdr) *const table_end = entry + header->e_shnum;
    for (; entry != table_end; ++entry)
      entry->sh_offset += image_start;
  }
  // Return the preferred loading address described by the NHEADER program
  // headers at PROGRAM_HEADERS: the p_vaddr of the first PT_LOAD segment
  // whose file offset is zero. Return 0 when there is no such segment.
  static ElfW(Addr) GetLoadingAddress(const ElfW(Phdr) *program_headers,
                                      int nheader) {
    const ElfW(Phdr) *segment = program_headers;
    for (int remaining = nheader; remaining > 0; --remaining, ++segment) {
      if (segment->p_type != PT_LOAD)
        continue;
      if (segment->p_offset != 0)
        continue;
      // A loadable segment that starts at the beginning of the file.
      return segment->p_vaddr;
    }
    // No PT_LOAD segment at offset zero.
    return 0;
  }
  // Return true if ELF_HEADER begins with the four-byte ELF magic number.
  // Nothing beyond those four bytes is examined.
  static bool IsValidElf(const ElfW(Ehdr) *elf_header) {
    const unsigned char *magic = elf_header->e_ident;
    return magic[EI_MAG0] == ELFMAG0 &&
           magic[EI_MAG1] == ELFMAG1 &&
           magic[EI_MAG2] == ELFMAG2 &&
           magic[EI_MAG3] == ELFMAG3;
  }
  // Look through the NSECTION section headers at SECTIONS for the first one
  // whose name is NAME and return it. SECTION_NAMES is the header of the
  // section-name string table; its sh_offset, like every other sh_offset
  // used here, must already be an absolute address.
  //
  // Returns NULL if NAME is empty, if no section has that name, or if the
  // first matching section has type SHT_NOBITS (a note is printed to stderr
  // in that last case).
  static const ElfW(Shdr) *FindSectionByName(const char *name,
                                             const ElfW(Shdr) *sections,
                                             const ElfW(Shdr) *section_names,
                                             int nsection) {
    assert(name != NULL);
    assert(sections != NULL);
    assert(nsection > 0);

    const int wanted_len = strlen(name);
    if (wanted_len == 0)
      return NULL;

    // One past the last byte of the name table; a candidate name is only
    // compared when NAME plus its terminator fits before this point.
    const char *table_end =
        reinterpret_cast<char*>(section_names->sh_offset +
                                section_names->sh_size);

    const ElfW(Shdr) *const sections_end = sections + nsection;
    for (const ElfW(Shdr) *candidate = sections;
         candidate != sections_end; ++candidate) {
      const char *candidate_name =
          reinterpret_cast<char*>(section_names->sh_offset +
                                  candidate->sh_name);
      if (table_end - candidate_name < wanted_len + 1)
        continue;
      if (strcmp(name, candidate_name) != 0)
        continue;

      if (candidate->sh_type != SHT_NOBITS)
        return candidate;

      fprintf(stderr,
              "Section %s found, but ignored because type=SHT_NOBITS.\n",
              name);
      return NULL;
    }
    return NULL;
  }
  177. static bool LoadStabs(const ElfW(Ehdr) *elf_header,
  178. const ElfW(Shdr) *stab_section,
  179. const ElfW(Shdr) *stabstr_section,
  180. const bool big_endian,
  181. Module *module) {
  182. // A callback object to handle data from the STABS reader.
  183. StabsToModule handler(module);
  184. // Find the addresses of the STABS data, and create a STABS reader object.
  185. // On Linux, STABS entries always have 32-bit values, regardless of the
  186. // address size of the architecture whose code they're describing, and
  187. // the strings are always "unitized".
  188. uint8_t *stabs = reinterpret_cast<uint8_t *>(stab_section->sh_offset);
  189. uint8_t *stabstr = reinterpret_cast<uint8_t *>(stabstr_section->sh_offset);
  190. google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
  191. stabstr, stabstr_section->sh_size,
  192. big_endian, 4, true, &handler);
  193. // Read the STABS data, and do post-processing.
  194. if (!reader.Process())
  195. return false;
  196. handler.Finalize();
  197. return true;
  198. }
  199. // A line-to-module loader that accepts line number info parsed by
  200. // dwarf2reader::LineInfo and populates a Module and a line vector
  201. // with the results.
  202. class DumperLineToModule: public DwarfCUToModule::LineToModuleFunctor {
  203. public:
  204. // Create a line-to-module converter using BYTE_READER.
  205. explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
  206. : byte_reader_(byte_reader) { }
  207. void operator()(const char *program, uint64 length,
  208. Module *module, std::vector<Module::Line> *lines) {
  209. DwarfLineToModule handler(module, lines);
  210. dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
  211. parser.Start();
  212. }
  213. private:
  214. dwarf2reader::ByteReader *byte_reader_;
  215. };
  216. static bool LoadDwarf(const std::string &dwarf_filename,
  217. const ElfW(Ehdr) *elf_header,
  218. const bool big_endian,
  219. Module *module) {
  220. const dwarf2reader::Endianness endianness = big_endian ?
  221. dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
  222. dwarf2reader::ByteReader byte_reader(endianness);
  223. // Construct a context for this file.
  224. DwarfCUToModule::FileContext file_context(dwarf_filename, module);
  225. // Build a map of the ELF file's sections.
  226. const ElfW(Shdr) *sections
  227. = reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
  228. int num_sections = elf_header->e_shnum;
  229. const ElfW(Shdr) *section_names = sections + elf_header->e_shstrndx;
  230. for (int i = 0; i < num_sections; i++) {
  231. const ElfW(Shdr) *section = &sections[i];
  232. std::string name = reinterpret_cast<const char *>(section_names->sh_offset +
  233. section->sh_name);
  234. const char *contents = reinterpret_cast<const char *>(section->sh_offset);
  235. uint64 length = section->sh_size;
  236. file_context.section_map[name] = std::make_pair(contents, length);
  237. }
  238. // Parse all the compilation units in the .debug_info section.
  239. DumperLineToModule line_to_module(&byte_reader);
  240. std::pair<const char *, uint64> debug_info_section
  241. = file_context.section_map[".debug_info"];
  242. // We should never have been called if the file doesn't have a
  243. // .debug_info section.
  244. assert(debug_info_section.first);
  245. uint64 debug_info_length = debug_info_section.second;
  246. for (uint64 offset = 0; offset < debug_info_length;) {
  247. // Make a handler for the root DIE that populates MODULE with the
  248. // data we find.
  249. DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
  250. DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
  251. // Make a Dwarf2Handler that drives our DIEHandler.
  252. dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
  253. // Make a DWARF parser for the compilation unit at OFFSET.
  254. dwarf2reader::CompilationUnit reader(file_context.section_map,
  255. offset,
  256. &byte_reader,
  257. &die_dispatcher);
  258. // Process the entire compilation unit; get the offset of the next.
  259. offset += reader.Start();
  260. }
  261. return true;
  262. }
  263. // Fill REGISTER_NAMES with the register names appropriate to the
  264. // machine architecture given in HEADER, indexed by the register
  265. // numbers used in DWARF call frame information. Return true on
  266. // success, or false if we don't recognize HEADER's machine
  267. // architecture.
  268. static bool DwarfCFIRegisterNames(const ElfW(Ehdr) *elf_header,
  269. std::vector<std::string> *register_names) {
  270. switch (elf_header->e_machine) {
  271. case EM_386:
  272. *register_names = DwarfCFIToModule::RegisterNames::I386();
  273. return true;
  274. case EM_ARM:
  275. *register_names = DwarfCFIToModule::RegisterNames::ARM();
  276. return true;
  277. case EM_X86_64:
  278. *register_names = DwarfCFIToModule::RegisterNames::X86_64();
  279. return true;
  280. default:
  281. return false;
  282. }
  283. }
  284. static bool LoadDwarfCFI(const std::string &dwarf_filename,
  285. const ElfW(Ehdr) *elf_header,
  286. const char *section_name,
  287. const ElfW(Shdr) *section,
  288. const bool eh_frame,
  289. const ElfW(Shdr) *got_section,
  290. const ElfW(Shdr) *text_section,
  291. const bool big_endian,
  292. Module *module) {
  293. // Find the appropriate set of register names for this file's
  294. // architecture.
  295. std::vector<std::string> register_names;
  296. if (!DwarfCFIRegisterNames(elf_header, &register_names)) {
  297. fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
  298. " cannot convert DWARF call frame information\n",
  299. dwarf_filename.c_str(), elf_header->e_machine);
  300. return false;
  301. }
  302. const dwarf2reader::Endianness endianness = big_endian ?
  303. dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
  304. // Find the call frame information and its size.
  305. const char *cfi = reinterpret_cast<const char *>(section->sh_offset);
  306. size_t cfi_size = section->sh_size;
  307. // Plug together the parser, handler, and their entourages.
  308. DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
  309. DwarfCFIToModule handler(module, register_names, &module_reporter);
  310. dwarf2reader::ByteReader byte_reader(endianness);
  311. // Since we're using the ElfW macro, we're not actually capable of
  312. // processing both ELF32 and ELF64 files with the same program; that
  313. // would take a bit more work. But this will work out well enough.
  314. if (elf_header->e_ident[EI_CLASS] == ELFCLASS32)
  315. byte_reader.SetAddressSize(4);
  316. else if (elf_header->e_ident[EI_CLASS] == ELFCLASS64)
  317. byte_reader.SetAddressSize(8);
  318. else {
  319. fprintf(stderr, "%s: bad file class in ELF header: %d\n",
  320. dwarf_filename.c_str(), elf_header->e_ident[EI_CLASS]);
  321. return false;
  322. }
  323. // Provide the base addresses for .eh_frame encoded pointers, if
  324. // possible.
  325. byte_reader.SetCFIDataBase(section->sh_addr, cfi);
  326. if (got_section)
  327. byte_reader.SetDataBase(got_section->sh_addr);
  328. if (text_section)
  329. byte_reader.SetTextBase(text_section->sh_addr);
  330. dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
  331. section_name);
  332. dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
  333. &byte_reader, &handler, &dwarf_reporter,
  334. eh_frame);
  335. parser.Start();
  336. return true;
  337. }
  338. bool LoadELF(const std::string &obj_file, MmapWrapper* map_wrapper,
  339. ElfW(Ehdr) **elf_header) {
  340. int obj_fd = open(obj_file.c_str(), O_RDONLY);
  341. if (obj_fd < 0) {
  342. fprintf(stderr, "Failed to open ELF file '%s': %s\n",
  343. obj_file.c_str(), strerror(errno));
  344. return false;
  345. }
  346. FDWrapper obj_fd_wrapper(obj_fd);
  347. struct stat st;
  348. if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
  349. fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
  350. obj_file.c_str(), strerror(errno));
  351. return false;
  352. }
  353. void *obj_base = mmap(NULL, st.st_size,
  354. PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
  355. if (obj_base == MAP_FAILED) {
  356. fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
  357. obj_file.c_str(), strerror(errno));
  358. return false;
  359. }
  360. map_wrapper->set(obj_base, st.st_size);
  361. *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_base);
  362. if (!IsValidElf(*elf_header)) {
  363. fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
  364. return false;
  365. }
  366. return true;
  367. }
  // Report the byte order declared in ELF_HEADER's EI_DATA field. Sets
  // *BIG_ENDIAN and returns true for ELFDATA2LSB (false) and ELFDATA2MSB
  // (true). For any other value, prints a message to stderr, leaves
  // *BIG_ENDIAN untouched and returns false.
  bool ElfEndianness(const ElfW(Ehdr) *elf_header, bool *big_endian) {
    switch (elf_header->e_ident[EI_DATA]) {
      case ELFDATA2LSB:
        *big_endian = false;
        return true;
      case ELFDATA2MSB:
        *big_endian = true;
        return true;
      default:
        break;
    }

    fprintf(stderr, "bad data encoding in ELF header: %d\n",
            elf_header->e_ident[EI_DATA]);
    return false;
  }
  // Read the debug file name stored in DEBUGLINK_SECTION (a .gnu_debuglink
  // section whose sh_offset is already an absolute address) and return the
  // path DEBUG_DIR + "/" + that name, provided the file can be opened for
  // reading. OBJ_FILE is used only in diagnostics.
  //
  // Returns an empty string, after printing a message to stderr, if the
  // section's size does not match its contents or the file cannot be
  // opened.
  static std::string ReadDebugLink(const ElfW(Shdr) *debuglink_section,
                                   const std::string &obj_file,
                                   const std::string &debug_dir) {
    const char *debuglink =
        reinterpret_cast<const char *>(debuglink_section->sh_offset);
    const size_t section_size = debuglink_section->sh_size;

    // The name comes from the file being examined, so never scan past the
    // end of the section looking for its terminator. If there is none,
    // strnlen() returns SECTION_SIZE and the size check below fails.
    size_t debuglink_len = strnlen(debuglink, section_size) + 5;  // '\0' + CRC32.
    debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round up to 4 bytes.

    // Sanity check: name, terminator, padding and CRC must fill the section.
    if (debuglink_len != section_size) {
      fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
              "%zx %zx\n", debuglink_len, section_size);
      return "";
    }

    std::string debuglink_path = debug_dir + "/" + debuglink;

    // The file is opened only to confirm it is readable.
    int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
    if (debuglink_fd < 0) {
      fprintf(stderr, "Failed to open debug ELF file '%s' for '%s': %s\n",
              debuglink_path.c_str(), obj_file.c_str(), strerror(errno));
      return "";
    }
    close(debuglink_fd);

    // TODO(thestig) check the CRC-32 at the end of the .gnu_debuglink
    // section.

    return debuglink_path;
  }
  //
  // LoadSymbolsInfo
  //
  // Holds the state between the two calls to LoadSymbols() in case we have to
  // follow the .gnu_debuglink section and load debug information from a
  // different file.
  //
  class LoadSymbolsInfo {
   public:
    // DBG_DIR is the directory in which to look for the debug ELF file. The
    // string is copied, so the argument need not outlive this object.
    explicit LoadSymbolsInfo(const std::string &dbg_dir) :
        debug_dir_(dbg_dir),
        has_loading_addr_(false),
        loading_addr_(0) {}

    // Note that SECTION has been loaded. Loading the same section a second
    // time (for instance once from each of the two files) is reported on
    // stderr.
    void LoadedSection(const std::string &section) {
      const bool newly_inserted = loaded_sections_.insert(section).second;
      if (!newly_inserted) {
        fprintf(stderr, "Section %s has already been loaded.\n",
                section.c_str());
      }
    }

    // We expect the ELF file and linked debug file to have the same preferred
    // loading address. The first call records ADDR and FILENAME; each later
    // call compares its ADDR with the recorded one and, on a mismatch,
    // prints a message and fails an assertion.
    void set_loading_addr(ElfW(Addr) addr, const std::string &filename) {
      if (!has_loading_addr_) {
        loading_addr_ = addr;
        loaded_file_ = filename;
        // Without this the comparison below could never be reached.
        has_loading_addr_ = true;
        return;
      }

      if (addr != loading_addr_) {
        fprintf(stderr,
                "ELF file '%s' and debug ELF file '%s' "
                "have different load addresses.\n",
                loaded_file_.c_str(), filename.c_str());
        assert(false);
      }
    }

    // Setters and getters
    const std::string &debug_dir() const {
      return debug_dir_;
    }

    std::string debuglink_file() const {
      return debuglink_file_;
    }
    void set_debuglink_file(std::string file) {
      debuglink_file_ = file;
    }

   private:
    const std::string debug_dir_;  // Directory with the debug ELF file.

    std::string debuglink_file_;  // Full path to the debug ELF file.

    bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

    ElfW(Addr) loading_addr_;  // Saves the preferred loading address from the
                               // first call to LoadSymbols().

    std::string loaded_file_;  // Name of the file loaded from the first call to
                               // LoadSymbols().

    std::set<std::string> loaded_sections_;  // Tracks the Loaded ELF sections
                                             // between calls to LoadSymbols().
  };
  467. static bool LoadSymbols(const std::string &obj_file,
  468. const bool big_endian,
  469. ElfW(Ehdr) *elf_header,
  470. const bool read_gnu_debug_link,
  471. LoadSymbolsInfo *info,
  472. Module *module) {
  473. // Translate all offsets in section headers into address.
  474. FixAddress(elf_header);
  475. ElfW(Addr) loading_addr = GetLoadingAddress(
  476. reinterpret_cast<ElfW(Phdr) *>(elf_header->e_phoff),
  477. elf_header->e_phnum);
  478. module->SetLoadAddress(loading_addr);
  479. info->set_loading_addr(loading_addr, obj_file);
  480. const ElfW(Shdr) *sections =
  481. reinterpret_cast<ElfW(Shdr) *>(elf_header->e_shoff);
  482. const ElfW(Shdr) *section_names = sections + elf_header->e_shstrndx;
  483. bool found_debug_info_section = false;
  484. bool found_usable_info = false;
  485. // Look for STABS debugging information, and load it if present.
  486. const ElfW(Shdr) *stab_section
  487. = FindSectionByName(".stab", sections, section_names,
  488. elf_header->e_shnum);
  489. if (stab_section) {
  490. const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections;
  491. if (stabstr_section) {
  492. found_debug_info_section = true;
  493. found_usable_info = true;
  494. info->LoadedSection(".stab");
  495. if (!LoadStabs(elf_header, stab_section, stabstr_section, big_endian,
  496. module)) {
  497. fprintf(stderr, "%s: \".stab\" section found, but failed to load STABS"
  498. " debugging information\n", obj_file.c_str());
  499. }
  500. }
  501. }
  502. // Look for DWARF debugging information, and load it if present.
  503. const ElfW(Shdr) *dwarf_section
  504. = FindSectionByName(".debug_info", sections, section_names,
  505. elf_header->e_shnum);
  506. if (dwarf_section) {
  507. found_debug_info_section = true;
  508. found_usable_info = true;
  509. info->LoadedSection(".debug_info");
  510. if (!LoadDwarf(obj_file, elf_header, big_endian, module))
  511. fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
  512. "DWARF debugging information\n", obj_file.c_str());
  513. }
  514. // Dwarf Call Frame Information (CFI) is actually independent from
  515. // the other DWARF debugging information, and can be used alone.
  516. const ElfW(Shdr) *dwarf_cfi_section =
  517. FindSectionByName(".debug_frame", sections, section_names,
  518. elf_header->e_shnum);
  519. if (dwarf_cfi_section) {
  520. // Ignore the return value of this function; even without call frame
  521. // information, the other debugging information could be perfectly
  522. // useful.
  523. info->LoadedSection(".debug_frame");
  524. bool result =
  525. LoadDwarfCFI(obj_file, elf_header, ".debug_frame",
  526. dwarf_cfi_section, false, 0, 0, big_endian, module);
  527. found_usable_info = found_usable_info || result;
  528. }
  529. // Linux C++ exception handling information can also provide
  530. // unwinding data.
  531. const ElfW(Shdr) *eh_frame_section =
  532. FindSectionByName(".eh_frame", sections, section_names,
  533. elf_header->e_shnum);
  534. if (eh_frame_section) {
  535. // Pointers in .eh_frame data may be relative to the base addresses of
  536. // certain sections. Provide those sections if present.
  537. const ElfW(Shdr) *got_section =
  538. FindSectionByName(".got", sections, section_names, elf_header->e_shnum);
  539. const ElfW(Shdr) *text_section =
  540. FindSectionByName(".text", sections, section_names,
  541. elf_header->e_shnum);
  542. info->LoadedSection(".eh_frame");
  543. // As above, ignore the return value of this function.
  544. bool result =
  545. LoadDwarfCFI(obj_file, elf_header, ".eh_frame", eh_frame_section, true,
  546. got_section, text_section, big_endian, module);
  547. found_usable_info = found_usable_info || result;
  548. }
  549. if (!found_debug_info_section) {
  550. fprintf(stderr, "%s: file contains no debugging information"
  551. " (no \".stab\" or \".debug_info\" sections)\n",
  552. obj_file.c_str());
  553. // Failed, but maybe we can find a .gnu_debuglink section?
  554. if (read_gnu_debug_link) {
  555. const ElfW(Shdr) *gnu_debuglink_section
  556. = FindSectionByName(".gnu_debuglink", sections, section_names,
  557. elf_header->e_shnum);
  558. if (gnu_debuglink_section) {
  559. if (!info->debug_dir().empty()) {
  560. std::string debuglink_file =
  561. ReadDebugLink(gnu_debuglink_section, obj_file, info->debug_dir());
  562. info->set_debuglink_file(debuglink_file);
  563. } else {
  564. fprintf(stderr, ".gnu_debuglink section found in '%s', "
  565. "but no debug path specified.\n", obj_file.c_str());
  566. }
  567. } else {
  568. fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
  569. obj_file.c_str());
  570. }
  571. } else {
  572. // The caller doesn't want to consult .gnu_debuglink.
  573. // See if there are export symbols available.
  574. const ElfW(Shdr) *dynsym_section =
  575. FindSectionByName(".dynsym", sections, section_names,
  576. elf_header->e_shnum);
  577. const ElfW(Shdr) *dynstr_section =
  578. FindSectionByName(".dynstr", sections, section_names,
  579. elf_header->e_shnum);
  580. if (dynsym_section && dynstr_section) {
  581. info->LoadedSection(".dynsym");
  582. fprintf(stderr, "Have .dynsym + .dynstr\n");
  583. uint8_t* dynsyms =
  584. reinterpret_cast<uint8_t*>(dynsym_section->sh_offset);
  585. uint8_t* dynstrs =
  586. reinterpret_cast<uint8_t*>(dynstr_section->sh_offset);
  587. bool result =
  588. ELFSymbolsToModule(dynsyms,
  589. dynsym_section->sh_size,
  590. dynstrs,
  591. dynstr_section->sh_size,
  592. big_endian,
  593. // This could change to something more useful
  594. // when support for dumping cross-architecture
  595. // symbols is finished.
  596. sizeof(ElfW(Addr)),
  597. module);
  598. found_usable_info = found_usable_info || result;
  599. }
  600. // Return true if some usable information was found, since
  601. // the caller doesn't want to use .gnu_debuglink.
  602. return found_usable_info;
  603. }
  604. // No debug info was found, let the user try again with .gnu_debuglink
  605. // if present.
  606. return false;
  607. }
  608. return true;
  609. }
  // Map the machine type recorded in ELF_HEADER to the architecture name
  // used in Breakpad symbol files. Returns NULL for a machine type that is
  // not in the table below.
  const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) {
    static const struct {
      ElfW(Half) machine;
      const char *name;
    } kArchitectures[] = {
      { EM_386,     "x86" },
      { EM_ARM,     "arm" },
      { EM_MIPS,    "mips" },
      { EM_PPC64,   "ppc64" },
      { EM_PPC,     "ppc" },
      { EM_S390,    "s390" },
      { EM_SPARC,   "sparc" },
      { EM_SPARCV9, "sparcv9" },
      { EM_X86_64,  "x86_64" },
    };
    const size_t kCount = sizeof(kArchitectures) / sizeof(kArchitectures[0]);

    const ElfW(Half) wanted = elf_header->e_machine;
    for (size_t i = 0; i < kCount; ++i) {
      if (kArchitectures[i].machine == wanted)
        return kArchitectures[i].name;
    }
    return NULL;
  }
  627. // Format the Elf file identifier in IDENTIFIER as a UUID with the
  628. // dashes removed.
  629. std::string FormatIdentifier(unsigned char identifier[16]) {
  630. char identifier_str[40];
  631. google_breakpad::FileID::ConvertIdentifierToString(
  632. identifier,
  633. identifier_str,
  634. sizeof(identifier_str));
  635. std::string id_no_dash;
  636. for (int i = 0; identifier_str[i] != '\0'; ++i)
  637. if (identifier_str[i] != '-')
  638. id_no_dash += identifier_str[i];
  639. // Add an extra "0" by the end. PDB files on Windows have an 'age'
  640. // number appended to the end of the file identifier; this isn't
  641. // really used or necessary on other platforms, but let's preserve
  642. // the pattern.
  643. id_no_dash += '0';
  644. return id_no_dash;
  645. }
  // Return the non-directory portion of FILENAME: the portion after the
  // last slash, or the whole filename if there are no slashes. A name that
  // ends in a slash therefore yields an empty string.
  std::string BaseFileName(const std::string &filename) {
    // Searching the string directly needs no scratch copy, cannot fail on
    // allocation, and does not depend on which basename() variant the
    // system headers happen to declare.
    const std::string::size_type last_slash = filename.rfind('/');
    if (last_slash == std::string::npos)
      return filename;
    return filename.substr(last_slash + 1);
  }
  655. } // namespace
  656. namespace google_breakpad {
  657. // Not explicitly exported, but not static so it can be used in unit tests.
  658. // Ideally obj_file would be const, but internally this code does write
  659. // to some ELF header fields to make its work simpler.
  660. bool WriteSymbolFileInternal(uint8_t* obj_file,
  661. const std::string &obj_filename,
  662. const std::string &debug_dir,
  663. bool cfi,
  664. std::ostream &sym_stream) {
  665. ElfW(Ehdr) *elf_header = reinterpret_cast<ElfW(Ehdr) *>(obj_file);
  666. if (!IsValidElf(elf_header)) {
  667. fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
  668. return false;
  669. }
  670. unsigned char identifier[16];
  671. if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header,
  672. identifier)) {
  673. fprintf(stderr, "%s: unable to generate file identifier\n",
  674. obj_filename.c_str());
  675. return false;
  676. }
  677. const char *architecture = ElfArchitecture(elf_header);
  678. if (!architecture) {
  679. fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
  680. obj_filename.c_str(), elf_header->e_machine);
  681. return false;
  682. }
  683. // Figure out what endianness this file is.
  684. bool big_endian;
  685. if (!ElfEndianness(elf_header, &big_endian))
  686. return false;
  687. std::string name = BaseFileName(obj_filename);
  688. std::string os = "Linux";
  689. std::string id = FormatIdentifier(identifier);
  690. LoadSymbolsInfo info(debug_dir);
  691. Module module(name, os, architecture, id);
  692. if (!LoadSymbols(obj_filename, big_endian, elf_header, !debug_dir.empty(),
  693. &info, &module)) {
  694. const std::string debuglink_file = info.debuglink_file();
  695. if (debuglink_file.empty())
  696. return false;
  697. // Load debuglink ELF file.
  698. fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
  699. MmapWrapper debug_map_wrapper;
  700. ElfW(Ehdr) *debug_elf_header = NULL;
  701. if (!LoadELF(debuglink_file, &debug_map_wrapper, &debug_elf_header))
  702. return false;
  703. // Sanity checks to make sure everything matches up.
  704. const char *debug_architecture = ElfArchitecture(debug_elf_header);
  705. if (!debug_architecture) {
  706. fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
  707. debuglink_file.c_str(), debug_elf_header->e_machine);
  708. return false;
  709. }
  710. if (strcmp(architecture, debug_architecture)) {
  711. fprintf(stderr, "%s with ELF machine architecture %s does not match "
  712. "%s with ELF architecture %s\n",
  713. debuglink_file.c_str(), debug_architecture,
  714. obj_filename.c_str(), architecture);
  715. return false;
  716. }
  717. bool debug_big_endian;
  718. if (!ElfEndianness(debug_elf_header, &debug_big_endian))
  719. return false;
  720. if (debug_big_endian != big_endian) {
  721. fprintf(stderr, "%s and %s does not match in endianness\n",
  722. obj_filename.c_str(), debuglink_file.c_str());
  723. return false;
  724. }
  725. if (!LoadSymbols(debuglink_file, debug_big_endian, debug_elf_header,
  726. false, &info, &module)) {
  727. return false;
  728. }
  729. }
  730. if (!module.Write(sym_stream, cfi))
  731. return false;
  732. return true;
  733. }
  734. bool WriteSymbolFile(const std::string &obj_file,
  735. const std::string &debug_dir,
  736. bool cfi,
  737. std::ostream &sym_stream) {
  738. MmapWrapper map_wrapper;
  739. ElfW(Ehdr) *elf_header = NULL;
  740. if (!LoadELF(obj_file, &map_wrapper, &elf_header))
  741. return false;
  742. return WriteSymbolFileInternal(reinterpret_cast<uint8_t*>(elf_header),
  743. obj_file, debug_dir, cfi, sym_stream);
  744. }
  745. } // namespace google_breakpad