PageRenderTime 73ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 1ms

/contrib/llvm-project/lld/MachO/InputFiles.cpp

https://github.com/freebsd/freebsd
C++ | 1202 lines | 838 code | 101 blank | 263 comment | 210 complexity | 7ee410160a7edff24fc49cbfba6313aa MD5 | raw file
  1. //===- InputFiles.cpp -----------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains functions to parse Mach-O object files. In this comment,
  10. // we describe the Mach-O file structure and how we parse it.
  11. //
  12. // Mach-O is not very different from ELF or COFF. The notion of symbols,
  13. // sections and relocations exists in Mach-O as it does in ELF and COFF.
  14. //
  15. // Perhaps the notion that is new to those who know ELF/COFF is "subsections".
  16. // In ELF/COFF, sections are an atomic unit of data copied from input files to
  17. // output files. When we merge or garbage-collect sections, we treat each
  18. // section as an atomic unit. In Mach-O, that's not the case. Sections can
  19. // consist of multiple subsections, and subsections are a unit of merging and
  20. // garbage-collecting. Therefore, Mach-O's subsections are more similar to
  21. // ELF/COFF's sections than Mach-O's sections are.
  22. //
  23. // A section can have multiple symbols. A symbol that does not have the
  24. // N_ALT_ENTRY attribute indicates a beginning of a subsection. Therefore, by
  25. // definition, a symbol is always present at the beginning of each subsection. A
  26. // symbol with N_ALT_ENTRY attribute does not start a new subsection and can
  27. // point to a middle of a subsection.
  28. //
  29. // The notion of subsections also affects how relocations are represented in
  30. // Mach-O. All references within a section need to be explicitly represented as
  31. // relocations if they refer to different subsections, because we obviously need
  32. // to fix up addresses if subsections are laid out in an output file differently
  33. // than they were in object files. To represent that, Mach-O relocations can
  34. // refer to an unnamed location via its address. Scattered relocations (those
  35. // with the R_SCATTERED bit set) always refer to unnamed locations.
  36. // Non-scattered relocations refer to an unnamed location if r_extern is not set
  37. // and r_symbolnum is zero.
  38. //
  39. // Without the above differences, I think you can use your knowledge about ELF
  40. // and COFF for Mach-O.
  41. //
  42. //===----------------------------------------------------------------------===//
  43. #include "InputFiles.h"
  44. #include "Config.h"
  45. #include "Driver.h"
  46. #include "Dwarf.h"
  47. #include "ExportTrie.h"
  48. #include "InputSection.h"
  49. #include "MachOStructs.h"
  50. #include "ObjC.h"
  51. #include "OutputSection.h"
  52. #include "OutputSegment.h"
  53. #include "SymbolTable.h"
  54. #include "Symbols.h"
  55. #include "SyntheticSections.h"
  56. #include "Target.h"
  57. #include "lld/Common/CommonLinkerContext.h"
  58. #include "lld/Common/DWARF.h"
  59. #include "lld/Common/Reproduce.h"
  60. #include "llvm/ADT/iterator.h"
  61. #include "llvm/BinaryFormat/MachO.h"
  62. #include "llvm/LTO/LTO.h"
  63. #include "llvm/Support/BinaryStreamReader.h"
  64. #include "llvm/Support/Endian.h"
  65. #include "llvm/Support/MemoryBuffer.h"
  66. #include "llvm/Support/Path.h"
  67. #include "llvm/Support/TarWriter.h"
  68. #include "llvm/Support/TimeProfiler.h"
  69. #include "llvm/TextAPI/Architecture.h"
  70. #include "llvm/TextAPI/InterfaceFile.h"
  71. #include <type_traits>
  72. using namespace llvm;
  73. using namespace llvm::MachO;
  74. using namespace llvm::support::endian;
  75. using namespace llvm::sys;
  76. using namespace lld;
  77. using namespace lld::macho;
  78. // Returns "<internal>", "foo.a(bar.o)", or "baz.o".
  79. std::string lld::toString(const InputFile *f) {
  80. if (!f)
  81. return "<internal>";
  82. // Multiple dylibs can be defined in one .tbd file.
  83. if (auto dylibFile = dyn_cast<DylibFile>(f))
  84. if (f->getName().endswith(".tbd"))
  85. return (f->getName() + "(" + dylibFile->installName + ")").str();
  86. if (f->archiveName.empty())
  87. return std::string(f->getName());
  88. return (f->archiveName + "(" + path::filename(f->getName()) + ")").str();
  89. }
  // The set of input files, kept in insertion order without duplicates
  // (presumably populated by the driver as files are loaded -- confirm there).
  SetVector<InputFile *> macho::inputFiles;
  // Optional tar writer. When non-null, readFile() appends every file it
  // reads to this archive.
  std::unique_ptr<TarWriter> macho::tar;
  // Counter from which InputFile constructors draw their `id` (post-incremented
  // on each use).
  int InputFile::idCount = 0;
  93. static VersionTuple decodeVersion(uint32_t version) {
  94. unsigned major = version >> 16;
  95. unsigned minor = (version >> 8) & 0xffu;
  96. unsigned subMinor = version & 0xffu;
  97. return VersionTuple(major, minor, subMinor);
  98. }
  99. static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) {
  100. if (!isa<ObjFile>(input) && !isa<DylibFile>(input))
  101. return {};
  102. const char *hdr = input->mb.getBufferStart();
  103. std::vector<PlatformInfo> platformInfos;
  104. for (auto *cmd : findCommands<build_version_command>(hdr, LC_BUILD_VERSION)) {
  105. PlatformInfo info;
  106. info.target.Platform = static_cast<PlatformType>(cmd->platform);
  107. info.minimum = decodeVersion(cmd->minos);
  108. platformInfos.emplace_back(std::move(info));
  109. }
  110. for (auto *cmd : findCommands<version_min_command>(
  111. hdr, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
  112. LC_VERSION_MIN_TVOS, LC_VERSION_MIN_WATCHOS)) {
  113. PlatformInfo info;
  114. switch (cmd->cmd) {
  115. case LC_VERSION_MIN_MACOSX:
  116. info.target.Platform = PLATFORM_MACOS;
  117. break;
  118. case LC_VERSION_MIN_IPHONEOS:
  119. info.target.Platform = PLATFORM_IOS;
  120. break;
  121. case LC_VERSION_MIN_TVOS:
  122. info.target.Platform = PLATFORM_TVOS;
  123. break;
  124. case LC_VERSION_MIN_WATCHOS:
  125. info.target.Platform = PLATFORM_WATCHOS;
  126. break;
  127. }
  128. info.minimum = decodeVersion(cmd->version);
  129. platformInfos.emplace_back(std::move(info));
  130. }
  131. return platformInfos;
  132. }
  133. static bool checkCompatibility(const InputFile *input) {
  134. std::vector<PlatformInfo> platformInfos = getPlatformInfos(input);
  135. if (platformInfos.empty())
  136. return true;
  137. auto it = find_if(platformInfos, [&](const PlatformInfo &info) {
  138. return removeSimulator(info.target.Platform) ==
  139. removeSimulator(config->platform());
  140. });
  141. if (it == platformInfos.end()) {
  142. std::string platformNames;
  143. raw_string_ostream os(platformNames);
  144. interleave(
  145. platformInfos, os,
  146. [&](const PlatformInfo &info) {
  147. os << getPlatformName(info.target.Platform);
  148. },
  149. "/");
  150. error(toString(input) + " has platform " + platformNames +
  151. Twine(", which is different from target platform ") +
  152. getPlatformName(config->platform()));
  153. return false;
  154. }
  155. if (it->minimum > config->platformInfo.minimum)
  156. warn(toString(input) + " has version " + it->minimum.getAsString() +
  157. ", which is newer than target minimum of " +
  158. config->platformInfo.minimum.getAsString());
  159. return true;
  160. }
  // Memoizes the result of readFile(), keyed by the path that was requested.
  //
  // This cache mostly exists to store system libraries (and .tbds) as they're
  // loaded, rather than the input archives, which are already cached at a higher
  // level, and other files like the filelist that are only read once.
  // Theoretically this caching could be more efficient by hoisting it, but that
  // would require altering many callers to track the state.
  DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
  167. // Open a given file path and return it as a memory-mapped file.
  168. Optional<MemoryBufferRef> macho::readFile(StringRef path) {
  169. CachedHashStringRef key(path);
  170. auto entry = cachedReads.find(key);
  171. if (entry != cachedReads.end())
  172. return entry->second;
  173. ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
  174. if (std::error_code ec = mbOrErr.getError()) {
  175. error("cannot open " + path + ": " + ec.message());
  176. return None;
  177. }
  178. std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
  179. MemoryBufferRef mbref = mb->getMemBufferRef();
  180. make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership
  181. // If this is a regular non-fat file, return it.
  182. const char *buf = mbref.getBufferStart();
  183. const auto *hdr = reinterpret_cast<const fat_header *>(buf);
  184. if (mbref.getBufferSize() < sizeof(uint32_t) ||
  185. read32be(&hdr->magic) != FAT_MAGIC) {
  186. if (tar)
  187. tar->append(relativeToRoot(path), mbref.getBuffer());
  188. return cachedReads[key] = mbref;
  189. }
  190. llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
  191. // Object files and archive files may be fat files, which contain multiple
  192. // real files for different CPU ISAs. Here, we search for a file that matches
  193. // with the current link target and returns it as a MemoryBufferRef.
  194. const auto *arch = reinterpret_cast<const fat_arch *>(buf + sizeof(*hdr));
  195. for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) {
  196. if (reinterpret_cast<const char *>(arch + i + 1) >
  197. buf + mbref.getBufferSize()) {
  198. error(path + ": fat_arch struct extends beyond end of file");
  199. return None;
  200. }
  201. if (read32be(&arch[i].cputype) != static_cast<uint32_t>(target->cpuType) ||
  202. read32be(&arch[i].cpusubtype) != target->cpuSubtype)
  203. continue;
  204. uint32_t offset = read32be(&arch[i].offset);
  205. uint32_t size = read32be(&arch[i].size);
  206. if (offset + size > mbref.getBufferSize())
  207. error(path + ": slice extends beyond end of file");
  208. if (tar)
  209. tar->append(relativeToRoot(path), mbref.getBuffer());
  210. return cachedReads[key] = MemoryBufferRef(StringRef(buf + offset, size),
  211. path.copy(bAlloc));
  212. }
  213. error("unable to find matching architecture in " + path);
  214. return None;
  215. }
  216. InputFile::InputFile(Kind kind, const InterfaceFile &interface)
  217. : id(idCount++), fileKind(kind), name(saver().save(interface.getPath())) {}
  218. // Some sections comprise of fixed-size records, so instead of splitting them at
  219. // symbol boundaries, we split them based on size. Records are distinct from
  220. // literals in that they may contain references to other sections, instead of
  221. // being leaf nodes in the InputSection graph.
  222. //
  223. // Note that "record" is a term I came up with. In contrast, "literal" is a term
  224. // used by the Mach-O format.
  225. static Optional<size_t> getRecordSize(StringRef segname, StringRef name) {
  226. if (name == section_names::cfString) {
  227. if (config->icfLevel != ICFLevel::none && segname == segment_names::data)
  228. return target->wordSize == 8 ? 32 : 16;
  229. } else if (name == section_names::compactUnwind) {
  230. if (segname == segment_names::ld)
  231. return target->wordSize == 8 ? 32 : 20;
  232. }
  233. return {};
  234. }
  235. // Parse the sequence of sections within a single LC_SEGMENT(_64).
  236. // Split each section into subsections.
  237. template <class SectionHeader>
  238. void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
  239. sections.reserve(sectionHeaders.size());
  240. auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  241. for (const SectionHeader &sec : sectionHeaders) {
  242. StringRef name =
  243. StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
  244. StringRef segname =
  245. StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
  246. ArrayRef<uint8_t> data = {isZeroFill(sec.flags) ? nullptr
  247. : buf + sec.offset,
  248. static_cast<size_t>(sec.size)};
  249. if (sec.align >= 32) {
  250. error("alignment " + std::to_string(sec.align) + " of section " + name +
  251. " is too large");
  252. sections.push_back(sec.addr);
  253. continue;
  254. }
  255. uint32_t align = 1 << sec.align;
  256. uint32_t flags = sec.flags;
  257. auto splitRecords = [&](int recordSize) -> void {
  258. sections.push_back(sec.addr);
  259. if (data.empty())
  260. return;
  261. Subsections &subsections = sections.back().subsections;
  262. subsections.reserve(data.size() / recordSize);
  263. auto *isec = make<ConcatInputSection>(
  264. segname, name, this, data.slice(0, recordSize), align, flags);
  265. subsections.push_back({0, isec});
  266. for (uint64_t off = recordSize; off < data.size(); off += recordSize) {
  267. // Copying requires less memory than constructing a fresh InputSection.
  268. auto *copy = make<ConcatInputSection>(*isec);
  269. copy->data = data.slice(off, recordSize);
  270. subsections.push_back({off, copy});
  271. }
  272. };
  273. if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
  274. (config->dedupLiterals && isWordLiteralSection(sec.flags))) {
  275. if (sec.nreloc && config->dedupLiterals)
  276. fatal(toString(this) + " contains relocations in " + sec.segname + "," +
  277. sec.sectname +
  278. ", so LLD cannot deduplicate literals. Try re-running without "
  279. "--deduplicate-literals.");
  280. InputSection *isec;
  281. if (sectionType(sec.flags) == S_CSTRING_LITERALS) {
  282. isec =
  283. make<CStringInputSection>(segname, name, this, data, align, flags);
  284. // FIXME: parallelize this?
  285. cast<CStringInputSection>(isec)->splitIntoPieces();
  286. } else {
  287. isec = make<WordLiteralInputSection>(segname, name, this, data, align,
  288. flags);
  289. }
  290. sections.push_back(sec.addr);
  291. sections.back().subsections.push_back({0, isec});
  292. } else if (auto recordSize = getRecordSize(segname, name)) {
  293. splitRecords(*recordSize);
  294. if (name == section_names::compactUnwind)
  295. compactUnwindSection = &sections.back();
  296. } else if (segname == segment_names::llvm) {
  297. if (name == "__cg_profile" && config->callGraphProfileSort) {
  298. TimeTraceScope timeScope("Parsing call graph section");
  299. BinaryStreamReader reader(data, support::little);
  300. while (!reader.empty()) {
  301. uint32_t fromIndex, toIndex;
  302. uint64_t count;
  303. if (Error err = reader.readInteger(fromIndex))
  304. fatal(toString(this) + ": Expected 32-bit integer");
  305. if (Error err = reader.readInteger(toIndex))
  306. fatal(toString(this) + ": Expected 32-bit integer");
  307. if (Error err = reader.readInteger(count))
  308. fatal(toString(this) + ": Expected 64-bit integer");
  309. callGraph.emplace_back();
  310. CallGraphEntry &entry = callGraph.back();
  311. entry.fromIndex = fromIndex;
  312. entry.toIndex = toIndex;
  313. entry.count = count;
  314. }
  315. }
  316. // ld64 does not appear to emit contents from sections within the __LLVM
  317. // segment. Symbols within those sections point to bitcode metadata
  318. // instead of actual symbols. Global symbols within those sections could
  319. // have the same name without causing duplicate symbol errors. Push an
  320. // empty entry to ensure indices line up for the remaining sections.
  321. // TODO: Evaluate whether the bitcode metadata is needed.
  322. sections.push_back(sec.addr);
  323. } else {
  324. auto *isec =
  325. make<ConcatInputSection>(segname, name, this, data, align, flags);
  326. if (isDebugSection(isec->getFlags()) &&
  327. isec->getSegName() == segment_names::dwarf) {
  328. // Instead of emitting DWARF sections, we emit STABS symbols to the
  329. // object files that contain them. We filter them out early to avoid
  330. // parsing their relocations unnecessarily. But we must still push an
  331. // empty entry to ensure the indices line up for the remaining sections.
  332. sections.push_back(sec.addr);
  333. debugSections.push_back(isec);
  334. } else {
  335. sections.push_back(sec.addr);
  336. sections.back().subsections.push_back({0, isec});
  337. }
  338. }
  339. }
  340. }
  341. // Find the subsection corresponding to the greatest section offset that is <=
  342. // that of the given offset.
  343. //
  344. // offset: an offset relative to the start of the original InputSection (before
  345. // any subsection splitting has occurred). It will be updated to represent the
  346. // same location as an offset relative to the start of the containing
  347. // subsection.
  348. template <class T>
  349. static InputSection *findContainingSubsection(const Subsections &subsections,
  350. T *offset) {
  351. static_assert(std::is_same<uint64_t, T>::value ||
  352. std::is_same<uint32_t, T>::value,
  353. "unexpected type for offset");
  354. auto it = std::prev(llvm::upper_bound(
  355. subsections, *offset,
  356. [](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
  357. *offset -= it->offset;
  358. return it->isec;
  359. }
  360. template <class SectionHeader>
  361. static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
  362. relocation_info rel) {
  363. const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
  364. bool valid = true;
  365. auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) {
  366. valid = false;
  367. return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
  368. std::to_string(rel.r_address) + " of " + sec.segname + "," +
  369. sec.sectname + " in " + toString(file))
  370. .str();
  371. };
  372. if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern)
  373. error(message("must be extern"));
  374. if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel)
  375. error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") +
  376. "be PC-relative"));
  377. if (isThreadLocalVariables(sec.flags) &&
  378. !relocAttrs.hasAttr(RelocAttrBits::UNSIGNED))
  379. error(message("not allowed in thread-local section, must be UNSIGNED"));
  380. if (rel.r_length < 2 || rel.r_length > 3 ||
  381. !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {
  382. static SmallVector<StringRef, 4> widths{"0", "4", "8", "4 or 8"};
  383. error(message("has width " + std::to_string(1 << rel.r_length) +
  384. " bytes, but must be " +
  385. widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] +
  386. " bytes"));
  387. }
  388. return valid;
  389. }
  390. template <class SectionHeader>
  391. void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
  392. const SectionHeader &sec,
  393. Subsections &subsections) {
  394. auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  395. ArrayRef<relocation_info> relInfos(
  396. reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
  397. auto subsecIt = subsections.rbegin();
  398. for (size_t i = 0; i < relInfos.size(); i++) {
  399. // Paired relocations serve as Mach-O's method for attaching a
  400. // supplemental datum to a primary relocation record. ELF does not
  401. // need them because the *_RELOC_RELA records contain the extra
  402. // addend field, vs. *_RELOC_REL which omit the addend.
  403. //
  404. // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
  405. // and the paired *_RELOC_UNSIGNED record holds the minuend. The
  406. // datum for each is a symbolic address. The result is the offset
  407. // between two addresses.
  408. //
  409. // The ARM64_RELOC_ADDEND record holds the addend, and the paired
  410. // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
  411. // base symbolic address.
  412. //
  413. // Note: X86 does not use *_RELOC_ADDEND because it can embed an
  414. // addend into the instruction stream. On X86, a relocatable address
  415. // field always occupies an entire contiguous sequence of byte(s),
  416. // so there is no need to merge opcode bits with address
  417. // bits. Therefore, it's easy and convenient to store addends in the
  418. // instruction-stream bytes that would otherwise contain zeroes. By
  419. // contrast, RISC ISAs such as ARM64 mix opcode bits with with
  420. // address bits so that bitwise arithmetic is necessary to extract
  421. // and insert them. Storing addends in the instruction stream is
  422. // possible, but inconvenient and more costly at link time.
  423. relocation_info relInfo = relInfos[i];
  424. bool isSubtrahend =
  425. target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
  426. if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
  427. // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
  428. // adds local "EH_Frame1" and "func.eh". Ignore them because they have
  429. // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
  430. ++i;
  431. continue;
  432. }
  433. int64_t pairedAddend = 0;
  434. if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
  435. pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
  436. relInfo = relInfos[++i];
  437. }
  438. assert(i < relInfos.size());
  439. if (!validateRelocationInfo(this, sec, relInfo))
  440. continue;
  441. if (relInfo.r_address & R_SCATTERED)
  442. fatal("TODO: Scattered relocations not supported");
  443. int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
  444. assert(!(embeddedAddend && pairedAddend));
  445. int64_t totalAddend = pairedAddend + embeddedAddend;
  446. Reloc r;
  447. r.type = relInfo.r_type;
  448. r.pcrel = relInfo.r_pcrel;
  449. r.length = relInfo.r_length;
  450. r.offset = relInfo.r_address;
  451. if (relInfo.r_extern) {
  452. r.referent = symbols[relInfo.r_symbolnum];
  453. r.addend = isSubtrahend ? 0 : totalAddend;
  454. } else {
  455. assert(!isSubtrahend);
  456. const SectionHeader &referentSecHead =
  457. sectionHeaders[relInfo.r_symbolnum - 1];
  458. uint64_t referentOffset;
  459. if (relInfo.r_pcrel) {
  460. // The implicit addend for pcrel section relocations is the pcrel offset
  461. // in terms of the addresses in the input file. Here we adjust it so
  462. // that it describes the offset from the start of the referent section.
  463. // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't
  464. // have pcrel section relocations. We may want to factor this out into
  465. // the arch-specific .cpp file.
  466. assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
  467. referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend -
  468. referentSecHead.addr;
  469. } else {
  470. // The addend for a non-pcrel relocation is its absolute address.
  471. referentOffset = totalAddend - referentSecHead.addr;
  472. }
  473. Subsections &referentSubsections =
  474. sections[relInfo.r_symbolnum - 1].subsections;
  475. r.referent =
  476. findContainingSubsection(referentSubsections, &referentOffset);
  477. r.addend = referentOffset;
  478. }
  479. // Find the subsection that this relocation belongs to.
  480. // Though not required by the Mach-O format, clang and gcc seem to emit
  481. // relocations in order, so let's take advantage of it. However, ld64 emits
  482. // unsorted relocations (in `-r` mode), so we have a fallback for that
  483. // uncommon case.
  484. InputSection *subsec;
  485. while (subsecIt != subsections.rend() && subsecIt->offset > r.offset)
  486. ++subsecIt;
  487. if (subsecIt == subsections.rend() ||
  488. subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
  489. subsec = findContainingSubsection(subsections, &r.offset);
  490. // Now that we know the relocs are unsorted, avoid trying the 'fast path'
  491. // for the other relocations.
  492. subsecIt = subsections.rend();
  493. } else {
  494. subsec = subsecIt->isec;
  495. r.offset -= subsecIt->offset;
  496. }
  497. subsec->relocs.push_back(r);
  498. if (isSubtrahend) {
  499. relocation_info minuendInfo = relInfos[++i];
  500. // SUBTRACTOR relocations should always be followed by an UNSIGNED one
  501. // attached to the same address.
  502. assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) &&
  503. relInfo.r_address == minuendInfo.r_address);
  504. Reloc p;
  505. p.type = minuendInfo.r_type;
  506. if (minuendInfo.r_extern) {
  507. p.referent = symbols[minuendInfo.r_symbolnum];
  508. p.addend = totalAddend;
  509. } else {
  510. uint64_t referentOffset =
  511. totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr;
  512. Subsections &referentSubsectVec =
  513. sections[minuendInfo.r_symbolnum - 1].subsections;
  514. p.referent =
  515. findContainingSubsection(referentSubsectVec, &referentOffset);
  516. p.addend = referentOffset;
  517. }
  518. subsec->relocs.push_back(p);
  519. }
  520. }
  521. }
  522. template <class NList>
  523. static macho::Symbol *createDefined(const NList &sym, StringRef name,
  524. InputSection *isec, uint64_t value,
  525. uint64_t size) {
  526. // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
  527. // N_EXT: Global symbols. These go in the symbol table during the link,
  528. // and also in the export table of the output so that the dynamic
  529. // linker sees them.
  530. // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped. These go in the
  531. // symbol table during the link so that duplicates are
  532. // either reported (for non-weak symbols) or merged
  533. // (for weak symbols), but they do not go in the export
  534. // table of the output.
  535. // N_PEXT: llvm-mc does not emit these, but `ld -r` (wherein ld64 emits
  536. // object files) may produce them. LLD does not yet support -r.
  537. // These are translation-unit scoped, identical to the `0` case.
  538. // 0: Translation-unit scoped. These are not in the symbol table during
  539. // link, and not in the export table of the output either.
  540. bool isWeakDefCanBeHidden =
  541. (sym.n_desc & (N_WEAK_DEF | N_WEAK_REF)) == (N_WEAK_DEF | N_WEAK_REF);
  542. if (sym.n_type & N_EXT) {
  543. bool isPrivateExtern = sym.n_type & N_PEXT;
  544. // lld's behavior for merging symbols is slightly different from ld64:
  545. // ld64 picks the winning symbol based on several criteria (see
  546. // pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld
  547. // just merges metadata and keeps the contents of the first symbol
  548. // with that name (see SymbolTable::addDefined). For:
  549. // * inline function F in a TU built with -fvisibility-inlines-hidden
  550. // * and inline function F in another TU built without that flag
  551. // ld64 will pick the one from the file built without
  552. // -fvisibility-inlines-hidden.
  553. // lld will instead pick the one listed first on the link command line and
  554. // give it visibility as if the function was built without
  555. // -fvisibility-inlines-hidden.
  556. // If both functions have the same contents, this will have the same
  557. // behavior. If not, it won't, but the input had an ODR violation in
  558. // that case.
  559. //
  560. // Similarly, merging a symbol
  561. // that's isPrivateExtern and not isWeakDefCanBeHidden with one
  562. // that's not isPrivateExtern but isWeakDefCanBeHidden technically
  563. // should produce one
  564. // that's not isPrivateExtern but isWeakDefCanBeHidden. That matters
  565. // with ld64's semantics, because it means the non-private-extern
  566. // definition will continue to take priority if more private extern
  567. // definitions are encountered. With lld's semantics there's no observable
  568. // difference between a symbol that's isWeakDefCanBeHidden(autohide) or one
  569. // that's privateExtern -- neither makes it into the dynamic symbol table,
  570. // unless the autohide symbol is explicitly exported.
  571. // But if a symbol is both privateExtern and autohide then it can't
  572. // be exported.
  573. // So we nullify the autohide flag when privateExtern is present
  574. // and promote the symbol to privateExtern when it is not already.
  575. if (isWeakDefCanBeHidden && isPrivateExtern)
  576. isWeakDefCanBeHidden = false;
  577. else if (isWeakDefCanBeHidden)
  578. isPrivateExtern = true;
  579. return symtab->addDefined(
  580. name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
  581. isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF,
  582. sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP,
  583. isWeakDefCanBeHidden);
  584. }
  585. assert(!isWeakDefCanBeHidden &&
  586. "weak_def_can_be_hidden on already-hidden symbol?");
  587. return make<Defined>(
  588. name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
  589. /*isExternal=*/false, /*isPrivateExtern=*/false,
  590. sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY,
  591. sym.n_desc & N_NO_DEAD_STRIP);
  592. }
  593. // Absolute symbols are defined symbols that do not have an associated
  594. // InputSection. They cannot be weak.
  595. template <class NList>
  596. static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
  597. StringRef name) {
  598. if (sym.n_type & N_EXT) {
  599. return symtab->addDefined(
  600. name, file, nullptr, sym.n_value, /*size=*/0,
  601. /*isWeakDef=*/false, sym.n_type & N_PEXT, sym.n_desc & N_ARM_THUMB_DEF,
  602. /*isReferencedDynamically=*/false, sym.n_desc & N_NO_DEAD_STRIP,
  603. /*isWeakDefCanBeHidden=*/false);
  604. }
  605. return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
  606. /*isWeakDef=*/false,
  607. /*isExternal=*/false, /*isPrivateExtern=*/false,
  608. sym.n_desc & N_ARM_THUMB_DEF,
  609. /*isReferencedDynamically=*/false,
  610. sym.n_desc & N_NO_DEAD_STRIP);
  611. }
  612. template <class NList>
  613. macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
  614. StringRef name) {
  615. uint8_t type = sym.n_type & N_TYPE;
  616. switch (type) {
  617. case N_UNDF:
  618. return sym.n_value == 0
  619. ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF)
  620. : symtab->addCommon(name, this, sym.n_value,
  621. 1 << GET_COMM_ALIGN(sym.n_desc),
  622. sym.n_type & N_PEXT);
  623. case N_ABS:
  624. return createAbsolute(sym, this, name);
  625. case N_PBUD:
  626. case N_INDR:
  627. error("TODO: support symbols of type " + std::to_string(type));
  628. return nullptr;
  629. case N_SECT:
  630. llvm_unreachable(
  631. "N_SECT symbols should not be passed to parseNonSectionSymbol");
  632. default:
  633. llvm_unreachable("invalid symbol type");
  634. }
  635. }
  636. template <class NList> static bool isUndef(const NList &sym) {
  637. return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0;
  638. }
  639. template <class LP>
  640. void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
  641. ArrayRef<typename LP::nlist> nList,
  642. const char *strtab, bool subsectionsViaSymbols) {
  643. using NList = typename LP::nlist;
  644. // Groups indices of the symbols by the sections that contain them.
  645. std::vector<std::vector<uint32_t>> symbolsBySection(sections.size());
  646. symbols.resize(nList.size());
  647. SmallVector<unsigned, 32> undefineds;
  648. for (uint32_t i = 0; i < nList.size(); ++i) {
  649. const NList &sym = nList[i];
  650. // Ignore debug symbols for now.
  651. // FIXME: may need special handling.
  652. if (sym.n_type & N_STAB)
  653. continue;
  654. StringRef name = strtab + sym.n_strx;
  655. if ((sym.n_type & N_TYPE) == N_SECT) {
  656. Subsections &subsections = sections[sym.n_sect - 1].subsections;
  657. // parseSections() may have chosen not to parse this section.
  658. if (subsections.empty())
  659. continue;
  660. symbolsBySection[sym.n_sect - 1].push_back(i);
  661. } else if (isUndef(sym)) {
  662. undefineds.push_back(i);
  663. } else {
  664. symbols[i] = parseNonSectionSymbol(sym, name);
  665. }
  666. }
  667. for (size_t i = 0; i < sections.size(); ++i) {
  668. Subsections &subsections = sections[i].subsections;
  669. if (subsections.empty())
  670. continue;
  671. InputSection *lastIsec = subsections.back().isec;
  672. if (lastIsec->getName() == section_names::ehFrame) {
  673. // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
  674. // adds local "EH_Frame1" and "func.eh". Ignore them because they have
  675. // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
  676. continue;
  677. }
  678. std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
  679. uint64_t sectionAddr = sectionHeaders[i].addr;
  680. uint32_t sectionAlign = 1u << sectionHeaders[i].align;
  681. // Record-based sections have already been split into subsections during
  682. // parseSections(), so we simply need to match Symbols to the corresponding
  683. // subsection here.
  684. if (getRecordSize(lastIsec->getSegName(), lastIsec->getName())) {
  685. for (size_t j = 0; j < symbolIndices.size(); ++j) {
  686. uint32_t symIndex = symbolIndices[j];
  687. const NList &sym = nList[symIndex];
  688. StringRef name = strtab + sym.n_strx;
  689. uint64_t symbolOffset = sym.n_value - sectionAddr;
  690. InputSection *isec =
  691. findContainingSubsection(subsections, &symbolOffset);
  692. if (symbolOffset != 0) {
  693. error(toString(lastIsec) + ": symbol " + name +
  694. " at misaligned offset");
  695. continue;
  696. }
  697. symbols[symIndex] = createDefined(sym, name, isec, 0, isec->getSize());
  698. }
  699. continue;
  700. }
  701. // Calculate symbol sizes and create subsections by splitting the sections
  702. // along symbol boundaries.
  703. // We populate subsections by repeatedly splitting the last (highest
  704. // address) subsection.
  705. llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
  706. return nList[lhs].n_value < nList[rhs].n_value;
  707. });
  708. for (size_t j = 0; j < symbolIndices.size(); ++j) {
  709. uint32_t symIndex = symbolIndices[j];
  710. const NList &sym = nList[symIndex];
  711. StringRef name = strtab + sym.n_strx;
  712. Subsection &subsec = subsections.back();
  713. InputSection *isec = subsec.isec;
  714. uint64_t subsecAddr = sectionAddr + subsec.offset;
  715. size_t symbolOffset = sym.n_value - subsecAddr;
  716. uint64_t symbolSize =
  717. j + 1 < symbolIndices.size()
  718. ? nList[symbolIndices[j + 1]].n_value - sym.n_value
  719. : isec->data.size() - symbolOffset;
  720. // There are 4 cases where we do not need to create a new subsection:
  721. // 1. If the input file does not use subsections-via-symbols.
  722. // 2. Multiple symbols at the same address only induce one subsection.
  723. // (The symbolOffset == 0 check covers both this case as well as
  724. // the first loop iteration.)
  725. // 3. Alternative entry points do not induce new subsections.
  726. // 4. If we have a literal section (e.g. __cstring and __literal4).
  727. if (!subsectionsViaSymbols || symbolOffset == 0 ||
  728. sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
  729. symbols[symIndex] =
  730. createDefined(sym, name, isec, symbolOffset, symbolSize);
  731. continue;
  732. }
  733. auto *concatIsec = cast<ConcatInputSection>(isec);
  734. auto *nextIsec = make<ConcatInputSection>(*concatIsec);
  735. nextIsec->wasCoalesced = false;
  736. if (isZeroFill(isec->getFlags())) {
  737. // Zero-fill sections have NULL data.data() non-zero data.size()
  738. nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
  739. isec->data = {nullptr, symbolOffset};
  740. } else {
  741. nextIsec->data = isec->data.slice(symbolOffset);
  742. isec->data = isec->data.slice(0, symbolOffset);
  743. }
  744. // By construction, the symbol will be at offset zero in the new
  745. // subsection.
  746. symbols[symIndex] =
  747. createDefined(sym, name, nextIsec, /*value=*/0, symbolSize);
  748. // TODO: ld64 appears to preserve the original alignment as well as each
  749. // subsection's offset from the last aligned address. We should consider
  750. // emulating that behavior.
  751. nextIsec->align = MinAlign(sectionAlign, sym.n_value);
  752. subsections.push_back({sym.n_value - sectionAddr, nextIsec});
  753. }
  754. }
  755. // Undefined symbols can trigger recursive fetch from Archives due to
  756. // LazySymbols. Process defined symbols first so that the relative order
  757. // between a defined symbol and an undefined symbol does not change the
  758. // symbol resolution behavior. In addition, a set of interconnected symbols
  759. // will all be resolved to the same file, instead of being resolved to
  760. // different files.
  761. for (unsigned i : undefineds) {
  762. const NList &sym = nList[i];
  763. StringRef name = strtab + sym.n_strx;
  764. symbols[i] = parseNonSectionSymbol(sym, name);
  765. }
  766. }
  767. OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
  768. StringRef sectName)
  769. : InputFile(OpaqueKind, mb) {
  770. const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  771. ArrayRef<uint8_t> data = {buf, mb.getBufferSize()};
  772. ConcatInputSection *isec =
  773. make<ConcatInputSection>(segName.take_front(16), sectName.take_front(16),
  774. /*file=*/this, data);
  775. isec->live = true;
  776. sections.push_back(0);
  777. sections.back().subsections.push_back({0, isec});
  778. }
  779. ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
  780. bool lazy)
  781. : InputFile(ObjKind, mb, lazy), modTime(modTime) {
  782. this->archiveName = std::string(archiveName);
  783. if (lazy) {
  784. if (target->wordSize == 8)
  785. parseLazy<LP64>();
  786. else
  787. parseLazy<ILP32>();
  788. } else {
  789. if (target->wordSize == 8)
  790. parse<LP64>();
  791. else
  792. parse<ILP32>();
  793. }
  794. }
  795. template <class LP> void ObjFile::parse() {
  796. using Header = typename LP::mach_header;
  797. using SegmentCommand = typename LP::segment_command;
  798. using SectionHeader = typename LP::section;
  799. using NList = typename LP::nlist;
  800. auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  801. auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
  802. Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
  803. if (arch != config->arch()) {
  804. auto msg = config->errorForArchMismatch
  805. ? static_cast<void (*)(const Twine &)>(error)
  806. : warn;
  807. msg(toString(this) + " has architecture " + getArchitectureName(arch) +
  808. " which is incompatible with target architecture " +
  809. getArchitectureName(config->arch()));
  810. return;
  811. }
  812. if (!checkCompatibility(this))
  813. return;
  814. for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) {
  815. StringRef data{reinterpret_cast<const char *>(cmd + 1),
  816. cmd->cmdsize - sizeof(linker_option_command)};
  817. parseLCLinkerOption(this, cmd->count, data);
  818. }
  819. ArrayRef<SectionHeader> sectionHeaders;
  820. if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
  821. auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
  822. sectionHeaders = ArrayRef<SectionHeader>{
  823. reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
  824. parseSections(sectionHeaders);
  825. }
  826. // TODO: Error on missing LC_SYMTAB?
  827. if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
  828. auto *c = reinterpret_cast<const symtab_command *>(cmd);
  829. ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
  830. c->nsyms);
  831. const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
  832. bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
  833. parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
  834. }
  835. // The relocations may refer to the symbols, so we parse them after we have
  836. // parsed all the symbols.
  837. for (size_t i = 0, n = sections.size(); i < n; ++i)
  838. if (!sections[i].subsections.empty())
  839. parseRelocations(sectionHeaders, sectionHeaders[i],
  840. sections[i].subsections);
  841. parseDebugInfo();
  842. if (compactUnwindSection)
  843. registerCompactUnwind();
  844. }
  845. template <class LP> void ObjFile::parseLazy() {
  846. using Header = typename LP::mach_header;
  847. using NList = typename LP::nlist;
  848. auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  849. auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
  850. const load_command *cmd = findCommand(hdr, LC_SYMTAB);
  851. if (!cmd)
  852. return;
  853. auto *c = reinterpret_cast<const symtab_command *>(cmd);
  854. ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
  855. c->nsyms);
  856. const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
  857. symbols.resize(nList.size());
  858. for (auto it : llvm::enumerate(nList)) {
  859. const NList &sym = it.value();
  860. if ((sym.n_type & N_EXT) && !isUndef(sym)) {
  861. // TODO: Bound checking
  862. StringRef name = strtab + sym.n_strx;
  863. symbols[it.index()] = symtab->addLazyObject(name, *this);
  864. if (!lazy)
  865. break;
  866. }
  867. }
  868. }
  869. void ObjFile::parseDebugInfo() {
  870. std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
  871. if (!dObj)
  872. return;
  873. auto *ctx = make<DWARFContext>(
  874. std::move(dObj), "",
  875. [&](Error err) {
  876. warn(toString(this) + ": " + toString(std::move(err)));
  877. },
  878. [&](Error warning) {
  879. warn(toString(this) + ": " + toString(std::move(warning)));
  880. });
  881. // TODO: Since object files can contain a lot of DWARF info, we should verify
  882. // that we are parsing just the info we need
  883. const DWARFContext::compile_unit_range &units = ctx->compile_units();
  884. // FIXME: There can be more than one compile unit per object file. See
  885. // PR48637.
  886. auto it = units.begin();
  887. compileUnit = it->get();
  888. }
  889. ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
  890. const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  891. const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
  892. if (!cmd)
  893. return {};
  894. const auto *c = reinterpret_cast<const linkedit_data_command *>(cmd);
  895. return {reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
  896. c->datasize / sizeof(data_in_code_entry)};
  897. }
  898. // Create pointers from symbols to their associated compact unwind entries.
  899. void ObjFile::registerCompactUnwind() {
  900. for (const Subsection &subsection : compactUnwindSection->subsections) {
  901. ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec);
  902. // Hack!! Since each CUE contains a different function address, if ICF
  903. // operated naively and compared the entire contents of each CUE, entries
  904. // with identical unwind info but belonging to different functions would
  905. // never be considered equivalent. To work around this problem, we slice
  906. // away the function address here. (Note that we do not adjust the offsets
  907. // of the corresponding relocations.) We rely on `relocateCompactUnwind()`
  908. // to correctly handle these truncated input sections.
  909. isec->data = isec->data.slice(target->wordSize);
  910. ConcatInputSection *referentIsec;
  911. for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
  912. Reloc &r = *it;
  913. // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
  914. if (r.offset != 0) {
  915. ++it;
  916. continue;
  917. }
  918. uint64_t add = r.addend;
  919. if (auto *sym = cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>())) {
  920. // Check whether the symbol defined in this file is the prevailing one.
  921. // Skip if it is e.g. a weak def that didn't prevail.
  922. if (sym->getFile() != this) {
  923. ++it;
  924. continue;
  925. }
  926. add += sym->value;
  927. referentIsec = cast<ConcatInputSection>(sym->isec);
  928. } else {
  929. referentIsec =
  930. cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
  931. }
  932. if (referentIsec->getSegName() != segment_names::text)
  933. error("compact unwind references address in " + toString(referentIsec) +
  934. " which is not in segment __TEXT");
  935. // The functionAddress relocations are typically section relocations.
  936. // However, unwind info operates on a per-symbol basis, so we search for
  937. // the function symbol here.
  938. auto symIt = llvm::lower_bound(
  939. referentIsec->symbols, add,
  940. [](Defined *d, uint64_t add) { return d->value < add; });
  941. // The relocation should point at the exact address of a symbol (with no
  942. // addend).
  943. if (symIt == referentIsec->symbols.end() || (*symIt)->value != add) {
  944. assert(referentIsec->wasCoalesced);
  945. ++it;
  946. continue;
  947. }
  948. (*symIt)->unwindEntry = isec;
  949. // Since we've sliced away the functionAddress, we should remove the
  950. // corresponding relocation too. Given that clang emits relocations in
  951. // reverse order of address, this relocation should be at the end of the
  952. // vector for most of our input object files, so this is typically an O(1)
  953. // operation.
  954. it = isec->relocs.erase(it);
  955. }
  956. }
  957. }
  958. // The path can point to either a dylib or a .tbd file.
  959. static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
  960. Optional<MemoryBufferRef> mbref = readFile(path);
  961. if (!mbref) {
  962. error("could not read dylib file at " + path);
  963. return nullptr;
  964. }
  965. return loadDylib(*mbref, umbrella);
  966. }
  967. // TBD files are parsed into a series of TAPI documents (InterfaceFiles), with
  968. // the first document storing child pointers to the rest of them. When we are
  969. // processing a given TBD file, we store that top-level document in
  970. // currentTopLevelTapi. When processing re-exports, we search its children for
  971. // potentially matching documents in the same TBD file. Note that the children
  972. // themselves don't point to further documents, i.e. this is a two-level tree.
  973. //
  974. // Re-exports can either refer to on-disk files, or to documents within .tbd
  975. // files.
  976. static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
  977. const InterfaceFile *currentTopLevelTapi) {
  978. // Search order:
  979. // 1. Install name basename in -F / -L directories.
  980. {
  981. StringRef stem = path::stem(path);
  982. SmallString<128> frameworkName;
  983. path::append(frameworkName, path::Style::posix, stem + ".framework", stem);
  984. bool isFramework = path.endswith(frameworkName);
  985. if (isFramework) {
  986. for (StringRef dir : config->frameworkSearchPaths) {
  987. SmallString<128> candidate = dir;
  988. path::append(candidate, frameworkName);
  989. if (Optional<StringRef> dylibPath = resolveDylibPath(candidate.str()))
  990. return loadDylib(*dylibPath, umbrella);
  991. }
  992. } else if (Optional<StringRef> dylibPath = findPathCombination(
  993. stem, config->librarySearchPaths, {".tbd", ".dylib"}))
  994. return loadDylib(*dylibPath, umbrella);
  995. }
  996. // 2. As absolute path.
  997. if (path::is_absolute(path, path::Style::posix))
  998. for (StringRef root : config->systemLibraryRoots)
  999. if (Optional<StringRef> dylibPath = resolveDylibPath((root + path).str()))
  1000. return loadDylib(*dylibPath, umbrella);
  1001. // 3. As relative path.
  1002. // TODO: Handle -dylib_file
  1003. // Replace @executable_path, @loader_path, @rpath prefixes in install name.
  1004. SmallString<128> newPath;
  1005. if (config->outputType == MH_EXECUTE &&
  1006. path.consume_front("@executable_path/")) {
  1007. // ld64 allows overriding this with the undocumented flag -executable_path.
  1008. // lld doesn't currently implement that flag.
  1009. // FIXME: Consider using finalOutput instead of outputFile.
  1010. path::append(newPath, path::parent_path(config->outputFile), path);
  1011. path = newPath;
  1012. } else if (path.consume_front("@loader_path/")) {
  1013. fs::real_path(umbrella->getName(), newPath);
  1014. path::remove_filename(newPath);
  1015. path::append(newPath, path);
  1016. path = newPath;
  1017. } else if (path.startswith("@rpath/")) {
  1018. for (StringRef rpath : umbrella->rpaths) {
  1019. newPath.clear();
  1020. if (rpath.consume_front("@loader_path/")) {
  1021. fs::real_path(umbrella->getName(), newPath);
  1022. path::remove_filename(newPath);
  1023. }
  1024. path::append(newPath, rpath, path.drop_front(strlen("@rpath/")));
  1025. if (Optional<StringRef> dylibPath = resolveDylibPath(newPath.str()))
  1026. return loadDylib(*dylibPath, umbrella);
  1027. }
  1028. }
  1029. // FIXME: Should this be further up?
  1030. if (currentTopLevelTapi) {
  1031. for (InterfaceFile &child :
  1032. make_pointee_range(currentTopLevelTapi->documents())) {
  1033. assert(child.documents().empty());
  1034. if (path == child.getInstallName()) {
  1035. auto file = make<DylibFile>(child, umbrella);
  1036. file->parseReexports(child);
  1037. return file;
  1038. }
  1039. }
  1040. }
  1041. if (Optional<StringRef> dylibPath = resolveDylibPath(path))
  1042. return loadDylib(*dylibPath, umbrella);
  1043. return nullptr;
  1044. }
  1045. // If a re-exported dylib is public (lives in /usr/lib or
  1046. // /System/Library/Frameworks), then it is considered implicitly linked: we
  1047. // should bind to its symbols directly instead of via the re-exporting umbrella
  1048. // library.
  1049. static bool isImplicitlyLinked(StringRef path) {
  1050. if (!config->implicitDylibs)
  1051. return false;
  1052. if (path::parent_path(path) == "/usr/lib")
  1053. return true;
  1054. // Match /System/Library/Frameworks/$FOO.framework/**/$FOO
  1055. if (path.consume_front("/System/Library/Frameworks/")) {
  1056. StringRef frameworkName = path.take_until([](char c) { return c == '.'; });
  1057. return path::filename(path) == frameworkName;
  1058. }
  1059. return false;
  1060. }
  1061. static void loadReexport(StringRef path, DylibFile *umbrella,
  1062. const InterfaceFile *currentTopLevelTapi) {
  1063. DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi);
  1064. if (!reexport)
  1065. error("unable to locate re-export with install name " + path);
  1066. }
  1067. DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
  1068. bool isBundleLoader)
  1069. : InputFile(DylibKind, mb), refState(RefState::Unreferenced),
  1070. isBundleLoader(isBundleLoader) {
  1071. assert(!isBundleLoader || !umbrella);
  1072. if (umbrella == nullptr)
  1073. umbrella = this;
  1074. this->umbrella = umbrella;
  1075. auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  1076. auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart());
  1077. // Initialize installName.
  1078. if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
  1079. auto *c = reinterpret_cast<const dylib_command *>(cmd);
  1080. currentVersion = read32le(&c->dylib.current_version);
  1081. compatibilityVersion = read32le(&c->dylib.compatibility_version);
  1082. installName =
  1083. reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name);
  1084. } else if (!isBundleLoader) {
  1085. // macho_executable and macho_bundle don't have LC_ID_DYLIB,
  1086. // so it's OK.
  1087. error("dylib " + toString(this) + " missing LC_ID_DYLIB load command");
  1088. return;
  1089. }
  1090. if (config->printEachFile)
  1091. message(toString(this));
  1092. inputFiles.insert(this);
  1093. deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB;
  1094. if (!checkCompatibility(this))
  1095. return;
  1096. checkAppExtensionSafety(hdr->flags & MH_APP_EXTENSION_SAFE);
  1097. for (auto *cmd : findCommands<rpath_command>(hdr, LC_RPATH)) {
  1098. StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path};
  1099. rpaths.push_back(rpath);
  1100. }
  1101. // Initi