InputFiles.cpp source code [lld/ELF/InputFiles.cpp]

1	//===- InputFiles.cpp -----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "Config.h"
11	#include "DWARF.h"
12	#include "Driver.h"
13	#include "InputSection.h"
14	#include "LinkerScript.h"
15	#include "SymbolTable.h"
16	#include "Symbols.h"
17	#include "SyntheticSections.h"
18	#include "Target.h"
19	#include "lld/Common/CommonLinkerContext.h"
20	#include "lld/Common/DWARF.h"
21	#include "llvm/ADT/CachedHashString.h"
22	#include "llvm/ADT/STLExtras.h"
23	#include "llvm/LTO/LTO.h"
24	#include "llvm/Object/IRObjectFile.h"
25	#include "llvm/Support/ARMAttributeParser.h"
26	#include "llvm/Support/ARMBuildAttributes.h"
27	#include "llvm/Support/Endian.h"
28	#include "llvm/Support/FileSystem.h"
29	#include "llvm/Support/Path.h"
30	#include "llvm/Support/RISCVAttributeParser.h"
31	#include "llvm/Support/TarWriter.h"
32	#include "llvm/Support/raw_ostream.h"
33	#include <optional>
34
35	using namespace llvm;
36	using namespace llvm::ELF;
37	using namespace llvm::object;
38	using namespace llvm::sys;
39	using namespace llvm::sys::fs;
40	using namespace llvm::support::endian;
41	using namespace lld;
42	using namespace lld::elf;
43
44	// This function is explicitly instantiated in ARM.cpp, don't do it here to
45	// avoid warnings with MSVC.
46	extern template void ObjFile<ELF32LE>::importCmseSymbols();
47	extern template void ObjFile<ELF32BE>::importCmseSymbols();
48	extern template void ObjFile<ELF64LE>::importCmseSymbols();
49	extern template void ObjFile<ELF64BE>::importCmseSymbols();
50
51	bool InputFile::isInGroup;
52	uint32_t InputFile::nextGroupId;
53
54	std::unique_ptr<TarWriter> elf::tar;
55
56	// Returns "<internal>", "foo.a(bar.o)" or "baz.o".
57	std::string lld::toString(const InputFile *f) {
58	static std::mutex mu;
59	if (!f)
60	return "<internal>";
61
62	{
63	std::lock_guard<std::mutex> lock(mu);
64	if (f->toStringCache.empty()) {
65	if (f->archiveName.empty())
66	f->toStringCache = f->getName();
67	else
68	(f->archiveName + "(" + f->getName() + ")").toVector(Out&: f->toStringCache);
69	}
70	}
71	return std::string(f->toStringCache);
72	}
73
74	static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) {
75	unsigned char size;
76	unsigned char endian;
77	std::tie(args&: size, args&: endian) = getElfArchType(Object: mb.getBuffer());
78
79	auto report = [&](StringRef msg) {
80	StringRef filename = mb.getBufferIdentifier();
81	if (archiveName.empty())
82	fatal(msg: filename + ": " + msg);
83	else
84	fatal(msg: archiveName + "(" + filename + "): " + msg);
85	};
86
87	if (!mb.getBuffer().starts_with(Prefix: ElfMagic))
88	report ("not an ELF file");
89	if (endian != ELFDATA2LSB && endian != ELFDATA2MSB)
90	report ("corrupted ELF file: invalid data encoding");
91	if (size != ELFCLASS32 && size != ELFCLASS64)
92	report ("corrupted ELF file: invalid file class");
93
94	size_t bufSize = mb.getBuffer().size();
95	if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) \|\|
96	(size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr)))
97	report ("corrupted ELF file: file is too short");
98
99	if (size == ELFCLASS32)
100	return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
101	return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
102	}
103
104	// For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
105	// flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
106	// the input objects have been compiled.
107	static void updateARMVFPArgs(const ARMAttributeParser &attributes,
108	const InputFile *f) {
109	std::optional<unsigned> attr =
110	attributes.getAttributeValue(tag: ARMBuildAttrs::ABI_VFP_args);
111	if (!attr)
112	// If an ABI tag isn't present then it is implicitly given the value of 0
113	// which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files,
114	// including some in glibc that don't use FP args (and should have value 3)
115	// don't have the attribute so we do not consider an implicit value of 0
116	// as a clash.
117	return;
118
119	unsigned vfpArgs = *attr;
120	ARMVFPArgKind arg;
121	switch (vfpArgs) {
122	case ARMBuildAttrs::BaseAAPCS:
123	arg = ARMVFPArgKind::Base;
124	break;
125	case ARMBuildAttrs::HardFPAAPCS:
126	arg = ARMVFPArgKind::VFP;
127	break;
128	case ARMBuildAttrs::ToolChainFPPCS:
129	// Tool chain specific convention that conforms to neither AAPCS variant.
130	arg = ARMVFPArgKind::ToolChain;
131	break;
132	case ARMBuildAttrs::CompatibleFPAAPCS:
133	// Object compatible with all conventions.
134	return;
135	default:
136	error(msg: toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine (vfpArgs));
137	return;
138	}
139	// Follow ld.bfd and error if there is a mix of calling conventions.
140	if (config ->armVFPArgs != arg && config ->armVFPArgs != ARMVFPArgKind::Default)
141	error(msg: toString(f) + ": incompatible Tag_ABI_VFP_args");
142	else
143	config ->armVFPArgs = arg;
144	}
145
146	// The ARM support in lld makes some use of instructions that are not available
147	// on all ARM architectures. Namely:
148	// - Use of BLX instruction for interworking between ARM and Thumb state.
149	// - Use of the extended Thumb branch encoding in relocation.
150	// - Use of the MOVT/MOVW instructions in Thumb Thunks.
151	// The ARM Attributes section contains information about the architecture chosen
152	// at compile time. We follow the convention that if at least one input object
153	// is compiled with an architecture that supports these features then lld is
154	// permitted to use them.
155	static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
156	std::optional<unsigned> attr =
157	attributes.getAttributeValue(tag: ARMBuildAttrs::CPU_arch);
158	if (!attr)
159	return;
160	auto arch = *attr;
161	switch (arch) {
162	case ARMBuildAttrs::Pre_v4:
163	case ARMBuildAttrs::v4:
164	case ARMBuildAttrs::v4T:
165	// Architectures prior to v5 do not support BLX instruction
166	break;
167	case ARMBuildAttrs::v5T:
168	case ARMBuildAttrs::v5TE:
169	case ARMBuildAttrs::v5TEJ:
170	case ARMBuildAttrs::v6:
171	case ARMBuildAttrs::v6KZ:
172	case ARMBuildAttrs::v6K:
173	config ->armHasBlx = true;
174	// Architectures used in pre-Cortex processors do not support
175	// The J1 = 1 J2 = 1 Thumb branch range extension, with the exception
176	// of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do.
177	break;
178	default:
179	// All other Architectures have BLX and extended branch encoding
180	config ->armHasBlx = true;
181	config ->armJ1J2BranchEncoding = true;
182	if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M)
183	// All Architectures used in Cortex processors with the exception
184	// of v6-M and v6S-M have the MOVT and MOVW instructions.
185	config ->armHasMovtMovw = true;
186	break;
187	}
188
189	// Only ARMv8-M or later architectures have CMSE support.
190	std::optional<unsigned> profile =
191	attributes.getAttributeValue(tag: ARMBuildAttrs::CPU_arch_profile);
192	if (!profile)
193	return;
194	if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
195	profile == ARMBuildAttrs::MicroControllerProfile)
196	config ->armCMSESupport = true;
197	}
198
199	InputFile::InputFile(Kind k, MemoryBufferRef m)
200	: mb (m), groupId(nextGroupId), fileKind(k) {
201	// All files within the same --{start,end}-group get the same group ID.
202	// Otherwise, a new file will get a new group ID.
203	if (!isInGroup)
204	++nextGroupId;
205	}
206
207	std::optional<MemoryBufferRef> elf::readFile(StringRef path) {
208	llvm::TimeTraceScope timeScope("Load input files", path);
209
210	// The --chroot option changes our virtual root directory.
211	// This is useful when you are dealing with files created by --reproduce.
212	if (!config ->chroot.empty() && path.starts_with(Prefix: "/"))
213	path = saver().save(S: config ->chroot + path);
214
215	bool remapped = false;
216	auto it = config ->remapInputs.find(Val: path);
217	if (it != config ->remapInputs.end()) {
218	path = it ->second;
219	remapped = true;
220	} else {
221	for (const auto &[pat, toFile] : config ->remapInputsWildcards) {
222	if (pat.match(S: path)) {
223	path = toFile;
224	remapped = true;
225	break;
226	}
227	}
228	}
229	if (remapped) {
230	// Use /dev/null to indicate an input file that should be ignored. Change
231	// the path to NUL on Windows.
232	#ifdef _WIN32
233	if (path == "/dev/null")
234	path = "NUL";
235	#endif
236	}
237
238	log(msg: path);
239	config ->dependencyFiles.insert(X: llvm::CachedHashString (path));
240
241	auto mbOrErr = MemoryBuffer::getFile(Filename: path, /IsText=/false,
242	/RequiresNullTerminator=/false);
243	if (auto ec = mbOrErr.getError()) {
244	error(msg: "cannot open " + path + ": " + ec.message());
245	return std::nullopt;
246	}
247
248	MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef();
249	ctx.memoryBuffers.push_back(Elt: std::move(mbOrErr)); // take MB ownership*
250
251	if (tar)
252	tar ->append(Path: relativeToRoot(path), Data: mbref.getBuffer());
253	return mbref;
254	}
255
256	// All input object files must be for the same architecture
257	// (e.g. it does not make sense to link x86 object files with
258	// MIPS object files.) This function checks for that error.
259	static bool isCompatible(InputFile *file) {
260	if (!file->isElf() && !isa<BitcodeFile>(Val: file))
261	return true;
262
263	if (file->ekind == config ->ekind && file->emachine == config ->emachine) {
264	if (config ->emachine != EM_MIPS)
265	return true;
266	if (isMipsN32Abi(f: file) == config ->mipsN32Abi)
267	return true;
268	}
269
270	StringRef target =
271	!config ->bfdname.empty() ? config ->bfdname : config ->emulation;
272	if (!target.empty()) {
273	error(msg: toString(f: file) + " is incompatible with " + target);
274	return false;
275	}
276
277	InputFile existing = nullptr*;
278	if (!ctx.objectFiles.empty())
279	existing = ctx.objectFiles [`0`];
280	else if (!ctx.sharedFiles.empty())
281	existing = ctx.sharedFiles [`0`];
282	else if (!ctx.bitcodeFiles.empty())
283	existing = ctx.bitcodeFiles [`0`];
284	std::string with;
285	if (existing)
286	with = " with " + toString(f: existing);
287	error(msg: toString(f: file) + " is incompatible" + with);
288	return false;
289	}
290
291	template <class ELFT> static void doParseFile(InputFile *file) {
292	if (!isCompatible(file))
293	return;
294
295	// Lazy object file
296	if (file->lazy) {
297	if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
298	ctx.lazyBitcodeFiles.push_back(Elt: f);
299	f->parseLazy();
300	} else {
301	cast<ObjFile<ELFT>>(file)->parseLazy();
302	}
303	return;
304	}
305
306	if (config ->trace)
307	message(msg: toString(f: file));
308
309	if (file->kind() == InputFile::ObjKind) {
310	ctx.objectFiles.push_back(Elt: cast<ELFFileBase>(Val: file));
311	cast<ObjFile<ELFT>>(file)->parse();
312	} else if (auto *f = dyn_cast<SharedFile>(Val: file)) {
313	f->parse<ELFT>();
314	} else if (auto *f = dyn_cast<BitcodeFile>(Val: file)) {
315	ctx.bitcodeFiles.push_back(Elt: f);
316	f->parse();
317	} else {
318	ctx.binaryFiles.push_back(Elt: cast<BinaryFile>(Val: file));
319	cast<BinaryFile>(Val: file)->parse();
320	}
321	}
322
323	// Add symbols in File to the symbol table.
324	void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); }
325
326	// This function is explicitly instantiated in ARM.cpp. Mark it extern here,
327	// to avoid warnings when building with MSVC.
328	extern template void ObjFile<ELF32LE>::importCmseSymbols();
329	extern template void ObjFile<ELF32BE>::importCmseSymbols();
330	extern template void ObjFile<ELF64LE>::importCmseSymbols();
331	extern template void ObjFile<ELF64BE>::importCmseSymbols();
332
333	template <class ELFT>
334	static void doParseFiles(const std::vector<InputFile *> &files,
335	InputFile *armCmseImpLib) {
336	// Add all files to the symbol table. This will add almost all symbols that we
337	// need to the symbol table. This process might add files to the link due to
338	// addDependentLibrary.
339	for (size_t i = `0`; i < files.size(); ++i) {
340	llvm::TimeTraceScope timeScope("Parse input files", files [i]->getName());
341	doParseFile<ELFT>(files [i]);
342	}
343	if (armCmseImpLib)
344	cast<ObjFile<ELFT>>(*armCmseImpLib).importCmseSymbols();
345	}
346
347	void elf::parseFiles(const std::vector<InputFile *> &files,
348	InputFile *armCmseImpLib) {
349	llvm::TimeTraceScope timeScope("Parse input files");
350	invokeELFT(doParseFiles, files, armCmseImpLib);
351	}
352
353	// Concatenates arguments to construct a string representing an error location.
354	static std::string createFileLineMsg(StringRef path, unsigned line) {
355	std::string filename = std::string (path::filename(path));
356	std::string lineno = ":" + std::to_string(val: line);
357	if (filename == path)
358	return filename + lineno;
359	return filename + lineno + " (" + path.str() + lineno + ")";
360	}
361
362	template <class ELFT>
363	static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
364	const InputSectionBase &sec, uint64_t offset) {
365	// In DWARF, functions and variables are stored to different places.
366	// First, look up a function for a given offset.
367	if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset))
368	return createFileLineMsg(path: info ->FileName, line: info ->Line);
369
370	// If it failed, look up again as a variable.
371	if (std::optional<std::pair<std::string, unsigned>> fileLine =
372	file.getVariableLoc(sym.getName()))
373	return createFileLineMsg(path: fileLine ->first, line: fileLine ->second);
374
375	// File.sourceFile contains STT_FILE symbol, and that is a last resort.
376	return std::string(file.sourceFile);
377	}
378
379	std::string InputFile::getSrcMsg(const Symbol &sym, const InputSectionBase &sec,
380	uint64_t offset) {
381	if (kind() != ObjKind)
382	return "";
383	switch (ekind) {
384	default:
385	llvm_unreachable("Invalid kind");
386	case ELF32LEKind:
387	return getSrcMsgAux(file&: cast<ObjFile<ELF32LE>>(Val&: *this), sym, sec, offset);
388	case ELF32BEKind:
389	return getSrcMsgAux(file&: cast<ObjFile<ELF32BE>>(Val&: *this), sym, sec, offset);
390	case ELF64LEKind:
391	return getSrcMsgAux(file&: cast<ObjFile<ELF64LE>>(Val&: *this), sym, sec, offset);
392	case ELF64BEKind:
393	return getSrcMsgAux(file&: cast<ObjFile<ELF64BE>>(Val&: *this), sym, sec, offset);
394	}
395	}
396
397	StringRef InputFile::getNameForScript() const {
398	if (archiveName.empty())
399	return getName();
400
401	if (nameForScriptCache.empty())
402	nameForScriptCache = (archiveName + Twine(`':'`) + getName()).str();
403
404	return nameForScriptCache;
405	}
406
407	// An ELF object file may contain a `.deplibs` section. If it exists, the
408	// section contains a list of library specifiers such as `m` for libm. This
409	// function resolves a given name by finding the first matching library checking
410	// the various ways that a library can be specified to LLD. This ELF extension
411	// is a form of autolinking and is called `dependent libraries`. It is currently
412	// unique to LLVM and lld.
413	static void addDependentLibrary(StringRef specifier, const InputFile *f) {
414	if (!config ->dependentLibraries)
415	return;
416	if (std::optional<std::string> s = searchLibraryBaseName(path: specifier))
417	ctx.driver.addFile(path: saver().save(S: s), /withLOption=/*true);
418	else if (std::optional<std::string> s = findFromSearchPaths(path: specifier))
419	ctx.driver.addFile(path: saver().save(S: s), /withLOption=/*true);
420	else if (fs::exists(Path: specifier))
421	ctx.driver.addFile(path: specifier, /withLOption=/false);
422	else
423	error(msg: toString(f) +
424	": unable to find library from dependent library specifier: " +
425	specifier);
426	}
427
428	// Record the membership of a section group so that in the garbage collection
429	// pass, section group members are kept or discarded as a unit.
430	template <class ELFT>
431	static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
432	ArrayRef<typename ELFT::Word> entries) {
433	bool hasAlloc = false;
434	for (uint32_t index : entries.slice(`1`)) {
435	if (index >= sections.size())
436	return;
437	if (InputSectionBase *s = sections [index])
438	if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
439	hasAlloc = true;
440	}
441
442	// If any member has the SHF_ALLOC flag, the whole group is subject to garbage
443	// collection. See the comment in markLive(). This rule retains .debug_types
444	// and .rela.debug_types.
445	if (!hasAlloc)
446	return;
447
448	// Connect the members in a circular doubly-linked list via
449	// nextInSectionGroup.
450	InputSectionBase *head;
451	InputSectionBase prev = nullptr*;
452	for (uint32_t index : entries.slice(`1`)) {
453	InputSectionBase *s = sections [index];
454	if (!s \|\| s == &InputSection::discarded)
455	continue;
456	if (prev)
457	prev->nextInSectionGroup = s;
458	else
459	head = s;
460	prev = s;
461	}
462	if (prev)
463	prev->nextInSectionGroup = head;
464	}
465
466	template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() {
467	llvm::call_once(initDwarf, [this]() {
468	dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
469	std::make_unique<LLDDwarfObj<ELFT>>(this), "",
470	[&](Error err) { warn(getName() + ": " + toString(E: std::move(err))); },
471	[&](Error warning) {
472	warn(getName() + ": " + toString(E: std::move(warning)));
473	}));
474	});
475
476	return dwarf.get();
477	}
478
479	// Returns the pair of file name and line number describing location of data
480	// object (variable, array, etc) definition.
481	template <class ELFT>
482	std::optional<std::pair<std::string, unsigned>>
483	ObjFile<ELFT>::getVariableLoc(StringRef name) {
484	return getDwarf()->getVariableLoc(name);
485	}
486
487	// Returns source line information for a given offset
488	// using DWARF debug info.
489	template <class ELFT>
490	std::optional<DILineInfo>
491	ObjFile<ELFT>::getDILineInfo(const InputSectionBase *s, uint64_t offset) {
492	// Detect SectionIndex for specified section.
493	uint64_t sectionIndex = object::SectionedAddress::UndefSection;
494	ArrayRef<InputSectionBase *> sections = s->file->getSections();
495	for (uint64_t curIndex = `0`; curIndex < sections.size(); ++curIndex) {
496	if (s == sections [curIndex]) {
497	sectionIndex = curIndex;
498	break;
499	}
500	}
501
502	return getDwarf()->getDILineInfo(offset, sectionIndex);
503	}
504
505	ELFFileBase::ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef mb)
506	: InputFile (k, mb) {
507	this->ekind = ekind;
508	}
509
510	template <typename Elf_Shdr>
511	static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
512	for (const Elf_Shdr &sec : sections)
513	if (sec.sh_type == type)
514	return &sec;
515	return nullptr;
516	}
517
518	void ELFFileBase::init() {
519	switch (ekind) {
520	case ELF32LEKind:
521	init<ELF32LE>(k: fileKind);
522	break;
523	case ELF32BEKind:
524	init<ELF32BE>(k: fileKind);
525	break;
526	case ELF64LEKind:
527	init<ELF64LE>(k: fileKind);
528	break;
529	case ELF64BEKind:
530	init<ELF64BE>(k: fileKind);
531	break;
532	default:
533	llvm_unreachable("getELFKind");
534	}
535	}
536
537	template <class ELFT> void ELFFileBase::init(InputFile::Kind k) {
538	using Elf_Shdr = typename ELFT::Shdr;
539	using Elf_Sym = typename ELFT::Sym;
540
541	// Initialize trivial attributes.
542	const ELFFile<ELFT> &obj = getObj<ELFT>();
543	emachine = obj.getHeader().e_machine;
544	osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI];
545	abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION];
546
547	ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this);
548	elfShdrs = sections.data();
549	numELFShdrs = sections.size();
550
551	// Find a symbol table.
552	const Elf_Shdr *symtabSec =
553	findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB);
554
555	if (!symtabSec)
556	return;
557
558	// Initialize members corresponding to a symbol table.
559	firstGlobal = symtabSec->sh_info;
560
561	ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this);
562	if (firstGlobal == `0` \|\| firstGlobal > eSyms.size())
563	fatal(msg: toString(f: this) + ": invalid sh_info in symbol table");
564
565	elfSyms = reinterpret_cast<const void *>(eSyms.data());
566	numELFSyms = uint32_t(eSyms.size());
567	stringTable = CHECK(obj.getStringTableForSymtab(symtabSec, sections), this*);
568	}
569
570	template <class ELFT>
571	uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
572	return CHECK(
573	this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
574	this);
575	}
576
577	template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
578	object::ELFFile<ELFT> obj = this->getObj();
579	// Read a section table. justSymbols is usually false.
580	if (this->justSymbols) {
581	initializeJustSymbols();
582	initializeSymbols(obj);
583	return;
584	}
585
586	// Handle dependent libraries and selection of section groups as these are not
587	// done in parallel.
588	ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
589	StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
590	uint64_t size = objSections.size();
591	sections.resize(size);
592	for (size_t i = `0`; i != size; ++i) {
593	const Elf_Shdr &sec = objSections[i];
594	if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config ->relocatable) {
595	StringRef name = check(obj.getSectionName(sec, shstrtab));
596	ArrayRef<char> data = CHECK(
597	this->getObj().template getSectionContentsAsArray<char>(sec), this);
598	if (!data.empty() && data.back() != `'\0'`) {
599	error(
600	toString(this) +
601	": corrupted dependent libraries section (unterminated string): " +
602	name);
603	} else {
604	for (const char d = data.begin(), e = data.end(); d < e;) {
605	StringRef s(d);
606	addDependentLibrary(s, this);
607	d += s.size() + `1`;
608	}
609	}
610	this->sections[i] = &InputSection::discarded;
611	continue;
612	}
613
614	if (sec.sh_type == SHT_ARM_ATTRIBUTES && config ->emachine == EM_ARM) {
615	ARMAttributeParser attributes;
616	ArrayRef<uint8_t> contents =
617	check(this->getObj().getSectionContents(sec));
618	StringRef name = check(obj.getSectionName(sec, shstrtab));
619	this->sections[i] = &InputSection::discarded;
620	if (Error e = attributes.parse(section: contents, endian: ekind == ELF32LEKind
621	? llvm::endianness::little
622	: llvm::endianness::big)) {
623	InputSection isec(*this, sec, name);
624	warn(msg: toString(&isec) + ": " + llvm::toString(E: std::move(e)));
625	} else {
626	updateSupportedARMFeatures(attributes);
627	updateARMVFPArgs(attributes, this);
628
629	// FIXME: Retain the first attribute section we see. The eglibc ARM
630	// dynamic loaders require the presence of an attribute section for
631	// dlopen to work. In a full implementation we would merge all attribute
632	// sections.
633	if (in.attributes == nullptr) {
634	in.attributes = std::make_unique<InputSection>(*this, sec, name);
635	this->sections[i] = in.attributes.get();
636	}
637	}
638	}
639
640	// Producing a static binary with MTE globals is not currently supported,
641	// remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused
642	// medatada, and we don't want them to end up in the output file for static
643	// executables.
644	if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC &&
645	!canHaveMemtagGlobals()) {
646	this->sections[i] = &InputSection::discarded;
647	continue;
648	}
649
650	if (sec.sh_type != SHT_GROUP)
651	continue;
652	StringRef signature = getShtGroupSignature(sections: objSections, sec);
653	ArrayRef<Elf_Word> entries =
654	CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
655	if (entries.empty())
656	fatal(toString(this) + ": empty SHT_GROUP");
657
658	Elf_Word flag = entries[`0`];
659	if (flag && flag != GRP_COMDAT)
660	fatal(toString(this) + ": unsupported SHT_GROUP format");
661
662	bool keepGroup =
663	(flag & GRP_COMDAT) == `0` \|\| ignoreComdats \|\|
664	symtab.comdatGroups.try_emplace(CachedHashStringRef (signature), this)
665	.second;
666	if (keepGroup) {
667	if (config ->relocatable)
668	this->sections[i] = createInputSection(
669	idx: i, sec, name: check(obj.getSectionName(sec, shstrtab)));
670	continue;
671	}
672
673	// Otherwise, discard group members.
674	for (uint32_t secIndex : entries.slice(`1`)) {
675	if (secIndex >= size)
676	fatal(toString(this) +
677	": invalid section index in group: " + Twine(secIndex));
678	this->sections[secIndex] = &InputSection::discarded;
679	}
680	}
681
682	// Read a symbol table.
683	initializeSymbols(obj);
684	}
685
686	// Sections with SHT_GROUP and comdat bits define comdat section groups.
687	// They are identified and deduplicated by group name. This function
688	// returns a group name.
689	template <class ELFT>
690	StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
691	const Elf_Shdr &sec) {
692	typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
693	if (sec.sh_info >= symbols.size())
694	fatal(toString(this) + ": invalid symbol index");
695	const typename ELFT::Sym &sym = symbols[sec.sh_info];
696	return CHECK(sym.getName(this->stringTable), this);
697	}
698
699	template <class ELFT>
700	bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
701	// On a regular link we don't merge sections if -O0 (default is -O1). This
702	// sometimes makes the linker significantly faster, although the output will
703	// be bigger.
704	//
705	// Doing the same for -r would create a problem as it would combine sections
706	// with different sh_entsize. One option would be to just copy every SHF_MERGE
707	// section as is to the output. While this would produce a valid ELF file with
708	// usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
709	// they see two .debug_str. We could have separate logic for combining
710	// SHF_MERGE sections based both on their name and sh_entsize, but that seems
711	// to be more trouble than it is worth. Instead, we just use the regular (-O1)
712	// logic for -r.
713	if (config ->optimize == `0` && !config ->relocatable)
714	return false;
715
716	// A mergeable section with size 0 is useless because they don't have
717	// any data to merge. A mergeable string section with size 0 can be
718	// argued as invalid because it doesn't end with a null character.
719	// We'll avoid a mess by handling them as if they were non-mergeable.
720	if (sec.sh_size == `0`)
721	return false;
722
723	// Check for sh_entsize. The ELF spec is not clear about the zero
724	// sh_entsize. It says that "the member [sh_entsize] contains 0 if
725	// the section does not hold a table of fixed-size entries". We know
726	// that Rust 1.13 produces a string mergeable section with a zero
727	// sh_entsize. Here we just accept it rather than being picky about it.
728	uint64_t entSize = sec.sh_entsize;
729	if (entSize == `0`)
730	return false;
731	if (sec.sh_size % entSize)
732	fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" +
733	Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" +
734	Twine(entSize) + ")");
735
736	if (sec.sh_flags & SHF_WRITE)
737	fatal(toString(this) + ":(" + name +
738	"): writable SHF_MERGE section is not supported");
739
740	return true;
741	}
742
743	// This is for --just-symbols.
744	//
745	// --just-symbols is a very minor feature that allows you to link your
746	// output against other existing program, so that if you load both your
747	// program and the other program into memory, your output can refer the
748	// other program's symbols.
749	//
750	// When the option is given, we link "just symbols". The section table is
751	// initialized with null pointers.
752	template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
753	sections.resize(numELFShdrs);
754	}
755
756	static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) {
757	if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC))
758	return true;
759	if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING))
760	return true;
761	// Allow all processor-specific types. This is different from GNU ld.
762	return SHT_LOPROC <= t && t <= SHT_HIPROC;
763	}
764
765	template <class ELFT>
766	void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
767	const llvm::object::ELFFile<ELFT> &obj) {
768	ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
769	StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
770	uint64_t size = objSections.size();
771	SmallVector<ArrayRef<Elf_Word>, `0`> selectedGroups;
772	for (size_t i = `0`; i != size; ++i) {
773	if (this->sections[i] == &InputSection::discarded)
774	continue;
775	const Elf_Shdr &sec = objSections[i];
776	const uint32_t type = sec.sh_type;
777
778	// SHF_EXCLUDE'ed sections are discarded by the linker. However,
779	// if -r is given, we'll let the final link discard such sections.
780	// This is compatible with GNU.
781	if ((sec.sh_flags & SHF_EXCLUDE) && !config ->relocatable) {
782	if (type == SHT_LLVM_CALL_GRAPH_PROFILE)
783	cgProfileSectionIndex = i;
784	if (type == SHT_LLVM_ADDRSIG) {
785	// We ignore the address-significance table if we know that the object
786	// file was created by objcopy or ld -r. This is because these tools
787	// will reorder the symbols in the symbol table, invalidating the data
788	// in the address-significance table, which refers to symbols by index.
789	if (sec.sh_link != `0`)
790	this->addrsigSec = &sec;
791	else if (config ->icf == ICFLevel::Safe)
792	warn(toString(this) +
793	": --icf=safe conservatively ignores "
794	"SHT_LLVM_ADDRSIG [index " +
795	Twine(i) +
796	"] with sh_link=0 "
797	"(likely created using objcopy or ld -r)");
798	}
799	this->sections[i] = &InputSection::discarded;
800	continue;
801	}
802
803	switch (type) {
804	case SHT_GROUP: {
805	if (!config ->relocatable)
806	sections[i] = &InputSection::discarded;
807	StringRef signature =
808	cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable));
809	ArrayRef<Elf_Word> entries =
810	cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec));
811	if ((entries[`0`] & GRP_COMDAT) == `0` \|\| ignoreComdats \|\|
812	symtab.comdatGroups.find(Val: CachedHashStringRef (signature))->second ==
813	this)
814	selectedGroups.push_back(entries);
815	break;
816	}
817	case SHT_SYMTAB_SHNDX:
818	shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this);
819	break;
820	case SHT_SYMTAB:
821	case SHT_STRTAB:
822	case SHT_REL:
823	case SHT_RELA:
824	case SHT_NULL:
825	break;
826	case SHT_PROGBITS:
827	case SHT_NOTE:
828	case SHT_NOBITS:
829	case SHT_INIT_ARRAY:
830	case SHT_FINI_ARRAY:
831	case SHT_PREINIT_ARRAY:
832	this->sections[i] =
833	createInputSection(idx: i, sec, name: check(obj.getSectionName(sec, shstrtab)));
834	break;
835	default:
836	this->sections[i] =
837	createInputSection(idx: i, sec, name: check(obj.getSectionName(sec, shstrtab)));
838	if (type == SHT_LLVM_SYMPART)
839	ctx.hasSympart.store(i: true, m: std::memory_order_relaxed);
840	else if (config ->rejectMismatch &&
841	!isKnownSpecificSectionType(type, sec.sh_flags))
842	errorOrWarn(toString(this->sections[i]) + ": unknown section type 0x" +
843	Twine::utohexstr(Val: type));
844	break;
845	}
846	}
847
848	// We have a second loop. It is used to:
849	// 1) handle SHF_LINK_ORDER sections.
850	// 2) create relocation sections. In some cases the section header index of a
851	// relocation section may be smaller than that of the relocated section. In
852	// such cases, the relocation section would attempt to reference a target
853	// section that has not yet been created. For simplicity, delay creation of
854	// relocation sections until now.
855	for (size_t i = `0`; i != size; ++i) {
856	if (this->sections[i] == &InputSection::discarded)
857	continue;
858	const Elf_Shdr &sec = objSections[i];
859
860	if (isStaticRelSecType(sec.sh_type)) {
861	// Find a relocation target section and associate this section with that.
862	// Target may have been discarded if it is in a different section group
863	// and the group is discarded, even though it's a violation of the spec.
864	// We handle that situation gracefully by discarding dangling relocation
865	// sections.
866	const uint32_t info = sec.sh_info;
867	InputSectionBase *s = getRelocTarget(idx: i, sec, info);
868	if (!s)
869	continue;
870
871	// ELF spec allows mergeable sections with relocations, but they are rare,
872	// and it is in practice hard to merge such sections by contents, because
873	// applying relocations at end of linking changes section contents. So, we
874	// simply handle such sections as non-mergeable ones. Degrading like this
875	// is acceptable because section merging is optional.
876	if (auto *ms = dyn_cast<MergeInputSection>(Val: s)) {
877	s = makeThreadLocal<InputSection>(
878	args&: ms->file, args&: ms->flags, args&: ms->type, args&: ms->addralign,
879	args: ms->contentMaybeDecompress(), args&: ms->name);
880	sections[info] = s;
881	}
882
883	if (s->relSecIdx != `0`)
884	error(
885	msg: toString(s) +
886	": multiple relocation sections to one section are not supported");
887	s->relSecIdx = i;
888
889	// Relocation sections are usually removed from the output, so return
890	// `nullptr` for the normal case. However, if -r or --emit-relocs is
891	// specified, we need to copy them to the output. (Some post link analysis
892	// tools specify --emit-relocs to obtain the information.)
893	if (config ->copyRelocs) {
894	auto *isec = makeThreadLocal<InputSection>(
895	*this, sec, check(obj.getSectionName(sec, shstrtab)));
896	// If the relocated section is discarded (due to /DISCARD/ or
897	// --gc-sections), the relocation section should be discarded as well.
898	s->dependentSections.push_back(NewVal: isec);
899	sections[i] = isec;
900	}
901	continue;
902	}
903
904	// A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
905	// the flag.
906	if (!sec.sh_link \|\| !(sec.sh_flags & SHF_LINK_ORDER))
907	continue;
908
909	InputSectionBase linkSec = nullptr*;
910	if (sec.sh_link < size)
911	linkSec = this->sections[sec.sh_link];
912	if (!linkSec)
913	fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link));
914
915	// A SHF_LINK_ORDER section is discarded if its linked-to section is
916	// discarded.
917	InputSection isec = cast<InputSection>(this*->sections[i]);
918	linkSec->dependentSections.push_back(NewVal: isec);
919	if (!isa<InputSection>(Val: linkSec))
920	error(msg: "a section " + isec->name +
921	" with SHF_LINK_ORDER should not refer a non-regular section: " +
922	toString(linkSec));
923	}
924
925	for (ArrayRef<Elf_Word> entries : selectedGroups)
926	handleSectionGroup<ELFT>(this->sections, entries);
927	}
928
929	// Read the following info from the .note.gnu.property section and write it to
930	// the corresponding fields in `ObjFile`:
931	// - Feature flags (32 bits) representing x86 or AArch64 features for
932	// hardware-assisted call flow control;
933	// - AArch64 PAuth ABI core info (16 bytes).
934	template <class ELFT>
935	void readGnuProperty(const InputSection &sec, ObjFile<ELFT> &f) {
936	using Elf_Nhdr = typename ELFT::Nhdr;
937	using Elf_Note = typename ELFT::Note;
938
939	ArrayRef<uint8_t> data = sec.content();
940	auto reportFatal = [&](const uint8_t place, const* Twine &msg) {
941	fatal(msg: toString(f: sec.file) + ":(" + sec.name + "+0x" +
942	Twine::utohexstr(Val: place - sec.content().data()) + "): " + msg);
943	};
944	while (!data.empty()) {
945	// Read one NOTE record.
946	auto nhdr = reinterpret_cast<const* Elf_Nhdr *>(data.data());
947	if (data.size() < sizeof(Elf_Nhdr) \|\|
948	data.size() < nhdr->getSize(sec.addralign))
949	reportFatal(data.data(), "data is too short");
950
951	Elf_Note note(*nhdr);
952	if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 \|\| note.getName() != "GNU") {
953	data = data.slice(nhdr->getSize(sec.addralign));
954	continue;
955	}
956
957	uint32_t featureAndType = config ->emachine == EM_AARCH64
958	? GNU_PROPERTY_AARCH64_FEATURE_1_AND
959	: GNU_PROPERTY_X86_FEATURE_1_AND;
960
961	// Read a body of a NOTE record, which consists of type-length-value fields.
962	ArrayRef<uint8_t> desc = note.getDesc(sec.addralign);
963	while (!desc.empty()) {
964	const uint8_t *place = desc.data();
965	if (desc.size() < `8`)
966	reportFatal(place, "program property is too short");
967	uint32_t type = read32<ELFT::Endianness>(desc.data());
968	uint32_t size = read32<ELFT::Endianness>(desc.data() + `4`);
969	desc = desc.slice(N: `8`);
970	if (desc.size() < size)
971	reportFatal(place, "program property is too short");
972
973	if (type == featureAndType) {
974	// We found a FEATURE_1_AND field. There may be more than one of these
975	// in a .note.gnu.property section, for a relocatable object we
976	// accumulate the bits set.
977	if (size < `4`)
978	reportFatal(place, "FEATURE_1_AND entry is too short");
979	f.andFeatures \|= read32<ELFT::Endianness>(desc.data());
980	} else if (config ->emachine == EM_AARCH64 &&
981	type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) {
982	if (!f.aarch64PauthAbiCoreInfo.empty()) {
983	reportFatal(data.data(),
984	"multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are "
985	"not supported");
986	} else if (size != `16`) {
987	reportFatal(data.data(), "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry "
988	"is invalid: expected 16 bytes, but got " +
989	Twine (size));
990	}
991	f.aarch64PauthAbiCoreInfo = desc;
992	}
993
994	// Padding is present in the note descriptor, if necessary.
995	desc = desc.slice(alignTo<(ELFT::Is64Bits ? `8` : `4`)>(size));
996	}
997
998	// Go to next NOTE record to look for more FEATURE_1_AND descriptions.
999	data = data.slice(nhdr->getSize(sec.addralign));
1000	}
1001	}
1002
1003	template <class ELFT>
1004	InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx,
1005	const Elf_Shdr &sec,
1006	uint32_t info) {
1007	if (info < this->sections.size()) {
1008	InputSectionBase target = this*->sections[info];
1009
1010	// Strictly speaking, a relocation section must be included in the
1011	// group of the section it relocates. However, LLVM 3.3 and earlier
1012	// would fail to do so, so we gracefully handle that case.
1013	if (target == &InputSection::discarded)
1014	return nullptr;
1015
1016	if (target != nullptr)
1017	return target;
1018	}
1019
1020	error(toString(this) + Twine(": relocation section (index ") + Twine(idx) +
1021	") has invalid sh_info (" + Twine(info) + ")");
1022	return nullptr;
1023	}
1024
1025	// The function may be called concurrently for different input files. For
1026	// allocation, prefer makeThreadLocal which does not require holding a lock.
1027	template <class ELFT>
1028	InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
1029	const Elf_Shdr &sec,
1030	StringRef name) {
1031	if (name.starts_with(Prefix: ".n")) {
1032	// The GNU linker uses .note.GNU-stack section as a marker indicating
1033	// that the code in the object file does not expect that the stack is
1034	// executable (in terms of NX bit). If all input files have the marker,
1035	// the GNU linker adds a PT_GNU_STACK segment to tells the loader to
1036	// make the stack non-executable. Most object files have this section as
1037	// of 2017.
1038	//
1039	// But making the stack non-executable is a norm today for security
1040	// reasons. Failure to do so may result in a serious security issue.
1041	// Therefore, we make LLD always add PT_GNU_STACK unless it is
1042	// explicitly told to do otherwise (by -z execstack). Because the stack
1043	// executable-ness is controlled solely by command line options,
1044	// .note.GNU-stack sections are simply ignored.
1045	if (name == ".note.GNU-stack")
1046	return &InputSection::discarded;
1047
1048	// Object files that use processor features such as Intel Control-Flow
1049	// Enforcement (CET) or AArch64 Branch Target Identification BTI, use a
1050	// .note.gnu.property section containing a bitfield of feature bits like the
1051	// GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
1052	//
1053	// Since we merge bitmaps from multiple object files to create a new
1054	// .note.gnu.property containing a single AND'ed bitmap, we discard an input
1055	// file's .note.gnu.property section.
1056	if (name == ".note.gnu.property") {
1057	readGnuProperty<ELFT>(InputSection(*this, sec, name), *this);
1058	return &InputSection::discarded;
1059	}
1060
1061	// Split stacks is a feature to support a discontiguous stack,
1062	// commonly used in the programming language Go. For the details,
1063	// see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
1064	// for split stack will include a .note.GNU-split-stack section.
1065	if (name == ".note.GNU-split-stack") {
1066	if (config ->relocatable) {
1067	error(
1068	msg: "cannot mix split-stack and non-split-stack in a relocatable link");
1069	return &InputSection::discarded;
1070	}
1071	this->splitStack = true;
1072	return &InputSection::discarded;
1073	}
1074
1075	// An object file compiled for split stack, but where some of the
1076	// functions were compiled with the no_split_stack_attribute will
1077	// include a .note.GNU-no-split-stack section.
1078	if (name == ".note.GNU-no-split-stack") {
1079	this->someNoSplitStack = true;
1080	return &InputSection::discarded;
1081	}
1082
1083	// Strip existing .note.gnu.build-id sections so that the output won't have
1084	// more than one build-id. This is not usually a problem because input
1085	// object files normally don't have .build-id sections, but you can create
1086	// such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard
1087	// against it.
1088	if (name == ".note.gnu.build-id")
1089	return &InputSection::discarded;
1090	}
1091
1092	// The linker merges EH (exception handling) frames and creates a
1093	// .eh_frame_hdr section for runtime. So we handle them with a special
1094	// class. For relocatable outputs, they are just passed through.
1095	if (name == ".eh_frame" && !config ->relocatable)
1096	return makeThreadLocal<EhInputSection>(*this, sec, name);
1097
1098	if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name))
1099	return makeThreadLocal<MergeInputSection>(*this, sec, name);
1100	return makeThreadLocal<InputSection>(*this, sec, name);
1101	}
1102
1103	// Initialize symbols. symbols is a parallel array to the corresponding ELF
1104	// symbol table.
1105	template <class ELFT>
1106	void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
1107	ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1108	if (numSymbols == `0`) {
1109	numSymbols = eSyms.size();
1110	symbols = std::make_unique<Symbol *[]>(numSymbols);
1111	}
1112
1113	// Some entries have been filled by LazyObjFile.
1114	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
1115	if (!symbols[i])
1116	symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
1117
1118	// Perform symbol resolution on non-local symbols.
1119	SmallVector<unsigned, `32`> undefineds;
1120	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1121	const Elf_Sym &eSym = eSyms[i];
1122	uint32_t secIdx = eSym.st_shndx;
1123	if (secIdx == SHN_UNDEF) {
1124	undefineds.push_back(Elt: i);
1125	continue;
1126	}
1127
1128	uint8_t binding = eSym.getBinding();
1129	uint8_t stOther = eSym.st_other;
1130	uint8_t type = eSym.getType();
1131	uint64_t value = eSym.st_value;
1132	uint64_t size = eSym.st_size;
1133
1134	Symbol *sym = symbols[i];
1135	sym->isUsedInRegularObj = true;
1136	if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) {
1137	if (value == `0` \|\| value >= UINT32_MAX)
1138	fatal(toString(this) + ": common symbol '" + sym->getName() +
1139	"' has invalid alignment: " + Twine(value));
1140	hasCommonSyms = true;
1141	sym->resolve(
1142	other: CommonSymbol{this, StringRef(), binding, stOther, type, value, size});
1143	continue;
1144	}
1145
1146	// Handle global defined symbols. Defined::section will be set in postParse.
1147	sym->resolve(other: Defined{this, StringRef(), binding, stOther, type, value, size,
1148	nullptr});
1149	}
1150
1151	// Undefined symbols (excluding those defined relative to non-prevailing
1152	// sections) can trigger recursive extract. Process defined symbols first so
1153	// that the relative order between a defined symbol and an undefined symbol
1154	// does not change the symbol resolution behavior. In addition, a set of
1155	// interconnected symbols will all be resolved to the same file, instead of
1156	// being resolved to different files.
1157	for (unsigned i : undefineds) {
1158	const Elf_Sym &eSym = eSyms[i];
1159	Symbol *sym = symbols[i];
1160	sym->resolve(other: Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other,
1161	eSym.getType()});
1162	sym->isUsedInRegularObj = true;
1163	sym->referenced = true;
1164	}
1165	}
1166
1167	template <class ELFT>
1168	void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) {
1169	if (!justSymbols)
1170	initializeSections(ignoreComdats, obj: getObj());
1171
1172	if (!firstGlobal)
1173	return;
1174	SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal);
1175	memset(locals, `0`, sizeof(SymbolUnion) * firstGlobal);
1176
1177	ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1178	for (size_t i = `0`, end = firstGlobal; i != end; ++i) {
1179	const Elf_Sym &eSym = eSyms[i];
1180	uint32_t secIdx = eSym.st_shndx;
1181	if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1182	secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1183	else if (secIdx >= SHN_LORESERVE)
1184	secIdx = `0`;
1185	if (LLVM_UNLIKELY(secIdx >= sections.size()))
1186	fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
1187	if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
1188	error(toString(this) + ": non-local symbol (" + Twine(i) +
1189	") found at index < .symtab's sh_info (" + Twine(end) + ")");
1190
1191	InputSectionBase *sec = sections[secIdx];
1192	uint8_t type = eSym.getType();
1193	if (type == STT_FILE)
1194	sourceFile = CHECK(eSym.getName(stringTable), this);
1195	if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name))
1196	fatal(toString(this) + ": invalid symbol name offset");
1197	StringRef name(stringTable.data() + eSym.st_name);
1198
1199	symbols[i] = reinterpret_cast<Symbol *>(locals + i);
1200	if (eSym.st_shndx == SHN_UNDEF \|\| sec == &InputSection::discarded)
1201	new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
1202	/discardedSecIdx=/secIdx);
1203	else
1204	new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
1205	eSym.st_value, eSym.st_size, sec);
1206	symbols[i]->partition = `1`;
1207	symbols[i]->isUsedInRegularObj = true;
1208	}
1209	}
1210
1211	// Called after all ObjFile::parse is called for all ObjFiles. This checks
1212	// duplicate symbols and may do symbol property merge in the future.
1213	template <class ELFT> void ObjFile<ELFT>::postParse() {
1214	static std::mutex mu;
1215	ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1216	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1217	const Elf_Sym &eSym = eSyms[i];
1218	Symbol &sym = *symbols[i];
1219	uint32_t secIdx = eSym.st_shndx;
1220	uint8_t binding = eSym.getBinding();
1221	if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK &&
1222	binding != STB_GNU_UNIQUE))
1223	errorOrWarn(toString(this) + ": symbol (" + Twine(i) +
1224	") has invalid binding: " + Twine((int)binding));
1225
1226	// st_value of STT_TLS represents the assigned offset, not the actual
1227	// address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
1228	// only be referenced by special TLS relocations. It is usually an error if
1229	// a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa.
1230	if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS &&
1231	eSym.getType() != STT_NOTYPE)
1232	errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " +
1233	toString(f: sym.file) + "\n>>> in " + toString(this));
1234
1235	// Handle non-COMMON defined symbol below. !sym.file allows a symbol
1236	// assignment to redefine a symbol without an error.
1237	if (!sym.file \|\| !sym.isDefined() \|\| secIdx == SHN_UNDEF \|\|
1238	secIdx == SHN_COMMON)
1239	continue;
1240
1241	if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1242	secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1243	else if (secIdx >= SHN_LORESERVE)
1244	secIdx = `0`;
1245	if (LLVM_UNLIKELY(secIdx >= sections.size()))
1246	fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
1247	InputSectionBase *sec = sections[secIdx];
1248	if (sec == &InputSection::discarded) {
1249	if (sym.traced) {
1250	printTraceSymbol(sym: Undefined{this, sym.getName(), sym.binding,
1251	sym.stOther, sym.type, secIdx},
1252	name: sym.getName());
1253	}
1254	if (sym.file == this) {
1255	std::lock_guard<std::mutex> lock(mu);
1256	ctx.nonPrevailingSyms.emplace_back(Args: &sym, Args&: secIdx);
1257	}
1258	continue;
1259	}
1260
1261	if (sym.file == this) {
1262	cast<Defined>(Val&: sym).section = sec;
1263	continue;
1264	}
1265
1266	if (sym.binding == STB_WEAK \|\| binding == STB_WEAK)
1267	continue;
1268	std::lock_guard<std::mutex> lock(mu);
1269	ctx.duplicates.push_back(Elt: {&sym, this, sec, eSym.st_value});
1270	}
1271	}
1272
1273	// The handling of tentative definitions (COMMON symbols) in archives is murky.
1274	// A tentative definition will be promoted to a global definition if there are
1275	// no non-tentative definitions to dominate it. When we hold a tentative
1276	// definition to a symbol and are inspecting archive members for inclusion
1277	// there are 2 ways we can proceed:
1278	//
1279	// 1) Consider the tentative definition a 'real' definition (ie promotion from
1280	// tentative to real definition has already happened) and not inspect
1281	// archive members for Global/Weak definitions to replace the tentative
1282	// definition. An archive member would only be included if it satisfies some
1283	// other undefined symbol. This is the behavior Gold uses.
1284	//
1285	// 2) Consider the tentative definition as still undefined (ie the promotion to
1286	// a real definition happens only after all symbol resolution is done).
1287	// The linker searches archive members for STB_GLOBAL definitions to
1288	// replace the tentative definition with. This is the behavior used by
1289	// GNU ld.
1290	//
1291	// The second behavior is inherited from SysVR4, which based it on the FORTRAN
1292	// COMMON BLOCK model. This behavior is needed for proper initialization in old
1293	// (pre F90) FORTRAN code that is packaged into an archive.
1294	//
1295	// The following functions search archive members for definitions to replace
1296	// tentative definitions (implementing behavior 2).
1297	static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
1298	StringRef archiveName) {
1299	IRSymtabFile symtabFile = check(e: readIRSymtab(MBRef: mb));
1300	for (const irsymtab::Reader::SymbolRef &sym :
1301	symtabFile.TheReader.symbols()) {
1302	if (sym.isGlobal() && sym.getName() == symName)
1303	return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon();
1304	}
1305	return false;
1306	}
1307
1308	template <class ELFT>
1309	static bool isNonCommonDef(ELFKind ekind, MemoryBufferRef mb, StringRef symName,
1310	StringRef archiveName) {
1311	ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ekind, mb, archiveName);
1312	obj->init();
1313	StringRef stringtable = obj->getStringTable();
1314
1315	for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
1316	Expected<StringRef> name = sym.getName(stringtable);
1317	if (name && name.get() == symName)
1318	return sym.isDefined() && sym.getBinding() == STB_GLOBAL &&
1319	!sym.isCommon();
1320	}
1321	return false;
1322	}
1323
1324	static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
1325	StringRef archiveName) {
1326	switch (getELFKind(mb, archiveName)) {
1327	case ELF32LEKind:
1328	return isNonCommonDef<ELF32LE>(ekind: ELF32LEKind, mb, symName, archiveName);
1329	case ELF32BEKind:
1330	return isNonCommonDef<ELF32BE>(ekind: ELF32BEKind, mb, symName, archiveName);
1331	case ELF64LEKind:
1332	return isNonCommonDef<ELF64LE>(ekind: ELF64LEKind, mb, symName, archiveName);
1333	case ELF64BEKind:
1334	return isNonCommonDef<ELF64BE>(ekind: ELF64BEKind, mb, symName, archiveName);
1335	default:
1336	llvm_unreachable("getELFKind");
1337	}
1338	}
1339
1340	unsigned SharedFile::vernauxNum;
1341
1342	SharedFile::SharedFile(MemoryBufferRef m, StringRef defaultSoName)
1343	: ELFFileBase (SharedKind, getELFKind(mb: m, archiveName: ""), m), soName (defaultSoName),
1344	isNeeded(!config ->asNeeded) {}
1345
1346	// Parse the version definitions in the object file if present, and return a
1347	// vector whose nth element contains a pointer to the Elf_Verdef for version
1348	// identifier n. Version identifiers that are not definitions map to nullptr.
1349	template <typename ELFT>
1350	static SmallVector<const void *, `0`>
1351	parseVerdefs(const uint8_t base, const* typename ELFT::Shdr *sec) {
1352	if (!sec)
1353	return {};
1354
1355	// Build the Verdefs array by following the chain of Elf_Verdef objects
1356	// from the start of the .gnu.version_d section.
1357	SmallVector<const void *, `0`> verdefs;
1358	const uint8_t *verdef = base + sec->sh_offset;
1359	for (unsigned i = `0`, e = sec->sh_info; i != e; ++i) {
1360	auto curVerdef = reinterpret_cast<const* typename ELFT::Verdef *>(verdef);
1361	verdef += curVerdef->vd_next;
1362	unsigned verdefIndex = curVerdef->vd_ndx;
1363	if (verdefIndex >= verdefs.size())
1364	verdefs.resize(N: verdefIndex + `1`);
1365	verdefs [verdefIndex] = curVerdef;
1366	}
1367	return verdefs;
1368	}
1369
1370	// Parse SHT_GNU_verneed to properly set the name of a versioned undefined
1371	// symbol. We detect fatal issues which would cause vulnerabilities, but do not
1372	// implement sophisticated error checking like in llvm-readobj because the value
1373	// of such diagnostics is low.
1374	template <typename ELFT>
1375	std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj,
1376	const typename ELFT::Shdr *sec) {
1377	if (!sec)
1378	return {};
1379	std::vector<uint32_t> verneeds;
1380	ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(sec), this*);
1381	const uint8_t *verneedBuf = data.begin();
1382	for (unsigned i = `0`; i != sec->sh_info; ++i) {
1383	if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end())
1384	fatal(msg: toString(f: this) + " has an invalid Verneed");
1385	auto vn = reinterpret_cast<const* typename ELFT::Verneed *>(verneedBuf);
1386	const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux;
1387	for (unsigned j = `0`; j != vn->vn_cnt; ++j) {
1388	if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end())
1389	fatal(msg: toString(f: this) + " has an invalid Vernaux");
1390	auto aux = reinterpret_cast<const* typename ELFT::Vernaux *>(vernauxBuf);
1391	if (aux->vna_name >= this->stringTable.size())
1392	fatal(msg: toString(f: this) + " has a Vernaux with an invalid vna_name");
1393	uint16_t version = aux->vna_other & VERSYM_VERSION;
1394	if (version >= verneeds.size())
1395	verneeds.resize(new_size: version + `1`);
1396	verneeds [version] = aux->vna_name;
1397	vernauxBuf += aux->vna_next;
1398	}
1399	verneedBuf += vn->vn_next;
1400	}
1401	return verneeds;
1402	}
1403
1404	// We do not usually care about alignments of data in shared object
1405	// files because the loader takes care of it. However, if we promote a
1406	// DSO symbol to point to .bss due to copy relocation, we need to keep
1407	// the original alignment requirements. We infer it in this function.
1408	template <typename ELFT>
1409	static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections,
1410	const typename ELFT::Sym &sym) {
1411	uint64_t ret = UINT64_MAX;
1412	if (sym.st_value)
1413	ret = `1ULL` << llvm::countr_zero(Val: (uint64_t)sym.st_value);
1414	if (`0` < sym.st_shndx && sym.st_shndx < sections.size())
1415	ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign);
1416	return (ret > UINT32_MAX) ? `0` : ret;
1417	}
1418
1419	// Fully parse the shared object file.
1420	//
1421	// This function parses symbol versions. If a DSO has version information,
1422	// the file has a ".gnu.version_d" section which contains symbol version
1423	// definitions. Each symbol is associated to one version through a table in
1424	// ".gnu.version" section. That table is a parallel array for the symbol
1425	// table, and each table entry contains an index in ".gnu.version_d".
1426	//
1427	// The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for
1428	// VER_NDX_GLOBAL. There's no table entry for these special versions in
1429	// ".gnu.version_d".
1430	//
1431	// The file format for symbol versioning is perhaps a bit more complicated
1432	// than necessary, but you can easily understand the code if you wrap your
1433	// head around the data structure described above.
1434	template <class ELFT> void SharedFile::parse() {
1435	using Elf_Dyn = typename ELFT::Dyn;
1436	using Elf_Shdr = typename ELFT::Shdr;
1437	using Elf_Sym = typename ELFT::Sym;
1438	using Elf_Verdef = typename ELFT::Verdef;
1439	using Elf_Versym = typename ELFT::Versym;
1440
1441	ArrayRef<Elf_Dyn> dynamicTags;
1442	const ELFFile<ELFT> obj = this->getObj<ELFT>();
1443	ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>();
1444
1445	const Elf_Shdr versymSec = nullptr*;
1446	const Elf_Shdr verdefSec = nullptr*;
1447	const Elf_Shdr verneedSec = nullptr*;
1448
1449	// Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
1450	for (const Elf_Shdr &sec : sections) {
1451	switch (sec.sh_type) {
1452	default:
1453	continue;
1454	case SHT_DYNAMIC:
1455	dynamicTags =
1456	CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this);
1457	break;
1458	case SHT_GNU_versym:
1459	versymSec = &sec;
1460	break;
1461	case SHT_GNU_verdef:
1462	verdefSec = &sec;
1463	break;
1464	case SHT_GNU_verneed:
1465	verneedSec = &sec;
1466	break;
1467	}
1468	}
1469
1470	if (versymSec && numELFSyms == `0`) {
1471	error(msg: "SHT_GNU_versym should be associated with symbol table");
1472	return;
1473	}
1474
1475	// Search for a DT_SONAME tag to initialize this->soName.
1476	for (const Elf_Dyn &dyn : dynamicTags) {
1477	if (dyn.d_tag == DT_NEEDED) {
1478	uint64_t val = dyn.getVal();
1479	if (val >= this->stringTable.size())
1480	fatal(msg: toString(f: this) + ": invalid DT_NEEDED entry");
1481	dtNeeded.push_back(Elt: this->stringTable.data() + val);
1482	} else if (dyn.d_tag == DT_SONAME) {
1483	uint64_t val = dyn.getVal();
1484	if (val >= this->stringTable.size())
1485	fatal(msg: toString(f: this) + ": invalid DT_SONAME entry");
1486	soName = this->stringTable.data() + val;
1487	}
1488	}
1489
1490	// DSOs are uniquified not by filename but by soname.
1491	DenseMap<CachedHashStringRef, SharedFile *>::iterator it;
1492	bool wasInserted;
1493	std::tie(args&: it, args&: wasInserted) =
1494	symtab.soNames.try_emplace(Key: CachedHashStringRef (soName), Args: this);
1495
1496	// If a DSO appears more than once on the command line with and without
1497	// --as-needed, --no-as-needed takes precedence over --as-needed because a
1498	// user can add an extra DSO with --no-as-needed to force it to be added to
1499	// the dependency list.
1500	it ->second->isNeeded \|= isNeeded;
1501	if (!wasInserted)
1502	return;
1503
1504	ctx.sharedFiles.push_back(Elt: this);
1505
1506	verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec);
1507	std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec);
1508
1509	// Parse ".gnu.version" section which is a parallel array for the symbol
1510	// table. If a given file doesn't have a ".gnu.version" section, we use
1511	// VER_NDX_GLOBAL.
1512	size_t size = numELFSyms - firstGlobal;
1513	std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL);
1514	if (versymSec) {
1515	ArrayRef<Elf_Versym> versym =
1516	CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),
1517	this)
1518	.slice(firstGlobal);
1519	for (size_t i = `0`; i < size; ++i)
1520	versyms [i] = versym[i].vs_index;
1521	}
1522
1523	// System libraries can have a lot of symbols with versions. Using a
1524	// fixed buffer for computing the versions name (foo@ver) can save a
1525	// lot of allocations.
1526	SmallString<`0`> versionedNameBuffer;
1527
1528	// Add symbols to the symbol table.
1529	ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>();
1530	for (size_t i = `0`, e = syms.size(); i != e; ++i) {
1531	const Elf_Sym &sym = syms[i];
1532
1533	// ELF spec requires that all local symbols precede weak or global
1534	// symbols in each symbol table, and the index of first non-local symbol
1535	// is stored to sh_info. If a local symbol appears after some non-local
1536	// symbol, that's a violation of the spec.
1537	StringRef name = CHECK(sym.getName(stringTable), this);
1538	if (sym.getBinding() == STB_LOCAL) {
1539	errorOrWarn(msg: toString(f: this) + ": invalid local symbol '" + name +
1540	"' in global part of symbol table");
1541	continue;
1542	}
1543
1544	const uint16_t ver = versyms [i], idx = ver & ~VERSYM_HIDDEN;
1545	if (sym.isUndefined()) {
1546	// For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but
1547	// as of binutils 2.34, GNU ld produces VER_NDX_LOCAL.
1548	if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) {
1549	if (idx >= verneeds.size()) {
1550	error(msg: "corrupt input file: version need index " + Twine(idx) +
1551	" for symbol " + name + " is out of bounds\n>>> defined in " +
1552	toString(f: this));
1553	continue;
1554	}
1555	StringRef verName = stringTable.data() + verneeds [idx];
1556	versionedNameBuffer.clear();
1557	name = saver().save(
1558	S: (name + "@" + verName).toStringRef(Out&: versionedNameBuffer));
1559	}
1560	Symbol *s = symtab.addSymbol(
1561	newSym: Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
1562	s->exportDynamic = true;
1563	if (sym.getBinding() != STB_WEAK &&
1564	config ->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore)
1565	requiredSymbols.push_back(Elt: s);
1566	continue;
1567	}
1568
1569	if (ver == VER_NDX_LOCAL \|\|
1570	(ver != VER_NDX_GLOBAL && idx >= verdefs.size())) {
1571	// In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the
1572	// MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns
1573	// VER_NDX_LOCAL. Workaround this bug.
1574	if (config ->emachine == EM_MIPS && name == "_gp_disp")
1575	continue;
1576	error(msg: "corrupt input file: version definition index " + Twine(idx) +
1577	" for symbol " + name + " is out of bounds\n>>> defined in " +
1578	toString(f: this));
1579	continue;
1580	}
1581
1582	uint32_t alignment = getAlignment<ELFT>(sections, sym);
1583	if (ver == idx) {
1584	auto *s = symtab.addSymbol(
1585	newSym: SharedSymbol{*this, name, sym.getBinding(), sym.st_other,
1586	sym.getType(), sym.st_value, sym.st_size, alignment});
1587	s->dsoDefined = true;
1588	if (s->file == this)
1589	s->versionId = ver;
1590	}
1591
1592	// Also add the symbol with the versioned name to handle undefined symbols
1593	// with explicit versions.
1594	if (ver == VER_NDX_GLOBAL)
1595	continue;
1596
1597	StringRef verName =
1598	stringTable.data() +
1599	reinterpret_cast<const Elf_Verdef *>(verdefs [idx])->getAux()->vda_name;
1600	versionedNameBuffer.clear();
1601	name = (name + "@" + verName).toStringRef(Out&: versionedNameBuffer);
1602	auto *s = symtab.addSymbol(
1603	newSym: SharedSymbol{*this, saver().save(S: name), sym.getBinding(), sym.st_other,
1604	sym.getType(), sym.st_value, sym.st_size, alignment});
1605	s->dsoDefined = true;
1606	if (s->file == this)
1607	s->versionId = idx;
1608	}
1609	}
1610
1611	static ELFKind getBitcodeELFKind(const Triple &t) {
1612	if (t.isLittleEndian())
1613	return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
1614	return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
1615	}
1616
1617	static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
1618	switch (t.getArch()) {
1619	case Triple::aarch64:
1620	case Triple::aarch64_be:
1621	return EM_AARCH64;
1622	case Triple::amdgcn:
1623	case Triple::r600:
1624	return EM_AMDGPU;
1625	case Triple::arm:
1626	case Triple::armeb:
1627	case Triple::thumb:
1628	case Triple::thumbeb:
1629	return EM_ARM;
1630	case Triple::avr:
1631	return EM_AVR;
1632	case Triple::hexagon:
1633	return EM_HEXAGON;
1634	case Triple::loongarch32:
1635	case Triple::loongarch64:
1636	return EM_LOONGARCH;
1637	case Triple::mips:
1638	case Triple::mipsel:
1639	case Triple::mips64:
1640	case Triple::mips64el:
1641	return EM_MIPS;
1642	case Triple::msp430:
1643	return EM_MSP430;
1644	case Triple::ppc:
1645	case Triple::ppcle:
1646	return EM_PPC;
1647	case Triple::ppc64:
1648	case Triple::ppc64le:
1649	return EM_PPC64;
1650	case Triple::riscv32:
1651	case Triple::riscv64:
1652	return EM_RISCV;
1653	case Triple::sparcv9:
1654	return EM_SPARCV9;
1655	case Triple::systemz:
1656	return EM_S390;
1657	case Triple::x86:
1658	return t.isOSIAMCU() ? EM_IAMCU : EM_386;
1659	case Triple::x86_64:
1660	return EM_X86_64;
1661	default:
1662	error(msg: path + ": could not infer e_machine from bitcode target triple " +
1663	t.str());
1664	return EM_NONE;
1665	}
1666	}
1667
1668	static uint8_t getOsAbi(const Triple &t) {
1669	switch (t.getOS()) {
1670	case Triple::AMDHSA:
1671	return ELF::ELFOSABI_AMDGPU_HSA;
1672	case Triple::AMDPAL:
1673	return ELF::ELFOSABI_AMDGPU_PAL;
1674	case Triple::Mesa3D:
1675	return ELF::ELFOSABI_AMDGPU_MESA3D;
1676	default:
1677	return ELF::ELFOSABI_NONE;
1678	}
1679	}
1680
1681	BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
1682	uint64_t offsetInArchive, bool lazy)
1683	: InputFile (BitcodeKind, mb) {
1684	this->archiveName = archiveName;
1685	this->lazy = lazy;
1686
1687	std::string path = mb.getBufferIdentifier().str();
1688	if (config ->thinLTOIndexOnly)
1689	path = replaceThinLTOSuffix(path: mb.getBufferIdentifier());
1690
1691	// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1692	// name. If two archives define two members with the same name, this
1693	// causes a collision which result in only one of the objects being taken
1694	// into consideration at LTO time (which very likely causes undefined
1695	// symbols later in the link stage). So we append file offset to make
1696	// filename unique.
1697	StringRef name = archiveName.empty()
1698	? saver().save(S: path)
1699	: saver().save(S: archiveName + "(" + path::filename(path) +
1700	" at " + utostr(X: offsetInArchive) + ")");
1701	MemoryBufferRef mbref(mb.getBuffer(), name);
1702
1703	obj = CHECK(lto::InputFile::create(mbref), this);
1704
1705	Triple t(obj ->getTargetTriple());
1706	ekind = getBitcodeELFKind(t);
1707	emachine = getBitcodeMachineKind(path: mb.getBufferIdentifier(), t);
1708	osabi = getOsAbi(t);
1709	}
1710
1711	static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
1712	switch (gvVisibility) {
1713	case GlobalValue::DefaultVisibility:
1714	return STV_DEFAULT;
1715	case GlobalValue::HiddenVisibility:
1716	return STV_HIDDEN;
1717	case GlobalValue::ProtectedVisibility:
1718	return STV_PROTECTED;
1719	}
1720	llvm_unreachable("unknown visibility");
1721	}
1722
1723	static void
1724	createBitcodeSymbol(Symbol &sym, const* std::vector<bool> &keptComdats,
1725	const lto::InputFile::Symbol &objSym, BitcodeFile &f) {
1726	uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL;
1727	uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
1728	uint8_t visibility = mapVisibility(gvVisibility: objSym.getVisibility());
1729
1730	if (!sym)
1731	sym = symtab.insert(name: saver().save(S: objSym.getName()));
1732
1733	int c = objSym.getComdatIndex();
1734	if (objSym.isUndefined() \|\| (c != -`1` && !keptComdats [c])) {
1735	Undefined newSym(&f, StringRef (), binding, visibility, type);
1736	sym->resolve(other: newSym);
1737	sym->referenced = true;
1738	return;
1739	}
1740
1741	if (objSym.isCommon()) {
1742	sym->resolve(other: CommonSymbol {&f, StringRef (), binding, visibility, STT_OBJECT,
1743	objSym.getCommonAlignment(),
1744	objSym.getCommonSize()});
1745	} else {
1746	Defined newSym(&f, StringRef (), binding, visibility, type, `0`, `0`, nullptr);
1747	if (objSym.canBeOmittedFromSymbolTable())
1748	newSym.exportDynamic = false;
1749	sym->resolve(other: newSym);
1750	}
1751	}
1752
1753	void BitcodeFile::parse() {
1754	for (std::pair<StringRef, Comdat::SelectionKind> s : obj ->getComdatTable()) {
1755	keptComdats.push_back(
1756	x: s.second == Comdat::NoDeduplicate \|\|
1757	symtab.comdatGroups.try_emplace(Key: CachedHashStringRef (s.first), Args: this)
1758	.second);
1759	}
1760
1761	if (numSymbols == `0`) {
1762	numSymbols = obj ->symbols().size();
1763	symbols = std::make_unique<Symbol *[]>(num: numSymbols);
1764	}
1765	// Process defined symbols first. See the comment in
1766	// ObjFile<ELFT>::initializeSymbols.
1767	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols()))
1768	if (!irSym.isUndefined())
1769	createBitcodeSymbol(sym&: symbols [i], keptComdats, objSym: irSym, f&: *this);
1770	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols()))
1771	if (irSym.isUndefined())
1772	createBitcodeSymbol(sym&: symbols [i], keptComdats, objSym: irSym, f&: *this);
1773
1774	for (auto l : obj ->getDependentLibraries())
1775	addDependentLibrary(specifier: l, f: this);
1776	}
1777
1778	void BitcodeFile::parseLazy() {
1779	numSymbols = obj ->symbols().size();
1780	symbols = std::make_unique<Symbol *[]>(num: numSymbols);
1781	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols()))
1782	if (!irSym.isUndefined()) {
1783	auto *sym = symtab.insert(name: saver().save(S: irSym.getName()));
1784	sym->resolve(other: LazySymbol {*this});
1785	symbols [i] = sym;
1786	}
1787	}
1788
1789	void BitcodeFile::postParse() {
1790	for (auto [i, irSym] : llvm::enumerate(First: obj ->symbols())) {
1791	const Symbol &sym = *symbols [i];
1792	if (sym.file == this \|\| !sym.isDefined() \|\| irSym.isUndefined() \|\|
1793	irSym.isCommon() \|\| irSym.isWeak())
1794	continue;
1795	int c = irSym.getComdatIndex();
1796	if (c != -`1` && !keptComdats [c])
1797	continue;
1798	reportDuplicate(sym, newFile: this, errSec: nullptr, errOffset: `0`);
1799	}
1800	}
1801
1802	void BinaryFile::parse() {
1803	ArrayRef<uint8_t> data = arrayRefFromStringRef(Input: mb.getBuffer());
1804	auto section = make<InputSection>(args: this*, args: SHF_ALLOC \| SHF_WRITE, args: SHT_PROGBITS,
1805	args: `8`, args&: data, args: ".data");
1806	sections.push_back(Elt: section);
1807
1808	// For each input file foo that is embedded to a result as a binary
1809	// blob, we define _binary_foo_{start,end,size} symbols, so that
1810	// user programs can access blobs by name. Non-alphanumeric
1811	// characters in a filename are replaced with underscore.
1812	std::string s = "_binary_" + mb.getBufferIdentifier().str();
1813	for (char &c : s)
1814	if (!isAlnum(C: c))
1815	c = `'_'`;
1816
1817	llvm::StringSaver &saver = lld::saver();
1818
1819	symtab.addAndCheckDuplicate(newSym: Defined {this, saver.save(S: s + "_start"),
1820	STB_GLOBAL, STV_DEFAULT, STT_OBJECT, `0`, `0`,
1821	section});
1822	symtab.addAndCheckDuplicate(newSym: Defined {this, saver.save(S: s + "_end"), STB_GLOBAL,
1823	STV_DEFAULT, STT_OBJECT, data.size(), `0`,
1824	section});
1825	symtab.addAndCheckDuplicate(newSym: Defined {this, saver.save(S: s + "_size"), STB_GLOBAL,
1826	STV_DEFAULT, STT_OBJECT, data.size(), `0`,
1827	nullptr});
1828	}
1829
1830	InputFile *elf::createInternalFile(StringRef name) {
1831	auto *file =
1832	make<InputFile>(args: InputFile::InternalKind, args: MemoryBufferRef("", name));
1833	// References from an internal file do not lead to --warn-backrefs
1834	// diagnostics.
1835	file->groupId = `0`;
1836	return file;
1837	}
1838
1839	ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName,
1840	bool lazy) {
1841	ELFFileBase *f;
1842	switch (getELFKind(mb, archiveName)) {
1843	case ELF32LEKind:
1844	f = make<ObjFile<ELF32LE>>(args: ELF32LEKind, args&: mb, args&: archiveName);
1845	break;
1846	case ELF32BEKind:
1847	f = make<ObjFile<ELF32BE>>(args: ELF32BEKind, args&: mb, args&: archiveName);
1848	break;
1849	case ELF64LEKind:
1850	f = make<ObjFile<ELF64LE>>(args: ELF64LEKind, args&: mb, args&: archiveName);
1851	break;
1852	case ELF64BEKind:
1853	f = make<ObjFile<ELF64BE>>(args: ELF64BEKind, args&: mb, args&: archiveName);
1854	break;
1855	default:
1856	llvm_unreachable("getELFKind");
1857	}
1858	f->init();
1859	f->lazy = lazy;
1860	return f;
1861	}
1862
1863	template <class ELFT> void ObjFile<ELFT>::parseLazy() {
1864	const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>();
1865	numSymbols = eSyms.size();
1866	symbols = std::make_unique<Symbol *[]>(numSymbols);
1867
1868	// resolve() may trigger this->extract() if an existing symbol is an undefined
1869	// symbol. If that happens, this function has served its purpose, and we can
1870	// exit from the loop early.
1871	for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1872	if (eSyms[i].st_shndx == SHN_UNDEF)
1873	continue;
1874	symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
1875	symbols[i]->resolve(LazySymbol{*this});
1876	if (!lazy)
1877	break;
1878	}
1879	}
1880
1881	bool InputFile::shouldExtractForCommon(StringRef name) const {
1882	if (isa<BitcodeFile>(Val: this))
1883	return isBitcodeNonCommonDef(mb, symName: name, archiveName);
1884
1885	return isNonCommonDef(mb, symName: name, archiveName);
1886	}
1887
1888	std::string elf::replaceThinLTOSuffix(StringRef path) {
1889	auto [suffix, repl] = config ->thinLTOObjectSuffixReplace;
1890	if (path.consume_back(Suffix: suffix))
1891	return (path + repl).str();
1892	return std::string (path);
1893	}
1894
1895	template class elf::ObjFile<ELF32LE>;
1896	template class elf::ObjFile<ELF32BE>;
1897	template class elf::ObjFile<ELF64LE>;
1898	template class elf::ObjFile<ELF64BE>;
1899
1900	template void SharedFile::parse<ELF32LE>();
1901	template void SharedFile::parse<ELF32BE>();
1902	template void SharedFile::parse<ELF64LE>();
1903	template void SharedFile::parse<ELF64BE>();
1904

source code of lld/ELF/InputFiles.cpp