BenchmarkRunner.cpp source code [llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp]

1	//===-- BenchmarkRunner.cpp -------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include <cmath>
10	#include <memory>
11	#include <string>
12
13	#include "Assembler.h"
14	#include "BenchmarkRunner.h"
15	#include "Error.h"
16	#include "MCInstrDescView.h"
17	#include "MmapUtils.h"
18	#include "PerfHelper.h"
19	#include "SubprocessMemory.h"
20	#include "Target.h"
21	#include "llvm/ADT/StringExtras.h"
22	#include "llvm/ADT/StringRef.h"
23	#include "llvm/ADT/Twine.h"
24	#include "llvm/Support/CrashRecoveryContext.h"
25	#include "llvm/Support/Error.h"
26	#include "llvm/Support/FileSystem.h"
27	#include "llvm/Support/MemoryBuffer.h"
28	#include "llvm/Support/Program.h"
29	#include "llvm/Support/Signals.h"
30	#include "llvm/Support/SystemZ/zOSSupport.h"
31
32	#ifdef __linux__
33	#ifdef HAVE_LIBPFM
34	#include <perfmon/perf_event.h>
35	#endif
36	#include <sys/mman.h>
37	#include <sys/ptrace.h>
38	#include <sys/resource.h>
39	#include <sys/socket.h>
40	#include <sys/syscall.h>
41	#include <sys/wait.h>
42	#include <unistd.h>
43
44	#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
45	#include <sys/rseq.h>
46	#if defined(RSEQ_SIG) && defined(SYS_rseq)
47	#define GLIBC_INITS_RSEQ
48	#endif
49	#endif
50	#endif // __linux__
51
52	namespace llvm {
53	namespace exegesis {
54
55	BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
56	BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
57	ExecutionModeE ExecutionMode,
58	ArrayRef<ValidationEvent> ValCounters)
59	: State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
60	ExecutionMode(ExecutionMode), ValidationCounters (ValCounters),
61	Scratch(std::make_unique<ScratchSpace>()) {}
62
63	BenchmarkRunner::~BenchmarkRunner() = default;
64
65	void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
66	const SmallVectorImpl<int64_t> &NewValues,
67	SmallVectorImpl<int64_t> *Result) {
68	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
69	if (NumValues > Result->size())
70	Result->resize(N: NumValues, NV: `0`);
71	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
72	(*Result)[I] += NewValues [I];
73	}
74
75	Expected<SmallVector<int64_t, `4`>>
76	BenchmarkRunner::FunctionExecutor::runAndSample(
77	const char Counters, ArrayRef<const* char *> ValidationCounters,
78	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
79	// We sum counts when there are several counters for a single ProcRes
80	// (e.g. P23 on SandyBridge).
81	SmallVector<int64_t, `4`> CounterValues;
82	SmallVector<StringRef, `2`> CounterNames;
83	StringRef (Counters).split(A&: CounterNames, Separator: `'+'`);
84	for (auto &CounterName : CounterNames) {
85	CounterName = CounterName.trim();
86	Expected<SmallVector<int64_t, `4`>> ValueOrError = runWithCounter(
87	CounterName, ValidationCounters, ValidationCounterValues);
88	if (!ValueOrError)
89	return ValueOrError.takeError();
90	accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues);
91	}
92	return CounterValues;
93	}
94
95	namespace {
96	class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
97	public:
98	static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
99	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
100	BenchmarkRunner::ScratchSpace *Scratch) {
101	Expected<ExecutableFunction> EF =
102	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
103
104	if (!EF)
105	return EF.takeError();
106
107	return std::unique_ptr<InProcessFunctionExecutorImpl>(
108	new InProcessFunctionExecutorImpl (State, std::move(*EF), Scratch));
109	}
110
111	private:
112	InProcessFunctionExecutorImpl(const LLVMState &State,
113	ExecutableFunction Function,
114	BenchmarkRunner::ScratchSpace *Scratch)
115	: State(State), Function (std::move(Function)), Scratch(Scratch) {}
116
117	static void accumulateCounterValues(const SmallVector<int64_t, `4`> &NewValues,
118	SmallVector<int64_t, `4`> *Result) {
119	const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
120	if (NumValues > Result->size())
121	Result->resize(N: NumValues, NV: `0`);
122	for (size_t I = `0`, End = NewValues.size(); I < End; ++I)
123	(*Result)[I] += NewValues [I];
124	}
125
126	Expected<SmallVector<int64_t, `4`>> runWithCounter(
127	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
128	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
129	const ExegesisTarget &ET = State.getExegesisTarget();
130	char *const ScratchPtr = Scratch->ptr();
131	auto CounterOrError =
132	ET.createCounter(CounterName, State, ValidationCounters);
133
134	if (!CounterOrError)
135	return CounterOrError.takeError();
136
137	pfm::CounterGroup *Counter = CounterOrError.get().get();
138	Scratch->clear();
139	{
140	auto PS = ET.withSavedState();
141	CrashRecoveryContext CRC;
142	CrashRecoveryContext::Enable();
143	const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() {
144	Counter->start();
145	this->Function (ScratchPtr);
146	Counter->stop();
147	});
148	CrashRecoveryContext::Disable();
149	PS.reset();
150	if (Crashed) {
151	#ifdef LLVM_ON_UNIX
152	// See "Exit Status for Commands":
153	// https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
154	constexpr const int kSigOffset = `128`;
155	return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset);
156	#else
157	// The exit code of the process on windows is not meaningful as a
158	// signal, so simply pass in -1 as the signal into the error.
159	return make_error<SnippetSignal>(-`1`);
160	#endif // LLVM_ON_UNIX
161	}
162	}
163
164	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
165	if (!ValidationValuesOrErr)
166	return ValidationValuesOrErr.takeError();
167
168	ArrayRef RealValidationValues = *ValidationValuesOrErr;
169	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
170	ValidationCounterValues [I] = RealValidationValues [I];
171
172	return Counter->readOrError(FunctionBytes: Function.getFunctionBytes());
173	}
174
175	const LLVMState &State;
176	const ExecutableFunction Function;
177	BenchmarkRunner::ScratchSpace *const Scratch;
178	};
179
180	#ifdef __linux__
181	// The following class implements a function executor that executes the
182	// benchmark code within a subprocess rather than within the main llvm-exegesis
183	// process. This allows for much more control over the execution context of the
184	// snippet, particularly with regard to memory. This class performs all the
185	// necessary functions to create the subprocess, execute the snippet in the
186	// subprocess, and report results/handle errors.
187	class SubProcessFunctionExecutorImpl
188	: public BenchmarkRunner::FunctionExecutor {
189	public:
190	static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
191	create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
192	const BenchmarkKey &Key) {
193	Expected<ExecutableFunction> EF =
194	ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
195	if (!EF)
196	return EF.takeError();
197
198	return std::unique_ptr<SubProcessFunctionExecutorImpl>(
199	new SubProcessFunctionExecutorImpl (State, std::move(*EF), Key));
200	}
201
202	private:
203	SubProcessFunctionExecutorImpl(const LLVMState &State,
204	ExecutableFunction Function,
205	const BenchmarkKey &Key)
206	: State(State), Function (std::move(Function)), Key(Key) {}
207
208	enum ChildProcessExitCodeE {
209	CounterFDReadFailed = `1`,
210	RSeqDisableFailed,
211	FunctionDataMappingFailed,
212	AuxiliaryMemorySetupFailed
213	};
214
215	StringRef childProcessExitCodeToString(int ExitCode) const {
216	switch (ExitCode) {
217	case ChildProcessExitCodeE::CounterFDReadFailed:
218	return "Counter file descriptor read failed";
219	case ChildProcessExitCodeE::RSeqDisableFailed:
220	return "Disabling restartable sequences failed";
221	case ChildProcessExitCodeE::FunctionDataMappingFailed:
222	return "Failed to map memory for assembled snippet";
223	case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
224	return "Failed to setup auxiliary memory";
225	default:
226	return "Child process returned with unknown exit code";
227	}
228	}
229
230	Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
231	struct msghdr Message = {};
232	char Buffer[CMSG_SPACE(sizeof(FD))];
233	memset(s: Buffer, c: `0`, n: sizeof(Buffer));
234	Message.msg_control = Buffer;
235	Message.msg_controllen = sizeof(Buffer);
236
237	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
238	ControlMessage->cmsg_level = SOL_SOCKET;
239	ControlMessage->cmsg_type = SCM_RIGHTS;
240	ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
241
242	memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD));
243
244	Message.msg_controllen = CMSG_SPACE(sizeof(FD));
245
246	ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: `0`);
247
248	if (BytesWritten < `0`)
249	return make_error<Failure>(Args: "Failed to write FD to socket: " +
250	Twine (strerror(errno)));
251
252	return Error::success();
253	}
254
255	Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
256	struct msghdr Message = {};
257
258	char ControlBuffer[`256`];
259	Message.msg_control = ControlBuffer;
260	Message.msg_controllen = sizeof(ControlBuffer);
261
262	ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: `0`);
263
264	if (BytesRead < `0`)
265	return make_error<Failure>(Args: "Failed to read FD from socket: " +
266	Twine (strerror(errno)));
267
268	struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
269
270	int FD;
271
272	if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
273	return make_error<Failure>(Args: "Failed to get correct number of bytes for "
274	"file descriptor from socket.");
275
276	memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD));
277
278	return FD;
279	}
280
281	Error
282	runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
283	SmallVectorImpl<int64_t> &CounterValues,
284	ArrayRef<const char *> ValidationCounters,
285	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
286	const ExegesisTarget &ET = State.getExegesisTarget();
287	auto CounterOrError =
288	ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID);
289
290	if (!CounterOrError)
291	return CounterOrError.takeError();
292
293	pfm::CounterGroup *Counter = CounterOrError.get().get();
294
295	// Make sure to attach to the process (and wait for the sigstop to be
296	// delivered and for the process to continue) before we write to the counter
297	// file descriptor. Attaching to the process before writing to the socket
298	// ensures that the subprocess at most has blocked on the read call. If we
299	// attach afterwards, the subprocess might exit before we get to the attach
300	// call due to effects like scheduler contention, introducing transient
301	// failures.
302	if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != `0`)
303	return make_error<Failure>(Args: "Failed to attach to the child process: " +
304	Twine (strerror(errno)));
305
306	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`) {
307	return make_error<Failure>(
308	Args: "Failed to wait for child process to stop after attaching: " +
309	Twine (strerror(errno)));
310	}
311
312	if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != `0`)
313	return make_error<Failure>(
314	Args: "Failed to continue execution of the child process: " +
315	Twine (strerror(errno)));
316
317	int CounterFileDescriptor = Counter->getFileDescriptor();
318	Error SendError =
319	sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor);
320
321	if (SendError)
322	return SendError;
323
324	int ChildStatus;
325	if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: `0`) == -`1`) {
326	return make_error<Failure>(
327	Args: "Waiting for the child process to complete failed: " +
328	Twine (strerror(errno)));
329	}
330
331	if (WIFEXITED(ChildStatus)) {
332	int ChildExitCode = WEXITSTATUS(ChildStatus);
333	if (ChildExitCode == `0`) {
334	// The child exited succesfully, read counter values and return
335	// success.
336	auto CounterValueOrErr = Counter->readOrError();
337	if (!CounterValueOrErr)
338	return CounterValueOrErr.takeError();
339	CounterValues = std::move(*CounterValueOrErr);
340
341	auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
342	if (!ValidationValuesOrErr)
343	return ValidationValuesOrErr.takeError();
344
345	ArrayRef RealValidationValues = *ValidationValuesOrErr;
346	for (size_t I = `0`; I < RealValidationValues.size(); ++I)
347	ValidationCounterValues [I] = RealValidationValues [I];
348
349	return Error::success();
350	}
351	// The child exited, but not successfully.
352	return make_error<Failure>(
353	Args: "Child benchmarking process exited with non-zero exit code: " +
354	childProcessExitCodeToString(ExitCode: ChildExitCode));
355	}
356
357	// An error was encountered running the snippet, process it
358	siginfo_t ChildSignalInfo;
359	if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -`1`) {
360	return make_error<Failure>(Args: "Getting signal info from the child failed: " +
361	Twine (strerror(errno)));
362	}
363
364	// Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
365	// handlers to run, and calling SIGTERM would mean that ptrace will force
366	// it to block in the signal-delivery-stop for the SIGSEGV/other signals,
367	// and upon exit.
368	if (kill(pid: ChildPID, SIGKILL) == -`1`)
369	return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " +
370	Twine (strerror(errno)));
371
372	// Wait for the process to exit so that there are no zombie processes left
373	// around.
374	if (waitpid(pid: ChildPID, NULL, options: `0`) == -`1`)
375	return make_error<Failure>(Args: "Failed to wait for process to die: " +
376	Twine (strerror(errno)));
377
378	if (ChildSignalInfo.si_signo == SIGSEGV)
379	return make_error<SnippetSegmentationFault>(
380	Args: reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr));
381
382	return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo);
383	}
384
385	Error createSubProcessAndRunBenchmark(
386	StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
387	ArrayRef<const char *> ValidationCounters,
388	SmallVectorImpl<int64_t> &ValidationCounterValues) const {
389	int PipeFiles[`2`];
390	int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: `0`, fds: PipeFiles);
391	if (PipeSuccessOrErr != `0`) {
392	return make_error<Failure>(
393	Args: "Failed to create a pipe for interprocess communication between "
394	"llvm-exegesis and the benchmarking subprocess: " +
395	Twine (strerror(errno)));
396	}
397
398	SubprocessMemory SPMemory;
399	Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid());
400	if (MemoryInitError)
401	return MemoryInitError;
402
403	Error AddMemDefError =
404	SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid());
405	if (AddMemDefError)
406	return AddMemDefError;
407
408	long ParentTID = SubprocessMemory::getCurrentTID();
409	pid_t ParentOrChildPID = fork();
410
411	if (ParentOrChildPID == -`1`) {
412	return make_error<Failure>(Args: "Failed to create child process: " +
413	Twine (strerror(errno)));
414	}
415
416	if (ParentOrChildPID == `0`) {
417	// We are in the child process, close the write end of the pipe.
418	close(fd: PipeFiles[`1`]);
419	// Unregister handlers, signal handling is now handled through ptrace in
420	// the host process.
421	sys::unregisterHandlers();
422	runChildSubprocess(Pipe: PipeFiles[`0`], Key, ParentTID);
423	// The child process terminates in the above function, so we should never
424	// get to this point.
425	llvm_unreachable("Child process didn't exit when expected.");
426	}
427
428	// Close the read end of the pipe as we only need to write to the subprocess
429	// from the parent process.
430	close(fd: PipeFiles[`0`]);
431	return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[`1`], CounterName,
432	CounterValues, ValidationCounters,
433	ValidationCounterValues);
434	}
435
436	void disableCoreDumps() const {
437	struct rlimit rlim;
438
439	rlim.rlim_cur = `0`;
440	setrlimit(RLIMIT_CORE, rlimits: &rlim);
441	}
442
443	[[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
444	long ParentTID) const {
445	// Disable core dumps in the child process as otherwise everytime we
446	// encounter an execution failure like a segmentation fault, we will create
447	// a core dump. We report the information directly rather than require the
448	// user inspect a core dump.
449	disableCoreDumps();
450
451	// The following occurs within the benchmarking subprocess.
452	pid_t ParentPID = getppid();
453
454	Expected<int> CounterFileDescriptorOrError =
455	getFileDescriptorFromSocket(SocketFD: Pipe);
456
457	if (!CounterFileDescriptorOrError)
458	exit(status: ChildProcessExitCodeE::CounterFDReadFailed);
459
460	int CounterFileDescriptor = *CounterFileDescriptorOrError;
461
462	// Glibc versions greater than 2.35 automatically call rseq during
463	// initialization. Unmapping the region that glibc sets up for this causes
464	// segfaults in the program. Unregister the rseq region so that we can safely
465	// unmap it later
466	#ifdef GLIBC_INITS_RSEQ
467	long RseqDisableOutput =
468	syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
469	__rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
470	if (RseqDisableOutput != `0`)
471	exit(status: ChildProcessExitCodeE::RSeqDisableFailed);
472	#endif // GLIBC_INITS_RSEQ
473
474	// The frontend that generates the memory annotation structures should
475	// validate that the address to map the snippet in at is a multiple of
476	// the page size. Assert that this is true here.
477	assert(Key.SnippetAddress % getpagesize() == `0` &&
478	"The snippet address needs to be aligned to a page boundary.");
479
480	size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
481	void *MapAddress = NULL;
482	int MapFlags = MAP_PRIVATE \| MAP_ANONYMOUS;
483
484	if (Key.SnippetAddress != `0`) {
485	MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
486	MapFlags \|= MAP_FIXED_NOREPLACE;
487	}
488
489	char *FunctionDataCopy =
490	(char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ \| PROT_WRITE,
491	flags: MapFlags, fd: `0`, offset: `0`);
492	if ((intptr_t)FunctionDataCopy == -`1`)
493	exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed);
494
495	memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(),
496	n: this->Function.FunctionBytes.size());
497	mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ \| PROT_EXEC);
498
499	Expected<int> AuxMemFDOrError =
500	SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
501	MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
502	if (!AuxMemFDOrError)
503	exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
504
505	((void ()(size_t, int*))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
506	*AuxMemFDOrError);
507
508	exit(status: `0`);
509	}
510
511	Expected<SmallVector<int64_t, `4`>> runWithCounter(
512	StringRef CounterName, ArrayRef<const char *> ValidationCounters,
513	SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
514	SmallVector<int64_t, `4`> Value(`1`, `0`);
515	Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
516	CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues);
517
518	if (PossibleBenchmarkError)
519	return std::move(PossibleBenchmarkError);
520
521	return Value;
522	}
523
524	const LLVMState &State;
525	const ExecutableFunction Function;
526	const BenchmarkKey &Key;
527	};
528	#endif // __linux__
529	} // namespace
530
531	Expected<SmallString<`0`>> BenchmarkRunner::assembleSnippet(
532	const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
533	unsigned MinInstructions, unsigned LoopBodySize,
534	bool GenerateMemoryInstructions) const {
535	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
536	SmallString<`0`> Buffer;
537	raw_svector_ostream OS(Buffer);
538	if (Error E = assembleToStream(
539	ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns,
540	Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
541	CleanupMemory: GenerateMemoryInstructions),
542	AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) {
543	return std::move(E);
544	}
545	return Buffer;
546	}
547
548	Expected<BenchmarkRunner::RunnableConfiguration>
549	BenchmarkRunner::getRunnableConfiguration(
550	const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
551	const SnippetRepetitor &Repetitor) const {
552	RunnableConfiguration RC;
553
554	Benchmark &BenchmarkResult = RC.BenchmarkResult;
555	BenchmarkResult.Mode = Mode;
556	BenchmarkResult.CpuName =
557	std::string (State.getTargetMachine().getTargetCPU());
558	BenchmarkResult.LLVMTriple =
559	State.getTargetMachine().getTargetTriple().normalize();
560	BenchmarkResult.MinInstructions = MinInstructions;
561	BenchmarkResult.Info = BC.Info;
562
563	const std::vector<MCInst> &Instructions = BC.Key.Instructions;
564
565	bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
566
567	BenchmarkResult.Key = BC.Key;
568
569	// Assemble at least kMinInstructionsForSnippet instructions by repeating
570	// the snippet for debug/analysis. This is so that the user clearly
571	// understands that the inside instructions are repeated.
572	if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
573	const int MinInstructionsForSnippet = `4` * Instructions.size();
574	const int LoopBodySizeForSnippet = `2` * Instructions.size();
575	auto Snippet =
576	assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet,
577	LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions);
578	if (Error E = Snippet.takeError())
579	return std::move(E);
580
581	if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet,
582	Bytes&: BenchmarkResult.AssembledSnippet))
583	return std::move(Err);
584	}
585
586	// Assemble enough repetitions of the snippet so we have at least
587	// MinInstructions instructions.
588	if (BenchmarkPhaseSelector >
589	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
590	auto Snippet =
591	assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions,
592	LoopBodySize, GenerateMemoryInstructions);
593	if (Error E = Snippet.takeError())
594	return std::move(E);
595	RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet);
596	}
597
598	return std::move(RC);
599	}
600
601	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
602	BenchmarkRunner::createFunctionExecutor(
603	object::OwningBinary<object::ObjectFile> ObjectFile,
604	const BenchmarkKey &Key) const {
605	switch (ExecutionMode) {
606	case ExecutionModeE::InProcess: {
607	auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
608	State, Obj: std::move(ObjectFile), Scratch: Scratch.get());
609	if (!InProcessExecutorOrErr)
610	return InProcessExecutorOrErr.takeError();
611
612	return std::move(*InProcessExecutorOrErr);
613	}
614	case ExecutionModeE::SubProcess: {
615	#ifdef __linux__
616	auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
617	State, Obj: std::move(ObjectFile), Key);
618	if (!SubProcessExecutorOrErr)
619	return SubProcessExecutorOrErr.takeError();
620
621	return std::move(*SubProcessExecutorOrErr);
622	#else
623	return make_error<Failure>(
624	"The subprocess execution mode is only supported on Linux");
625	#endif
626	}
627	}
628	llvm_unreachable("ExecutionMode is outside expected range");
629	}
630
631	std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
632	RunnableConfiguration &&RC,
633	const std::optional<StringRef> &DumpFile) const {
634	Benchmark &BenchmarkResult = RC.BenchmarkResult;
635	object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
636
637	if (DumpFile && BenchmarkPhaseSelector >
638	BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
639	auto ObjectFilePath =
640	writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile);
641	if (Error E = ObjectFilePath.takeError()) {
642	return {std::move(E), std::move(BenchmarkResult)};
643	}
644	outs() << "Check generated assembly with: /usr/bin/objdump -d "
645	<< *ObjectFilePath << "\n";
646	}
647
648	if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
649	BenchmarkResult.Error = "actual measurements skipped.";
650	return {Error::success(), std::move(BenchmarkResult)};
651	}
652
653	Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
654	createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key);
655	if (!Executor)
656	return {Executor.takeError(), std::move(BenchmarkResult)};
657	auto NewMeasurements = runMeasurements(Executor: **Executor);
658
659	if (Error E = NewMeasurements.takeError()) {
660	return {std::move(E), std::move(BenchmarkResult)};
661	}
662	assert(BenchmarkResult.MinInstructions > `0` && "invalid MinInstructions");
663	for (BenchmarkMeasure &BM : *NewMeasurements) {
664	// Scale the measurements by the number of instructions.
665	BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
666	// Scale the measurements by the number of times the entire snippet is
667	// repeated.
668	BM.PerSnippetValue /=
669	std::ceil(x: BenchmarkResult.MinInstructions /
670	static_cast<double>(BenchmarkResult.Key.Instructions.size()));
671	}
672	BenchmarkResult.Measurements = std::move(*NewMeasurements);
673
674	return {Error::success(), std::move(BenchmarkResult)};
675	}
676
677	Expected<std::string>
678	BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
679	int ResultFD = `0`;
680	SmallString<`256`> ResultPath = FileName;
681	if (Error E = errorCodeToError(
682	EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o",
683	ResultFD, ResultPath)
684	: sys::fs::openFileForReadWrite(
685	Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways,
686	Flags: sys::fs::OF_None)))
687	return std::move(E);
688	raw_fd_ostream OFS(ResultFD, true /ShouldClose/);
689	OFS.write(Ptr: Buffer.data(), Size: Buffer.size());
690	OFS.flush();
691	return std::string(ResultPath);
692	}
693
694	static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
695	const ValidationEvent RHS) {
696	return static_cast<int>(LHS.first) < static_cast<int>(RHS);
697	}
698
699	Error BenchmarkRunner::getValidationCountersToRun(
700	SmallVector<const char > &ValCountersToRun) const* {
701	const PfmCountersInfo &PCI = State.getPfmCounters();
702	ValCountersToRun.reserve(N: ValidationCounters.size());
703
704	ValCountersToRun.reserve(N: ValidationCounters.size());
705	ArrayRef TargetValidationEvents(PCI.ValidationEvents,
706	PCI.NumValidationEvents);
707	for (const ValidationEvent RequestedValEvent : ValidationCounters) {
708	auto ValCounterIt =
709	lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan);
710	if (ValCounterIt == TargetValidationEvents.end() \|\|
711	ValCounterIt->first != RequestedValEvent)
712	return make_error<Failure>(Args: "Cannot create validation counter");
713
714	assert(ValCounterIt->first == RequestedValEvent &&
715	"The array of validation events from the target should be sorted");
716	ValCountersToRun.push_back(Elt: ValCounterIt->second);
717	}
718
719	return Error::success();
720	}
721
722	BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
723
724	} // namespace exegesis
725	} // namespace llvm
726

source code of llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp