1//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <cmath>
10#include <memory>
11#include <string>
12
13#include "Assembler.h"
14#include "BenchmarkRunner.h"
15#include "Error.h"
16#include "MCInstrDescView.h"
17#include "MmapUtils.h"
18#include "PerfHelper.h"
19#include "SubprocessMemory.h"
20#include "Target.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/ADT/Twine.h"
24#include "llvm/Support/CrashRecoveryContext.h"
25#include "llvm/Support/Error.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/MemoryBuffer.h"
28#include "llvm/Support/Program.h"
29#include "llvm/Support/Signals.h"
30#include "llvm/Support/SystemZ/zOSSupport.h"
31
32#ifdef __linux__
33#ifdef HAVE_LIBPFM
34#include <perfmon/perf_event.h>
35#endif
36#include <sys/mman.h>
37#include <sys/ptrace.h>
38#include <sys/resource.h>
39#include <sys/socket.h>
40#include <sys/syscall.h>
41#include <sys/wait.h>
42#include <unistd.h>
43
44#if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
45#include <sys/rseq.h>
46#if defined(RSEQ_SIG) && defined(SYS_rseq)
47#define GLIBC_INITS_RSEQ
48#endif
49#endif
50#endif // __linux__
51
52namespace llvm {
53namespace exegesis {
54
55BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
56 BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
57 ExecutionModeE ExecutionMode,
58 ArrayRef<ValidationEvent> ValCounters)
59 : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
60 ExecutionMode(ExecutionMode), ValidationCounters(ValCounters),
61 Scratch(std::make_unique<ScratchSpace>()) {}
62
63BenchmarkRunner::~BenchmarkRunner() = default;
64
65void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
66 const SmallVectorImpl<int64_t> &NewValues,
67 SmallVectorImpl<int64_t> *Result) {
68 const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
69 if (NumValues > Result->size())
70 Result->resize(N: NumValues, NV: 0);
71 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
72 (*Result)[I] += NewValues[I];
73}
74
75Expected<SmallVector<int64_t, 4>>
76BenchmarkRunner::FunctionExecutor::runAndSample(
77 const char *Counters, ArrayRef<const char *> ValidationCounters,
78 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
79 // We sum counts when there are several counters for a single ProcRes
80 // (e.g. P23 on SandyBridge).
81 SmallVector<int64_t, 4> CounterValues;
82 SmallVector<StringRef, 2> CounterNames;
83 StringRef(Counters).split(A&: CounterNames, Separator: '+');
84 for (auto &CounterName : CounterNames) {
85 CounterName = CounterName.trim();
86 Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter(
87 CounterName, ValidationCounters, ValidationCounterValues);
88 if (!ValueOrError)
89 return ValueOrError.takeError();
90 accumulateCounterValues(NewValues: ValueOrError.get(), Result: &CounterValues);
91 }
92 return CounterValues;
93}
94
95namespace {
96class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
97public:
98 static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
99 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
100 BenchmarkRunner::ScratchSpace *Scratch) {
101 Expected<ExecutableFunction> EF =
102 ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
103
104 if (!EF)
105 return EF.takeError();
106
107 return std::unique_ptr<InProcessFunctionExecutorImpl>(
108 new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
109 }
110
111private:
112 InProcessFunctionExecutorImpl(const LLVMState &State,
113 ExecutableFunction Function,
114 BenchmarkRunner::ScratchSpace *Scratch)
115 : State(State), Function(std::move(Function)), Scratch(Scratch) {}
116
117 static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
118 SmallVector<int64_t, 4> *Result) {
119 const size_t NumValues = std::max(a: NewValues.size(), b: Result->size());
120 if (NumValues > Result->size())
121 Result->resize(N: NumValues, NV: 0);
122 for (size_t I = 0, End = NewValues.size(); I < End; ++I)
123 (*Result)[I] += NewValues[I];
124 }
125
126 Expected<SmallVector<int64_t, 4>> runWithCounter(
127 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
128 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
129 const ExegesisTarget &ET = State.getExegesisTarget();
130 char *const ScratchPtr = Scratch->ptr();
131 auto CounterOrError =
132 ET.createCounter(CounterName, State, ValidationCounters);
133
134 if (!CounterOrError)
135 return CounterOrError.takeError();
136
137 pfm::CounterGroup *Counter = CounterOrError.get().get();
138 Scratch->clear();
139 {
140 auto PS = ET.withSavedState();
141 CrashRecoveryContext CRC;
142 CrashRecoveryContext::Enable();
143 const bool Crashed = !CRC.RunSafely(Fn: [this, Counter, ScratchPtr]() {
144 Counter->start();
145 this->Function(ScratchPtr);
146 Counter->stop();
147 });
148 CrashRecoveryContext::Disable();
149 PS.reset();
150 if (Crashed) {
151#ifdef LLVM_ON_UNIX
152 // See "Exit Status for Commands":
153 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
154 constexpr const int kSigOffset = 128;
155 return make_error<SnippetSignal>(Args: CRC.RetCode - kSigOffset);
156#else
157 // The exit code of the process on windows is not meaningful as a
158 // signal, so simply pass in -1 as the signal into the error.
159 return make_error<SnippetSignal>(-1);
160#endif // LLVM_ON_UNIX
161 }
162 }
163
164 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
165 if (!ValidationValuesOrErr)
166 return ValidationValuesOrErr.takeError();
167
168 ArrayRef RealValidationValues = *ValidationValuesOrErr;
169 for (size_t I = 0; I < RealValidationValues.size(); ++I)
170 ValidationCounterValues[I] = RealValidationValues[I];
171
172 return Counter->readOrError(FunctionBytes: Function.getFunctionBytes());
173 }
174
175 const LLVMState &State;
176 const ExecutableFunction Function;
177 BenchmarkRunner::ScratchSpace *const Scratch;
178};
179
180#ifdef __linux__
181// The following class implements a function executor that executes the
182// benchmark code within a subprocess rather than within the main llvm-exegesis
183// process. This allows for much more control over the execution context of the
184// snippet, particularly with regard to memory. This class performs all the
185// necessary functions to create the subprocess, execute the snippet in the
186// subprocess, and report results/handle errors.
187class SubProcessFunctionExecutorImpl
188 : public BenchmarkRunner::FunctionExecutor {
189public:
190 static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
191 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
192 const BenchmarkKey &Key) {
193 Expected<ExecutableFunction> EF =
194 ExecutableFunction::create(TM: State.createTargetMachine(), ObjectFileHolder: std::move(Obj));
195 if (!EF)
196 return EF.takeError();
197
198 return std::unique_ptr<SubProcessFunctionExecutorImpl>(
199 new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key));
200 }
201
202private:
203 SubProcessFunctionExecutorImpl(const LLVMState &State,
204 ExecutableFunction Function,
205 const BenchmarkKey &Key)
206 : State(State), Function(std::move(Function)), Key(Key) {}
207
208 enum ChildProcessExitCodeE {
209 CounterFDReadFailed = 1,
210 RSeqDisableFailed,
211 FunctionDataMappingFailed,
212 AuxiliaryMemorySetupFailed
213 };
214
215 StringRef childProcessExitCodeToString(int ExitCode) const {
216 switch (ExitCode) {
217 case ChildProcessExitCodeE::CounterFDReadFailed:
218 return "Counter file descriptor read failed";
219 case ChildProcessExitCodeE::RSeqDisableFailed:
220 return "Disabling restartable sequences failed";
221 case ChildProcessExitCodeE::FunctionDataMappingFailed:
222 return "Failed to map memory for assembled snippet";
223 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
224 return "Failed to setup auxiliary memory";
225 default:
226 return "Child process returned with unknown exit code";
227 }
228 }
229
230 Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
231 struct msghdr Message = {};
232 char Buffer[CMSG_SPACE(sizeof(FD))];
233 memset(s: Buffer, c: 0, n: sizeof(Buffer));
234 Message.msg_control = Buffer;
235 Message.msg_controllen = sizeof(Buffer);
236
237 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
238 ControlMessage->cmsg_level = SOL_SOCKET;
239 ControlMessage->cmsg_type = SCM_RIGHTS;
240 ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
241
242 memcpy(CMSG_DATA(ControlMessage), src: &FD, n: sizeof(FD));
243
244 Message.msg_controllen = CMSG_SPACE(sizeof(FD));
245
246 ssize_t BytesWritten = sendmsg(fd: SocketFD, message: &Message, flags: 0);
247
248 if (BytesWritten < 0)
249 return make_error<Failure>(Args: "Failed to write FD to socket: " +
250 Twine(strerror(errno)));
251
252 return Error::success();
253 }
254
255 Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
256 struct msghdr Message = {};
257
258 char ControlBuffer[256];
259 Message.msg_control = ControlBuffer;
260 Message.msg_controllen = sizeof(ControlBuffer);
261
262 ssize_t BytesRead = recvmsg(fd: SocketFD, message: &Message, flags: 0);
263
264 if (BytesRead < 0)
265 return make_error<Failure>(Args: "Failed to read FD from socket: " +
266 Twine(strerror(errno)));
267
268 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
269
270 int FD;
271
272 if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
273 return make_error<Failure>(Args: "Failed to get correct number of bytes for "
274 "file descriptor from socket.");
275
276 memcpy(dest: &FD, CMSG_DATA(ControlMessage), n: sizeof(FD));
277
278 return FD;
279 }
280
281 Error
282 runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
283 SmallVectorImpl<int64_t> &CounterValues,
284 ArrayRef<const char *> ValidationCounters,
285 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
286 const ExegesisTarget &ET = State.getExegesisTarget();
287 auto CounterOrError =
288 ET.createCounter(CounterName, State, ValidationCounters, ProcessID: ChildPID);
289
290 if (!CounterOrError)
291 return CounterOrError.takeError();
292
293 pfm::CounterGroup *Counter = CounterOrError.get().get();
294
295 // Make sure to attach to the process (and wait for the sigstop to be
296 // delivered and for the process to continue) before we write to the counter
297 // file descriptor. Attaching to the process before writing to the socket
298 // ensures that the subprocess at most has blocked on the read call. If we
299 // attach afterwards, the subprocess might exit before we get to the attach
300 // call due to effects like scheduler contention, introducing transient
301 // failures.
302 if (ptrace(request: PTRACE_ATTACH, ChildPID, NULL, NULL) != 0)
303 return make_error<Failure>(Args: "Failed to attach to the child process: " +
304 Twine(strerror(errno)));
305
306 if (waitpid(pid: ChildPID, NULL, options: 0) == -1) {
307 return make_error<Failure>(
308 Args: "Failed to wait for child process to stop after attaching: " +
309 Twine(strerror(errno)));
310 }
311
312 if (ptrace(request: PTRACE_CONT, ChildPID, NULL, NULL) != 0)
313 return make_error<Failure>(
314 Args: "Failed to continue execution of the child process: " +
315 Twine(strerror(errno)));
316
317 int CounterFileDescriptor = Counter->getFileDescriptor();
318 Error SendError =
319 sendFileDescriptorThroughSocket(SocketFD: WriteFD, FD: CounterFileDescriptor);
320
321 if (SendError)
322 return SendError;
323
324 int ChildStatus;
325 if (waitpid(pid: ChildPID, stat_loc: &ChildStatus, options: 0) == -1) {
326 return make_error<Failure>(
327 Args: "Waiting for the child process to complete failed: " +
328 Twine(strerror(errno)));
329 }
330
331 if (WIFEXITED(ChildStatus)) {
332 int ChildExitCode = WEXITSTATUS(ChildStatus);
333 if (ChildExitCode == 0) {
334 // The child exited succesfully, read counter values and return
335 // success.
336 auto CounterValueOrErr = Counter->readOrError();
337 if (!CounterValueOrErr)
338 return CounterValueOrErr.takeError();
339 CounterValues = std::move(*CounterValueOrErr);
340
341 auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
342 if (!ValidationValuesOrErr)
343 return ValidationValuesOrErr.takeError();
344
345 ArrayRef RealValidationValues = *ValidationValuesOrErr;
346 for (size_t I = 0; I < RealValidationValues.size(); ++I)
347 ValidationCounterValues[I] = RealValidationValues[I];
348
349 return Error::success();
350 }
351 // The child exited, but not successfully.
352 return make_error<Failure>(
353 Args: "Child benchmarking process exited with non-zero exit code: " +
354 childProcessExitCodeToString(ExitCode: ChildExitCode));
355 }
356
357 // An error was encountered running the snippet, process it
358 siginfo_t ChildSignalInfo;
359 if (ptrace(request: PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) {
360 return make_error<Failure>(Args: "Getting signal info from the child failed: " +
361 Twine(strerror(errno)));
362 }
363
364 // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
365 // handlers to run, and calling SIGTERM would mean that ptrace will force
366 // it to block in the signal-delivery-stop for the SIGSEGV/other signals,
367 // and upon exit.
368 if (kill(pid: ChildPID, SIGKILL) == -1)
369 return make_error<Failure>(Args: "Failed to kill child benchmarking proces: " +
370 Twine(strerror(errno)));
371
372 // Wait for the process to exit so that there are no zombie processes left
373 // around.
374 if (waitpid(pid: ChildPID, NULL, options: 0) == -1)
375 return make_error<Failure>(Args: "Failed to wait for process to die: " +
376 Twine(strerror(errno)));
377
378 if (ChildSignalInfo.si_signo == SIGSEGV)
379 return make_error<SnippetSegmentationFault>(
380 Args: reinterpret_cast<intptr_t>(ChildSignalInfo.si_addr));
381
382 return make_error<SnippetSignal>(Args&: ChildSignalInfo.si_signo);
383 }
384
385 Error createSubProcessAndRunBenchmark(
386 StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
387 ArrayRef<const char *> ValidationCounters,
388 SmallVectorImpl<int64_t> &ValidationCounterValues) const {
389 int PipeFiles[2];
390 int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, protocol: 0, fds: PipeFiles);
391 if (PipeSuccessOrErr != 0) {
392 return make_error<Failure>(
393 Args: "Failed to create a pipe for interprocess communication between "
394 "llvm-exegesis and the benchmarking subprocess: " +
395 Twine(strerror(errno)));
396 }
397
398 SubprocessMemory SPMemory;
399 Error MemoryInitError = SPMemory.initializeSubprocessMemory(ProcessID: getpid());
400 if (MemoryInitError)
401 return MemoryInitError;
402
403 Error AddMemDefError =
404 SPMemory.addMemoryDefinition(MemoryDefinitions: Key.MemoryValues, ProcessID: getpid());
405 if (AddMemDefError)
406 return AddMemDefError;
407
408 long ParentTID = SubprocessMemory::getCurrentTID();
409 pid_t ParentOrChildPID = fork();
410
411 if (ParentOrChildPID == -1) {
412 return make_error<Failure>(Args: "Failed to create child process: " +
413 Twine(strerror(errno)));
414 }
415
416 if (ParentOrChildPID == 0) {
417 // We are in the child process, close the write end of the pipe.
418 close(fd: PipeFiles[1]);
419 // Unregister handlers, signal handling is now handled through ptrace in
420 // the host process.
421 sys::unregisterHandlers();
422 runChildSubprocess(Pipe: PipeFiles[0], Key, ParentTID);
423 // The child process terminates in the above function, so we should never
424 // get to this point.
425 llvm_unreachable("Child process didn't exit when expected.");
426 }
427
428 // Close the read end of the pipe as we only need to write to the subprocess
429 // from the parent process.
430 close(fd: PipeFiles[0]);
431 return runParentProcess(ChildPID: ParentOrChildPID, WriteFD: PipeFiles[1], CounterName,
432 CounterValues, ValidationCounters,
433 ValidationCounterValues);
434 }
435
436 void disableCoreDumps() const {
437 struct rlimit rlim;
438
439 rlim.rlim_cur = 0;
440 setrlimit(RLIMIT_CORE, rlimits: &rlim);
441 }
442
443 [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
444 long ParentTID) const {
445 // Disable core dumps in the child process as otherwise everytime we
446 // encounter an execution failure like a segmentation fault, we will create
447 // a core dump. We report the information directly rather than require the
448 // user inspect a core dump.
449 disableCoreDumps();
450
451 // The following occurs within the benchmarking subprocess.
452 pid_t ParentPID = getppid();
453
454 Expected<int> CounterFileDescriptorOrError =
455 getFileDescriptorFromSocket(SocketFD: Pipe);
456
457 if (!CounterFileDescriptorOrError)
458 exit(status: ChildProcessExitCodeE::CounterFDReadFailed);
459
460 int CounterFileDescriptor = *CounterFileDescriptorOrError;
461
462// Glibc versions greater than 2.35 automatically call rseq during
463// initialization. Unmapping the region that glibc sets up for this causes
464// segfaults in the program. Unregister the rseq region so that we can safely
465// unmap it later
466#ifdef GLIBC_INITS_RSEQ
467 long RseqDisableOutput =
468 syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
469 __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
470 if (RseqDisableOutput != 0)
471 exit(status: ChildProcessExitCodeE::RSeqDisableFailed);
472#endif // GLIBC_INITS_RSEQ
473
474 // The frontend that generates the memory annotation structures should
475 // validate that the address to map the snippet in at is a multiple of
476 // the page size. Assert that this is true here.
477 assert(Key.SnippetAddress % getpagesize() == 0 &&
478 "The snippet address needs to be aligned to a page boundary.");
479
480 size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
481 void *MapAddress = NULL;
482 int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
483
484 if (Key.SnippetAddress != 0) {
485 MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
486 MapFlags |= MAP_FIXED_NOREPLACE;
487 }
488
489 char *FunctionDataCopy =
490 (char *)mmap(addr: MapAddress, len: FunctionDataCopySize, PROT_READ | PROT_WRITE,
491 flags: MapFlags, fd: 0, offset: 0);
492 if ((intptr_t)FunctionDataCopy == -1)
493 exit(status: ChildProcessExitCodeE::FunctionDataMappingFailed);
494
495 memcpy(dest: FunctionDataCopy, src: this->Function.FunctionBytes.data(),
496 n: this->Function.FunctionBytes.size());
497 mprotect(addr: FunctionDataCopy, len: FunctionDataCopySize, PROT_READ | PROT_EXEC);
498
499 Expected<int> AuxMemFDOrError =
500 SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
501 MemoryDefinitions: Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
502 if (!AuxMemFDOrError)
503 exit(status: ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
504
505 ((void (*)(size_t, int))(intptr_t)FunctionDataCopy)(FunctionDataCopySize,
506 *AuxMemFDOrError);
507
508 exit(status: 0);
509 }
510
511 Expected<SmallVector<int64_t, 4>> runWithCounter(
512 StringRef CounterName, ArrayRef<const char *> ValidationCounters,
513 SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
514 SmallVector<int64_t, 4> Value(1, 0);
515 Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
516 CounterName, CounterValues&: Value, ValidationCounters, ValidationCounterValues);
517
518 if (PossibleBenchmarkError)
519 return std::move(PossibleBenchmarkError);
520
521 return Value;
522 }
523
524 const LLVMState &State;
525 const ExecutableFunction Function;
526 const BenchmarkKey &Key;
527};
528#endif // __linux__
529} // namespace
530
531Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
532 const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
533 unsigned MinInstructions, unsigned LoopBodySize,
534 bool GenerateMemoryInstructions) const {
535 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
536 SmallString<0> Buffer;
537 raw_svector_ostream OS(Buffer);
538 if (Error E = assembleToStream(
539 ET: State.getExegesisTarget(), TM: State.createTargetMachine(), LiveIns: BC.LiveIns,
540 Fill: Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
541 CleanupMemory: GenerateMemoryInstructions),
542 AsmStreamm&: OS, Key: BC.Key, GenerateMemoryInstructions)) {
543 return std::move(E);
544 }
545 return Buffer;
546}
547
548Expected<BenchmarkRunner::RunnableConfiguration>
549BenchmarkRunner::getRunnableConfiguration(
550 const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
551 const SnippetRepetitor &Repetitor) const {
552 RunnableConfiguration RC;
553
554 Benchmark &BenchmarkResult = RC.BenchmarkResult;
555 BenchmarkResult.Mode = Mode;
556 BenchmarkResult.CpuName =
557 std::string(State.getTargetMachine().getTargetCPU());
558 BenchmarkResult.LLVMTriple =
559 State.getTargetMachine().getTargetTriple().normalize();
560 BenchmarkResult.MinInstructions = MinInstructions;
561 BenchmarkResult.Info = BC.Info;
562
563 const std::vector<MCInst> &Instructions = BC.Key.Instructions;
564
565 bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
566
567 BenchmarkResult.Key = BC.Key;
568
569 // Assemble at least kMinInstructionsForSnippet instructions by repeating
570 // the snippet for debug/analysis. This is so that the user clearly
571 // understands that the inside instructions are repeated.
572 if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
573 const int MinInstructionsForSnippet = 4 * Instructions.size();
574 const int LoopBodySizeForSnippet = 2 * Instructions.size();
575 auto Snippet =
576 assembleSnippet(BC, Repetitor, MinInstructions: MinInstructionsForSnippet,
577 LoopBodySize: LoopBodySizeForSnippet, GenerateMemoryInstructions);
578 if (Error E = Snippet.takeError())
579 return std::move(E);
580
581 if (auto Err = getBenchmarkFunctionBytes(InputData: *Snippet,
582 Bytes&: BenchmarkResult.AssembledSnippet))
583 return std::move(Err);
584 }
585
586 // Assemble enough repetitions of the snippet so we have at least
587 // MinInstructions instructions.
588 if (BenchmarkPhaseSelector >
589 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
590 auto Snippet =
591 assembleSnippet(BC, Repetitor, MinInstructions: BenchmarkResult.MinInstructions,
592 LoopBodySize, GenerateMemoryInstructions);
593 if (Error E = Snippet.takeError())
594 return std::move(E);
595 RC.ObjectFile = getObjectFromBuffer(Buffer: *Snippet);
596 }
597
598 return std::move(RC);
599}
600
601Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
602BenchmarkRunner::createFunctionExecutor(
603 object::OwningBinary<object::ObjectFile> ObjectFile,
604 const BenchmarkKey &Key) const {
605 switch (ExecutionMode) {
606 case ExecutionModeE::InProcess: {
607 auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
608 State, Obj: std::move(ObjectFile), Scratch: Scratch.get());
609 if (!InProcessExecutorOrErr)
610 return InProcessExecutorOrErr.takeError();
611
612 return std::move(*InProcessExecutorOrErr);
613 }
614 case ExecutionModeE::SubProcess: {
615#ifdef __linux__
616 auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
617 State, Obj: std::move(ObjectFile), Key);
618 if (!SubProcessExecutorOrErr)
619 return SubProcessExecutorOrErr.takeError();
620
621 return std::move(*SubProcessExecutorOrErr);
622#else
623 return make_error<Failure>(
624 "The subprocess execution mode is only supported on Linux");
625#endif
626 }
627 }
628 llvm_unreachable("ExecutionMode is outside expected range");
629}
630
631std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
632 RunnableConfiguration &&RC,
633 const std::optional<StringRef> &DumpFile) const {
634 Benchmark &BenchmarkResult = RC.BenchmarkResult;
635 object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
636
637 if (DumpFile && BenchmarkPhaseSelector >
638 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
639 auto ObjectFilePath =
640 writeObjectFile(Buffer: ObjectFile.getBinary()->getData(), FileName: *DumpFile);
641 if (Error E = ObjectFilePath.takeError()) {
642 return {std::move(E), std::move(BenchmarkResult)};
643 }
644 outs() << "Check generated assembly with: /usr/bin/objdump -d "
645 << *ObjectFilePath << "\n";
646 }
647
648 if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
649 BenchmarkResult.Error = "actual measurements skipped.";
650 return {Error::success(), std::move(BenchmarkResult)};
651 }
652
653 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
654 createFunctionExecutor(ObjectFile: std::move(ObjectFile), Key: RC.BenchmarkResult.Key);
655 if (!Executor)
656 return {Executor.takeError(), std::move(BenchmarkResult)};
657 auto NewMeasurements = runMeasurements(Executor: **Executor);
658
659 if (Error E = NewMeasurements.takeError()) {
660 return {std::move(E), std::move(BenchmarkResult)};
661 }
662 assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions");
663 for (BenchmarkMeasure &BM : *NewMeasurements) {
664 // Scale the measurements by the number of instructions.
665 BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
666 // Scale the measurements by the number of times the entire snippet is
667 // repeated.
668 BM.PerSnippetValue /=
669 std::ceil(x: BenchmarkResult.MinInstructions /
670 static_cast<double>(BenchmarkResult.Key.Instructions.size()));
671 }
672 BenchmarkResult.Measurements = std::move(*NewMeasurements);
673
674 return {Error::success(), std::move(BenchmarkResult)};
675}
676
677Expected<std::string>
678BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
679 int ResultFD = 0;
680 SmallString<256> ResultPath = FileName;
681 if (Error E = errorCodeToError(
682 EC: FileName.empty() ? sys::fs::createTemporaryFile(Prefix: "snippet", Suffix: "o",
683 ResultFD, ResultPath)
684 : sys::fs::openFileForReadWrite(
685 Name: FileName, ResultFD, Disp: sys::fs::CD_CreateAlways,
686 Flags: sys::fs::OF_None)))
687 return std::move(E);
688 raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
689 OFS.write(Ptr: Buffer.data(), Size: Buffer.size());
690 OFS.flush();
691 return std::string(ResultPath);
692}
693
694static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
695 const ValidationEvent RHS) {
696 return static_cast<int>(LHS.first) < static_cast<int>(RHS);
697}
698
699Error BenchmarkRunner::getValidationCountersToRun(
700 SmallVector<const char *> &ValCountersToRun) const {
701 const PfmCountersInfo &PCI = State.getPfmCounters();
702 ValCountersToRun.reserve(N: ValidationCounters.size());
703
704 ValCountersToRun.reserve(N: ValidationCounters.size());
705 ArrayRef TargetValidationEvents(PCI.ValidationEvents,
706 PCI.NumValidationEvents);
707 for (const ValidationEvent RequestedValEvent : ValidationCounters) {
708 auto ValCounterIt =
709 lower_bound(Range&: TargetValidationEvents, Value: RequestedValEvent, C: EventLessThan);
710 if (ValCounterIt == TargetValidationEvents.end() ||
711 ValCounterIt->first != RequestedValEvent)
712 return make_error<Failure>(Args: "Cannot create validation counter");
713
714 assert(ValCounterIt->first == RequestedValEvent &&
715 "The array of validation events from the target should be sorted");
716 ValCountersToRun.push_back(Elt: ValCounterIt->second);
717 }
718
719 return Error::success();
720}
721
722BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
723
724} // namespace exegesis
725} // namespace llvm
726

// Source code of llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp