1//===- Timing.cpp - Execution time measurement facilities -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Facilities to measure and provide statistics on execution time.
10//
11//===----------------------------------------------------------------------===//
12
13#include "mlir/Support/Timing.h"
14#include "mlir/Support/ThreadLocalCache.h"
15#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringMap.h"
18#include "llvm/ADT/StringSet.h"
19#include "llvm/Support/Allocator.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/Format.h"
22#include "llvm/Support/FormatVariadic.h"
23#include "llvm/Support/ManagedStatic.h"
24#include "llvm/Support/RWMutex.h"
25#include "llvm/Support/Threading.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <atomic>
29#include <optional>
30#include <chrono>
31
32using namespace mlir;
33using namespace detail;
34using DisplayMode = DefaultTimingManager::DisplayMode;
35
36constexpr llvm::StringLiteral kTimingDescription =
37 "... Execution time report ...";
38
39//===----------------------------------------------------------------------===//
40// TimingManager
41//===----------------------------------------------------------------------===//
42
43namespace mlir {
44namespace detail {
45/// Private implementation details of the `TimingManager`.
46class TimingManagerImpl {
47public:
48 // Identifier allocator, map, and mutex for thread safety.
49 llvm::BumpPtrAllocator identifierAllocator;
50 llvm::StringSet<llvm::BumpPtrAllocator &> identifiers;
51 llvm::sys::SmartRWMutex<true> identifierMutex;
52
53 /// A thread local cache of identifiers to reduce lock contention.
54 ThreadLocalCache<llvm::StringMap<llvm::StringMapEntry<std::nullopt_t> *>>
55 localIdentifierCache;
56
57 TimingManagerImpl() : identifiers(identifierAllocator) {}
58};
59} // namespace detail
60} // namespace mlir
61
62TimingManager::TimingManager() : impl(std::make_unique<TimingManagerImpl>()) {}
63
64TimingManager::~TimingManager() = default;
65
66/// Get the root timer of this timing manager.
67Timer TimingManager::getRootTimer() {
68 auto rt = rootTimer();
69 return rt ? Timer(*this, *rt) : Timer();
70}
71
72/// Get the root timer of this timing manager wrapped in a `TimingScope`.
73TimingScope TimingManager::getRootScope() {
74 return TimingScope(getRootTimer());
75}
76
77//===----------------------------------------------------------------------===//
78// Identifier uniquing
79//===----------------------------------------------------------------------===//
80
81/// Return an identifier for the specified string.
82TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) {
83 // Check for an existing instance in the local cache.
84 auto &impl = *tm.impl;
85 auto *&localEntry = (*impl.localIdentifierCache)[str];
86 if (localEntry)
87 return TimingIdentifier(localEntry);
88
89 // Check for an existing identifier in read-only mode.
90 {
91 llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex);
92 auto it = impl.identifiers.find(Key: str);
93 if (it != impl.identifiers.end()) {
94 localEntry = &*it;
95 return TimingIdentifier(localEntry);
96 }
97 }
98
99 // Acquire a writer-lock so that we can safely create the new instance.
100 llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex);
101 auto it = impl.identifiers.insert(key: str).first;
102 localEntry = &*it;
103 return TimingIdentifier(localEntry);
104}
105
106//===----------------------------------------------------------------------===//
107// Helpers for time record printing
108//===----------------------------------------------------------------------===//
109
110namespace {
111
112/// Simple record class to record timing information.
113struct TimeRecord {
114 TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {}
115
116 TimeRecord &operator+=(const TimeRecord &other) {
117 wall += other.wall;
118 user += other.user;
119 return *this;
120 }
121
122 TimeRecord &operator-=(const TimeRecord &other) {
123 wall -= other.wall;
124 user -= other.user;
125 return *this;
126 }
127
128 /// Print the current time record to 'os', with a breakdown showing
129 /// contributions to the give 'total' time record.
130 void print(raw_ostream &os, const TimeRecord &total) {
131 if (total.user != total.wall)
132 os << llvm::format(Fmt: " %8.4f (%5.1f%%)", Vals: user, Vals: 100.0 * user / total.user);
133 os << llvm::format(Fmt: " %8.4f (%5.1f%%) ", Vals: wall, Vals: 100.0 * wall / total.wall);
134 }
135
136 double wall, user;
137};
138
139} // namespace
140
141/// Utility to print a single line entry in the timer output.
142static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name,
143 TimeRecord time, TimeRecord total) {
144 time.print(os, total);
145 os.indent(NumSpaces: indent) << name << "\n";
146}
147
148/// Utility to print the timer heading information.
149static void printTimeHeader(raw_ostream &os, TimeRecord total) {
150 // Figure out how many spaces to description name.
151 unsigned padding = (80 - kTimingDescription.size()) / 2;
152 os << "===" << std::string(73, '-') << "===\n";
153 os.indent(NumSpaces: padding) << kTimingDescription << '\n';
154 os << "===" << std::string(73, '-') << "===\n";
155
156 // Print the total time followed by the section headers.
157 os << llvm::format(Fmt: " Total Execution Time: %.4f seconds\n\n", Vals: total.wall);
158 if (total.user != total.wall)
159 os << " ----User Time----";
160 os << " ----Wall Time---- ----Name----\n";
161}
162
163//===----------------------------------------------------------------------===//
164// Timer Implementation for DefaultTimingManager
165//===----------------------------------------------------------------------===//
166
167namespace {
168
169/// A timer used to sample execution time.
170///
171/// Separately tracks wall time and user time to account for parallel threads of
172/// execution. Timers are intended to be started and stopped multiple times.
173/// Each start and stop will add to the timer's wall and user time.
174class TimerImpl {
175public:
176 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
177 using AsyncChildrenMap = llvm::DenseMap<uint64_t, ChildrenMap>;
178
179 TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {}
180
181 /// Start the timer.
182 void start() { startTime = std::chrono::steady_clock::now(); }
183
184 /// Stop the timer.
185 void stop() {
186 auto newTime = std::chrono::steady_clock::now() - startTime;
187 wallTime += newTime;
188 userTime += newTime;
189 }
190
191 /// Create a child timer nested within this one. Multiple calls to this
192 /// function with the same unique identifier `id` will return the same child
193 /// timer.
194 ///
195 /// This function can be called from other threads, as long as this timer
196 /// outlives any uses of the child timer on the other thread.
197 TimerImpl *nest(const void *id, function_ref<std::string()> nameBuilder) {
198 auto tid = llvm::get_threadid();
199 if (tid == threadId)
200 return nestTail(child&: children[id], nameBuilder);
201 std::unique_lock<std::mutex> lock(asyncMutex);
202 return nestTail(child&: asyncChildren[tid][id], nameBuilder);
203 }
204
205 /// Tail-called from `nest()`.
206 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
207 function_ref<std::string()> nameBuilder) {
208 if (!child)
209 child = std::make_unique<TimerImpl>(args: nameBuilder());
210 return child.get();
211 }
212
213 /// Finalize this timer and all its children.
214 ///
215 /// If this timer has async children, which happens if `nest()` was called
216 /// from another thread, this function merges the async childr timers into the
217 /// main list of child timers.
218 ///
219 /// Caution: Call this function only after all nested timers running on other
220 /// threads no longer need their timers!
221 void finalize() {
222 addAsyncUserTime();
223 mergeAsyncChildren();
224 }
225
226 /// Add the user time of all async children to this timer's user time. This is
227 /// necessary since the user time already contains all regular child timers,
228 /// but not the asynchronous ones (by the nesting nature of the timers).
229 std::chrono::nanoseconds addAsyncUserTime() {
230 auto added = std::chrono::nanoseconds(0);
231 for (auto &child : children)
232 added += child.second->addAsyncUserTime();
233 for (auto &thread : asyncChildren) {
234 for (auto &child : thread.second) {
235 child.second->addAsyncUserTime();
236 added += child.second->userTime;
237 }
238 }
239 userTime += added;
240 return added;
241 }
242
243 /// Ensure that this timer and recursively all its children have their async
244 /// children folded into the main map of children.
245 void mergeAsyncChildren() {
246 for (auto &child : children)
247 child.second->mergeAsyncChildren();
248 mergeChildren(other: std::move(asyncChildren));
249 assert(asyncChildren.empty());
250 }
251
252 /// Merge multiple child timers into this timer.
253 ///
254 /// Children in `other` are added as children to this timer, or, if this timer
255 /// already contains a child with the corresponding unique identifier, are
256 /// merged into the existing child.
257 void mergeChildren(ChildrenMap &&other) {
258 if (children.empty()) {
259 children = std::move(other);
260 for (auto &child : children)
261 child.second->mergeAsyncChildren();
262 } else {
263 for (auto &child : other)
264 mergeChild(id: child.first, other: std::move(child.second));
265 other.clear();
266 }
267 }
268
269 /// See above.
270 void mergeChildren(AsyncChildrenMap &&other) {
271 for (auto &thread : other) {
272 mergeChildren(other: std::move(thread.second));
273 assert(thread.second.empty());
274 }
275 other.clear();
276 }
277
278 /// Merge a child timer into this timer for a given unique identifier.
279 ///
280 /// Moves all child and async child timers of `other` into this timer's child
281 /// for the given unique identifier.
282 void mergeChild(const void *id, std::unique_ptr<TimerImpl> &&other) {
283 auto &into = children[id];
284 if (!into) {
285 into = std::move(other);
286 into->mergeAsyncChildren();
287 } else {
288 into->wallTime = std::max(a: into->wallTime, b: other->wallTime);
289 into->userTime += other->userTime;
290 into->mergeChildren(other: std::move(other->children));
291 into->mergeChildren(other: std::move(other->asyncChildren));
292 other.reset();
293 }
294 }
295
296 /// Dump a human-readable tree representation of the timer and its children.
297 /// This is useful for debugging the timing mechanisms and structure of the
298 /// timers.
299 void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) {
300 auto time = getTimeRecord();
301 os << std::string(indent * 2, ' ') << name << " [" << threadId << "]"
302 << llvm::format(Fmt: " %7.4f / %7.4f", Vals: time.user, Vals: time.wall);
303 if (threadId != markThreadId && markThreadId != 0)
304 os << " (*)";
305 os << "\n";
306 for (auto &child : children)
307 child.second->dump(os, indent: indent + 1, markThreadId: threadId);
308 for (auto &thread : asyncChildren)
309 for (auto &child : thread.second)
310 child.second->dump(os, indent: indent + 1, markThreadId: threadId);
311 }
312
313 /// Returns the time for this timer in seconds.
314 TimeRecord getTimeRecord() {
315 return TimeRecord(
316 std::chrono::duration_cast<std::chrono::duration<double>>(d: wallTime)
317 .count(),
318 std::chrono::duration_cast<std::chrono::duration<double>>(d: userTime)
319 .count());
320 }
321
322 /// Print the timing result in list mode.
323 void printAsList(raw_ostream &os, TimeRecord total) {
324 // Flatten the leaf timers in the tree and merge them by name.
325 llvm::StringMap<TimeRecord> mergedTimers;
326 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
327 mergedTimers[timer->name] += timer->getTimeRecord();
328 for (auto &children : timer->children)
329 addTimer(children.second.get());
330 };
331 addTimer(this);
332
333 // Sort the timing information by wall time.
334 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
335 for (auto &it : mergedTimers)
336 timerNameAndTime.emplace_back(args: it.first(), args&: it.second);
337 llvm::array_pod_sort(Start: timerNameAndTime.begin(), End: timerNameAndTime.end(),
338 Compare: [](const std::pair<StringRef, TimeRecord> *lhs,
339 const std::pair<StringRef, TimeRecord> *rhs) {
340 return llvm::array_pod_sort_comparator<double>(
341 P1: &rhs->second.wall, P2: &lhs->second.wall);
342 });
343
344 // Print the timing information sequentially.
345 for (auto &timeData : timerNameAndTime)
346 printTimeEntry(os, indent: 0, name: timeData.first, time: timeData.second, total);
347 }
348
349 /// Print the timing result in tree mode.
350 void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) {
351 unsigned childIndent = indent;
352 if (!hidden) {
353 printTimeEntry(os, indent, name, time: getTimeRecord(), total);
354 childIndent += 2;
355 }
356 for (auto &child : children) {
357 child.second->printAsTree(os, total, indent: childIndent);
358 }
359 }
360
361 /// Print the current timing information.
362 void print(raw_ostream &os, DisplayMode displayMode) {
363 // Print the banner.
364 auto total = getTimeRecord();
365 printTimeHeader(os, total);
366
367 // Defer to a specialized printer for each display mode.
368 switch (displayMode) {
369 case DisplayMode::List:
370 printAsList(os, total);
371 break;
372 case DisplayMode::Tree:
373 printAsTree(os, total);
374 break;
375 }
376
377 // Print the top-level time not accounted for by child timers, and the
378 // total.
379 auto rest = total;
380 for (auto &child : children)
381 rest -= child.second->getTimeRecord();
382 printTimeEntry(os, indent: 0, name: "Rest", time: rest, total);
383 printTimeEntry(os, indent: 0, name: "Total", time: total, total);
384 os.flush();
385 }
386
387 /// The last time instant at which the timer was started.
388 std::chrono::time_point<std::chrono::steady_clock> startTime;
389
390 /// Accumulated wall time. If multiple threads of execution are merged into
391 /// this timer, the wall time will hold the maximum wall time of each thread
392 /// of execution.
393 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
394
395 /// Accumulated user time. If multiple threads of execution are merged into
396 /// this timer, each thread's user time is added here.
397 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
398
399 /// The thread on which this timer is running.
400 uint64_t threadId;
401
402 /// A descriptive name for this timer.
403 std::string name;
404
405 /// Whether to omit this timer from reports and directly show its children.
406 bool hidden = false;
407
408 /// Child timers on the same thread the timer itself. We keep at most one
409 /// timer per unique identifier.
410 ChildrenMap children;
411
412 /// Child timers on other threads. We keep at most one timer per unique
413 /// identifier.
414 AsyncChildrenMap asyncChildren;
415
416 /// Mutex for the async children.
417 std::mutex asyncMutex;
418};
419
420} // namespace
421
422//===----------------------------------------------------------------------===//
423// DefaultTimingManager
424//===----------------------------------------------------------------------===//
425
426namespace mlir {
427namespace detail {
428
429/// Implementation details of the `DefaultTimingManager`.
430class DefaultTimingManagerImpl {
431public:
432 /// Whether we should do our work or not.
433 bool enabled = false;
434
435 /// The configured display mode.
436 DisplayMode displayMode = DisplayMode::Tree;
437
438 /// The stream where we should print our output. This will always be non-null.
439 raw_ostream *output = &llvm::errs();
440
441 /// The root timer.
442 std::unique_ptr<TimerImpl> rootTimer;
443};
444
445} // namespace detail
446} // namespace mlir
447
448DefaultTimingManager::DefaultTimingManager()
449 : impl(std::make_unique<DefaultTimingManagerImpl>()) {
450 clear(); // initializes the root timer
451}
452
453DefaultTimingManager::~DefaultTimingManager() { print(); }
454
455/// Enable or disable execution time sampling.
456void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; }
457
458/// Return whether execution time sampling is enabled.
459bool DefaultTimingManager::isEnabled() const { return impl->enabled; }
460
461/// Change the display mode.
462void DefaultTimingManager::setDisplayMode(DisplayMode displayMode) {
463 impl->displayMode = displayMode;
464}
465
466/// Return the current display mode;
467DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const {
468 return impl->displayMode;
469}
470
471/// Change the stream where the output will be printed to.
472void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; }
473
474/// Return the current output stream where the output will be printed to.
475raw_ostream &DefaultTimingManager::getOutput() const {
476 assert(impl->output);
477 return *impl->output;
478}
479
480/// Print and clear the timing results.
481void DefaultTimingManager::print() {
482 if (impl->enabled) {
483 impl->rootTimer->finalize();
484 impl->rootTimer->print(os&: *impl->output, displayMode: impl->displayMode);
485 }
486 clear();
487}
488
489/// Clear the timing results.
490void DefaultTimingManager::clear() {
491 impl->rootTimer = std::make_unique<TimerImpl>(args: "root");
492 impl->rootTimer->hidden = true;
493}
494
495/// Debug print the timer data structures to an output stream.
496void DefaultTimingManager::dumpTimers(raw_ostream &os) {
497 impl->rootTimer->dump(os);
498}
499
500/// Debug print the timers as a list.
501void DefaultTimingManager::dumpAsList(raw_ostream &os) {
502 impl->rootTimer->finalize();
503 impl->rootTimer->print(os, displayMode: DisplayMode::List);
504}
505
506/// Debug print the timers as a tree.
507void DefaultTimingManager::dumpAsTree(raw_ostream &os) {
508 impl->rootTimer->finalize();
509 impl->rootTimer->print(os, displayMode: DisplayMode::Tree);
510}
511
512std::optional<void *> DefaultTimingManager::rootTimer() {
513 if (impl->enabled)
514 return impl->rootTimer.get();
515 return std::nullopt;
516}
517
518void DefaultTimingManager::startTimer(void *handle) {
519 static_cast<TimerImpl *>(handle)->start();
520}
521
522void DefaultTimingManager::stopTimer(void *handle) {
523 static_cast<TimerImpl *>(handle)->stop();
524}
525
526void *DefaultTimingManager::nestTimer(void *handle, const void *id,
527 function_ref<std::string()> nameBuilder) {
528 return static_cast<TimerImpl *>(handle)->nest(id, nameBuilder);
529}
530
531void DefaultTimingManager::hideTimer(void *handle) {
532 static_cast<TimerImpl *>(handle)->hidden = true;
533}
534
535//===----------------------------------------------------------------------===//
536// DefaultTimingManager Command Line Options
537//===----------------------------------------------------------------------===//
538
539namespace {
540struct DefaultTimingManagerOptions {
541 llvm::cl::opt<bool> timing{"mlir-timing",
542 llvm::cl::desc("Display execution times"),
543 llvm::cl::init(Val: false)};
544 llvm::cl::opt<DisplayMode> displayMode{
545 "mlir-timing-display", llvm::cl::desc("Display method for timing data"),
546 llvm::cl::init(Val: DisplayMode::Tree),
547 llvm::cl::values(
548 clEnumValN(DisplayMode::List, "list",
549 "display the results in a list sorted by total time"),
550 clEnumValN(DisplayMode::Tree, "tree",
551 "display the results ina with a nested tree view"))};
552};
553} // namespace
554
555static llvm::ManagedStatic<DefaultTimingManagerOptions> options;
556
557void mlir::registerDefaultTimingManagerCLOptions() {
558 // Make sure that the options struct has been constructed.
559 *options;
560}
561
562void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) {
563 if (!options.isConstructed())
564 return;
565 tm.setEnabled(options->timing);
566 tm.setDisplayMode(options->displayMode);
567}
568

source code of mlir/lib/Support/Timing.cpp