1 | //===- Timing.cpp - Execution time measurement facilities -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Facilities to measure and provide statistics on execution time. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "mlir/Support/Timing.h" |
14 | #include "mlir/Support/ThreadLocalCache.h" |
15 | #include "llvm/ADT/MapVector.h" |
16 | #include "llvm/ADT/Statistic.h" |
17 | #include "llvm/ADT/StringMap.h" |
18 | #include "llvm/ADT/StringSet.h" |
19 | #include "llvm/Support/Allocator.h" |
20 | #include "llvm/Support/CommandLine.h" |
21 | #include "llvm/Support/Format.h" |
22 | #include "llvm/Support/FormatVariadic.h" |
23 | #include "llvm/Support/ManagedStatic.h" |
24 | #include "llvm/Support/RWMutex.h" |
25 | #include "llvm/Support/Threading.h" |
26 | #include "llvm/Support/raw_ostream.h" |
27 | |
28 | #include <atomic> |
29 | #include <optional> |
30 | #include <chrono> |
31 | |
32 | using namespace mlir; |
33 | using namespace detail; |
34 | using DisplayMode = DefaultTimingManager::DisplayMode; |
35 | |
36 | constexpr llvm::StringLiteral kTimingDescription = |
37 | "... Execution time report ..." ; |
38 | |
39 | //===----------------------------------------------------------------------===// |
40 | // TimingManager |
41 | //===----------------------------------------------------------------------===// |
42 | |
43 | namespace mlir { |
44 | namespace detail { |
45 | /// Private implementation details of the `TimingManager`. |
46 | class TimingManagerImpl { |
47 | public: |
48 | // Identifier allocator, map, and mutex for thread safety. |
49 | llvm::BumpPtrAllocator identifierAllocator; |
50 | llvm::StringSet<llvm::BumpPtrAllocator &> identifiers; |
51 | llvm::sys::SmartRWMutex<true> identifierMutex; |
52 | |
53 | /// A thread local cache of identifiers to reduce lock contention. |
54 | ThreadLocalCache<llvm::StringMap<llvm::StringMapEntry<std::nullopt_t> *>> |
55 | localIdentifierCache; |
56 | |
57 | TimingManagerImpl() : identifiers(identifierAllocator) {} |
58 | }; |
59 | } // namespace detail |
60 | } // namespace mlir |
61 | |
62 | TimingManager::TimingManager() : impl(std::make_unique<TimingManagerImpl>()) {} |
63 | |
64 | TimingManager::~TimingManager() = default; |
65 | |
66 | /// Get the root timer of this timing manager. |
67 | Timer TimingManager::getRootTimer() { |
68 | auto rt = rootTimer(); |
69 | return rt ? Timer(*this, *rt) : Timer(); |
70 | } |
71 | |
72 | /// Get the root timer of this timing manager wrapped in a `TimingScope`. |
73 | TimingScope TimingManager::getRootScope() { |
74 | return TimingScope(getRootTimer()); |
75 | } |
76 | |
77 | //===----------------------------------------------------------------------===// |
78 | // Identifier uniquing |
79 | //===----------------------------------------------------------------------===// |
80 | |
81 | /// Return an identifier for the specified string. |
82 | TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) { |
83 | // Check for an existing instance in the local cache. |
84 | auto &impl = *tm.impl; |
85 | auto *&localEntry = (*impl.localIdentifierCache)[str]; |
86 | if (localEntry) |
87 | return TimingIdentifier(localEntry); |
88 | |
89 | // Check for an existing identifier in read-only mode. |
90 | { |
91 | llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex); |
92 | auto it = impl.identifiers.find(Key: str); |
93 | if (it != impl.identifiers.end()) { |
94 | localEntry = &*it; |
95 | return TimingIdentifier(localEntry); |
96 | } |
97 | } |
98 | |
99 | // Acquire a writer-lock so that we can safely create the new instance. |
100 | llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex); |
101 | auto it = impl.identifiers.insert(key: str).first; |
102 | localEntry = &*it; |
103 | return TimingIdentifier(localEntry); |
104 | } |
105 | |
106 | //===----------------------------------------------------------------------===// |
107 | // Helpers for time record printing |
108 | //===----------------------------------------------------------------------===// |
109 | |
110 | namespace { |
111 | |
112 | /// Simple record class to record timing information. |
113 | struct TimeRecord { |
114 | TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} |
115 | |
116 | TimeRecord &operator+=(const TimeRecord &other) { |
117 | wall += other.wall; |
118 | user += other.user; |
119 | return *this; |
120 | } |
121 | |
122 | TimeRecord &operator-=(const TimeRecord &other) { |
123 | wall -= other.wall; |
124 | user -= other.user; |
125 | return *this; |
126 | } |
127 | |
128 | /// Print the current time record to 'os', with a breakdown showing |
129 | /// contributions to the give 'total' time record. |
130 | void print(raw_ostream &os, const TimeRecord &total) { |
131 | if (total.user != total.wall) |
132 | os << llvm::format(Fmt: " %8.4f (%5.1f%%)" , Vals: user, Vals: 100.0 * user / total.user); |
133 | os << llvm::format(Fmt: " %8.4f (%5.1f%%) " , Vals: wall, Vals: 100.0 * wall / total.wall); |
134 | } |
135 | |
136 | double wall, user; |
137 | }; |
138 | |
139 | } // namespace |
140 | |
141 | /// Utility to print a single line entry in the timer output. |
142 | static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, |
143 | TimeRecord time, TimeRecord total) { |
144 | time.print(os, total); |
145 | os.indent(NumSpaces: indent) << name << "\n" ; |
146 | } |
147 | |
148 | /// Utility to print the timer heading information. |
149 | static void (raw_ostream &os, TimeRecord total) { |
150 | // Figure out how many spaces to description name. |
151 | unsigned padding = (80 - kTimingDescription.size()) / 2; |
152 | os << "===" << std::string(73, '-') << "===\n" ; |
153 | os.indent(NumSpaces: padding) << kTimingDescription << '\n'; |
154 | os << "===" << std::string(73, '-') << "===\n" ; |
155 | |
156 | // Print the total time followed by the section headers. |
157 | os << llvm::format(Fmt: " Total Execution Time: %.4f seconds\n\n" , Vals: total.wall); |
158 | if (total.user != total.wall) |
159 | os << " ----User Time----" ; |
160 | os << " ----Wall Time---- ----Name----\n" ; |
161 | } |
162 | |
163 | //===----------------------------------------------------------------------===// |
164 | // Timer Implementation for DefaultTimingManager |
165 | //===----------------------------------------------------------------------===// |
166 | |
167 | namespace { |
168 | |
169 | /// A timer used to sample execution time. |
170 | /// |
171 | /// Separately tracks wall time and user time to account for parallel threads of |
172 | /// execution. Timers are intended to be started and stopped multiple times. |
173 | /// Each start and stop will add to the timer's wall and user time. |
174 | class TimerImpl { |
175 | public: |
176 | using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>; |
177 | using AsyncChildrenMap = llvm::DenseMap<uint64_t, ChildrenMap>; |
178 | |
179 | TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {} |
180 | |
181 | /// Start the timer. |
182 | void start() { startTime = std::chrono::steady_clock::now(); } |
183 | |
184 | /// Stop the timer. |
185 | void stop() { |
186 | auto newTime = std::chrono::steady_clock::now() - startTime; |
187 | wallTime += newTime; |
188 | userTime += newTime; |
189 | } |
190 | |
191 | /// Create a child timer nested within this one. Multiple calls to this |
192 | /// function with the same unique identifier `id` will return the same child |
193 | /// timer. |
194 | /// |
195 | /// This function can be called from other threads, as long as this timer |
196 | /// outlives any uses of the child timer on the other thread. |
197 | TimerImpl *nest(const void *id, function_ref<std::string()> nameBuilder) { |
198 | auto tid = llvm::get_threadid(); |
199 | if (tid == threadId) |
200 | return nestTail(child&: children[id], nameBuilder); |
201 | std::unique_lock<std::mutex> lock(asyncMutex); |
202 | return nestTail(child&: asyncChildren[tid][id], nameBuilder); |
203 | } |
204 | |
205 | /// Tail-called from `nest()`. |
206 | TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child, |
207 | function_ref<std::string()> nameBuilder) { |
208 | if (!child) |
209 | child = std::make_unique<TimerImpl>(args: nameBuilder()); |
210 | return child.get(); |
211 | } |
212 | |
213 | /// Finalize this timer and all its children. |
214 | /// |
215 | /// If this timer has async children, which happens if `nest()` was called |
216 | /// from another thread, this function merges the async childr timers into the |
217 | /// main list of child timers. |
218 | /// |
219 | /// Caution: Call this function only after all nested timers running on other |
220 | /// threads no longer need their timers! |
221 | void finalize() { |
222 | addAsyncUserTime(); |
223 | mergeAsyncChildren(); |
224 | } |
225 | |
226 | /// Add the user time of all async children to this timer's user time. This is |
227 | /// necessary since the user time already contains all regular child timers, |
228 | /// but not the asynchronous ones (by the nesting nature of the timers). |
229 | std::chrono::nanoseconds addAsyncUserTime() { |
230 | auto added = std::chrono::nanoseconds(0); |
231 | for (auto &child : children) |
232 | added += child.second->addAsyncUserTime(); |
233 | for (auto &thread : asyncChildren) { |
234 | for (auto &child : thread.second) { |
235 | child.second->addAsyncUserTime(); |
236 | added += child.second->userTime; |
237 | } |
238 | } |
239 | userTime += added; |
240 | return added; |
241 | } |
242 | |
243 | /// Ensure that this timer and recursively all its children have their async |
244 | /// children folded into the main map of children. |
245 | void mergeAsyncChildren() { |
246 | for (auto &child : children) |
247 | child.second->mergeAsyncChildren(); |
248 | mergeChildren(other: std::move(asyncChildren)); |
249 | assert(asyncChildren.empty()); |
250 | } |
251 | |
252 | /// Merge multiple child timers into this timer. |
253 | /// |
254 | /// Children in `other` are added as children to this timer, or, if this timer |
255 | /// already contains a child with the corresponding unique identifier, are |
256 | /// merged into the existing child. |
257 | void mergeChildren(ChildrenMap &&other) { |
258 | if (children.empty()) { |
259 | children = std::move(other); |
260 | for (auto &child : children) |
261 | child.second->mergeAsyncChildren(); |
262 | } else { |
263 | for (auto &child : other) |
264 | mergeChild(id: child.first, other: std::move(child.second)); |
265 | other.clear(); |
266 | } |
267 | } |
268 | |
269 | /// See above. |
270 | void mergeChildren(AsyncChildrenMap &&other) { |
271 | for (auto &thread : other) { |
272 | mergeChildren(other: std::move(thread.second)); |
273 | assert(thread.second.empty()); |
274 | } |
275 | other.clear(); |
276 | } |
277 | |
278 | /// Merge a child timer into this timer for a given unique identifier. |
279 | /// |
280 | /// Moves all child and async child timers of `other` into this timer's child |
281 | /// for the given unique identifier. |
282 | void mergeChild(const void *id, std::unique_ptr<TimerImpl> &&other) { |
283 | auto &into = children[id]; |
284 | if (!into) { |
285 | into = std::move(other); |
286 | into->mergeAsyncChildren(); |
287 | } else { |
288 | into->wallTime = std::max(a: into->wallTime, b: other->wallTime); |
289 | into->userTime += other->userTime; |
290 | into->mergeChildren(other: std::move(other->children)); |
291 | into->mergeChildren(other: std::move(other->asyncChildren)); |
292 | other.reset(); |
293 | } |
294 | } |
295 | |
296 | /// Dump a human-readable tree representation of the timer and its children. |
297 | /// This is useful for debugging the timing mechanisms and structure of the |
298 | /// timers. |
299 | void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) { |
300 | auto time = getTimeRecord(); |
301 | os << std::string(indent * 2, ' ') << name << " [" << threadId << "]" |
302 | << llvm::format(Fmt: " %7.4f / %7.4f" , Vals: time.user, Vals: time.wall); |
303 | if (threadId != markThreadId && markThreadId != 0) |
304 | os << " (*)" ; |
305 | os << "\n" ; |
306 | for (auto &child : children) |
307 | child.second->dump(os, indent: indent + 1, markThreadId: threadId); |
308 | for (auto &thread : asyncChildren) |
309 | for (auto &child : thread.second) |
310 | child.second->dump(os, indent: indent + 1, markThreadId: threadId); |
311 | } |
312 | |
313 | /// Returns the time for this timer in seconds. |
314 | TimeRecord getTimeRecord() { |
315 | return TimeRecord( |
316 | std::chrono::duration_cast<std::chrono::duration<double>>(d: wallTime) |
317 | .count(), |
318 | std::chrono::duration_cast<std::chrono::duration<double>>(d: userTime) |
319 | .count()); |
320 | } |
321 | |
322 | /// Print the timing result in list mode. |
323 | void printAsList(raw_ostream &os, TimeRecord total) { |
324 | // Flatten the leaf timers in the tree and merge them by name. |
325 | llvm::StringMap<TimeRecord> mergedTimers; |
326 | std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) { |
327 | mergedTimers[timer->name] += timer->getTimeRecord(); |
328 | for (auto &children : timer->children) |
329 | addTimer(children.second.get()); |
330 | }; |
331 | addTimer(this); |
332 | |
333 | // Sort the timing information by wall time. |
334 | std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime; |
335 | for (auto &it : mergedTimers) |
336 | timerNameAndTime.emplace_back(args: it.first(), args&: it.second); |
337 | llvm::array_pod_sort(Start: timerNameAndTime.begin(), End: timerNameAndTime.end(), |
338 | Compare: [](const std::pair<StringRef, TimeRecord> *lhs, |
339 | const std::pair<StringRef, TimeRecord> *rhs) { |
340 | return llvm::array_pod_sort_comparator<double>( |
341 | P1: &rhs->second.wall, P2: &lhs->second.wall); |
342 | }); |
343 | |
344 | // Print the timing information sequentially. |
345 | for (auto &timeData : timerNameAndTime) |
346 | printTimeEntry(os, indent: 0, name: timeData.first, time: timeData.second, total); |
347 | } |
348 | |
349 | /// Print the timing result in tree mode. |
350 | void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) { |
351 | unsigned childIndent = indent; |
352 | if (!hidden) { |
353 | printTimeEntry(os, indent, name, time: getTimeRecord(), total); |
354 | childIndent += 2; |
355 | } |
356 | for (auto &child : children) { |
357 | child.second->printAsTree(os, total, indent: childIndent); |
358 | } |
359 | } |
360 | |
361 | /// Print the current timing information. |
362 | void print(raw_ostream &os, DisplayMode displayMode) { |
363 | // Print the banner. |
364 | auto total = getTimeRecord(); |
365 | printTimeHeader(os, total); |
366 | |
367 | // Defer to a specialized printer for each display mode. |
368 | switch (displayMode) { |
369 | case DisplayMode::List: |
370 | printAsList(os, total); |
371 | break; |
372 | case DisplayMode::Tree: |
373 | printAsTree(os, total); |
374 | break; |
375 | } |
376 | |
377 | // Print the top-level time not accounted for by child timers, and the |
378 | // total. |
379 | auto rest = total; |
380 | for (auto &child : children) |
381 | rest -= child.second->getTimeRecord(); |
382 | printTimeEntry(os, indent: 0, name: "Rest" , time: rest, total); |
383 | printTimeEntry(os, indent: 0, name: "Total" , time: total, total); |
384 | os.flush(); |
385 | } |
386 | |
387 | /// The last time instant at which the timer was started. |
388 | std::chrono::time_point<std::chrono::steady_clock> startTime; |
389 | |
390 | /// Accumulated wall time. If multiple threads of execution are merged into |
391 | /// this timer, the wall time will hold the maximum wall time of each thread |
392 | /// of execution. |
393 | std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); |
394 | |
395 | /// Accumulated user time. If multiple threads of execution are merged into |
396 | /// this timer, each thread's user time is added here. |
397 | std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); |
398 | |
399 | /// The thread on which this timer is running. |
400 | uint64_t threadId; |
401 | |
402 | /// A descriptive name for this timer. |
403 | std::string name; |
404 | |
405 | /// Whether to omit this timer from reports and directly show its children. |
406 | bool hidden = false; |
407 | |
408 | /// Child timers on the same thread the timer itself. We keep at most one |
409 | /// timer per unique identifier. |
410 | ChildrenMap children; |
411 | |
412 | /// Child timers on other threads. We keep at most one timer per unique |
413 | /// identifier. |
414 | AsyncChildrenMap asyncChildren; |
415 | |
416 | /// Mutex for the async children. |
417 | std::mutex asyncMutex; |
418 | }; |
419 | |
420 | } // namespace |
421 | |
422 | //===----------------------------------------------------------------------===// |
423 | // DefaultTimingManager |
424 | //===----------------------------------------------------------------------===// |
425 | |
426 | namespace mlir { |
427 | namespace detail { |
428 | |
429 | /// Implementation details of the `DefaultTimingManager`. |
430 | class DefaultTimingManagerImpl { |
431 | public: |
432 | /// Whether we should do our work or not. |
433 | bool enabled = false; |
434 | |
435 | /// The configured display mode. |
436 | DisplayMode displayMode = DisplayMode::Tree; |
437 | |
438 | /// The stream where we should print our output. This will always be non-null. |
439 | raw_ostream *output = &llvm::errs(); |
440 | |
441 | /// The root timer. |
442 | std::unique_ptr<TimerImpl> rootTimer; |
443 | }; |
444 | |
445 | } // namespace detail |
446 | } // namespace mlir |
447 | |
448 | DefaultTimingManager::DefaultTimingManager() |
449 | : impl(std::make_unique<DefaultTimingManagerImpl>()) { |
450 | clear(); // initializes the root timer |
451 | } |
452 | |
453 | DefaultTimingManager::~DefaultTimingManager() { print(); } |
454 | |
455 | /// Enable or disable execution time sampling. |
456 | void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; } |
457 | |
458 | /// Return whether execution time sampling is enabled. |
459 | bool DefaultTimingManager::isEnabled() const { return impl->enabled; } |
460 | |
461 | /// Change the display mode. |
462 | void DefaultTimingManager::setDisplayMode(DisplayMode displayMode) { |
463 | impl->displayMode = displayMode; |
464 | } |
465 | |
466 | /// Return the current display mode; |
467 | DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const { |
468 | return impl->displayMode; |
469 | } |
470 | |
471 | /// Change the stream where the output will be printed to. |
472 | void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; } |
473 | |
474 | /// Return the current output stream where the output will be printed to. |
475 | raw_ostream &DefaultTimingManager::getOutput() const { |
476 | assert(impl->output); |
477 | return *impl->output; |
478 | } |
479 | |
480 | /// Print and clear the timing results. |
481 | void DefaultTimingManager::print() { |
482 | if (impl->enabled) { |
483 | impl->rootTimer->finalize(); |
484 | impl->rootTimer->print(os&: *impl->output, displayMode: impl->displayMode); |
485 | } |
486 | clear(); |
487 | } |
488 | |
489 | /// Clear the timing results. |
490 | void DefaultTimingManager::clear() { |
491 | impl->rootTimer = std::make_unique<TimerImpl>(args: "root" ); |
492 | impl->rootTimer->hidden = true; |
493 | } |
494 | |
495 | /// Debug print the timer data structures to an output stream. |
496 | void DefaultTimingManager::dumpTimers(raw_ostream &os) { |
497 | impl->rootTimer->dump(os); |
498 | } |
499 | |
500 | /// Debug print the timers as a list. |
501 | void DefaultTimingManager::dumpAsList(raw_ostream &os) { |
502 | impl->rootTimer->finalize(); |
503 | impl->rootTimer->print(os, displayMode: DisplayMode::List); |
504 | } |
505 | |
506 | /// Debug print the timers as a tree. |
507 | void DefaultTimingManager::dumpAsTree(raw_ostream &os) { |
508 | impl->rootTimer->finalize(); |
509 | impl->rootTimer->print(os, displayMode: DisplayMode::Tree); |
510 | } |
511 | |
512 | std::optional<void *> DefaultTimingManager::rootTimer() { |
513 | if (impl->enabled) |
514 | return impl->rootTimer.get(); |
515 | return std::nullopt; |
516 | } |
517 | |
518 | void DefaultTimingManager::startTimer(void *handle) { |
519 | static_cast<TimerImpl *>(handle)->start(); |
520 | } |
521 | |
522 | void DefaultTimingManager::stopTimer(void *handle) { |
523 | static_cast<TimerImpl *>(handle)->stop(); |
524 | } |
525 | |
526 | void *DefaultTimingManager::nestTimer(void *handle, const void *id, |
527 | function_ref<std::string()> nameBuilder) { |
528 | return static_cast<TimerImpl *>(handle)->nest(id, nameBuilder); |
529 | } |
530 | |
531 | void DefaultTimingManager::hideTimer(void *handle) { |
532 | static_cast<TimerImpl *>(handle)->hidden = true; |
533 | } |
534 | |
535 | //===----------------------------------------------------------------------===// |
536 | // DefaultTimingManager Command Line Options |
537 | //===----------------------------------------------------------------------===// |
538 | |
539 | namespace { |
540 | struct DefaultTimingManagerOptions { |
541 | llvm::cl::opt<bool> timing{"mlir-timing" , |
542 | llvm::cl::desc("Display execution times" ), |
543 | llvm::cl::init(Val: false)}; |
544 | llvm::cl::opt<DisplayMode> displayMode{ |
545 | "mlir-timing-display" , llvm::cl::desc("Display method for timing data" ), |
546 | llvm::cl::init(Val: DisplayMode::Tree), |
547 | llvm::cl::values( |
548 | clEnumValN(DisplayMode::List, "list" , |
549 | "display the results in a list sorted by total time" ), |
550 | clEnumValN(DisplayMode::Tree, "tree" , |
551 | "display the results ina with a nested tree view" ))}; |
552 | }; |
553 | } // namespace |
554 | |
555 | static llvm::ManagedStatic<DefaultTimingManagerOptions> options; |
556 | |
557 | void mlir::registerDefaultTimingManagerCLOptions() { |
558 | // Make sure that the options struct has been constructed. |
559 | *options; |
560 | } |
561 | |
562 | void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) { |
563 | if (!options.isConstructed()) |
564 | return; |
565 | tm.setEnabled(options->timing); |
566 | tm.setDisplayMode(options->displayMode); |
567 | } |
568 | |