| 1 | //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Implementation of the class that manages parallel work on BinaryFunctions. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Core/ParallelUtilities.h" |
| 14 | #include "bolt/Core/BinaryContext.h" |
| 15 | #include "bolt/Core/BinaryFunction.h" |
| 16 | #include "llvm/Support/RWMutex.h" |
| 17 | #include "llvm/Support/ThreadPool.h" |
| 18 | #include "llvm/Support/Timer.h" |
| 19 | #include <mutex> |
| 20 | |
| 21 | #define DEBUG_TYPE "par-utils" |
| 22 | |
| 23 | namespace opts { |
| 24 | extern cl::OptionCategory BoltCategory; |
| 25 | |
| 26 | cl::opt<unsigned> |
| 27 | ThreadCount("thread-count" , |
| 28 | cl::desc("number of threads" ), |
| 29 | cl::init(Val: hardware_concurrency().compute_thread_count()), |
| 30 | cl::cat(BoltCategory)); |
| 31 | |
| 32 | cl::opt<bool> |
| 33 | NoThreads("no-threads" , |
| 34 | cl::desc("disable multithreading" ), |
| 35 | cl::init(Val: false), |
| 36 | cl::cat(BoltCategory)); |
| 37 | |
| 38 | cl::opt<unsigned> |
| 39 | TaskCount("tasks-per-thread" , |
| 40 | cl::desc("number of tasks to be created per thread" ), |
| 41 | cl::init(Val: 20), |
| 42 | cl::cat(BoltCategory)); |
| 43 | |
| 44 | } // namespace opts |
| 45 | |
| 46 | namespace llvm { |
| 47 | namespace bolt { |
| 48 | namespace ParallelUtilities { |
| 49 | |
| 50 | namespace { |
| 51 | /// A single thread pool that is used to run parallel tasks |
| 52 | std::unique_ptr<ThreadPoolInterface> ThreadPoolPtr; |
| 53 | |
| 54 | unsigned computeCostFor(const BinaryFunction &BF, |
| 55 | const PredicateTy &SkipPredicate, |
| 56 | const SchedulingPolicy &SchedPolicy) { |
| 57 | if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) |
| 58 | return 1; |
| 59 | |
| 60 | if (SkipPredicate && SkipPredicate(BF)) |
| 61 | return 0; |
| 62 | |
| 63 | switch (SchedPolicy) { |
| 64 | case SchedulingPolicy::SP_CONSTANT: |
| 65 | return 1; |
| 66 | case SchedulingPolicy::SP_INST_LINEAR: |
| 67 | return BF.getSize(); |
| 68 | case SchedulingPolicy::SP_INST_QUADRATIC: |
| 69 | return BF.getSize() * BF.getSize(); |
| 70 | case SchedulingPolicy::SP_BB_LINEAR: |
| 71 | return BF.size(); |
| 72 | case SchedulingPolicy::SP_BB_QUADRATIC: |
| 73 | return BF.size() * BF.size(); |
| 74 | default: |
| 75 | llvm_unreachable("unsupported scheduling policy" ); |
| 76 | } |
| 77 | } |
| 78 | |
| 79 | inline unsigned estimateTotalCost(const BinaryContext &BC, |
| 80 | const PredicateTy &SkipPredicate, |
| 81 | SchedulingPolicy &SchedPolicy) { |
| 82 | if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL) |
| 83 | return BC.getBinaryFunctions().size(); |
| 84 | |
| 85 | unsigned TotalCost = 0; |
| 86 | for (auto &BFI : BC.getBinaryFunctions()) { |
| 87 | const BinaryFunction &BF = BFI.second; |
| 88 | TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
| 89 | } |
| 90 | |
| 91 | // Switch to trivial scheduling if total estimated work is zero |
| 92 | if (TotalCost == 0) { |
| 93 | BC.outs() |
| 94 | << "BOLT-WARNING: Running parallel work of 0 estimated cost, will " |
| 95 | "switch to trivial scheduling.\n" ; |
| 96 | |
| 97 | SchedPolicy = SP_TRIVIAL; |
| 98 | TotalCost = BC.getBinaryFunctions().size(); |
| 99 | } |
| 100 | return TotalCost; |
| 101 | } |
| 102 | |
| 103 | } // namespace |
| 104 | |
| 105 | ThreadPoolInterface &getThreadPool(const unsigned ThreadsCount) { |
| 106 | if (ThreadPoolPtr) |
| 107 | return *ThreadPoolPtr; |
| 108 | |
| 109 | if (ThreadsCount > 1) |
| 110 | ThreadPoolPtr = std::make_unique<DefaultThreadPool>( |
| 111 | args: llvm::hardware_concurrency(ThreadCount: ThreadsCount)); |
| 112 | else |
| 113 | ThreadPoolPtr = std::make_unique<SingleThreadExecutor>(); |
| 114 | return *ThreadPoolPtr; |
| 115 | } |
| 116 | |
| 117 | void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy, |
| 118 | WorkFuncTy WorkFunction, PredicateTy SkipPredicate, |
| 119 | std::string LogName, bool ForceSequential, |
| 120 | unsigned TasksPerThread) { |
| 121 | if (BC.getBinaryFunctions().size() == 0) |
| 122 | return; |
| 123 | |
| 124 | auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, |
| 125 | std::map<uint64_t, BinaryFunction>::iterator BlockEnd) { |
| 126 | Timer T(LogName, LogName); |
| 127 | LLVM_DEBUG(T.startTimer()); |
| 128 | |
| 129 | for (auto It = BlockBegin; It != BlockEnd; ++It) { |
| 130 | BinaryFunction &BF = It->second; |
| 131 | if (SkipPredicate && SkipPredicate(BF)) |
| 132 | continue; |
| 133 | |
| 134 | WorkFunction(BF); |
| 135 | } |
| 136 | LLVM_DEBUG(T.stopTimer()); |
| 137 | }; |
| 138 | |
| 139 | if (opts::NoThreads || ForceSequential) { |
| 140 | runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end()); |
| 141 | return; |
| 142 | } |
| 143 | |
| 144 | // Estimate the overall runtime cost using the scheduling policy |
| 145 | const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); |
| 146 | const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; |
| 147 | const unsigned BlockCost = |
| 148 | TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; |
| 149 | |
| 150 | // Divide work into blocks of equal cost |
| 151 | ThreadPoolInterface &Pool = getThreadPool(); |
| 152 | auto BlockBegin = BC.getBinaryFunctions().begin(); |
| 153 | unsigned CurrentCost = 0; |
| 154 | |
| 155 | for (auto It = BC.getBinaryFunctions().begin(); |
| 156 | It != BC.getBinaryFunctions().end(); ++It) { |
| 157 | BinaryFunction &BF = It->second; |
| 158 | CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
| 159 | |
| 160 | if (CurrentCost >= BlockCost) { |
| 161 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It)); |
| 162 | BlockBegin = std::next(x: It); |
| 163 | CurrentCost = 0; |
| 164 | } |
| 165 | } |
| 166 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end()); |
| 167 | Pool.wait(); |
| 168 | } |
| 169 | |
| 170 | void runOnEachFunctionWithUniqueAllocId( |
| 171 | BinaryContext &BC, SchedulingPolicy SchedPolicy, |
| 172 | WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate, |
| 173 | std::string LogName, bool ForceSequential, unsigned TasksPerThread) { |
| 174 | if (BC.getBinaryFunctions().size() == 0) |
| 175 | return; |
| 176 | |
| 177 | llvm::sys::RWMutex MainLock; |
| 178 | auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin, |
| 179 | std::map<uint64_t, BinaryFunction>::iterator BlockEnd, |
| 180 | MCPlusBuilder::AllocatorIdTy AllocId) { |
| 181 | Timer T(LogName, LogName); |
| 182 | LLVM_DEBUG(T.startTimer()); |
| 183 | std::shared_lock<llvm::sys::RWMutex> Lock(MainLock); |
| 184 | for (auto It = BlockBegin; It != BlockEnd; ++It) { |
| 185 | BinaryFunction &BF = It->second; |
| 186 | if (SkipPredicate && SkipPredicate(BF)) |
| 187 | continue; |
| 188 | |
| 189 | WorkFunction(BF, AllocId); |
| 190 | } |
| 191 | LLVM_DEBUG(T.stopTimer()); |
| 192 | }; |
| 193 | |
| 194 | unsigned AllocId = 1; |
| 195 | auto EnsureAllocatorExists = [&BC](unsigned AllocId) { |
| 196 | if (!BC.MIB->checkAllocatorExists(AllocatorId: AllocId)) { |
| 197 | MCPlusBuilder::AllocatorIdTy Id = |
| 198 | BC.MIB->initializeNewAnnotationAllocator(); |
| 199 | (void)Id; |
| 200 | assert(AllocId == Id && "unexpected allocator id created" ); |
| 201 | } |
| 202 | }; |
| 203 | |
| 204 | if (opts::NoThreads || ForceSequential) { |
| 205 | EnsureAllocatorExists(AllocId); |
| 206 | runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), |
| 207 | AllocId); |
| 208 | return; |
| 209 | } |
| 210 | // This lock is used to postpone task execution |
| 211 | std::unique_lock<llvm::sys::RWMutex> Lock(MainLock); |
| 212 | |
| 213 | // Estimate the overall runtime cost using the scheduling policy |
| 214 | const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy); |
| 215 | const unsigned BlocksCount = TasksPerThread * opts::ThreadCount; |
| 216 | const unsigned BlockCost = |
| 217 | TotalCost > BlocksCount ? TotalCost / BlocksCount : 1; |
| 218 | |
| 219 | // Divide work into blocks of equal cost |
| 220 | ThreadPoolInterface &Pool = getThreadPool(); |
| 221 | auto BlockBegin = BC.getBinaryFunctions().begin(); |
| 222 | unsigned CurrentCost = 0; |
| 223 | for (auto It = BC.getBinaryFunctions().begin(); |
| 224 | It != BC.getBinaryFunctions().end(); ++It) { |
| 225 | BinaryFunction &BF = It->second; |
| 226 | CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy); |
| 227 | |
| 228 | if (CurrentCost >= BlockCost) { |
| 229 | EnsureAllocatorExists(AllocId); |
| 230 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: std::next(x: It), ArgList&: AllocId); |
| 231 | AllocId++; |
| 232 | BlockBegin = std::next(x: It); |
| 233 | CurrentCost = 0; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | EnsureAllocatorExists(AllocId); |
| 238 | |
| 239 | Pool.async(F&: runBlock, ArgList&: BlockBegin, ArgList: BC.getBinaryFunctions().end(), ArgList&: AllocId); |
| 240 | Lock.unlock(); |
| 241 | Pool.wait(); |
| 242 | } |
| 243 | |
| 244 | } // namespace ParallelUtilities |
| 245 | } // namespace bolt |
| 246 | } // namespace llvm |
| 247 | |