1 | //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file provides an interface for customizing the standard MachineScheduler |
10 | // pass. Note that the entire pass may be replaced as follows: |
11 | // |
12 | // <Target>TargetMachine::createPassConfig(PassManagerBase &PM) { |
13 | // PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID); |
14 | // ...} |
15 | // |
16 | // The MachineScheduler pass is only responsible for choosing the regions to be |
17 | // scheduled. Targets can override the DAG builder and scheduler without |
18 | // replacing the pass as follows: |
19 | // |
20 | // ScheduleDAGInstrs *<Target>PassConfig:: |
21 | // createMachineScheduler(MachineSchedContext *C) { |
22 | // return new CustomMachineScheduler(C); |
23 | // } |
24 | // |
25 | // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list |
26 | // scheduling while updating the instruction stream, register pressure, and live |
27 | // intervals. Most targets don't need to override the DAG builder and list |
28 | // scheduler, but subtargets that require custom scheduling heuristics may |
29 | // plugin an alternate MachineSchedStrategy. The strategy is responsible for |
30 | // selecting the highest priority node from the list: |
31 | // |
32 | // ScheduleDAGInstrs *<Target>PassConfig:: |
33 | // createMachineScheduler(MachineSchedContext *C) { |
34 | // return new ScheduleDAGMILive(C, CustomStrategy(C)); |
35 | // } |
36 | // |
37 | // The DAG builder can also be customized in a sense by adding DAG mutations |
38 | // that will run after DAG building and before list scheduling. DAG mutations |
39 | // can adjust dependencies based on target-specific knowledge or add weak edges |
40 | // to aid heuristics: |
41 | // |
42 | // ScheduleDAGInstrs *<Target>PassConfig:: |
43 | // createMachineScheduler(MachineSchedContext *C) { |
44 | // ScheduleDAGMI *DAG = createGenericSchedLive(C); |
45 | // DAG->addMutation(new CustomDAGMutation(...)); |
46 | // return DAG; |
47 | // } |
48 | // |
49 | // A target that supports alternative schedulers can use the |
50 | // MachineSchedRegistry to allow command line selection. This can be done by |
51 | // implementing the following boilerplate: |
52 | // |
53 | // static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) { |
54 | // return new CustomMachineScheduler(C); |
55 | // } |
56 | // static MachineSchedRegistry |
57 | // SchedCustomRegistry("custom", "Run my target's custom scheduler", |
58 | // createCustomMachineSched); |
59 | // |
60 | // |
61 | // Finally, subtargets that don't need to implement custom heuristics but would |
62 | // like to configure the GenericScheduler's policy for a given scheduler region, |
63 | // including scheduling direction and register pressure tracking policy, can do |
64 | // this: |
65 | // |
66 | // void <SubTarget>Subtarget:: |
67 | // overrideSchedPolicy(MachineSchedPolicy &Policy, |
68 | // unsigned NumRegionInstrs) const { |
69 | // Policy.<Flag> = true; |
70 | // } |
71 | // |
72 | //===----------------------------------------------------------------------===// |
73 | |
74 | #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H |
75 | #define LLVM_CODEGEN_MACHINESCHEDULER_H |
76 | |
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <list>
#include <memory>
#include <string>
#include <utility>
#include <vector>
99 | |
100 | namespace llvm { |
101 | |
102 | extern cl::opt<bool> ForceTopDown; |
103 | extern cl::opt<bool> ForceBottomUp; |
104 | extern cl::opt<bool> VerifyScheduling; |
105 | #ifndef NDEBUG |
106 | extern cl::opt<bool> ViewMISchedDAGs; |
107 | extern cl::opt<bool> PrintDAGs; |
108 | #else |
109 | extern const bool ViewMISchedDAGs; |
110 | extern const bool PrintDAGs; |
111 | #endif |
112 | |
113 | class AAResults; |
114 | class LiveIntervals; |
115 | class MachineDominatorTree; |
116 | class MachineFunction; |
117 | class MachineInstr; |
118 | class MachineLoopInfo; |
119 | class RegisterClassInfo; |
120 | class SchedDFSResult; |
121 | class ScheduleHazardRecognizer; |
122 | class TargetInstrInfo; |
123 | class TargetPassConfig; |
124 | class TargetRegisterInfo; |
125 | |
126 | /// MachineSchedContext provides enough context from the MachineScheduler pass |
127 | /// for the target to instantiate a scheduler. |
128 | struct MachineSchedContext { |
129 | MachineFunction *MF = nullptr; |
130 | const MachineLoopInfo *MLI = nullptr; |
131 | const MachineDominatorTree *MDT = nullptr; |
132 | const TargetPassConfig *PassConfig = nullptr; |
133 | AAResults *AA = nullptr; |
134 | LiveIntervals *LIS = nullptr; |
135 | |
136 | RegisterClassInfo *RegClassInfo; |
137 | |
138 | MachineSchedContext(); |
139 | MachineSchedContext &operator=(const MachineSchedContext &other) = delete; |
140 | MachineSchedContext(const MachineSchedContext &other) = delete; |
141 | virtual ~MachineSchedContext(); |
142 | }; |
143 | |
144 | /// MachineSchedRegistry provides a selection of available machine instruction |
145 | /// schedulers. |
146 | class MachineSchedRegistry |
147 | : public MachinePassRegistryNode< |
148 | ScheduleDAGInstrs *(*)(MachineSchedContext *)> { |
149 | public: |
150 | using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *); |
151 | |
152 | // RegisterPassParser requires a (misnamed) FunctionPassCtor type. |
153 | using FunctionPassCtor = ScheduleDAGCtor; |
154 | |
155 | static MachinePassRegistry<ScheduleDAGCtor> Registry; |
156 | |
157 | MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) |
158 | : MachinePassRegistryNode(N, D, C) { |
159 | Registry.Add(Node: this); |
160 | } |
161 | |
162 | ~MachineSchedRegistry() { Registry.Remove(Node: this); } |
163 | |
164 | // Accessors. |
165 | // |
166 | MachineSchedRegistry *getNext() const { |
167 | return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); |
168 | } |
169 | |
170 | static MachineSchedRegistry *getList() { |
171 | return (MachineSchedRegistry *)Registry.getList(); |
172 | } |
173 | |
174 | static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) { |
175 | Registry.setListener(L); |
176 | } |
177 | }; |
178 | |
179 | class ScheduleDAGMI; |
180 | |
/// Define a generic scheduling policy for targets that don't provide their own
/// MachineSchedStrategy. This can be overridden for each scheduling region
/// before building the DAG.
struct MachineSchedPolicy {
  /// Allow the scheduler to disable register pressure tracking.
  bool ShouldTrackPressure = false;
  /// Track LaneMasks to allow reordering of independent subregister writes
  /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
  bool ShouldTrackLaneMasks = false;

  /// Allow the scheduler to force top-down or bottom-up scheduling. If neither
  /// is true, the scheduler runs in both directions and converges.
  bool OnlyTopDown = false;
  bool OnlyBottomUp = false;

  /// Disable heuristic that tries to fetch nodes from long dependency chains
  /// first.
  bool DisableLatencyHeuristic = false;

  /// Compute DFSResult for use in scheduling heuristics.
  bool ComputeDFSResult = false;

  MachineSchedPolicy() = default;
};
205 | |
206 | /// MachineSchedStrategy - Interface to the scheduling algorithm used by |
207 | /// ScheduleDAGMI. |
208 | /// |
209 | /// Initialization sequence: |
210 | /// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots |
211 | class MachineSchedStrategy { |
212 | virtual void anchor(); |
213 | |
214 | public: |
215 | virtual ~MachineSchedStrategy() = default; |
216 | |
217 | /// Optionally override the per-region scheduling policy. |
218 | virtual void initPolicy(MachineBasicBlock::iterator Begin, |
219 | MachineBasicBlock::iterator End, |
220 | unsigned NumRegionInstrs) {} |
221 | |
222 | virtual void dumpPolicy() const {} |
223 | |
224 | /// Check if pressure tracking is needed before building the DAG and |
225 | /// initializing this strategy. Called after initPolicy. |
226 | virtual bool shouldTrackPressure() const { return true; } |
227 | |
228 | /// Returns true if lanemasks should be tracked. LaneMask tracking is |
229 | /// necessary to reorder independent subregister defs for the same vreg. |
230 | /// This has to be enabled in combination with shouldTrackPressure(). |
231 | virtual bool shouldTrackLaneMasks() const { return false; } |
232 | |
233 | // If this method returns true, handling of the scheduling regions |
234 | // themselves (in case of a scheduling boundary in MBB) will be done |
235 | // beginning with the topmost region of MBB. |
236 | virtual bool doMBBSchedRegionsTopDown() const { return false; } |
237 | |
238 | /// Initialize the strategy after building the DAG for a new region. |
239 | virtual void initialize(ScheduleDAGMI *DAG) = 0; |
240 | |
241 | /// Tell the strategy that MBB is about to be processed. |
242 | virtual void enterMBB(MachineBasicBlock *MBB) {}; |
243 | |
244 | /// Tell the strategy that current MBB is done. |
245 | virtual void leaveMBB() {}; |
246 | |
247 | /// Notify this strategy that all roots have been released (including those |
248 | /// that depend on EntrySU or ExitSU). |
249 | virtual void registerRoots() {} |
250 | |
251 | /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to |
252 | /// schedule the node at the top of the unscheduled region. Otherwise it will |
253 | /// be scheduled at the bottom. |
254 | virtual SUnit *pickNode(bool &IsTopNode) = 0; |
255 | |
256 | /// Scheduler callback to notify that a new subtree is scheduled. |
257 | virtual void scheduleTree(unsigned SubtreeID) {} |
258 | |
259 | /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an |
260 | /// instruction and updated scheduled/remaining flags in the DAG nodes. |
261 | virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; |
262 | |
263 | /// When all predecessor dependencies have been resolved, free this node for |
264 | /// top-down scheduling. |
265 | virtual void releaseTopNode(SUnit *SU) = 0; |
266 | |
267 | /// When all successor dependencies have been resolved, free this node for |
268 | /// bottom-up scheduling. |
269 | virtual void releaseBottomNode(SUnit *SU) = 0; |
270 | }; |
271 | |
272 | /// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply |
273 | /// schedules machine instructions according to the given MachineSchedStrategy |
274 | /// without much extra book-keeping. This is the common functionality between |
275 | /// PreRA and PostRA MachineScheduler. |
276 | class ScheduleDAGMI : public ScheduleDAGInstrs { |
277 | protected: |
278 | AAResults *AA; |
279 | LiveIntervals *LIS; |
280 | std::unique_ptr<MachineSchedStrategy> SchedImpl; |
281 | |
282 | /// Ordered list of DAG postprocessing steps. |
283 | std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; |
284 | |
285 | /// The top of the unscheduled zone. |
286 | MachineBasicBlock::iterator CurrentTop; |
287 | |
288 | /// The bottom of the unscheduled zone. |
289 | MachineBasicBlock::iterator CurrentBottom; |
290 | |
291 | /// Record the next node in a scheduled cluster. |
292 | const SUnit *NextClusterPred = nullptr; |
293 | const SUnit *NextClusterSucc = nullptr; |
294 | |
295 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
296 | /// The number of instructions scheduled so far. Used to cut off the |
297 | /// scheduler at the point determined by misched-cutoff. |
298 | unsigned = 0; |
299 | #endif |
300 | |
301 | public: |
302 | ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, |
303 | bool RemoveKillFlags) |
304 | : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), |
305 | LIS(C->LIS), SchedImpl(std::move(S)) {} |
306 | |
307 | // Provide a vtable anchor |
308 | ~ScheduleDAGMI() override; |
309 | |
310 | /// If this method returns true, handling of the scheduling regions |
311 | /// themselves (in case of a scheduling boundary in MBB) will be done |
312 | /// beginning with the topmost region of MBB. |
313 | bool doMBBSchedRegionsTopDown() const override { |
314 | return SchedImpl->doMBBSchedRegionsTopDown(); |
315 | } |
316 | |
317 | // Returns LiveIntervals instance for use in DAG mutators and such. |
318 | LiveIntervals *getLIS() const { return LIS; } |
319 | |
320 | /// Return true if this DAG supports VReg liveness and RegPressure. |
321 | virtual bool hasVRegLiveness() const { return false; } |
322 | |
323 | /// Add a postprocessing step to the DAG builder. |
324 | /// Mutations are applied in the order that they are added after normal DAG |
325 | /// building and before MachineSchedStrategy initialization. |
326 | /// |
327 | /// ScheduleDAGMI takes ownership of the Mutation object. |
328 | void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { |
329 | if (Mutation) |
330 | Mutations.push_back(x: std::move(Mutation)); |
331 | } |
332 | |
333 | MachineBasicBlock::iterator top() const { return CurrentTop; } |
334 | MachineBasicBlock::iterator bottom() const { return CurrentBottom; } |
335 | |
336 | /// Implement the ScheduleDAGInstrs interface for handling the next scheduling |
337 | /// region. This covers all instructions in a block, while schedule() may only |
338 | /// cover a subset. |
339 | void enterRegion(MachineBasicBlock *bb, |
340 | MachineBasicBlock::iterator begin, |
341 | MachineBasicBlock::iterator end, |
342 | unsigned regioninstrs) override; |
343 | |
344 | /// Implement ScheduleDAGInstrs interface for scheduling a sequence of |
345 | /// reorderable instructions. |
346 | void schedule() override; |
347 | |
348 | void startBlock(MachineBasicBlock *bb) override; |
349 | void finishBlock() override; |
350 | |
351 | /// Change the position of an instruction within the basic block and update |
352 | /// live ranges and region boundary iterators. |
353 | void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); |
354 | |
355 | const SUnit *getNextClusterPred() const { return NextClusterPred; } |
356 | |
357 | const SUnit *getNextClusterSucc() const { return NextClusterSucc; } |
358 | |
359 | void viewGraph(const Twine &Name, const Twine &Title) override; |
360 | void viewGraph() override; |
361 | |
362 | protected: |
363 | // Top-Level entry points for the schedule() driver... |
364 | |
365 | /// Apply each ScheduleDAGMutation step in order. This allows different |
366 | /// instances of ScheduleDAGMI to perform custom DAG postprocessing. |
367 | void postProcessDAG(); |
368 | |
369 | /// Release ExitSU predecessors and setup scheduler queues. |
370 | void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); |
371 | |
372 | /// Update scheduler DAG and queues after scheduling an instruction. |
373 | void updateQueues(SUnit *SU, bool IsTopNode); |
374 | |
375 | /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. |
376 | void placeDebugValues(); |
377 | |
378 | /// dump the scheduled Sequence. |
379 | void dumpSchedule() const; |
380 | /// Print execution trace of the schedule top-down or bottom-up. |
381 | void dumpScheduleTraceTopDown() const; |
382 | void dumpScheduleTraceBottomUp() const; |
383 | |
384 | // Lesser helpers... |
385 | bool checkSchedLimit(); |
386 | |
387 | void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, |
388 | SmallVectorImpl<SUnit*> &BotRoots); |
389 | |
390 | void releaseSucc(SUnit *SU, SDep *SuccEdge); |
391 | void releaseSuccessors(SUnit *SU); |
392 | void releasePred(SUnit *SU, SDep *PredEdge); |
393 | void releasePredecessors(SUnit *SU); |
394 | }; |
395 | |
/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMILive : public ScheduleDAGMI {
protected:
  RegisterClassInfo *RegClassInfo;

  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
  /// will be empty.
  SchedDFSResult *DFSResult = nullptr;
  BitVector ScheduledTrees;

  /// End of the live-range extent of the current region's instructions.
  MachineBasicBlock::iterator LiveRegionEnd;

  /// Maps vregs to the SUnits of their uses in the current scheduling region.
  VReg2SUnitMultiMap VRegUses;

  // Map each SU to its summary of pressure changes. This array is updated for
  // liveness during bottom-up scheduling. Top-down scheduling may proceed but
  // has no effect on the pressure diffs.
  PressureDiffs SUPressureDiffs;

  /// Register pressure in this region computed by initRegPressure.
  bool ShouldTrackPressure = false;
  bool ShouldTrackLaneMasks = false;
  IntervalPressure RegPressure;
  RegPressureTracker RPTracker;

  /// List of pressure sets that exceed the target's pressure limit before
  /// scheduling, listed in increasing set ID order. Each pressure set is paired
  /// with its max pressure in the currently scheduled regions.
  std::vector<PressureChange> RegionCriticalPSets;

  /// The top of the unscheduled zone.
  IntervalPressure TopPressure;
  RegPressureTracker TopRPTracker;

  /// The bottom of the unscheduled zone.
  IntervalPressure BotPressure;
  RegPressureTracker BotRPTracker;

public:
  /// Kill flags are always removed: liveness is recomputed from the updated
  /// LiveIntervals after scheduling.
  ScheduleDAGMILive(MachineSchedContext *C,
                    std::unique_ptr<MachineSchedStrategy> S)
      : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
        RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
        TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}

  ~ScheduleDAGMILive() override;

  /// Return true if this DAG supports VReg liveness and RegPressure.
  bool hasVRegLiveness() const override { return true; }

  /// Return true if register pressure tracking is enabled.
  bool isTrackingPressure() const { return ShouldTrackPressure; }

  /// Get current register pressure for the top scheduled instructions.
  const IntervalPressure &getTopPressure() const { return TopPressure; }
  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }

  /// Get current register pressure for the bottom scheduled instructions.
  const IntervalPressure &getBotPressure() const { return BotPressure; }
  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }

  /// Get register pressure for the entire scheduling region before scheduling.
  const IntervalPressure &getRegPressure() const { return RegPressure; }

  const std::vector<PressureChange> &getRegionCriticalPSets() const {
    return RegionCriticalPSets;
  }

  /// Return the summary of pressure changes for \p SU (indexed by NodeNum).
  PressureDiff &getPressureDiff(const SUnit *SU) {
    return SUPressureDiffs[SU->NodeNum];
  }
  const PressureDiff &getPressureDiff(const SUnit *SU) const {
    return SUPressureDiffs[SU->NodeNum];
  }

  /// Compute a DFSResult after DAG building is complete, and before any
  /// queue comparisons.
  void computeDFSResult();

  /// Return a non-null DFS result if the scheduling strategy initialized it.
  const SchedDFSResult *getDFSResult() const { return DFSResult; }

  BitVector &getScheduledTrees() { return ScheduledTrees; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  /// Compute the cyclic critical path through the DAG.
  unsigned computeCyclicCriticalPath();

  void dump() const override;

protected:
  // Top-Level entry points for the schedule() driver...

  /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
  /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
  /// region, TopTracker and BottomTracker will be initialized to the top and
  /// bottom of the DAG region without covering any unscheduled instruction.
  void buildDAGWithRegPressure();

  /// Release ExitSU predecessors and setup scheduler queues. Re-position
  /// the Top RP tracker in case the region beginning has changed.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Move an instruction and update register pressure.
  void scheduleMI(SUnit *SU, bool IsTopNode);

  // Lesser helpers...

  void initRegPressure();

  void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses);

  void updateScheduledPressure(const SUnit *SU,
                               const std::vector<unsigned> &NewMaxPressure);

  void collectVRegUses(SUnit &SU);
};
526 | |
527 | //===----------------------------------------------------------------------===// |
528 | /// |
529 | /// Helpers for implementing custom MachineSchedStrategy classes. These take |
530 | /// care of the book-keeping associated with list scheduling heuristics. |
531 | /// |
532 | //===----------------------------------------------------------------------===// |
533 | |
534 | /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience |
535 | /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified |
536 | /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. |
537 | /// |
538 | /// This is a convenience class that may be used by implementations of |
539 | /// MachineSchedStrategy. |
540 | class ReadyQueue { |
541 | unsigned ID; |
542 | std::string Name; |
543 | std::vector<SUnit*> Queue; |
544 | |
545 | public: |
546 | ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} |
547 | |
548 | unsigned getID() const { return ID; } |
549 | |
550 | StringRef getName() const { return Name; } |
551 | |
552 | // SU is in this queue if it's NodeQueueID is a superset of this ID. |
553 | bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } |
554 | |
555 | bool empty() const { return Queue.empty(); } |
556 | |
557 | void clear() { Queue.clear(); } |
558 | |
559 | unsigned size() const { return Queue.size(); } |
560 | |
561 | using iterator = std::vector<SUnit*>::iterator; |
562 | |
563 | iterator begin() { return Queue.begin(); } |
564 | |
565 | iterator end() { return Queue.end(); } |
566 | |
567 | ArrayRef<SUnit*> elements() { return Queue; } |
568 | |
569 | iterator find(SUnit *SU) { return llvm::find(Range&: Queue, Val: SU); } |
570 | |
571 | void push(SUnit *SU) { |
572 | Queue.push_back(x: SU); |
573 | SU->NodeQueueId |= ID; |
574 | } |
575 | |
576 | iterator remove(iterator I) { |
577 | (*I)->NodeQueueId &= ~ID; |
578 | *I = Queue.back(); |
579 | unsigned idx = I - Queue.begin(); |
580 | Queue.pop_back(); |
581 | return Queue.begin() + idx; |
582 | } |
583 | |
584 | void dump() const; |
585 | }; |
586 | |
587 | /// Summarize the unscheduled region. |
588 | struct SchedRemainder { |
589 | // Critical path through the DAG in expected latency. |
590 | unsigned CriticalPath; |
591 | unsigned CyclicCritPath; |
592 | |
593 | // Scaled count of micro-ops left to schedule. |
594 | unsigned RemIssueCount; |
595 | |
596 | bool IsAcyclicLatencyLimited; |
597 | |
598 | // Unscheduled resources |
599 | SmallVector<unsigned, 16> RemainingCounts; |
600 | |
601 | SchedRemainder() { reset(); } |
602 | |
603 | void reset() { |
604 | CriticalPath = 0; |
605 | CyclicCritPath = 0; |
606 | RemIssueCount = 0; |
607 | IsAcyclicLatencyLimited = false; |
608 | RemainingCounts.clear(); |
609 | } |
610 | |
611 | void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); |
612 | }; |
613 | |
614 | /// ResourceSegments are a collection of intervals closed on the |
615 | /// left and opened on the right: |
616 | /// |
617 | /// list{ [a1, b1), [a2, b2), ..., [a_N, b_N) } |
618 | /// |
619 | /// The collection has the following properties: |
620 | /// |
621 | /// 1. The list is ordered: a_i < b_i and b_i < a_(i+1) |
622 | /// |
623 | /// 2. The intervals in the collection do not intersect each other. |
624 | /// |
/// A \ref ResourceSegments instance represents the cycle
/// reservation history of an individual resource instance.
627 | class ResourceSegments { |
628 | public: |
629 | /// Represents an interval of discrete integer values closed on |
630 | /// the left and open on the right: [a, b). |
631 | typedef std::pair<int64_t, int64_t> IntervalTy; |
632 | |
633 | /// Adds an interval [a, b) to the collection of the instance. |
634 | /// |
635 | /// When adding [a, b[ to the collection, the operation merges the |
636 | /// adjacent intervals. For example |
637 | /// |
638 | /// 0 1 2 3 4 5 6 7 8 9 10 |
639 | /// [-----) [--) [--) |
640 | /// + [--) |
641 | /// = [-----------) [--) |
642 | /// |
643 | /// To be able to debug duplicate resource usage, the function has |
644 | /// assertion that checks that no interval should be added if it |
645 | /// overlaps any of the intervals in the collection. We can |
646 | /// require this because by definition a \ref ResourceSegments is |
647 | /// attached only to an individual resource instance. |
648 | void add(IntervalTy A, const unsigned CutOff = 10); |
649 | |
650 | public: |
651 | /// Checks whether intervals intersect. |
652 | static bool intersects(IntervalTy A, IntervalTy B); |
653 | |
654 | /// These function return the interval used by a resource in bottom and top |
655 | /// scheduling. |
656 | /// |
657 | /// Consider an instruction that uses resources X0, X1 and X2 as follows: |
658 | /// |
659 | /// X0 X1 X1 X2 +--------+-------------+--------------+ |
660 | /// |Resource|AcquireAtCycle|ReleaseAtCycle| |
661 | /// +--------+-------------+--------------+ |
662 | /// | X0 | 0 | 1 | |
663 | /// +--------+-------------+--------------+ |
664 | /// | X1 | 1 | 3 | |
665 | /// +--------+-------------+--------------+ |
666 | /// | X2 | 3 | 4 | |
667 | /// +--------+-------------+--------------+ |
668 | /// |
669 | /// If we can schedule the instruction at cycle C, we need to |
670 | /// compute the interval of the resource as follows: |
671 | /// |
672 | /// # TOP DOWN SCHEDULING |
673 | /// |
674 | /// Cycles scheduling flows to the _right_, in the same direction |
675 | /// of time. |
676 | /// |
677 | /// C 1 2 3 4 5 ... |
678 | /// ------|------|------|------|------|------|-----> |
679 | /// X0 X1 X1 X2 ---> direction of time |
680 | /// X0 [C, C+1) |
681 | /// X1 [C+1, C+3) |
682 | /// X2 [C+3, C+4) |
683 | /// |
684 | /// Therefore, the formula to compute the interval for a resource |
685 | /// of an instruction that can be scheduled at cycle C in top-down |
686 | /// scheduling is: |
687 | /// |
688 | /// [C+AcquireAtCycle, C+ReleaseAtCycle) |
689 | /// |
690 | /// |
691 | /// # BOTTOM UP SCHEDULING |
692 | /// |
693 | /// Cycles scheduling flows to the _left_, in opposite direction |
694 | /// of time. |
695 | /// |
696 | /// In bottom up scheduling, the scheduling happens in opposite |
697 | /// direction to the execution of the cycles of the |
698 | /// instruction. When the instruction is scheduled at cycle `C`, |
699 | /// the resources are allocated in the past relative to `C`: |
700 | /// |
701 | /// 2 1 C -1 -2 -3 -4 -5 ... |
702 | /// <-----|------|------|------|------|------|------|------|--- |
703 | /// X0 X1 X1 X2 ---> direction of time |
704 | /// X0 (C+1, C] |
705 | /// X1 (C, C-2] |
706 | /// X2 (C-2, C-3] |
707 | /// |
708 | /// Therefore, the formula to compute the interval for a resource |
709 | /// of an instruction that can be scheduled at cycle C in bottom-up |
710 | /// scheduling is: |
711 | /// |
712 | /// [C-ReleaseAtCycle+1, C-AcquireAtCycle+1) |
713 | /// |
714 | /// |
715 | /// NOTE: In both cases, the number of cycles booked by a |
716 | /// resources is the value (ReleaseAtCycle - AcquireAtCycle). |
717 | static IntervalTy getResourceIntervalBottom(unsigned C, unsigned AcquireAtCycle, |
718 | unsigned ReleaseAtCycle) { |
719 | return std::make_pair<long, long>(x: (long)C - (long)ReleaseAtCycle + 1L, |
720 | y: (long)C - (long)AcquireAtCycle + 1L); |
721 | } |
722 | static IntervalTy getResourceIntervalTop(unsigned C, unsigned AcquireAtCycle, |
723 | unsigned ReleaseAtCycle) { |
724 | return std::make_pair<long, long>(x: (long)C + (long)AcquireAtCycle, |
725 | y: (long)C + (long)ReleaseAtCycle); |
726 | } |
727 | |
728 | private: |
729 | /// Finds the first cycle in which a resource can be allocated. |
730 | /// |
  /// The function uses the \param IntervalBuilder [*] to build a
732 | /// resource interval [a, b[ out of the input parameters \param |
733 | /// CurrCycle, \param AcquireAtCycle and \param ReleaseAtCycle. |
734 | /// |
735 | /// The function then loops through the intervals in the ResourceSegments |
736 | /// and shifts the interval [a, b[ and the ReturnCycle to the |
737 | /// right until there is no intersection between the intervals of |
738 | /// the \ref ResourceSegments instance and the new shifted [a, b[. When |
739 | /// this condition is met, the ReturnCycle (which |
740 | /// correspond to the cycle in which the resource can be |
741 | /// allocated) is returned. |
742 | /// |
743 | /// c = CurrCycle in input |
744 | /// c 1 2 3 4 5 6 7 8 9 10 ... ---> (time |
745 | /// flow) |
746 | /// ResourceSegments... [---) [-------) [-----------) |
747 | /// c [1 3[ -> AcquireAtCycle=1, ReleaseAtCycle=3 |
748 | /// ++c [1 3) |
749 | /// ++c [1 3) |
750 | /// ++c [1 3) |
751 | /// ++c [1 3) |
752 | /// ++c [1 3) ---> returns c |
753 | /// incremented by 5 (c+5) |
754 | /// |
755 | /// |
756 | /// Notice that for bottom-up scheduling the diagram is slightly |
757 | /// different because the current cycle c is always on the right |
758 | /// of the interval [a, b) (see \ref |
759 | /// `getResourceIntervalBottom`). This is because the cycle |
760 | /// increments for bottom-up scheduling moved in the direction |
761 | /// opposite to the direction of time: |
762 | /// |
763 | /// --------> direction of time. |
764 | /// XXYZZZ (resource usage) |
765 | /// --------> direction of top-down execution cycles. |
766 | /// <-------- direction of bottom-up execution cycles. |
767 | /// |
768 | /// Even though bottom-up scheduling moves against the flow of |
769 | /// time, the algorithm used to find the first free slot in between |
770 | /// intervals is the same as for top-down scheduling. |
771 | /// |
772 | /// [*] See \ref `getResourceIntervalTop` and |
773 | /// \ref `getResourceIntervalBottom` to see how such resource intervals |
774 | /// are built. |
775 | unsigned getFirstAvailableAt( |
776 | unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle, |
777 | std::function<IntervalTy(unsigned, unsigned, unsigned)> IntervalBuilder) |
778 | const; |
779 | |
780 | public: |
781 | /// getFirstAvailableAtFromBottom and getFirstAvailableAtFromTop |
782 | /// should be merged in a single function in which a function that |
783 | /// creates the `NewInterval` is passed as a parameter. |
784 | unsigned getFirstAvailableAtFromBottom(unsigned CurrCycle, |
785 | unsigned AcquireAtCycle, |
786 | unsigned ReleaseAtCycle) const { |
787 | return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle, |
788 | IntervalBuilder: getResourceIntervalBottom); |
789 | } |
790 | unsigned getFirstAvailableAtFromTop(unsigned CurrCycle, |
791 | unsigned AcquireAtCycle, |
792 | unsigned ReleaseAtCycle) const { |
793 | return getFirstAvailableAt(CurrCycle, AcquireAtCycle, ReleaseAtCycle, |
794 | IntervalBuilder: getResourceIntervalTop); |
795 | } |
796 | |
private:
/// The collection of intervals; canonicalized (sorted and merged) by
/// \ref sortAndMerge.
std::list<IntervalTy> _Intervals;
/// Merge all adjacent intervals in the collection. For all pairs
/// of adjacent intervals, it performs [a, b) + [b, c) -> [a, c).
///
/// Before performing the merge operation, the intervals are
/// sorted with \ref sort_predicate.
void sortAndMerge();
805 | |
806 | public: |
807 | // constructor for empty set |
808 | explicit ResourceSegments(){}; |
809 | bool empty() const { return _Intervals.empty(); } |
810 | explicit ResourceSegments(std::list<IntervalTy> Intervals) |
811 | : _Intervals(Intervals) { |
812 | sortAndMerge(); |
813 | } |
814 | |
/// Two ResourceSegments are equal when they hold the same (canonicalized)
/// interval lists.
friend bool operator==(const ResourceSegments &c1,
                       const ResourceSegments &c2) {
  return c1._Intervals == c2._Intervals;
}
819 | friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os, |
820 | const ResourceSegments &Segments) { |
821 | os << "{ " ; |
822 | for (auto p : Segments._Intervals) |
823 | os << "[" << p.first << ", " << p.second << "), " ; |
824 | os << "}\n" ; |
825 | return os; |
826 | } |
827 | }; |
828 | |
829 | /// Each Scheduling boundary is associated with ready queues. It tracks the |
830 | /// current cycle in the direction of movement, and maintains the state |
831 | /// of "hazards" and other interlocks at the current cycle. |
832 | class SchedBoundary { |
833 | public: |
834 | /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) |
835 | enum { |
836 | TopQID = 1, |
837 | BotQID = 2, |
838 | LogMaxQID = 2 |
839 | }; |
840 | |
841 | ScheduleDAGMI *DAG = nullptr; |
842 | const TargetSchedModel *SchedModel = nullptr; |
843 | SchedRemainder *Rem = nullptr; |
844 | |
845 | ReadyQueue Available; |
846 | ReadyQueue Pending; |
847 | |
848 | ScheduleHazardRecognizer *HazardRec = nullptr; |
849 | |
850 | private: |
851 | /// True if the pending Q should be checked/updated before scheduling another |
852 | /// instruction. |
853 | bool CheckPending; |
854 | |
855 | /// Number of cycles it takes to issue the instructions scheduled in this |
856 | /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. |
857 | /// See getStalls(). |
858 | unsigned CurrCycle; |
859 | |
860 | /// Micro-ops issued in the current cycle |
861 | unsigned CurrMOps; |
862 | |
863 | /// MinReadyCycle - Cycle of the soonest available instruction. |
864 | unsigned MinReadyCycle; |
865 | |
866 | // The expected latency of the critical path in this scheduled zone. |
867 | unsigned ExpectedLatency; |
868 | |
869 | // The latency of dependence chains leading into this zone. |
870 | // For each node scheduled bottom-up: DLat = max DLat, N.Depth. |
871 | // For each cycle scheduled: DLat -= 1. |
872 | unsigned DependentLatency; |
873 | |
874 | /// Count the scheduled (issued) micro-ops that can be retired by |
875 | /// time=CurrCycle assuming the first scheduled instr is retired at time=0. |
876 | unsigned RetiredMOps; |
877 | |
878 | // Count scheduled resources that have been executed. Resources are |
879 | // considered executed if they become ready in the time that it takes to |
880 | // saturate any resource including the one in question. Counts are scaled |
881 | // for direct comparison with other resources. Counts can be compared with |
882 | // MOps * getMicroOpFactor and Latency * getLatencyFactor. |
883 | SmallVector<unsigned, 16> ExecutedResCounts; |
884 | |
885 | /// Cache the max count for a single resource. |
886 | unsigned MaxExecutedResCount; |
887 | |
888 | // Cache the critical resources ID in this scheduled zone. |
889 | unsigned ZoneCritResIdx; |
890 | |
891 | // Is the scheduled region resource limited vs. latency limited. |
892 | bool IsResourceLimited; |
893 | |
894 | public: |
895 | private: |
896 | /// Record how resources have been allocated across the cycles of |
897 | /// the execution. |
898 | std::map<unsigned, ResourceSegments> ReservedResourceSegments; |
899 | std::vector<unsigned> ReservedCycles; |
900 | /// For each PIdx, stores first index into ReservedResourceSegments that |
901 | /// corresponds to it. |
902 | /// |
903 | /// For example, consider the following 3 resources (ResourceCount = |
904 | /// 3): |
905 | /// |
906 | /// +------------+--------+ |
907 | /// |ResourceName|NumUnits| |
908 | /// +------------+--------+ |
909 | /// | X | 2 | |
910 | /// +------------+--------+ |
911 | /// | Y | 3 | |
912 | /// +------------+--------+ |
913 | /// | Z | 1 | |
914 | /// +------------+--------+ |
915 | /// |
916 | /// In this case, the total number of resource instances is 6. The |
917 | /// vector \ref ReservedResourceSegments will have a slot for each instance. |
918 | /// The vector \ref ReservedCyclesIndex will track at what index the first |
919 | /// instance of the resource is found in the vector of \ref |
920 | /// ReservedResourceSegments: |
921 | /// |
922 | /// Indexes of instances in |
923 | /// ReservedResourceSegments |
924 | /// |
925 | /// 0 1 2 3 4 5 |
926 | /// ReservedCyclesIndex[0] = 0; [X0, X1, |
927 | /// ReservedCyclesIndex[1] = 2; Y0, Y1, Y2 |
928 | /// ReservedCyclesIndex[2] = 5; Z |
929 | SmallVector<unsigned, 16> ReservedCyclesIndex; |
930 | |
931 | // For each PIdx, stores the resource group IDs of its subunits |
932 | SmallVector<APInt, 16> ResourceGroupSubUnitMasks; |
933 | |
934 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
935 | // Remember the greatest possible stall as an upper bound on the number of |
936 | // times we should retry the pending queue because of a hazard. |
937 | unsigned MaxObservedStall; |
938 | #endif |
939 | |
940 | public: |
941 | /// Pending queues extend the ready queues with the same ID and the |
942 | /// PendingFlag set. |
943 | SchedBoundary(unsigned ID, const Twine &Name): |
944 | Available(ID, Name+".A" ), Pending(ID << LogMaxQID, Name+".P" ) { |
945 | reset(); |
946 | } |
947 | SchedBoundary &operator=(const SchedBoundary &other) = delete; |
948 | SchedBoundary(const SchedBoundary &other) = delete; |
949 | ~SchedBoundary(); |
950 | |
951 | void reset(); |
952 | |
953 | void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, |
954 | SchedRemainder *rem); |
955 | |
956 | bool isTop() const { |
957 | return Available.getID() == TopQID; |
958 | } |
959 | |
960 | /// Number of cycles to issue the instructions scheduled in this zone. |
961 | unsigned getCurrCycle() const { return CurrCycle; } |
962 | |
963 | /// Micro-ops issued in the current cycle |
964 | unsigned getCurrMOps() const { return CurrMOps; } |
965 | |
966 | // The latency of dependence chains leading into this zone. |
967 | unsigned getDependentLatency() const { return DependentLatency; } |
968 | |
969 | /// Get the number of latency cycles "covered" by the scheduled |
970 | /// instructions. This is the larger of the critical path within the zone |
971 | /// and the number of cycles required to issue the instructions. |
972 | unsigned getScheduledLatency() const { |
973 | return std::max(a: ExpectedLatency, b: CurrCycle); |
974 | } |
975 | |
976 | unsigned getUnscheduledLatency(SUnit *SU) const { |
977 | return isTop() ? SU->getHeight() : SU->getDepth(); |
978 | } |
979 | |
980 | unsigned getResourceCount(unsigned ResIdx) const { |
981 | return ExecutedResCounts[ResIdx]; |
982 | } |
983 | |
984 | /// Get the scaled count of scheduled micro-ops and resources, including |
985 | /// executed resources. |
986 | unsigned getCriticalCount() const { |
987 | if (!ZoneCritResIdx) |
988 | return RetiredMOps * SchedModel->getMicroOpFactor(); |
989 | return getResourceCount(ResIdx: ZoneCritResIdx); |
990 | } |
991 | |
992 | /// Get a scaled count for the minimum execution time of the scheduled |
993 | /// micro-ops that are ready to execute by getExecutedCount. Notice the |
994 | /// feedback loop. |
995 | unsigned getExecutedCount() const { |
996 | return std::max(a: CurrCycle * SchedModel->getLatencyFactor(), |
997 | b: MaxExecutedResCount); |
998 | } |
999 | |
1000 | unsigned getZoneCritResIdx() const { return ZoneCritResIdx; } |
1001 | |
1002 | // Is the scheduled region resource limited vs. latency limited. |
1003 | bool isResourceLimited() const { return IsResourceLimited; } |
1004 | |
1005 | /// Get the difference between the given SUnit's ready time and the current |
1006 | /// cycle. |
1007 | unsigned getLatencyStallCycles(SUnit *SU); |
1008 | |
1009 | unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, |
1010 | unsigned ReleaseAtCycle, |
1011 | unsigned AcquireAtCycle); |
1012 | |
1013 | std::pair<unsigned, unsigned> getNextResourceCycle(const MCSchedClassDesc *SC, |
1014 | unsigned PIdx, |
1015 | unsigned ReleaseAtCycle, |
1016 | unsigned AcquireAtCycle); |
1017 | |
1018 | bool isUnbufferedGroup(unsigned PIdx) const { |
1019 | return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin && |
1020 | !SchedModel->getProcResource(PIdx)->BufferSize; |
1021 | } |
1022 | |
1023 | bool checkHazard(SUnit *SU); |
1024 | |
1025 | unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); |
1026 | |
1027 | unsigned getOtherResourceCount(unsigned &OtherCritIdx); |
1028 | |
1029 | /// Release SU to make it ready. If it's not in hazard, remove it from |
1030 | /// pending queue (if already in) and push into available queue. |
1031 | /// Otherwise, push the SU into pending queue. |
1032 | /// |
1033 | /// @param SU The unit to be released. |
1034 | /// @param ReadyCycle Until which cycle the unit is ready. |
1035 | /// @param InPQueue Whether SU is already in pending queue. |
1036 | /// @param Idx Position offset in pending queue (if in it). |
1037 | void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, |
1038 | unsigned Idx = 0); |
1039 | |
1040 | void bumpCycle(unsigned NextCycle); |
1041 | |
1042 | void incExecutedResources(unsigned PIdx, unsigned Count); |
1043 | |
1044 | unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, |
1045 | unsigned Cycles, unsigned ReadyCycle, |
1046 | unsigned StartAtCycle); |
1047 | |
1048 | void bumpNode(SUnit *SU); |
1049 | |
1050 | void releasePending(); |
1051 | |
1052 | void removeReady(SUnit *SU); |
1053 | |
1054 | /// Call this before applying any other heuristics to the Available queue. |
1055 | /// Updates the Available/Pending Q's if necessary and returns the single |
1056 | /// available instruction, or NULL if there are multiple candidates. |
1057 | SUnit *pickOnlyChoice(); |
1058 | |
1059 | /// Dump the state of the information that tracks resource usage. |
1060 | void dumpReservedCycles() const; |
1061 | void dumpScheduledState() const; |
1062 | }; |
1063 | |
1064 | /// Base class for GenericScheduler. This class maintains information about |
1065 | /// scheduling candidates based on TargetSchedModel making it easy to implement |
1066 | /// heuristics for either preRA or postRA scheduling. |
1067 | class GenericSchedulerBase : public MachineSchedStrategy { |
1068 | public: |
1069 | /// Represent the type of SchedCandidate found within a single queue. |
1070 | /// pickNodeBidirectional depends on these listed by decreasing priority. |
1071 | enum CandReason : uint8_t { |
1072 | NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak, |
1073 | RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, |
1074 | TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; |
1075 | |
1076 | #ifndef NDEBUG |
1077 | static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); |
1078 | #endif |
1079 | |
1080 | /// Policy for scheduling the next instruction in the candidate's zone. |
1081 | struct CandPolicy { |
1082 | bool ReduceLatency = false; |
1083 | unsigned ReduceResIdx = 0; |
1084 | unsigned DemandResIdx = 0; |
1085 | |
1086 | CandPolicy() = default; |
1087 | |
1088 | bool operator==(const CandPolicy &RHS) const { |
1089 | return ReduceLatency == RHS.ReduceLatency && |
1090 | ReduceResIdx == RHS.ReduceResIdx && |
1091 | DemandResIdx == RHS.DemandResIdx; |
1092 | } |
1093 | bool operator!=(const CandPolicy &RHS) const { |
1094 | return !(*this == RHS); |
1095 | } |
1096 | }; |
1097 | |
1098 | /// Status of an instruction's critical resource consumption. |
1099 | struct SchedResourceDelta { |
1100 | // Count critical resources in the scheduled region required by SU. |
1101 | unsigned CritResources = 0; |
1102 | |
1103 | // Count critical resources from another region consumed by SU. |
1104 | unsigned DemandedResources = 0; |
1105 | |
1106 | SchedResourceDelta() = default; |
1107 | |
1108 | bool operator==(const SchedResourceDelta &RHS) const { |
1109 | return CritResources == RHS.CritResources |
1110 | && DemandedResources == RHS.DemandedResources; |
1111 | } |
1112 | bool operator!=(const SchedResourceDelta &RHS) const { |
1113 | return !operator==(RHS); |
1114 | } |
1115 | }; |
1116 | |
1117 | /// Store the state used by GenericScheduler heuristics, required for the |
1118 | /// lifetime of one invocation of pickNode(). |
1119 | struct SchedCandidate { |
1120 | CandPolicy Policy; |
1121 | |
1122 | // The best SUnit candidate. |
1123 | SUnit *SU; |
1124 | |
1125 | // The reason for this candidate. |
1126 | CandReason Reason; |
1127 | |
1128 | // Whether this candidate should be scheduled at top/bottom. |
1129 | bool AtTop; |
1130 | |
1131 | // Register pressure values for the best candidate. |
1132 | RegPressureDelta RPDelta; |
1133 | |
1134 | // Critical resource consumption of the best candidate. |
1135 | SchedResourceDelta ResDelta; |
1136 | |
1137 | SchedCandidate() { reset(NewPolicy: CandPolicy()); } |
1138 | SchedCandidate(const CandPolicy &Policy) { reset(NewPolicy: Policy); } |
1139 | |
1140 | void reset(const CandPolicy &NewPolicy) { |
1141 | Policy = NewPolicy; |
1142 | SU = nullptr; |
1143 | Reason = NoCand; |
1144 | AtTop = false; |
1145 | RPDelta = RegPressureDelta(); |
1146 | ResDelta = SchedResourceDelta(); |
1147 | } |
1148 | |
1149 | bool isValid() const { return SU; } |
1150 | |
1151 | // Copy the status of another candidate without changing policy. |
1152 | void setBest(SchedCandidate &Best) { |
1153 | assert(Best.Reason != NoCand && "uninitialized Sched candidate" ); |
1154 | SU = Best.SU; |
1155 | Reason = Best.Reason; |
1156 | AtTop = Best.AtTop; |
1157 | RPDelta = Best.RPDelta; |
1158 | ResDelta = Best.ResDelta; |
1159 | } |
1160 | |
1161 | void initResourceDelta(const ScheduleDAGMI *DAG, |
1162 | const TargetSchedModel *SchedModel); |
1163 | }; |
1164 | |
1165 | protected: |
1166 | const MachineSchedContext *Context; |
1167 | const TargetSchedModel *SchedModel = nullptr; |
1168 | const TargetRegisterInfo *TRI = nullptr; |
1169 | |
1170 | SchedRemainder Rem; |
1171 | |
1172 | GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {} |
1173 | |
1174 | void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, |
1175 | SchedBoundary *OtherZone); |
1176 | |
1177 | #ifndef NDEBUG |
1178 | void traceCandidate(const SchedCandidate &Cand); |
1179 | #endif |
1180 | |
1181 | private: |
1182 | bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone, |
1183 | bool ComputeRemLatency, unsigned &RemLatency) const; |
1184 | }; |
1185 | |
// Utility functions used by heuristics in tryCandidate().

/// Candidate comparator: prefer the candidate with the lower value for the
/// given \p Reason (see MachineScheduler.cpp for the exact tie-breaking
/// contract).
bool tryLess(int TryVal, int CandVal,
             GenericSchedulerBase::SchedCandidate &TryCand,
             GenericSchedulerBase::SchedCandidate &Cand,
             GenericSchedulerBase::CandReason Reason);
/// Candidate comparator: prefer the candidate with the higher value for the
/// given \p Reason.
bool tryGreater(int TryVal, int CandVal,
                GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                GenericSchedulerBase::CandReason Reason);
/// Candidate comparator based on latency relative to the boundary \p Zone.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                SchedBoundary &Zone);
/// Candidate comparator based on register-pressure changes \p TryP / \p CandP.
bool tryPressure(const PressureChange &TryP,
                 const PressureChange &CandP,
                 GenericSchedulerBase::SchedCandidate &TryCand,
                 GenericSchedulerBase::SchedCandidate &Cand,
                 GenericSchedulerBase::CandReason Reason,
                 const TargetRegisterInfo *TRI,
                 const MachineFunction &MF);
/// NOTE(review): presumably counts the remaining weak (cluster) edges for
/// \p SU on the given side — confirm against the implementation.
unsigned getWeakLeft(const SUnit *SU, bool isTop);
/// NOTE(review): presumably a scheduling bias for copies involving physical
/// registers — confirm against the implementation.
int biasPhysReg(const SUnit *SU, bool isTop);
1207 | |
1208 | /// GenericScheduler shrinks the unscheduled zone using heuristics to balance |
1209 | /// the schedule. |
1210 | class GenericScheduler : public GenericSchedulerBase { |
1211 | public: |
1212 | GenericScheduler(const MachineSchedContext *C): |
1213 | GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ" ), |
1214 | Bot(SchedBoundary::BotQID, "BotQ" ) {} |
1215 | |
1216 | void initPolicy(MachineBasicBlock::iterator Begin, |
1217 | MachineBasicBlock::iterator End, |
1218 | unsigned NumRegionInstrs) override; |
1219 | |
1220 | void dumpPolicy() const override; |
1221 | |
1222 | bool shouldTrackPressure() const override { |
1223 | return RegionPolicy.ShouldTrackPressure; |
1224 | } |
1225 | |
1226 | bool shouldTrackLaneMasks() const override { |
1227 | return RegionPolicy.ShouldTrackLaneMasks; |
1228 | } |
1229 | |
1230 | void initialize(ScheduleDAGMI *dag) override; |
1231 | |
1232 | SUnit *pickNode(bool &IsTopNode) override; |
1233 | |
1234 | void schedNode(SUnit *SU, bool IsTopNode) override; |
1235 | |
1236 | void releaseTopNode(SUnit *SU) override { |
1237 | if (SU->isScheduled) |
1238 | return; |
1239 | |
1240 | Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle, InPQueue: false); |
1241 | TopCand.SU = nullptr; |
1242 | } |
1243 | |
1244 | void releaseBottomNode(SUnit *SU) override { |
1245 | if (SU->isScheduled) |
1246 | return; |
1247 | |
1248 | Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle, InPQueue: false); |
1249 | BotCand.SU = nullptr; |
1250 | } |
1251 | |
1252 | void registerRoots() override; |
1253 | |
1254 | protected: |
1255 | ScheduleDAGMILive *DAG = nullptr; |
1256 | |
1257 | MachineSchedPolicy RegionPolicy; |
1258 | |
1259 | // State of the top and bottom scheduled instruction boundaries. |
1260 | SchedBoundary Top; |
1261 | SchedBoundary Bot; |
1262 | |
1263 | /// Candidate last picked from Top boundary. |
1264 | SchedCandidate TopCand; |
1265 | /// Candidate last picked from Bot boundary. |
1266 | SchedCandidate BotCand; |
1267 | |
1268 | void checkAcyclicLatency(); |
1269 | |
1270 | void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, |
1271 | const RegPressureTracker &RPTracker, |
1272 | RegPressureTracker &TempTracker); |
1273 | |
1274 | virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, |
1275 | SchedBoundary *Zone) const; |
1276 | |
1277 | SUnit *pickNodeBidirectional(bool &IsTopNode); |
1278 | |
1279 | void pickNodeFromQueue(SchedBoundary &Zone, |
1280 | const CandPolicy &ZonePolicy, |
1281 | const RegPressureTracker &RPTracker, |
1282 | SchedCandidate &Candidate); |
1283 | |
1284 | void reschedulePhysReg(SUnit *SU, bool isTop); |
1285 | }; |
1286 | |
1287 | /// PostGenericScheduler - Interface to the scheduling algorithm used by |
1288 | /// ScheduleDAGMI. |
1289 | /// |
1290 | /// Callbacks from ScheduleDAGMI: |
1291 | /// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... |
1292 | class PostGenericScheduler : public GenericSchedulerBase { |
1293 | protected: |
1294 | ScheduleDAGMI *DAG = nullptr; |
1295 | SchedBoundary Top; |
1296 | SmallVector<SUnit*, 8> BotRoots; |
1297 | |
1298 | public: |
1299 | PostGenericScheduler(const MachineSchedContext *C): |
1300 | GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ" ) {} |
1301 | |
1302 | ~PostGenericScheduler() override = default; |
1303 | |
1304 | void initPolicy(MachineBasicBlock::iterator Begin, |
1305 | MachineBasicBlock::iterator End, |
1306 | unsigned NumRegionInstrs) override { |
1307 | /* no configurable policy */ |
1308 | } |
1309 | |
1310 | /// PostRA scheduling does not track pressure. |
1311 | bool shouldTrackPressure() const override { return false; } |
1312 | |
1313 | void initialize(ScheduleDAGMI *Dag) override; |
1314 | |
1315 | void registerRoots() override; |
1316 | |
1317 | SUnit *pickNode(bool &IsTopNode) override; |
1318 | |
1319 | void scheduleTree(unsigned SubtreeID) override { |
1320 | llvm_unreachable("PostRA scheduler does not support subtree analysis." ); |
1321 | } |
1322 | |
1323 | void schedNode(SUnit *SU, bool IsTopNode) override; |
1324 | |
1325 | void releaseTopNode(SUnit *SU) override { |
1326 | if (SU->isScheduled) |
1327 | return; |
1328 | Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle, InPQueue: false); |
1329 | } |
1330 | |
1331 | // Only called for roots. |
1332 | void releaseBottomNode(SUnit *SU) override { |
1333 | BotRoots.push_back(Elt: SU); |
1334 | } |
1335 | |
1336 | protected: |
1337 | virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); |
1338 | |
1339 | void pickNodeFromQueue(SchedCandidate &Cand); |
1340 | }; |
1341 | |
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
/// Adds default DAG mutations.
ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);

/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);

/// Create a DAG mutation that clusters loads.
/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to load clustering.
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                             const TargetRegisterInfo *TRI,
                             bool ReorderWhileClustering = false);

/// Create a DAG mutation that clusters stores.
/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to store clustering.
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI,
                              bool ReorderWhileClustering = false);

/// Create a DAG mutation that constrains the scheduling of copy instructions
/// (see the implementation in MachineScheduler.cpp for details).
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI);
1367 | |
1368 | } // end namespace llvm |
1369 | |
1370 | #endif // LLVM_CODEGEN_MACHINESCHEDULER_H |
1371 | |