1 | //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. |
10 | // |
11 | // Software pipelining (SWP) is an instruction scheduling technique for loops |
12 | // that overlap loop iterations and exploits ILP via a compiler transformation. |
13 | // |
14 | // Swing Modulo Scheduling is an implementation of software pipelining |
15 | // that generates schedules that are near optimal in terms of initiation |
16 | // interval, register requirements, and stage count. See the papers: |
17 | // |
18 | // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, |
19 | // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 |
20 | // Conference on Parallel Architectures and Compilation Techiniques. |
21 | // |
22 | // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. |
23 | // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE |
24 | // Transactions on Computers, Vol. 50, No. 3, 2001. |
25 | // |
26 | // "An Implementation of Swing Modulo Scheduling With Extensions for |
27 | // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at |
28 | // Urbana-Champaign, 2005. |
29 | // |
30 | // |
31 | // The SMS algorithm consists of three main steps after computing the minimal |
32 | // initiation interval (MII). |
33 | // 1) Analyze the dependence graph and compute information about each |
34 | // instruction in the graph. |
35 | // 2) Order the nodes (instructions) by priority based upon the heuristics |
36 | // described in the algorithm. |
37 | // 3) Attempt to schedule the nodes in the specified order using the MII. |
38 | // |
39 | //===----------------------------------------------------------------------===// |
40 | #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H |
41 | #define LLVM_CODEGEN_MACHINEPIPELINER_H |
42 | |
43 | #include "llvm/ADT/SetVector.h" |
44 | #include "llvm/CodeGen/DFAPacketizer.h" |
45 | #include "llvm/CodeGen/MachineDominators.h" |
46 | #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" |
47 | #include "llvm/CodeGen/RegisterClassInfo.h" |
48 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
49 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
50 | #include "llvm/CodeGen/TargetInstrInfo.h" |
51 | #include "llvm/InitializePasses.h" |
52 | |
53 | #include <deque> |
54 | |
55 | namespace llvm { |
56 | |
57 | class AAResults; |
58 | class NodeSet; |
59 | class SMSchedule; |
60 | |
61 | extern cl::opt<bool> SwpEnableCopyToPhi; |
62 | extern cl::opt<int> SwpForceIssueWidth; |
63 | |
64 | /// The main class in the implementation of the target independent |
65 | /// software pipeliner pass. |
66 | class MachinePipeliner : public MachineFunctionPass { |
67 | public: |
68 | MachineFunction *MF = nullptr; |
69 | MachineOptimizationRemarkEmitter *ORE = nullptr; |
70 | const MachineLoopInfo *MLI = nullptr; |
71 | const MachineDominatorTree *MDT = nullptr; |
72 | const InstrItineraryData *InstrItins = nullptr; |
73 | const TargetInstrInfo *TII = nullptr; |
74 | RegisterClassInfo RegClassInfo; |
75 | bool disabledByPragma = false; |
76 | unsigned II_setByPragma = 0; |
77 | |
78 | #ifndef NDEBUG |
79 | static int NumTries; |
80 | #endif |
81 | |
82 | /// Cache the target analysis information about the loop. |
83 | struct LoopInfo { |
84 | MachineBasicBlock *TBB = nullptr; |
85 | MachineBasicBlock *FBB = nullptr; |
86 | SmallVector<MachineOperand, 4> BrCond; |
87 | MachineInstr *LoopInductionVar = nullptr; |
88 | MachineInstr *LoopCompare = nullptr; |
89 | std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo = |
90 | nullptr; |
91 | }; |
92 | LoopInfo LI; |
93 | |
94 | static char ID; |
95 | |
96 | MachinePipeliner() : MachineFunctionPass(ID) { |
97 | initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); |
98 | } |
99 | |
100 | bool runOnMachineFunction(MachineFunction &MF) override; |
101 | |
102 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
103 | |
104 | private: |
105 | void preprocessPhiNodes(MachineBasicBlock &B); |
106 | bool canPipelineLoop(MachineLoop &L); |
107 | bool scheduleLoop(MachineLoop &L); |
108 | bool swingModuloScheduler(MachineLoop &L); |
109 | void setPragmaPipelineOptions(MachineLoop &L); |
110 | }; |
111 | |
112 | /// This class builds the dependence graph for the instructions in a loop, |
113 | /// and attempts to schedule the instructions using the SMS algorithm. |
114 | class SwingSchedulerDAG : public ScheduleDAGInstrs { |
115 | MachinePipeliner &Pass; |
116 | /// The minimum initiation interval between iterations for this schedule. |
117 | unsigned MII = 0; |
118 | /// The maximum initiation interval between iterations for this schedule. |
119 | unsigned MAX_II = 0; |
120 | /// Set to true if a valid pipelined schedule is found for the loop. |
121 | bool Scheduled = false; |
122 | MachineLoop &Loop; |
123 | LiveIntervals &LIS; |
124 | const RegisterClassInfo &RegClassInfo; |
125 | unsigned II_setByPragma = 0; |
126 | TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr; |
127 | |
128 | /// A toplogical ordering of the SUnits, which is needed for changing |
129 | /// dependences and iterating over the SUnits. |
130 | ScheduleDAGTopologicalSort Topo; |
131 | |
132 | struct NodeInfo { |
133 | int ASAP = 0; |
134 | int ALAP = 0; |
135 | int ZeroLatencyDepth = 0; |
136 | int ZeroLatencyHeight = 0; |
137 | |
138 | NodeInfo() = default; |
139 | }; |
140 | /// Computed properties for each node in the graph. |
141 | std::vector<NodeInfo> ScheduleInfo; |
142 | |
143 | enum OrderKind { BottomUp = 0, TopDown = 1 }; |
144 | /// Computed node ordering for scheduling. |
145 | SetVector<SUnit *> NodeOrder; |
146 | |
147 | using NodeSetType = SmallVector<NodeSet, 8>; |
148 | using ValueMapTy = DenseMap<unsigned, unsigned>; |
149 | using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; |
150 | using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; |
151 | |
152 | /// Instructions to change when emitting the final schedule. |
153 | DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; |
154 | |
155 | /// We may create a new instruction, so remember it because it |
156 | /// must be deleted when the pass is finished. |
157 | DenseMap<MachineInstr*, MachineInstr *> NewMIs; |
158 | |
159 | /// Ordered list of DAG postprocessing steps. |
160 | std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; |
161 | |
162 | /// Helper class to implement Johnson's circuit finding algorithm. |
163 | class Circuits { |
164 | std::vector<SUnit> &SUnits; |
165 | SetVector<SUnit *> Stack; |
166 | BitVector Blocked; |
167 | SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; |
168 | SmallVector<SmallVector<int, 4>, 16> AdjK; |
169 | // Node to Index from ScheduleDAGTopologicalSort |
170 | std::vector<int> *Node2Idx; |
171 | unsigned NumPaths = 0u; |
172 | static unsigned MaxPaths; |
173 | |
174 | public: |
175 | Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo) |
176 | : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) { |
177 | Node2Idx = new std::vector<int>(SUs.size()); |
178 | unsigned Idx = 0; |
179 | for (const auto &NodeNum : Topo) |
180 | Node2Idx->at(n: NodeNum) = Idx++; |
181 | } |
182 | Circuits &operator=(const Circuits &other) = delete; |
183 | Circuits(const Circuits &other) = delete; |
184 | ~Circuits() { delete Node2Idx; } |
185 | |
186 | /// Reset the data structures used in the circuit algorithm. |
187 | void reset() { |
188 | Stack.clear(); |
189 | Blocked.reset(); |
190 | B.assign(NumElts: SUnits.size(), Elt: SmallPtrSet<SUnit *, 4>()); |
191 | NumPaths = 0; |
192 | } |
193 | |
194 | void createAdjacencyStructure(SwingSchedulerDAG *DAG); |
195 | bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); |
196 | void unblock(int U); |
197 | }; |
198 | |
199 | struct CopyToPhiMutation : public ScheduleDAGMutation { |
200 | void apply(ScheduleDAGInstrs *DAG) override; |
201 | }; |
202 | |
203 | public: |
204 | SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, |
205 | const RegisterClassInfo &rci, unsigned II, |
206 | TargetInstrInfo::PipelinerLoopInfo *PLI) |
207 | : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), |
208 | RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI), |
209 | Topo(SUnits, &ExitSU) { |
210 | P.MF->getSubtarget().getSMSMutations(Mutations); |
211 | if (SwpEnableCopyToPhi) |
212 | Mutations.push_back(x: std::make_unique<CopyToPhiMutation>()); |
213 | } |
214 | |
215 | void schedule() override; |
216 | void finishBlock() override; |
217 | |
218 | /// Return true if the loop kernel has been scheduled. |
219 | bool hasNewSchedule() { return Scheduled; } |
220 | |
221 | /// Return the earliest time an instruction may be scheduled. |
222 | int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } |
223 | |
224 | /// Return the latest time an instruction my be scheduled. |
225 | int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } |
226 | |
227 | /// The mobility function, which the number of slots in which |
228 | /// an instruction may be scheduled. |
229 | int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } |
230 | |
231 | /// The depth, in the dependence graph, for a node. |
232 | unsigned getDepth(SUnit *Node) { return Node->getDepth(); } |
233 | |
234 | /// The maximum unweighted length of a path from an arbitrary node to the |
235 | /// given node in which each edge has latency 0 |
236 | int getZeroLatencyDepth(SUnit *Node) { |
237 | return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; |
238 | } |
239 | |
240 | /// The height, in the dependence graph, for a node. |
241 | unsigned getHeight(SUnit *Node) { return Node->getHeight(); } |
242 | |
243 | /// The maximum unweighted length of a path from the given node to an |
244 | /// arbitrary node in which each edge has latency 0 |
245 | int getZeroLatencyHeight(SUnit *Node) { |
246 | return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; |
247 | } |
248 | |
249 | /// Return true if the dependence is a back-edge in the data dependence graph. |
250 | /// Since the DAG doesn't contain cycles, we represent a cycle in the graph |
251 | /// using an anti dependence from a Phi to an instruction. |
252 | bool isBackedge(SUnit *Source, const SDep &Dep) { |
253 | if (Dep.getKind() != SDep::Anti) |
254 | return false; |
255 | return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); |
256 | } |
257 | |
258 | bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true); |
259 | |
260 | /// The distance function, which indicates that operation V of iteration I |
261 | /// depends on operations U of iteration I-distance. |
262 | unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { |
263 | // Instructions that feed a Phi have a distance of 1. Computing larger |
264 | // values for arrays requires data dependence information. |
265 | if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) |
266 | return 1; |
267 | return 0; |
268 | } |
269 | |
270 | void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); |
271 | |
272 | void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); |
273 | |
274 | /// Return the new base register that was stored away for the changed |
275 | /// instruction. |
276 | unsigned getInstrBaseReg(SUnit *SU) const { |
277 | DenseMap<SUnit *, std::pair<unsigned, int64_t>>::const_iterator It = |
278 | InstrChanges.find(Val: SU); |
279 | if (It != InstrChanges.end()) |
280 | return It->second.first; |
281 | return 0; |
282 | } |
283 | |
284 | void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { |
285 | Mutations.push_back(x: std::move(Mutation)); |
286 | } |
287 | |
288 | static bool classof(const ScheduleDAGInstrs *DAG) { return true; } |
289 | |
290 | private: |
291 | void addLoopCarriedDependences(AAResults *AA); |
292 | void updatePhiDependences(); |
293 | void changeDependences(); |
294 | unsigned calculateResMII(); |
295 | unsigned calculateRecMII(NodeSetType &RecNodeSets); |
296 | void findCircuits(NodeSetType &NodeSets); |
297 | void fuseRecs(NodeSetType &NodeSets); |
298 | void removeDuplicateNodes(NodeSetType &NodeSets); |
299 | void computeNodeFunctions(NodeSetType &NodeSets); |
300 | void registerPressureFilter(NodeSetType &NodeSets); |
301 | void colocateNodeSets(NodeSetType &NodeSets); |
302 | void checkNodeSets(NodeSetType &NodeSets); |
303 | void groupRemainingNodes(NodeSetType &NodeSets); |
304 | void addConnectedNodes(SUnit *SU, NodeSet &NewSet, |
305 | SetVector<SUnit *> &NodesAdded); |
306 | void computeNodeOrder(NodeSetType &NodeSets); |
307 | void checkValidNodeOrder(const NodeSetType &Circuits) const; |
308 | bool schedulePipeline(SMSchedule &Schedule); |
309 | bool computeDelta(MachineInstr &MI, unsigned &Delta); |
310 | MachineInstr *findDefInLoop(Register Reg); |
311 | bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, |
312 | unsigned &OffsetPos, unsigned &NewBase, |
313 | int64_t &NewOffset); |
314 | void postProcessDAG(); |
315 | /// Set the Minimum Initiation Interval for this schedule attempt. |
316 | void setMII(unsigned ResMII, unsigned RecMII); |
317 | /// Set the Maximum Initiation Interval for this schedule attempt. |
318 | void setMAX_II(); |
319 | }; |
320 | |
321 | /// A NodeSet contains a set of SUnit DAG nodes with additional information |
322 | /// that assigns a priority to the set. |
323 | class NodeSet { |
324 | SetVector<SUnit *> Nodes; |
325 | bool HasRecurrence = false; |
326 | unsigned RecMII = 0; |
327 | int MaxMOV = 0; |
328 | unsigned MaxDepth = 0; |
329 | unsigned Colocate = 0; |
330 | SUnit *ExceedPressure = nullptr; |
331 | unsigned Latency = 0; |
332 | |
333 | public: |
334 | using iterator = SetVector<SUnit *>::const_iterator; |
335 | |
336 | NodeSet() = default; |
337 | NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { |
338 | Latency = 0; |
339 | for (const SUnit *Node : Nodes) { |
340 | DenseMap<SUnit *, unsigned> SuccSUnitLatency; |
341 | for (const SDep &Succ : Node->Succs) { |
342 | auto SuccSUnit = Succ.getSUnit(); |
343 | if (!Nodes.count(key: SuccSUnit)) |
344 | continue; |
345 | unsigned CurLatency = Succ.getLatency(); |
346 | unsigned MaxLatency = 0; |
347 | if (SuccSUnitLatency.count(Val: SuccSUnit)) |
348 | MaxLatency = SuccSUnitLatency[SuccSUnit]; |
349 | if (CurLatency > MaxLatency) |
350 | SuccSUnitLatency[SuccSUnit] = CurLatency; |
351 | } |
352 | for (auto SUnitLatency : SuccSUnitLatency) |
353 | Latency += SUnitLatency.second; |
354 | } |
355 | } |
356 | |
357 | bool insert(SUnit *SU) { return Nodes.insert(X: SU); } |
358 | |
359 | void insert(iterator S, iterator E) { Nodes.insert(Start: S, End: E); } |
360 | |
361 | template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { |
362 | return Nodes.remove_if(P); |
363 | } |
364 | |
365 | unsigned count(SUnit *SU) const { return Nodes.count(key: SU); } |
366 | |
367 | bool hasRecurrence() { return HasRecurrence; }; |
368 | |
369 | unsigned size() const { return Nodes.size(); } |
370 | |
371 | bool empty() const { return Nodes.empty(); } |
372 | |
373 | SUnit *getNode(unsigned i) const { return Nodes[i]; }; |
374 | |
375 | void setRecMII(unsigned mii) { RecMII = mii; }; |
376 | |
377 | void setColocate(unsigned c) { Colocate = c; }; |
378 | |
379 | void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } |
380 | |
381 | bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } |
382 | |
383 | int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } |
384 | |
385 | int getRecMII() { return RecMII; } |
386 | |
387 | /// Summarize node functions for the entire node set. |
388 | void computeNodeSetInfo(SwingSchedulerDAG *SSD) { |
389 | for (SUnit *SU : *this) { |
390 | MaxMOV = std::max(a: MaxMOV, b: SSD->getMOV(Node: SU)); |
391 | MaxDepth = std::max(a: MaxDepth, b: SSD->getDepth(Node: SU)); |
392 | } |
393 | } |
394 | |
395 | unsigned getLatency() { return Latency; } |
396 | |
397 | unsigned getMaxDepth() { return MaxDepth; } |
398 | |
399 | void clear() { |
400 | Nodes.clear(); |
401 | RecMII = 0; |
402 | HasRecurrence = false; |
403 | MaxMOV = 0; |
404 | MaxDepth = 0; |
405 | Colocate = 0; |
406 | ExceedPressure = nullptr; |
407 | } |
408 | |
409 | operator SetVector<SUnit *> &() { return Nodes; } |
410 | |
411 | /// Sort the node sets by importance. First, rank them by recurrence MII, |
412 | /// then by mobility (least mobile done first), and finally by depth. |
413 | /// Each node set may contain a colocate value which is used as the first |
414 | /// tie breaker, if it's set. |
415 | bool operator>(const NodeSet &RHS) const { |
416 | if (RecMII == RHS.RecMII) { |
417 | if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) |
418 | return Colocate < RHS.Colocate; |
419 | if (MaxMOV == RHS.MaxMOV) |
420 | return MaxDepth > RHS.MaxDepth; |
421 | return MaxMOV < RHS.MaxMOV; |
422 | } |
423 | return RecMII > RHS.RecMII; |
424 | } |
425 | |
426 | bool operator==(const NodeSet &RHS) const { |
427 | return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && |
428 | MaxDepth == RHS.MaxDepth; |
429 | } |
430 | |
431 | bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } |
432 | |
433 | iterator begin() { return Nodes.begin(); } |
434 | iterator end() { return Nodes.end(); } |
435 | void print(raw_ostream &os) const; |
436 | |
437 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
438 | LLVM_DUMP_METHOD void dump() const; |
439 | #endif |
440 | }; |
441 | |
442 | // 16 was selected based on the number of ProcResource kinds for all |
443 | // existing Subtargets, so that SmallVector don't need to resize too often. |
444 | static const int DefaultProcResSize = 16; |
445 | |
446 | class ResourceManager { |
447 | private: |
448 | const MCSubtargetInfo *STI; |
449 | const MCSchedModel &SM; |
450 | const TargetSubtargetInfo *ST; |
451 | const TargetInstrInfo *TII; |
452 | SwingSchedulerDAG *DAG; |
453 | const bool UseDFA; |
454 | /// DFA resources for each slot |
455 | llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources; |
456 | /// Modulo Reservation Table. When a resource with ID R is consumed in cycle |
457 | /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F) |
458 | llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT; |
459 | /// The number of scheduled micro operations for each slot. Micro operations |
460 | /// are assumed to be scheduled one per cycle, starting with the cycle in |
461 | /// which the instruction is scheduled. |
462 | llvm::SmallVector<int> NumScheduledMops; |
463 | /// Each processor resource is associated with a so-called processor resource |
464 | /// mask. This vector allows to correlate processor resource IDs with |
465 | /// processor resource masks. There is exactly one element per each processor |
466 | /// resource declared by the scheduling model. |
467 | llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks; |
468 | int InitiationInterval = 0; |
469 | /// The number of micro operations that can be scheduled at a cycle. |
470 | int IssueWidth; |
471 | |
472 | int calculateResMIIDFA() const; |
473 | /// Check if MRT is overbooked |
474 | bool isOverbooked() const; |
475 | /// Reserve resources on MRT |
476 | void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle); |
477 | /// Unreserve resources on MRT |
478 | void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle); |
479 | |
480 | /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor. |
481 | /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C, |
482 | /// II). |
483 | int positiveModulo(int Dividend, int Divisor) const { |
484 | assert(Divisor > 0); |
485 | int R = Dividend % Divisor; |
486 | if (R < 0) |
487 | R += Divisor; |
488 | return R; |
489 | } |
490 | |
491 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
492 | LLVM_DUMP_METHOD void dumpMRT() const; |
493 | #endif |
494 | |
495 | public: |
496 | ResourceManager(const TargetSubtargetInfo *ST, SwingSchedulerDAG *DAG) |
497 | : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()), |
498 | DAG(DAG), UseDFA(ST->useDFAforSMS()), |
499 | ProcResourceMasks(SM.getNumProcResourceKinds(), 0), |
500 | IssueWidth(SM.IssueWidth) { |
501 | initProcResourceVectors(SM, Masks&: ProcResourceMasks); |
502 | if (IssueWidth <= 0) |
503 | // If IssueWidth is not specified, set a sufficiently large value |
504 | IssueWidth = 100; |
505 | if (SwpForceIssueWidth > 0) |
506 | IssueWidth = SwpForceIssueWidth; |
507 | } |
508 | |
509 | void initProcResourceVectors(const MCSchedModel &SM, |
510 | SmallVectorImpl<uint64_t> &Masks); |
511 | |
512 | /// Check if the resources occupied by a machine instruction are available |
513 | /// in the current state. |
514 | bool canReserveResources(SUnit &SU, int Cycle); |
515 | |
516 | /// Reserve the resources occupied by a machine instruction and change the |
517 | /// current state to reflect that change. |
518 | void reserveResources(SUnit &SU, int Cycle); |
519 | |
520 | int calculateResMII() const; |
521 | |
522 | /// Initialize resources with the initiation interval II. |
523 | void init(int II); |
524 | }; |
525 | |
526 | /// This class represents the scheduled code. The main data structure is a |
527 | /// map from scheduled cycle to instructions. During scheduling, the |
528 | /// data structure explicitly represents all stages/iterations. When |
529 | /// the algorithm finshes, the schedule is collapsed into a single stage, |
530 | /// which represents instructions from different loop iterations. |
531 | /// |
532 | /// The SMS algorithm allows negative values for cycles, so the first cycle |
533 | /// in the schedule is the smallest cycle value. |
534 | class SMSchedule { |
535 | private: |
536 | /// Map from execution cycle to instructions. |
537 | DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; |
538 | |
539 | /// Map from instruction to execution cycle. |
540 | std::map<SUnit *, int> InstrToCycle; |
541 | |
542 | /// Keep track of the first cycle value in the schedule. It starts |
543 | /// as zero, but the algorithm allows negative values. |
544 | int FirstCycle = 0; |
545 | |
546 | /// Keep track of the last cycle value in the schedule. |
547 | int LastCycle = 0; |
548 | |
549 | /// The initiation interval (II) for the schedule. |
550 | int InitiationInterval = 0; |
551 | |
552 | /// Target machine information. |
553 | const TargetSubtargetInfo &ST; |
554 | |
555 | /// Virtual register information. |
556 | MachineRegisterInfo &MRI; |
557 | |
558 | ResourceManager ProcItinResources; |
559 | |
560 | public: |
561 | SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG) |
562 | : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), |
563 | ProcItinResources(&ST, DAG) {} |
564 | |
565 | void reset() { |
566 | ScheduledInstrs.clear(); |
567 | InstrToCycle.clear(); |
568 | FirstCycle = 0; |
569 | LastCycle = 0; |
570 | InitiationInterval = 0; |
571 | } |
572 | |
573 | /// Set the initiation interval for this schedule. |
574 | void setInitiationInterval(int ii) { |
575 | InitiationInterval = ii; |
576 | ProcItinResources.init(II: ii); |
577 | } |
578 | |
579 | /// Return the initiation interval for this schedule. |
580 | int getInitiationInterval() const { return InitiationInterval; } |
581 | |
582 | /// Return the first cycle in the completed schedule. This |
583 | /// can be a negative value. |
584 | int getFirstCycle() const { return FirstCycle; } |
585 | |
586 | /// Return the last cycle in the finalized schedule. |
587 | int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } |
588 | |
589 | /// Return the cycle of the earliest scheduled instruction in the dependence |
590 | /// chain. |
591 | int earliestCycleInChain(const SDep &Dep); |
592 | |
593 | /// Return the cycle of the latest scheduled instruction in the dependence |
594 | /// chain. |
595 | int latestCycleInChain(const SDep &Dep); |
596 | |
597 | void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, |
598 | int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG); |
599 | bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); |
600 | |
601 | /// Iterators for the cycle to instruction map. |
602 | using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; |
603 | using const_sched_iterator = |
604 | DenseMap<int, std::deque<SUnit *>>::const_iterator; |
605 | |
606 | /// Return true if the instruction is scheduled at the specified stage. |
607 | bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { |
608 | return (stageScheduled(SU) == (int)StageNum); |
609 | } |
610 | |
611 | /// Return the stage for a scheduled instruction. Return -1 if |
612 | /// the instruction has not been scheduled. |
613 | int stageScheduled(SUnit *SU) const { |
614 | std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(x: SU); |
615 | if (it == InstrToCycle.end()) |
616 | return -1; |
617 | return (it->second - FirstCycle) / InitiationInterval; |
618 | } |
619 | |
620 | /// Return the cycle for a scheduled instruction. This function normalizes |
621 | /// the first cycle to be 0. |
622 | unsigned cycleScheduled(SUnit *SU) const { |
623 | std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(x: SU); |
624 | assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled." ); |
625 | return (it->second - FirstCycle) % InitiationInterval; |
626 | } |
627 | |
628 | /// Return the maximum stage count needed for this schedule. |
629 | unsigned getMaxStageCount() { |
630 | return (LastCycle - FirstCycle) / InitiationInterval; |
631 | } |
632 | |
633 | /// Return the instructions that are scheduled at the specified cycle. |
634 | std::deque<SUnit *> &getInstructions(int cycle) { |
635 | return ScheduledInstrs[cycle]; |
636 | } |
637 | |
638 | SmallSet<SUnit *, 8> |
639 | computeUnpipelineableNodes(SwingSchedulerDAG *SSD, |
640 | TargetInstrInfo::PipelinerLoopInfo *PLI); |
641 | |
642 | std::deque<SUnit *> |
643 | reorderInstructions(const SwingSchedulerDAG *SSD, |
644 | const std::deque<SUnit *> &Instrs) const; |
645 | |
646 | bool |
647 | normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, |
648 | TargetInstrInfo::PipelinerLoopInfo *PLI); |
649 | bool isValidSchedule(SwingSchedulerDAG *SSD); |
650 | void finalizeSchedule(SwingSchedulerDAG *SSD); |
651 | void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, |
652 | std::deque<SUnit *> &Insts) const; |
653 | bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const; |
654 | bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, |
655 | MachineOperand &MO) const; |
656 | void print(raw_ostream &os) const; |
657 | void dump() const; |
658 | }; |
659 | |
660 | } // end namespace llvm |
661 | |
662 | #endif // LLVM_CODEGEN_MACHINEPIPELINER_H |
663 | |