1 | //===- ScheduleOptimizer.cpp - Calculate an optimized schedule ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass generates an entirely new schedule tree from the data dependences |
10 | // and iteration domains. The new schedule tree is computed in two steps: |
11 | // |
12 | // 1) The isl scheduling optimizer is run |
13 | // |
14 | // The isl scheduling optimizer creates a new schedule tree that maximizes |
15 | // parallelism and tileability and minimizes data-dependence distances. The |
16 | // algorithm used is a modified version of the ``Pluto'' algorithm: |
17 | // |
18 | // U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan. |
19 | // A Practical Automatic Polyhedral Parallelizer and Locality Optimizer. |
20 | // In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language |
21 | // Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008. |
22 | // |
23 | // 2) A set of post-scheduling transformations is applied on the schedule tree. |
24 | // |
25 | // These optimizations include: |
26 | // |
27 | // - Tiling of the innermost tilable bands |
28 | // - Prevectorization - The choice of a possible outer loop that is strip-mined |
29 | // to the innermost level to enable inner-loop |
30 | // vectorization. |
31 | // - Some optimizations for spatial locality are also planned. |
32 | // |
33 | // For a detailed description of the schedule tree itself please see section 6 |
34 | // of: |
35 | // |
36 | // Polyhedral AST generation is more than scanning polyhedra |
37 | // Tobias Grosser, Sven Verdoolaege, Albert Cohen |
38 | // ACM Transactions on Programming Languages and Systems (TOPLAS), |
39 | // 37(4), July 2015 |
40 | // http://www.grosser.es/#pub-polyhedral-AST-generation |
41 | // |
42 | // This publication also contains a detailed discussion of the different options |
43 | // for polyhedral loop unrolling, full/partial tile separation and other uses |
44 | // of the schedule tree. |
45 | // |
46 | //===----------------------------------------------------------------------===// |
47 | |
48 | #include "polly/ScheduleOptimizer.h" |
49 | #include "polly/CodeGen/CodeGeneration.h" |
50 | #include "polly/DependenceInfo.h" |
51 | #include "polly/ManualOptimizer.h" |
52 | #include "polly/MatmulOptimizer.h" |
53 | #include "polly/Options.h" |
54 | #include "polly/ScheduleTreeTransform.h" |
55 | #include "polly/Support/ISLOStream.h" |
56 | #include "polly/Support/ISLTools.h" |
57 | #include "llvm/ADT/Sequence.h" |
58 | #include "llvm/ADT/Statistic.h" |
59 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
60 | #include "llvm/InitializePasses.h" |
61 | #include "llvm/Support/CommandLine.h" |
62 | #include "isl/options.h" |
63 | |
64 | using namespace llvm; |
65 | using namespace polly; |
66 | |
67 | namespace llvm { |
68 | class Loop; |
69 | class Module; |
70 | } // namespace llvm |
71 | |
72 | #include "polly/Support/PollyDebug.h" |
73 | #define DEBUG_TYPE "polly-opt-isl" |
74 | |
75 | static cl::opt<std::string> |
76 | OptimizeDeps("polly-opt-optimize-only" , |
77 | cl::desc("Only a certain kind of dependences (all/raw)" ), |
78 | cl::Hidden, cl::init(Val: "all" ), cl::cat(PollyCategory)); |
79 | |
80 | static cl::opt<std::string> |
81 | SimplifyDeps("polly-opt-simplify-deps" , |
82 | cl::desc("Dependences should be simplified (yes/no)" ), |
83 | cl::Hidden, cl::init(Val: "yes" ), cl::cat(PollyCategory)); |
84 | |
85 | static cl::opt<int> MaxConstantTerm( |
86 | "polly-opt-max-constant-term" , |
87 | cl::desc("The maximal constant term allowed (-1 is unlimited)" ), cl::Hidden, |
88 | cl::init(Val: 20), cl::cat(PollyCategory)); |
89 | |
90 | static cl::opt<int> MaxCoefficient( |
91 | "polly-opt-max-coefficient" , |
92 | cl::desc("The maximal coefficient allowed (-1 is unlimited)" ), cl::Hidden, |
93 | cl::init(Val: 20), cl::cat(PollyCategory)); |
94 | |
95 | static cl::opt<std::string> |
96 | MaximizeBandDepth("polly-opt-maximize-bands" , |
97 | cl::desc("Maximize the band depth (yes/no)" ), cl::Hidden, |
98 | cl::init(Val: "yes" ), cl::cat(PollyCategory)); |
99 | |
100 | static cl::opt<int> |
101 | ScheduleComputeOut("polly-schedule-computeout" , |
102 | cl::desc("Bound the scheduler by maximal amount" |
103 | "of computational steps. " ), |
104 | cl::Hidden, cl::init(Val: 300000), cl::ZeroOrMore, |
105 | cl::cat(PollyCategory)); |
106 | |
107 | static cl::opt<bool> |
108 | GreedyFusion("polly-loopfusion-greedy" , |
109 | cl::desc("Aggressively try to fuse everything" ), cl::Hidden, |
110 | cl::cat(PollyCategory)); |
111 | |
112 | static cl::opt<std::string> OuterCoincidence( |
113 | "polly-opt-outer-coincidence" , |
114 | cl::desc("Try to construct schedules where the outer member of each band " |
115 | "satisfies the coincidence constraints (yes/no)" ), |
116 | cl::Hidden, cl::init(Val: "no" ), cl::cat(PollyCategory)); |
117 | |
118 | static cl::opt<int> PrevectorWidth( |
119 | "polly-prevect-width" , |
120 | cl::desc( |
121 | "The number of loop iterations to strip-mine for pre-vectorization" ), |
122 | cl::Hidden, cl::init(Val: 4), cl::cat(PollyCategory)); |
123 | |
124 | static cl::opt<bool> FirstLevelTiling("polly-tiling" , |
125 | cl::desc("Enable loop tiling" ), |
126 | cl::init(Val: true), cl::cat(PollyCategory)); |
127 | |
128 | static cl::opt<int> FirstLevelDefaultTileSize( |
129 | "polly-default-tile-size" , |
130 | cl::desc("The default tile size (if not enough were provided by" |
131 | " --polly-tile-sizes)" ), |
132 | cl::Hidden, cl::init(Val: 32), cl::cat(PollyCategory)); |
133 | |
134 | static cl::list<int> |
135 | FirstLevelTileSizes("polly-tile-sizes" , |
136 | cl::desc("A tile size for each loop dimension, filled " |
137 | "with --polly-default-tile-size" ), |
138 | cl::Hidden, cl::CommaSeparated, cl::cat(PollyCategory)); |
139 | |
140 | static cl::opt<bool> |
141 | SecondLevelTiling("polly-2nd-level-tiling" , |
142 | cl::desc("Enable a 2nd level loop of loop tiling" ), |
143 | cl::cat(PollyCategory)); |
144 | |
145 | static cl::opt<int> SecondLevelDefaultTileSize( |
146 | "polly-2nd-level-default-tile-size" , |
147 | cl::desc("The default 2nd-level tile size (if not enough were provided by" |
148 | " --polly-2nd-level-tile-sizes)" ), |
149 | cl::Hidden, cl::init(Val: 16), cl::cat(PollyCategory)); |
150 | |
151 | static cl::list<int> |
152 | SecondLevelTileSizes("polly-2nd-level-tile-sizes" , |
153 | cl::desc("A tile size for each loop dimension, filled " |
154 | "with --polly-default-tile-size" ), |
155 | cl::Hidden, cl::CommaSeparated, |
156 | cl::cat(PollyCategory)); |
157 | |
158 | static cl::opt<bool> RegisterTiling("polly-register-tiling" , |
159 | cl::desc("Enable register tiling" ), |
160 | cl::cat(PollyCategory)); |
161 | |
162 | static cl::opt<int> RegisterDefaultTileSize( |
163 | "polly-register-tiling-default-tile-size" , |
164 | cl::desc("The default register tile size (if not enough were provided by" |
165 | " --polly-register-tile-sizes)" ), |
166 | cl::Hidden, cl::init(Val: 2), cl::cat(PollyCategory)); |
167 | |
168 | static cl::list<int> |
169 | RegisterTileSizes("polly-register-tile-sizes" , |
170 | cl::desc("A tile size for each loop dimension, filled " |
171 | "with --polly-register-tile-size" ), |
172 | cl::Hidden, cl::CommaSeparated, cl::cat(PollyCategory)); |
173 | |
174 | static cl::opt<bool> PragmaBasedOpts( |
175 | "polly-pragma-based-opts" , |
176 | cl::desc("Apply user-directed transformation from metadata" ), |
177 | cl::init(Val: true), cl::cat(PollyCategory)); |
178 | |
179 | static cl::opt<bool> EnableReschedule("polly-reschedule" , |
180 | cl::desc("Optimize SCoPs using ISL" ), |
181 | cl::init(Val: true), cl::cat(PollyCategory)); |
182 | |
183 | static cl::opt<bool> |
184 | PMBasedOpts("polly-pattern-matching-based-opts" , |
185 | cl::desc("Perform optimizations based on pattern matching" ), |
186 | cl::init(Val: true), cl::cat(PollyCategory)); |
187 | |
188 | static cl::opt<bool> |
189 | EnablePostopts("polly-postopts" , |
190 | cl::desc("Apply post-rescheduling optimizations such as " |
191 | "tiling (requires -polly-reschedule)" ), |
192 | cl::init(Val: true), cl::cat(PollyCategory)); |
193 | |
194 | static cl::opt<bool> OptimizedScops( |
195 | "polly-optimized-scops" , |
196 | cl::desc("Polly - Dump polyhedral description of Scops optimized with " |
197 | "the isl scheduling optimizer and the set of post-scheduling " |
198 | "transformations is applied on the schedule tree" ), |
199 | cl::cat(PollyCategory)); |
200 | |
201 | STATISTIC(ScopsProcessed, "Number of scops processed" ); |
202 | STATISTIC(ScopsRescheduled, "Number of scops rescheduled" ); |
203 | STATISTIC(ScopsOptimized, "Number of scops optimized" ); |
204 | |
205 | STATISTIC(NumAffineLoopsOptimized, "Number of affine loops optimized" ); |
206 | STATISTIC(NumBoxedLoopsOptimized, "Number of boxed loops optimized" ); |
207 | |
208 | #define THREE_STATISTICS(VARNAME, DESC) \ |
209 | static Statistic VARNAME[3] = { \ |
210 | {DEBUG_TYPE, #VARNAME "0", DESC " (original)"}, \ |
211 | {DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)"}, \ |
212 | {DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)"}} |
213 | |
214 | THREE_STATISTICS(NumBands, "Number of bands" ); |
215 | THREE_STATISTICS(NumBandMembers, "Number of band members" ); |
216 | THREE_STATISTICS(NumCoincident, "Number of coincident band members" ); |
217 | THREE_STATISTICS(NumPermutable, "Number of permutable bands" ); |
218 | THREE_STATISTICS(NumFilters, "Number of filter nodes" ); |
219 | THREE_STATISTICS(NumExtension, "Number of extension nodes" ); |
220 | |
221 | STATISTIC(FirstLevelTileOpts, "Number of first level tiling applied" ); |
222 | STATISTIC(SecondLevelTileOpts, "Number of second level tiling applied" ); |
223 | STATISTIC(RegisterTileOpts, "Number of register tiling applied" ); |
224 | STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied" ); |
225 | STATISTIC(MatMulOpts, |
226 | "Number of matrix multiplication patterns detected and optimized" ); |
227 | |
228 | namespace { |
229 | /// Additional parameters of the schedule optimizer. |
230 | /// |
231 | /// Target Transform Info and the SCoP dependencies used by the schedule |
232 | /// optimizer. |
233 | struct OptimizerAdditionalInfoTy { |
234 | const llvm::TargetTransformInfo *TTI; |
235 | const Dependences *D; |
236 | bool PatternOpts; |
237 | bool Postopts; |
238 | bool Prevect; |
239 | bool &DepsChanged; |
240 | }; |
241 | |
242 | class ScheduleTreeOptimizer final { |
243 | public: |
244 | /// Apply schedule tree transformations. |
245 | /// |
246 | /// This function takes an (possibly already optimized) schedule tree and |
247 | /// applies a set of additional optimizations on the schedule tree. The |
248 | /// transformations applied include: |
249 | /// |
250 | /// - Pattern-based optimizations |
251 | /// - Tiling |
252 | /// - Prevectorization |
253 | /// |
254 | /// @param Schedule The schedule object the transformations will be applied |
255 | /// to. |
256 | /// @param OAI Target Transform Info and the SCoP dependencies. |
257 | /// @returns The transformed schedule. |
258 | static isl::schedule |
259 | optimizeSchedule(isl::schedule Schedule, |
260 | const OptimizerAdditionalInfoTy *OAI = nullptr); |
261 | |
262 | /// Apply schedule tree transformations. |
263 | /// |
264 | /// This function takes a node in an (possibly already optimized) schedule |
265 | /// tree and applies a set of additional optimizations on this schedule tree |
266 | /// node and its descendants. The transformations applied include: |
267 | /// |
268 | /// - Pattern-based optimizations |
269 | /// - Tiling |
270 | /// - Prevectorization |
271 | /// |
272 | /// @param Node The schedule object post-transformations will be applied to. |
273 | /// @param OAI Target Transform Info and the SCoP dependencies. |
274 | /// @returns The transformed schedule. |
275 | static isl::schedule_node |
276 | optimizeScheduleNode(isl::schedule_node Node, |
277 | const OptimizerAdditionalInfoTy *OAI = nullptr); |
278 | |
279 | /// Decide if the @p NewSchedule is profitable for @p S. |
280 | /// |
281 | /// @param S The SCoP we optimize. |
282 | /// @param NewSchedule The new schedule we computed. |
283 | /// |
284 | /// @return True, if we believe @p NewSchedule is an improvement for @p S. |
285 | static bool isProfitableSchedule(polly::Scop &S, isl::schedule NewSchedule); |
286 | |
287 | /// Isolate a set of partial tile prefixes. |
288 | /// |
289 | /// This set should ensure that it contains only partial tile prefixes that |
290 | /// have exactly VectorWidth iterations. |
291 | /// |
292 | /// @param Node A schedule node band, which is a parent of a band node, |
293 | /// that contains a vector loop. |
294 | /// @return Modified isl_schedule_node. |
295 | static isl::schedule_node isolateFullPartialTiles(isl::schedule_node Node, |
296 | int VectorWidth); |
297 | |
298 | private: |
299 | /// Check if this node is a band node we want to tile. |
300 | /// |
301 | /// We look for innermost band nodes where individual dimensions are marked as |
302 | /// permutable. |
303 | /// |
304 | /// @param Node The node to check. |
305 | static bool isTileableBandNode(isl::schedule_node Node); |
306 | |
307 | /// Check if this node is a band node we want to transform using pattern |
308 | /// matching. |
309 | /// |
310 | /// We look for innermost band nodes where individual dimensions are marked as |
311 | /// permutable. There is no restriction on the number of individual |
312 | /// dimensions. |
313 | /// |
314 | /// @param Node The node to check. |
315 | static bool isPMOptimizableBandNode(isl::schedule_node Node); |
316 | |
317 | /// Pre-vectorizes one scheduling dimension of a schedule band. |
318 | /// |
319 | /// prevectSchedBand splits out the dimension DimToVectorize, tiles it and |
320 | /// sinks the resulting point loop. |
321 | /// |
322 | /// Example (DimToVectorize=0, VectorWidth=4): |
323 | /// |
324 | /// | Before transformation: |
325 | /// | |
326 | /// | A[i,j] -> [i,j] |
327 | /// | |
328 | /// | for (i = 0; i < 128; i++) |
329 | /// | for (j = 0; j < 128; j++) |
330 | /// | A(i,j); |
331 | /// |
332 | /// | After transformation: |
333 | /// | |
334 | /// | for (it = 0; it < 32; it+=1) |
335 | /// | for (j = 0; j < 128; j++) |
336 | /// | for (ip = 0; ip <= 3; ip++) |
337 | /// | A(4 * it + ip,j); |
338 | /// |
339 | /// The goal of this transformation is to create a trivially vectorizable |
340 | /// loop. This means a parallel loop at the innermost level that has a |
341 | /// constant number of iterations corresponding to the target vector width. |
342 | /// |
343 | /// This transformation creates a loop at the innermost level. The loop has |
344 | /// a constant number of iterations, if the number of loop iterations at |
345 | /// DimToVectorize can be divided by VectorWidth. The default VectorWidth is |
346 | /// currently constant and not yet target specific. This function does not |
347 | /// reason about parallelism. |
348 | static isl::schedule_node prevectSchedBand(isl::schedule_node Node, |
349 | unsigned DimToVectorize, |
350 | int VectorWidth); |
351 | |
352 | /// Apply additional optimizations on the bands in the schedule tree. |
353 | /// |
354 | /// We are looking for an innermost band node and apply the following |
355 | /// transformations: |
356 | /// |
357 | /// - Tile the band |
358 | /// - if the band is tileable |
359 | /// - if the band has more than one loop dimension |
360 | /// |
361 | /// - Prevectorize the schedule of the band (or the point loop in case of |
362 | /// tiling). |
363 | /// - if vectorization is enabled |
364 | /// |
365 | /// @param Node The schedule node to (possibly) optimize. |
366 | /// @param User A pointer to forward some use information |
367 | /// (currently unused). |
368 | static isl_schedule_node *optimizeBand(isl_schedule_node *Node, void *User); |
369 | |
370 | /// Apply tiling optimizations on the bands in the schedule tree. |
371 | /// |
372 | /// @param Node The schedule node to (possibly) optimize. |
373 | static isl::schedule_node applyTileBandOpt(isl::schedule_node Node); |
374 | |
375 | /// Apply prevectorization on the bands in the schedule tree. |
376 | /// |
377 | /// @param Node The schedule node to (possibly) prevectorize. |
378 | static isl::schedule_node applyPrevectBandOpt(isl::schedule_node Node); |
379 | }; |
380 | |
381 | isl::schedule_node |
382 | ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node, |
383 | int VectorWidth) { |
384 | assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band); |
385 | Node = Node.child(pos: 0).child(pos: 0); |
386 | isl::union_map SchedRelUMap = Node.get_prefix_schedule_relation(); |
387 | isl::union_set ScheduleRangeUSet = SchedRelUMap.range(); |
388 | isl::set ScheduleRange{ScheduleRangeUSet}; |
389 | isl::set IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth); |
390 | auto AtomicOption = getDimOptions(Ctx: IsolateDomain.ctx(), Option: "atomic" ); |
391 | isl::union_set IsolateOption = getIsolateOptions(IsolateDomain, OutDimsNum: 1); |
392 | Node = Node.parent().parent(); |
393 | isl::union_set Options = IsolateOption.unite(uset2: AtomicOption); |
394 | isl::schedule_node_band Result = |
395 | Node.as<isl::schedule_node_band>().set_ast_build_options(Options); |
396 | return Result; |
397 | } |
398 | |
399 | struct InsertSimdMarkers final : ScheduleNodeRewriter<InsertSimdMarkers> { |
400 | isl::schedule_node visitBand(isl::schedule_node_band Band) { |
401 | isl::schedule_node Node = visitChildren(Node: Band); |
402 | |
403 | // Only add SIMD markers to innermost bands. |
404 | if (!Node.first_child().isa<isl::schedule_node_leaf>()) |
405 | return Node; |
406 | |
407 | isl::id LoopMarker = isl::id::alloc(ctx: Band.ctx(), name: "SIMD" , user: nullptr); |
408 | return Band.insert_mark(mark: LoopMarker); |
409 | } |
410 | }; |
411 | |
412 | isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( |
413 | isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) { |
414 | assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band); |
415 | |
416 | auto Space = isl::manage(ptr: isl_schedule_node_band_get_space(node: Node.get())); |
417 | unsigned ScheduleDimensions = unsignedFromIslSize(Size: Space.dim(type: isl::dim::set)); |
418 | assert(DimToVectorize < ScheduleDimensions); |
419 | |
420 | if (DimToVectorize > 0) { |
421 | Node = isl::manage( |
422 | ptr: isl_schedule_node_band_split(node: Node.release(), pos: DimToVectorize)); |
423 | Node = Node.child(pos: 0); |
424 | } |
425 | if (DimToVectorize < ScheduleDimensions - 1) |
426 | Node = isl::manage(ptr: isl_schedule_node_band_split(node: Node.release(), pos: 1)); |
427 | Space = isl::manage(ptr: isl_schedule_node_band_get_space(node: Node.get())); |
428 | auto Sizes = isl::multi_val::zero(space: Space); |
429 | Sizes = Sizes.set_val(pos: 0, el: isl::val(Node.ctx(), VectorWidth)); |
430 | Node = |
431 | isl::manage(ptr: isl_schedule_node_band_tile(node: Node.release(), sizes: Sizes.release())); |
432 | Node = isolateFullPartialTiles(Node, VectorWidth); |
433 | Node = Node.child(pos: 0); |
434 | // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise, |
435 | // we will have troubles to match it in the backend. |
436 | Node = Node.as<isl::schedule_node_band>().set_ast_build_options( |
437 | isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }" )); |
438 | |
439 | // Sink the inner loop into the smallest possible statements to make them |
440 | // represent a single vector instruction if possible. |
441 | Node = isl::manage(ptr: isl_schedule_node_band_sink(node: Node.release())); |
442 | |
443 | // Add SIMD markers to those vector statements. |
444 | InsertSimdMarkers SimdMarkerInserter; |
445 | Node = SimdMarkerInserter.visit(Node); |
446 | |
447 | PrevectOpts++; |
448 | return Node.parent(); |
449 | } |
450 | |
451 | static bool isSimpleInnermostBand(const isl::schedule_node &Node) { |
452 | assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band); |
453 | assert(isl_schedule_node_n_children(Node.get()) == 1); |
454 | |
455 | auto ChildType = isl_schedule_node_get_type(node: Node.child(pos: 0).get()); |
456 | |
457 | if (ChildType == isl_schedule_node_leaf) |
458 | return true; |
459 | |
460 | if (ChildType != isl_schedule_node_sequence) |
461 | return false; |
462 | |
463 | auto Sequence = Node.child(pos: 0); |
464 | |
465 | for (int c = 0, nc = isl_schedule_node_n_children(node: Sequence.get()); c < nc; |
466 | ++c) { |
467 | auto Child = Sequence.child(pos: c); |
468 | if (isl_schedule_node_get_type(node: Child.get()) != isl_schedule_node_filter) |
469 | return false; |
470 | if (isl_schedule_node_get_type(node: Child.child(pos: 0).get()) != |
471 | isl_schedule_node_leaf) |
472 | return false; |
473 | } |
474 | return true; |
475 | } |
476 | |
477 | /// Check if this node is a band node, which has only one child. |
478 | /// |
479 | /// @param Node The node to check. |
480 | static bool isOneTimeParentBandNode(isl::schedule_node Node) { |
481 | if (isl_schedule_node_get_type(node: Node.get()) != isl_schedule_node_band) |
482 | return false; |
483 | |
484 | if (isl_schedule_node_n_children(node: Node.get()) != 1) |
485 | return false; |
486 | |
487 | return true; |
488 | } |
489 | |
490 | bool ScheduleTreeOptimizer::isTileableBandNode(isl::schedule_node Node) { |
491 | if (!isOneTimeParentBandNode(Node)) |
492 | return false; |
493 | |
494 | if (!isl_schedule_node_band_get_permutable(node: Node.get())) |
495 | return false; |
496 | |
497 | auto Space = isl::manage(ptr: isl_schedule_node_band_get_space(node: Node.get())); |
498 | |
499 | if (unsignedFromIslSize(Size: Space.dim(type: isl::dim::set)) <= 1u) |
500 | return false; |
501 | |
502 | return isSimpleInnermostBand(Node); |
503 | } |
504 | |
505 | bool ScheduleTreeOptimizer::isPMOptimizableBandNode(isl::schedule_node Node) { |
506 | if (!isOneTimeParentBandNode(Node)) |
507 | return false; |
508 | |
509 | return Node.child(pos: 0).isa<isl::schedule_node_leaf>(); |
510 | } |
511 | |
512 | __isl_give isl::schedule_node |
513 | ScheduleTreeOptimizer::applyTileBandOpt(isl::schedule_node Node) { |
514 | if (FirstLevelTiling) { |
515 | Node = tileNode(Node, Identifier: "1st level tiling" , TileSizes: FirstLevelTileSizes, |
516 | DefaultTileSize: FirstLevelDefaultTileSize); |
517 | FirstLevelTileOpts++; |
518 | } |
519 | |
520 | if (SecondLevelTiling) { |
521 | Node = tileNode(Node, Identifier: "2nd level tiling" , TileSizes: SecondLevelTileSizes, |
522 | DefaultTileSize: SecondLevelDefaultTileSize); |
523 | SecondLevelTileOpts++; |
524 | } |
525 | |
526 | if (RegisterTiling) { |
527 | Node = |
528 | applyRegisterTiling(Node, TileSizes: RegisterTileSizes, DefaultTileSize: RegisterDefaultTileSize); |
529 | RegisterTileOpts++; |
530 | } |
531 | |
532 | return Node; |
533 | } |
534 | |
535 | isl::schedule_node |
536 | ScheduleTreeOptimizer::applyPrevectBandOpt(isl::schedule_node Node) { |
537 | auto Space = isl::manage(ptr: isl_schedule_node_band_get_space(node: Node.get())); |
538 | int Dims = unsignedFromIslSize(Size: Space.dim(type: isl::dim::set)); |
539 | |
540 | for (int i = Dims - 1; i >= 0; i--) |
541 | if (Node.as<isl::schedule_node_band>().member_get_coincident(pos: i)) { |
542 | Node = prevectSchedBand(Node, DimToVectorize: i, VectorWidth: PrevectorWidth); |
543 | break; |
544 | } |
545 | |
546 | return Node; |
547 | } |
548 | |
549 | __isl_give isl_schedule_node * |
550 | ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *NodeArg, |
551 | void *User) { |
552 | const OptimizerAdditionalInfoTy *OAI = |
553 | static_cast<const OptimizerAdditionalInfoTy *>(User); |
554 | assert(OAI && "Expecting optimization options" ); |
555 | |
556 | isl::schedule_node Node = isl::manage(ptr: NodeArg); |
557 | |
558 | if (OAI->PatternOpts && isPMOptimizableBandNode(Node)) { |
559 | isl::schedule_node PatternOptimizedSchedule = |
560 | tryOptimizeMatMulPattern(Node, TTI: OAI->TTI, D: OAI->D); |
561 | if (!PatternOptimizedSchedule.is_null()) { |
562 | MatMulOpts++; |
563 | OAI->DepsChanged = true; |
564 | return PatternOptimizedSchedule.release(); |
565 | } |
566 | } |
567 | |
568 | if (!isTileableBandNode(Node)) |
569 | return Node.release(); |
570 | |
571 | if (OAI->Postopts) |
572 | Node = applyTileBandOpt(Node); |
573 | |
574 | if (OAI->Prevect) { |
575 | // FIXME: Prevectorization requirements are different from those checked by |
576 | // isTileableBandNode. |
577 | Node = applyPrevectBandOpt(Node); |
578 | } |
579 | |
580 | return Node.release(); |
581 | } |
582 | |
583 | isl::schedule |
584 | ScheduleTreeOptimizer::optimizeSchedule(isl::schedule Schedule, |
585 | const OptimizerAdditionalInfoTy *OAI) { |
586 | auto Root = Schedule.get_root(); |
587 | Root = optimizeScheduleNode(Node: Root, OAI); |
588 | return Root.get_schedule(); |
589 | } |
590 | |
591 | isl::schedule_node ScheduleTreeOptimizer::optimizeScheduleNode( |
592 | isl::schedule_node Node, const OptimizerAdditionalInfoTy *OAI) { |
593 | Node = isl::manage(ptr: isl_schedule_node_map_descendant_bottom_up( |
594 | node: Node.release(), fn: optimizeBand, |
595 | user: const_cast<void *>(static_cast<const void *>(OAI)))); |
596 | return Node; |
597 | } |
598 | |
599 | bool ScheduleTreeOptimizer::isProfitableSchedule(Scop &S, |
600 | isl::schedule NewSchedule) { |
601 | // To understand if the schedule has been optimized we check if the schedule |
602 | // has changed at all. |
603 | // TODO: We can improve this by tracking if any necessarily beneficial |
604 | // transformations have been performed. This can e.g. be tiling, loop |
605 | // interchange, or ...) We can track this either at the place where the |
606 | // transformation has been performed or, in case of automatic ILP based |
607 | // optimizations, by comparing (yet to be defined) performance metrics |
608 | // before/after the scheduling optimizer |
609 | // (e.g., #stride-one accesses) |
610 | // FIXME: A schedule tree whose union_map-conversion is identical to the |
611 | // original schedule map may still allow for parallelization, i.e. can still |
612 | // be profitable. |
613 | auto NewScheduleMap = NewSchedule.get_map(); |
614 | auto OldSchedule = S.getSchedule(); |
615 | assert(!OldSchedule.is_null() && |
616 | "Only IslScheduleOptimizer can insert extension nodes " |
617 | "that make Scop::getSchedule() return nullptr." ); |
618 | bool changed = !OldSchedule.is_equal(umap2: NewScheduleMap); |
619 | return changed; |
620 | } |
621 | |
622 | class IslScheduleOptimizerWrapperPass final : public ScopPass { |
623 | public: |
624 | static char ID; |
625 | |
626 | explicit IslScheduleOptimizerWrapperPass() : ScopPass(ID) {} |
627 | |
628 | /// Optimize the schedule of the SCoP @p S. |
629 | bool runOnScop(Scop &S) override; |
630 | |
631 | /// Print the new schedule for the SCoP @p S. |
632 | void printScop(raw_ostream &OS, Scop &S) const override; |
633 | |
634 | /// Register all analyses and transformation required. |
635 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
636 | |
637 | /// Release the internal memory. |
638 | void releaseMemory() override { |
639 | LastSchedule = {}; |
640 | IslCtx.reset(); |
641 | } |
642 | |
643 | private: |
644 | std::shared_ptr<isl_ctx> IslCtx; |
645 | isl::schedule LastSchedule; |
646 | }; |
647 | |
648 | char IslScheduleOptimizerWrapperPass::ID = 0; |
649 | |
650 | #ifndef NDEBUG |
651 | static void printSchedule(llvm::raw_ostream &OS, const isl::schedule &Schedule, |
652 | StringRef Desc) { |
653 | isl::ctx Ctx = Schedule.ctx(); |
654 | isl_printer *P = isl_printer_to_str(ctx: Ctx.get()); |
655 | P = isl_printer_set_yaml_style(p: P, ISL_YAML_STYLE_BLOCK); |
656 | P = isl_printer_print_schedule(p: P, schedule: Schedule.get()); |
657 | char *Str = isl_printer_get_str(printer: P); |
658 | OS << Desc << ": \n" << Str << "\n" ; |
659 | free(ptr: Str); |
660 | isl_printer_free(printer: P); |
661 | } |
662 | #endif |
663 | |
664 | /// Collect statistics for the schedule tree. |
665 | /// |
666 | /// @param Schedule The schedule tree to analyze. If not a schedule tree it is |
667 | /// ignored. |
668 | /// @param Version The version of the schedule tree that is analyzed. |
669 | /// 0 for the original schedule tree before any transformation. |
670 | /// 1 for the schedule tree after isl's rescheduling. |
671 | /// 2 for the schedule tree after optimizations are applied |
672 | /// (tiling, pattern matching) |
673 | static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) { |
674 | auto Root = Schedule.get_root(); |
675 | if (Root.is_null()) |
676 | return; |
677 | |
678 | isl_schedule_node_foreach_descendant_top_down( |
679 | node: Root.get(), |
680 | fn: [](__isl_keep isl_schedule_node *nodeptr, void *user) -> isl_bool { |
681 | isl::schedule_node Node = isl::manage_copy(ptr: nodeptr); |
682 | int Version = *static_cast<int *>(user); |
683 | |
684 | switch (isl_schedule_node_get_type(node: Node.get())) { |
685 | case isl_schedule_node_band: { |
686 | NumBands[Version]++; |
687 | if (isl_schedule_node_band_get_permutable(node: Node.get()) == |
688 | isl_bool_true) |
689 | NumPermutable[Version]++; |
690 | |
691 | int CountMembers = isl_schedule_node_band_n_member(node: Node.get()); |
692 | NumBandMembers[Version] += CountMembers; |
693 | for (int i = 0; i < CountMembers; i += 1) { |
694 | if (Node.as<isl::schedule_node_band>().member_get_coincident(pos: i)) |
695 | NumCoincident[Version]++; |
696 | } |
697 | break; |
698 | } |
699 | |
700 | case isl_schedule_node_filter: |
701 | NumFilters[Version]++; |
702 | break; |
703 | |
704 | case isl_schedule_node_extension: |
705 | NumExtension[Version]++; |
706 | break; |
707 | |
708 | default: |
709 | break; |
710 | } |
711 | |
712 | return isl_bool_true; |
713 | }, |
714 | user: &Version); |
715 | } |
716 | |
717 | static void runIslScheduleOptimizer( |
718 | Scop &S, |
719 | function_ref<const Dependences &(Dependences::AnalysisLevel)> GetDeps, |
720 | TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, |
721 | isl::schedule &LastSchedule, bool &DepsChanged) { |
722 | // Skip empty SCoPs but still allow code generation as it will delete the |
723 | // loops present but not needed. |
724 | if (S.getSize() == 0) { |
725 | S.markAsOptimized(); |
726 | return; |
727 | } |
728 | |
729 | ScopsProcessed++; |
730 | |
731 | // Schedule without optimizations. |
732 | isl::schedule Schedule = S.getScheduleTree(); |
733 | walkScheduleTreeForStatistics(Schedule: S.getScheduleTree(), Version: 0); |
734 | POLLY_DEBUG(printSchedule(dbgs(), Schedule, "Original schedule tree" )); |
735 | |
736 | bool HasUserTransformation = false; |
737 | if (PragmaBasedOpts) { |
738 | isl::schedule ManuallyTransformed = applyManualTransformations( |
739 | S: &S, Sched: Schedule, D: GetDeps(Dependences::AL_Statement), ORE); |
740 | if (ManuallyTransformed.is_null()) { |
741 | POLLY_DEBUG(dbgs() << "Error during manual optimization\n" ); |
742 | return; |
743 | } |
744 | |
745 | if (ManuallyTransformed.get() != Schedule.get()) { |
746 | // User transformations have precedence over other transformations. |
747 | HasUserTransformation = true; |
748 | Schedule = std::move(ManuallyTransformed); |
749 | POLLY_DEBUG( |
750 | printSchedule(dbgs(), Schedule, "After manual transformations" )); |
751 | } |
752 | } |
753 | |
754 | // Only continue if either manual transformations have been applied or we are |
755 | // allowed to apply heuristics. |
756 | // TODO: Detect disabled heuristics and no user-directed transformation |
757 | // metadata earlier in ScopDetection. |
758 | if (!HasUserTransformation && S.hasDisableHeuristicsHint()) { |
759 | POLLY_DEBUG(dbgs() << "Heuristic optimizations disabled by metadata\n" ); |
760 | return; |
761 | } |
762 | |
763 | // Get dependency analysis. |
764 | const Dependences &D = GetDeps(Dependences::AL_Statement); |
765 | if (D.getSharedIslCtx() != S.getSharedIslCtx()) { |
766 | POLLY_DEBUG(dbgs() << "DependenceInfo for another SCoP/isl_ctx\n" ); |
767 | return; |
768 | } |
769 | if (!D.hasValidDependences()) { |
770 | POLLY_DEBUG(dbgs() << "Dependency information not available\n" ); |
771 | return; |
772 | } |
773 | |
774 | // Apply ISL's algorithm only if not overriden by the user. Note that |
775 | // post-rescheduling optimizations (tiling, pattern-based, prevectorization) |
776 | // rely on the coincidence/permutable annotations on schedule tree bands that |
777 | // are added by the rescheduling analyzer. Therefore, disabling the |
778 | // rescheduler implicitly also disables these optimizations. |
779 | if (!EnableReschedule) { |
780 | POLLY_DEBUG(dbgs() << "Skipping rescheduling due to command line option\n" ); |
781 | } else if (HasUserTransformation) { |
782 | POLLY_DEBUG( |
783 | dbgs() << "Skipping rescheduling due to manual transformation\n" ); |
784 | } else { |
785 | // Build input data. |
786 | int ValidityKinds = |
787 | Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; |
788 | int ProximityKinds; |
789 | |
790 | if (OptimizeDeps == "all" ) |
791 | ProximityKinds = |
792 | Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; |
793 | else if (OptimizeDeps == "raw" ) |
794 | ProximityKinds = Dependences::TYPE_RAW; |
795 | else { |
796 | errs() << "Do not know how to optimize for '" << OptimizeDeps << "'" |
797 | << " Falling back to optimizing all dependences.\n" ; |
798 | ProximityKinds = |
799 | Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; |
800 | } |
801 | |
802 | isl::union_set Domain = S.getDomains(); |
803 | |
804 | if (Domain.is_null()) |
805 | return; |
806 | |
807 | isl::union_map Validity = D.getDependences(Kinds: ValidityKinds); |
808 | isl::union_map Proximity = D.getDependences(Kinds: ProximityKinds); |
809 | |
810 | // Simplify the dependences by removing the constraints introduced by the |
811 | // domains. This can speed up the scheduling time significantly, as large |
812 | // constant coefficients will be removed from the dependences. The |
813 | // introduction of some additional dependences reduces the possible |
814 | // transformations, but in most cases, such transformation do not seem to be |
815 | // interesting anyway. In some cases this option may stop the scheduler to |
816 | // find any schedule. |
817 | if (SimplifyDeps == "yes" ) { |
818 | Validity = Validity.gist_domain(uset: Domain); |
819 | Validity = Validity.gist_range(uset: Domain); |
820 | Proximity = Proximity.gist_domain(uset: Domain); |
821 | Proximity = Proximity.gist_range(uset: Domain); |
822 | } else if (SimplifyDeps != "no" ) { |
823 | errs() |
824 | << "warning: Option -polly-opt-simplify-deps should either be 'yes' " |
825 | "or 'no'. Falling back to default: 'yes'\n" ; |
826 | } |
827 | |
828 | POLLY_DEBUG(dbgs() << "\n\nCompute schedule from: " ); |
829 | POLLY_DEBUG(dbgs() << "Domain := " << Domain << ";\n" ); |
830 | POLLY_DEBUG(dbgs() << "Proximity := " << Proximity << ";\n" ); |
831 | POLLY_DEBUG(dbgs() << "Validity := " << Validity << ";\n" ); |
832 | |
833 | int IslMaximizeBands; |
834 | if (MaximizeBandDepth == "yes" ) { |
835 | IslMaximizeBands = 1; |
836 | } else if (MaximizeBandDepth == "no" ) { |
837 | IslMaximizeBands = 0; |
838 | } else { |
839 | errs() |
840 | << "warning: Option -polly-opt-maximize-bands should either be 'yes'" |
841 | " or 'no'. Falling back to default: 'yes'\n" ; |
842 | IslMaximizeBands = 1; |
843 | } |
844 | |
845 | int IslOuterCoincidence; |
846 | if (OuterCoincidence == "yes" ) { |
847 | IslOuterCoincidence = 1; |
848 | } else if (OuterCoincidence == "no" ) { |
849 | IslOuterCoincidence = 0; |
850 | } else { |
851 | errs() << "warning: Option -polly-opt-outer-coincidence should either be " |
852 | "'yes' or 'no'. Falling back to default: 'no'\n" ; |
853 | IslOuterCoincidence = 0; |
854 | } |
855 | |
856 | isl_ctx *Ctx = S.getIslCtx().get(); |
857 | |
858 | isl_options_set_schedule_outer_coincidence(ctx: Ctx, val: IslOuterCoincidence); |
859 | isl_options_set_schedule_maximize_band_depth(ctx: Ctx, val: IslMaximizeBands); |
860 | isl_options_set_schedule_max_constant_term(ctx: Ctx, val: MaxConstantTerm); |
861 | isl_options_set_schedule_max_coefficient(ctx: Ctx, val: MaxCoefficient); |
862 | isl_options_set_tile_scale_tile_loops(ctx: Ctx, val: 0); |
863 | |
864 | auto OnErrorStatus = isl_options_get_on_error(ctx: Ctx); |
865 | isl_options_set_on_error(ctx: Ctx, ISL_ON_ERROR_CONTINUE); |
866 | |
867 | auto SC = isl::schedule_constraints::on_domain(domain: Domain); |
868 | SC = SC.set_proximity(Proximity); |
869 | SC = SC.set_validity(Validity); |
870 | SC = SC.set_coincidence(Validity); |
871 | |
872 | { |
873 | IslMaxOperationsGuard MaxOpGuard(Ctx, ScheduleComputeOut); |
874 | Schedule = SC.compute_schedule(); |
875 | |
876 | if (MaxOpGuard.hasQuotaExceeded()) |
877 | POLLY_DEBUG( |
878 | dbgs() << "Schedule optimizer calculation exceeds ISL quota\n" ); |
879 | } |
880 | |
881 | isl_options_set_on_error(ctx: Ctx, val: OnErrorStatus); |
882 | |
883 | ScopsRescheduled++; |
884 | POLLY_DEBUG(printSchedule(dbgs(), Schedule, "After rescheduling" )); |
885 | } |
886 | |
887 | walkScheduleTreeForStatistics(Schedule, Version: 1); |
888 | |
889 | // In cases the scheduler is not able to optimize the code, we just do not |
890 | // touch the schedule. |
891 | if (Schedule.is_null()) |
892 | return; |
893 | |
894 | if (GreedyFusion) { |
895 | isl::union_map Validity = D.getDependences( |
896 | Kinds: Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW); |
897 | Schedule = applyGreedyFusion(Sched: Schedule, Deps: Validity); |
898 | assert(!Schedule.is_null()); |
899 | } |
900 | |
901 | // Apply post-rescheduling optimizations (if enabled) and/or prevectorization. |
902 | const OptimizerAdditionalInfoTy OAI = { |
903 | .TTI: TTI, |
904 | .D: const_cast<Dependences *>(&D), |
905 | /*PatternOpts=*/!HasUserTransformation && PMBasedOpts, |
906 | /*Postopts=*/!HasUserTransformation && EnablePostopts, |
907 | /*Prevect=*/PollyVectorizerChoice != VECTORIZER_NONE, |
908 | .DepsChanged: DepsChanged}; |
909 | if (OAI.PatternOpts || OAI.Postopts || OAI.Prevect) { |
910 | Schedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, OAI: &OAI); |
911 | Schedule = hoistExtensionNodes(Sched: Schedule); |
912 | POLLY_DEBUG(printSchedule(dbgs(), Schedule, "After post-optimizations" )); |
913 | walkScheduleTreeForStatistics(Schedule, Version: 2); |
914 | } |
915 | |
916 | // Skip profitability check if user transformation(s) have been applied. |
917 | if (!HasUserTransformation && |
918 | !ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule: Schedule)) |
919 | return; |
920 | |
921 | auto ScopStats = S.getStatistics(); |
922 | ScopsOptimized++; |
923 | NumAffineLoopsOptimized += ScopStats.NumAffineLoops; |
924 | NumBoxedLoopsOptimized += ScopStats.NumBoxedLoops; |
925 | LastSchedule = Schedule; |
926 | |
927 | S.setScheduleTree(Schedule); |
928 | S.markAsOptimized(); |
929 | |
930 | if (OptimizedScops) |
931 | errs() << S; |
932 | } |
933 | |
934 | bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) { |
935 | releaseMemory(); |
936 | |
937 | Function &F = S.getFunction(); |
938 | IslCtx = S.getSharedIslCtx(); |
939 | |
940 | auto getDependences = |
941 | [this](Dependences::AnalysisLevel) -> const Dependences & { |
942 | return getAnalysis<DependenceInfo>().getDependences( |
943 | Level: Dependences::AL_Statement); |
944 | }; |
945 | OptimizationRemarkEmitter &ORE = |
946 | getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
947 | TargetTransformInfo *TTI = |
948 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
949 | |
950 | bool DepsChanged = false; |
951 | runIslScheduleOptimizer(S, GetDeps: getDependences, TTI, ORE: &ORE, LastSchedule, |
952 | DepsChanged); |
953 | if (DepsChanged) |
954 | getAnalysis<DependenceInfo>().abandonDependences(); |
955 | return false; |
956 | } |
957 | |
958 | static void runScheduleOptimizerPrinter(raw_ostream &OS, |
959 | isl::schedule LastSchedule) { |
960 | isl_printer *p; |
961 | char *ScheduleStr; |
962 | |
963 | OS << "Calculated schedule:\n" ; |
964 | |
965 | if (LastSchedule.is_null()) { |
966 | OS << "n/a\n" ; |
967 | return; |
968 | } |
969 | |
970 | p = isl_printer_to_str(ctx: LastSchedule.ctx().get()); |
971 | p = isl_printer_set_yaml_style(p, ISL_YAML_STYLE_BLOCK); |
972 | p = isl_printer_print_schedule(p, schedule: LastSchedule.get()); |
973 | ScheduleStr = isl_printer_get_str(printer: p); |
974 | isl_printer_free(printer: p); |
975 | |
976 | OS << ScheduleStr << "\n" ; |
977 | |
978 | free(ptr: ScheduleStr); |
979 | } |
980 | |
981 | void IslScheduleOptimizerWrapperPass::printScop(raw_ostream &OS, Scop &) const { |
982 | runScheduleOptimizerPrinter(OS, LastSchedule); |
983 | } |
984 | |
985 | void IslScheduleOptimizerWrapperPass::getAnalysisUsage( |
986 | AnalysisUsage &AU) const { |
987 | ScopPass::getAnalysisUsage(AU); |
988 | AU.addRequired<DependenceInfo>(); |
989 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
990 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
991 | |
992 | AU.addPreserved<DependenceInfo>(); |
993 | AU.addPreserved<OptimizationRemarkEmitterWrapperPass>(); |
994 | } |
995 | |
996 | } // namespace |
997 | |
998 | Pass *polly::createIslScheduleOptimizerWrapperPass() { |
999 | return new IslScheduleOptimizerWrapperPass(); |
1000 | } |
1001 | |
1002 | INITIALIZE_PASS_BEGIN(IslScheduleOptimizerWrapperPass, "polly-opt-isl" , |
1003 | "Polly - Optimize schedule of SCoP" , false, false); |
1004 | INITIALIZE_PASS_DEPENDENCY(DependenceInfo); |
1005 | INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass); |
1006 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass); |
1007 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass); |
1008 | INITIALIZE_PASS_END(IslScheduleOptimizerWrapperPass, "polly-opt-isl" , |
1009 | "Polly - Optimize schedule of SCoP" , false, false) |
1010 | |
1011 | static llvm::PreservedAnalyses |
1012 | runIslScheduleOptimizerUsingNPM(Scop &S, ScopAnalysisManager &SAM, |
1013 | ScopStandardAnalysisResults &SAR, SPMUpdater &U, |
1014 | raw_ostream *OS) { |
1015 | DependenceAnalysis::Result &Deps = SAM.getResult<DependenceAnalysis>(IR&: S, ExtraArgs&: SAR); |
1016 | auto GetDeps = [&Deps](Dependences::AnalysisLevel) -> const Dependences & { |
1017 | return Deps.getDependences(Level: Dependences::AL_Statement); |
1018 | }; |
1019 | OptimizationRemarkEmitter ORE(&S.getFunction()); |
1020 | TargetTransformInfo *TTI = &SAR.TTI; |
1021 | isl::schedule LastSchedule; |
1022 | bool DepsChanged = false; |
1023 | runIslScheduleOptimizer(S, GetDeps, TTI, ORE: &ORE, LastSchedule, DepsChanged); |
1024 | if (DepsChanged) |
1025 | Deps.abandonDependences(); |
1026 | |
1027 | if (OS) { |
1028 | *OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '" |
1029 | << S.getName() << "' in function '" << S.getFunction().getName() |
1030 | << "':\n" ; |
1031 | runScheduleOptimizerPrinter(OS&: *OS, LastSchedule); |
1032 | } |
1033 | return PreservedAnalyses::all(); |
1034 | } |
1035 | |
1036 | llvm::PreservedAnalyses |
1037 | IslScheduleOptimizerPass::run(Scop &S, ScopAnalysisManager &SAM, |
1038 | ScopStandardAnalysisResults &SAR, SPMUpdater &U) { |
1039 | return runIslScheduleOptimizerUsingNPM(S, SAM, SAR, U, OS: nullptr); |
1040 | } |
1041 | |
1042 | llvm::PreservedAnalyses |
1043 | IslScheduleOptimizerPrinterPass::run(Scop &S, ScopAnalysisManager &SAM, |
1044 | ScopStandardAnalysisResults &SAR, |
1045 | SPMUpdater &U) { |
1046 | return runIslScheduleOptimizerUsingNPM(S, SAM, SAR, U, OS: &OS); |
1047 | } |
1048 | |
1049 | //===----------------------------------------------------------------------===// |
1050 | |
1051 | namespace { |
1052 | /// Print result from IslScheduleOptimizerWrapperPass. |
1053 | class IslScheduleOptimizerPrinterLegacyPass final : public ScopPass { |
1054 | public: |
1055 | static char ID; |
1056 | |
1057 | IslScheduleOptimizerPrinterLegacyPass() |
1058 | : IslScheduleOptimizerPrinterLegacyPass(outs()) {} |
1059 | explicit IslScheduleOptimizerPrinterLegacyPass(llvm::raw_ostream &OS) |
1060 | : ScopPass(ID), OS(OS) {} |
1061 | |
1062 | bool runOnScop(Scop &S) override { |
1063 | IslScheduleOptimizerWrapperPass &P = |
1064 | getAnalysis<IslScheduleOptimizerWrapperPass>(); |
1065 | |
1066 | OS << "Printing analysis '" << P.getPassName() << "' for region: '" |
1067 | << S.getRegion().getNameStr() << "' in function '" |
1068 | << S.getFunction().getName() << "':\n" ; |
1069 | P.printScop(OS, S); |
1070 | |
1071 | return false; |
1072 | } |
1073 | |
1074 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1075 | ScopPass::getAnalysisUsage(AU); |
1076 | AU.addRequired<IslScheduleOptimizerWrapperPass>(); |
1077 | AU.setPreservesAll(); |
1078 | } |
1079 | |
1080 | private: |
1081 | llvm::raw_ostream &OS; |
1082 | }; |
1083 | |
1084 | char IslScheduleOptimizerPrinterLegacyPass::ID = 0; |
1085 | } // namespace |
1086 | |
1087 | Pass *polly::createIslScheduleOptimizerPrinterLegacyPass(raw_ostream &OS) { |
1088 | return new IslScheduleOptimizerPrinterLegacyPass(OS); |
1089 | } |
1090 | |
1091 | INITIALIZE_PASS_BEGIN(IslScheduleOptimizerPrinterLegacyPass, |
1092 | "polly-print-opt-isl" , |
1093 | "Polly - Print optimizer schedule of SCoP" , false, false); |
1094 | INITIALIZE_PASS_DEPENDENCY(IslScheduleOptimizerWrapperPass) |
1095 | INITIALIZE_PASS_END(IslScheduleOptimizerPrinterLegacyPass, |
1096 | "polly-print-opt-isl" , |
1097 | "Polly - Print optimizer schedule of SCoP" , false, false) |
1098 | |