1 | //===------ Simplify.cpp ----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Simplify a SCoP by removing unnecessary statements and accesses. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "polly/Simplify.h" |
14 | #include "polly/ScopInfo.h" |
15 | #include "polly/ScopPass.h" |
16 | #include "polly/Support/GICHelper.h" |
17 | #include "polly/Support/ISLOStream.h" |
18 | #include "polly/Support/ISLTools.h" |
19 | #include "polly/Support/VirtualInstruction.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/InitializePasses.h" |
22 | #include "llvm/Support/Debug.h" |
23 | #include <optional> |
24 | |
25 | #include "polly/Support/PollyDebug.h" |
26 | #define DEBUG_TYPE "polly-simplify" |
27 | |
28 | using namespace llvm; |
29 | using namespace polly; |
30 | |
31 | namespace { |
32 | |
33 | #define TWO_STATISTICS(VARNAME, DESC) \ |
34 | static llvm::Statistic VARNAME[2] = { \ |
35 | {DEBUG_TYPE, #VARNAME "0", DESC " (first)"}, \ |
36 | {DEBUG_TYPE, #VARNAME "1", DESC " (second)"}} |
37 | |
38 | /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid |
39 | /// that the analysis of accesses in a statement is becoming too complex. Chosen |
40 | /// to be relatively small because all the common cases should access only few |
41 | /// array elements per statement. |
42 | static unsigned const SimplifyMaxDisjuncts = 4; |
43 | |
44 | TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed" ); |
45 | TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified" ); |
46 | |
47 | TWO_STATISTICS(TotalEmptyDomainsRemoved, |
48 | "Number of statement with empty domains removed in any SCoP" ); |
49 | TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes" ); |
50 | TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another" ); |
51 | TWO_STATISTICS(TotalRedundantWritesRemoved, |
52 | "Number of writes of same value removed in any SCoP" ); |
53 | TWO_STATISTICS(TotalEmptyPartialAccessesRemoved, |
54 | "Number of empty partial accesses removed" ); |
55 | TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed" ); |
56 | TWO_STATISTICS(TotalDeadInstructionsRemoved, |
57 | "Number of unused instructions removed" ); |
58 | TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP" ); |
59 | |
60 | TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify" ); |
61 | TWO_STATISTICS( |
62 | NumValueWritesInLoops, |
63 | "Number of scalar value writes nested in affine loops after Simplify" ); |
64 | TWO_STATISTICS(NumPHIWrites, |
65 | "Number of scalar phi writes after the first simplification" ); |
66 | TWO_STATISTICS( |
67 | NumPHIWritesInLoops, |
68 | "Number of scalar phi writes nested in affine loops after Simplify" ); |
69 | TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify" ); |
70 | TWO_STATISTICS( |
71 | NumSingletonWritesInLoops, |
72 | "Number of singleton writes nested in affine loops after Simplify" ); |
73 | |
74 | static bool isImplicitRead(MemoryAccess *MA) { |
75 | return MA->isRead() && MA->isOriginalScalarKind(); |
76 | } |
77 | |
78 | static bool isExplicitAccess(MemoryAccess *MA) { |
79 | return MA->isOriginalArrayKind(); |
80 | } |
81 | |
82 | static bool isImplicitWrite(MemoryAccess *MA) { |
83 | return MA->isWrite() && MA->isOriginalScalarKind(); |
84 | } |
85 | |
86 | /// Like isl::union_map::unite, but may also return an underapproximated |
87 | /// result if getting too complex. |
88 | /// |
89 | /// This is implemented by adding disjuncts to the results until the limit is |
90 | /// reached. |
91 | static isl::union_map underapproximatedAddMap(isl::union_map UMap, |
92 | isl::map Map) { |
93 | if (UMap.is_null() || Map.is_null()) |
94 | return {}; |
95 | |
96 | isl::map PrevMap = UMap.extract_map(space: Map.get_space()); |
97 | |
98 | // Fast path: If known that we cannot exceed the disjunct limit, just add |
99 | // them. |
100 | if (unsignedFromIslSize(Size: PrevMap.n_basic_map()) + |
101 | unsignedFromIslSize(Size: Map.n_basic_map()) <= |
102 | SimplifyMaxDisjuncts) |
103 | return UMap.unite(umap2: Map); |
104 | |
105 | isl::map Result = isl::map::empty(space: PrevMap.get_space()); |
106 | for (isl::basic_map BMap : PrevMap.get_basic_map_list()) { |
107 | if (unsignedFromIslSize(Size: Result.n_basic_map()) > SimplifyMaxDisjuncts) |
108 | break; |
109 | Result = Result.unite(map2: BMap); |
110 | } |
111 | for (isl::basic_map BMap : Map.get_basic_map_list()) { |
112 | if (unsignedFromIslSize(Size: Result.n_basic_map()) > SimplifyMaxDisjuncts) |
113 | break; |
114 | Result = Result.unite(map2: BMap); |
115 | } |
116 | |
117 | isl::union_map UResult = |
118 | UMap.subtract(umap2: isl::map::universe(space: PrevMap.get_space())); |
119 | UResult.unite(umap2: Result); |
120 | |
121 | return UResult; |
122 | } |
123 | |
124 | class SimplifyImpl final { |
125 | private: |
126 | /// The invocation id (if there are multiple instances in the pass manager's |
127 | /// pipeline) to determine which statistics to update. |
128 | int CallNo; |
129 | |
130 | /// The last/current SCoP that is/has been processed. |
131 | Scop *S = nullptr; |
132 | |
133 | /// Number of statements with empty domains removed from the SCoP. |
134 | int EmptyDomainsRemoved = 0; |
135 | |
136 | /// Number of writes that are overwritten anyway. |
137 | int OverwritesRemoved = 0; |
138 | |
139 | /// Number of combined writes. |
140 | int WritesCoalesced = 0; |
141 | |
142 | /// Number of redundant writes removed from this SCoP. |
143 | int RedundantWritesRemoved = 0; |
144 | |
145 | /// Number of writes with empty access domain removed. |
146 | int EmptyPartialAccessesRemoved = 0; |
147 | |
148 | /// Number of unused accesses removed from this SCoP. |
149 | int DeadAccessesRemoved = 0; |
150 | |
151 | /// Number of unused instructions removed from this SCoP. |
152 | int DeadInstructionsRemoved = 0; |
153 | |
154 | /// Number of unnecessary statements removed from the SCoP. |
155 | int StmtsRemoved = 0; |
156 | |
157 | /// Remove statements that are never executed due to their domains being |
158 | /// empty. |
159 | /// |
160 | /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's |
161 | /// effective domain, i.e. including the SCoP's context as used by some other |
162 | /// simplification methods in this pass. This is necessary because the |
163 | /// analysis on empty domains is unreliable, e.g. remove a scalar value |
164 | /// definition MemoryAccesses, but not its use. |
165 | void removeEmptyDomainStmts(); |
166 | |
167 | /// Remove writes that are overwritten unconditionally later in the same |
168 | /// statement. |
169 | /// |
170 | /// There must be no read of the same value between the write (that is to be |
171 | /// removed) and the overwrite. |
172 | void removeOverwrites(); |
173 | |
174 | /// Combine writes that write the same value if possible. |
175 | /// |
176 | /// This function is able to combine: |
177 | /// - Partial writes with disjoint domain. |
178 | /// - Writes that write to the same array element. |
179 | /// |
180 | /// In all cases, both writes must write the same values. |
181 | void coalesceWrites(); |
182 | |
183 | /// Remove writes that just write the same value already stored in the |
184 | /// element. |
185 | void removeRedundantWrites(); |
186 | |
187 | /// Remove statements without side effects. |
188 | void removeUnnecessaryStmts(); |
189 | |
190 | /// Remove accesses that have an empty domain. |
191 | void removeEmptyPartialAccesses(); |
192 | |
193 | /// Mark all reachable instructions and access, and sweep those that are not |
194 | /// reachable. |
195 | void markAndSweep(LoopInfo *LI); |
196 | |
197 | /// Print simplification statistics to @p OS. |
198 | void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const; |
199 | |
200 | /// Print the current state of all MemoryAccesses to @p OS. |
201 | void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const; |
202 | |
203 | public: |
204 | explicit SimplifyImpl(int CallNo = 0) : CallNo(CallNo) {} |
205 | |
206 | void run(Scop &S, LoopInfo *LI); |
207 | |
208 | void printScop(raw_ostream &OS, Scop &S) const; |
209 | |
210 | /// Return whether at least one simplification has been applied. |
211 | bool isModified() const; |
212 | }; |
213 | |
214 | /// Return whether at least one simplification has been applied. |
215 | bool SimplifyImpl::isModified() const { |
216 | return EmptyDomainsRemoved > 0 || OverwritesRemoved > 0 || |
217 | WritesCoalesced > 0 || RedundantWritesRemoved > 0 || |
218 | EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 || |
219 | DeadInstructionsRemoved > 0 || StmtsRemoved > 0; |
220 | } |
221 | |
222 | /// Remove statements that are never executed due to their domains being |
223 | /// empty. |
224 | /// |
225 | /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's |
226 | /// effective domain, i.e. including the SCoP's context as used by some other |
227 | /// simplification methods in this pass. This is necessary because the |
228 | /// analysis on empty domains is unreliable, e.g. remove a scalar value |
229 | /// definition MemoryAccesses, but not its use. |
230 | void SimplifyImpl::removeEmptyDomainStmts() { |
231 | size_t NumStmtsBefore = S->getSize(); |
232 | |
233 | S->removeStmts(ShouldDelete: [](ScopStmt &Stmt) -> bool { |
234 | auto EffectiveDomain = |
235 | Stmt.getDomain().intersect_params(params: Stmt.getParent()->getContext()); |
236 | return EffectiveDomain.is_empty(); |
237 | }); |
238 | |
239 | assert(NumStmtsBefore >= S->getSize()); |
240 | EmptyDomainsRemoved = NumStmtsBefore - S->getSize(); |
241 | POLLY_DEBUG(dbgs() << "Removed " << EmptyDomainsRemoved << " (of " |
242 | << NumStmtsBefore << ") statements with empty domains \n" ); |
243 | TotalEmptyDomainsRemoved[CallNo] += EmptyDomainsRemoved; |
244 | } |
245 | |
246 | /// Remove writes that are overwritten unconditionally later in the same |
247 | /// statement. |
248 | /// |
249 | /// There must be no read of the same value between the write (that is to be |
250 | /// removed) and the overwrite. |
251 | void SimplifyImpl::removeOverwrites() { |
252 | for (auto &Stmt : *S) { |
253 | isl::set Domain = Stmt.getDomain(); |
254 | isl::union_map WillBeOverwritten = isl::union_map::empty(ctx: S->getIslCtx()); |
255 | |
256 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
257 | |
258 | // Iterate in reverse order, so the overwrite comes before the write that |
259 | // is to be removed. |
260 | for (auto *MA : reverse(C&: Accesses)) { |
261 | |
262 | // In region statements, the explicit accesses can be in blocks that are |
263 | // can be executed in any order. We therefore process only the implicit |
264 | // writes and stop after that. |
265 | if (Stmt.isRegionStmt() && isExplicitAccess(MA)) |
266 | break; |
267 | |
268 | auto AccRel = MA->getAccessRelation(); |
269 | AccRel = AccRel.intersect_domain(set: Domain); |
270 | AccRel = AccRel.intersect_params(params: S->getContext()); |
271 | |
272 | // If a value is read in-between, do not consider it as overwritten. |
273 | if (MA->isRead()) { |
274 | // Invalidate all overwrites for the array it accesses to avoid too |
275 | // complex isl sets. |
276 | isl::map AccRelUniv = isl::map::universe(space: AccRel.get_space()); |
277 | WillBeOverwritten = WillBeOverwritten.subtract(umap2: AccRelUniv); |
278 | continue; |
279 | } |
280 | |
281 | // If all of a write's elements are overwritten, remove it. |
282 | isl::union_map AccRelUnion = AccRel; |
283 | if (AccRelUnion.is_subset(umap2: WillBeOverwritten)) { |
284 | POLLY_DEBUG(dbgs() << "Removing " << MA |
285 | << " which will be overwritten anyway\n" ); |
286 | |
287 | Stmt.removeSingleMemoryAccess(MA); |
288 | OverwritesRemoved++; |
289 | TotalOverwritesRemoved[CallNo]++; |
290 | } |
291 | |
292 | // Unconditional writes overwrite other values. |
293 | if (MA->isMustWrite()) { |
294 | // Avoid too complex isl sets. If necessary, throw away some of the |
295 | // knowledge. |
296 | WillBeOverwritten = underapproximatedAddMap(UMap: WillBeOverwritten, Map: AccRel); |
297 | } |
298 | } |
299 | } |
300 | } |
301 | |
302 | /// Combine writes that write the same value if possible. |
303 | /// |
304 | /// This function is able to combine: |
305 | /// - Partial writes with disjoint domain. |
306 | /// - Writes that write to the same array element. |
307 | /// |
308 | /// In all cases, both writes must write the same values. |
309 | void SimplifyImpl::coalesceWrites() { |
310 | for (auto &Stmt : *S) { |
311 | isl::set Domain = Stmt.getDomain().intersect_params(params: S->getContext()); |
312 | |
313 | // We let isl do the lookup for the same-value condition. For this, we |
314 | // wrap llvm::Value into an isl::set such that isl can do the lookup in |
315 | // its hashtable implementation. llvm::Values are only compared within a |
316 | // ScopStmt, so the map can be local to this scope. TODO: Refactor with |
317 | // ZoneAlgorithm::makeValueSet() |
318 | SmallDenseMap<Value *, isl::set> ValueSets; |
319 | auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { |
320 | assert(V); |
321 | isl::set &Result = ValueSets[V]; |
322 | if (Result.is_null()) { |
323 | isl::ctx Ctx = S->getIslCtx(); |
324 | std::string Name = getIslCompatibleName( |
325 | Prefix: "Val" , Val: V, Number: ValueSets.size() - 1, Suffix: std::string(), UseInstructionNames); |
326 | isl::id Id = isl::id::alloc(ctx: Ctx, name: Name, user: V); |
327 | Result = isl::set::universe( |
328 | space: isl::space(Ctx, 0, 0).set_tuple_id(type: isl::dim::set, id: Id)); |
329 | } |
330 | return Result; |
331 | }; |
332 | |
333 | // List of all eligible (for coalescing) writes of the future. |
334 | // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } |
335 | isl::union_map FutureWrites = isl::union_map::empty(ctx: S->getIslCtx()); |
336 | |
337 | // Iterate over accesses from the last to the first. |
338 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
339 | for (MemoryAccess *MA : reverse(C&: Accesses)) { |
340 | // In region statements, the explicit accesses can be in blocks that can |
341 | // be executed in any order. We therefore process only the implicit |
342 | // writes and stop after that. |
343 | if (Stmt.isRegionStmt() && isExplicitAccess(MA)) |
344 | break; |
345 | |
346 | // { Domain[] -> Element[] } |
347 | isl::map AccRel = MA->getLatestAccessRelation().intersect_domain(set: Domain); |
348 | |
349 | // { [Domain[] -> Element[]] } |
350 | isl::set AccRelWrapped = AccRel.wrap(); |
351 | |
352 | // { Value[] } |
353 | isl::set ValSet; |
354 | |
355 | if (MA->isMustWrite() && (MA->isOriginalScalarKind() || |
356 | isa<StoreInst>(Val: MA->getAccessInstruction()))) { |
357 | // Normally, tryGetValueStored() should be used to determine which |
358 | // element is written, but it can return nullptr; For PHI accesses, |
359 | // getAccessValue() returns the PHI instead of the PHI's incoming |
360 | // value. In this case, where we only compare values of a single |
361 | // statement, this is fine, because within a statement, a PHI in a |
362 | // successor block has always the same value as the incoming write. We |
363 | // still preferably use the incoming value directly so we also catch |
364 | // direct uses of that. |
365 | Value *StoredVal = MA->tryGetValueStored(); |
366 | if (!StoredVal) |
367 | StoredVal = MA->getAccessValue(); |
368 | ValSet = makeValueSet(StoredVal); |
369 | |
370 | // { Domain[] } |
371 | isl::set AccDomain = AccRel.domain(); |
372 | |
373 | // Parts of the statement's domain that is not written by this access. |
374 | isl::set UndefDomain = Domain.subtract(set2: AccDomain); |
375 | |
376 | // { Element[] } |
377 | isl::set ElementUniverse = |
378 | isl::set::universe(space: AccRel.get_space().range()); |
379 | |
380 | // { Domain[] -> Element[] } |
381 | isl::map UndefAnything = |
382 | isl::map::from_domain_and_range(domain: UndefDomain, range: ElementUniverse); |
383 | |
384 | // We are looking a compatible write access. The other write can |
385 | // access these elements... |
386 | isl::map AllowedAccesses = AccRel.unite(map2: UndefAnything); |
387 | |
388 | // ... and must write the same value. |
389 | // { [Domain[] -> Element[]] -> Value[] } |
390 | isl::map Filter = |
391 | isl::map::from_domain_and_range(domain: AllowedAccesses.wrap(), range: ValSet); |
392 | |
393 | // Lookup future write that fulfills these conditions. |
394 | // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] } |
395 | isl::union_map Filtered = |
396 | FutureWrites.uncurry().intersect_domain(uset: Filter.wrap()); |
397 | |
398 | // Iterate through the candidates. |
399 | for (isl::map Map : Filtered.get_map_list()) { |
400 | MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space() |
401 | .get_tuple_id(type: isl::dim::out) |
402 | .get_user(); |
403 | |
404 | isl::map OtherAccRel = |
405 | OtherMA->getLatestAccessRelation().intersect_domain(set: Domain); |
406 | |
407 | // The filter only guaranteed that some of OtherMA's accessed |
408 | // elements are allowed. Verify that it only accesses allowed |
409 | // elements. Otherwise, continue with the next candidate. |
410 | if (!OtherAccRel.is_subset(map2: AllowedAccesses).is_true()) |
411 | continue; |
412 | |
413 | // The combined access relation. |
414 | // { Domain[] -> Element[] } |
415 | isl::map NewAccRel = AccRel.unite(map2: OtherAccRel); |
416 | simplify(Map&: NewAccRel); |
417 | |
418 | // Carry out the coalescing. |
419 | Stmt.removeSingleMemoryAccess(MA); |
420 | OtherMA->setNewAccessRelation(NewAccRel); |
421 | |
422 | // We removed MA, OtherMA takes its role. |
423 | MA = OtherMA; |
424 | |
425 | TotalWritesCoalesced[CallNo]++; |
426 | WritesCoalesced++; |
427 | |
428 | // Don't look for more candidates. |
429 | break; |
430 | } |
431 | } |
432 | |
433 | // Two writes cannot be coalesced if there is another access (to some of |
434 | // the written elements) between them. Remove all visited write accesses |
435 | // from the list of eligible writes. Don't just remove the accessed |
436 | // elements, but any MemoryAccess that touches any of the invalidated |
437 | // elements. |
438 | SmallPtrSet<MemoryAccess *, 2> TouchedAccesses; |
439 | for (isl::map Map : |
440 | FutureWrites.intersect_domain(uset: AccRelWrapped).get_map_list()) { |
441 | MemoryAccess *MA = (MemoryAccess *)Map.get_space() |
442 | .range() |
443 | .unwrap() |
444 | .get_tuple_id(type: isl::dim::out) |
445 | .get_user(); |
446 | TouchedAccesses.insert(Ptr: MA); |
447 | } |
448 | isl::union_map NewFutureWrites = |
449 | isl::union_map::empty(ctx: FutureWrites.ctx()); |
450 | for (isl::map FutureWrite : FutureWrites.get_map_list()) { |
451 | MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space() |
452 | .range() |
453 | .unwrap() |
454 | .get_tuple_id(type: isl::dim::out) |
455 | .get_user(); |
456 | if (!TouchedAccesses.count(Ptr: MA)) |
457 | NewFutureWrites = NewFutureWrites.unite(umap2: FutureWrite); |
458 | } |
459 | FutureWrites = NewFutureWrites; |
460 | |
461 | if (MA->isMustWrite() && !ValSet.is_null()) { |
462 | // { MemoryAccess[] } |
463 | auto AccSet = |
464 | isl::set::universe(space: isl::space(S->getIslCtx(), 0, 0) |
465 | .set_tuple_id(type: isl::dim::set, id: MA->getId())); |
466 | |
467 | // { Val[] -> MemoryAccess[] } |
468 | isl::map ValAccSet = isl::map::from_domain_and_range(domain: ValSet, range: AccSet); |
469 | |
470 | // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } |
471 | isl::map AccRelValAcc = |
472 | isl::map::from_domain_and_range(domain: AccRelWrapped, range: ValAccSet.wrap()); |
473 | FutureWrites = FutureWrites.unite(umap2: AccRelValAcc); |
474 | } |
475 | } |
476 | } |
477 | } |
478 | |
479 | /// Remove writes that just write the same value already stored in the |
480 | /// element. |
481 | void SimplifyImpl::removeRedundantWrites() { |
482 | for (auto &Stmt : *S) { |
483 | SmallDenseMap<Value *, isl::set> ValueSets; |
484 | auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { |
485 | assert(V); |
486 | isl::set &Result = ValueSets[V]; |
487 | if (Result.is_null()) { |
488 | isl_ctx *Ctx = S->getIslCtx().get(); |
489 | std::string Name = getIslCompatibleName( |
490 | Prefix: "Val" , Val: V, Number: ValueSets.size() - 1, Suffix: std::string(), UseInstructionNames); |
491 | isl::id Id = isl::manage(ptr: isl_id_alloc(ctx: Ctx, name: Name.c_str(), user: V)); |
492 | Result = isl::set::universe( |
493 | space: isl::space(Ctx, 0, 0).set_tuple_id(type: isl::dim::set, id: Id)); |
494 | } |
495 | return Result; |
496 | }; |
497 | |
498 | isl::set Domain = Stmt.getDomain(); |
499 | Domain = Domain.intersect_params(params: S->getContext()); |
500 | |
501 | // List of element reads that still have the same value while iterating |
502 | // through the MemoryAccesses. |
503 | // { [Domain[] -> Element[]] -> Val[] } |
504 | isl::union_map Known = isl::union_map::empty(ctx: S->getIslCtx()); |
505 | |
506 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
507 | for (MemoryAccess *MA : Accesses) { |
508 | // Is the memory access in a defined order relative to the other |
509 | // accesses? In region statements, only the first and the last accesses |
510 | // have defined order. Execution of those in the middle may depend on |
511 | // runtime conditions an therefore cannot be modified. |
512 | bool IsOrdered = |
513 | Stmt.isBlockStmt() || MA->isOriginalScalarKind() || |
514 | (!S->getBoxedLoops().size() && MA->getAccessInstruction() && |
515 | Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent()); |
516 | |
517 | isl::map AccRel = MA->getAccessRelation(); |
518 | AccRel = AccRel.intersect_domain(set: Domain); |
519 | isl::set AccRelWrapped = AccRel.wrap(); |
520 | |
521 | // Determine whether a write is redundant (stores only values that are |
522 | // already present in the written array elements) and remove it if this |
523 | // is the case. |
524 | if (IsOrdered && MA->isMustWrite() && |
525 | (isa<StoreInst>(Val: MA->getAccessInstruction()) || |
526 | MA->isOriginalScalarKind())) { |
527 | Value *StoredVal = MA->tryGetValueStored(); |
528 | if (!StoredVal) |
529 | StoredVal = MA->getAccessValue(); |
530 | |
531 | if (StoredVal) { |
532 | // Lookup in the set of known values. |
533 | isl::map AccRelStoredVal = isl::map::from_domain_and_range( |
534 | domain: AccRelWrapped, range: makeValueSet(StoredVal)); |
535 | if (isl::union_map(AccRelStoredVal).is_subset(umap2: Known)) { |
536 | POLLY_DEBUG(dbgs() << "Cleanup of " << MA << ":\n" ); |
537 | POLLY_DEBUG(dbgs() << " Scalar: " << *StoredVal << "\n" ); |
538 | POLLY_DEBUG(dbgs() << " AccRel: " << AccRel << "\n" ); |
539 | |
540 | Stmt.removeSingleMemoryAccess(MA); |
541 | |
542 | RedundantWritesRemoved++; |
543 | TotalRedundantWritesRemoved[CallNo]++; |
544 | } |
545 | } |
546 | } |
547 | |
548 | // Update the know values set. |
549 | if (MA->isRead()) { |
550 | // Loaded values are the currently known values of the array element |
551 | // it was loaded from. |
552 | Value *LoadedVal = MA->getAccessValue(); |
553 | if (LoadedVal && IsOrdered) { |
554 | isl::map AccRelVal = isl::map::from_domain_and_range( |
555 | domain: AccRelWrapped, range: makeValueSet(LoadedVal)); |
556 | |
557 | Known = Known.unite(umap2: AccRelVal); |
558 | } |
559 | } else if (MA->isWrite()) { |
560 | // Remove (possibly) overwritten values from the known elements set. |
561 | // We remove all elements of the accessed array to avoid too complex |
562 | // isl sets. |
563 | isl::set AccRelUniv = isl::set::universe(space: AccRelWrapped.get_space()); |
564 | Known = Known.subtract_domain(dom: AccRelUniv); |
565 | |
566 | // At this point, we could add the written value of must-writes. |
567 | // However, writing same values is already handled by |
568 | // coalesceWrites(). |
569 | } |
570 | } |
571 | } |
572 | } |
573 | |
574 | /// Remove statements without side effects. |
575 | void SimplifyImpl::removeUnnecessaryStmts() { |
576 | auto NumStmtsBefore = S->getSize(); |
577 | S->simplifySCoP(AfterHoisting: true); |
578 | assert(NumStmtsBefore >= S->getSize()); |
579 | StmtsRemoved = NumStmtsBefore - S->getSize(); |
580 | POLLY_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore |
581 | << ") statements\n" ); |
582 | TotalStmtsRemoved[CallNo] += StmtsRemoved; |
583 | } |
584 | |
585 | /// Remove accesses that have an empty domain. |
586 | void SimplifyImpl::removeEmptyPartialAccesses() { |
587 | for (ScopStmt &Stmt : *S) { |
588 | // Defer the actual removal to not invalidate iterators. |
589 | SmallVector<MemoryAccess *, 8> DeferredRemove; |
590 | |
591 | for (MemoryAccess *MA : Stmt) { |
592 | if (!MA->isWrite()) |
593 | continue; |
594 | |
595 | isl::map AccRel = MA->getAccessRelation(); |
596 | if (!AccRel.is_empty().is_true()) |
597 | continue; |
598 | |
599 | POLLY_DEBUG( |
600 | dbgs() << "Removing " << MA |
601 | << " because it's a partial access that never occurs\n" ); |
602 | DeferredRemove.push_back(Elt: MA); |
603 | } |
604 | |
605 | for (MemoryAccess *MA : DeferredRemove) { |
606 | Stmt.removeSingleMemoryAccess(MA); |
607 | EmptyPartialAccessesRemoved++; |
608 | TotalEmptyPartialAccessesRemoved[CallNo]++; |
609 | } |
610 | } |
611 | } |
612 | |
613 | /// Mark all reachable instructions and access, and sweep those that are not |
614 | /// reachable. |
615 | void SimplifyImpl::markAndSweep(LoopInfo *LI) { |
616 | DenseSet<MemoryAccess *> UsedMA; |
617 | DenseSet<VirtualInstruction> UsedInsts; |
618 | |
619 | // Get all reachable instructions and accesses. |
620 | markReachable(S, LI, UsedInsts, UsedAccs&: UsedMA); |
621 | |
622 | // Remove all non-reachable accesses. |
623 | // We need get all MemoryAccesses first, in order to not invalidate the |
624 | // iterators when removing them. |
625 | SmallVector<MemoryAccess *, 64> AllMAs; |
626 | for (ScopStmt &Stmt : *S) |
627 | AllMAs.append(in_start: Stmt.begin(), in_end: Stmt.end()); |
628 | |
629 | for (MemoryAccess *MA : AllMAs) { |
630 | if (UsedMA.count(V: MA)) |
631 | continue; |
632 | POLLY_DEBUG(dbgs() << "Removing " << MA |
633 | << " because its value is not used\n" ); |
634 | ScopStmt *Stmt = MA->getStatement(); |
635 | Stmt->removeSingleMemoryAccess(MA); |
636 | |
637 | DeadAccessesRemoved++; |
638 | TotalDeadAccessesRemoved[CallNo]++; |
639 | } |
640 | |
641 | // Remove all non-reachable instructions. |
642 | for (ScopStmt &Stmt : *S) { |
643 | // Note that for region statements, we can only remove the non-terminator |
644 | // instructions of the entry block. All other instructions are not in the |
645 | // instructions list, but implicitly always part of the statement. |
646 | |
647 | SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(), |
648 | Stmt.insts_end()); |
649 | SmallVector<Instruction *, 32> RemainInsts; |
650 | |
651 | for (Instruction *Inst : AllInsts) { |
652 | auto It = UsedInsts.find(V: {&Stmt, Inst}); |
653 | if (It == UsedInsts.end()) { |
654 | POLLY_DEBUG(dbgs() << "Removing " ; Inst->print(dbgs()); |
655 | dbgs() << " because it is not used\n" ); |
656 | DeadInstructionsRemoved++; |
657 | TotalDeadInstructionsRemoved[CallNo]++; |
658 | continue; |
659 | } |
660 | |
661 | RemainInsts.push_back(Elt: Inst); |
662 | |
663 | // If instructions appear multiple times, keep only the first. |
664 | UsedInsts.erase(I: It); |
665 | } |
666 | |
667 | // Set the new instruction list to be only those we did not remove. |
668 | Stmt.setInstructions(RemainInsts); |
669 | } |
670 | } |
671 | |
672 | /// Print simplification statistics to @p OS. |
673 | void SimplifyImpl::printStatistics(llvm::raw_ostream &OS, int Indent) const { |
674 | OS.indent(NumSpaces: Indent) << "Statistics {\n" ; |
675 | OS.indent(NumSpaces: Indent + 4) << "Empty domains removed: " << EmptyDomainsRemoved |
676 | << '\n'; |
677 | OS.indent(NumSpaces: Indent + 4) << "Overwrites removed: " << OverwritesRemoved << '\n'; |
678 | OS.indent(NumSpaces: Indent + 4) << "Partial writes coalesced: " << WritesCoalesced |
679 | << "\n" ; |
680 | OS.indent(NumSpaces: Indent + 4) << "Redundant writes removed: " |
681 | << RedundantWritesRemoved << "\n" ; |
682 | OS.indent(NumSpaces: Indent + 4) << "Accesses with empty domains removed: " |
683 | << EmptyPartialAccessesRemoved << "\n" ; |
684 | OS.indent(NumSpaces: Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved |
685 | << '\n'; |
686 | OS.indent(NumSpaces: Indent + 4) << "Dead instructions removed: " |
687 | << DeadInstructionsRemoved << '\n'; |
688 | OS.indent(NumSpaces: Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n" ; |
689 | OS.indent(NumSpaces: Indent) << "}\n" ; |
690 | } |
691 | |
692 | /// Print the current state of all MemoryAccesses to @p OS. |
693 | void SimplifyImpl::printAccesses(llvm::raw_ostream &OS, int Indent) const { |
694 | OS.indent(NumSpaces: Indent) << "After accesses {\n" ; |
695 | for (auto &Stmt : *S) { |
696 | OS.indent(NumSpaces: Indent + 4) << Stmt.getBaseName() << "\n" ; |
697 | for (auto *MA : Stmt) |
698 | MA->print(OS); |
699 | } |
700 | OS.indent(NumSpaces: Indent) << "}\n" ; |
701 | } |
702 | |
703 | void SimplifyImpl::run(Scop &S, LoopInfo *LI) { |
704 | // Must not have run before. |
705 | assert(!this->S); |
706 | assert(!isModified()); |
707 | |
708 | // Prepare processing of this SCoP. |
709 | this->S = &S; |
710 | ScopsProcessed[CallNo]++; |
711 | |
712 | POLLY_DEBUG(dbgs() << "Removing statements that are never executed...\n" ); |
713 | removeEmptyDomainStmts(); |
714 | |
715 | POLLY_DEBUG(dbgs() << "Removing partial writes that never happen...\n" ); |
716 | removeEmptyPartialAccesses(); |
717 | |
718 | POLLY_DEBUG(dbgs() << "Removing overwrites...\n" ); |
719 | removeOverwrites(); |
720 | |
721 | POLLY_DEBUG(dbgs() << "Coalesce partial writes...\n" ); |
722 | coalesceWrites(); |
723 | |
724 | POLLY_DEBUG(dbgs() << "Removing redundant writes...\n" ); |
725 | removeRedundantWrites(); |
726 | |
727 | POLLY_DEBUG(dbgs() << "Cleanup unused accesses...\n" ); |
728 | markAndSweep(LI); |
729 | |
730 | POLLY_DEBUG(dbgs() << "Removing statements without side effects...\n" ); |
731 | removeUnnecessaryStmts(); |
732 | |
733 | if (isModified()) |
734 | ScopsModified[CallNo]++; |
735 | POLLY_DEBUG(dbgs() << "\nFinal Scop:\n" ); |
736 | POLLY_DEBUG(dbgs() << S); |
737 | |
738 | auto ScopStats = S.getStatistics(); |
739 | NumValueWrites[CallNo] += ScopStats.NumValueWrites; |
740 | NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops; |
741 | NumPHIWrites[CallNo] += ScopStats.NumPHIWrites; |
742 | NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops; |
743 | NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites; |
744 | NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops; |
745 | } |
746 | |
747 | void SimplifyImpl::printScop(raw_ostream &OS, Scop &S) const { |
748 | assert(&S == this->S && |
749 | "Can only print analysis for the last processed SCoP" ); |
750 | printStatistics(OS); |
751 | |
752 | if (!isModified()) { |
753 | OS << "SCoP could not be simplified\n" ; |
754 | return; |
755 | } |
756 | printAccesses(OS); |
757 | } |
758 | |
759 | class SimplifyWrapperPass final : public ScopPass { |
760 | public: |
761 | static char ID; |
762 | int CallNo; |
763 | std::optional<SimplifyImpl> Impl; |
764 | |
765 | explicit SimplifyWrapperPass(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {} |
766 | |
767 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
768 | AU.addRequiredTransitive<ScopInfoRegionPass>(); |
769 | AU.addRequired<LoopInfoWrapperPass>(); |
770 | AU.setPreservesAll(); |
771 | } |
772 | |
773 | bool runOnScop(Scop &S) override { |
774 | LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
775 | |
776 | Impl.emplace(args&: CallNo); |
777 | Impl->run(S, LI); |
778 | |
779 | return false; |
780 | } |
781 | |
782 | void printScop(raw_ostream &OS, Scop &S) const override { |
783 | if (Impl) |
784 | Impl->printScop(OS, S); |
785 | } |
786 | |
787 | void releaseMemory() override { Impl.reset(); } |
788 | }; |
789 | |
790 | char SimplifyWrapperPass::ID; |
791 | |
792 | static llvm::PreservedAnalyses |
793 | runSimplifyUsingNPM(Scop &S, ScopAnalysisManager &SAM, |
794 | ScopStandardAnalysisResults &SAR, SPMUpdater &U, int CallNo, |
795 | raw_ostream *OS) { |
796 | SimplifyImpl Impl(CallNo); |
797 | Impl.run(S, LI: &SAR.LI); |
798 | if (OS) { |
799 | *OS << "Printing analysis 'Polly - Simplify' for region: '" << S.getName() |
800 | << "' in function '" << S.getFunction().getName() << "':\n" ; |
801 | Impl.printScop(OS&: *OS, S); |
802 | } |
803 | |
804 | if (!Impl.isModified()) |
805 | return llvm::PreservedAnalyses::all(); |
806 | |
807 | PreservedAnalyses PA; |
808 | PA.preserveSet<AllAnalysesOn<Module>>(); |
809 | PA.preserveSet<AllAnalysesOn<Function>>(); |
810 | PA.preserveSet<AllAnalysesOn<Loop>>(); |
811 | return PA; |
812 | } |
813 | |
814 | } // anonymous namespace |
815 | |
816 | llvm::PreservedAnalyses SimplifyPass::run(Scop &S, ScopAnalysisManager &SAM, |
817 | ScopStandardAnalysisResults &SAR, |
818 | SPMUpdater &U) { |
819 | return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, OS: nullptr); |
820 | } |
821 | |
822 | llvm::PreservedAnalyses |
823 | SimplifyPrinterPass::run(Scop &S, ScopAnalysisManager &SAM, |
824 | ScopStandardAnalysisResults &SAR, SPMUpdater &U) { |
825 | return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, OS: &OS); |
826 | } |
827 | |
828 | SmallVector<MemoryAccess *, 32> polly::getAccessesInOrder(ScopStmt &Stmt) { |
829 | SmallVector<MemoryAccess *, 32> Accesses; |
830 | |
831 | for (MemoryAccess *MemAcc : Stmt) |
832 | if (isImplicitRead(MA: MemAcc)) |
833 | Accesses.push_back(Elt: MemAcc); |
834 | |
835 | for (MemoryAccess *MemAcc : Stmt) |
836 | if (isExplicitAccess(MA: MemAcc)) |
837 | Accesses.push_back(Elt: MemAcc); |
838 | |
839 | for (MemoryAccess *MemAcc : Stmt) |
840 | if (isImplicitWrite(MA: MemAcc)) |
841 | Accesses.push_back(Elt: MemAcc); |
842 | |
843 | return Accesses; |
844 | } |
845 | |
846 | Pass *polly::createSimplifyWrapperPass(int CallNo) { |
847 | return new SimplifyWrapperPass(CallNo); |
848 | } |
849 | |
850 | INITIALIZE_PASS_BEGIN(SimplifyWrapperPass, "polly-simplify" , "Polly - Simplify" , |
851 | false, false) |
852 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
853 | INITIALIZE_PASS_END(SimplifyWrapperPass, "polly-simplify" , "Polly - Simplify" , |
854 | false, false) |
855 | |
856 | //===----------------------------------------------------------------------===// |
857 | |
858 | namespace { |
859 | /// Print result from SimplifyWrapperPass. |
860 | class SimplifyPrinterLegacyPass final : public ScopPass { |
861 | public: |
862 | static char ID; |
863 | |
864 | SimplifyPrinterLegacyPass() : SimplifyPrinterLegacyPass(outs()) {} |
865 | explicit SimplifyPrinterLegacyPass(llvm::raw_ostream &OS) |
866 | : ScopPass(ID), OS(OS) {} |
867 | |
868 | bool runOnScop(Scop &S) override { |
869 | SimplifyWrapperPass &P = getAnalysis<SimplifyWrapperPass>(); |
870 | |
871 | OS << "Printing analysis '" << P.getPassName() << "' for region: '" |
872 | << S.getRegion().getNameStr() << "' in function '" |
873 | << S.getFunction().getName() << "':\n" ; |
874 | P.printScop(OS, S); |
875 | |
876 | return false; |
877 | } |
878 | |
879 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
880 | ScopPass::getAnalysisUsage(AU); |
881 | AU.addRequired<SimplifyWrapperPass>(); |
882 | AU.setPreservesAll(); |
883 | } |
884 | |
885 | private: |
886 | llvm::raw_ostream &OS; |
887 | }; |
888 | |
889 | char SimplifyPrinterLegacyPass::ID = 0; |
890 | } // namespace |
891 | |
892 | Pass *polly::createSimplifyPrinterLegacyPass(raw_ostream &OS) { |
893 | return new SimplifyPrinterLegacyPass(OS); |
894 | } |
895 | |
896 | INITIALIZE_PASS_BEGIN(SimplifyPrinterLegacyPass, "polly-print-simplify" , |
897 | "Polly - Print Simplify actions" , false, false) |
898 | INITIALIZE_PASS_DEPENDENCY(SimplifyWrapperPass) |
899 | INITIALIZE_PASS_END(SimplifyPrinterLegacyPass, "polly-print-simplify" , |
900 | "Polly - Print Simplify actions" , false, false) |
901 | |