| 1 | //===------ Simplify.cpp ----------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Simplify a SCoP by removing unnecessary statements and accesses. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "polly/Simplify.h" |
| 14 | #include "polly/ScopInfo.h" |
| 15 | #include "polly/ScopPass.h" |
| 16 | #include "polly/Support/GICHelper.h" |
| 17 | #include "polly/Support/ISLOStream.h" |
| 18 | #include "polly/Support/ISLTools.h" |
| 19 | #include "polly/Support/VirtualInstruction.h" |
| 20 | #include "llvm/ADT/Statistic.h" |
| 21 | #include "llvm/InitializePasses.h" |
| 22 | #include "llvm/Support/Debug.h" |
| 23 | #include <optional> |
| 24 | |
| 25 | #include "polly/Support/PollyDebug.h" |
| 26 | #define DEBUG_TYPE "polly-simplify" |
| 27 | |
| 28 | using namespace llvm; |
| 29 | using namespace polly; |
| 30 | |
| 31 | namespace { |
| 32 | |
| 33 | #define TWO_STATISTICS(VARNAME, DESC) \ |
| 34 | static llvm::Statistic VARNAME[2] = { \ |
| 35 | {DEBUG_TYPE, #VARNAME "0", DESC " (first)"}, \ |
| 36 | {DEBUG_TYPE, #VARNAME "1", DESC " (second)"}} |
| 37 | |
| 38 | /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid |
| 39 | /// that the analysis of accesses in a statement is becoming too complex. Chosen |
| 40 | /// to be relatively small because all the common cases should access only few |
| 41 | /// array elements per statement. |
| 42 | static unsigned const SimplifyMaxDisjuncts = 4; |
| 43 | |
| 44 | TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed" ); |
| 45 | TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified" ); |
| 46 | |
| 47 | TWO_STATISTICS(TotalEmptyDomainsRemoved, |
| 48 | "Number of statement with empty domains removed in any SCoP" ); |
| 49 | TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes" ); |
| 50 | TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another" ); |
| 51 | TWO_STATISTICS(TotalRedundantWritesRemoved, |
| 52 | "Number of writes of same value removed in any SCoP" ); |
| 53 | TWO_STATISTICS(TotalEmptyPartialAccessesRemoved, |
| 54 | "Number of empty partial accesses removed" ); |
| 55 | TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed" ); |
| 56 | TWO_STATISTICS(TotalDeadInstructionsRemoved, |
| 57 | "Number of unused instructions removed" ); |
| 58 | TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP" ); |
| 59 | |
| 60 | TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify" ); |
| 61 | TWO_STATISTICS( |
| 62 | NumValueWritesInLoops, |
| 63 | "Number of scalar value writes nested in affine loops after Simplify" ); |
| 64 | TWO_STATISTICS(NumPHIWrites, |
| 65 | "Number of scalar phi writes after the first simplification" ); |
| 66 | TWO_STATISTICS( |
| 67 | NumPHIWritesInLoops, |
| 68 | "Number of scalar phi writes nested in affine loops after Simplify" ); |
| 69 | TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify" ); |
| 70 | TWO_STATISTICS( |
| 71 | NumSingletonWritesInLoops, |
| 72 | "Number of singleton writes nested in affine loops after Simplify" ); |
| 73 | |
| 74 | static bool isImplicitRead(MemoryAccess *MA) { |
| 75 | return MA->isRead() && MA->isOriginalScalarKind(); |
| 76 | } |
| 77 | |
| 78 | static bool isExplicitAccess(MemoryAccess *MA) { |
| 79 | return MA->isOriginalArrayKind(); |
| 80 | } |
| 81 | |
| 82 | static bool isImplicitWrite(MemoryAccess *MA) { |
| 83 | return MA->isWrite() && MA->isOriginalScalarKind(); |
| 84 | } |
| 85 | |
| 86 | /// Like isl::union_map::unite, but may also return an underapproximated |
| 87 | /// result if getting too complex. |
| 88 | /// |
| 89 | /// This is implemented by adding disjuncts to the results until the limit is |
| 90 | /// reached. |
| 91 | static isl::union_map underapproximatedAddMap(isl::union_map UMap, |
| 92 | isl::map Map) { |
| 93 | if (UMap.is_null() || Map.is_null()) |
| 94 | return {}; |
| 95 | |
| 96 | isl::map PrevMap = UMap.extract_map(space: Map.get_space()); |
| 97 | |
| 98 | // Fast path: If known that we cannot exceed the disjunct limit, just add |
| 99 | // them. |
| 100 | if (unsignedFromIslSize(Size: PrevMap.n_basic_map()) + |
| 101 | unsignedFromIslSize(Size: Map.n_basic_map()) <= |
| 102 | SimplifyMaxDisjuncts) |
| 103 | return UMap.unite(umap2: Map); |
| 104 | |
| 105 | isl::map Result = isl::map::empty(space: PrevMap.get_space()); |
| 106 | for (isl::basic_map BMap : PrevMap.get_basic_map_list()) { |
| 107 | if (unsignedFromIslSize(Size: Result.n_basic_map()) > SimplifyMaxDisjuncts) |
| 108 | break; |
| 109 | Result = Result.unite(map2: BMap); |
| 110 | } |
| 111 | for (isl::basic_map BMap : Map.get_basic_map_list()) { |
| 112 | if (unsignedFromIslSize(Size: Result.n_basic_map()) > SimplifyMaxDisjuncts) |
| 113 | break; |
| 114 | Result = Result.unite(map2: BMap); |
| 115 | } |
| 116 | |
| 117 | isl::union_map UResult = |
| 118 | UMap.subtract(umap2: isl::map::universe(space: PrevMap.get_space())); |
| 119 | UResult.unite(umap2: Result); |
| 120 | |
| 121 | return UResult; |
| 122 | } |
| 123 | |
| 124 | class SimplifyImpl final { |
| 125 | private: |
| 126 | /// The invocation id (if there are multiple instances in the pass manager's |
| 127 | /// pipeline) to determine which statistics to update. |
| 128 | int CallNo; |
| 129 | |
| 130 | /// The last/current SCoP that is/has been processed. |
| 131 | Scop *S = nullptr; |
| 132 | |
| 133 | /// Number of statements with empty domains removed from the SCoP. |
| 134 | int EmptyDomainsRemoved = 0; |
| 135 | |
| 136 | /// Number of writes that are overwritten anyway. |
| 137 | int OverwritesRemoved = 0; |
| 138 | |
| 139 | /// Number of combined writes. |
| 140 | int WritesCoalesced = 0; |
| 141 | |
| 142 | /// Number of redundant writes removed from this SCoP. |
| 143 | int RedundantWritesRemoved = 0; |
| 144 | |
| 145 | /// Number of writes with empty access domain removed. |
| 146 | int EmptyPartialAccessesRemoved = 0; |
| 147 | |
| 148 | /// Number of unused accesses removed from this SCoP. |
| 149 | int DeadAccessesRemoved = 0; |
| 150 | |
| 151 | /// Number of unused instructions removed from this SCoP. |
| 152 | int DeadInstructionsRemoved = 0; |
| 153 | |
| 154 | /// Number of unnecessary statements removed from the SCoP. |
| 155 | int StmtsRemoved = 0; |
| 156 | |
| 157 | /// Remove statements that are never executed due to their domains being |
| 158 | /// empty. |
| 159 | /// |
| 160 | /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's |
| 161 | /// effective domain, i.e. including the SCoP's context as used by some other |
| 162 | /// simplification methods in this pass. This is necessary because the |
| 163 | /// analysis on empty domains is unreliable, e.g. remove a scalar value |
| 164 | /// definition MemoryAccesses, but not its use. |
| 165 | void removeEmptyDomainStmts(); |
| 166 | |
| 167 | /// Remove writes that are overwritten unconditionally later in the same |
| 168 | /// statement. |
| 169 | /// |
| 170 | /// There must be no read of the same value between the write (that is to be |
| 171 | /// removed) and the overwrite. |
| 172 | void removeOverwrites(); |
| 173 | |
| 174 | /// Combine writes that write the same value if possible. |
| 175 | /// |
| 176 | /// This function is able to combine: |
| 177 | /// - Partial writes with disjoint domain. |
| 178 | /// - Writes that write to the same array element. |
| 179 | /// |
| 180 | /// In all cases, both writes must write the same values. |
| 181 | void coalesceWrites(); |
| 182 | |
| 183 | /// Remove writes that just write the same value already stored in the |
| 184 | /// element. |
| 185 | void removeRedundantWrites(); |
| 186 | |
| 187 | /// Remove statements without side effects. |
| 188 | void removeUnnecessaryStmts(); |
| 189 | |
| 190 | /// Remove accesses that have an empty domain. |
| 191 | void removeEmptyPartialAccesses(); |
| 192 | |
| 193 | /// Mark all reachable instructions and access, and sweep those that are not |
| 194 | /// reachable. |
| 195 | void markAndSweep(LoopInfo *LI); |
| 196 | |
| 197 | /// Print simplification statistics to @p OS. |
| 198 | void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const; |
| 199 | |
| 200 | /// Print the current state of all MemoryAccesses to @p OS. |
| 201 | void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const; |
| 202 | |
| 203 | public: |
| 204 | explicit SimplifyImpl(int CallNo = 0) : CallNo(CallNo) {} |
| 205 | |
| 206 | void run(Scop &S, LoopInfo *LI); |
| 207 | |
| 208 | void printScop(raw_ostream &OS, Scop &S) const; |
| 209 | |
| 210 | /// Return whether at least one simplification has been applied. |
| 211 | bool isModified() const; |
| 212 | }; |
| 213 | |
| 214 | /// Return whether at least one simplification has been applied. |
| 215 | bool SimplifyImpl::isModified() const { |
| 216 | return EmptyDomainsRemoved > 0 || OverwritesRemoved > 0 || |
| 217 | WritesCoalesced > 0 || RedundantWritesRemoved > 0 || |
| 218 | EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 || |
| 219 | DeadInstructionsRemoved > 0 || StmtsRemoved > 0; |
| 220 | } |
| 221 | |
| 222 | /// Remove statements that are never executed due to their domains being |
| 223 | /// empty. |
| 224 | /// |
| 225 | /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's |
| 226 | /// effective domain, i.e. including the SCoP's context as used by some other |
| 227 | /// simplification methods in this pass. This is necessary because the |
| 228 | /// analysis on empty domains is unreliable, e.g. remove a scalar value |
| 229 | /// definition MemoryAccesses, but not its use. |
| 230 | void SimplifyImpl::removeEmptyDomainStmts() { |
| 231 | size_t NumStmtsBefore = S->getSize(); |
| 232 | |
| 233 | S->removeStmts(ShouldDelete: [](ScopStmt &Stmt) -> bool { |
| 234 | auto EffectiveDomain = |
| 235 | Stmt.getDomain().intersect_params(params: Stmt.getParent()->getContext()); |
| 236 | return EffectiveDomain.is_empty(); |
| 237 | }); |
| 238 | |
| 239 | assert(NumStmtsBefore >= S->getSize()); |
| 240 | EmptyDomainsRemoved = NumStmtsBefore - S->getSize(); |
| 241 | POLLY_DEBUG(dbgs() << "Removed " << EmptyDomainsRemoved << " (of " |
| 242 | << NumStmtsBefore << ") statements with empty domains \n" ); |
| 243 | TotalEmptyDomainsRemoved[CallNo] += EmptyDomainsRemoved; |
| 244 | } |
| 245 | |
| 246 | /// Remove writes that are overwritten unconditionally later in the same |
| 247 | /// statement. |
| 248 | /// |
| 249 | /// There must be no read of the same value between the write (that is to be |
| 250 | /// removed) and the overwrite. |
| 251 | void SimplifyImpl::removeOverwrites() { |
| 252 | for (auto &Stmt : *S) { |
| 253 | isl::set Domain = Stmt.getDomain(); |
| 254 | isl::union_map WillBeOverwritten = isl::union_map::empty(ctx: S->getIslCtx()); |
| 255 | |
| 256 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
| 257 | |
| 258 | // Iterate in reverse order, so the overwrite comes before the write that |
| 259 | // is to be removed. |
| 260 | for (auto *MA : reverse(C&: Accesses)) { |
| 261 | |
| 262 | // In region statements, the explicit accesses can be in blocks that are |
| 263 | // can be executed in any order. We therefore process only the implicit |
| 264 | // writes and stop after that. |
| 265 | if (Stmt.isRegionStmt() && isExplicitAccess(MA)) |
| 266 | break; |
| 267 | |
| 268 | auto AccRel = MA->getAccessRelation(); |
| 269 | AccRel = AccRel.intersect_domain(set: Domain); |
| 270 | AccRel = AccRel.intersect_params(params: S->getContext()); |
| 271 | |
| 272 | // If a value is read in-between, do not consider it as overwritten. |
| 273 | if (MA->isRead()) { |
| 274 | // Invalidate all overwrites for the array it accesses to avoid too |
| 275 | // complex isl sets. |
| 276 | isl::map AccRelUniv = isl::map::universe(space: AccRel.get_space()); |
| 277 | WillBeOverwritten = WillBeOverwritten.subtract(umap2: AccRelUniv); |
| 278 | continue; |
| 279 | } |
| 280 | |
| 281 | // If all of a write's elements are overwritten, remove it. |
| 282 | isl::union_map AccRelUnion = AccRel; |
| 283 | if (AccRelUnion.is_subset(umap2: WillBeOverwritten)) { |
| 284 | POLLY_DEBUG(dbgs() << "Removing " << MA |
| 285 | << " which will be overwritten anyway\n" ); |
| 286 | |
| 287 | Stmt.removeSingleMemoryAccess(MA); |
| 288 | OverwritesRemoved++; |
| 289 | TotalOverwritesRemoved[CallNo]++; |
| 290 | } |
| 291 | |
| 292 | // Unconditional writes overwrite other values. |
| 293 | if (MA->isMustWrite()) { |
| 294 | // Avoid too complex isl sets. If necessary, throw away some of the |
| 295 | // knowledge. |
| 296 | WillBeOverwritten = underapproximatedAddMap(UMap: WillBeOverwritten, Map: AccRel); |
| 297 | } |
| 298 | } |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | /// Combine writes that write the same value if possible. |
| 303 | /// |
| 304 | /// This function is able to combine: |
| 305 | /// - Partial writes with disjoint domain. |
| 306 | /// - Writes that write to the same array element. |
| 307 | /// |
| 308 | /// In all cases, both writes must write the same values. |
| 309 | void SimplifyImpl::coalesceWrites() { |
| 310 | for (auto &Stmt : *S) { |
| 311 | isl::set Domain = Stmt.getDomain().intersect_params(params: S->getContext()); |
| 312 | |
| 313 | // We let isl do the lookup for the same-value condition. For this, we |
| 314 | // wrap llvm::Value into an isl::set such that isl can do the lookup in |
| 315 | // its hashtable implementation. llvm::Values are only compared within a |
| 316 | // ScopStmt, so the map can be local to this scope. TODO: Refactor with |
| 317 | // ZoneAlgorithm::makeValueSet() |
| 318 | SmallDenseMap<Value *, isl::set> ValueSets; |
| 319 | auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { |
| 320 | assert(V); |
| 321 | isl::set &Result = ValueSets[V]; |
| 322 | if (Result.is_null()) { |
| 323 | isl::ctx Ctx = S->getIslCtx(); |
| 324 | std::string Name = getIslCompatibleName( |
| 325 | Prefix: "Val" , Val: V, Number: ValueSets.size() - 1, Suffix: std::string(), UseInstructionNames); |
| 326 | isl::id Id = isl::id::alloc(ctx: Ctx, name: Name, user: V); |
| 327 | Result = isl::set::universe( |
| 328 | space: isl::space(Ctx, 0, 0).set_tuple_id(type: isl::dim::set, id: Id)); |
| 329 | } |
| 330 | return Result; |
| 331 | }; |
| 332 | |
| 333 | // List of all eligible (for coalescing) writes of the future. |
| 334 | // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } |
| 335 | isl::union_map FutureWrites = isl::union_map::empty(ctx: S->getIslCtx()); |
| 336 | |
| 337 | // Iterate over accesses from the last to the first. |
| 338 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
| 339 | for (MemoryAccess *MA : reverse(C&: Accesses)) { |
| 340 | // In region statements, the explicit accesses can be in blocks that can |
| 341 | // be executed in any order. We therefore process only the implicit |
| 342 | // writes and stop after that. |
| 343 | if (Stmt.isRegionStmt() && isExplicitAccess(MA)) |
| 344 | break; |
| 345 | |
| 346 | // { Domain[] -> Element[] } |
| 347 | isl::map AccRel = MA->getLatestAccessRelation().intersect_domain(set: Domain); |
| 348 | |
| 349 | // { [Domain[] -> Element[]] } |
| 350 | isl::set AccRelWrapped = AccRel.wrap(); |
| 351 | |
| 352 | // { Value[] } |
| 353 | isl::set ValSet; |
| 354 | |
| 355 | if (MA->isMustWrite() && (MA->isOriginalScalarKind() || |
| 356 | isa<StoreInst>(Val: MA->getAccessInstruction()))) { |
| 357 | // Normally, tryGetValueStored() should be used to determine which |
| 358 | // element is written, but it can return nullptr; For PHI accesses, |
| 359 | // getAccessValue() returns the PHI instead of the PHI's incoming |
| 360 | // value. In this case, where we only compare values of a single |
| 361 | // statement, this is fine, because within a statement, a PHI in a |
| 362 | // successor block has always the same value as the incoming write. We |
| 363 | // still preferably use the incoming value directly so we also catch |
| 364 | // direct uses of that. |
| 365 | Value *StoredVal = MA->tryGetValueStored(); |
| 366 | if (!StoredVal) |
| 367 | StoredVal = MA->getAccessValue(); |
| 368 | ValSet = makeValueSet(StoredVal); |
| 369 | |
| 370 | // { Domain[] } |
| 371 | isl::set AccDomain = AccRel.domain(); |
| 372 | |
| 373 | // Parts of the statement's domain that is not written by this access. |
| 374 | isl::set UndefDomain = Domain.subtract(set2: AccDomain); |
| 375 | |
| 376 | // { Element[] } |
| 377 | isl::set ElementUniverse = |
| 378 | isl::set::universe(space: AccRel.get_space().range()); |
| 379 | |
| 380 | // { Domain[] -> Element[] } |
| 381 | isl::map UndefAnything = |
| 382 | isl::map::from_domain_and_range(domain: UndefDomain, range: ElementUniverse); |
| 383 | |
| 384 | // We are looking a compatible write access. The other write can |
| 385 | // access these elements... |
| 386 | isl::map AllowedAccesses = AccRel.unite(map2: UndefAnything); |
| 387 | |
| 388 | // ... and must write the same value. |
| 389 | // { [Domain[] -> Element[]] -> Value[] } |
| 390 | isl::map Filter = |
| 391 | isl::map::from_domain_and_range(domain: AllowedAccesses.wrap(), range: ValSet); |
| 392 | |
| 393 | // Lookup future write that fulfills these conditions. |
| 394 | // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] } |
| 395 | isl::union_map Filtered = |
| 396 | FutureWrites.uncurry().intersect_domain(uset: Filter.wrap()); |
| 397 | |
| 398 | // Iterate through the candidates. |
| 399 | for (isl::map Map : Filtered.get_map_list()) { |
| 400 | MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space() |
| 401 | .get_tuple_id(type: isl::dim::out) |
| 402 | .get_user(); |
| 403 | |
| 404 | isl::map OtherAccRel = |
| 405 | OtherMA->getLatestAccessRelation().intersect_domain(set: Domain); |
| 406 | |
| 407 | // The filter only guaranteed that some of OtherMA's accessed |
| 408 | // elements are allowed. Verify that it only accesses allowed |
| 409 | // elements. Otherwise, continue with the next candidate. |
| 410 | if (!OtherAccRel.is_subset(map2: AllowedAccesses).is_true()) |
| 411 | continue; |
| 412 | |
| 413 | // The combined access relation. |
| 414 | // { Domain[] -> Element[] } |
| 415 | isl::map NewAccRel = AccRel.unite(map2: OtherAccRel); |
| 416 | simplify(Map&: NewAccRel); |
| 417 | |
| 418 | // Carry out the coalescing. |
| 419 | Stmt.removeSingleMemoryAccess(MA); |
| 420 | OtherMA->setNewAccessRelation(NewAccRel); |
| 421 | |
| 422 | // We removed MA, OtherMA takes its role. |
| 423 | MA = OtherMA; |
| 424 | |
| 425 | TotalWritesCoalesced[CallNo]++; |
| 426 | WritesCoalesced++; |
| 427 | |
| 428 | // Don't look for more candidates. |
| 429 | break; |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | // Two writes cannot be coalesced if there is another access (to some of |
| 434 | // the written elements) between them. Remove all visited write accesses |
| 435 | // from the list of eligible writes. Don't just remove the accessed |
| 436 | // elements, but any MemoryAccess that touches any of the invalidated |
| 437 | // elements. |
| 438 | SmallPtrSet<MemoryAccess *, 2> TouchedAccesses; |
| 439 | for (isl::map Map : |
| 440 | FutureWrites.intersect_domain(uset: AccRelWrapped).get_map_list()) { |
| 441 | MemoryAccess *MA = (MemoryAccess *)Map.get_space() |
| 442 | .range() |
| 443 | .unwrap() |
| 444 | .get_tuple_id(type: isl::dim::out) |
| 445 | .get_user(); |
| 446 | TouchedAccesses.insert(Ptr: MA); |
| 447 | } |
| 448 | isl::union_map NewFutureWrites = |
| 449 | isl::union_map::empty(ctx: FutureWrites.ctx()); |
| 450 | for (isl::map FutureWrite : FutureWrites.get_map_list()) { |
| 451 | MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space() |
| 452 | .range() |
| 453 | .unwrap() |
| 454 | .get_tuple_id(type: isl::dim::out) |
| 455 | .get_user(); |
| 456 | if (!TouchedAccesses.count(Ptr: MA)) |
| 457 | NewFutureWrites = NewFutureWrites.unite(umap2: FutureWrite); |
| 458 | } |
| 459 | FutureWrites = NewFutureWrites; |
| 460 | |
| 461 | if (MA->isMustWrite() && !ValSet.is_null()) { |
| 462 | // { MemoryAccess[] } |
| 463 | auto AccSet = |
| 464 | isl::set::universe(space: isl::space(S->getIslCtx(), 0, 0) |
| 465 | .set_tuple_id(type: isl::dim::set, id: MA->getId())); |
| 466 | |
| 467 | // { Val[] -> MemoryAccess[] } |
| 468 | isl::map ValAccSet = isl::map::from_domain_and_range(domain: ValSet, range: AccSet); |
| 469 | |
| 470 | // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } |
| 471 | isl::map AccRelValAcc = |
| 472 | isl::map::from_domain_and_range(domain: AccRelWrapped, range: ValAccSet.wrap()); |
| 473 | FutureWrites = FutureWrites.unite(umap2: AccRelValAcc); |
| 474 | } |
| 475 | } |
| 476 | } |
| 477 | } |
| 478 | |
| 479 | /// Remove writes that just write the same value already stored in the |
| 480 | /// element. |
| 481 | void SimplifyImpl::removeRedundantWrites() { |
| 482 | for (auto &Stmt : *S) { |
| 483 | SmallDenseMap<Value *, isl::set> ValueSets; |
| 484 | auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { |
| 485 | assert(V); |
| 486 | isl::set &Result = ValueSets[V]; |
| 487 | if (Result.is_null()) { |
| 488 | isl_ctx *Ctx = S->getIslCtx().get(); |
| 489 | std::string Name = getIslCompatibleName( |
| 490 | Prefix: "Val" , Val: V, Number: ValueSets.size() - 1, Suffix: std::string(), UseInstructionNames); |
| 491 | isl::id Id = isl::manage(ptr: isl_id_alloc(ctx: Ctx, name: Name.c_str(), user: V)); |
| 492 | Result = isl::set::universe( |
| 493 | space: isl::space(Ctx, 0, 0).set_tuple_id(type: isl::dim::set, id: Id)); |
| 494 | } |
| 495 | return Result; |
| 496 | }; |
| 497 | |
| 498 | isl::set Domain = Stmt.getDomain(); |
| 499 | Domain = Domain.intersect_params(params: S->getContext()); |
| 500 | |
| 501 | // List of element reads that still have the same value while iterating |
| 502 | // through the MemoryAccesses. |
| 503 | // { [Domain[] -> Element[]] -> Val[] } |
| 504 | isl::union_map Known = isl::union_map::empty(ctx: S->getIslCtx()); |
| 505 | |
| 506 | SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); |
| 507 | for (MemoryAccess *MA : Accesses) { |
| 508 | // Is the memory access in a defined order relative to the other |
| 509 | // accesses? In region statements, only the first and the last accesses |
| 510 | // have defined order. Execution of those in the middle may depend on |
| 511 | // runtime conditions an therefore cannot be modified. |
| 512 | bool IsOrdered = |
| 513 | Stmt.isBlockStmt() || MA->isOriginalScalarKind() || |
| 514 | (!S->getBoxedLoops().size() && MA->getAccessInstruction() && |
| 515 | Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent()); |
| 516 | |
| 517 | isl::map AccRel = MA->getAccessRelation(); |
| 518 | AccRel = AccRel.intersect_domain(set: Domain); |
| 519 | isl::set AccRelWrapped = AccRel.wrap(); |
| 520 | |
| 521 | // Determine whether a write is redundant (stores only values that are |
| 522 | // already present in the written array elements) and remove it if this |
| 523 | // is the case. |
| 524 | if (IsOrdered && MA->isMustWrite() && |
| 525 | (isa<StoreInst>(Val: MA->getAccessInstruction()) || |
| 526 | MA->isOriginalScalarKind())) { |
| 527 | Value *StoredVal = MA->tryGetValueStored(); |
| 528 | if (!StoredVal) |
| 529 | StoredVal = MA->getAccessValue(); |
| 530 | |
| 531 | if (StoredVal) { |
| 532 | // Lookup in the set of known values. |
| 533 | isl::map AccRelStoredVal = isl::map::from_domain_and_range( |
| 534 | domain: AccRelWrapped, range: makeValueSet(StoredVal)); |
| 535 | if (isl::union_map(AccRelStoredVal).is_subset(umap2: Known)) { |
| 536 | POLLY_DEBUG(dbgs() << "Cleanup of " << MA << ":\n" ); |
| 537 | POLLY_DEBUG(dbgs() << " Scalar: " << *StoredVal << "\n" ); |
| 538 | POLLY_DEBUG(dbgs() << " AccRel: " << AccRel << "\n" ); |
| 539 | |
| 540 | Stmt.removeSingleMemoryAccess(MA); |
| 541 | |
| 542 | RedundantWritesRemoved++; |
| 543 | TotalRedundantWritesRemoved[CallNo]++; |
| 544 | } |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | // Update the know values set. |
| 549 | if (MA->isRead()) { |
| 550 | // Loaded values are the currently known values of the array element |
| 551 | // it was loaded from. |
| 552 | Value *LoadedVal = MA->getAccessValue(); |
| 553 | if (LoadedVal && IsOrdered) { |
| 554 | isl::map AccRelVal = isl::map::from_domain_and_range( |
| 555 | domain: AccRelWrapped, range: makeValueSet(LoadedVal)); |
| 556 | |
| 557 | Known = Known.unite(umap2: AccRelVal); |
| 558 | } |
| 559 | } else if (MA->isWrite()) { |
| 560 | // Remove (possibly) overwritten values from the known elements set. |
| 561 | // We remove all elements of the accessed array to avoid too complex |
| 562 | // isl sets. |
| 563 | isl::set AccRelUniv = isl::set::universe(space: AccRelWrapped.get_space()); |
| 564 | Known = Known.subtract_domain(dom: AccRelUniv); |
| 565 | |
| 566 | // At this point, we could add the written value of must-writes. |
| 567 | // However, writing same values is already handled by |
| 568 | // coalesceWrites(). |
| 569 | } |
| 570 | } |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | /// Remove statements without side effects. |
| 575 | void SimplifyImpl::removeUnnecessaryStmts() { |
| 576 | auto NumStmtsBefore = S->getSize(); |
| 577 | S->simplifySCoP(AfterHoisting: true); |
| 578 | assert(NumStmtsBefore >= S->getSize()); |
| 579 | StmtsRemoved = NumStmtsBefore - S->getSize(); |
| 580 | POLLY_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore |
| 581 | << ") statements\n" ); |
| 582 | TotalStmtsRemoved[CallNo] += StmtsRemoved; |
| 583 | } |
| 584 | |
| 585 | /// Remove accesses that have an empty domain. |
| 586 | void SimplifyImpl::removeEmptyPartialAccesses() { |
| 587 | for (ScopStmt &Stmt : *S) { |
| 588 | // Defer the actual removal to not invalidate iterators. |
| 589 | SmallVector<MemoryAccess *, 8> DeferredRemove; |
| 590 | |
| 591 | for (MemoryAccess *MA : Stmt) { |
| 592 | if (!MA->isWrite()) |
| 593 | continue; |
| 594 | |
| 595 | isl::map AccRel = MA->getAccessRelation(); |
| 596 | if (!AccRel.is_empty().is_true()) |
| 597 | continue; |
| 598 | |
| 599 | POLLY_DEBUG( |
| 600 | dbgs() << "Removing " << MA |
| 601 | << " because it's a partial access that never occurs\n" ); |
| 602 | DeferredRemove.push_back(Elt: MA); |
| 603 | } |
| 604 | |
| 605 | for (MemoryAccess *MA : DeferredRemove) { |
| 606 | Stmt.removeSingleMemoryAccess(MA); |
| 607 | EmptyPartialAccessesRemoved++; |
| 608 | TotalEmptyPartialAccessesRemoved[CallNo]++; |
| 609 | } |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | /// Mark all reachable instructions and access, and sweep those that are not |
| 614 | /// reachable. |
| 615 | void SimplifyImpl::markAndSweep(LoopInfo *LI) { |
| 616 | DenseSet<MemoryAccess *> UsedMA; |
| 617 | DenseSet<VirtualInstruction> UsedInsts; |
| 618 | |
| 619 | // Get all reachable instructions and accesses. |
| 620 | markReachable(S, LI, UsedInsts, UsedAccs&: UsedMA); |
| 621 | |
| 622 | // Remove all non-reachable accesses. |
| 623 | // We need get all MemoryAccesses first, in order to not invalidate the |
| 624 | // iterators when removing them. |
| 625 | SmallVector<MemoryAccess *, 64> AllMAs; |
| 626 | for (ScopStmt &Stmt : *S) |
| 627 | AllMAs.append(in_start: Stmt.begin(), in_end: Stmt.end()); |
| 628 | |
| 629 | for (MemoryAccess *MA : AllMAs) { |
| 630 | if (UsedMA.count(V: MA)) |
| 631 | continue; |
| 632 | POLLY_DEBUG(dbgs() << "Removing " << MA |
| 633 | << " because its value is not used\n" ); |
| 634 | ScopStmt *Stmt = MA->getStatement(); |
| 635 | Stmt->removeSingleMemoryAccess(MA); |
| 636 | |
| 637 | DeadAccessesRemoved++; |
| 638 | TotalDeadAccessesRemoved[CallNo]++; |
| 639 | } |
| 640 | |
| 641 | // Remove all non-reachable instructions. |
| 642 | for (ScopStmt &Stmt : *S) { |
| 643 | // Note that for region statements, we can only remove the non-terminator |
| 644 | // instructions of the entry block. All other instructions are not in the |
| 645 | // instructions list, but implicitly always part of the statement. |
| 646 | |
| 647 | SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(), |
| 648 | Stmt.insts_end()); |
| 649 | SmallVector<Instruction *, 32> RemainInsts; |
| 650 | |
| 651 | for (Instruction *Inst : AllInsts) { |
| 652 | auto It = UsedInsts.find(V: {&Stmt, Inst}); |
| 653 | if (It == UsedInsts.end()) { |
| 654 | POLLY_DEBUG(dbgs() << "Removing " ; Inst->print(dbgs()); |
| 655 | dbgs() << " because it is not used\n" ); |
| 656 | DeadInstructionsRemoved++; |
| 657 | TotalDeadInstructionsRemoved[CallNo]++; |
| 658 | continue; |
| 659 | } |
| 660 | |
| 661 | RemainInsts.push_back(Elt: Inst); |
| 662 | |
| 663 | // If instructions appear multiple times, keep only the first. |
| 664 | UsedInsts.erase(I: It); |
| 665 | } |
| 666 | |
| 667 | // Set the new instruction list to be only those we did not remove. |
| 668 | Stmt.setInstructions(RemainInsts); |
| 669 | } |
| 670 | } |
| 671 | |
| 672 | /// Print simplification statistics to @p OS. |
| 673 | void SimplifyImpl::printStatistics(llvm::raw_ostream &OS, int Indent) const { |
| 674 | OS.indent(NumSpaces: Indent) << "Statistics {\n" ; |
| 675 | OS.indent(NumSpaces: Indent + 4) << "Empty domains removed: " << EmptyDomainsRemoved |
| 676 | << '\n'; |
| 677 | OS.indent(NumSpaces: Indent + 4) << "Overwrites removed: " << OverwritesRemoved << '\n'; |
| 678 | OS.indent(NumSpaces: Indent + 4) << "Partial writes coalesced: " << WritesCoalesced |
| 679 | << "\n" ; |
| 680 | OS.indent(NumSpaces: Indent + 4) << "Redundant writes removed: " |
| 681 | << RedundantWritesRemoved << "\n" ; |
| 682 | OS.indent(NumSpaces: Indent + 4) << "Accesses with empty domains removed: " |
| 683 | << EmptyPartialAccessesRemoved << "\n" ; |
| 684 | OS.indent(NumSpaces: Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved |
| 685 | << '\n'; |
| 686 | OS.indent(NumSpaces: Indent + 4) << "Dead instructions removed: " |
| 687 | << DeadInstructionsRemoved << '\n'; |
| 688 | OS.indent(NumSpaces: Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n" ; |
| 689 | OS.indent(NumSpaces: Indent) << "}\n" ; |
| 690 | } |
| 691 | |
| 692 | /// Print the current state of all MemoryAccesses to @p OS. |
| 693 | void SimplifyImpl::printAccesses(llvm::raw_ostream &OS, int Indent) const { |
| 694 | OS.indent(NumSpaces: Indent) << "After accesses {\n" ; |
| 695 | for (auto &Stmt : *S) { |
| 696 | OS.indent(NumSpaces: Indent + 4) << Stmt.getBaseName() << "\n" ; |
| 697 | for (auto *MA : Stmt) |
| 698 | MA->print(OS); |
| 699 | } |
| 700 | OS.indent(NumSpaces: Indent) << "}\n" ; |
| 701 | } |
| 702 | |
| 703 | void SimplifyImpl::run(Scop &S, LoopInfo *LI) { |
| 704 | // Must not have run before. |
| 705 | assert(!this->S); |
| 706 | assert(!isModified()); |
| 707 | |
| 708 | // Prepare processing of this SCoP. |
| 709 | this->S = &S; |
| 710 | ScopsProcessed[CallNo]++; |
| 711 | |
| 712 | POLLY_DEBUG(dbgs() << "Removing statements that are never executed...\n" ); |
| 713 | removeEmptyDomainStmts(); |
| 714 | |
| 715 | POLLY_DEBUG(dbgs() << "Removing partial writes that never happen...\n" ); |
| 716 | removeEmptyPartialAccesses(); |
| 717 | |
| 718 | POLLY_DEBUG(dbgs() << "Removing overwrites...\n" ); |
| 719 | removeOverwrites(); |
| 720 | |
| 721 | POLLY_DEBUG(dbgs() << "Coalesce partial writes...\n" ); |
| 722 | coalesceWrites(); |
| 723 | |
| 724 | POLLY_DEBUG(dbgs() << "Removing redundant writes...\n" ); |
| 725 | removeRedundantWrites(); |
| 726 | |
| 727 | POLLY_DEBUG(dbgs() << "Cleanup unused accesses...\n" ); |
| 728 | markAndSweep(LI); |
| 729 | |
| 730 | POLLY_DEBUG(dbgs() << "Removing statements without side effects...\n" ); |
| 731 | removeUnnecessaryStmts(); |
| 732 | |
| 733 | if (isModified()) |
| 734 | ScopsModified[CallNo]++; |
| 735 | POLLY_DEBUG(dbgs() << "\nFinal Scop:\n" ); |
| 736 | POLLY_DEBUG(dbgs() << S); |
| 737 | |
| 738 | auto ScopStats = S.getStatistics(); |
| 739 | NumValueWrites[CallNo] += ScopStats.NumValueWrites; |
| 740 | NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops; |
| 741 | NumPHIWrites[CallNo] += ScopStats.NumPHIWrites; |
| 742 | NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops; |
| 743 | NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites; |
| 744 | NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops; |
| 745 | } |
| 746 | |
| 747 | void SimplifyImpl::printScop(raw_ostream &OS, Scop &S) const { |
| 748 | assert(&S == this->S && |
| 749 | "Can only print analysis for the last processed SCoP" ); |
| 750 | printStatistics(OS); |
| 751 | |
| 752 | if (!isModified()) { |
| 753 | OS << "SCoP could not be simplified\n" ; |
| 754 | return; |
| 755 | } |
| 756 | printAccesses(OS); |
| 757 | } |
| 758 | |
| 759 | class SimplifyWrapperPass final : public ScopPass { |
| 760 | public: |
| 761 | static char ID; |
| 762 | int CallNo; |
| 763 | std::optional<SimplifyImpl> Impl; |
| 764 | |
| 765 | explicit SimplifyWrapperPass(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {} |
| 766 | |
| 767 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 768 | AU.addRequiredTransitive<ScopInfoRegionPass>(); |
| 769 | AU.addRequired<LoopInfoWrapperPass>(); |
| 770 | AU.setPreservesAll(); |
| 771 | } |
| 772 | |
| 773 | bool runOnScop(Scop &S) override { |
| 774 | LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
| 775 | |
| 776 | Impl.emplace(args&: CallNo); |
| 777 | Impl->run(S, LI); |
| 778 | |
| 779 | return false; |
| 780 | } |
| 781 | |
| 782 | void printScop(raw_ostream &OS, Scop &S) const override { |
| 783 | if (Impl) |
| 784 | Impl->printScop(OS, S); |
| 785 | } |
| 786 | |
| 787 | void releaseMemory() override { Impl.reset(); } |
| 788 | }; |
| 789 | |
| 790 | char SimplifyWrapperPass::ID; |
| 791 | |
| 792 | static llvm::PreservedAnalyses |
| 793 | runSimplifyUsingNPM(Scop &S, ScopAnalysisManager &SAM, |
| 794 | ScopStandardAnalysisResults &SAR, SPMUpdater &U, int CallNo, |
| 795 | raw_ostream *OS) { |
| 796 | SimplifyImpl Impl(CallNo); |
| 797 | Impl.run(S, LI: &SAR.LI); |
| 798 | if (OS) { |
| 799 | *OS << "Printing analysis 'Polly - Simplify' for region: '" << S.getName() |
| 800 | << "' in function '" << S.getFunction().getName() << "':\n" ; |
| 801 | Impl.printScop(OS&: *OS, S); |
| 802 | } |
| 803 | |
| 804 | if (!Impl.isModified()) |
| 805 | return llvm::PreservedAnalyses::all(); |
| 806 | |
| 807 | PreservedAnalyses PA; |
| 808 | PA.preserveSet<AllAnalysesOn<Module>>(); |
| 809 | PA.preserveSet<AllAnalysesOn<Function>>(); |
| 810 | PA.preserveSet<AllAnalysesOn<Loop>>(); |
| 811 | return PA; |
| 812 | } |
| 813 | |
| 814 | } // anonymous namespace |
| 815 | |
| 816 | llvm::PreservedAnalyses SimplifyPass::run(Scop &S, ScopAnalysisManager &SAM, |
| 817 | ScopStandardAnalysisResults &SAR, |
| 818 | SPMUpdater &U) { |
| 819 | return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, OS: nullptr); |
| 820 | } |
| 821 | |
| 822 | llvm::PreservedAnalyses |
| 823 | SimplifyPrinterPass::run(Scop &S, ScopAnalysisManager &SAM, |
| 824 | ScopStandardAnalysisResults &SAR, SPMUpdater &U) { |
| 825 | return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, OS: &OS); |
| 826 | } |
| 827 | |
| 828 | SmallVector<MemoryAccess *, 32> polly::getAccessesInOrder(ScopStmt &Stmt) { |
| 829 | SmallVector<MemoryAccess *, 32> Accesses; |
| 830 | |
| 831 | for (MemoryAccess *MemAcc : Stmt) |
| 832 | if (isImplicitRead(MA: MemAcc)) |
| 833 | Accesses.push_back(Elt: MemAcc); |
| 834 | |
| 835 | for (MemoryAccess *MemAcc : Stmt) |
| 836 | if (isExplicitAccess(MA: MemAcc)) |
| 837 | Accesses.push_back(Elt: MemAcc); |
| 838 | |
| 839 | for (MemoryAccess *MemAcc : Stmt) |
| 840 | if (isImplicitWrite(MA: MemAcc)) |
| 841 | Accesses.push_back(Elt: MemAcc); |
| 842 | |
| 843 | return Accesses; |
| 844 | } |
| 845 | |
| 846 | Pass *polly::createSimplifyWrapperPass(int CallNo) { |
| 847 | return new SimplifyWrapperPass(CallNo); |
| 848 | } |
| 849 | |
| 850 | INITIALIZE_PASS_BEGIN(SimplifyWrapperPass, "polly-simplify" , "Polly - Simplify" , |
| 851 | false, false) |
| 852 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 853 | INITIALIZE_PASS_END(SimplifyWrapperPass, "polly-simplify" , "Polly - Simplify" , |
| 854 | false, false) |
| 855 | |
| 856 | //===----------------------------------------------------------------------===// |
| 857 | |
| 858 | namespace { |
| 859 | /// Print result from SimplifyWrapperPass. |
| 860 | class SimplifyPrinterLegacyPass final : public ScopPass { |
| 861 | public: |
| 862 | static char ID; |
| 863 | |
| 864 | SimplifyPrinterLegacyPass() : SimplifyPrinterLegacyPass(outs()) {} |
| 865 | explicit SimplifyPrinterLegacyPass(llvm::raw_ostream &OS) |
| 866 | : ScopPass(ID), OS(OS) {} |
| 867 | |
| 868 | bool runOnScop(Scop &S) override { |
| 869 | SimplifyWrapperPass &P = getAnalysis<SimplifyWrapperPass>(); |
| 870 | |
| 871 | OS << "Printing analysis '" << P.getPassName() << "' for region: '" |
| 872 | << S.getRegion().getNameStr() << "' in function '" |
| 873 | << S.getFunction().getName() << "':\n" ; |
| 874 | P.printScop(OS, S); |
| 875 | |
| 876 | return false; |
| 877 | } |
| 878 | |
| 879 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 880 | ScopPass::getAnalysisUsage(AU); |
| 881 | AU.addRequired<SimplifyWrapperPass>(); |
| 882 | AU.setPreservesAll(); |
| 883 | } |
| 884 | |
| 885 | private: |
| 886 | llvm::raw_ostream &OS; |
| 887 | }; |
| 888 | |
| 889 | char SimplifyPrinterLegacyPass::ID = 0; |
| 890 | } // namespace |
| 891 | |
| 892 | Pass *polly::createSimplifyPrinterLegacyPass(raw_ostream &OS) { |
| 893 | return new SimplifyPrinterLegacyPass(OS); |
| 894 | } |
| 895 | |
| 896 | INITIALIZE_PASS_BEGIN(SimplifyPrinterLegacyPass, "polly-print-simplify" , |
| 897 | "Polly - Print Simplify actions" , false, false) |
| 898 | INITIALIZE_PASS_DEPENDENCY(SimplifyWrapperPass) |
| 899 | INITIALIZE_PASS_END(SimplifyPrinterLegacyPass, "polly-print-simplify" , |
| 900 | "Polly - Print Simplify actions" , false, false) |
| 901 | |