1 | //===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Logic for inlining LLVM functions and the definition of the |
10 | // LLVMInliningInterface. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "LLVMInlining.h" |
15 | #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
16 | #include "mlir/IR/Matchers.h" |
17 | #include "mlir/Interfaces/DataLayoutInterfaces.h" |
18 | #include "mlir/Transforms/InliningUtils.h" |
19 | #include "llvm/ADT/ScopeExit.h" |
20 | #include "llvm/Support/Debug.h" |
21 | |
22 | #define DEBUG_TYPE "llvm-inliner" |
23 | |
24 | using namespace mlir; |
25 | |
26 | /// Check whether the given alloca is an input to a lifetime intrinsic, |
27 | /// optionally passing through one or more casts on the way. This is not |
28 | /// transitive through block arguments. |
29 | static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) { |
30 | SmallVector<Operation *> stack(allocaOp->getUsers().begin(), |
31 | allocaOp->getUsers().end()); |
32 | while (!stack.empty()) { |
33 | Operation *op = stack.pop_back_val(); |
34 | if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op)) |
35 | return true; |
36 | if (isa<LLVM::BitcastOp>(op)) |
37 | stack.append(in_start: op->getUsers().begin(), in_end: op->getUsers().end()); |
38 | } |
39 | return false; |
40 | } |
41 | |
42 | /// Handles alloca operations in the inlined blocks: |
43 | /// - Moves all alloca operations with a constant size in the former entry block |
44 | /// of the callee into the entry block of the caller, so they become part of |
45 | /// the function prologue/epilogue during code generation. |
46 | /// - Inserts lifetime intrinsics that limit the scope of inlined static allocas |
47 | /// to the inlined blocks. |
48 | /// - Inserts StackSave and StackRestore operations if dynamic allocas were |
49 | /// inlined. |
50 | static void |
51 | handleInlinedAllocas(Operation *call, |
52 | iterator_range<Region::iterator> inlinedBlocks) { |
53 | // Locate the entry block of the closest callsite ancestor that has either the |
54 | // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect |
55 | // programs, this is the LLVMFuncOp containing the call site. However, in |
56 | // mixed-dialect programs, the callsite might be nested in another operation |
57 | // that carries one of these traits. In such scenarios, this traversal stops |
58 | // at the closest ancestor with either trait, ensuring visibility post |
59 | // relocation and respecting allocation scopes. |
60 | Block *callerEntryBlock = nullptr; |
61 | Operation *currentOp = call; |
62 | while (Operation *parentOp = currentOp->getParentOp()) { |
63 | if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() || |
64 | parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) { |
65 | callerEntryBlock = ¤tOp->getParentRegion()->front(); |
66 | break; |
67 | } |
68 | currentOp = parentOp; |
69 | } |
70 | |
71 | // Avoid relocating the alloca operations if the call has been inlined into |
72 | // the entry block already, which is typically the encompassing |
73 | // LLVM function, or if the relevant entry block cannot be identified. |
74 | Block *calleeEntryBlock = &(*inlinedBlocks.begin()); |
75 | if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock) |
76 | return; |
77 | |
78 | SmallVector<std::tuple<LLVM::AllocaOp, IntegerAttr, bool>> allocasToMove; |
79 | bool shouldInsertLifetimes = false; |
80 | bool hasDynamicAlloca = false; |
81 | // Conservatively only move static alloca operations that are part of the |
82 | // entry block and do not inspect nested regions, since they may execute |
83 | // conditionally or have other unknown semantics. |
84 | for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) { |
85 | IntegerAttr arraySize; |
86 | if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) { |
87 | hasDynamicAlloca = true; |
88 | continue; |
89 | } |
90 | bool shouldInsertLifetime = |
91 | arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp); |
92 | shouldInsertLifetimes |= shouldInsertLifetime; |
93 | allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime); |
94 | } |
95 | // Check the remaining inlined blocks for dynamic allocas as well. |
96 | for (Block &block : llvm::drop_begin(RangeOrContainer&: inlinedBlocks)) { |
97 | if (hasDynamicAlloca) |
98 | break; |
99 | hasDynamicAlloca = |
100 | llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) { |
101 | return !matchPattern(allocaOp.getArraySize(), m_Constant()); |
102 | }); |
103 | } |
104 | if (allocasToMove.empty() && !hasDynamicAlloca) |
105 | return; |
106 | OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin()); |
107 | Value stackPtr; |
108 | if (hasDynamicAlloca) { |
109 | // This may result in multiple stacksave/stackrestore intrinsics in the same |
110 | // scope if some are already present in the body of the caller. This is not |
111 | // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with |
112 | // other cases where the stacksave/stackrestore is redundant. |
113 | stackPtr = builder.create<LLVM::StackSaveOp>( |
114 | call->getLoc(), LLVM::LLVMPointerType::get(call->getContext())); |
115 | } |
116 | builder.setInsertionPoint(block: callerEntryBlock, insertPoint: callerEntryBlock->begin()); |
117 | for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { |
118 | auto newConstant = builder.create<LLVM::ConstantOp>( |
119 | allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize); |
120 | // Insert a lifetime start intrinsic where the alloca was before moving it. |
121 | if (shouldInsertLifetime) { |
122 | OpBuilder::InsertionGuard insertionGuard(builder); |
123 | builder.setInsertionPoint(allocaOp); |
124 | builder.create<LLVM::LifetimeStartOp>( |
125 | allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), |
126 | allocaOp.getResult()); |
127 | } |
128 | allocaOp->moveAfter(newConstant); |
129 | allocaOp.getArraySizeMutable().assign(newConstant.getResult()); |
130 | } |
131 | if (!shouldInsertLifetimes && !hasDynamicAlloca) |
132 | return; |
133 | // Insert a lifetime end intrinsic before each return in the callee function. |
134 | for (Block &block : inlinedBlocks) { |
135 | if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>()) |
136 | continue; |
137 | builder.setInsertionPoint(block.getTerminator()); |
138 | if (hasDynamicAlloca) |
139 | builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr); |
140 | for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { |
141 | if (shouldInsertLifetime) |
142 | builder.create<LLVM::LifetimeEndOp>( |
143 | allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), |
144 | allocaOp.getResult()); |
145 | } |
146 | } |
147 | } |
148 | |
149 | /// Maps all alias scopes in the inlined operations to deep clones of the scopes |
150 | /// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to |
151 | /// not incorrectly return `noalias` for e.g. operations on `a` and `a2`. |
152 | static void |
153 | deepCloneAliasScopes(iterator_range<Region::iterator> inlinedBlocks) { |
154 | DenseMap<Attribute, Attribute> mapping; |
155 | |
156 | // Register handles in the walker to create the deep clones. |
157 | // The walker ensures that an attribute is only ever walked once and does a |
158 | // post-order walk, ensuring the domain is visited prior to the scope. |
159 | AttrTypeWalker walker; |
160 | |
161 | // Perform the deep clones while visiting. Builders create a distinct |
162 | // attribute to make sure that new instances are always created by the |
163 | // uniquer. |
164 | walker.addWalk(callback: [&](LLVM::AliasScopeDomainAttr domainAttr) { |
165 | mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get( |
166 | domainAttr.getContext(), domainAttr.getDescription()); |
167 | }); |
168 | |
169 | walker.addWalk(callback: [&](LLVM::AliasScopeAttr scopeAttr) { |
170 | mapping[scopeAttr] = LLVM::AliasScopeAttr::get( |
171 | cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())), |
172 | scopeAttr.getDescription()); |
173 | }); |
174 | |
175 | // Map an array of scopes to an array of deep clones. |
176 | auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr { |
177 | if (!arrayAttr) |
178 | return nullptr; |
179 | |
180 | // Create the deep clones if necessary. |
181 | walker.walk(arrayAttr); |
182 | |
183 | return ArrayAttr::get(arrayAttr.getContext(), |
184 | llvm::map_to_vector(arrayAttr, [&](Attribute attr) { |
185 | return mapping.lookup(attr); |
186 | })); |
187 | }; |
188 | |
189 | for (Block &block : inlinedBlocks) { |
190 | for (Operation &op : block) { |
191 | if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) { |
192 | aliasInterface.setAliasScopes( |
193 | convertScopeList(aliasInterface.getAliasScopesOrNull())); |
194 | aliasInterface.setNoAliasScopes( |
195 | convertScopeList(aliasInterface.getNoAliasScopesOrNull())); |
196 | } |
197 | |
198 | if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) { |
199 | // Create the deep clones if necessary. |
200 | walker.walk(noAliasScope.getScopeAttr()); |
201 | |
202 | noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>( |
203 | mapping.lookup(noAliasScope.getScopeAttr()))); |
204 | } |
205 | } |
206 | } |
207 | } |
208 | |
209 | /// Creates a new ArrayAttr by concatenating `lhs` with `rhs`. |
210 | /// Returns null if both parameters are null. If only one attribute is null, |
211 | /// return the other. |
212 | static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) { |
213 | if (!lhs) |
214 | return rhs; |
215 | if (!rhs) |
216 | return lhs; |
217 | |
218 | SmallVector<Attribute> result; |
219 | llvm::append_range(result, lhs); |
220 | llvm::append_range(result, rhs); |
221 | return ArrayAttr::get(lhs.getContext(), result); |
222 | } |
223 | |
224 | /// Attempts to return the underlying pointer value that `pointerValue` is based |
225 | /// on. This traverses down the chain of operations to the last operation |
226 | /// producing the base pointer and returns it. If it encounters an operation it |
227 | /// cannot further traverse through, returns the operation's result. |
228 | static Value getUnderlyingObject(Value pointerValue) { |
229 | while (true) { |
230 | if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) { |
231 | pointerValue = gepOp.getBase(); |
232 | continue; |
233 | } |
234 | |
235 | if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) { |
236 | pointerValue = addrCast.getOperand(); |
237 | continue; |
238 | } |
239 | |
240 | break; |
241 | } |
242 | |
243 | return pointerValue; |
244 | } |
245 | |
246 | /// Attempts to return the set of all underlying pointer values that |
247 | /// `pointerValue` is based on. This function traverses through select |
248 | /// operations and block arguments unlike getUnderlyingObject. |
249 | static SmallVector<Value> getUnderlyingObjectSet(Value pointerValue) { |
250 | SmallVector<Value> result; |
251 | |
252 | SmallVector<Value> workList{pointerValue}; |
253 | // Avoid dataflow loops. |
254 | SmallPtrSet<Value, 4> seen; |
255 | do { |
256 | Value current = workList.pop_back_val(); |
257 | current = getUnderlyingObject(pointerValue: current); |
258 | |
259 | if (!seen.insert(Ptr: current).second) |
260 | continue; |
261 | |
262 | if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) { |
263 | workList.push_back(Elt: selectOp.getTrueValue()); |
264 | workList.push_back(Elt: selectOp.getFalseValue()); |
265 | continue; |
266 | } |
267 | |
268 | if (auto blockArg = dyn_cast<BlockArgument>(Val&: current)) { |
269 | Block *parentBlock = blockArg.getParentBlock(); |
270 | |
271 | // Attempt to find all block argument operands for every predecessor. |
272 | // If any operand to the block argument wasn't found in a predecessor, |
273 | // conservatively add the block argument to the result set. |
274 | SmallVector<Value> operands; |
275 | bool anyUnknown = false; |
276 | for (auto iter = parentBlock->pred_begin(); |
277 | iter != parentBlock->pred_end(); iter++) { |
278 | auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator()); |
279 | if (!branch) { |
280 | result.push_back(Elt: blockArg); |
281 | anyUnknown = true; |
282 | break; |
283 | } |
284 | |
285 | Value operand = branch.getSuccessorOperands( |
286 | iter.getSuccessorIndex())[blockArg.getArgNumber()]; |
287 | if (!operand) { |
288 | result.push_back(Elt: blockArg); |
289 | anyUnknown = true; |
290 | break; |
291 | } |
292 | |
293 | operands.push_back(Elt: operand); |
294 | } |
295 | |
296 | if (!anyUnknown) |
297 | llvm::append_range(C&: workList, R&: operands); |
298 | |
299 | continue; |
300 | } |
301 | |
302 | result.push_back(Elt: current); |
303 | } while (!workList.empty()); |
304 | |
305 | return result; |
306 | } |
307 | |
308 | /// Creates a new AliasScopeAttr for every noalias parameter and attaches it to |
309 | /// the appropriate inlined memory operations in an attempt to preserve the |
310 | /// original semantics of the parameter attribute. |
311 | static void createNewAliasScopesFromNoAliasParameter( |
312 | Operation *call, iterator_range<Region::iterator> inlinedBlocks) { |
313 | |
314 | // First collect all noalias parameters. These have been specially marked by |
315 | // the `handleArgument` implementation by using the `ssa.copy` intrinsic and |
316 | // attaching a `noalias` attribute to it. |
317 | // These are only meant to be temporary and should therefore be deleted after |
318 | // we're done using them here. |
319 | SetVector<LLVM::SSACopyOp> noAliasParams; |
320 | for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) { |
321 | for (Operation *user : argument.getUsers()) { |
322 | auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user); |
323 | if (!ssaCopy) |
324 | continue; |
325 | if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName())) |
326 | continue; |
327 | |
328 | noAliasParams.insert(ssaCopy); |
329 | } |
330 | } |
331 | |
332 | // If there were none, we have nothing to do here. |
333 | if (noAliasParams.empty()) |
334 | return; |
335 | |
336 | // Scope exit block to make it impossible to forget to get rid of the |
337 | // intrinsics. |
338 | auto exit = llvm::make_scope_exit(F: [&] { |
339 | for (LLVM::SSACopyOp ssaCopyOp : noAliasParams) { |
340 | ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand()); |
341 | ssaCopyOp->erase(); |
342 | } |
343 | }); |
344 | |
345 | // Create a new domain for this specific inlining and a new scope for every |
346 | // noalias parameter. |
347 | auto functionDomain = LLVM::AliasScopeDomainAttr::get( |
348 | call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr()); |
349 | DenseMap<Value, LLVM::AliasScopeAttr> pointerScopes; |
350 | for (LLVM::SSACopyOp copyOp : noAliasParams) { |
351 | auto scope = LLVM::AliasScopeAttr::get(functionDomain); |
352 | pointerScopes[copyOp] = scope; |
353 | |
354 | OpBuilder(call).create<LLVM::NoAliasScopeDeclOp>(call->getLoc(), scope); |
355 | } |
356 | |
357 | // Go through every instruction and attempt to find which noalias parameters |
358 | // it is definitely based on and definitely not based on. |
359 | for (Block &inlinedBlock : inlinedBlocks) { |
360 | for (auto aliasInterface : |
361 | inlinedBlock.getOps<LLVM::AliasAnalysisOpInterface>()) { |
362 | |
363 | // Collect the pointer arguments affected by the alias scopes. |
364 | SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands(); |
365 | |
366 | // Find the set of underlying pointers that this pointer is based on. |
367 | SmallPtrSet<Value, 4> basedOnPointers; |
368 | for (Value pointer : pointerArgs) |
369 | llvm::copy(getUnderlyingObjectSet(pointer), |
370 | std::inserter(basedOnPointers, basedOnPointers.begin())); |
371 | |
372 | bool aliasesOtherKnownObject = false; |
373 | // Go through the based on pointers and check that they are either: |
374 | // * Constants that can be ignored (undef, poison, null pointer). |
375 | // * Based on a noalias parameter. |
376 | // * Other pointers that we know can't alias with our noalias parameter. |
377 | // |
378 | // Any other value might be a pointer based on any noalias parameter that |
379 | // hasn't been identified. In that case conservatively don't add any |
380 | // scopes to this operation indicating either aliasing or not aliasing |
381 | // with any parameter. |
382 | if (llvm::any_of(basedOnPointers, [&](Value object) { |
383 | if (matchPattern(object, m_Constant())) |
384 | return false; |
385 | |
386 | if (noAliasParams.contains(object.getDefiningOp<LLVM::SSACopyOp>())) |
387 | return false; |
388 | |
389 | // TODO: This should include other arguments from the inlined |
390 | // callable. |
391 | if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>( |
392 | object.getDefiningOp())) { |
393 | aliasesOtherKnownObject = true; |
394 | return false; |
395 | } |
396 | return true; |
397 | })) |
398 | continue; |
399 | |
400 | // Add all noalias parameter scopes to the noalias scope list that we are |
401 | // not based on. |
402 | SmallVector<Attribute> noAliasScopes; |
403 | for (LLVM::SSACopyOp noAlias : noAliasParams) { |
404 | if (basedOnPointers.contains(noAlias)) |
405 | continue; |
406 | |
407 | noAliasScopes.push_back(pointerScopes[noAlias]); |
408 | } |
409 | |
410 | if (!noAliasScopes.empty()) |
411 | aliasInterface.setNoAliasScopes( |
412 | concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(), |
413 | ArrayAttr::get(call->getContext(), noAliasScopes))); |
414 | |
415 | // Don't add alias scopes to call operations or operations that might |
416 | // operate on pointers not based on any noalias parameter. |
417 | // Since we add all scopes to an operation's noalias list that it |
418 | // definitely doesn't alias, we mustn't do the same for the alias.scope |
419 | // list if other objects are involved. |
420 | // |
421 | // Consider the following case: |
422 | // %0 = llvm.alloca |
423 | // %1 = select %magic, %0, %noalias_param |
424 | // store 5, %1 (1) noalias=[scope(...)] |
425 | // ... |
426 | // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)] |
427 | // |
428 | // We can add the scopes of any noalias parameters that aren't |
429 | // noalias_param's scope to (1) and add all of them to (2). We mustn't add |
430 | // the scope of noalias_param to the alias.scope list of (1) since |
431 | // that would mean (2) cannot alias with (1) which is wrong since both may |
432 | // store to %0. |
433 | // |
434 | // In conclusion, only add scopes to the alias.scope list if all pointers |
435 | // have a corresponding scope. |
436 | // Call operations are included in this list since we do not know whether |
437 | // the callee accesses any memory besides the ones passed as its |
438 | // arguments. |
439 | if (aliasesOtherKnownObject || |
440 | isa<LLVM::CallOp>(aliasInterface.getOperation())) |
441 | continue; |
442 | |
443 | SmallVector<Attribute> aliasScopes; |
444 | for (LLVM::SSACopyOp noAlias : noAliasParams) |
445 | if (basedOnPointers.contains(noAlias)) |
446 | aliasScopes.push_back(pointerScopes[noAlias]); |
447 | |
448 | if (!aliasScopes.empty()) |
449 | aliasInterface.setAliasScopes( |
450 | concatArrayAttr(aliasInterface.getAliasScopesOrNull(), |
451 | ArrayAttr::get(call->getContext(), aliasScopes))); |
452 | } |
453 | } |
454 | } |
455 | |
456 | /// Appends any alias scopes of the call operation to any inlined memory |
457 | /// operation. |
458 | static void |
459 | appendCallOpAliasScopes(Operation *call, |
460 | iterator_range<Region::iterator> inlinedBlocks) { |
461 | auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call); |
462 | if (!callAliasInterface) |
463 | return; |
464 | |
465 | ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull(); |
466 | ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull(); |
467 | // If the call has neither alias scopes or noalias scopes we have nothing to |
468 | // do here. |
469 | if (!aliasScopes && !noAliasScopes) |
470 | return; |
471 | |
472 | // Simply append the call op's alias and noalias scopes to any operation |
473 | // implementing AliasAnalysisOpInterface. |
474 | for (Block &block : inlinedBlocks) { |
475 | for (auto aliasInterface : block.getOps<LLVM::AliasAnalysisOpInterface>()) { |
476 | if (aliasScopes) |
477 | aliasInterface.setAliasScopes(concatArrayAttr( |
478 | aliasInterface.getAliasScopesOrNull(), aliasScopes)); |
479 | |
480 | if (noAliasScopes) |
481 | aliasInterface.setNoAliasScopes(concatArrayAttr( |
482 | aliasInterface.getNoAliasScopesOrNull(), noAliasScopes)); |
483 | } |
484 | } |
485 | } |
486 | |
487 | /// Handles all interactions with alias scopes during inlining. |
488 | static void handleAliasScopes(Operation *call, |
489 | iterator_range<Region::iterator> inlinedBlocks) { |
490 | deepCloneAliasScopes(inlinedBlocks); |
491 | createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks); |
492 | appendCallOpAliasScopes(call, inlinedBlocks); |
493 | } |
494 | |
495 | /// Appends any access groups of the call operation to any inlined memory |
496 | /// operation. |
497 | static void handleAccessGroups(Operation *call, |
498 | iterator_range<Region::iterator> inlinedBlocks) { |
499 | auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call); |
500 | if (!callAccessGroupInterface) |
501 | return; |
502 | |
503 | auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull(); |
504 | if (!accessGroups) |
505 | return; |
506 | |
507 | // Simply append the call op's access groups to any operation implementing |
508 | // AccessGroupOpInterface. |
509 | for (Block &block : inlinedBlocks) |
510 | for (auto accessGroupOpInterface : |
511 | block.getOps<LLVM::AccessGroupOpInterface>()) |
512 | accessGroupOpInterface.setAccessGroups(concatArrayAttr( |
513 | accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups)); |
514 | } |
515 | |
516 | /// If `requestedAlignment` is higher than the alignment specified on `alloca`, |
517 | /// realigns `alloca` if this does not exceed the natural stack alignment. |
518 | /// Returns the post-alignment of `alloca`, whether it was realigned or not. |
519 | static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca, |
520 | uint64_t requestedAlignment, |
521 | DataLayout const &dataLayout) { |
522 | uint64_t allocaAlignment = alloca.getAlignment().value_or(1); |
523 | if (requestedAlignment <= allocaAlignment) |
524 | // No realignment necessary. |
525 | return allocaAlignment; |
526 | uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment(); |
527 | // If the natural stack alignment is not specified, the data layout returns |
528 | // zero. Optimistically allow realignment in this case. |
529 | if (naturalStackAlignmentBits == 0 || |
530 | // If the requested alignment exceeds the natural stack alignment, this |
531 | // will trigger a dynamic stack realignment, so we prefer to copy... |
532 | 8 * requestedAlignment <= naturalStackAlignmentBits || |
533 | // ...unless the alloca already triggers dynamic stack realignment. Then |
534 | // we might as well further increase the alignment to avoid a copy. |
535 | 8 * allocaAlignment > naturalStackAlignmentBits) { |
536 | alloca.setAlignment(requestedAlignment); |
537 | allocaAlignment = requestedAlignment; |
538 | } |
539 | return allocaAlignment; |
540 | } |
541 | |
542 | /// Tries to find and return the alignment of the pointer `value` by looking for |
543 | /// an alignment attribute on the defining allocation op or function argument. |
544 | /// If the found alignment is lower than `requestedAlignment`, tries to realign |
545 | /// the pointer, then returns the resulting post-alignment, regardless of |
546 | /// whether it was realigned or not. If no existing alignment attribute is |
547 | /// found, returns 1 (i.e., assume that no alignment is guaranteed). |
548 | static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment, |
549 | DataLayout const &dataLayout) { |
550 | if (Operation *definingOp = value.getDefiningOp()) { |
551 | if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp)) |
552 | return tryToEnforceAllocaAlignment(alloca, requestedAlignment, |
553 | dataLayout); |
554 | if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp)) |
555 | if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>( |
556 | definingOp, addressOf.getGlobalNameAttr())) |
557 | return global.getAlignment().value_or(1); |
558 | // We don't currently handle this operation; assume no alignment. |
559 | return 1; |
560 | } |
561 | // Since there is no defining op, this is a block argument. Probably this |
562 | // comes directly from a function argument, so check that this is the case. |
563 | Operation *parentOp = value.getParentBlock()->getParentOp(); |
564 | if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) { |
565 | // Use the alignment attribute set for this argument in the parent function |
566 | // if it has been set. |
567 | auto blockArg = llvm::cast<BlockArgument>(Val&: value); |
568 | if (Attribute alignAttr = func.getArgAttr( |
569 | blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName())) |
570 | return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue(); |
571 | } |
572 | // We didn't find anything useful; assume no alignment. |
573 | return 1; |
574 | } |
575 | |
576 | /// Introduces a new alloca and copies the memory pointed to by `argument` to |
577 | /// the address of the new alloca, then returns the value of the new alloca. |
578 | static Value handleByValArgumentInit(OpBuilder &builder, Location loc, |
579 | Value argument, Type elementType, |
580 | uint64_t elementTypeSize, |
581 | uint64_t targetAlignment) { |
582 | // Allocate the new value on the stack. |
583 | Value allocaOp; |
584 | { |
585 | // Since this is a static alloca, we can put it directly in the entry block, |
586 | // so they can be absorbed into the prologue/epilogue at code generation. |
587 | OpBuilder::InsertionGuard insertionGuard(builder); |
588 | Block *entryBlock = &(*argument.getParentRegion()->begin()); |
589 | builder.setInsertionPointToStart(entryBlock); |
590 | Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(), |
591 | builder.getI64IntegerAttr(1)); |
592 | allocaOp = builder.create<LLVM::AllocaOp>( |
593 | loc, argument.getType(), elementType, one, targetAlignment); |
594 | } |
595 | // Copy the pointee to the newly allocated value. |
596 | Value copySize = builder.create<LLVM::ConstantOp>( |
597 | loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize)); |
598 | builder.create<LLVM::MemcpyOp>(loc, allocaOp, argument, copySize, |
599 | /*isVolatile=*/false); |
600 | return allocaOp; |
601 | } |
602 | |
603 | /// Handles a function argument marked with the byval attribute by introducing a |
604 | /// memcpy or realigning the defining operation, if required either due to the |
605 | /// pointee being writeable in the callee, and/or due to an alignment mismatch. |
606 | /// `requestedAlignment` specifies the alignment set in the "align" argument |
607 | /// attribute (or 1 if no align attribute was set). |
608 | static Value handleByValArgument(OpBuilder &builder, Operation *callable, |
609 | Value argument, Type elementType, |
610 | uint64_t requestedAlignment) { |
611 | auto func = cast<LLVM::LLVMFuncOp>(callable); |
612 | LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr(); |
613 | // If there is no memory effects attribute, assume that the function is |
614 | // not read-only. |
615 | bool isReadOnly = memoryEffects && |
616 | memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef && |
617 | memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod; |
618 | // Check if there's an alignment mismatch requiring us to copy. |
619 | DataLayout dataLayout = DataLayout::closest(op: callable); |
620 | uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(t: elementType); |
621 | if (isReadOnly) { |
622 | if (requestedAlignment <= minimumAlignment) |
623 | return argument; |
624 | uint64_t currentAlignment = |
625 | tryToEnforceAlignment(value: argument, requestedAlignment, dataLayout); |
626 | if (currentAlignment >= requestedAlignment) |
627 | return argument; |
628 | } |
629 | uint64_t targetAlignment = std::max(a: requestedAlignment, b: minimumAlignment); |
630 | return handleByValArgumentInit(builder, func.getLoc(), argument, elementType, |
631 | dataLayout.getTypeSize(t: elementType), |
632 | targetAlignment); |
633 | } |
634 | |
635 | namespace { |
636 | struct LLVMInlinerInterface : public DialectInlinerInterface { |
637 | using DialectInlinerInterface::DialectInlinerInterface; |
638 | |
639 | LLVMInlinerInterface(Dialect *dialect) |
640 | : DialectInlinerInterface(dialect), |
641 | // Cache set of StringAttrs for fast lookup in `isLegalToInline`. |
642 | disallowedFunctionAttrs({ |
643 | StringAttr::get(dialect->getContext(), "noduplicate" ), |
644 | StringAttr::get(dialect->getContext(), "noinline" ), |
645 | StringAttr::get(dialect->getContext(), "optnone" ), |
646 | StringAttr::get(dialect->getContext(), "presplitcoroutine" ), |
647 | StringAttr::get(dialect->getContext(), "returns_twice" ), |
648 | StringAttr::get(dialect->getContext(), "strictfp" ), |
649 | }) {} |
650 | |
651 | bool isLegalToInline(Operation *call, Operation *callable, |
652 | bool wouldBeCloned) const final { |
653 | if (!wouldBeCloned) |
654 | return false; |
655 | if (!isa<LLVM::CallOp>(call)) { |
656 | LLVM_DEBUG(llvm::dbgs() |
657 | << "Cannot inline: call is not an LLVM::CallOp\n" ); |
658 | return false; |
659 | } |
660 | auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable); |
661 | if (!funcOp) { |
662 | LLVM_DEBUG(llvm::dbgs() |
663 | << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n" ); |
664 | return false; |
665 | } |
666 | if (funcOp.isVarArg()) { |
667 | LLVM_DEBUG(llvm::dbgs() << "Cannot inline: callable is variadic\n" ); |
668 | return false; |
669 | } |
670 | // TODO: Generate aliasing metadata from noalias argument/result attributes. |
671 | if (auto attrs = funcOp.getArgAttrs()) { |
672 | for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) { |
673 | if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) { |
674 | LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() |
675 | << ": inalloca arguments not supported\n" ); |
676 | return false; |
677 | } |
678 | } |
679 | } |
680 | // TODO: Handle exceptions. |
681 | if (funcOp.getPersonality()) { |
682 | LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() |
683 | << ": unhandled function personality\n" ); |
684 | return false; |
685 | } |
686 | if (funcOp.getPassthrough()) { |
687 | // TODO: Used attributes should not be passthrough. |
688 | if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) { |
689 | auto stringAttr = dyn_cast<StringAttr>(attr); |
690 | if (!stringAttr) |
691 | return false; |
692 | if (disallowedFunctionAttrs.contains(V: stringAttr)) { |
693 | LLVM_DEBUG(llvm::dbgs() |
694 | << "Cannot inline " << funcOp.getSymName() |
695 | << ": found disallowed function attribute " |
696 | << stringAttr << "\n" ); |
697 | return true; |
698 | } |
699 | return false; |
700 | })) |
701 | return false; |
702 | } |
703 | return true; |
704 | } |
705 | |
706 | bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final { |
707 | return true; |
708 | } |
709 | |
710 | bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final { |
711 | // The inliner cannot handle variadic function arguments. |
712 | return !isa<LLVM::VaStartOp>(op); |
713 | } |
714 | |
715 | /// Handle the given inlined return by replacing it with a branch. This |
716 | /// overload is called when the inlined region has more than one block. |
717 | void handleTerminator(Operation *op, Block *newDest) const final { |
718 | // Only return needs to be handled here. |
719 | auto returnOp = dyn_cast<LLVM::ReturnOp>(op); |
720 | if (!returnOp) |
721 | return; |
722 | |
723 | // Replace the return with a branch to the dest. |
724 | OpBuilder builder(op); |
725 | builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest); |
726 | op->erase(); |
727 | } |
728 | |
729 | /// Handle the given inlined return by replacing the uses of the call with the |
730 | /// operands of the return. This overload is called when the inlined region |
731 | /// only contains one block. |
732 | void handleTerminator(Operation *op, ValueRange valuesToRepl) const final { |
733 | // Return will be the only terminator present. |
734 | auto returnOp = cast<LLVM::ReturnOp>(op); |
735 | |
736 | // Replace the values directly with the return operands. |
737 | assert(returnOp.getNumOperands() == valuesToRepl.size()); |
738 | for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands())) |
739 | dst.replaceAllUsesWith(src); |
740 | } |
741 | |
742 | Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable, |
743 | Value argument, |
744 | DictionaryAttr argumentAttrs) const final { |
745 | if (std::optional<NamedAttribute> attr = |
746 | argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) { |
747 | Type elementType = cast<TypeAttr>(attr->getValue()).getValue(); |
748 | uint64_t requestedAlignment = 1; |
749 | if (std::optional<NamedAttribute> alignAttr = |
750 | argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) { |
751 | requestedAlignment = cast<IntegerAttr>(alignAttr->getValue()) |
752 | .getValue() |
753 | .getLimitedValue(); |
754 | } |
755 | return handleByValArgument(builder, callable, argument, elementType, |
756 | requestedAlignment); |
757 | } |
758 | if ([[maybe_unused]] std::optional<NamedAttribute> attr = |
759 | argumentAttrs.getNamed(LLVM::LLVMDialect::getNoAliasAttrName())) { |
760 | if (argument.use_empty()) |
761 | return argument; |
762 | |
763 | // This code is essentially a workaround for deficiencies in the |
764 | // inliner interface: We need to transform operations *after* inlined |
765 | // based on the argument attributes of the parameters *before* inlining. |
766 | // This method runs prior to actual inlining and thus cannot transform the |
767 | // post-inlining code, while `processInlinedCallBlocks` does not have |
768 | // access to pre-inlining function arguments. Additionally, it is required |
769 | // to distinguish which parameter an SSA value originally came from. |
770 | // As a workaround until this is changed: Create an ssa.copy intrinsic |
771 | // with the noalias attribute that can easily be found, and is extremely |
772 | // unlikely to exist in the code prior to inlining, using this to |
773 | // communicate between this method and `processInlinedCallBlocks`. |
774 | // TODO: Fix this by refactoring the inliner interface. |
775 | auto copyOp = builder.create<LLVM::SSACopyOp>(call->getLoc(), argument); |
776 | copyOp->setDiscardableAttr( |
777 | builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()), |
778 | builder.getUnitAttr()); |
779 | return copyOp; |
780 | } |
781 | return argument; |
782 | } |
783 | |
  /// Runs post-inlining fixups over the freshly inlined blocks.
  void processInlinedCallBlocks(
      Operation *call,
      iterator_range<Region::iterator> inlinedBlocks) const override {
    // Move constant-size allocas from the former callee entry block into the
    // caller's entry block so they become part of the function prologue.
    handleInlinedAllocas(call, inlinedBlocks);
    // Consumes the noalias-marked ssa.copy intrinsics that handleArgument
    // planted; presumably turns them into alias-scope metadata — confirm in
    // handleAliasScopes.
    handleAliasScopes(call, inlinedBlocks);
    // NOTE(review): likely propagates access-group metadata from the call site
    // onto the inlined operations; verify against handleAccessGroups.
    handleAccessGroups(call, inlinedBlocks);
  }
791 | |
792 | // Keeping this (immutable) state on the interface allows us to look up |
793 | // StringAttrs instead of looking up strings, since StringAttrs are bound to |
794 | // the current context and thus cannot be initialized as static fields. |
795 | const DenseSet<StringAttr> disallowedFunctionAttrs; |
796 | }; |
797 | |
798 | } // end anonymous namespace |
799 | |
/// Attaches the LLVM dialect's inliner interface implementation (defined
/// above) to the given dialect instance, enabling the generic MLIR inliner
/// to operate on LLVM dialect functions.
void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) {
  dialect->addInterfaces<LLVMInlinerInterface>();
}
803 | |