1//===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Logic for inlining LLVM functions and the definition of the
10// LLVMInliningInterface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LLVMInlining.h"
15#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
16#include "mlir/IR/Matchers.h"
17#include "mlir/Interfaces/DataLayoutInterfaces.h"
18#include "mlir/Transforms/InliningUtils.h"
19#include "llvm/ADT/ScopeExit.h"
20#include "llvm/Support/Debug.h"
21
22#define DEBUG_TYPE "llvm-inliner"
23
24using namespace mlir;
25
26/// Check whether the given alloca is an input to a lifetime intrinsic,
27/// optionally passing through one or more casts on the way. This is not
28/// transitive through block arguments.
29static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
30 SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
31 allocaOp->getUsers().end());
32 while (!stack.empty()) {
33 Operation *op = stack.pop_back_val();
34 if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
35 return true;
36 if (isa<LLVM::BitcastOp>(op))
37 stack.append(in_start: op->getUsers().begin(), in_end: op->getUsers().end());
38 }
39 return false;
40}
41
42/// Handles alloca operations in the inlined blocks:
43/// - Moves all alloca operations with a constant size in the former entry block
44/// of the callee into the entry block of the caller, so they become part of
45/// the function prologue/epilogue during code generation.
46/// - Inserts lifetime intrinsics that limit the scope of inlined static allocas
47/// to the inlined blocks.
48/// - Inserts StackSave and StackRestore operations if dynamic allocas were
49/// inlined.
50static void
51handleInlinedAllocas(Operation *call,
52 iterator_range<Region::iterator> inlinedBlocks) {
53 // Locate the entry block of the closest callsite ancestor that has either the
54 // IsolatedFromAbove or AutomaticAllocationScope trait. In pure LLVM dialect
55 // programs, this is the LLVMFuncOp containing the call site. However, in
56 // mixed-dialect programs, the callsite might be nested in another operation
57 // that carries one of these traits. In such scenarios, this traversal stops
58 // at the closest ancestor with either trait, ensuring visibility post
59 // relocation and respecting allocation scopes.
60 Block *callerEntryBlock = nullptr;
61 Operation *currentOp = call;
62 while (Operation *parentOp = currentOp->getParentOp()) {
63 if (parentOp->mightHaveTrait<OpTrait::IsIsolatedFromAbove>() ||
64 parentOp->mightHaveTrait<OpTrait::AutomaticAllocationScope>()) {
65 callerEntryBlock = &currentOp->getParentRegion()->front();
66 break;
67 }
68 currentOp = parentOp;
69 }
70
71 // Avoid relocating the alloca operations if the call has been inlined into
72 // the entry block already, which is typically the encompassing
73 // LLVM function, or if the relevant entry block cannot be identified.
74 Block *calleeEntryBlock = &(*inlinedBlocks.begin());
75 if (!callerEntryBlock || callerEntryBlock == calleeEntryBlock)
76 return;
77
78 SmallVector<std::tuple<LLVM::AllocaOp, IntegerAttr, bool>> allocasToMove;
79 bool shouldInsertLifetimes = false;
80 bool hasDynamicAlloca = false;
81 // Conservatively only move static alloca operations that are part of the
82 // entry block and do not inspect nested regions, since they may execute
83 // conditionally or have other unknown semantics.
84 for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
85 IntegerAttr arraySize;
86 if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) {
87 hasDynamicAlloca = true;
88 continue;
89 }
90 bool shouldInsertLifetime =
91 arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
92 shouldInsertLifetimes |= shouldInsertLifetime;
93 allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
94 }
95 // Check the remaining inlined blocks for dynamic allocas as well.
96 for (Block &block : llvm::drop_begin(RangeOrContainer&: inlinedBlocks)) {
97 if (hasDynamicAlloca)
98 break;
99 hasDynamicAlloca =
100 llvm::any_of(block.getOps<LLVM::AllocaOp>(), [](auto allocaOp) {
101 return !matchPattern(allocaOp.getArraySize(), m_Constant());
102 });
103 }
104 if (allocasToMove.empty() && !hasDynamicAlloca)
105 return;
106 OpBuilder builder(calleeEntryBlock, calleeEntryBlock->begin());
107 Value stackPtr;
108 if (hasDynamicAlloca) {
109 // This may result in multiple stacksave/stackrestore intrinsics in the same
110 // scope if some are already present in the body of the caller. This is not
111 // invalid IR, but LLVM cleans these up in InstCombineCalls.cpp, along with
112 // other cases where the stacksave/stackrestore is redundant.
113 stackPtr = builder.create<LLVM::StackSaveOp>(
114 call->getLoc(), LLVM::LLVMPointerType::get(call->getContext()));
115 }
116 builder.setInsertionPoint(block: callerEntryBlock, insertPoint: callerEntryBlock->begin());
117 for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
118 auto newConstant = builder.create<LLVM::ConstantOp>(
119 allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
120 // Insert a lifetime start intrinsic where the alloca was before moving it.
121 if (shouldInsertLifetime) {
122 OpBuilder::InsertionGuard insertionGuard(builder);
123 builder.setInsertionPoint(allocaOp);
124 builder.create<LLVM::LifetimeStartOp>(
125 allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
126 allocaOp.getResult());
127 }
128 allocaOp->moveAfter(newConstant);
129 allocaOp.getArraySizeMutable().assign(newConstant.getResult());
130 }
131 if (!shouldInsertLifetimes && !hasDynamicAlloca)
132 return;
133 // Insert a lifetime end intrinsic before each return in the callee function.
134 for (Block &block : inlinedBlocks) {
135 if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
136 continue;
137 builder.setInsertionPoint(block.getTerminator());
138 if (hasDynamicAlloca)
139 builder.create<LLVM::StackRestoreOp>(call->getLoc(), stackPtr);
140 for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
141 if (shouldInsertLifetime)
142 builder.create<LLVM::LifetimeEndOp>(
143 allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
144 allocaOp.getResult());
145 }
146 }
147}
148
149/// Maps all alias scopes in the inlined operations to deep clones of the scopes
150/// and domain. This is required for code such as `foo(a, b); foo(a2, b2);` to
151/// not incorrectly return `noalias` for e.g. operations on `a` and `a2`.
152static void
153deepCloneAliasScopes(iterator_range<Region::iterator> inlinedBlocks) {
154 DenseMap<Attribute, Attribute> mapping;
155
156 // Register handles in the walker to create the deep clones.
157 // The walker ensures that an attribute is only ever walked once and does a
158 // post-order walk, ensuring the domain is visited prior to the scope.
159 AttrTypeWalker walker;
160
161 // Perform the deep clones while visiting. Builders create a distinct
162 // attribute to make sure that new instances are always created by the
163 // uniquer.
164 walker.addWalk(callback: [&](LLVM::AliasScopeDomainAttr domainAttr) {
165 mapping[domainAttr] = LLVM::AliasScopeDomainAttr::get(
166 domainAttr.getContext(), domainAttr.getDescription());
167 });
168
169 walker.addWalk(callback: [&](LLVM::AliasScopeAttr scopeAttr) {
170 mapping[scopeAttr] = LLVM::AliasScopeAttr::get(
171 cast<LLVM::AliasScopeDomainAttr>(mapping.lookup(scopeAttr.getDomain())),
172 scopeAttr.getDescription());
173 });
174
175 // Map an array of scopes to an array of deep clones.
176 auto convertScopeList = [&](ArrayAttr arrayAttr) -> ArrayAttr {
177 if (!arrayAttr)
178 return nullptr;
179
180 // Create the deep clones if necessary.
181 walker.walk(arrayAttr);
182
183 return ArrayAttr::get(arrayAttr.getContext(),
184 llvm::map_to_vector(arrayAttr, [&](Attribute attr) {
185 return mapping.lookup(attr);
186 }));
187 };
188
189 for (Block &block : inlinedBlocks) {
190 for (Operation &op : block) {
191 if (auto aliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) {
192 aliasInterface.setAliasScopes(
193 convertScopeList(aliasInterface.getAliasScopesOrNull()));
194 aliasInterface.setNoAliasScopes(
195 convertScopeList(aliasInterface.getNoAliasScopesOrNull()));
196 }
197
198 if (auto noAliasScope = dyn_cast<LLVM::NoAliasScopeDeclOp>(op)) {
199 // Create the deep clones if necessary.
200 walker.walk(noAliasScope.getScopeAttr());
201
202 noAliasScope.setScopeAttr(cast<LLVM::AliasScopeAttr>(
203 mapping.lookup(noAliasScope.getScopeAttr())));
204 }
205 }
206 }
207}
208
209/// Creates a new ArrayAttr by concatenating `lhs` with `rhs`.
210/// Returns null if both parameters are null. If only one attribute is null,
211/// return the other.
212static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
213 if (!lhs)
214 return rhs;
215 if (!rhs)
216 return lhs;
217
218 SmallVector<Attribute> result;
219 llvm::append_range(result, lhs);
220 llvm::append_range(result, rhs);
221 return ArrayAttr::get(lhs.getContext(), result);
222}
223
224/// Attempts to return the underlying pointer value that `pointerValue` is based
225/// on. This traverses down the chain of operations to the last operation
226/// producing the base pointer and returns it. If it encounters an operation it
227/// cannot further traverse through, returns the operation's result.
228static Value getUnderlyingObject(Value pointerValue) {
229 while (true) {
230 if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) {
231 pointerValue = gepOp.getBase();
232 continue;
233 }
234
235 if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) {
236 pointerValue = addrCast.getOperand();
237 continue;
238 }
239
240 break;
241 }
242
243 return pointerValue;
244}
245
246/// Attempts to return the set of all underlying pointer values that
247/// `pointerValue` is based on. This function traverses through select
248/// operations and block arguments unlike getUnderlyingObject.
249static SmallVector<Value> getUnderlyingObjectSet(Value pointerValue) {
250 SmallVector<Value> result;
251
252 SmallVector<Value> workList{pointerValue};
253 // Avoid dataflow loops.
254 SmallPtrSet<Value, 4> seen;
255 do {
256 Value current = workList.pop_back_val();
257 current = getUnderlyingObject(pointerValue: current);
258
259 if (!seen.insert(Ptr: current).second)
260 continue;
261
262 if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) {
263 workList.push_back(Elt: selectOp.getTrueValue());
264 workList.push_back(Elt: selectOp.getFalseValue());
265 continue;
266 }
267
268 if (auto blockArg = dyn_cast<BlockArgument>(Val&: current)) {
269 Block *parentBlock = blockArg.getParentBlock();
270
271 // Attempt to find all block argument operands for every predecessor.
272 // If any operand to the block argument wasn't found in a predecessor,
273 // conservatively add the block argument to the result set.
274 SmallVector<Value> operands;
275 bool anyUnknown = false;
276 for (auto iter = parentBlock->pred_begin();
277 iter != parentBlock->pred_end(); iter++) {
278 auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator());
279 if (!branch) {
280 result.push_back(Elt: blockArg);
281 anyUnknown = true;
282 break;
283 }
284
285 Value operand = branch.getSuccessorOperands(
286 iter.getSuccessorIndex())[blockArg.getArgNumber()];
287 if (!operand) {
288 result.push_back(Elt: blockArg);
289 anyUnknown = true;
290 break;
291 }
292
293 operands.push_back(Elt: operand);
294 }
295
296 if (!anyUnknown)
297 llvm::append_range(C&: workList, R&: operands);
298
299 continue;
300 }
301
302 result.push_back(Elt: current);
303 } while (!workList.empty());
304
305 return result;
306}
307
308/// Creates a new AliasScopeAttr for every noalias parameter and attaches it to
309/// the appropriate inlined memory operations in an attempt to preserve the
310/// original semantics of the parameter attribute.
311static void createNewAliasScopesFromNoAliasParameter(
312 Operation *call, iterator_range<Region::iterator> inlinedBlocks) {
313
314 // First collect all noalias parameters. These have been specially marked by
315 // the `handleArgument` implementation by using the `ssa.copy` intrinsic and
316 // attaching a `noalias` attribute to it.
317 // These are only meant to be temporary and should therefore be deleted after
318 // we're done using them here.
319 SetVector<LLVM::SSACopyOp> noAliasParams;
320 for (Value argument : cast<LLVM::CallOp>(call).getArgOperands()) {
321 for (Operation *user : argument.getUsers()) {
322 auto ssaCopy = llvm::dyn_cast<LLVM::SSACopyOp>(user);
323 if (!ssaCopy)
324 continue;
325 if (!ssaCopy->hasAttr(LLVM::LLVMDialect::getNoAliasAttrName()))
326 continue;
327
328 noAliasParams.insert(ssaCopy);
329 }
330 }
331
332 // If there were none, we have nothing to do here.
333 if (noAliasParams.empty())
334 return;
335
336 // Scope exit block to make it impossible to forget to get rid of the
337 // intrinsics.
338 auto exit = llvm::make_scope_exit(F: [&] {
339 for (LLVM::SSACopyOp ssaCopyOp : noAliasParams) {
340 ssaCopyOp.replaceAllUsesWith(ssaCopyOp.getOperand());
341 ssaCopyOp->erase();
342 }
343 });
344
345 // Create a new domain for this specific inlining and a new scope for every
346 // noalias parameter.
347 auto functionDomain = LLVM::AliasScopeDomainAttr::get(
348 call->getContext(), cast<LLVM::CallOp>(call).getCalleeAttr().getAttr());
349 DenseMap<Value, LLVM::AliasScopeAttr> pointerScopes;
350 for (LLVM::SSACopyOp copyOp : noAliasParams) {
351 auto scope = LLVM::AliasScopeAttr::get(functionDomain);
352 pointerScopes[copyOp] = scope;
353
354 OpBuilder(call).create<LLVM::NoAliasScopeDeclOp>(call->getLoc(), scope);
355 }
356
357 // Go through every instruction and attempt to find which noalias parameters
358 // it is definitely based on and definitely not based on.
359 for (Block &inlinedBlock : inlinedBlocks) {
360 for (auto aliasInterface :
361 inlinedBlock.getOps<LLVM::AliasAnalysisOpInterface>()) {
362
363 // Collect the pointer arguments affected by the alias scopes.
364 SmallVector<Value> pointerArgs = aliasInterface.getAccessedOperands();
365
366 // Find the set of underlying pointers that this pointer is based on.
367 SmallPtrSet<Value, 4> basedOnPointers;
368 for (Value pointer : pointerArgs)
369 llvm::copy(getUnderlyingObjectSet(pointer),
370 std::inserter(basedOnPointers, basedOnPointers.begin()));
371
372 bool aliasesOtherKnownObject = false;
373 // Go through the based on pointers and check that they are either:
374 // * Constants that can be ignored (undef, poison, null pointer).
375 // * Based on a noalias parameter.
376 // * Other pointers that we know can't alias with our noalias parameter.
377 //
378 // Any other value might be a pointer based on any noalias parameter that
379 // hasn't been identified. In that case conservatively don't add any
380 // scopes to this operation indicating either aliasing or not aliasing
381 // with any parameter.
382 if (llvm::any_of(basedOnPointers, [&](Value object) {
383 if (matchPattern(object, m_Constant()))
384 return false;
385
386 if (noAliasParams.contains(object.getDefiningOp<LLVM::SSACopyOp>()))
387 return false;
388
389 // TODO: This should include other arguments from the inlined
390 // callable.
391 if (isa_and_nonnull<LLVM::AllocaOp, LLVM::AddressOfOp>(
392 object.getDefiningOp())) {
393 aliasesOtherKnownObject = true;
394 return false;
395 }
396 return true;
397 }))
398 continue;
399
400 // Add all noalias parameter scopes to the noalias scope list that we are
401 // not based on.
402 SmallVector<Attribute> noAliasScopes;
403 for (LLVM::SSACopyOp noAlias : noAliasParams) {
404 if (basedOnPointers.contains(noAlias))
405 continue;
406
407 noAliasScopes.push_back(pointerScopes[noAlias]);
408 }
409
410 if (!noAliasScopes.empty())
411 aliasInterface.setNoAliasScopes(
412 concatArrayAttr(aliasInterface.getNoAliasScopesOrNull(),
413 ArrayAttr::get(call->getContext(), noAliasScopes)));
414
415 // Don't add alias scopes to call operations or operations that might
416 // operate on pointers not based on any noalias parameter.
417 // Since we add all scopes to an operation's noalias list that it
418 // definitely doesn't alias, we mustn't do the same for the alias.scope
419 // list if other objects are involved.
420 //
421 // Consider the following case:
422 // %0 = llvm.alloca
423 // %1 = select %magic, %0, %noalias_param
424 // store 5, %1 (1) noalias=[scope(...)]
425 // ...
426 // store 3, %0 (2) noalias=[scope(noalias_param), scope(...)]
427 //
428 // We can add the scopes of any noalias parameters that aren't
429 // noalias_param's scope to (1) and add all of them to (2). We mustn't add
430 // the scope of noalias_param to the alias.scope list of (1) since
431 // that would mean (2) cannot alias with (1) which is wrong since both may
432 // store to %0.
433 //
434 // In conclusion, only add scopes to the alias.scope list if all pointers
435 // have a corresponding scope.
436 // Call operations are included in this list since we do not know whether
437 // the callee accesses any memory besides the ones passed as its
438 // arguments.
439 if (aliasesOtherKnownObject ||
440 isa<LLVM::CallOp>(aliasInterface.getOperation()))
441 continue;
442
443 SmallVector<Attribute> aliasScopes;
444 for (LLVM::SSACopyOp noAlias : noAliasParams)
445 if (basedOnPointers.contains(noAlias))
446 aliasScopes.push_back(pointerScopes[noAlias]);
447
448 if (!aliasScopes.empty())
449 aliasInterface.setAliasScopes(
450 concatArrayAttr(aliasInterface.getAliasScopesOrNull(),
451 ArrayAttr::get(call->getContext(), aliasScopes)));
452 }
453 }
454}
455
456/// Appends any alias scopes of the call operation to any inlined memory
457/// operation.
458static void
459appendCallOpAliasScopes(Operation *call,
460 iterator_range<Region::iterator> inlinedBlocks) {
461 auto callAliasInterface = dyn_cast<LLVM::AliasAnalysisOpInterface>(call);
462 if (!callAliasInterface)
463 return;
464
465 ArrayAttr aliasScopes = callAliasInterface.getAliasScopesOrNull();
466 ArrayAttr noAliasScopes = callAliasInterface.getNoAliasScopesOrNull();
467 // If the call has neither alias scopes or noalias scopes we have nothing to
468 // do here.
469 if (!aliasScopes && !noAliasScopes)
470 return;
471
472 // Simply append the call op's alias and noalias scopes to any operation
473 // implementing AliasAnalysisOpInterface.
474 for (Block &block : inlinedBlocks) {
475 for (auto aliasInterface : block.getOps<LLVM::AliasAnalysisOpInterface>()) {
476 if (aliasScopes)
477 aliasInterface.setAliasScopes(concatArrayAttr(
478 aliasInterface.getAliasScopesOrNull(), aliasScopes));
479
480 if (noAliasScopes)
481 aliasInterface.setNoAliasScopes(concatArrayAttr(
482 aliasInterface.getNoAliasScopesOrNull(), noAliasScopes));
483 }
484 }
485}
486
487/// Handles all interactions with alias scopes during inlining.
488static void handleAliasScopes(Operation *call,
489 iterator_range<Region::iterator> inlinedBlocks) {
490 deepCloneAliasScopes(inlinedBlocks);
491 createNewAliasScopesFromNoAliasParameter(call, inlinedBlocks);
492 appendCallOpAliasScopes(call, inlinedBlocks);
493}
494
495/// Appends any access groups of the call operation to any inlined memory
496/// operation.
497static void handleAccessGroups(Operation *call,
498 iterator_range<Region::iterator> inlinedBlocks) {
499 auto callAccessGroupInterface = dyn_cast<LLVM::AccessGroupOpInterface>(call);
500 if (!callAccessGroupInterface)
501 return;
502
503 auto accessGroups = callAccessGroupInterface.getAccessGroupsOrNull();
504 if (!accessGroups)
505 return;
506
507 // Simply append the call op's access groups to any operation implementing
508 // AccessGroupOpInterface.
509 for (Block &block : inlinedBlocks)
510 for (auto accessGroupOpInterface :
511 block.getOps<LLVM::AccessGroupOpInterface>())
512 accessGroupOpInterface.setAccessGroups(concatArrayAttr(
513 accessGroupOpInterface.getAccessGroupsOrNull(), accessGroups));
514}
515
516/// If `requestedAlignment` is higher than the alignment specified on `alloca`,
517/// realigns `alloca` if this does not exceed the natural stack alignment.
518/// Returns the post-alignment of `alloca`, whether it was realigned or not.
519static uint64_t tryToEnforceAllocaAlignment(LLVM::AllocaOp alloca,
520 uint64_t requestedAlignment,
521 DataLayout const &dataLayout) {
522 uint64_t allocaAlignment = alloca.getAlignment().value_or(1);
523 if (requestedAlignment <= allocaAlignment)
524 // No realignment necessary.
525 return allocaAlignment;
526 uint64_t naturalStackAlignmentBits = dataLayout.getStackAlignment();
527 // If the natural stack alignment is not specified, the data layout returns
528 // zero. Optimistically allow realignment in this case.
529 if (naturalStackAlignmentBits == 0 ||
530 // If the requested alignment exceeds the natural stack alignment, this
531 // will trigger a dynamic stack realignment, so we prefer to copy...
532 8 * requestedAlignment <= naturalStackAlignmentBits ||
533 // ...unless the alloca already triggers dynamic stack realignment. Then
534 // we might as well further increase the alignment to avoid a copy.
535 8 * allocaAlignment > naturalStackAlignmentBits) {
536 alloca.setAlignment(requestedAlignment);
537 allocaAlignment = requestedAlignment;
538 }
539 return allocaAlignment;
540}
541
542/// Tries to find and return the alignment of the pointer `value` by looking for
543/// an alignment attribute on the defining allocation op or function argument.
544/// If the found alignment is lower than `requestedAlignment`, tries to realign
545/// the pointer, then returns the resulting post-alignment, regardless of
546/// whether it was realigned or not. If no existing alignment attribute is
547/// found, returns 1 (i.e., assume that no alignment is guaranteed).
548static uint64_t tryToEnforceAlignment(Value value, uint64_t requestedAlignment,
549 DataLayout const &dataLayout) {
550 if (Operation *definingOp = value.getDefiningOp()) {
551 if (auto alloca = dyn_cast<LLVM::AllocaOp>(definingOp))
552 return tryToEnforceAllocaAlignment(alloca, requestedAlignment,
553 dataLayout);
554 if (auto addressOf = dyn_cast<LLVM::AddressOfOp>(definingOp))
555 if (auto global = SymbolTable::lookupNearestSymbolFrom<LLVM::GlobalOp>(
556 definingOp, addressOf.getGlobalNameAttr()))
557 return global.getAlignment().value_or(1);
558 // We don't currently handle this operation; assume no alignment.
559 return 1;
560 }
561 // Since there is no defining op, this is a block argument. Probably this
562 // comes directly from a function argument, so check that this is the case.
563 Operation *parentOp = value.getParentBlock()->getParentOp();
564 if (auto func = dyn_cast<LLVM::LLVMFuncOp>(parentOp)) {
565 // Use the alignment attribute set for this argument in the parent function
566 // if it has been set.
567 auto blockArg = llvm::cast<BlockArgument>(Val&: value);
568 if (Attribute alignAttr = func.getArgAttr(
569 blockArg.getArgNumber(), LLVM::LLVMDialect::getAlignAttrName()))
570 return cast<IntegerAttr>(alignAttr).getValue().getLimitedValue();
571 }
572 // We didn't find anything useful; assume no alignment.
573 return 1;
574}
575
576/// Introduces a new alloca and copies the memory pointed to by `argument` to
577/// the address of the new alloca, then returns the value of the new alloca.
578static Value handleByValArgumentInit(OpBuilder &builder, Location loc,
579 Value argument, Type elementType,
580 uint64_t elementTypeSize,
581 uint64_t targetAlignment) {
582 // Allocate the new value on the stack.
583 Value allocaOp;
584 {
585 // Since this is a static alloca, we can put it directly in the entry block,
586 // so they can be absorbed into the prologue/epilogue at code generation.
587 OpBuilder::InsertionGuard insertionGuard(builder);
588 Block *entryBlock = &(*argument.getParentRegion()->begin());
589 builder.setInsertionPointToStart(entryBlock);
590 Value one = builder.create<LLVM::ConstantOp>(loc, builder.getI64Type(),
591 builder.getI64IntegerAttr(1));
592 allocaOp = builder.create<LLVM::AllocaOp>(
593 loc, argument.getType(), elementType, one, targetAlignment);
594 }
595 // Copy the pointee to the newly allocated value.
596 Value copySize = builder.create<LLVM::ConstantOp>(
597 loc, builder.getI64Type(), builder.getI64IntegerAttr(elementTypeSize));
598 builder.create<LLVM::MemcpyOp>(loc, allocaOp, argument, copySize,
599 /*isVolatile=*/false);
600 return allocaOp;
601}
602
603/// Handles a function argument marked with the byval attribute by introducing a
604/// memcpy or realigning the defining operation, if required either due to the
605/// pointee being writeable in the callee, and/or due to an alignment mismatch.
606/// `requestedAlignment` specifies the alignment set in the "align" argument
607/// attribute (or 1 if no align attribute was set).
608static Value handleByValArgument(OpBuilder &builder, Operation *callable,
609 Value argument, Type elementType,
610 uint64_t requestedAlignment) {
611 auto func = cast<LLVM::LLVMFuncOp>(callable);
612 LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr();
613 // If there is no memory effects attribute, assume that the function is
614 // not read-only.
615 bool isReadOnly = memoryEffects &&
616 memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
617 memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
618 // Check if there's an alignment mismatch requiring us to copy.
619 DataLayout dataLayout = DataLayout::closest(op: callable);
620 uint64_t minimumAlignment = dataLayout.getTypeABIAlignment(t: elementType);
621 if (isReadOnly) {
622 if (requestedAlignment <= minimumAlignment)
623 return argument;
624 uint64_t currentAlignment =
625 tryToEnforceAlignment(value: argument, requestedAlignment, dataLayout);
626 if (currentAlignment >= requestedAlignment)
627 return argument;
628 }
629 uint64_t targetAlignment = std::max(a: requestedAlignment, b: minimumAlignment);
630 return handleByValArgumentInit(builder, func.getLoc(), argument, elementType,
631 dataLayout.getTypeSize(t: elementType),
632 targetAlignment);
633}
634
635namespace {
636struct LLVMInlinerInterface : public DialectInlinerInterface {
637 using DialectInlinerInterface::DialectInlinerInterface;
638
639 LLVMInlinerInterface(Dialect *dialect)
640 : DialectInlinerInterface(dialect),
641 // Cache set of StringAttrs for fast lookup in `isLegalToInline`.
642 disallowedFunctionAttrs({
643 StringAttr::get(dialect->getContext(), "noduplicate"),
644 StringAttr::get(dialect->getContext(), "noinline"),
645 StringAttr::get(dialect->getContext(), "optnone"),
646 StringAttr::get(dialect->getContext(), "presplitcoroutine"),
647 StringAttr::get(dialect->getContext(), "returns_twice"),
648 StringAttr::get(dialect->getContext(), "strictfp"),
649 }) {}
650
651 bool isLegalToInline(Operation *call, Operation *callable,
652 bool wouldBeCloned) const final {
653 if (!wouldBeCloned)
654 return false;
655 if (!isa<LLVM::CallOp>(call)) {
656 LLVM_DEBUG(llvm::dbgs()
657 << "Cannot inline: call is not an LLVM::CallOp\n");
658 return false;
659 }
660 auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
661 if (!funcOp) {
662 LLVM_DEBUG(llvm::dbgs()
663 << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n");
664 return false;
665 }
666 if (funcOp.isVarArg()) {
667 LLVM_DEBUG(llvm::dbgs() << "Cannot inline: callable is variadic\n");
668 return false;
669 }
670 // TODO: Generate aliasing metadata from noalias argument/result attributes.
671 if (auto attrs = funcOp.getArgAttrs()) {
672 for (DictionaryAttr attrDict : attrs->getAsRange<DictionaryAttr>()) {
673 if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) {
674 LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
675 << ": inalloca arguments not supported\n");
676 return false;
677 }
678 }
679 }
680 // TODO: Handle exceptions.
681 if (funcOp.getPersonality()) {
682 LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName()
683 << ": unhandled function personality\n");
684 return false;
685 }
686 if (funcOp.getPassthrough()) {
687 // TODO: Used attributes should not be passthrough.
688 if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
689 auto stringAttr = dyn_cast<StringAttr>(attr);
690 if (!stringAttr)
691 return false;
692 if (disallowedFunctionAttrs.contains(V: stringAttr)) {
693 LLVM_DEBUG(llvm::dbgs()
694 << "Cannot inline " << funcOp.getSymName()
695 << ": found disallowed function attribute "
696 << stringAttr << "\n");
697 return true;
698 }
699 return false;
700 }))
701 return false;
702 }
703 return true;
704 }
705
706 bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
707 return true;
708 }
709
710 bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
711 // The inliner cannot handle variadic function arguments.
712 return !isa<LLVM::VaStartOp>(op);
713 }
714
715 /// Handle the given inlined return by replacing it with a branch. This
716 /// overload is called when the inlined region has more than one block.
717 void handleTerminator(Operation *op, Block *newDest) const final {
718 // Only return needs to be handled here.
719 auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
720 if (!returnOp)
721 return;
722
723 // Replace the return with a branch to the dest.
724 OpBuilder builder(op);
725 builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest);
726 op->erase();
727 }
728
729 /// Handle the given inlined return by replacing the uses of the call with the
730 /// operands of the return. This overload is called when the inlined region
731 /// only contains one block.
732 void handleTerminator(Operation *op, ValueRange valuesToRepl) const final {
733 // Return will be the only terminator present.
734 auto returnOp = cast<LLVM::ReturnOp>(op);
735
736 // Replace the values directly with the return operands.
737 assert(returnOp.getNumOperands() == valuesToRepl.size());
738 for (auto [dst, src] : llvm::zip(valuesToRepl, returnOp.getOperands()))
739 dst.replaceAllUsesWith(src);
740 }
741
742 Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
743 Value argument,
744 DictionaryAttr argumentAttrs) const final {
745 if (std::optional<NamedAttribute> attr =
746 argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) {
747 Type elementType = cast<TypeAttr>(attr->getValue()).getValue();
748 uint64_t requestedAlignment = 1;
749 if (std::optional<NamedAttribute> alignAttr =
750 argumentAttrs.getNamed(LLVM::LLVMDialect::getAlignAttrName())) {
751 requestedAlignment = cast<IntegerAttr>(alignAttr->getValue())
752 .getValue()
753 .getLimitedValue();
754 }
755 return handleByValArgument(builder, callable, argument, elementType,
756 requestedAlignment);
757 }
758 if ([[maybe_unused]] std::optional<NamedAttribute> attr =
759 argumentAttrs.getNamed(LLVM::LLVMDialect::getNoAliasAttrName())) {
760 if (argument.use_empty())
761 return argument;
762
763 // This code is essentially a workaround for deficiencies in the
764 // inliner interface: We need to transform operations *after* inlined
765 // based on the argument attributes of the parameters *before* inlining.
766 // This method runs prior to actual inlining and thus cannot transform the
767 // post-inlining code, while `processInlinedCallBlocks` does not have
768 // access to pre-inlining function arguments. Additionally, it is required
769 // to distinguish which parameter an SSA value originally came from.
770 // As a workaround until this is changed: Create an ssa.copy intrinsic
771 // with the noalias attribute that can easily be found, and is extremely
772 // unlikely to exist in the code prior to inlining, using this to
773 // communicate between this method and `processInlinedCallBlocks`.
774 // TODO: Fix this by refactoring the inliner interface.
775 auto copyOp = builder.create<LLVM::SSACopyOp>(call->getLoc(), argument);
776 copyOp->setDiscardableAttr(
777 builder.getStringAttr(LLVM::LLVMDialect::getNoAliasAttrName()),
778 builder.getUnitAttr());
779 return copyOp;
780 }
781 return argument;
782 }
783
784 void processInlinedCallBlocks(
785 Operation *call,
786 iterator_range<Region::iterator> inlinedBlocks) const override {
787 handleInlinedAllocas(call, inlinedBlocks);
788 handleAliasScopes(call, inlinedBlocks);
789 handleAccessGroups(call, inlinedBlocks);
790 }
791
792 // Keeping this (immutable) state on the interface allows us to look up
793 // StringAttrs instead of looking up strings, since StringAttrs are bound to
794 // the current context and thus cannot be initialized as static fields.
795 const DenseSet<StringAttr> disallowedFunctionAttrs;
796};
797
798} // end anonymous namespace
799
800void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) {
801 dialect->addInterfaces<LLVMInlinerInterface>();
802}
803

// Source: mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp