1//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass builds the coroutine frame and outlines resume and destroy parts
9// of the coroutine into separate functions.
10//
// We present a coroutine to LLVM as an ordinary function with suspension
// points marked up with intrinsics. We let the optimizer party on the coroutine
// as a single function for as long as possible. Shortly before the coroutine is
// eligible to be inlined into its callers, we split up the coroutine into parts
// corresponding to the initial, resume, and destroy invocations of the
// coroutine, add them to the current SCC, and restart the IPO pipeline to
// optimize the coroutine subfunctions we extracted before proceeding to the
// caller of the coroutine.
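//
// As a rough illustration (not a verbatim dump), under the switch ABI a
// coroutine `f` containing llvm.coro.suspend calls is split into clones named
// `f.resume`, `f.destroy` and `f.cleanup`. Each clone receives the coroutine
// frame pointer and dispatches on a suspend-index field kept in the frame,
// e.g.:
//
//   define internal fastcc void @f.resume(ptr %frame) {
//   entry:
//     %index = load i32, ptr %index.addr  ; suspend index stored in the frame
//     switch i32 %index, label %unreachable [i32 0, label %resume.0
//                                            i32 1, label %resume.1]
//     ...
//   }
//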
19//===----------------------------------------------------------------------===//
20
21#include "llvm/Transforms/Coroutines/CoroSplit.h"
22#include "CoroInstr.h"
23#include "CoroInternal.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/PriorityWorklist.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/Twine.h"
30#include "llvm/Analysis/CFG.h"
31#include "llvm/Analysis/CallGraph.h"
32#include "llvm/Analysis/ConstantFolding.h"
33#include "llvm/Analysis/LazyCallGraph.h"
34#include "llvm/Analysis/OptimizationRemarkEmitter.h"
35#include "llvm/Analysis/TargetTransformInfo.h"
36#include "llvm/BinaryFormat/Dwarf.h"
37#include "llvm/IR/Argument.h"
38#include "llvm/IR/Attributes.h"
39#include "llvm/IR/BasicBlock.h"
40#include "llvm/IR/CFG.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DataLayout.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Dominators.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/GlobalVariable.h"
49#include "llvm/IR/IRBuilder.h"
50#include "llvm/IR/InstIterator.h"
51#include "llvm/IR/InstrTypes.h"
52#include "llvm/IR/Instruction.h"
53#include "llvm/IR/Instructions.h"
54#include "llvm/IR/IntrinsicInst.h"
55#include "llvm/IR/LLVMContext.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/Type.h"
58#include "llvm/IR/Value.h"
59#include "llvm/IR/Verifier.h"
60#include "llvm/Support/Casting.h"
61#include "llvm/Support/Debug.h"
62#include "llvm/Support/PrettyStackTrace.h"
63#include "llvm/Support/raw_ostream.h"
64#include "llvm/Transforms/Scalar.h"
65#include "llvm/Transforms/Utils/BasicBlockUtils.h"
66#include "llvm/Transforms/Utils/CallGraphUpdater.h"
67#include "llvm/Transforms/Utils/Cloning.h"
68#include "llvm/Transforms/Utils/Local.h"
69#include "llvm/Transforms/Utils/ValueMapper.h"
70#include <cassert>
71#include <cstddef>
72#include <cstdint>
73#include <initializer_list>
74#include <iterator>
75
76using namespace llvm;
77
78#define DEBUG_TYPE "coro-split"
79
80namespace {
81
/// A little helper class for building the resume, destroy, cleanup, and
/// continuation clones of a coroutine.
83class CoroCloner {
84public:
85 enum class Kind {
86 /// The shared resume function for a switch lowering.
87 SwitchResume,
88
89 /// The shared unwind function for a switch lowering.
90 SwitchUnwind,
91
92 /// The shared cleanup function for a switch lowering.
93 SwitchCleanup,
94
95 /// An individual continuation function.
96 Continuation,
97
98 /// An async resume function.
99 Async,
100 };
101
102private:
103 Function &OrigF;
104 Function *NewF;
105 const Twine &Suffix;
106 coro::Shape &Shape;
107 Kind FKind;
108 ValueToValueMapTy VMap;
109 IRBuilder<> Builder;
110 Value *NewFramePtr = nullptr;
111
112 /// The active suspend instruction; meaningful only for continuation and async
113 /// ABIs.
114 AnyCoroSuspendInst *ActiveSuspend = nullptr;
115
116public:
117 /// Create a cloner for a switch lowering.
118 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
119 Kind FKind)
120 : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind),
121 Builder(OrigF.getContext()) {
122 assert(Shape.ABI == coro::ABI::Switch);
123 }
124
125 /// Create a cloner for a continuation lowering.
126 CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
127 Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
128 : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
129 FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
130 Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
131 assert(Shape.ABI == coro::ABI::Retcon ||
132 Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
133 assert(NewF && "need existing function for continuation");
134 assert(ActiveSuspend && "need active suspend point for continuation");
135 }
136
137 Function *getFunction() const {
138 assert(NewF != nullptr && "declaration not yet set");
139 return NewF;
140 }
141
142 void create();
143
144private:
145 bool isSwitchDestroyFunction() {
146 switch (FKind) {
147 case Kind::Async:
148 case Kind::Continuation:
149 case Kind::SwitchResume:
150 return false;
151 case Kind::SwitchUnwind:
152 case Kind::SwitchCleanup:
153 return true;
154 }
155 llvm_unreachable("Unknown CoroCloner::Kind enum");
156 }
157
158 void replaceEntryBlock();
159 Value *deriveNewFramePointer();
160 void replaceRetconOrAsyncSuspendUses();
161 void replaceCoroSuspends();
162 void replaceCoroEnds();
163 void replaceSwiftErrorOps();
164 void salvageDebugInfo();
165 void handleFinalSuspend();
166};
167
168} // end anonymous namespace
169
// FIXME:
// Lower the intrinsic in the CoroEarly phase if the coroutine frame doesn't
// escape and it is known that other transformations (for example, sanitizers)
// won't lead to incorrect code.
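//
// As a sketch of the rewrite performed below (the operand names are
// illustrative; see CoroInstr.h for the exact intrinsic definitions), a call
// such as
//
//   %suspended = call i1 @llvm.coro.await.suspend.bool(ptr %awaiter, ptr %hdl,
//                                                      ptr @await_suspend_wrapper)
//
// becomes a direct call (or invoke) of the wrapper that forwards the first two
// operands and drops the wrapper operand:
//
//   %suspended = call i1 @await_suspend_wrapper(ptr %awaiter, ptr %hdl)
//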
174static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
175 auto Wrapper = CB->getWrapperFunction();
176 auto Awaiter = CB->getAwaiter();
177 auto FramePtr = CB->getFrame();
178
179 Builder.SetInsertPoint(CB);
180
181 CallBase *NewCall = nullptr;
  // await_suspend has only two parameters: the awaiter and the handle.
  // Copy the parameter attributes from the intrinsic call, but drop those of
  // the last parameter, because that operand becomes the callee of the new call.
185 AttributeList NewAttributes =
186 CB->getAttributes().removeParamAttributes(C&: CB->getContext(), ArgNo: 2);
187
188 if (auto Invoke = dyn_cast<InvokeInst>(Val: CB)) {
189 auto WrapperInvoke =
190 Builder.CreateInvoke(Callee: Wrapper, NormalDest: Invoke->getNormalDest(),
191 UnwindDest: Invoke->getUnwindDest(), Args: {Awaiter, FramePtr});
192
193 WrapperInvoke->setCallingConv(Invoke->getCallingConv());
194 std::copy(first: Invoke->bundle_op_info_begin(), last: Invoke->bundle_op_info_end(),
195 result: WrapperInvoke->bundle_op_info_begin());
196 WrapperInvoke->setAttributes(NewAttributes);
197 WrapperInvoke->setDebugLoc(Invoke->getDebugLoc());
198 NewCall = WrapperInvoke;
199 } else if (auto Call = dyn_cast<CallInst>(Val: CB)) {
200 auto WrapperCall = Builder.CreateCall(Callee: Wrapper, Args: {Awaiter, FramePtr});
201
202 WrapperCall->setAttributes(NewAttributes);
203 WrapperCall->setDebugLoc(Call->getDebugLoc());
204 NewCall = WrapperCall;
205 } else {
206 llvm_unreachable("Unexpected coro_await_suspend invocation method");
207 }
208
209 CB->replaceAllUsesWith(V: NewCall);
210 CB->eraseFromParent();
211}
212
213static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
214 IRBuilder<> Builder(F.getContext());
215 for (auto *AWS : Shape.CoroAwaitSuspends)
216 lowerAwaitSuspend(Builder, CB: AWS);
217}
218
219static void maybeFreeRetconStorage(IRBuilder<> &Builder,
220 const coro::Shape &Shape, Value *FramePtr,
221 CallGraph *CG) {
222 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
223 if (Shape.RetconLowering.IsFrameInlineInStorage)
224 return;
225
226 Shape.emitDealloc(Builder, Ptr: FramePtr, CG);
227}
228
229/// Replace an llvm.coro.end.async.
/// Will inline the musttail-call function if there is one.
231/// \returns true if cleanup of the coro.end block is needed, false otherwise.
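//
// Illustrative sketch of the result (not a verbatim dump): the must-tail call
// is moved from the predecessor block so that it sits immediately before the
// `ret void` that this function inserts in place of the coro.end,
//
//   musttail call swiftcc void @must_tail_fn(ptr %async.ctxt, ...)
//   ret void
//
// after which the must-tail callee is inlined into the clone.
//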
232static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
233 IRBuilder<> Builder(End);
234
235 auto *EndAsync = dyn_cast<CoroAsyncEndInst>(Val: End);
236 if (!EndAsync) {
237 Builder.CreateRetVoid();
238 return true /*needs cleanup of coro.end block*/;
239 }
240
241 auto *MustTailCallFunc = EndAsync->getMustTailCallFunction();
242 if (!MustTailCallFunc) {
243 Builder.CreateRetVoid();
244 return true /*needs cleanup of coro.end block*/;
245 }
246
247 // Move the must tail call from the predecessor block into the end block.
248 auto *CoroEndBlock = End->getParent();
249 auto *MustTailCallFuncBlock = CoroEndBlock->getSinglePredecessor();
250 assert(MustTailCallFuncBlock && "Must have a single predecessor block");
251 auto It = MustTailCallFuncBlock->getTerminator()->getIterator();
252 auto *MustTailCall = cast<CallInst>(Val: &*std::prev(x: It));
253 CoroEndBlock->splice(ToIt: End->getIterator(), FromBB: MustTailCallFuncBlock,
254 FromIt: MustTailCall->getIterator());
255
256 // Insert the return instruction.
257 Builder.SetInsertPoint(End);
258 Builder.CreateRetVoid();
259 InlineFunctionInfo FnInfo;
260
261 // Remove the rest of the block, by splitting it into an unreachable block.
262 auto *BB = End->getParent();
263 BB->splitBasicBlock(I: End);
264 BB->getTerminator()->eraseFromParent();
265
266 auto InlineRes = InlineFunction(CB&: *MustTailCall, IFI&: FnInfo);
267 assert(InlineRes.isSuccess() && "Expected inlining to succeed");
268 (void)InlineRes;
269
270 // We have cleaned up the coro.end block above.
271 return false;
272}
273
274/// Replace a non-unwind call to llvm.coro.end.
275static void replaceFallthroughCoroEnd(AnyCoroEndInst *End,
276 const coro::Shape &Shape, Value *FramePtr,
277 bool InResume, CallGraph *CG) {
278 // Start inserting right before the coro.end.
279 IRBuilder<> Builder(End);
280
281 // Create the return instruction.
282 switch (Shape.ABI) {
283 // The cloned functions in switch-lowering always return void.
284 case coro::ABI::Switch:
285 assert(!cast<CoroEndInst>(End)->hasResults() &&
286 "switch coroutine should not return any values");
287 // coro.end doesn't immediately end the coroutine in the main function
288 // in this lowering, because we need to deallocate the coroutine.
289 if (!InResume)
290 return;
291 Builder.CreateRetVoid();
292 break;
293
294 // In async lowering this returns.
295 case coro::ABI::Async: {
296 bool CoroEndBlockNeedsCleanup = replaceCoroEndAsync(End);
297 if (!CoroEndBlockNeedsCleanup)
298 return;
299 break;
300 }
301
302 // In unique continuation lowering, the continuations always return void.
303 // But we may have implicitly allocated storage.
304 case coro::ABI::RetconOnce: {
305 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
306 auto *CoroEnd = cast<CoroEndInst>(Val: End);
307 auto *RetTy = Shape.getResumeFunctionType()->getReturnType();
308
309 if (!CoroEnd->hasResults()) {
310 assert(RetTy->isVoidTy());
311 Builder.CreateRetVoid();
312 break;
313 }
314
315 auto *CoroResults = CoroEnd->getResults();
316 unsigned NumReturns = CoroResults->numReturns();
317
318 if (auto *RetStructTy = dyn_cast<StructType>(Val: RetTy)) {
319 assert(RetStructTy->getNumElements() == NumReturns &&
             "number of returns should match the resume function signature");
321 Value *ReturnValue = UndefValue::get(T: RetStructTy);
322 unsigned Idx = 0;
323 for (Value *RetValEl : CoroResults->return_values())
324 ReturnValue = Builder.CreateInsertValue(Agg: ReturnValue, Val: RetValEl, Idxs: Idx++);
325 Builder.CreateRet(V: ReturnValue);
326 } else if (NumReturns == 0) {
327 assert(RetTy->isVoidTy());
328 Builder.CreateRetVoid();
329 } else {
330 assert(NumReturns == 1);
331 Builder.CreateRet(V: *CoroResults->retval_begin());
332 }
333 CoroResults->replaceAllUsesWith(
334 V: ConstantTokenNone::get(Context&: CoroResults->getContext()));
335 CoroResults->eraseFromParent();
336 break;
337 }
338
339 // In non-unique continuation lowering, we signal completion by returning
340 // a null continuation.
341 case coro::ABI::Retcon: {
342 assert(!cast<CoroEndInst>(End)->hasResults() &&
343 "retcon coroutine should not return any values");
344 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
345 auto RetTy = Shape.getResumeFunctionType()->getReturnType();
346 auto RetStructTy = dyn_cast<StructType>(Val: RetTy);
347 PointerType *ContinuationTy =
348 cast<PointerType>(Val: RetStructTy ? RetStructTy->getElementType(N: 0) : RetTy);
349
350 Value *ReturnValue = ConstantPointerNull::get(T: ContinuationTy);
351 if (RetStructTy) {
352 ReturnValue = Builder.CreateInsertValue(Agg: UndefValue::get(T: RetStructTy),
353 Val: ReturnValue, Idxs: 0);
354 }
355 Builder.CreateRet(V: ReturnValue);
356 break;
357 }
358 }
359
360 // Remove the rest of the block, by splitting it into an unreachable block.
361 auto *BB = End->getParent();
362 BB->splitBasicBlock(I: End);
363 BB->getTerminator()->eraseFromParent();
364}
365
// Mark a coroutine as done, which implies that the coroutine is finished and
// will never be resumed.
//
// In the resume-switch ABI, the done state is represented by storing a null
// pointer in ResumeFnAddr.
//
// NOTE: We cannot omit the argument `FramePtr`; it is necessary because the
// pointer to the frame in the split function is not stored in `Shape`.
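//
// Conceptually, the store emitted below looks like the following (the frame
// type name is illustrative; the field index comes from
// coro::Shape::SwitchFieldIndex::Resume):
//
//   %ResumeFn.addr = getelementptr inbounds %f.Frame, ptr %frame,
//                                           i32 0, i32 <SwitchFieldIndex::Resume>
//   store ptr null, ptr %ResumeFn.addr
//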
374static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
375 Value *FramePtr) {
376 assert(
377 Shape.ABI == coro::ABI::Switch &&
378 "markCoroutineAsDone is only supported for Switch-Resumed ABI for now.");
379 auto *GepIndex = Builder.CreateStructGEP(
380 Ty: Shape.FrameTy, Ptr: FramePtr, Idx: coro::Shape::SwitchFieldIndex::Resume,
381 Name: "ResumeFn.addr");
382 auto *NullPtr = ConstantPointerNull::get(T: cast<PointerType>(
383 Val: Shape.FrameTy->getTypeAtIndex(N: coro::Shape::SwitchFieldIndex::Resume)));
384 Builder.CreateStore(Val: NullPtr, Ptr: GepIndex);
385
  // If the coroutine doesn't have an unwind coro.end, we can omit the store to
  // the final suspend point index, since we can infer that the coroutine is
  // suspended at the final suspend point from the nullness of ResumeFnAddr.
  // However, we can't skip it if the coroutine has an unwind coro.end: a
  // coroutine that reaches an unwind coro.end is considered suspended at the
  // final suspend point (its ResumeFnAddr is null) even though it hasn't
  // actually completed yet. We need the IndexVal for the final suspend point
  // to keep these states distinguishable.
394 if (Shape.SwitchLowering.HasUnwindCoroEnd &&
395 Shape.SwitchLowering.HasFinalSuspend) {
396 assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
397 "The final suspend should only live in the last position of "
398 "CoroSuspends.");
399 ConstantInt *IndexVal = Shape.getIndex(Value: Shape.CoroSuspends.size() - 1);
400 auto *FinalIndex = Builder.CreateStructGEP(
401 Ty: Shape.FrameTy, Ptr: FramePtr, Idx: Shape.getSwitchIndexField(), Name: "index.addr");
402
403 Builder.CreateStore(Val: IndexVal, Ptr: FinalIndex);
404 }
405}
406
407/// Replace an unwind call to llvm.coro.end.
408static void replaceUnwindCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
409 Value *FramePtr, bool InResume,
410 CallGraph *CG) {
411 IRBuilder<> Builder(End);
412
413 switch (Shape.ABI) {
414 // In switch-lowering, this does nothing in the main function.
415 case coro::ABI::Switch: {
416 // In C++'s specification, the coroutine should be marked as done
417 // if promise.unhandled_exception() throws. The frontend will
418 // call coro.end(true) along this path.
419 //
    // FIXME: We should refactor this once there are other languages that
    // use the switch-resumed style besides C++.
422 markCoroutineAsDone(Builder, Shape, FramePtr);
423 if (!InResume)
424 return;
425 break;
426 }
427 // In async lowering this does nothing.
428 case coro::ABI::Async:
429 break;
430 // In continuation-lowering, this frees the continuation storage.
431 case coro::ABI::Retcon:
432 case coro::ABI::RetconOnce:
433 maybeFreeRetconStorage(Builder, Shape, FramePtr, CG);
434 break;
435 }
436
437 // If coro.end has an associated bundle, add cleanupret instruction.
438 if (auto Bundle = End->getOperandBundle(ID: LLVMContext::OB_funclet)) {
439 auto *FromPad = cast<CleanupPadInst>(Val: Bundle->Inputs[0]);
440 auto *CleanupRet = Builder.CreateCleanupRet(CleanupPad: FromPad, UnwindBB: nullptr);
441 End->getParent()->splitBasicBlock(I: End);
442 CleanupRet->getParent()->getTerminator()->eraseFromParent();
443 }
444}
445
446static void replaceCoroEnd(AnyCoroEndInst *End, const coro::Shape &Shape,
447 Value *FramePtr, bool InResume, CallGraph *CG) {
448 if (End->isUnwind())
449 replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG);
450 else
451 replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG);
452
453 auto &Context = End->getContext();
454 End->replaceAllUsesWith(V: InResume ? ConstantInt::getTrue(Context)
455 : ConstantInt::getFalse(Context));
456 End->eraseFromParent();
457}
458
// In the resume function, we remove the last case (when coro::Shape is built,
// the final suspend point, if present, is always the last element of the
// CoroSuspends array) since it is undefined behavior to resume a coroutine
// suspended at the final suspend point.
// In the destroy function, we can also remove the last case as long as it is
// impossible for ResumeFnAddr to be null while the coroutine is not actually
// suspended at the final suspend point (that situation is possible, since a
// coroutine is considered suspended at the final suspend point if
// promise.unhandled_exception() exits via an exception).
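//
// As a rough sketch of the destroy-clone rewrite (types and labels are
// illustrative): instead of keeping the final case in
//
//   switch i2 %index, label %suspend [i2 0, label %cleanup0
//                                     i2 1, label %cleanup.final]
//
// we branch to the final-suspend cleanup based on a null check of the resume
// function pointer stored in the frame:
//
//   %ResumeFn = load ptr, ptr %ResumeFn.addr
//   %is.done = icmp eq ptr %ResumeFn, null
//   br i1 %is.done, label %cleanup.final, label %Switch
//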
468void CoroCloner::handleFinalSuspend() {
469 assert(Shape.ABI == coro::ABI::Switch &&
470 Shape.SwitchLowering.HasFinalSuspend);
471
472 if (isSwitchDestroyFunction() && Shape.SwitchLowering.HasUnwindCoroEnd)
473 return;
474
475 auto *Switch = cast<SwitchInst>(Val&: VMap[Shape.SwitchLowering.ResumeSwitch]);
476 auto FinalCaseIt = std::prev(x: Switch->case_end());
477 BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
478 Switch->removeCase(I: FinalCaseIt);
479 if (isSwitchDestroyFunction()) {
480 BasicBlock *OldSwitchBB = Switch->getParent();
481 auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(I: Switch, BBName: "Switch");
482 Builder.SetInsertPoint(OldSwitchBB->getTerminator());
483
484 if (NewF->isCoroOnlyDestroyWhenComplete()) {
485 // When the coroutine can only be destroyed when complete, we don't need
486 // to generate code for other cases.
487 Builder.CreateBr(Dest: ResumeBB);
488 } else {
489 auto *GepIndex = Builder.CreateStructGEP(
490 Ty: Shape.FrameTy, Ptr: NewFramePtr, Idx: coro::Shape::SwitchFieldIndex::Resume,
491 Name: "ResumeFn.addr");
492 auto *Load =
493 Builder.CreateLoad(Ty: Shape.getSwitchResumePointerType(), Ptr: GepIndex);
494 auto *Cond = Builder.CreateIsNull(Arg: Load);
495 Builder.CreateCondBr(Cond, True: ResumeBB, False: NewSwitchBB);
496 }
497 OldSwitchBB->getTerminator()->eraseFromParent();
498 }
499}
500
501static FunctionType *
502getFunctionTypeFromAsyncSuspend(AnyCoroSuspendInst *Suspend) {
503 auto *AsyncSuspend = cast<CoroSuspendAsyncInst>(Val: Suspend);
504 auto *StructTy = cast<StructType>(Val: AsyncSuspend->getType());
505 auto &Context = Suspend->getParent()->getParent()->getContext();
506 auto *VoidTy = Type::getVoidTy(C&: Context);
507 return FunctionType::get(Result: VoidTy, Params: StructTy->elements(), isVarArg: false);
508}
509
510static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape,
511 const Twine &Suffix,
512 Module::iterator InsertBefore,
513 AnyCoroSuspendInst *ActiveSuspend) {
514 Module *M = OrigF.getParent();
515 auto *FnTy = (Shape.ABI != coro::ABI::Async)
516 ? Shape.getResumeFunctionType()
517 : getFunctionTypeFromAsyncSuspend(Suspend: ActiveSuspend);
518
519 Function *NewF =
520 Function::Create(Ty: FnTy, Linkage: GlobalValue::LinkageTypes::InternalLinkage,
521 N: OrigF.getName() + Suffix);
522
523 M->getFunctionList().insert(where: InsertBefore, New: NewF);
524
525 return NewF;
526}
527
528/// Replace uses of the active llvm.coro.suspend.retcon/async call with the
529/// arguments to the continuation function.
530///
531/// This assumes that the builder has a meaningful insertion point.
532void CoroCloner::replaceRetconOrAsyncSuspendUses() {
533 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
534 Shape.ABI == coro::ABI::Async);
535
536 auto NewS = VMap[ActiveSuspend];
537 if (NewS->use_empty())
538 return;
539
540 // Copy out all the continuation arguments after the buffer pointer into
541 // an easily-indexed data structure for convenience.
542 SmallVector<Value *, 8> Args;
543 // The async ABI includes all arguments -- including the first argument.
544 bool IsAsyncABI = Shape.ABI == coro::ABI::Async;
545 for (auto I = IsAsyncABI ? NewF->arg_begin() : std::next(x: NewF->arg_begin()),
546 E = NewF->arg_end();
547 I != E; ++I)
548 Args.push_back(Elt: &*I);
549
550 // If the suspend returns a single scalar value, we can just do a simple
551 // replacement.
552 if (!isa<StructType>(Val: NewS->getType())) {
553 assert(Args.size() == 1);
554 NewS->replaceAllUsesWith(V: Args.front());
555 return;
556 }
557
558 // Try to peephole extracts of an aggregate return.
559 for (Use &U : llvm::make_early_inc_range(Range: NewS->uses())) {
560 auto *EVI = dyn_cast<ExtractValueInst>(Val: U.getUser());
561 if (!EVI || EVI->getNumIndices() != 1)
562 continue;
563
564 EVI->replaceAllUsesWith(V: Args[EVI->getIndices().front()]);
565 EVI->eraseFromParent();
566 }
567
568 // If we have no remaining uses, we're done.
569 if (NewS->use_empty())
570 return;
571
572 // Otherwise, we need to create an aggregate.
573 Value *Agg = PoisonValue::get(T: NewS->getType());
574 for (size_t I = 0, E = Args.size(); I != E; ++I)
575 Agg = Builder.CreateInsertValue(Agg, Val: Args[I], Idxs: I);
576
577 NewS->replaceAllUsesWith(V: Agg);
578}
579
580void CoroCloner::replaceCoroSuspends() {
581 Value *SuspendResult;
582
583 switch (Shape.ABI) {
584 // In switch lowering, replace coro.suspend with the appropriate value
585 // for the type of function we're extracting.
586 // Replacing coro.suspend with (0) will result in control flow proceeding to
587 // a resume label associated with a suspend point, replacing it with (1) will
588 // result in control flow proceeding to a cleanup label associated with this
589 // suspend point.
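  //
  // For example (illustrative), a cloned
  //   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
  // is replaced by the constant `i8 0` in the resume clone and by `i8 1` in
  // the destroy/cleanup clones, so the switch on %sp folds to the desired path.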
590 case coro::ABI::Switch:
591 SuspendResult = Builder.getInt8(C: isSwitchDestroyFunction() ? 1 : 0);
592 break;
593
594 // In async lowering there are no uses of the result.
595 case coro::ABI::Async:
596 return;
597
598 // In returned-continuation lowering, the arguments from earlier
599 // continuations are theoretically arbitrary, and they should have been
600 // spilled.
601 case coro::ABI::RetconOnce:
602 case coro::ABI::Retcon:
603 return;
604 }
605
606 for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) {
607 // The active suspend was handled earlier.
608 if (CS == ActiveSuspend)
609 continue;
610
611 auto *MappedCS = cast<AnyCoroSuspendInst>(Val&: VMap[CS]);
612 MappedCS->replaceAllUsesWith(V: SuspendResult);
613 MappedCS->eraseFromParent();
614 }
615}
616
617void CoroCloner::replaceCoroEnds() {
618 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
619 // We use a null call graph because there's no call graph node for
620 // the cloned function yet. We'll just be rebuilding that later.
621 auto *NewCE = cast<AnyCoroEndInst>(Val&: VMap[CE]);
622 replaceCoroEnd(End: NewCE, Shape, FramePtr: NewFramePtr, /*in resume*/ InResume: true, CG: nullptr);
623 }
624}
625
626static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
627 ValueToValueMapTy *VMap) {
628 if (Shape.ABI == coro::ABI::Async && Shape.CoroSuspends.empty())
629 return;
630 Value *CachedSlot = nullptr;
631 auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
632 if (CachedSlot)
633 return CachedSlot;
634
635 // Check if the function has a swifterror argument.
636 for (auto &Arg : F.args()) {
637 if (Arg.isSwiftError()) {
638 CachedSlot = &Arg;
639 return &Arg;
640 }
641 }
642
643 // Create a swifterror alloca.
644 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
645 auto Alloca = Builder.CreateAlloca(Ty: ValueTy);
646 Alloca->setSwiftError(true);
647
648 CachedSlot = Alloca;
649 return Alloca;
650 };
651
652 for (CallInst *Op : Shape.SwiftErrorOps) {
653 auto MappedOp = VMap ? cast<CallInst>(Val&: (*VMap)[Op]) : Op;
654 IRBuilder<> Builder(MappedOp);
655
656 // If there are no arguments, this is a 'get' operation.
657 Value *MappedResult;
658 if (Op->arg_empty()) {
659 auto ValueTy = Op->getType();
660 auto Slot = getSwiftErrorSlot(ValueTy);
661 MappedResult = Builder.CreateLoad(Ty: ValueTy, Ptr: Slot);
662 } else {
663 assert(Op->arg_size() == 1);
664 auto Value = MappedOp->getArgOperand(i: 0);
665 auto ValueTy = Value->getType();
666 auto Slot = getSwiftErrorSlot(ValueTy);
667 Builder.CreateStore(Val: Value, Ptr: Slot);
668 MappedResult = Slot;
669 }
670
671 MappedOp->replaceAllUsesWith(V: MappedResult);
672 MappedOp->eraseFromParent();
673 }
674
675 // If we're updating the original function, we've invalidated SwiftErrorOps.
676 if (VMap == nullptr) {
677 Shape.SwiftErrorOps.clear();
678 }
679}
680
/// Returns all DbgVariableIntrinsics and DbgVariableRecords in F.
682static std::pair<SmallVector<DbgVariableIntrinsic *, 8>,
683 SmallVector<DbgVariableRecord *>>
684collectDbgVariableIntrinsics(Function &F) {
685 SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
686 SmallVector<DbgVariableRecord *> DbgVariableRecords;
687 for (auto &I : instructions(F)) {
688 for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange()))
689 DbgVariableRecords.push_back(Elt: &DVR);
690 if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(Val: &I))
691 Intrinsics.push_back(Elt: DVI);
692 }
693 return {Intrinsics, DbgVariableRecords};
694}
695
696void CoroCloner::replaceSwiftErrorOps() {
697 ::replaceSwiftErrorOps(F&: *NewF, Shape, VMap: &VMap);
698}
699
700void CoroCloner::salvageDebugInfo() {
701 auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(F&: *NewF);
702 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
703
704 // Only 64-bit ABIs have a register we can refer to with the entry value.
705 bool UseEntryValue =
706 llvm::Triple(OrigF.getParent()->getTargetTriple()).isArch64Bit();
707 for (DbgVariableIntrinsic *DVI : Worklist)
708 coro::salvageDebugInfo(ArgToAllocaMap, DVI&: *DVI, OptimizeFrame: Shape.OptimizeFrame,
709 IsEntryPoint: UseEntryValue);
710 for (DbgVariableRecord *DVR : DbgVariableRecords)
711 coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DVR, OptimizeFrame: Shape.OptimizeFrame,
712 UseEntryValue);
713
714 // Remove all salvaged dbg.declare intrinsics that became
715 // either unreachable or stale due to the CoroSplit transformation.
716 DominatorTree DomTree(*NewF);
717 auto IsUnreachableBlock = [&](BasicBlock *BB) {
718 return !isPotentiallyReachable(From: &NewF->getEntryBlock(), To: BB, ExclusionSet: nullptr,
719 DT: &DomTree);
720 };
721 auto RemoveOne = [&](auto *DVI) {
722 if (IsUnreachableBlock(DVI->getParent()))
723 DVI->eraseFromParent();
724 else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
725 // Count all non-debuginfo uses in reachable blocks.
726 unsigned Uses = 0;
727 for (auto *User : DVI->getVariableLocationOp(0)->users())
728 if (auto *I = dyn_cast<Instruction>(User))
729 if (!isa<AllocaInst>(I) && !IsUnreachableBlock(I->getParent()))
730 ++Uses;
731 if (!Uses)
732 DVI->eraseFromParent();
733 }
734 };
735 for_each(Range&: Worklist, F: RemoveOne);
736 for_each(Range&: DbgVariableRecords, F: RemoveOne);
737}
738
739void CoroCloner::replaceEntryBlock() {
740 // In the original function, the AllocaSpillBlock is a block immediately
741 // following the allocation of the frame object which defines GEPs for
742 // all the allocas that have been moved into the frame, and it ends by
743 // branching to the original beginning of the coroutine. Make this
744 // the entry block of the cloned function.
745 auto *Entry = cast<BasicBlock>(Val&: VMap[Shape.AllocaSpillBlock]);
746 auto *OldEntry = &NewF->getEntryBlock();
747 Entry->setName("entry" + Suffix);
748 Entry->moveBefore(MovePos: OldEntry);
749 Entry->getTerminator()->eraseFromParent();
750
751 // Clear all predecessors of the new entry block. There should be
752 // exactly one predecessor, which we created when splitting out
753 // AllocaSpillBlock to begin with.
754 assert(Entry->hasOneUse());
755 auto BranchToEntry = cast<BranchInst>(Val: Entry->user_back());
756 assert(BranchToEntry->isUnconditional());
757 Builder.SetInsertPoint(BranchToEntry);
758 Builder.CreateUnreachable();
759 BranchToEntry->eraseFromParent();
760
761 // Branch from the entry to the appropriate place.
762 Builder.SetInsertPoint(Entry);
763 switch (Shape.ABI) {
764 case coro::ABI::Switch: {
765 // In switch-lowering, we built a resume-entry block in the original
766 // function. Make the entry block branch to this.
767 auto *SwitchBB =
768 cast<BasicBlock>(Val&: VMap[Shape.SwitchLowering.ResumeEntryBlock]);
769 Builder.CreateBr(Dest: SwitchBB);
770 break;
771 }
772 case coro::ABI::Async:
773 case coro::ABI::Retcon:
774 case coro::ABI::RetconOnce: {
775 // In continuation ABIs, we want to branch to immediately after the
776 // active suspend point. Earlier phases will have put the suspend in its
777 // own basic block, so just thread our jump directly to its successor.
778 assert((Shape.ABI == coro::ABI::Async &&
779 isa<CoroSuspendAsyncInst>(ActiveSuspend)) ||
780 ((Shape.ABI == coro::ABI::Retcon ||
781 Shape.ABI == coro::ABI::RetconOnce) &&
782 isa<CoroSuspendRetconInst>(ActiveSuspend)));
783 auto *MappedCS = cast<AnyCoroSuspendInst>(Val&: VMap[ActiveSuspend]);
784 auto Branch = cast<BranchInst>(Val: MappedCS->getNextNode());
785 assert(Branch->isUnconditional());
786 Builder.CreateBr(Dest: Branch->getSuccessor(i: 0));
787 break;
788 }
789 }
790
791 // Any static alloca that's still being used but not reachable from the new
792 // entry needs to be moved to the new entry.
793 Function *F = OldEntry->getParent();
794 DominatorTree DT{*F};
795 for (Instruction &I : llvm::make_early_inc_range(Range: instructions(F))) {
796 auto *Alloca = dyn_cast<AllocaInst>(Val: &I);
797 if (!Alloca || I.use_empty())
798 continue;
799 if (DT.isReachableFromEntry(A: I.getParent()) ||
800 !isa<ConstantInt>(Val: Alloca->getArraySize()))
801 continue;
802 I.moveBefore(BB&: *Entry, I: Entry->getFirstInsertionPt());
803 }
804}
805
806/// Derive the value of the new frame pointer.
807Value *CoroCloner::deriveNewFramePointer() {
808 // Builder should be inserting to the front of the new entry block.
809
810 switch (Shape.ABI) {
811 // In switch-lowering, the argument is the frame pointer.
812 case coro::ABI::Switch:
813 return &*NewF->arg_begin();
  // In async-lowering, one of the arguments is an async context as determined
  // by the `llvm.coro.id.async` intrinsic. We can retrieve the async context of
  // the resume function from the async context projection function associated
  // with the active suspend. The frame is laid out as a tail allocation after
  // the async context header.
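  //
  // A sketch of the IR this produces (names are illustrative; the calling
  // convention is copied from the projection function):
  //
  //   %caller.ctx = call swiftcc ptr @ctx_projection_fn(ptr %callee.ctx)
  //   %async.ctx.frameptr = getelementptr inbounds i8, ptr %caller.ctx,
  //                                                 i32 <FrameOffset>
  //
  // after which the projection call is inlined away.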
819 case coro::ABI::Async: {
820 auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(Val: ActiveSuspend);
821 auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff;
822 auto *CalleeContext = NewF->getArg(i: ContextIdx);
823 auto *ProjectionFunc =
824 ActiveAsyncSuspend->getAsyncContextProjectionFunction();
825 auto DbgLoc =
826 cast<CoroSuspendAsyncInst>(Val&: VMap[ActiveSuspend])->getDebugLoc();
827 // Calling i8* (i8*)
828 auto *CallerContext = Builder.CreateCall(FTy: ProjectionFunc->getFunctionType(),
829 Callee: ProjectionFunc, Args: CalleeContext);
830 CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
831 CallerContext->setDebugLoc(DbgLoc);
832 // The frame is located after the async_context header.
833 auto &Context = Builder.getContext();
834 auto *FramePtrAddr = Builder.CreateConstInBoundsGEP1_32(
835 Ty: Type::getInt8Ty(C&: Context), Ptr: CallerContext,
836 Idx0: Shape.AsyncLowering.FrameOffset, Name: "async.ctx.frameptr");
837 // Inline the projection function.
838 InlineFunctionInfo InlineInfo;
839 auto InlineRes = InlineFunction(CB&: *CallerContext, IFI&: InlineInfo);
840 assert(InlineRes.isSuccess());
841 (void)InlineRes;
842 return FramePtrAddr;
843 }
844 // In continuation-lowering, the argument is the opaque storage.
845 case coro::ABI::Retcon:
846 case coro::ABI::RetconOnce: {
847 Argument *NewStorage = &*NewF->arg_begin();
848 auto FramePtrTy = PointerType::getUnqual(C&: Shape.FrameTy->getContext());
849
    // If the storage is inline, just bitcast the storage to the frame type.
851 if (Shape.RetconLowering.IsFrameInlineInStorage)
852 return NewStorage;
853
854 // Otherwise, load the real frame from the opaque storage.
855 return Builder.CreateLoad(Ty: FramePtrTy, Ptr: NewStorage);
856 }
857 }
858 llvm_unreachable("bad ABI");
859}
860
861static void addFramePointerAttrs(AttributeList &Attrs, LLVMContext &Context,
862 unsigned ParamIndex, uint64_t Size,
863 Align Alignment, bool NoAlias) {
864 AttrBuilder ParamAttrs(Context);
865 ParamAttrs.addAttribute(Attribute::NonNull);
866 ParamAttrs.addAttribute(Attribute::NoUndef);
867
868 if (NoAlias)
869 ParamAttrs.addAttribute(Attribute::NoAlias);
870
871 ParamAttrs.addAlignmentAttr(Align: Alignment);
872 ParamAttrs.addDereferenceableAttr(Bytes: Size);
873 Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs);
874}
875
876static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context,
877 unsigned ParamIndex) {
878 AttrBuilder ParamAttrs(Context);
879 ParamAttrs.addAttribute(Attribute::SwiftAsync);
880 Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs);
881}
882
883static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context,
884 unsigned ParamIndex) {
885 AttrBuilder ParamAttrs(Context);
886 ParamAttrs.addAttribute(Attribute::SwiftSelf);
887 Attrs = Attrs.addParamAttributes(C&: Context, ArgNo: ParamIndex, B: ParamAttrs);
888}
889
890/// Clone the body of the original function into a resume function of
891/// some sort.
892void CoroCloner::create() {
893 // Create the new function if we don't already have one.
894 if (!NewF) {
895 NewF = createCloneDeclaration(OrigF, Shape, Suffix,
896 InsertBefore: OrigF.getParent()->end(), ActiveSuspend);
897 }
898
899 // Replace all args with dummy instructions. If an argument is the old frame
900 // pointer, the dummy will be replaced by the new frame pointer once it is
901 // computed below. Uses of all other arguments should have already been
902 // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
903 // frame.
904 SmallVector<Instruction *> DummyArgs;
905 for (Argument &A : OrigF.args()) {
906 DummyArgs.push_back(Elt: new FreezeInst(PoisonValue::get(T: A.getType())));
907 VMap[&A] = DummyArgs.back();
908 }
909
910 SmallVector<ReturnInst *, 4> Returns;
911
912 // Ignore attempts to change certain attributes of the function.
913 // TODO: maybe there should be a way to suppress this during cloning?
914 auto savedVisibility = NewF->getVisibility();
915 auto savedUnnamedAddr = NewF->getUnnamedAddr();
916 auto savedDLLStorageClass = NewF->getDLLStorageClass();
917
918 // NewF's linkage (which CloneFunctionInto does *not* change) might not
919 // be compatible with the visibility of OrigF (which it *does* change),
920 // so protect against that.
921 auto savedLinkage = NewF->getLinkage();
922 NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
923
924 CloneFunctionInto(NewFunc: NewF, OldFunc: &OrigF, VMap,
925 Changes: CloneFunctionChangeType::LocalChangesOnly, Returns);
926
927 auto &Context = NewF->getContext();
928
929 // For async functions / continuations, adjust the scope line of the
930 // clone to the line number of the suspend point. However, only
931 // adjust the scope line when the files are the same. This ensures
932 // line number and file name belong together. The scope line is
933 // associated with all pre-prologue instructions. This avoids a jump
934 // in the linetable from the function declaration to the suspend point.
935 if (DISubprogram *SP = NewF->getSubprogram()) {
936 assert(SP != OrigF.getSubprogram() && SP->isDistinct());
937 if (ActiveSuspend)
938 if (auto DL = ActiveSuspend->getDebugLoc())
939 if (SP->getFile() == DL->getFile())
940 SP->setScopeLine(DL->getLine());
941 // Update the linkage name to reflect the modified symbol name. It
942 // is necessary to update the linkage name in Swift, since the
943 // mangling changes for resume functions. It might also be the
944 // right thing to do in C++, but due to a limitation in LLVM's
945 // AsmPrinter we can only do this if the function doesn't have an
946 // abstract specification, since the DWARF backend expects the
947 // abstract specification to contain the linkage name and asserts
948 // that they are identical.
949 if (SP->getUnit() &&
950 SP->getUnit()->getSourceLanguage() == dwarf::DW_LANG_Swift) {
951 SP->replaceLinkageName(LN: MDString::get(Context, Str: NewF->getName()));
952 if (auto *Decl = SP->getDeclaration()) {
953 auto *NewDecl = DISubprogram::get(
954 Context&: Decl->getContext(), Scope: Decl->getScope(), Name: Decl->getName(),
955 LinkageName: NewF->getName(), File: Decl->getFile(), Line: Decl->getLine(), Type: Decl->getType(),
956 ScopeLine: Decl->getScopeLine(), ContainingType: Decl->getContainingType(),
957 VirtualIndex: Decl->getVirtualIndex(), ThisAdjustment: Decl->getThisAdjustment(),
958 Flags: Decl->getFlags(), SPFlags: Decl->getSPFlags(), Unit: Decl->getUnit(),
959 TemplateParams: Decl->getTemplateParams(), Declaration: nullptr, RetainedNodes: Decl->getRetainedNodes(),
960 ThrownTypes: Decl->getThrownTypes(), Annotations: Decl->getAnnotations(),
961 TargetFuncName: Decl->getTargetFuncName());
962 SP->replaceDeclaration(Decl: NewDecl);
963 }
964 }
965 }
966
967 NewF->setLinkage(savedLinkage);
968 NewF->setVisibility(savedVisibility);
969 NewF->setUnnamedAddr(savedUnnamedAddr);
970 NewF->setDLLStorageClass(savedDLLStorageClass);
971 // The function sanitizer metadata needs to match the signature of the
972 // function it is being attached to. However this does not hold for split
973 // functions here. Thus remove the metadata for split functions.
974 if (Shape.ABI == coro::ABI::Switch &&
975 NewF->hasMetadata(KindID: LLVMContext::MD_func_sanitize))
976 NewF->eraseMetadata(KindID: LLVMContext::MD_func_sanitize);
977
978 // Replace the attributes of the new function:
979 auto OrigAttrs = NewF->getAttributes();
980 auto NewAttrs = AttributeList();
981
982 switch (Shape.ABI) {
983 case coro::ABI::Switch:
984 // Bootstrap attributes by copying function attributes from the
985 // original function. This should include optimization settings and so on.
986 NewAttrs = NewAttrs.addFnAttributes(
987 C&: Context, B: AttrBuilder(Context, OrigAttrs.getFnAttrs()));
988
989 addFramePointerAttrs(Attrs&: NewAttrs, Context, ParamIndex: 0, Size: Shape.FrameSize,
990 Alignment: Shape.FrameAlign, /*NoAlias=*/false);
991 break;
992 case coro::ABI::Async: {
993 auto *ActiveAsyncSuspend = cast<CoroSuspendAsyncInst>(Val: ActiveSuspend);
994 if (OrigF.hasParamAttribute(ArgNo: Shape.AsyncLowering.ContextArgNo,
995 Attribute::Kind: SwiftAsync)) {
996 uint32_t ArgAttributeIndices =
997 ActiveAsyncSuspend->getStorageArgumentIndex();
998 auto ContextArgIndex = ArgAttributeIndices & 0xff;
999 addAsyncContextAttrs(Attrs&: NewAttrs, Context, ParamIndex: ContextArgIndex);
1000
      // `swiftasync` must precede `swiftself` so 0 is not a valid index for
1002 // `swiftself`.
1003 auto SwiftSelfIndex = ArgAttributeIndices >> 8;
1004 if (SwiftSelfIndex)
1005 addSwiftSelfAttrs(Attrs&: NewAttrs, Context, ParamIndex: SwiftSelfIndex);
1006 }
1007
1008 // Transfer the original function's attributes.
1009 auto FnAttrs = OrigF.getAttributes().getFnAttrs();
1010 NewAttrs = NewAttrs.addFnAttributes(C&: Context, B: AttrBuilder(Context, FnAttrs));
1011 break;
1012 }
1013 case coro::ABI::Retcon:
1014 case coro::ABI::RetconOnce:
1015 // If we have a continuation prototype, just use its attributes,
1016 // full-stop.
1017 NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes();
1018
1019 /// FIXME: Is it really good to add the NoAlias attribute?
1020 addFramePointerAttrs(Attrs&: NewAttrs, Context, ParamIndex: 0,
1021 Size: Shape.getRetconCoroId()->getStorageSize(),
1022 Alignment: Shape.getRetconCoroId()->getStorageAlignment(),
1023 /*NoAlias=*/true);
1024
1025 break;
1026 }
1027
1028 switch (Shape.ABI) {
1029 // In these ABIs, the cloned functions always return 'void', and the
1030 // existing return sites are meaningless. Note that for unique
1031 // continuations, this includes the returns associated with suspends;
1032 // this is fine because we can't suspend twice.
1033 case coro::ABI::Switch:
1034 case coro::ABI::RetconOnce:
1035 // Remove old returns.
1036 for (ReturnInst *Return : Returns)
1037 changeToUnreachable(I: Return);
1038 break;
1039
1040 // With multi-suspend continuations, we'll already have eliminated the
1041 // original returns and inserted returns before all the suspend points,
1042 // so we want to leave any returns in place.
1043 case coro::ABI::Retcon:
1044 break;
  // Async lowering will insert musttail calls at all suspend points, each
  // followed by a return.
  // Don't change returns to unreachable because that will trip up the verifier;
  // these returns should simply be unreachable from the clone.
1049 case coro::ABI::Async:
1050 break;
1051 }
1052
1053 NewF->setAttributes(NewAttrs);
1054 NewF->setCallingConv(Shape.getResumeFunctionCC());
1055
1056 // Set up the new entry block.
1057 replaceEntryBlock();
1058
1059 Builder.SetInsertPoint(&NewF->getEntryBlock().front());
1060 NewFramePtr = deriveNewFramePointer();
1061
1062 // Remap frame pointer.
1063 Value *OldFramePtr = VMap[Shape.FramePtr];
1064 NewFramePtr->takeName(V: OldFramePtr);
1065 OldFramePtr->replaceAllUsesWith(V: NewFramePtr);
1066
1067 // Remap vFrame pointer.
1068 auto *NewVFrame = Builder.CreateBitCast(
1069 V: NewFramePtr, DestTy: PointerType::getUnqual(C&: Builder.getContext()), Name: "vFrame");
1070 Value *OldVFrame = cast<Value>(Val&: VMap[Shape.CoroBegin]);
1071 if (OldVFrame != NewVFrame)
1072 OldVFrame->replaceAllUsesWith(V: NewVFrame);
1073
1074 // All uses of the arguments should have been resolved by this point,
1075 // so we can safely remove the dummy values.
1076 for (Instruction *DummyArg : DummyArgs) {
1077 DummyArg->replaceAllUsesWith(V: PoisonValue::get(T: DummyArg->getType()));
1078 DummyArg->deleteValue();
1079 }
1080
1081 switch (Shape.ABI) {
1082 case coro::ABI::Switch:
    // Rewrite final suspend handling as it is not done via the switch (this
    // allows us to remove the final case from the switch, since it is undefined
    // behavior to resume a coroutine suspended at the final suspend point).
1086 if (Shape.SwitchLowering.HasFinalSuspend)
1087 handleFinalSuspend();
1088 break;
1089 case coro::ABI::Async:
1090 case coro::ABI::Retcon:
1091 case coro::ABI::RetconOnce:
1092 // Replace uses of the active suspend with the corresponding
1093 // continuation-function arguments.
1094 assert(ActiveSuspend != nullptr &&
1095 "no active suspend when lowering a continuation-style coroutine");
1096 replaceRetconOrAsyncSuspendUses();
1097 break;
1098 }
1099
1100 // Handle suspends.
1101 replaceCoroSuspends();
1102
1103 // Handle swifterror.
1104 replaceSwiftErrorOps();
1105
1106 // Remove coro.end intrinsics.
1107 replaceCoroEnds();
1108
1109 // Salvage debug info that points into the coroutine frame.
1110 salvageDebugInfo();
1111
1112 // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
1113 // to suppress deallocation code.
1114 if (Shape.ABI == coro::ABI::Switch)
1115 coro::replaceCoroFree(CoroId: cast<CoroIdInst>(Val&: VMap[Shape.CoroBegin->getId()]),
1116 /*Elide=*/FKind == CoroCloner::Kind::SwitchCleanup);
1117}
1118
1119static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
1120 assert(Shape.ABI == coro::ABI::Async);
1121
1122 auto *FuncPtrStruct = cast<ConstantStruct>(
1123 Val: Shape.AsyncLowering.AsyncFuncPointer->getInitializer());
1124 auto *OrigRelativeFunOffset = FuncPtrStruct->getOperand(i_nocapture: 0);
1125 auto *OrigContextSize = FuncPtrStruct->getOperand(i_nocapture: 1);
1126 auto *NewContextSize = ConstantInt::get(Ty: OrigContextSize->getType(),
1127 V: Shape.AsyncLowering.ContextSize);
1128 auto *NewFuncPtrStruct = ConstantStruct::get(
1129 T: FuncPtrStruct->getType(), Vs: OrigRelativeFunOffset, Vs: NewContextSize);
1130
1131 Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
1132}
1133
1134static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
1135 if (Shape.ABI == coro::ABI::Async)
1136 updateAsyncFuncPointerContextSize(Shape);
1137
1138 for (CoroAlignInst *CA : Shape.CoroAligns) {
1139 CA->replaceAllUsesWith(
1140 V: ConstantInt::get(Ty: CA->getType(), V: Shape.FrameAlign.value()));
1141 CA->eraseFromParent();
1142 }
1143
1144 if (Shape.CoroSizes.empty())
1145 return;
1146
1147 // In the same function all coro.sizes should have the same result type.
1148 auto *SizeIntrin = Shape.CoroSizes.back();
1149 Module *M = SizeIntrin->getModule();
1150 const DataLayout &DL = M->getDataLayout();
1151 auto Size = DL.getTypeAllocSize(Ty: Shape.FrameTy);
1152 auto *SizeConstant = ConstantInt::get(Ty: SizeIntrin->getType(), V: Size);
1153
1154 for (CoroSizeInst *CS : Shape.CoroSizes) {
1155 CS->replaceAllUsesWith(V: SizeConstant);
1156 CS->eraseFromParent();
1157 }
1158}
1159
1160static void postSplitCleanup(Function &F) {
1161 removeUnreachableBlocks(F);
1162
1163#ifndef NDEBUG
1164 // For now, we do a mandatory verification step because we don't
1165 // entirely trust this pass. Note that we don't want to add a verifier
1166 // pass to FPM below because it will also verify all the global data.
1167 if (verifyFunction(F, OS: &errs()))
1168 report_fatal_error(reason: "Broken function");
1169#endif
1170}
1171
// Assuming we arrived at the block NewBlock from the Prev instruction, store
// the PHIs' incoming values in the ResolvedValues map.
1174static void
1175scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
1176 DenseMap<Value *, Value *> &ResolvedValues) {
1177 auto *PrevBB = Prev->getParent();
1178 for (PHINode &PN : NewBlock->phis()) {
1179 auto V = PN.getIncomingValueForBlock(BB: PrevBB);
1180 // See if we already resolved it.
1181 auto VI = ResolvedValues.find(Val: V);
1182 if (VI != ResolvedValues.end())
1183 V = VI->second;
1184 // Remember the value.
1185 ResolvedValues[&PN] = V;
1186 }
1187}
1188
// Replace a sequence of branches leading to a ret with a clone of that ret
// instruction. For a suspend instruction represented by a switch, track the
// PHI values and select the correct case successor when possible.
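//
// For instance (illustrative), starting at an unconditional branch
//
//   br label %coro.ret
//   ...
// coro.ret:
//   ret void
//
// the walk below reaches the `ret void` and replaces the initial branch with a
// clone of that return.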
1192static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
1193 // There is nothing to simplify.
1194 if (isa<ReturnInst>(Val: InitialInst))
1195 return false;
1196
1197 DenseMap<Value *, Value *> ResolvedValues;
1198 assert(InitialInst->getModule());
1199 const DataLayout &DL = InitialInst->getModule()->getDataLayout();
1200
1201 auto TryResolveConstant = [&ResolvedValues](Value *V) {
1202 auto It = ResolvedValues.find(Val: V);
1203 if (It != ResolvedValues.end())
1204 V = It->second;
1205 return dyn_cast<ConstantInt>(Val: V);
1206 };
1207
1208 Instruction *I = InitialInst;
1209 while (true) {
1210 if (isa<ReturnInst>(Val: I)) {
1211 assert(!cast<ReturnInst>(I)->getReturnValue());
1212 ReplaceInstWithInst(From: InitialInst, To: I->clone());
1213 return true;
1214 }
1215
1216 if (auto *BR = dyn_cast<BranchInst>(Val: I)) {
1217 unsigned SuccIndex = 0;
1218 if (BR->isConditional()) {
1219 // Handle the case the condition of the conditional branch is constant.
1220 // e.g.,
1221 //
1222 // br i1 false, label %cleanup, label %CoroEnd
1223 //
1224 // It is possible during the transformation. We could continue the
1225 // simplifying in this case.
1226 ConstantInt *Cond = TryResolveConstant(BR->getCondition());
1227 if (!Cond)
1228 return false;
1229
1230 SuccIndex = Cond->isOne() ? 0 : 1;
1231 }
1232
1233 BasicBlock *Succ = BR->getSuccessor(i: SuccIndex);
1234 scanPHIsAndUpdateValueMap(Prev: I, NewBlock: Succ, ResolvedValues);
1235 I = Succ->getFirstNonPHIOrDbgOrLifetime();
1236 continue;
1237 }
1238
1239 if (auto *Cmp = dyn_cast<CmpInst>(Val: I)) {
      // If the number of cases in the suspend switch instruction has been
      // reduced to one, llvm::ConstantFoldTerminator will have simplified it
      // to a CmpInst. Try to constant fold it.
1243 ConstantInt *Cond0 = TryResolveConstant(Cmp->getOperand(i_nocapture: 0));
1244 ConstantInt *Cond1 = TryResolveConstant(Cmp->getOperand(i_nocapture: 1));
1245 if (Cond0 && Cond1) {
1246 ConstantInt *Result =
1247 dyn_cast_or_null<ConstantInt>(Val: ConstantFoldCompareInstOperands(
1248 Predicate: Cmp->getPredicate(), LHS: Cond0, RHS: Cond1, DL));
1249 if (Result) {
1250 ResolvedValues[Cmp] = Result;
1251 I = I->getNextNode();
1252 continue;
1253 }
1254 }
1255 }
1256
1257 if (auto *SI = dyn_cast<SwitchInst>(Val: I)) {
1258 ConstantInt *Cond = TryResolveConstant(SI->getCondition());
1259 if (!Cond)
1260 return false;
1261
1262 BasicBlock *Succ = SI->findCaseValue(C: Cond)->getCaseSuccessor();
1263 scanPHIsAndUpdateValueMap(Prev: I, NewBlock: Succ, ResolvedValues);
1264 I = Succ->getFirstNonPHIOrDbgOrLifetime();
1265 continue;
1266 }
1267
1268 if (I->isDebugOrPseudoInst() || I->isLifetimeStartOrEnd() ||
1269 wouldInstructionBeTriviallyDead(I)) {
1270 // We can skip instructions without side effects. If their values are
1271 // needed, we'll notice later, e.g. when hitting a conditional branch.
1272 I = I->getNextNode();
1273 continue;
1274 }
1275
1276 break;
1277 }
1278
1279 return false;
1280}
1281
1282// Check whether CI obeys the rules of musttail attribute.
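//
// For example (illustrative), a symmetric-transfer resume call such as
//
//   call fastcc void %resume.fn(ptr %other.frame)
//
// satisfies these rules: it returns void, takes a single address-space-0
// pointer, uses the enclosing coroutine's calling convention, and carries no
// ABI-impacting parameter attributes.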
1283static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
1284 if (CI.isInlineAsm())
1285 return false;
1286
1287 // Match prototypes and calling conventions of resume function.
1288 FunctionType *CalleeTy = CI.getFunctionType();
1289 if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
1290 return false;
1291
1292 Type *CalleeParmTy = CalleeTy->getParamType(i: 0);
1293 if (!CalleeParmTy->isPointerTy() ||
1294 (CalleeParmTy->getPointerAddressSpace() != 0))
1295 return false;
1296
1297 if (CI.getCallingConv() != F.getCallingConv())
1298 return false;
1299
  // CI should not have any ABI-impacting parameter attributes.
1301 static const Attribute::AttrKind ABIAttrs[] = {
1302 Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
1303 Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
1304 Attribute::SwiftSelf, Attribute::SwiftError};
1305 AttributeList Attrs = CI.getAttributes();
1306 for (auto AK : ABIAttrs)
1307 if (Attrs.hasParamAttr(0, AK))
1308 return false;
1309
1310 return true;
1311}
1312
// The coroutine has no suspend points. Remove the heap allocation for the
// coroutine frame if possible.
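//
// As a sketch for the switch ABI (illustrative), when a coro.alloc guard is
// present the dynamic allocation is bypassed entirely:
//
//   %need.alloc = call i1 @llvm.coro.alloc(token %id)   ; replaced by `false`
//   ...
//   %hdl = call ptr @llvm.coro.begin(token %id, ptr %mem)
//
// and the frame becomes a plain alloca in the (now suspend-free) function:
//
//   %frame = alloca %f.Frame, align <FrameAlign>
//
// with all uses of %hdl rewritten to use %frame.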
1315static void handleNoSuspendCoroutine(coro::Shape &Shape) {
1316 auto *CoroBegin = Shape.CoroBegin;
1317 auto *CoroId = CoroBegin->getId();
1318 auto *AllocInst = CoroId->getCoroAlloc();
1319 switch (Shape.ABI) {
1320 case coro::ABI::Switch: {
1321 auto SwitchId = cast<CoroIdInst>(Val: CoroId);
1322 coro::replaceCoroFree(CoroId: SwitchId, /*Elide=*/AllocInst != nullptr);
1323 if (AllocInst) {
1324 IRBuilder<> Builder(AllocInst);
1325 auto *Frame = Builder.CreateAlloca(Ty: Shape.FrameTy);
1326 Frame->setAlignment(Shape.FrameAlign);
1327 AllocInst->replaceAllUsesWith(V: Builder.getFalse());
1328 AllocInst->eraseFromParent();
1329 CoroBegin->replaceAllUsesWith(V: Frame);
1330 } else {
1331 CoroBegin->replaceAllUsesWith(V: CoroBegin->getMem());
1332 }
1333
1334 break;
1335 }
1336 case coro::ABI::Async:
1337 case coro::ABI::Retcon:
1338 case coro::ABI::RetconOnce:
1339 CoroBegin->replaceAllUsesWith(V: UndefValue::get(T: CoroBegin->getType()));
1340 break;
1341 }
1342
1343 CoroBegin->eraseFromParent();
1344}
1345
// simplifySuspendPoint needs to check that there are no calls between
// coro.save and coro.suspend, since any of those calls may potentially resume
// the coroutine, and if that is the case we cannot eliminate the suspend point.
1349static bool hasCallsInBlockBetween(Instruction *From, Instruction *To) {
1350 for (Instruction *I = From; I != To; I = I->getNextNode()) {
1351 // Assume that no intrinsic can resume the coroutine.
1352 if (isa<IntrinsicInst>(Val: I))
1353 continue;
1354
1355 if (isa<CallBase>(Val: I))
1356 return true;
1357 }
1358 return false;
1359}
1360
1361static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
1362 SmallPtrSet<BasicBlock *, 8> Set;
1363 SmallVector<BasicBlock *, 8> Worklist;
1364
1365 Set.insert(Ptr: SaveBB);
1366 Worklist.push_back(Elt: ResDesBB);
1367
1368 // Accumulate all blocks between SaveBB and ResDesBB. Because CoroSaveIntr
  // returns a token consumed by the suspend instruction, all blocks in between
1370 // will have to eventually hit SaveBB when going backwards from ResDesBB.
1371 while (!Worklist.empty()) {
1372 auto *BB = Worklist.pop_back_val();
1373 Set.insert(Ptr: BB);
1374 for (auto *Pred : predecessors(BB))
1375 if (!Set.contains(Ptr: Pred))
1376 Worklist.push_back(Elt: Pred);
1377 }
1378
1379 // SaveBB and ResDesBB are checked separately in hasCallsBetween.
1380 Set.erase(Ptr: SaveBB);
1381 Set.erase(Ptr: ResDesBB);
1382
1383 for (auto *BB : Set)
1384 if (hasCallsInBlockBetween(From: BB->getFirstNonPHI(), To: nullptr))
1385 return true;
1386
1387 return false;
1388}
1389
1390static bool hasCallsBetween(Instruction *Save, Instruction *ResumeOrDestroy) {
1391 auto *SaveBB = Save->getParent();
1392 auto *ResumeOrDestroyBB = ResumeOrDestroy->getParent();
1393
1394 if (SaveBB == ResumeOrDestroyBB)
1395 return hasCallsInBlockBetween(From: Save->getNextNode(), To: ResumeOrDestroy);
1396
1397 // Any calls from Save to the end of the block?
1398 if (hasCallsInBlockBetween(From: Save->getNextNode(), To: nullptr))
1399 return true;
1400
  // Any calls from the beginning of the block up to ResumeOrDestroy?
1402 if (hasCallsInBlockBetween(From: ResumeOrDestroyBB->getFirstNonPHI(),
1403 To: ResumeOrDestroy))
1404 return true;
1405
1406 // Any calls in all of the blocks between SaveBB and ResumeOrDestroyBB?
1407 if (hasCallsInBlocksBetween(SaveBB, ResDesBB: ResumeOrDestroyBB))
1408 return true;
1409
1410 return false;
1411}
1412
// If a suspend intrinsic is preceded by Resume or Destroy, we can eliminate
// the suspend point and replace it with normal control flow.
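//
// For instance (illustrative), a pattern such as
//
//   %save = call token @llvm.coro.save(ptr %hdl)
//   %addr = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)   ; 0 selects resume
//   call fastcc void %addr(ptr %hdl)
//   %sp   = call i8 @llvm.coro.suspend(token %save, i1 false)
//
// where %hdl is this coroutine's own coro.begin, can be simplified: %sp is
// replaced by the subfunction index (i8 0 here), and the save/resume/suspend
// instructions are erased so control continues along the resume path directly.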
1415static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
1416 CoroBeginInst *CoroBegin) {
1417 Instruction *Prev = Suspend->getPrevNode();
1418 if (!Prev) {
1419 auto *Pred = Suspend->getParent()->getSinglePredecessor();
1420 if (!Pred)
1421 return false;
1422 Prev = Pred->getTerminator();
1423 }
1424
1425 CallBase *CB = dyn_cast<CallBase>(Val: Prev);
1426 if (!CB)
1427 return false;
1428
1429 auto *Callee = CB->getCalledOperand()->stripPointerCasts();
1430
1431 // See if the callsite is for resumption or destruction of the coroutine.
1432 auto *SubFn = dyn_cast<CoroSubFnInst>(Val: Callee);
1433 if (!SubFn)
1434 return false;
1435
  // If it does not refer to the current coroutine, we cannot do anything with it.
1437 if (SubFn->getFrame() != CoroBegin)
1438 return false;
1439
  // See if the transformation is safe. Specifically, see if there are any
  // calls in between Save and CallInstr. They can potentially resume the
  // coroutine, rendering this optimization unsafe.
1443 auto *Save = Suspend->getCoroSave();
1444 if (hasCallsBetween(Save, ResumeOrDestroy: CB))
1445 return false;
1446
1447 // Replace llvm.coro.suspend with the value that results in resumption over
1448 // the resume or cleanup path.
1449 Suspend->replaceAllUsesWith(V: SubFn->getRawIndex());
1450 Suspend->eraseFromParent();
1451 Save->eraseFromParent();
1452
1453 // No longer need a call to coro.resume or coro.destroy.
1454 if (auto *Invoke = dyn_cast<InvokeInst>(Val: CB)) {
1455 BranchInst::Create(IfTrue: Invoke->getNormalDest(), InsertBefore: Invoke->getIterator());
1456 }
1457
1458 // Grab the CalledValue from CB before erasing the CallInstr.
1459 auto *CalledValue = CB->getCalledOperand();
1460 CB->eraseFromParent();
1461
  // If it has no more users, remove it. Usually it is a bitcast of SubFn.
1463 if (CalledValue != SubFn && CalledValue->user_empty())
1464 if (auto *I = dyn_cast<Instruction>(Val: CalledValue))
1465 I->eraseFromParent();
1466
1467 // Now we are good to remove SubFn.
1468 if (SubFn->user_empty())
1469 SubFn->eraseFromParent();
1470
1471 return true;
1472}
1473
1474// Remove suspend points that are simplified.
1475static void simplifySuspendPoints(coro::Shape &Shape) {
1476 // Currently, the only simplification we do is switch-lowering-specific.
1477 if (Shape.ABI != coro::ABI::Switch)
1478 return;
1479
1480 auto &S = Shape.CoroSuspends;
1481 size_t I = 0, N = S.size();
1482 if (N == 0)
1483 return;
1484
1485 size_t ChangedFinalIndex = std::numeric_limits<size_t>::max();
1486 while (true) {
1487 auto SI = cast<CoroSuspendInst>(Val: S[I]);
1488 // Leave final.suspend to handleFinalSuspend since it is undefined behavior
1489 // to resume a coroutine suspended at the final suspend point.
1490 if (!SI->isFinal() && simplifySuspendPoint(Suspend: SI, CoroBegin: Shape.CoroBegin)) {
1491 if (--N == I)
1492 break;
1493
1494 std::swap(a&: S[I], b&: S[N]);
1495
1496 if (cast<CoroSuspendInst>(Val: S[I])->isFinal()) {
1497 assert(Shape.SwitchLowering.HasFinalSuspend);
1498 ChangedFinalIndex = I;
1499 }
1500
1501 continue;
1502 }
1503 if (++I == N)
1504 break;
1505 }
1506 S.resize(N);
1507
1508 // Restore the position of final.suspend in case it was swapped, since we
1509 // require the final suspend to be the last element of CoroSuspends.
1510 if (ChangedFinalIndex < N) {
1511 assert(cast<CoroSuspendInst>(S[ChangedFinalIndex])->isFinal());
1512 std::swap(a&: S[ChangedFinalIndex], b&: S.back());
1513 }
1514}
1515
1516namespace {
1517
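// Implements the switch-lowering split. As a rough, illustrative sketch (the
// exact symbols depend on the frontend and on later optimizations), a
// coroutine @f becomes:
//
//   @f          - the ramp function: sets up the frame, stores the resume and
//                 destroy function pointers into it, runs up to the first
//                 suspend and returns the coroutine handle.
//   @f.resume   - re-enters the coroutine at the suspend point recorded in
//                 the frame's index field (via the resume.entry switch).
//   @f.destroy  - runs the unwind/cleanup path and deallocates the frame.
//   @f.cleanup  - like @f.destroy but without deallocating the frame; used
//                 when the frame allocation has been elided into the caller.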
1518struct SwitchCoroutineSplitter {
1519 static void split(Function &F, coro::Shape &Shape,
1520 SmallVectorImpl<Function *> &Clones,
1521 TargetTransformInfo &TTI) {
1522 assert(Shape.ABI == coro::ABI::Switch);
1523
1524 createResumeEntryBlock(F, Shape);
1525 auto *ResumeClone =
1526 createClone(F, Suffix: ".resume", Shape, FKind: CoroCloner::Kind::SwitchResume);
1527 auto *DestroyClone =
1528 createClone(F, Suffix: ".destroy", Shape, FKind: CoroCloner::Kind::SwitchUnwind);
1529 auto *CleanupClone =
1530 createClone(F, Suffix: ".cleanup", Shape, FKind: CoroCloner::Kind::SwitchCleanup);
1531
1532 postSplitCleanup(F&: *ResumeClone);
1533 postSplitCleanup(F&: *DestroyClone);
1534 postSplitCleanup(F&: *CleanupClone);
1535
1536 // Add musttail calls to support symmetric transfer.
1537 // Skip targets which don't support tail calls.
1538 //
1539 // FIXME: Could we support symmetric transfer effectively without musttail
1540 // call?
1541 if (TTI.supportsTailCalls())
1542 addMustTailToCoroResumes(F&: *ResumeClone, TTI);
1543
1544 // Store addresses of the resume/destroy/cleanup functions in the coroutine frame.
1545 updateCoroFrame(Shape, ResumeFn: ResumeClone, DestroyFn: DestroyClone, CleanupFn: CleanupClone);
1546
1547 assert(Clones.empty());
1548 Clones.push_back(Elt: ResumeClone);
1549 Clones.push_back(Elt: DestroyClone);
1550 Clones.push_back(Elt: CleanupClone);
1551
1552 // Create a constant array referring to the resume/destroy/cleanup functions,
1553 // pointed to by the last argument of @llvm.coro.id, so that the CoroElide
1554 // pass can determine the correct function to call.
1555 setCoroInfo(F, Shape, Fns: Clones);
1556 }
1557
1558private:
1559 // Create a resume clone by cloning the body of the original function, setting
1560 // a new entry block and replacing coro.suspend with an appropriate value to
1561 // force the resume or cleanup path at every suspend point.
1562 static Function *createClone(Function &F, const Twine &Suffix,
1563 coro::Shape &Shape, CoroCloner::Kind FKind) {
1564 CoroCloner Cloner(F, Suffix, Shape, FKind);
1565 Cloner.create();
1566 return Cloner.getFunction();
1567 }
1568
1569 // Create an entry block for a resume function with a switch that will jump to
1570 // suspend points.
1571 static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
1572 LLVMContext &C = F.getContext();
1573
1574 // resume.entry:
1575 //   %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
1576 //   %index = load i32, i32* %index.addr
1577 //   switch i32 %index, label %unreachable [
1578 //     i32 0, label %resume.0
1579 //     i32 1, label %resume.1
1580 //     ...
1581 //   ]
1582
1583 auto *NewEntry = BasicBlock::Create(Context&: C, Name: "resume.entry", Parent: &F);
1584 auto *UnreachBB = BasicBlock::Create(Context&: C, Name: "unreachable", Parent: &F);
1585
1586 IRBuilder<> Builder(NewEntry);
1587 auto *FramePtr = Shape.FramePtr;
1588 auto *FrameTy = Shape.FrameTy;
1589 auto *GepIndex = Builder.CreateStructGEP(
1590 Ty: FrameTy, Ptr: FramePtr, Idx: Shape.getSwitchIndexField(), Name: "index.addr");
1591 auto *Index = Builder.CreateLoad(Ty: Shape.getIndexType(), Ptr: GepIndex, Name: "index");
1592 auto *Switch =
1593 Builder.CreateSwitch(V: Index, Dest: UnreachBB, NumCases: Shape.CoroSuspends.size());
1594 Shape.SwitchLowering.ResumeSwitch = Switch;
1595
1596 size_t SuspendIndex = 0;
1597 for (auto *AnyS : Shape.CoroSuspends) {
1598 auto *S = cast<CoroSuspendInst>(Val: AnyS);
1599 ConstantInt *IndexVal = Shape.getIndex(Value: SuspendIndex);
1600
1601 // Replace CoroSave with a store to Index:
1602 // %index.addr = getelementptr %f.frame... (index field number)
1603 // store i32 %IndexVal, i32* %index.addr1
1604 auto *Save = S->getCoroSave();
1605 Builder.SetInsertPoint(Save);
1606 if (S->isFinal()) {
1607 // The coroutine should be marked done if it reaches the final suspend
1608 // point.
1609 markCoroutineAsDone(Builder, Shape, FramePtr);
1610 } else {
1611 auto *GepIndex = Builder.CreateStructGEP(
1612 Ty: FrameTy, Ptr: FramePtr, Idx: Shape.getSwitchIndexField(), Name: "index.addr");
1613 Builder.CreateStore(Val: IndexVal, Ptr: GepIndex);
1614 }
1615
1616 Save->replaceAllUsesWith(V: ConstantTokenNone::get(Context&: C));
1617 Save->eraseFromParent();
1618
1619 // Split block before and after coro.suspend and add a jump from an entry
1620 // switch:
1621 //
1622 //     whateverBB:
1623 //       whatever
1624 //       %0 = call i8 @llvm.coro.suspend(token none, i1 false)
1625 //       switch i8 %0, label %suspend [i8 0, label %resume
1626 //                                     i8 1, label %cleanup]
1627 // becomes:
1628 //
1629 //     whateverBB:
1630 //       whatever
1631 //       br label %resume.0.landing
1632 //
1633 //     resume.0: ; <--- jump from the switch in the resume.entry
1634 //       %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
1635 //       br label %resume.0.landing
1636 //
1637 //     resume.0.landing:
1638 //       %1 = phi i8 [-1, %whateverBB], [%0, %resume.0]
1639 //       switch i8 %1, label %suspend [i8 0, label %resume
1640 //                                     i8 1, label %cleanup]
1641
1642 auto *SuspendBB = S->getParent();
1643 auto *ResumeBB =
1644 SuspendBB->splitBasicBlock(I: S, BBName: "resume." + Twine(SuspendIndex));
1645 auto *LandingBB = ResumeBB->splitBasicBlock(
1646 I: S->getNextNode(), BBName: ResumeBB->getName() + Twine(".landing"));
1647 Switch->addCase(OnVal: IndexVal, Dest: ResumeBB);
1648
1649 cast<BranchInst>(Val: SuspendBB->getTerminator())->setSuccessor(idx: 0, NewSucc: LandingBB);
1650 auto *PN = PHINode::Create(Ty: Builder.getInt8Ty(), NumReservedValues: 2, NameStr: "");
1651 PN->insertBefore(InsertPos: LandingBB->begin());
1652 S->replaceAllUsesWith(V: PN);
1653 PN->addIncoming(V: Builder.getInt8(C: -1), BB: SuspendBB);
1654 PN->addIncoming(V: S, BB: ResumeBB);
1655
1656 ++SuspendIndex;
1657 }
1658
1659 Builder.SetInsertPoint(UnreachBB);
1660 Builder.CreateUnreachable();
1661
1662 Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
1663 }
1664
1665 // Add musttail to any resume instruction that is immediately followed by a
1666 // suspend (i.e. ret). We do this even at -O0 to support guaranteed tail calls
1667 // for symmetric coroutine control transfer (C++ Coroutines TS extension).
1668 // This transformation is done only in the resume part of the coroutine, which
1669 // has the same signature and calling convention as the coro.resume call.
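// As an illustration (simplified IR, not actual pass output), a resume of the
// next coroutine followed directly by a suspend-and-return becomes:
//
//   %fn = call ptr @llvm.coro.subfn.addr(ptr %next.hdl, i8 0)
//   musttail call fastcc void %fn(ptr %next.hdl)
//   ret void
//
// so transferring control to the next coroutine does not grow the stack.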
1670 static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
1671 bool Changed = false;
1672
1673 // Collect potential resume instructions.
1674 SmallVector<CallInst *, 4> Resumes;
1675 for (auto &I : instructions(F))
1676 if (auto *Call = dyn_cast<CallInst>(Val: &I))
1677 if (shouldBeMustTail(CI: *Call, F))
1678 Resumes.push_back(Elt: Call);
1679
1680 // Set musttail on those that are followed by a ret instruction.
1681 for (CallInst *Call : Resumes)
1682 // Skip targets which don't support tail calls for this specific case.
1683 if (TTI.supportsTailCallFor(CB: Call) &&
1684 simplifyTerminatorLeadingToRet(InitialInst: Call->getNextNode())) {
1685 Call->setTailCallKind(CallInst::TCK_MustTail);
1686 Changed = true;
1687 }
1688
1689 if (Changed)
1690 removeUnreachableBlocks(F);
1691 }
1692
1693 // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
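// Roughly, this emits (illustrative; the real field indices come from
// coro::Shape::SwitchFieldIndex):
//
//   %resume.addr  = getelementptr %f.Frame, ptr %frame, i32 0, i32 0
//   store ptr @f.resume, ptr %resume.addr
//   %destroy.addr = getelementptr %f.Frame, ptr %frame, i32 0, i32 1
//   store ptr @f.destroy, ptr %destroy.addr
//
// If the frame allocation may have been elided (coro.alloc returned false),
// the destroy slot instead receives a select of @f.destroy / @f.cleanup.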
1694 static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
1695 Function *DestroyFn, Function *CleanupFn) {
1696 IRBuilder<> Builder(&*Shape.getInsertPtAfterFramePtr());
1697
1698 auto *ResumeAddr = Builder.CreateStructGEP(
1699 Ty: Shape.FrameTy, Ptr: Shape.FramePtr, Idx: coro::Shape::SwitchFieldIndex::Resume,
1700 Name: "resume.addr");
1701 Builder.CreateStore(Val: ResumeFn, Ptr: ResumeAddr);
1702
1703 Value *DestroyOrCleanupFn = DestroyFn;
1704
1705 CoroIdInst *CoroId = Shape.getSwitchCoroId();
1706 if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
1707 // If there is a CoroAlloc and it returns false (meaning we elide the
1708 // allocation), use CleanupFn instead of DestroyFn.
1709 DestroyOrCleanupFn = Builder.CreateSelect(C: CA, True: DestroyFn, False: CleanupFn);
1710 }
1711
1712 auto *DestroyAddr = Builder.CreateStructGEP(
1713 Ty: Shape.FrameTy, Ptr: Shape.FramePtr, Idx: coro::Shape::SwitchFieldIndex::Destroy,
1714 Name: "destroy.addr");
1715 Builder.CreateStore(Val: DestroyOrCleanupFn, Ptr: DestroyAddr);
1716 }
1717
1718 // Create a global constant array containing pointers to the functions provided
1719 // and set the Info parameter of CoroBegin to point at this constant. Example:
1720 //
1721 //   @f.resumers = internal constant [2 x void(%f.frame*)*]
1722 //                   [void(%f.frame*)* @f.resume,
1723 //                    void(%f.frame*)* @f.destroy]
1724 //   define void @f() {
1725 //     ...
1726 //     call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
1727 //                  i8* bitcast([2 x void(%f.frame*)*]* @f.resumers to i8*))
1728 //   }
1729 //
1730 // Assumes that all the functions have the same signature.
1731 static void setCoroInfo(Function &F, coro::Shape &Shape,
1732 ArrayRef<Function *> Fns) {
1733 // This only works under the switch-lowering ABI because coro elision
1734 // only works on the switch-lowering ABI.
1735 SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
1736 assert(!Args.empty());
1737 Function *Part = *Fns.begin();
1738 Module *M = Part->getParent();
1739 auto *ArrTy = ArrayType::get(ElementType: Part->getType(), NumElements: Args.size());
1740
1741 auto *ConstVal = ConstantArray::get(T: ArrTy, V: Args);
1742 auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
1743 GlobalVariable::PrivateLinkage, ConstVal,
1744 F.getName() + Twine(".resumers"));
1745
1746 // Update coro.begin instruction to refer to this constant.
1747 LLVMContext &C = F.getContext();
1748 auto *BC = ConstantExpr::getPointerCast(C: GV, Ty: PointerType::getUnqual(C));
1749 Shape.getSwitchCoroId()->setInfo(BC);
1750 }
1751};
1752
1753} // namespace
1754
1755static void replaceAsyncResumeFunction(CoroSuspendAsyncInst *Suspend,
1756 Value *Continuation) {
1757 auto *ResumeIntrinsic = Suspend->getResumeFunction();
1758 auto &Context = Suspend->getParent()->getParent()->getContext();
1759 auto *Int8PtrTy = PointerType::getUnqual(C&: Context);
1760
1761 IRBuilder<> Builder(ResumeIntrinsic);
1762 auto *Val = Builder.CreateBitOrPointerCast(V: Continuation, DestTy: Int8PtrTy);
1763 ResumeIntrinsic->replaceAllUsesWith(V: Val);
1764 ResumeIntrinsic->eraseFromParent();
1765 Suspend->setOperand(i_nocapture: CoroSuspendAsyncInst::ResumeFunctionArg,
1766 Val_nocapture: UndefValue::get(T: Int8PtrTy));
1767}
1768
1769 /// Coerce the arguments in \p FnArgs according to \p FnTy, appending the results to \p CallArgs.
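/// For example (illustrative only): if the callee's prototype expects a ptr
/// but the forwarded value is an i64, an inttoptr cast is emitted so that the
/// call's argument types exactly match the callee's function type:
///
///   %arg.cast = inttoptr i64 %arg to ptr
///   musttail call void @continuation(ptr %ctx, ptr %arg.cast)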
1770static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
1771 ArrayRef<Value *> FnArgs,
1772 SmallVectorImpl<Value *> &CallArgs) {
1773 size_t ArgIdx = 0;
1774 for (auto *paramTy : FnTy->params()) {
1775 assert(ArgIdx < FnArgs.size());
1776 if (paramTy != FnArgs[ArgIdx]->getType())
1777 CallArgs.push_back(
1778 Elt: Builder.CreateBitOrPointerCast(V: FnArgs[ArgIdx], DestTy: paramTy));
1779 else
1780 CallArgs.push_back(Elt: FnArgs[ArgIdx]);
1781 ++ArgIdx;
1782 }
1783}
1784
1785CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
1786 TargetTransformInfo &TTI,
1787 ArrayRef<Value *> Arguments,
1788 IRBuilder<> &Builder) {
1789 auto *FnTy = MustTailCallFn->getFunctionType();
1790 // Coerce the arguments: LLVM optimizations seem to ignore the types in
1791 // vararg functions and throw away casts in optimized mode.
1792 SmallVector<Value *, 8> CallArgs;
1793 coerceArguments(Builder, FnTy, FnArgs: Arguments, CallArgs);
1794
1795 auto *TailCall = Builder.CreateCall(FTy: FnTy, Callee: MustTailCallFn, Args: CallArgs);
1796 // Skip targets which don't support tail call.
1797 if (TTI.supportsTailCallFor(CB: TailCall)) {
1798 TailCall->setTailCallKind(CallInst::TCK_MustTail);
1799 }
1800 TailCall->setDebugLoc(Loc);
1801 TailCall->setCallingConv(MustTailCallFn->getCallingConv());
1802 return TailCall;
1803}
1804
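// Split an async-ABI coroutine (llvm.coro.id.async, used e.g. for Swift async
// functions). Each suspend point gets its own continuation function, and the
// coroutine frame lives at a fixed offset (Shape.AsyncLowering.FrameOffset)
// inside the caller-provided async context storage rather than in a separately
// allocated frame. High-level summary; the code below is authoritative.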
1805static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
1806 SmallVectorImpl<Function *> &Clones,
1807 TargetTransformInfo &TTI) {
1808 assert(Shape.ABI == coro::ABI::Async);
1809 assert(Clones.empty());
1810 // Reset various things that the optimizer might have decided it
1811 // "knows" about the coroutine function due to not seeing a return.
1812 F.removeFnAttr(Attribute::NoReturn);
1813 F.removeRetAttr(Attribute::NoAlias);
1814 F.removeRetAttr(Attribute::NonNull);
1815
1816 auto &Context = F.getContext();
1817 auto *Int8PtrTy = PointerType::getUnqual(C&: Context);
1818
1819 auto *Id = cast<CoroIdAsyncInst>(Val: Shape.CoroBegin->getId());
1820 IRBuilder<> Builder(Id);
1821
1822 auto *FramePtr = Id->getStorage();
1823 FramePtr = Builder.CreateBitOrPointerCast(V: FramePtr, DestTy: Int8PtrTy);
1824 FramePtr = Builder.CreateConstInBoundsGEP1_32(
1825 Ty: Type::getInt8Ty(C&: Context), Ptr: FramePtr, Idx0: Shape.AsyncLowering.FrameOffset,
1826 Name: "async.ctx.frameptr");
1827
1828 // Map all uses of llvm.coro.begin to the allocated frame pointer.
1829 {
1830 // Make sure we don't invalidate Shape.FramePtr.
1831 TrackingVH<Value> Handle(Shape.FramePtr);
1832 Shape.CoroBegin->replaceAllUsesWith(V: FramePtr);
1833 Shape.FramePtr = Handle.getValPtr();
1834 }
1835
1836 // Create all the functions in order after the main function.
1837 auto NextF = std::next(x: F.getIterator());
1838
1839 // Create a continuation function for each of the suspend points.
1840 Clones.reserve(N: Shape.CoroSuspends.size());
1841 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1842 auto *Suspend = cast<CoroSuspendAsyncInst>(Val: Shape.CoroSuspends[Idx]);
1843
1844 // Create the clone declaration.
1845 auto ResumeNameSuffix = ".resume.";
1846 auto ProjectionFunctionName =
1847 Suspend->getAsyncContextProjectionFunction()->getName();
1848 bool UseSwiftMangling = false;
1849 if (ProjectionFunctionName.equals(RHS: "__swift_async_resume_project_context")) {
1850 ResumeNameSuffix = "TQ";
1851 UseSwiftMangling = true;
1852 } else if (ProjectionFunctionName.equals(
1853 RHS: "__swift_async_resume_get_context")) {
1854 ResumeNameSuffix = "TY";
1855 UseSwiftMangling = true;
1856 }
1857 auto *Continuation = createCloneDeclaration(
1858 OrigF&: F, Shape,
1859 Suffix: UseSwiftMangling ? ResumeNameSuffix + Twine(Idx) + "_"
1860 : ResumeNameSuffix + Twine(Idx),
1861 InsertBefore: NextF, ActiveSuspend: Suspend);
1862 Clones.push_back(Elt: Continuation);
1863
1864 // Insert a branch to a new return block immediately before the suspend
1865 // point.
1866 auto *SuspendBB = Suspend->getParent();
1867 auto *NewSuspendBB = SuspendBB->splitBasicBlock(I: Suspend);
1868 auto *Branch = cast<BranchInst>(Val: SuspendBB->getTerminator());
1869
1870 // Place it before the first suspend.
1871 auto *ReturnBB =
1872 BasicBlock::Create(Context&: F.getContext(), Name: "coro.return", Parent: &F, InsertBefore: NewSuspendBB);
1873 Branch->setSuccessor(idx: 0, NewSucc: ReturnBB);
1874
1875 IRBuilder<> Builder(ReturnBB);
1876
1877 // Insert the call to the tail call function and inline it.
1878 auto *Fn = Suspend->getMustTailCallFunction();
1879 SmallVector<Value *, 8> Args(Suspend->args());
1880 auto FnArgs = ArrayRef<Value *>(Args).drop_front(
1881 N: CoroSuspendAsyncInst::MustTailCallFuncArg + 1);
1882 auto *TailCall = coro::createMustTailCall(Loc: Suspend->getDebugLoc(), MustTailCallFn: Fn, TTI,
1883 Arguments: FnArgs, Builder);
1884 Builder.CreateRetVoid();
1885 InlineFunctionInfo FnInfo;
1886 (void)InlineFunction(CB&: *TailCall, IFI&: FnInfo);
1887
1888 // Replace the llvm.coro.async.resume intrinsic call.
1889 replaceAsyncResumeFunction(Suspend, Continuation);
1890 }
1891
1892 assert(Clones.size() == Shape.CoroSuspends.size());
1893 for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
1894 auto *Suspend = Shape.CoroSuspends[Idx];
1895 auto *Clone = Clones[Idx];
1896
1897 CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
1898 }
1899}
1900
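// Split a returned-continuation coroutine (coro::ABI::Retcon / RetconOnce,
// i.e. llvm.coro.id.retcon[.once]). Every suspend exits through a single
// unified return block that returns a pointer to the next continuation
// function together with any directly yielded values. Roughly, assuming one
// yielded i32 (illustrative names, not actual pass output):
//
//   coro.return:
//     %cont = phi ptr [ @f.resume.0, %susp0 ], [ @f.resume.1, %susp1 ]
//     %val  = phi i32 [ %x, %susp0 ], [ %y, %susp1 ]
//     %agg0 = insertvalue { ptr, i32 } poison, ptr %cont, 0
//     %agg1 = insertvalue { ptr, i32 } %agg0, i32 %val, 1
//     ret { ptr, i32 } %agg1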
1901static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
1902 SmallVectorImpl<Function *> &Clones) {
1903 assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
1904 assert(Clones.empty());
1905
1906 // Reset various things that the optimizer might have decided it
1907 // "knows" about the coroutine function due to not seeing a return.
1908 F.removeFnAttr(Attribute::NoReturn);
1909 F.removeRetAttr(Attribute::NoAlias);
1910 F.removeRetAttr(Attribute::NonNull);
1911
1912 // Allocate the frame.
1913 auto *Id = cast<AnyCoroIdRetconInst>(Val: Shape.CoroBegin->getId());
1914 Value *RawFramePtr;
1915 if (Shape.RetconLowering.IsFrameInlineInStorage) {
1916 RawFramePtr = Id->getStorage();
1917 } else {
1918 IRBuilder<> Builder(Id);
1919
1920 // Determine the size of the frame.
1921 const DataLayout &DL = F.getParent()->getDataLayout();
1922 auto Size = DL.getTypeAllocSize(Ty: Shape.FrameTy);
1923
1924 // Allocate. We don't need to update the call graph node because we're
1925 // going to recompute it from scratch after splitting.
1926 // FIXME: pass the required alignment
1927 RawFramePtr = Shape.emitAlloc(Builder, Size: Builder.getInt64(C: Size), CG: nullptr);
1928 RawFramePtr =
1929 Builder.CreateBitCast(V: RawFramePtr, DestTy: Shape.CoroBegin->getType());
1930
1931 // Stash the allocated frame pointer in the continuation storage.
1932 Builder.CreateStore(Val: RawFramePtr, Ptr: Id->getStorage());
1933 }
1934
1935 // Map all uses of llvm.coro.begin to the allocated frame pointer.
1936 {
1937 // Make sure we don't invalidate Shape.FramePtr.
1938 TrackingVH<Value> Handle(Shape.FramePtr);
1939 Shape.CoroBegin->replaceAllUsesWith(V: RawFramePtr);
1940 Shape.FramePtr = Handle.getValPtr();
1941 }
1942
1943 // Create a unique return block.
1944 BasicBlock *ReturnBB = nullptr;
1945 SmallVector<PHINode *, 4> ReturnPHIs;
1946
1947 // Create all the functions in order after the main function.
1948 auto NextF = std::next(x: F.getIterator());
1949
1950 // Create a continuation function for each of the suspend points.
1951 Clones.reserve(N: Shape.CoroSuspends.size());
1952 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
1953 auto Suspend = cast<CoroSuspendRetconInst>(Val: Shape.CoroSuspends[i]);
1954
1955 // Create the clone declaration.
1956 auto Continuation =
1957 createCloneDeclaration(OrigF&: F, Shape, Suffix: ".resume." + Twine(i), InsertBefore: NextF, ActiveSuspend: nullptr);
1958 Clones.push_back(Elt: Continuation);
1959
1960 // Insert a branch to the unified return block immediately before
1961 // the suspend point.
1962 auto SuspendBB = Suspend->getParent();
1963 auto NewSuspendBB = SuspendBB->splitBasicBlock(I: Suspend);
1964 auto Branch = cast<BranchInst>(Val: SuspendBB->getTerminator());
1965
1966 // Create the unified return block.
1967 if (!ReturnBB) {
1968 // Place it before the first suspend.
1969 ReturnBB =
1970 BasicBlock::Create(Context&: F.getContext(), Name: "coro.return", Parent: &F, InsertBefore: NewSuspendBB);
1971 Shape.RetconLowering.ReturnBlock = ReturnBB;
1972
1973 IRBuilder<> Builder(ReturnBB);
1974
1975 // Create PHIs for all the return values.
1976 assert(ReturnPHIs.empty());
1977
1978 // First, the continuation.
1979 ReturnPHIs.push_back(Elt: Builder.CreatePHI(Ty: Continuation->getType(),
1980 NumReservedValues: Shape.CoroSuspends.size()));
1981
1982 // Next, all the directly-yielded values.
1983 for (auto *ResultTy : Shape.getRetconResultTypes())
1984 ReturnPHIs.push_back(
1985 Elt: Builder.CreatePHI(Ty: ResultTy, NumReservedValues: Shape.CoroSuspends.size()));
1986
1987 // Build the return value.
1988 auto RetTy = F.getReturnType();
1989
1990 // Cast the continuation value if necessary.
1991 // We can't rely on the types matching up because that type would
1992 // have to be infinite.
1993 auto CastedContinuationTy =
1994 (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(N: 0));
1995 auto *CastedContinuation =
1996 Builder.CreateBitCast(V: ReturnPHIs[0], DestTy: CastedContinuationTy);
1997
1998 Value *RetV;
1999 if (ReturnPHIs.size() == 1) {
2000 RetV = CastedContinuation;
2001 } else {
2002 RetV = PoisonValue::get(T: RetTy);
2003 RetV = Builder.CreateInsertValue(Agg: RetV, Val: CastedContinuation, Idxs: 0);
2004 for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
2005 RetV = Builder.CreateInsertValue(Agg: RetV, Val: ReturnPHIs[I], Idxs: I);
2006 }
2007
2008 Builder.CreateRet(V: RetV);
2009 }
2010
2011 // Branch to the return block.
2012 Branch->setSuccessor(idx: 0, NewSucc: ReturnBB);
2013 ReturnPHIs[0]->addIncoming(V: Continuation, BB: SuspendBB);
2014 size_t NextPHIIndex = 1;
2015 for (auto &VUse : Suspend->value_operands())
2016 ReturnPHIs[NextPHIIndex++]->addIncoming(V: &*VUse, BB: SuspendBB);
2017 assert(NextPHIIndex == ReturnPHIs.size());
2018 }
2019
2020 assert(Clones.size() == Shape.CoroSuspends.size());
2021 for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
2022 auto Suspend = Shape.CoroSuspends[i];
2023 auto Clone = Clones[i];
2024
2025 CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
2026 }
2027}
2028
2029namespace {
2030class PrettyStackTraceFunction : public PrettyStackTraceEntry {
2031 Function &F;
2032
2033public:
2034 PrettyStackTraceFunction(Function &F) : F(F) {}
2035 void print(raw_ostream &OS) const override {
2036 OS << "While splitting coroutine ";
2037 F.printAsOperand(O&: OS, /*print type*/ PrintType: false, M: F.getParent());
2038 OS << "\n";
2039 }
2040};
2041} // namespace
2042
2043static coro::Shape
2044splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
2045 TargetTransformInfo &TTI, bool OptimizeFrame,
2046 std::function<bool(Instruction &)> MaterializableCallback) {
2047 PrettyStackTraceFunction prettyStackTrace(F);
2048
2049 // The suspend-crossing algorithm in buildCoroutineFrame gets tripped
2050 // up by uses in unreachable blocks, so remove them as a first pass.
2051 removeUnreachableBlocks(F);
2052
2053 coro::Shape Shape(F, OptimizeFrame);
2054 if (!Shape.CoroBegin)
2055 return Shape;
2056
2057 lowerAwaitSuspends(F, Shape);
2058
2059 simplifySuspendPoints(Shape);
2060 buildCoroutineFrame(F, Shape, TTI, MaterializableCallback);
2061 replaceFrameSizeAndAlignment(Shape);
2062
2063 // If there are no suspend points, no split is required; just remove
2064 // the allocation and deallocation blocks, since they are not needed.
2065 if (Shape.CoroSuspends.empty()) {
2066 handleNoSuspendCoroutine(Shape);
2067 } else {
2068 switch (Shape.ABI) {
2069 case coro::ABI::Switch:
2070 SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
2071 break;
2072 case coro::ABI::Async:
2073 splitAsyncCoroutine(F, Shape, Clones, TTI);
2074 break;
2075 case coro::ABI::Retcon:
2076 case coro::ABI::RetconOnce:
2077 splitRetconCoroutine(F, Shape, Clones);
2078 break;
2079 }
2080 }
2081
2082 // Replace all the swifterror operations in the original function.
2083 // This invalidates SwiftErrorOps in the Shape.
2084 replaceSwiftErrorOps(F, Shape, VMap: nullptr);
2085
2086 // Salvage debug intrinsics that point into the coroutine frame in the
2087 // original function. The Cloner has already salvaged debug info in the new
2088 // coroutine funclets.
2089 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
2090 auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F);
2091 for (auto *DDI : DbgInsts)
2092 coro::salvageDebugInfo(ArgToAllocaMap, DVI&: *DDI, OptimizeFrame: Shape.OptimizeFrame,
2093 IsEntryPoint: false /*UseEntryValue*/);
2094 for (DbgVariableRecord *DVR : DbgVariableRecords)
2095 coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DVR, OptimizeFrame: Shape.OptimizeFrame,
2096 UseEntryValue: false /*UseEntryValue*/);
2097 return Shape;
2098}
2099
2100/// Remove calls to llvm.coro.end in the original function.
2101static void removeCoroEnds(const coro::Shape &Shape) {
2102 for (auto *End : Shape.CoroEnds) {
2103 replaceCoroEnd(End, Shape, FramePtr: Shape.FramePtr, /*in resume*/ InResume: false, CG: nullptr);
2104 }
2105}
2106
2107static void updateCallGraphAfterCoroutineSplit(
2108 LazyCallGraph::Node &N, const coro::Shape &Shape,
2109 const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
2110 LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
2111 FunctionAnalysisManager &FAM) {
2112 if (!Shape.CoroBegin)
2113 return;
2114
2115 if (Shape.ABI != coro::ABI::Switch)
2116 removeCoroEnds(Shape);
2117 else {
2118 for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
2119 auto &Context = End->getContext();
2120 End->replaceAllUsesWith(V: ConstantInt::getFalse(Context));
2121 End->eraseFromParent();
2122 }
2123 }
2124
2125 if (!Clones.empty()) {
2126 switch (Shape.ABI) {
2127 case coro::ABI::Switch:
2128 // Each clone in the Switch lowering is independent of the other clones.
2129 // Let the LazyCallGraph know about each one separately.
2130 for (Function *Clone : Clones)
2131 CG.addSplitFunction(OriginalFunction&: N.getFunction(), NewFunction&: *Clone);
2132 break;
2133 case coro::ABI::Async:
2134 case coro::ABI::Retcon:
2135 case coro::ABI::RetconOnce:
2136 // Each clone in the Async/Retcon lowering references each of the other
2137 // clones. Let the LazyCallGraph know about all of them at once.
2138 if (!Clones.empty())
2139 CG.addSplitRefRecursiveFunctions(OriginalFunction&: N.getFunction(), NewFunctions: Clones);
2140 break;
2141 }
2142
2143 // Let the CGSCC infra handle the changes to the original function.
2144 updateCGAndAnalysisManagerForCGSCCPass(G&: CG, C, N, AM, UR, FAM);
2145 }
2146
2147 // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges
2148 // to the split functions.
2149 postSplitCleanup(F&: N.getFunction());
2150 updateCGAndAnalysisManagerForFunctionPass(G&: CG, C, N, AM, UR, FAM);
2151}
2152
2153/// Replace a call to llvm.coro.prepare.retcon.
2154static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
2155 LazyCallGraph::SCC &C) {
2156 auto CastFn = Prepare->getArgOperand(i: 0); // as an i8*
2157 auto Fn = CastFn->stripPointerCasts(); // as its original type
2158
2159 // Attempt to peephole this pattern:
2160 // %0 = bitcast [[TYPE]] @some_function to i8*
2161 // %1 = call @llvm.coro.prepare.retcon(i8* %0)
2162 // %2 = bitcast %1 to [[TYPE]]
2163 // ==>
2164 // %2 = @some_function
2165 for (Use &U : llvm::make_early_inc_range(Range: Prepare->uses())) {
2166 // Look for bitcasts back to the original function type.
2167 auto *Cast = dyn_cast<BitCastInst>(Val: U.getUser());
2168 if (!Cast || Cast->getType() != Fn->getType())
2169 continue;
2170
2171 // Replace and remove the cast.
2172 Cast->replaceAllUsesWith(V: Fn);
2173 Cast->eraseFromParent();
2174 }
2175
2176 // Replace any remaining uses with the function as an i8*.
2177 // This can never directly be a callee, so we don't need to update CG.
2178 Prepare->replaceAllUsesWith(V: CastFn);
2179 Prepare->eraseFromParent();
2180
2181 // Kill dead bitcasts.
2182 while (auto *Cast = dyn_cast<BitCastInst>(Val: CastFn)) {
2183 if (!Cast->use_empty())
2184 break;
2185 CastFn = Cast->getOperand(i_nocapture: 0);
2186 Cast->eraseFromParent();
2187 }
2188}
2189
2190static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
2191 LazyCallGraph::SCC &C) {
2192 bool Changed = false;
2193 for (Use &P : llvm::make_early_inc_range(Range: PrepareFn->uses())) {
2194 // Intrinsics can only be used in calls.
2195 auto *Prepare = cast<CallInst>(Val: P.getUser());
2196 replacePrepare(Prepare, CG, C);
2197 Changed = true;
2198 }
2199
2200 return Changed;
2201}
2202
2203static void addPrepareFunction(const Module &M,
2204 SmallVectorImpl<Function *> &Fns,
2205 StringRef Name) {
2206 auto *PrepareFn = M.getFunction(Name);
2207 if (PrepareFn && !PrepareFn->use_empty())
2208 Fns.push_back(Elt: PrepareFn);
2209}
2210
2211CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
2212 : MaterializableCallback(coro::defaultMaterializable),
2213 OptimizeFrame(OptimizeFrame) {}
2214
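// Note: in the new pass manager this is registered as the CGSCC pass
// "coro-split"; for isolated testing one would typically run something like
//   opt -passes='cgscc(coro-split)' input.ll -S
// (illustrative invocation only).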
2215PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
2216 CGSCCAnalysisManager &AM,
2217 LazyCallGraph &CG, CGSCCUpdateResult &UR) {
2218 // NB: One invariant of a valid LazyCallGraph::SCC is that it must contain a
2219 // non-zero number of nodes, so we assume that here and grab the first
2220 // node's function's module.
2221 Module &M = *C.begin()->getFunction().getParent();
2222 auto &FAM =
2223 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager();
2224
2225 // Check for uses of llvm.coro.prepare.retcon/async.
2226 SmallVector<Function *, 2> PrepareFns;
2227 addPrepareFunction(M, Fns&: PrepareFns, Name: "llvm.coro.prepare.retcon");
2228 addPrepareFunction(M, Fns&: PrepareFns, Name: "llvm.coro.prepare.async");
2229
2230 // Find coroutines for processing.
2231 SmallVector<LazyCallGraph::Node *> Coroutines;
2232 for (LazyCallGraph::Node &N : C)
2233 if (N.getFunction().isPresplitCoroutine())
2234 Coroutines.push_back(Elt: &N);
2235
2236 if (Coroutines.empty() && PrepareFns.empty())
2237 return PreservedAnalyses::all();
2238
2239 if (Coroutines.empty()) {
2240 for (auto *PrepareFn : PrepareFns) {
2241 replaceAllPrepares(PrepareFn, CG, C);
2242 }
2243 }
2244
2245 // Split all the coroutines.
2246 for (LazyCallGraph::Node *N : Coroutines) {
2247 Function &F = N->getFunction();
2248 LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
2249 << "'\n");
2250 F.setSplittedCoroutine();
2251
2252 SmallVector<Function *, 4> Clones;
2253 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
2254 const coro::Shape Shape =
2255 splitCoroutine(F, Clones, TTI&: FAM.getResult<TargetIRAnalysis>(IR&: F),
2256 OptimizeFrame, MaterializableCallback);
2257 updateCallGraphAfterCoroutineSplit(N&: *N, Shape, Clones, C, CG, AM, UR, FAM);
2258
2259 ORE.emit(RemarkBuilder: [&]() {
2260 return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
2261 << "Split '" << ore::NV("function", F.getName())
2262 << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
2263 << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
2264 });
2265
2266 if (!Shape.CoroSuspends.empty()) {
2267 // Run the CGSCC pipeline on the original and newly split functions.
2268 UR.CWorklist.insert(X: &C);
2269 for (Function *Clone : Clones)
2270 UR.CWorklist.insert(X: CG.lookupSCC(N&: CG.get(F&: *Clone)));
2271 }
2272 }
2273
2274 if (!PrepareFns.empty()) {
2275 for (auto *PrepareFn : PrepareFns) {
2276 replaceAllPrepares(PrepareFn, CG, C);
2277 }
2278 }
2279
2280 return PreservedAnalyses::none();
2281}
2282
