//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// there is a path from its definition to a use that crosses a suspend point.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
// GEP + load from the coroutine frame. At the point of the definition we spill
// the value into the coroutine frame.
//===----------------------------------------------------------------------===//

#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <deque>
#include <optional>

using namespace llvm;

// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
// "coro-frame", which results in leaner debug spew.
#define DEBUG_TYPE "coro-suspend-crossing"

enum { SmallVectorThreshold = 32 };

// Provides a two-way mapping between blocks and numbers.
namespace {
class BlockToIndexMapping {
  SmallVector<BasicBlock *, SmallVectorThreshold> V;

public:
  size_t size() const { return V.size(); }

  BlockToIndexMapping(Function &F) {
    for (BasicBlock &BB : F)
      V.push_back(&BB);
    llvm::sort(V);
  }

  size_t blockToIndex(BasicBlock const *BB) const {
    auto *I = llvm::lower_bound(V, BB);
    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
    return I - V.begin();
  }

  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
};
} // end anonymous namespace
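
// Usage sketch (hypothetical): indices round-trip through the mapping.
//
//   BlockToIndexMapping Mapping(F);
//   size_t Idx = Mapping.blockToIndex(BB);
//   assert(Mapping.indexToBlock(Idx) == BB);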

// The SuspendCrossingInfo maintains data that allows us to answer the
// question: given two BasicBlocks A and B, is there a path from A to B that
// passes through a suspend point?
//
// For every basic block 'i' it maintains a BlockData that consists of:
//   Consumes: a bit vector which contains a set of indices of blocks that can
//             reach block 'i'. A block can trivially reach itself.
//   Kills: a bit vector which contains a set of indices of blocks that can
//          reach block 'i', but there is a path crossing a suspend point
//          not repeating 'i' (path to 'i' without cycles containing 'i').
//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
//   KillLoop: there is a path from 'i' to 'i', not otherwise repeating 'i',
//             that crosses a suspend point.
//
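// For example, in the hypothetical CFG below, where block 'susp' contains a
// suspend point:
//
//        entry
//       /     \
//    susp     plain
//       \     /
//        merge
//
// merge.Consumes is {entry, susp, plain, merge} while merge.Kills is only
// {entry, susp}: the paths from 'entry' and from 'susp' to 'merge' that go
// through 'susp' cross a suspend point, but the path plain->merge does not.
// Hence hasPathCrossingSuspendPoint(entry, merge) below is true while
// hasPathCrossingSuspendPoint(plain, merge) is false.
//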
namespace {
class SuspendCrossingInfo {
  BlockToIndexMapping Mapping;

  struct BlockData {
    BitVector Consumes;
    BitVector Kills;
    bool Suspend = false;
    bool End = false;
    bool KillLoop = false;
    bool Changed = false;
  };
  SmallVector<BlockData, SmallVectorThreshold> Block;

  iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
    return llvm::predecessors(BB);
  }

  BlockData &getBlockData(BasicBlock *BB) {
    return Block[Mapping.blockToIndex(BB)];
  }

  /// Compute the BlockData for the current function in one iteration.
  /// Initialize - Whether this is the first iteration; if so, we can optimize
  /// the initial case a little bit by a manual loop switch.
  /// Returns whether the BlockData changed in this iteration.
  template <bool Initialize = false>
  bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);

public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() const;
  void dump(StringRef Label, BitVector const &BV) const;
#endif

  SuspendCrossingInfo(Function &F, coro::Shape &Shape);

  /// Returns true if there is a path from \p From to \p To crossing a suspend
  /// point without crossing \p From a 2nd time.
  bool hasPathCrossingSuspendPoint(BasicBlock *From, BasicBlock *To) const {
    size_t const FromIndex = Mapping.blockToIndex(From);
    size_t const ToIndex = Mapping.blockToIndex(To);
    bool const Result = Block[ToIndex].Kills[FromIndex];
    LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
                      << " answer is " << Result << "\n");
    return Result;
  }

  /// Returns true if there is a path from \p From to \p To crossing a suspend
  /// point without crossing \p From a 2nd time. If \p From is the same as \p To
  /// this will also check if there is a looping path crossing a suspend point.
  bool hasPathOrLoopCrossingSuspendPoint(BasicBlock *From,
                                         BasicBlock *To) const {
    size_t const FromIndex = Mapping.blockToIndex(From);
    size_t const ToIndex = Mapping.blockToIndex(To);
    bool Result = Block[ToIndex].Kills[FromIndex] ||
                  (From == To && Block[ToIndex].KillLoop);
    LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
                      << " answer is " << Result << " (path or loop)\n");
    return Result;
  }

  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
    auto *I = cast<Instruction>(U);

    // We rewrote PHINodes, so that only the ones with exactly one incoming
    // value need to be analyzed.
    if (auto *PN = dyn_cast<PHINode>(I))
      if (PN->getNumIncomingValues() > 1)
        return false;

    BasicBlock *UseBB = I->getParent();

    // As a special case, treat uses by an llvm.coro.suspend.retcon or an
    // llvm.coro.suspend.async as if they were uses in the suspend's single
    // predecessor: the uses conceptually occur before the suspend.
    if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
      UseBB = UseBB->getSinglePredecessor();
      assert(UseBB && "should have split coro.suspend into its own block");
    }

    return hasPathCrossingSuspendPoint(DefBB, UseBB);
  }

  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
  }

  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
    auto *DefBB = I.getParent();

    // As a special case, treat values produced by an llvm.coro.suspend.*
    // as if they were defined in the single successor: the uses
    // conceptually occur after the suspend.
    if (isa<AnyCoroSuspendInst>(I)) {
      DefBB = DefBB->getSingleSuccessor();
      assert(DefBB && "should have split coro.suspend into its own block");
    }

    return isDefinitionAcrossSuspend(DefBB, U);
  }

  bool isDefinitionAcrossSuspend(Value &V, User *U) const {
    if (auto *Arg = dyn_cast<Argument>(&V))
      return isDefinitionAcrossSuspend(*Arg, U);
    if (auto *Inst = dyn_cast<Instruction>(&V))
      return isDefinitionAcrossSuspend(*Inst, U);

    llvm_unreachable(
        "Coroutine can only collect Argument and Instruction now.");
  }
};
} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
                                                BitVector const &BV) const {
  dbgs() << Label << ":";
  for (size_t I = 0, N = BV.size(); I < N; ++I)
    if (BV[I])
      dbgs() << " " << Mapping.indexToBlock(I)->getName();
  dbgs() << "\n";
}

LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
  for (size_t I = 0, N = Block.size(); I < N; ++I) {
    BasicBlock *const B = Mapping.indexToBlock(I);
    dbgs() << B->getName() << ":\n";
    dump(" Consumes", Block[I].Consumes);
    dump(" Kills", Block[I].Kills);
  }
  dbgs() << "\n";
}
#endif

template <bool Initialize>
bool SuspendCrossingInfo::computeBlockData(
    const ReversePostOrderTraversal<Function *> &RPOT) {
  bool Changed = false;

  for (const BasicBlock *BB : RPOT) {
    auto BBNo = Mapping.blockToIndex(BB);
    auto &B = Block[BBNo];

    // We don't need to check the predecessors on the initial iteration.
    if constexpr (!Initialize)
      // If none of the predecessors of the current block changed,
      // the BlockData for the current block cannot change either.
      if (all_of(predecessors(B), [this](BasicBlock *BB) {
            return !Block[Mapping.blockToIndex(BB)].Changed;
          })) {
        B.Changed = false;
        continue;
      }

    // Save the Consumes and Kills bitsets so that it is easy to see
    // if anything changed after propagation.
    auto SavedConsumes = B.Consumes;
    auto SavedKills = B.Kills;

    for (BasicBlock *PI : predecessors(B)) {
      auto PrevNo = Mapping.blockToIndex(PI);
      auto &P = Block[PrevNo];

      // Propagate Kills and Consumes from predecessors into B.
      B.Consumes |= P.Consumes;
      B.Kills |= P.Kills;

      // If block P is a suspend block, it should propagate kills into block
      // B for every block P consumes.
      if (P.Suspend)
        B.Kills |= P.Consumes;
    }

    if (B.Suspend) {
      // If block B is a suspend block, it should kill all of the blocks it
      // consumes.
      B.Kills |= B.Consumes;
    } else if (B.End) {
      // If block B is an end block, it should not propagate kills as the
      // blocks following coro.end() are reached during initial invocation
      // of the coroutine while all the data are still available on the
      // stack or in the registers.
      B.Kills.reset();
    } else {
      // This is reached when block B is neither a suspend block nor a
      // coro.end block; we need to make sure that B is not in its own
      // kill set.
      B.KillLoop |= B.Kills[BBNo];
      B.Kills.reset(BBNo);
    }

    if constexpr (!Initialize) {
      B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
      Changed |= B.Changed;
    }
  }

  return Changed;
}
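
// To illustrate the KillLoop case above (hypothetical CFG): for a loop
//
//   header -> body -> header
//
// where 'body' contains a suspend point, the path header->body->header
// crosses a suspend point without otherwise repeating 'header', so
// header.KillLoop becomes true even though header's own bit in header.Kills
// is reset by the code above.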

SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
    : Mapping(F) {
  const size_t N = Mapping.size();
  Block.resize(N);

  // Initialize every block so that it consumes itself.
  for (size_t I = 0; I < N; ++I) {
    auto &B = Block[I];
    B.Consumes.resize(N);
    B.Kills.resize(N);
    B.Consumes.set(I);
    B.Changed = true;
  }

  // Mark all CoroEnd blocks. We do not propagate Kills beyond coro.ends as
  // the code beyond coro.end is reachable during initial invocation of the
  // coroutine.
  for (auto *CE : Shape.CoroEnds)
    getBlockData(CE->getParent()).End = true;

  // Mark all suspend blocks and indicate that they kill everything they
  // consume. Note that crossing coro.save also requires a spill, as any code
  // between coro.save and coro.suspend may resume the coroutine and all of the
  // state needs to be saved by that time.
  auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
    BasicBlock *SuspendBlock = BarrierInst->getParent();
    auto &B = getBlockData(SuspendBlock);
    B.Suspend = true;
    B.Kills |= B.Consumes;
  };
  for (auto *CSI : Shape.CoroSuspends) {
    markSuspendBlock(CSI);
    if (auto *Save = CSI->getCoroSave())
      markSuspendBlock(Save);
  }

  // It is considered to be faster to use RPO traversal for forward-edges
  // dataflow analysis.
  ReversePostOrderTraversal<Function *> RPOT(&F);
  computeBlockData</*Initialize=*/true>(RPOT);
  while (computeBlockData</*Initialize=*/false>(RPOT))
    ;

  LLVM_DEBUG(dump());
}

namespace {

// RematGraph is used to construct a DAG for rematerializable instructions.
// When the constructor is invoked with a candidate instruction (which is
// materializable) it builds a DAG of materializable instructions from that
// point.
// Typically, for each instruction identified as re-materializable across a
// suspend point, a RematGraph will be created.
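//
// For example, given the hypothetical IR
//
//   %a = add i32 %x, 1
//   %b = mul i32 %a, %a
//   ... suspend point ...
//   use of %b
//
// constructing a RematGraph for %b (assuming both %a and %b are
// materializable and cross the suspend point) yields an entry node for %b
// with an operand edge to the node for %a, so %a can be rematerialized
// before %b on the resume path.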
struct RematGraph {
  // Each RematNode in the graph contains the edges to instructions providing
  // operands in the current node.
  struct RematNode {
    Instruction *Node;
    SmallVector<RematNode *> Operands;
    RematNode() = default;
    RematNode(Instruction *V) : Node(V) {}
  };

  RematNode *EntryNode;
  using RematNodeMap =
      SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
  RematNodeMap Remats;
  const std::function<bool(Instruction &)> &MaterializableCallback;
  SuspendCrossingInfo &Checker;

  RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
             Instruction *I, SuspendCrossingInfo &Checker)
      : MaterializableCallback(MaterializableCallback), Checker(Checker) {
    std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
    EntryNode = FirstNode.get();
    std::deque<std::unique_ptr<RematNode>> WorkList;
    addNode(std::move(FirstNode), WorkList, cast<User>(I));
    while (WorkList.size()) {
      std::unique_ptr<RematNode> N = std::move(WorkList.front());
      WorkList.pop_front();
      addNode(std::move(N), WorkList, cast<User>(I));
    }
  }

  void addNode(std::unique_ptr<RematNode> NUPtr,
               std::deque<std::unique_ptr<RematNode>> &WorkList,
               User *FirstUse) {
    RematNode *N = NUPtr.get();
    if (Remats.count(N->Node))
      return;

    // We haven't seen this node yet - add it to the list.
    Remats[N->Node] = std::move(NUPtr);
    for (auto &Def : N->Node->operands()) {
      Instruction *D = dyn_cast<Instruction>(Def.get());
      if (!D || !MaterializableCallback(*D) ||
          !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
        continue;

      if (Remats.count(D)) {
        // Already have this in the graph.
        N->Operands.push_back(Remats[D].get());
        continue;
      }

      bool NoMatch = true;
      for (auto &I : WorkList) {
        if (I->Node == D) {
          NoMatch = false;
          N->Operands.push_back(I.get());
          break;
        }
      }
      if (NoMatch) {
        // Create a new node.
        std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
        N->Operands.push_back(ChildNode.get());
        WorkList.push_back(std::move(ChildNode));
      }
    }
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() const {
    dbgs() << "Entry (";
    if (EntryNode->Node->getParent()->hasName())
      dbgs() << EntryNode->Node->getParent()->getName();
    else
      EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
    dbgs() << ") : " << *EntryNode->Node << "\n";
    for (auto &E : Remats) {
      dbgs() << *(E.first) << "\n";
      for (RematNode *U : E.second->Operands)
        dbgs() << "  " << *U->Node << "\n";
    }
  }
#endif
};
} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<RematGraph *> {
  using NodeRef = RematGraph::RematNode *;
  using ChildIteratorType = RematGraph::RematNode **;

  static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
  static ChildIteratorType child_begin(NodeRef N) {
    return N->Operands.begin();
  }
  static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
};

} // end namespace llvm
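
// With these traits, the generic graph iterators from
// "llvm/ADT/PostOrderIterator.h" (already included above) can walk a
// RematGraph. A sketch:
//
//   RematGraph RG(MaterializableCallback, I, Checker);
//   for (RematGraph::RematNode *N : post_order(&RG))
//     ; // visits operand nodes before the instructions that use them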

#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"

namespace {
class FrameTypeBuilder;
// Mapping from the to-be-spilled value to all the users that need reload.
using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
struct AllocaInfo {
  AllocaInst *Alloca;
  DenseMap<Instruction *, std::optional<APInt>> Aliases;
  bool MayWriteBeforeCoroBegin;
  AllocaInfo(AllocaInst *Alloca,
             DenseMap<Instruction *, std::optional<APInt>> Aliases,
             bool MayWriteBeforeCoroBegin)
      : Alloca(Alloca), Aliases(std::move(Aliases)),
        MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
};
struct FrameDataInfo {
  // All the values (that are not allocas) that need to be spilled to the
  // frame.
  SpillInfo Spills;
  // Allocas contains all values defined as allocas that need to live in the
  // frame.
  SmallVector<AllocaInfo, 8> Allocas;

  SmallVector<Value *, 8> getAllDefs() const {
    SmallVector<Value *, 8> Defs;
    for (const auto &P : Spills)
      Defs.push_back(P.first);
    for (const auto &A : Allocas)
      Defs.push_back(A.Alloca);
    return Defs;
  }

  uint32_t getFieldIndex(Value *V) const {
    auto Itr = FieldIndexMap.find(V);
    assert(Itr != FieldIndexMap.end() &&
           "Value does not have a frame field index");
    return Itr->second;
  }

  void setFieldIndex(Value *V, uint32_t Index) {
    assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
           "Cannot set the index for the same field twice.");
    FieldIndexMap[V] = Index;
  }

  Align getAlign(Value *V) const {
    auto Iter = FieldAlignMap.find(V);
    assert(Iter != FieldAlignMap.end());
    return Iter->second;
  }

  void setAlign(Value *V, Align AL) {
    assert(FieldAlignMap.count(V) == 0);
    FieldAlignMap.insert({V, AL});
  }

  uint64_t getDynamicAlign(Value *V) const {
    auto Iter = FieldDynamicAlignMap.find(V);
    assert(Iter != FieldDynamicAlignMap.end());
    return Iter->second;
  }

  void setDynamicAlign(Value *V, uint64_t Align) {
    assert(FieldDynamicAlignMap.count(V) == 0);
    FieldDynamicAlignMap.insert({V, Align});
  }

  uint64_t getOffset(Value *V) const {
    auto Iter = FieldOffsetMap.find(V);
    assert(Iter != FieldOffsetMap.end());
    return Iter->second;
  }

  void setOffset(Value *V, uint64_t Offset) {
    assert(FieldOffsetMap.count(V) == 0);
    FieldOffsetMap.insert({V, Offset});
  }

  // Remap the index of every field in the frame, using the final layout index.
  void updateLayoutIndex(FrameTypeBuilder &B);

private:
  // LayoutIndexUpdateStarted is used to avoid updating the index of any field
  // twice by mistake.
  bool LayoutIndexUpdateStarted = false;
  // Map from values to their slot indexes on the frame. They will be first set
  // with their original insertion field index. After the frame is built, their
  // indexes will be updated into the final layout index.
  DenseMap<Value *, uint32_t> FieldIndexMap;
  // Map from values to their alignment on the frame. They would be set after
  // the frame is built.
  DenseMap<Value *, Align> FieldAlignMap;
  DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
  // Map from values to their offset on the frame. They would be set after
  // the frame is built.
  DenseMap<Value *, uint64_t> FieldOffsetMap;
};
} // namespace

#ifndef NDEBUG
static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
  dbgs() << "------------- " << Title << "--------------\n";
  for (const auto &E : Spills) {
    E.first->dump();
    dbgs() << " user: ";
    for (auto *I : E.second)
      I->dump();
  }
}
static void dumpRemats(
    StringRef Title,
    const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
  dbgs() << "------------- " << Title << "--------------\n";
  for (const auto &E : RM) {
    E.second->dump();
    dbgs() << "--\n";
  }
}

static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
  dbgs() << "------------- Allocas --------------\n";
  for (const auto &A : Allocas) {
    A.Alloca->dump();
  }
}
#endif

namespace {
using FieldIDType = size_t;
// We cannot rely solely on natural alignment of a type when building a
// coroutine frame: if the alignment specified on the Alloca instruction
// differs from the natural alignment of the alloca type, we will need to
// insert padding.
class FrameTypeBuilder {
private:
  struct Field {
    uint64_t Size;
    uint64_t Offset;
    Type *Ty;
    FieldIDType LayoutFieldIndex;
    Align Alignment;
    Align TyAlignment;
    uint64_t DynamicAlignBuffer;
  };

  const DataLayout &DL;
  LLVMContext &Context;
  uint64_t StructSize = 0;
  Align StructAlign;
  bool IsFinished = false;

  std::optional<Align> MaxFrameAlignment;

  SmallVector<Field, 8> Fields;
  DenseMap<Value *, unsigned> FieldIndexByKey;

public:
  FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
                   std::optional<Align> MaxFrameAlignment)
      : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}

  /// Add a field to this structure for the storage of an `alloca`
  /// instruction.
  [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
                                              bool IsHeader = false) {
    Type *Ty = AI->getAllocatedType();

    // Make an array type if this is a static array allocation.
    if (AI->isArrayAllocation()) {
      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
      else
        report_fatal_error("Coroutines cannot handle non static allocas yet");
    }

    return addField(Ty, AI->getAlign(), IsHeader);
  }

  /// We want to put the allocas whose lifetime-ranges are not overlapped
  /// into one slot of the coroutine frame.
  /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
  ///
  ///     cppcoro::task<void> alternative_paths(bool cond) {
  ///         if (cond) {
  ///             big_structure a;
  ///             process(a);
  ///             co_await something();
  ///         } else {
  ///             big_structure b;
  ///             process2(b);
  ///             co_await something();
  ///         }
  ///     }
  ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of the coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts into non-overlapped sets, in order to put allocas in the
  /// same non-overlapped set into the same slot in the coroutine frame. It
  /// then adds a field for each non-overlapped set, using the largest type
  /// as the field type.
  ///
  /// Side Effects: Because we sort the allocas, the order of allocas in the
  /// frame may differ from the order in the source code.
  void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
                          coro::Shape &Shape);

  /// Add a field to this structure.
  [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
                                     bool IsHeader = false,
                                     bool IsSpillOfValue = false) {
    assert(!IsFinished && "adding fields to a finished builder");
    assert(Ty && "must provide a type for a field");

    // The field size is always the alloc size of the type.
    uint64_t FieldSize = DL.getTypeAllocSize(Ty);

    // For an alloca with size=0, we don't need to add a field; it can just
    // point to any index in the frame. Use index 0.
    if (FieldSize == 0) {
      return 0;
    }

    // The field alignment might not be the type alignment, but we need
    // to remember the type alignment anyway to build the type.
    // If we are spilling values we don't need to worry about ABI alignment
    // concerns.
    Align ABIAlign = DL.getABITypeAlign(Ty);
    Align TyAlignment = ABIAlign;
    if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
      TyAlignment = *MaxFrameAlignment;
    Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);

    // The field alignment could be bigger than the max frame alignment, in
    // which case we request additional storage to be able to dynamically
    // align the pointer.
    uint64_t DynamicAlignBuffer = 0;
    if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
      DynamicAlignBuffer =
          offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
      FieldAlignment = *MaxFrameAlignment;
      FieldSize = FieldSize + DynamicAlignBuffer;
    }
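
    // For example (a worked sketch): with MaxFrameAlignment == 8 and a field
    // requiring 32-byte alignment, DynamicAlignBuffer becomes
    // offsetToAlignment(8, 32) == 24, so the field occupies 24 extra bytes
    // and its address can be realigned to 32 at runtime.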

    // Lay out header fields immediately.
    uint64_t Offset;
    if (IsHeader) {
      Offset = alignTo(StructSize, FieldAlignment);
      StructSize = Offset + FieldSize;

      // Everything else has a flexible offset.
    } else {
      Offset = OptimizedStructLayoutField::FlexibleOffset;
    }

    Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
                      DynamicAlignBuffer});
    return Fields.size() - 1;
  }

  /// Finish the layout and set the body on the given type.
  void finish(StructType *Ty);

  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id].LayoutFieldIndex;
  }

  Field getLayoutField(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id];
  }
};
} // namespace

void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
  auto Updater = [&](Value *I) {
    auto Field = B.getLayoutField(getFieldIndex(I));
    setFieldIndex(I, Field.LayoutFieldIndex);
    setAlign(I, Field.Alignment);
    uint64_t dynamicAlign =
        Field.DynamicAlignBuffer
            ? Field.DynamicAlignBuffer + Field.Alignment.value()
            : 0;
    setDynamicAlign(I, dynamicAlign);
    setOffset(I, Field.Offset);
  };
  LayoutIndexUpdateStarted = true;
  for (auto &S : Spills)
    Updater(S.first);
  for (const auto &A : Allocas)
    Updater(A.Alloca);
  LayoutIndexUpdateStarted = false;
}

void FrameTypeBuilder::addFieldForAllocas(const Function &F,
                                          FrameDataInfo &FrameData,
                                          coro::Shape &Shape) {
  using AllocaSetType = SmallVector<AllocaInst *, 4>;
  SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

  // We need to add fields for the allocas at the end of this function.
  auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
    for (auto AllocaList : NonOverlapedAllocas) {
      auto *LargestAI = *AllocaList.begin();
      FieldIDType Id = addFieldForAlloca(LargestAI);
      for (auto *Alloca : AllocaList)
        FrameData.setFieldIndex(Alloca, Id);
    }
  });

  if (!Shape.OptimizeFrame) {
    for (const auto &A : FrameData.Allocas) {
      AllocaInst *Alloca = A.Alloca;
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
    return;
  }

  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges of all allocas overlap in the
  // blocks that contain coro.end and in their successor blocks.
  // So we choose to skip these blocks when we calculate the live range
  // for each alloca. This should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be a SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas in the same slot.
  DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
  for (auto *CoroSuspendInst : Shape.CoroSuspends) {
    for (auto *U : CoroSuspendInst->users()) {
      if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
        auto *SWI = const_cast<SwitchInst *>(ConstSWI);
        DefaultSuspendDest[SWI] = SWI->getDefaultDest();
        SWI->setDefaultDest(SWI->getSuccessor(1));
      }
    }
  }

  auto ExtractAllocas = [&]() {
    AllocaSetType Allocas;
    Allocas.reserve(FrameData.Allocas.size());
    for (const auto &A : FrameData.Allocas)
      Allocas.push_back(A.Alloca);
    return Allocas;
  };
  StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
                                      StackLifetime::LivenessType::May);
  StackLifetimeAnalyzer.run();
  auto IsAllocaInterference = [&](const AllocaInst *AI1,
                                  const AllocaInst *AI2) {
    return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
        StackLifetimeAnalyzer.getLiveRange(AI2));
  };
  auto GetAllocaSize = [&](const AllocaInfo &A) {
    std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
    assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
    assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
    return RetSize->getFixedValue();
  };
  // Put larger allocas in the front. So the larger allocas have higher
  // priority to merge, which can save more space potentially. Also each
  // AllocaSet would be ordered. So we can get the largest Alloca in one
  // AllocaSet easily.
  sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    bool Merged = false;
    // Try to find an existing NonOverlappedAllocaSet that the Alloca does
    // not interfere with. If there is one, insert the alloca into that
    // NonOverlappedAllocaSet.
    for (auto &AllocaSet : NonOverlapedAllocas) {
      assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
      bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
        return IsAllocaInterference(Alloca, Iter);
      });
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A should satisfy the alignment requirement for B.
      //
      // There may be other more fine-grained strategies to handle the
      // alignment information during the merging process. But it seems hard
      // to handle these strategies well, and they would benefit little.
      bool Alignable = [&]() -> bool {
        auto *LargestAlloca = *AllocaSet.begin();
        return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
               0;
      }();
      bool CouldMerge = NoInterference && Alignable;
      if (!CouldMerge)
        continue;
      AllocaSet.push_back(Alloca);
      Merged = true;
      break;
    }
    if (!Merged) {
      NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
    }
  }
  // Restore the default target destination saved above for each switch
  // instruction.
  for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
    SwitchInst *SWI = SwitchAndDefaultDest.first;
    BasicBlock *DestBB = SwitchAndDefaultDest.second;
    SWI->setDefaultDest(DestBB);
  }
  // This debug output tells us which allocas are merged into one slot.
  LLVM_DEBUG(for (auto &AllocaSet
                  : NonOverlapedAllocas) {
    if (AllocaSet.size() > 1) {
      dbgs() << "In Function:" << F.getName() << "\n";
      dbgs() << "Find Union Set "
             << "\n";
      dbgs() << "\tAllocas are \n";
      for (auto Alloca : AllocaSet)
        dbgs() << "\t\t" << *Alloca << "\n";
    }
  });
}

void FrameTypeBuilder::finish(StructType *Ty) {
  assert(!IsFinished && "already finished!");

  // Prepare the optimal-layout field array.
  // The Id in the layout field is a pointer to our Field for it.
  SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
  LayoutFields.reserve(Fields.size());
  for (auto &Field : Fields) {
    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
                              Field.Offset);
  }

  // Perform layout.
  auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
  StructSize = SizeAndAlign.first;
  StructAlign = SizeAndAlign.second;

  auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
    return *static_cast<Field *>(const_cast<void *>(LayoutField.Id));
  };

  // We need to produce a packed struct type if there's a field whose
  // assigned offset isn't a multiple of its natural type alignment.
  bool Packed = [&] {
    for (auto &LayoutField : LayoutFields) {
      auto &F = getField(LayoutField);
      if (!isAligned(F.TyAlignment, LayoutField.Offset))
        return true;
    }
    return false;
  }();
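
  // For instance (a hypothetical layout): if a field whose type has natural
  // alignment 8 were assigned offset 4, isAligned(8, 4) would be false, the
  // struct would be packed, and the loop below would then materialize every
  // padding gap as an explicit i8 array field.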

  // Build the struct body.
  SmallVector<Type *, 16> FieldTypes;
  FieldTypes.reserve(LayoutFields.size() * 3 / 2);
  uint64_t LastOffset = 0;
  for (auto &LayoutField : LayoutFields) {
    auto &F = getField(LayoutField);

    auto Offset = LayoutField.Offset;

    // Add a padding field if there's a padding gap and we're either
    // building a packed struct or the padding gap is more than we'd
    // get from aligning to the field type's natural alignment.
    assert(Offset >= LastOffset);
    if (Offset != LastOffset) {
      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
                                            Offset - LastOffset));
    }

    F.Offset = Offset;
    F.LayoutFieldIndex = FieldTypes.size();

    FieldTypes.push_back(F.Ty);
    if (F.DynamicAlignBuffer) {
      FieldTypes.push_back(
          ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
    }
    LastOffset = Offset + F.Size;
  }

  Ty->setBody(FieldTypes, Packed);

#ifndef NDEBUG
  // Check that the IR layout matches the offsets we expect.
  auto Layout = DL.getStructLayout(Ty);
  for (auto &F : Fields) {
    assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
    assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
  }
#endif

  IsFinished = true;
}

static void cacheDIVar(FrameDataInfo &FrameData,
                       DenseMap<Value *, DILocalVariable *> &DIVarCache) {
  for (auto *V : FrameData.getAllDefs()) {
    if (DIVarCache.contains(V))
      continue;

    auto CacheIt = [&DIVarCache, V](const auto &Container) {
      auto *I = llvm::find_if(Container, [](auto *DDI) {
        return DDI->getExpression()->getNumElements() == 0;
      });
      if (I != Container.end())
        DIVarCache.insert({V, (*I)->getVariable()});
    };
    CacheIt(findDbgDeclares(V));
    CacheIt(findDVRDeclares(V));
  }
}

/// Create a name for Type. It uses MDString to store the newly created string
/// to avoid a memory leak.
static StringRef solveTypeName(Type *Ty) {
  if (Ty->isIntegerTy()) {
    // The longest name in common may be '__int_128', which has 9 characters.
    SmallString<16> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
    auto *MDName = MDString::get(Ty->getContext(), OS.str());
    return MDName->getString();
  }

  if (Ty->isFloatingPointTy()) {
    if (Ty->isFloatTy())
      return "__float_";
    if (Ty->isDoubleTy())
      return "__double_";
    return "__floating_type_";
  }

  if (Ty->isPointerTy())
    return "PointerType";

  if (Ty->isStructTy()) {
    if (!cast<StructType>(Ty)->hasName())
      return "__LiteralStructType_";

    auto Name = Ty->getStructName();

    SmallString<16> Buffer(Name);
    for (auto &Iter : Buffer)
      if (Iter == '.' || Iter == ':')
        Iter = '_';
    auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
    return MDName->getString();
  }

  return "UnknownType";
}
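
// For example, an i64 is named "__int_64" above, and a struct named
// "class.std::coroutine_handle" (a hypothetical name) becomes
// "class_std__coroutine_handle" after the '.'/':' substitution.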

static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
                           const DataLayout &Layout, DIScope *Scope,
                           unsigned LineNum,
                           DenseMap<Type *, DIType *> &DITypeCache) {
  if (DIType *DT = DITypeCache.lookup(Ty))
    return DT;

  StringRef Name = solveTypeName(Ty);

  DIType *RetType = nullptr;

  if (Ty->isIntegerTy()) {
    auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
    RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isFloatingPointTy()) {
    RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
                                      dwarf::DW_ATE_float,
                                      llvm::DINode::FlagArtificial);
  } else if (Ty->isPointerTy()) {
    // Construct a PointerType that points to null (aka void *) instead of
    // exploring the pointee type, to avoid an infinite recursion problem.
    // For example, we would be in trouble if we traversed recursively:
    //
    //  struct Node {
    //      Node* ptr;
    //  };
    RetType =
        Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
                                  Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
                                  /*DWARFAddressSpace=*/std::nullopt, Name);
  } else if (Ty->isStructTy()) {
    auto *DIStruct = Builder.createStructType(
        Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
        Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
        llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());

    auto *StructTy = cast<StructType>(Ty);
    SmallVector<Metadata *, 16> Elements;
    for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
      DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
                                 Scope, LineNum, DITypeCache);
      assert(DITy);
      Elements.push_back(Builder.createMemberType(
          Scope, DITy->getName(), Scope->getFile(), LineNum,
          DITy->getSizeInBits(), DITy->getAlignInBits(),
          Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
          llvm::DINode::FlagArtificial, DITy));
    }

    Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));

    RetType = DIStruct;
  } else {
    LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
    TypeSize Size = Layout.getTypeSizeInBits(Ty);
    auto *CharSizeType = Builder.createBasicType(
        Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);

    if (Size <= 8)
      RetType = CharSizeType;
    else {
      if (Size % 8 != 0)
        Size = TypeSize::getFixed(Size + 8 - (Size % 8));

      RetType = Builder.createArrayType(
          Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
          Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
    }
  }

  DITypeCache.insert({Ty, RetType});
  return RetType;
}
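
// A worked sketch of the fallback case above: a hypothetical unresolved
// 12-bit type is rounded up to 16 bits and described as a 2-element
// (16 / 8) array of the 8-bit char type.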

/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly.
///
/// Create debug information for the coroutine frame with the debug name
/// "__coro_frame". The debug information for the fields of the coroutine
/// frame is constructed in the following way:
/// 1. For all the values in the frame, we search the uses of dbg.declare to
///    find the corresponding debug variables for the values. If we can find
///    a debug variable, we can get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we can only try to build
///    the DIType from the Type. We do this in solveDIType. We only handle
///    integer, floating-point, pointer, and struct types for now.
static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
                                FrameDataInfo &FrameData) {
  DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the function was not
  // compiled with debug info, so we don't need to generate debug info for
  // the frame either.
  if (!DIS || !DIS->getUnit() ||
      !dwarf::isCPlusPlus(
          (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
    return;

  assert(Shape.ABI == coro::ABI::Switch &&
         "We can only build debug information for C++ coroutines now.\n");

  DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  assert(PromiseAlloca &&
         "Coroutine with switch ABI should own Promise alloca");

  TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(PromiseAlloca);
  TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(PromiseAlloca);

  DILocalVariable *PromiseDIVariable = nullptr;
  DILocation *DILoc = nullptr;
  if (!DIs.empty()) {
    DbgDeclareInst *PromiseDDI = DIs.front();
    PromiseDIVariable = PromiseDDI->getVariable();
    DILoc = PromiseDDI->getDebugLoc().get();
  } else if (!DVRs.empty()) {
    DbgVariableRecord *PromiseDVR = DVRs.front();
    PromiseDIVariable = PromiseDVR->getVariable();
    DILoc = PromiseDVR->getDebugLoc().get();
  } else {
    return;
  }

  DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
  DIFile *DFile = PromiseDIScope->getFile();
  unsigned LineNum = PromiseDIVariable->getLine();

  DICompositeType *FrameDITy = DBuilder.createStructType(
      DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
      DFile, LineNum, Shape.FrameSize * 8,
      Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
      llvm::DINodeArray());
  StructType *FrameTy = Shape.FrameTy;
  SmallVector<Metadata *, 16> Elements;
  DataLayout Layout = F.getParent()->getDataLayout();

  DenseMap<Value *, DILocalVariable *> DIVarCache;
  cacheDIVar(FrameData, DIVarCache);

  unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
  unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
  unsigned IndexIndex = Shape.SwitchLowering.IndexField;

  DenseMap<unsigned, StringRef> NameCache;
  NameCache.insert({ResumeIndex, "__resume_fn"});
  NameCache.insert({DestroyIndex, "__destroy_fn"});
  NameCache.insert({IndexIndex, "__coro_index"});

  Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
       *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
       *IndexTy = FrameTy->getElementType(IndexIndex);

  DenseMap<unsigned, DIType *> TyCache;
  TyCache.insert(
      {ResumeIndex, DBuilder.createPointerType(
                        nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
  TyCache.insert(
      {DestroyIndex, DBuilder.createPointerType(
                         nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});

  /// FIXME: If we fill the field `SizeInBits` with the actual size of
  /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
  TyCache.insert({IndexIndex, DBuilder.createBasicType(
                                  "__coro_index",
                                  (Layout.getTypeSizeInBits(IndexTy) < 8)
                                      ? 8
                                      : Layout.getTypeSizeInBits(IndexTy),
                                  dwarf::DW_ATE_unsigned_char)});

  for (auto *V : FrameData.getAllDefs()) {
    if (!DIVarCache.contains(V))
      continue;

    auto Index = FrameData.getFieldIndex(V);

    NameCache.insert({Index, DIVarCache[V]->getName()});
    TyCache.insert({Index, DIVarCache[V]->getType()});
  }

  // Cache from index to (Align, Offset) pair.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
  // The Align and Offset of the Resume function and Destroy function are
  // fixed.
  OffsetCache.insert({ResumeIndex, {8, 0}});
  OffsetCache.insert({DestroyIndex, {8, 8}});
  OffsetCache.insert(
      {IndexIndex,
       {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});

  for (auto *V : FrameData.getAllDefs()) {
    auto Index = FrameData.getFieldIndex(V);

    OffsetCache.insert(
        {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
  }

  DenseMap<Type *, DIType *> DITypeCache;
  // This counter is used to avoid duplicate type names; e.g., there may be
  // many i32 and i64 types in one coroutine, so we name them i32_0, i32_1,
  // and so on, since it makes no sense for field names to conflict with
  // each other.
  unsigned UnknownTypeNum = 0;
  for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
    if (!OffsetCache.contains(Index))
      continue;

    std::string Name;
    uint64_t SizeInBits;
    uint32_t AlignInBits;
    uint64_t OffsetInBits;
    DIType *DITy = nullptr;

    Type *Ty = FrameTy->getElementType(Index);
    assert(Ty->isSized() && "We can't handle a type which is not sized.\n");
    SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
    AlignInBits = OffsetCache[Index].first * 8;
    OffsetInBits = OffsetCache[Index].second * 8;

    if (NameCache.contains(Index)) {
      Name = NameCache[Index].str();
      DITy = TyCache[Index];
    } else {
      DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
      assert(DITy && "SolveDIType shouldn't return nullptr.\n");
      Name = DITy->getName().str();
      Name += "_" + std::to_string(UnknownTypeNum);
      UnknownTypeNum++;
    }

    Elements.push_back(DBuilder.createMemberType(
        FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
        llvm::DINode::FlagArtificial, DITy));
  }

  DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));

  auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
                                                 DFile, LineNum, FrameDITy,
                                                 true, DINode::FlagArtificial);
  assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));

  // The subprogram has a RetainedNodes field which records the debug
  // variables it contains, so we need to add __coro_frame to its
  // RetainedNodes.
  //
  // If we don't add __coro_frame to the RetainedNodes, users may get
  // `no symbol __coro_frame in context` rather than the more precise
  // `__coro_frame` is optimized out.
  if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
    auto RetainedNodes = SubProgram->getRetainedNodes();
    SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
                                                 RetainedNodes.end());
    RetainedNodesVec.push_back(FrameDIVar);
    SubProgram->replaceOperandWith(
        7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
  }

  if (UseNewDbgInfoFormat) {
    DbgVariableRecord *NewDVR =
        new DbgVariableRecord(ValueAsMetadata::get(Shape.FramePtr), FrameDIVar,
                              DBuilder.createExpression(), DILoc,
                              DbgVariableRecord::LocationType::Declare);
    BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
    It->getParent()->insertDbgRecordBefore(NewDVR, It);
  } else {
    DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
                           DBuilder.createExpression(), DILoc,
                           &*Shape.getInsertPtAfterFramePtr());
  }
}

// Build a struct that will keep state for an active coroutine.
//   struct f.frame {
//     ResumeFnTy ResumeFnAddr;
//     ResumeFnTy DestroyFnAddr;
//     ... promise (if present) ...
//     int ResumeIndex;
//     ... spills ...
//   };
static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                  FrameDataInfo &FrameData) {
  LLVMContext &C = F.getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  StructType *FrameTy = [&] {
    SmallString<32> Name(F.getName());
    Name.append(".Frame");
    return StructType::create(C, Name);
  }();

  // We will use this value to cap the alignment of spilled values.
  std::optional<Align> MaxFrameAlignment;
  if (Shape.ABI == coro::ABI::Async)
    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
  FrameTypeBuilder B(C, DL, MaxFrameAlignment);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  std::optional<FieldIDType> SwitchIndexFieldId;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FnPtrTy = PointerType::getUnqual(C);

    // Add header fields for the resume and destroy functions.
    // We can rely on these being perfectly packed.
    (void)B.addField(FnPtrTy, std::nullopt, /*IsHeader=*/true);
    (void)B.addField(FnPtrTy, std::nullopt, /*IsHeader=*/true);

    // PromiseAlloca field needs to be explicitly added here because it's
    // a header field with a fixed offset based on its alignment. Hence it
    // needs special handling and cannot be added to FrameData.Allocas.
    if (PromiseAlloca)
      FrameData.setFieldIndex(
          PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*IsHeader=*/true));

    // Add a field to store the suspend index. This doesn't need to
    // be in the header.
    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
    Type *IndexType = Type::getIntNTy(C, IndexBits);
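
    // For example, with three suspend points, Log2_64_Ceil(3) == 2, so the
    // index is stored as an i2; with a single suspend point, the std::max
    // keeps it at least one bit wide (an i1).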

    SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  // Because multiple allocas may own the same field slot,
  // we add allocas to fields here.
  B.addFieldForAllocas(F, FrameData, Shape);
  // Add PromiseAlloca to the Allocas list so that
  // 1. updateLayoutIndex could update its index after
  //    `performOptimizedStructLayout`
  // 2. it is processed in insertSpills.
  if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
    // We assume that the promise alloca won't be modified before
    // CoroBegin and no alias will be created before CoroBegin.
    FrameData.Allocas.emplace_back(
        PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
  // Create an entry for every spilled value.
  for (auto &S : FrameData.Spills) {
    Type *FieldType = S.first->getType();
    // For byval arguments, we need to store the pointed-to value in the frame,
    // instead of the pointer itself.
    if (const Argument *A = dyn_cast<Argument>(S.first))
      if (A->hasByValAttr())
        FieldType = A->getParamByValType();
    FieldIDType Id = B.addField(FieldType, std::nullopt, /*IsHeader=*/false,
                                /*IsSpillOfValue=*/true);
    FrameData.setFieldIndex(S.first, Id);
  }
1365 | |
1366 | B.finish(Ty: FrameTy); |
1367 | FrameData.updateLayoutIndex(B); |
1368 | Shape.FrameAlign = B.getStructAlign(); |
1369 | Shape.FrameSize = B.getStructSize(); |
1370 | |
1371 | switch (Shape.ABI) { |
1372 | case coro::ABI::Switch: { |
1373 | // In the switch ABI, remember the switch-index field. |
1374 | auto IndexField = B.getLayoutField(Id: *SwitchIndexFieldId); |
1375 | Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex; |
1376 | Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value(); |
1377 | Shape.SwitchLowering.IndexOffset = IndexField.Offset; |
1378 | |
1379 | // Also round the frame size up to a multiple of its alignment, as is |
1380 | // generally expected in C/C++. |
1381 | Shape.FrameSize = alignTo(Size: Shape.FrameSize, A: Shape.FrameAlign); |
1382 | break; |
1383 | } |
1384 | |
1385 | // In the retcon ABI, remember whether the frame is inline in the storage. |
1386 | case coro::ABI::Retcon: |
1387 | case coro::ABI::RetconOnce: { |
1388 | auto Id = Shape.getRetconCoroId(); |
1389 | Shape.RetconLowering.IsFrameInlineInStorage |
1390 | = (B.getStructSize() <= Id->getStorageSize() && |
1391 | B.getStructAlign() <= Id->getStorageAlignment()); |
1392 | break; |
1393 | } |
1394 | case coro::ABI::Async: { |
1395 | Shape.AsyncLowering.FrameOffset = |
1396 | alignTo(Size: Shape.AsyncLowering.ContextHeaderSize, A: Shape.FrameAlign); |
1397 | // Also make the final context size a multiple of the context alignment to |
1398 | // make allocation easier for allocators. |
1399 | Shape.AsyncLowering.ContextSize = |
1400 | alignTo(Size: Shape.AsyncLowering.FrameOffset + Shape.FrameSize, |
1401 | A: Shape.AsyncLowering.getContextAlignment()); |
1402 | if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) { |
1403 | report_fatal_error( |
1404 | reason: "The alignment requirment of frame variables cannot be higher than " |
1405 | "the alignment of the async function context" ); |
1406 | } |
1407 | break; |
1408 | } |
1409 | } |
1410 | |
1411 | return FrameTy; |
1412 | } |
1413 | |
// We use a pointer use visitor to track how an alloca is being used.
// The goal is to be able to answer the following three questions:
// 1. Should this alloca be allocated on the frame instead?
// 2. Could the content of the alloca be modified prior to CoroBegin, which
//    would require copying the data from the alloca to the frame after
//    CoroBegin?
// 3. Are there any aliases created for this alloca prior to CoroBegin but
//    used after CoroBegin? In that case, we will need to recreate the
//    aliases after CoroBegin based off the frame.
//
// To answer question 1, we track two things:
// a. The list of all BasicBlocks that use this alloca or any of its
//    aliases. In the end, we check whether any two of those basic blocks
//    cross suspension points. If so, this alloca must be put on the frame.
// b. Whether the alloca or any alias of the alloca escapes at some point,
//    either because the address is stored somewhere or because the address
//    is used in a function call that might capture it. If it ever escapes,
//    this alloca must conservatively be put on the frame.
//
// To answer question 2, we track the variable MayWriteBeforeCoroBegin.
// Whenever a potential write happens, either through a store instruction, a
// function call or any of the memory intrinsics, we check whether this
// instruction is prior to CoroBegin.
//
// To answer question 3, we track the offsets of all aliases created for the
// alloca prior to CoroBegin but used after CoroBegin. std::optional is used
// to represent the case when the offset is unknown (e.g. when a PHINode
// takes in different offset values). We cannot handle unknown offsets and
// will assert. This is the one remaining open issue; an ideal solution
// would likely require a significant redesign.
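//
// An illustrative sketch (not taken from any test) of the question-3
// situation, where an alias of the alloca is created before CoroBegin and
// used after it:
//
//   %a     = alloca [4 x i32]
//   %alias = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 2
//   %hdl   = call ptr @llvm.coro.begin(...)
//   ...
//   store i32 1, ptr %alias    ; used after CoroBegin: once %a lives on the
//                              ; frame, %alias must be recreated from the
//                              ; frame address plus the recorded offset (8).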
1439 | namespace { |
1440 | struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> { |
1441 | using Base = PtrUseVisitor<AllocaUseVisitor>; |
1442 | AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, |
1443 | const CoroBeginInst &CB, const SuspendCrossingInfo &Checker, |
1444 | bool ShouldUseLifetimeStartInfo) |
1445 | : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker), |
1446 | ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {} |
1447 | |
1448 | void visit(Instruction &I) { |
1449 | Users.insert(Ptr: &I); |
1450 | Base::visit(I); |
1451 | // If the pointer is escaped prior to CoroBegin, we have to assume it would |
1452 | // be written into before CoroBegin as well. |
1453 | if (PI.isEscaped() && !DT.dominates(Def: &CoroBegin, User: PI.getEscapingInst())) { |
1454 | MayWriteBeforeCoroBegin = true; |
1455 | } |
1456 | } |
1457 | // We need to provide this overload as PtrUseVisitor uses a pointer based |
1458 | // visiting function. |
1459 | void visit(Instruction *I) { return visit(I&: *I); } |
1460 | |
1461 | void visitPHINode(PHINode &I) { |
1462 | enqueueUsers(I); |
1463 | handleAlias(I); |
1464 | } |
1465 | |
1466 | void visitSelectInst(SelectInst &I) { |
1467 | enqueueUsers(I); |
1468 | handleAlias(I); |
1469 | } |
1470 | |
1471 | void visitStoreInst(StoreInst &SI) { |
// Regardless of whether the alias of the alloca is the value operand or the
// pointer operand, we need to assume the alloca has been written to.
1474 | handleMayWrite(I: SI); |
1475 | |
1476 | if (SI.getValueOperand() != U->get()) |
1477 | return; |
1478 | |
1479 | // We are storing the pointer into a memory location, potentially escaping. |
1480 | // As an optimization, we try to detect simple cases where it doesn't |
1481 | // actually escape, for example: |
1482 | // %ptr = alloca .. |
1483 | // %addr = alloca .. |
1484 | // store %ptr, %addr |
1485 | // %x = load %addr |
1486 | // .. |
// If %addr is only used by loading from it, we can simply treat %x as
// another alias of %ptr, and not consider %ptr escaped.
1489 | auto IsSimpleStoreThenLoad = [&]() { |
1490 | auto *AI = dyn_cast<AllocaInst>(Val: SI.getPointerOperand()); |
1491 | // If the memory location we are storing to is not an alloca, it |
1492 | // could be an alias of some other memory locations, which is difficult |
1493 | // to analyze. |
1494 | if (!AI) |
1495 | return false; |
1496 | // StoreAliases contains aliases of the memory location stored into. |
1497 | SmallVector<Instruction *, 4> StoreAliases = {AI}; |
1498 | while (!StoreAliases.empty()) { |
1499 | Instruction *I = StoreAliases.pop_back_val(); |
1500 | for (User *U : I->users()) { |
1501 | // If we are loading from the memory location, we are creating an |
1502 | // alias of the original pointer. |
1503 | if (auto *LI = dyn_cast<LoadInst>(Val: U)) { |
1504 | enqueueUsers(I&: *LI); |
1505 | handleAlias(I&: *LI); |
1506 | continue; |
1507 | } |
// If we are overwriting the memory location, the pointer certainly
// won't escape.
1510 | if (auto *S = dyn_cast<StoreInst>(Val: U)) |
1511 | if (S->getPointerOperand() == I) |
1512 | continue; |
1513 | if (auto *II = dyn_cast<IntrinsicInst>(Val: U)) |
1514 | if (II->isLifetimeStartOrEnd()) |
1515 | continue; |
// BitCastInst creates aliases of the memory location being stored
// into.
1518 | if (auto *BI = dyn_cast<BitCastInst>(Val: U)) { |
1519 | StoreAliases.push_back(Elt: BI); |
1520 | continue; |
1521 | } |
1522 | return false; |
1523 | } |
1524 | } |
1525 | |
1526 | return true; |
1527 | }; |
1528 | |
1529 | if (!IsSimpleStoreThenLoad()) |
1530 | PI.setEscaped(&SI); |
1531 | } |
1532 | |
1533 | // All mem intrinsics modify the data. |
1534 | void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(I: MI); } |
1535 | |
1536 | void visitBitCastInst(BitCastInst &BC) { |
1537 | Base::visitBitCastInst(BC); |
1538 | handleAlias(I&: BC); |
1539 | } |
1540 | |
1541 | void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { |
1542 | Base::visitAddrSpaceCastInst(ASC); |
1543 | handleAlias(I&: ASC); |
1544 | } |
1545 | |
1546 | void visitGetElementPtrInst(GetElementPtrInst &GEPI) { |
1547 | // The base visitor will adjust Offset accordingly. |
1548 | Base::visitGetElementPtrInst(GEPI); |
1549 | handleAlias(I&: GEPI); |
1550 | } |
1551 | |
1552 | void visitIntrinsicInst(IntrinsicInst &II) { |
// When we find that a lifetime marker refers to a
// subrange of the original alloca, ignore the lifetime
// markers to avoid misleading the analysis.
1556 | if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown || |
1557 | !Offset.isZero()) |
1558 | return Base::visitIntrinsicInst(II); |
1559 | LifetimeStarts.insert(Ptr: &II); |
1560 | } |
1561 | |
1562 | void visitCallBase(CallBase &CB) { |
1563 | for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op) |
1564 | if (U->get() == CB.getArgOperand(i: Op) && !CB.doesNotCapture(OpNo: Op)) |
1565 | PI.setEscaped(&CB); |
1566 | handleMayWrite(I: CB); |
1567 | } |
1568 | |
1569 | bool getShouldLiveOnFrame() const { |
1570 | if (!ShouldLiveOnFrame) |
1571 | ShouldLiveOnFrame = computeShouldLiveOnFrame(); |
1572 | return *ShouldLiveOnFrame; |
1573 | } |
1574 | |
1575 | bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; } |
1576 | |
1577 | DenseMap<Instruction *, std::optional<APInt>> getAliasesCopy() const { |
1578 | assert(getShouldLiveOnFrame() && "This method should only be called if the " |
1579 | "alloca needs to live on the frame." ); |
1580 | for (const auto &P : AliasOffetMap) |
1581 | if (!P.second) |
1582 | report_fatal_error(reason: "Unable to handle an alias with unknown offset " |
1583 | "created before CoroBegin." ); |
1584 | return AliasOffetMap; |
1585 | } |
1586 | |
1587 | private: |
1588 | const DominatorTree &DT; |
1589 | const CoroBeginInst &CoroBegin; |
1590 | const SuspendCrossingInfo &Checker; |
// All aliases of the original AllocaInst that are created before CoroBegin
// and used after CoroBegin. Each entry contains the instruction and the
// offset into the original alloca. They need to be recreated after CoroBegin
// off the frame.
DenseMap<Instruction *, std::optional<APInt>> AliasOffsetMap{};
1595 | SmallPtrSet<Instruction *, 4> Users{}; |
1596 | SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{}; |
1597 | bool MayWriteBeforeCoroBegin{false}; |
1598 | bool ShouldUseLifetimeStartInfo{true}; |
1599 | |
1600 | mutable std::optional<bool> ShouldLiveOnFrame{}; |
1601 | |
1602 | bool computeShouldLiveOnFrame() const { |
1603 | // If lifetime information is available, we check it first since it's |
1604 | // more precise. We look at every pair of lifetime.start intrinsic and |
1605 | // every basic block that uses the pointer to see if they cross suspension |
1606 | // points. The uses cover both direct uses as well as indirect uses. |
1607 | if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) { |
1608 | for (auto *I : Users) |
1609 | for (auto *S : LifetimeStarts) |
1610 | if (Checker.isDefinitionAcrossSuspend(I&: *S, U: I)) |
1611 | return true; |
1612 | // Addresses are guaranteed to be identical after every lifetime.start so |
1613 | // we cannot use the local stack if the address escaped and there is a |
1614 | // suspend point between lifetime markers. This should also cover the |
// case of a single lifetime.start intrinsic in a loop with a suspend point.
1616 | if (PI.isEscaped()) { |
1617 | for (auto *A : LifetimeStarts) { |
1618 | for (auto *B : LifetimeStarts) { |
1619 | if (Checker.hasPathOrLoopCrossingSuspendPoint(From: A->getParent(), |
1620 | To: B->getParent())) |
1621 | return true; |
1622 | } |
1623 | } |
1624 | } |
1625 | return false; |
1626 | } |
1627 | // FIXME: Ideally the isEscaped check should come at the beginning. |
1628 | // However there are a few loose ends that need to be fixed first before |
1629 | // we can do that. We need to make sure we are not over-conservative, so |
1630 | // that the data accessed in-between await_suspend and symmetric transfer |
1631 | // is always put on the stack, and also data accessed after coro.end is |
1632 | // always put on the stack (esp the return object). To fix that, we need |
1633 | // to: |
1634 | // 1) Potentially treat sret as nocapture in calls |
// 2) Specially handle the return object and put it on the stack
1636 | // 3) Utilize lifetime.end intrinsic |
1637 | if (PI.isEscaped()) |
1638 | return true; |
1639 | |
1640 | for (auto *U1 : Users) |
1641 | for (auto *U2 : Users) |
1642 | if (Checker.isDefinitionAcrossSuspend(I&: *U1, U: U2)) |
1643 | return true; |
1644 | |
1645 | return false; |
1646 | } |
1647 | |
1648 | void handleMayWrite(const Instruction &I) { |
1649 | if (!DT.dominates(Def: &CoroBegin, User: &I)) |
1650 | MayWriteBeforeCoroBegin = true; |
1651 | } |
1652 | |
1653 | bool usedAfterCoroBegin(Instruction &I) { |
1654 | for (auto &U : I.uses()) |
1655 | if (DT.dominates(Def: &CoroBegin, U)) |
1656 | return true; |
1657 | return false; |
1658 | } |
1659 | |
1660 | void handleAlias(Instruction &I) { |
// We track all aliases created prior to CoroBegin but used after.
// These aliases may need to be recreated after CoroBegin if the alloca
// needs to live on the frame.
1664 | if (DT.dominates(Def: &CoroBegin, User: &I) || !usedAfterCoroBegin(I)) |
1665 | return; |
1666 | |
if (!IsOffsetKnown) {
AliasOffsetMap[&I].reset();
} else {
auto Itr = AliasOffsetMap.find(Val: &I);
if (Itr == AliasOffsetMap.end()) {
AliasOffsetMap[&I] = Offset;
} else if (Itr->second && *Itr->second != Offset) {
// If we have seen two different possible values for this alias, we set
// it to empty.
AliasOffsetMap[&I].reset();
}
}
1679 | } |
1680 | }; |
1681 | } // namespace |
1682 | |
// We need to make room to insert a spill after the initial PHIs, but before
// the catchswitch instruction. Placing the spill before the catchswitch
// would violate the requirement that a catchswitch, like all other EHPads,
// must be the first non-PHI in a block.
//
// Split the catchswitch into a separate block and insert in its place:
//
//    cleanuppad <InsertPt> cleanupret.
//
// The cleanupret instruction will act as the insert point for the spill.
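//
// For example (an illustrative sketch, not taken from a test), a block
//
//   bb:
//     %v = phi ...
//     %cs = catchswitch within none [...] unwind ...
//
// becomes
//
//   bb:
//     %v = phi ...
//     %pad = cleanuppad within %parentpad []
//     cleanupret from %pad unwind label %bb.split  ; <- spill insert point
//   bb.split:
//     %cs = catchswitch within none [...] unwind ...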
1692 | static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { |
1693 | BasicBlock *CurrentBlock = CatchSwitch->getParent(); |
1694 | BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(I: CatchSwitch); |
1695 | CurrentBlock->getTerminator()->eraseFromParent(); |
1696 | |
1697 | auto *CleanupPad = |
1698 | CleanupPadInst::Create(ParentPad: CatchSwitch->getParentPad(), Args: {}, NameStr: "" , InsertAtEnd: CurrentBlock); |
1699 | auto *CleanupRet = |
1700 | CleanupReturnInst::Create(CleanupPad, UnwindBB: NewBlock, InsertAtEnd: CurrentBlock); |
1701 | return CleanupRet; |
1702 | } |
1703 | |
1704 | // Replace all alloca and SSA values that are accessed across suspend points |
1705 | // with GetElementPointer from coroutine frame + loads and stores. Create an |
1706 | // AllocaSpillBB that will become the new entry block for the resume parts of |
1707 | // the coroutine: |
1708 | // |
1709 | // %hdl = coro.begin(...) |
1710 | // whatever |
1711 | // |
1712 | // becomes: |
1713 | // |
1714 | // %hdl = coro.begin(...) |
1715 | // br label %AllocaSpillBB |
1716 | // |
1717 | // AllocaSpillBB: |
1718 | // ; geps corresponding to allocas that were moved to coroutine frame |
1719 | // br label PostSpill |
1720 | // |
1721 | // PostSpill: |
1722 | // whatever |
1723 | // |
1724 | // |
1725 | static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { |
1726 | auto *CB = Shape.CoroBegin; |
1727 | LLVMContext &C = CB->getContext(); |
1728 | Function *F = CB->getFunction(); |
1729 | IRBuilder<> Builder(C); |
1730 | StructType *FrameTy = Shape.FrameTy; |
1731 | Value *FramePtr = Shape.FramePtr; |
1732 | DominatorTree DT(*F); |
1733 | SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap; |
1734 | |
// Create a GEP into the coroutine frame at the field index recorded for the
// original value Orig. Append an extra 0 index for array allocas to preserve
// the original type.
1738 | auto GetFramePointer = [&](Value *Orig) -> Value * { |
1739 | FieldIDType Index = FrameData.getFieldIndex(V: Orig); |
1740 | SmallVector<Value *, 3> Indices = { |
1741 | ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0), |
1742 | ConstantInt::get(Ty: Type::getInt32Ty(C), V: Index), |
1743 | }; |
1744 | |
1745 | if (auto *AI = dyn_cast<AllocaInst>(Val: Orig)) { |
1746 | if (auto *CI = dyn_cast<ConstantInt>(Val: AI->getArraySize())) { |
1747 | auto Count = CI->getValue().getZExtValue(); |
1748 | if (Count > 1) { |
1749 | Indices.push_back(Elt: ConstantInt::get(Ty: Type::getInt32Ty(C), V: 0)); |
1750 | } |
1751 | } else { |
1752 | report_fatal_error(reason: "Coroutines cannot handle non static allocas yet" ); |
1753 | } |
1754 | } |
1755 | |
1756 | auto GEP = cast<GetElementPtrInst>( |
1757 | Val: Builder.CreateInBoundsGEP(Ty: FrameTy, Ptr: FramePtr, IdxList: Indices)); |
1758 | if (auto *AI = dyn_cast<AllocaInst>(Val: Orig)) { |
1759 | if (FrameData.getDynamicAlign(V: Orig) != 0) { |
1760 | assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value()); |
1761 | auto *M = AI->getModule(); |
1762 | auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType()); |
1763 | auto *PtrValue = Builder.CreatePtrToInt(V: GEP, DestTy: IntPtrTy); |
1764 | auto *AlignMask = |
1765 | ConstantInt::get(Ty: IntPtrTy, V: AI->getAlign().value() - 1); |
1766 | PtrValue = Builder.CreateAdd(LHS: PtrValue, RHS: AlignMask); |
1767 | PtrValue = Builder.CreateAnd(LHS: PtrValue, RHS: Builder.CreateNot(V: AlignMask)); |
1768 | return Builder.CreateIntToPtr(V: PtrValue, DestTy: AI->getType()); |
1769 | } |
// If the type of the GEP is not equal to the type of the AllocaInst, it
// implies that the AllocaInst may be sharing its frame slot with another
// AllocaInst. So we cast the GEP to the type of the AllocaInst here to
// reuse the frame storage.
//
// Note: If we change the strategy for dealing with alignment, we need to
// refine this casting.
1777 | if (GEP->getType() != Orig->getType()) |
1778 | return Builder.CreateAddrSpaceCast(V: GEP, DestTy: Orig->getType(), |
1779 | Name: Orig->getName() + Twine(".cast" )); |
1780 | } |
1781 | return GEP; |
1782 | }; |
1783 | |
1784 | for (auto const &E : FrameData.Spills) { |
1785 | Value *Def = E.first; |
1786 | auto SpillAlignment = Align(FrameData.getAlign(V: Def)); |
1787 | // Create a store instruction storing the value into the |
1788 | // coroutine frame. |
1789 | BasicBlock::iterator InsertPt; |
1790 | Type *ByValTy = nullptr; |
1791 | if (auto *Arg = dyn_cast<Argument>(Val: Def)) { |
1792 | // For arguments, we will place the store instruction right after |
1793 | // the coroutine frame pointer instruction, i.e. coro.begin. |
1794 | InsertPt = Shape.getInsertPtAfterFramePtr(); |
1795 | |
1796 | // If we're spilling an Argument, make sure we clear 'nocapture' |
1797 | // from the coroutine function. |
1798 | Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); |
1799 | |
1800 | if (Arg->hasByValAttr()) |
1801 | ByValTy = Arg->getParamByValType(); |
1802 | } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Val: Def)) { |
1803 | // Don't spill immediately after a suspend; splitting assumes |
1804 | // that the suspend will be followed by a branch. |
1805 | InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt(); |
1806 | } else { |
1807 | auto *I = cast<Instruction>(Val: Def); |
1808 | if (!DT.dominates(Def: CB, User: I)) { |
// If it is not dominated by CoroBegin, then the spill should be
// inserted immediately after the coroutine frame pointer is computed.
1811 | InsertPt = Shape.getInsertPtAfterFramePtr(); |
1812 | } else if (auto *II = dyn_cast<InvokeInst>(Val: I)) { |
1813 | // If we are spilling the result of the invoke instruction, split |
1814 | // the normal edge and insert the spill in the new block. |
1815 | auto *NewBB = SplitEdge(From: II->getParent(), To: II->getNormalDest()); |
1816 | InsertPt = NewBB->getTerminator()->getIterator(); |
1817 | } else if (isa<PHINode>(Val: I)) { |
// Skip past PHINodes and EH pad instructions.
1819 | BasicBlock *DefBlock = I->getParent(); |
1820 | if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: DefBlock->getTerminator())) |
1821 | InsertPt = splitBeforeCatchSwitch(CatchSwitch: CSI)->getIterator(); |
1822 | else |
1823 | InsertPt = DefBlock->getFirstInsertionPt(); |
1824 | } else { |
1825 | assert(!I->isTerminator() && "unexpected terminator" ); |
1826 | // For all other values, the spill is placed immediately after |
1827 | // the definition. |
1828 | InsertPt = I->getNextNode()->getIterator(); |
1829 | } |
1830 | } |
1831 | |
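// The spill address is a constant GEP into the frame; illustratively:
//   %x.spill.addr = getelementptr inbounds %f.Frame, ptr %frameptr,
//                   i32 0, i32 <field index>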
1832 | auto Index = FrameData.getFieldIndex(V: Def); |
1833 | Builder.SetInsertPoint(TheBB: InsertPt->getParent(), IP: InsertPt); |
1834 | auto *G = Builder.CreateConstInBoundsGEP2_32( |
1835 | Ty: FrameTy, Ptr: FramePtr, Idx0: 0, Idx1: Index, Name: Def->getName() + Twine(".spill.addr" )); |
1836 | if (ByValTy) { |
1837 | // For byval arguments, we need to store the pointed value in the frame, |
1838 | // instead of the pointer itself. |
1839 | auto *Value = Builder.CreateLoad(Ty: ByValTy, Ptr: Def); |
1840 | Builder.CreateAlignedStore(Val: Value, Ptr: G, Align: SpillAlignment); |
1841 | } else { |
1842 | Builder.CreateAlignedStore(Val: Def, Ptr: G, Align: SpillAlignment); |
1843 | } |
1844 | |
1845 | BasicBlock *CurrentBlock = nullptr; |
1846 | Value *CurrentReload = nullptr; |
1847 | for (auto *U : E.second) { |
// If we have not seen this use block yet, create a load instruction to
// reload the spilled value from the coroutine frame. For byval arguments,
// the frame GEP itself is used as the reloaded value.
1851 | if (CurrentBlock != U->getParent()) { |
1852 | CurrentBlock = U->getParent(); |
1853 | Builder.SetInsertPoint(TheBB: CurrentBlock, |
1854 | IP: CurrentBlock->getFirstInsertionPt()); |
1855 | |
1856 | auto *GEP = GetFramePointer(E.first); |
1857 | GEP->setName(E.first->getName() + Twine(".reload.addr" )); |
1858 | if (ByValTy) |
1859 | CurrentReload = GEP; |
1860 | else |
1861 | CurrentReload = Builder.CreateAlignedLoad( |
1862 | Ty: FrameTy->getElementType(N: FrameData.getFieldIndex(V: E.first)), Ptr: GEP, |
1863 | Align: SpillAlignment, Name: E.first->getName() + Twine(".reload" )); |
1864 | |
1865 | TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(V: Def); |
1866 | TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(V: Def); |
// Try our best to find a dbg.declare. If the spill is a temp, there may
// not be a direct dbg.declare. Walk up the load chain to find one from an
// alias.
1870 | if (F->getSubprogram()) { |
1871 | auto *CurDef = Def; |
1872 | while (DIs.empty() && DVRs.empty() && isa<LoadInst>(Val: CurDef)) { |
1873 | auto *LdInst = cast<LoadInst>(Val: CurDef); |
// Only consider same-type pointer-to-pointer loads.
1875 | if (LdInst->getPointerOperandType() != LdInst->getType()) |
1876 | break; |
1877 | CurDef = LdInst->getPointerOperand(); |
1878 | if (!isa<AllocaInst, LoadInst>(Val: CurDef)) |
1879 | break; |
1880 | DIs = findDbgDeclares(V: CurDef); |
1881 | DVRs = findDVRDeclares(V: CurDef); |
1882 | } |
1883 | } |
1884 | |
1885 | auto SalvageOne = [&](auto *DDI) { |
1886 | bool AllowUnresolved = false; |
// This dbg.declare is preserved for all coro-split function
// fragments. It will be unreachable in the main function, and
// processed by coro::salvageDebugInfo() in CoroCloner.
1890 | if (UseNewDbgInfoFormat) { |
1891 | DbgVariableRecord *NewDVR = new DbgVariableRecord( |
1892 | ValueAsMetadata::get(V: CurrentReload), DDI->getVariable(), |
1893 | DDI->getExpression(), DDI->getDebugLoc(), |
1894 | DbgVariableRecord::LocationType::Declare); |
1895 | Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore( |
1896 | DR: NewDVR, Here: Builder.GetInsertPoint()); |
1897 | } else { |
1898 | DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved) |
1899 | .insertDeclare(CurrentReload, DDI->getVariable(), |
1900 | DDI->getExpression(), DDI->getDebugLoc(), |
1901 | &*Builder.GetInsertPoint()); |
1902 | } |
1903 | // This dbg.declare is for the main function entry point. It |
1904 | // will be deleted in all coro-split functions. |
1905 | coro::salvageDebugInfo(ArgToAllocaMap, *DDI, Shape.OptimizeFrame, |
1906 | false /*UseEntryValue*/); |
1907 | }; |
1908 | for_each(Range&: DIs, F: SalvageOne); |
1909 | for_each(Range&: DVRs, F: SalvageOne); |
1910 | } |
1911 | |
// If we have a single-edge PHINode, remove it and replace it with a
// reload from the coroutine frame. (We already took care of multi-edge
// PHINodes by rewriting them in the rewritePHIs function).
1915 | if (auto *PN = dyn_cast<PHINode>(Val: U)) { |
1916 | assert(PN->getNumIncomingValues() == 1 && |
1917 | "unexpected number of incoming " |
1918 | "values in the PHINode" ); |
1919 | PN->replaceAllUsesWith(V: CurrentReload); |
1920 | PN->eraseFromParent(); |
1921 | continue; |
1922 | } |
1923 | |
// Replace all uses of Def in the current instruction with the
// reload.
1926 | U->replaceUsesOfWith(From: Def, To: CurrentReload); |
1927 | // Instructions are added to Def's user list if the attached |
1928 | // debug records use Def. Update those now. |
1929 | for (DbgVariableRecord &DVR : filterDbgVars(R: U->getDbgRecordRange())) |
1930 | DVR.replaceVariableLocationOp(OldValue: Def, NewValue: CurrentReload, AllowEmpty: true); |
1931 | } |
1932 | } |
1933 | |
1934 | BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent(); |
1935 | |
1936 | auto SpillBlock = FramePtrBB->splitBasicBlock( |
1937 | I: Shape.getInsertPtAfterFramePtr(), BBName: "AllocaSpillBB" ); |
1938 | SpillBlock->splitBasicBlock(I: &SpillBlock->front(), BBName: "PostSpill" ); |
1939 | Shape.AllocaSpillBlock = SpillBlock; |
1940 | |
1941 | // retcon and retcon.once lowering assumes all uses have been sunk. |
1942 | if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || |
1943 | Shape.ABI == coro::ABI::Async) { |
// If we found any allocas, replace all of their remaining uses with GEPs.
1945 | Builder.SetInsertPoint(TheBB: SpillBlock, IP: SpillBlock->begin()); |
1946 | for (const auto &P : FrameData.Allocas) { |
1947 | AllocaInst *Alloca = P.Alloca; |
1948 | auto *G = GetFramePointer(Alloca); |
1949 | |
1950 | // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) |
1951 | // here, as we are changing location of the instruction. |
1952 | G->takeName(V: Alloca); |
1953 | Alloca->replaceAllUsesWith(V: G); |
1954 | Alloca->eraseFromParent(); |
1955 | } |
1956 | return; |
1957 | } |
1958 | |
// If we found any allocas, replace all of their remaining uses with GEP
// instructions. To retain debuggability, we replace the uses of allocas in
// dbg.declares and dbg.values with the reload from the frame.
1962 | // Note: We cannot replace the alloca with GEP instructions indiscriminately, |
1963 | // as some of the uses may not be dominated by CoroBegin. |
1964 | Builder.SetInsertPoint(TheBB: Shape.AllocaSpillBlock, |
1965 | IP: Shape.AllocaSpillBlock->begin()); |
1966 | SmallVector<Instruction *, 4> UsersToUpdate; |
1967 | for (const auto &A : FrameData.Allocas) { |
1968 | AllocaInst *Alloca = A.Alloca; |
1969 | UsersToUpdate.clear(); |
1970 | for (User *U : Alloca->users()) { |
1971 | auto *I = cast<Instruction>(Val: U); |
1972 | if (DT.dominates(Def: CB, User: I)) |
1973 | UsersToUpdate.push_back(Elt: I); |
1974 | } |
1975 | if (UsersToUpdate.empty()) |
1976 | continue; |
1977 | auto *G = GetFramePointer(Alloca); |
1978 | G->setName(Alloca->getName() + Twine(".reload.addr" )); |
1979 | |
1980 | SmallVector<DbgVariableIntrinsic *, 4> DIs; |
1981 | SmallVector<DbgVariableRecord *> DbgVariableRecords; |
1982 | findDbgUsers(DbgInsts&: DIs, V: Alloca, DbgVariableRecords: &DbgVariableRecords); |
1983 | for (auto *DVI : DIs) |
1984 | DVI->replaceUsesOfWith(From: Alloca, To: G); |
1985 | for (auto *DVR : DbgVariableRecords) |
1986 | DVR->replaceVariableLocationOp(OldValue: Alloca, NewValue: G); |
1987 | |
1988 | for (Instruction *I : UsersToUpdate) { |
// Lifetime intrinsics that refer to members of the coroutine frame are
// meaningless, and keeping such meaningless lifetime intrinsics around can
// block further optimizations.
1992 | if (I->isLifetimeStartOrEnd()) { |
1993 | I->eraseFromParent(); |
1994 | continue; |
1995 | } |
1996 | |
1997 | I->replaceUsesOfWith(From: Alloca, To: G); |
1998 | } |
1999 | } |
2000 | Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr()); |
2001 | for (const auto &A : FrameData.Allocas) { |
2002 | AllocaInst *Alloca = A.Alloca; |
2003 | if (A.MayWriteBeforeCoroBegin) { |
2004 | // isEscaped really means potentially modified before CoroBegin. |
2005 | if (Alloca->isArrayAllocation()) |
2006 | report_fatal_error( |
2007 | reason: "Coroutines cannot handle copying of array allocas yet" ); |
2008 | |
2009 | auto *G = GetFramePointer(Alloca); |
2010 | auto *Value = Builder.CreateLoad(Ty: Alloca->getAllocatedType(), Ptr: Alloca); |
2011 | Builder.CreateStore(Val: Value, Ptr: G); |
2012 | } |
2013 | // For each alias to Alloca created before CoroBegin but used after |
// CoroBegin, we recreate them after CoroBegin by applying the offset
2015 | // to the pointer in the frame. |
2016 | for (const auto &Alias : A.Aliases) { |
2017 | auto *FramePtr = GetFramePointer(Alloca); |
2018 | auto &Value = *Alias.second; |
2019 | auto ITy = IntegerType::get(C, NumBits: Value.getBitWidth()); |
2020 | auto *AliasPtr = |
2021 | Builder.CreatePtrAdd(Ptr: FramePtr, Offset: ConstantInt::get(Ty: ITy, V: Value)); |
2022 | Alias.first->replaceUsesWithIf( |
2023 | New: AliasPtr, ShouldReplace: [&](Use &U) { return DT.dominates(Def: CB, U); }); |
2024 | } |
2025 | } |
2026 | |
// PromiseAlloca is not collected in FrameData.Allocas, so the code above
// does not handle the case where the PromiseAlloca is written to before
// CoroBegin. That can be problematic in edge cases. See
// https://github.com/llvm/llvm-project/issues/57861 for an example.
2031 | if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) { |
2032 | AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca; |
// Check whether there is any memory access to the promise alloca before
// CoroBegin.
2034 | bool HasAccessingPromiseBeforeCB = llvm::any_of(Range: PA->uses(), P: [&](Use &U) { |
2035 | auto *Inst = dyn_cast<Instruction>(Val: U.getUser()); |
2036 | if (!Inst || DT.dominates(Def: CB, User: Inst)) |
2037 | return false; |
2038 | |
2039 | if (auto *CI = dyn_cast<CallInst>(Val: Inst)) { |
// It is fine if the call doesn't write to the promise.
// This is possible for @llvm.coro.id intrinsics, which
// take the promise as their second argument purely as a
// marker.
2044 | if (CI->onlyReadsMemory() || |
2045 | CI->onlyReadsMemory(OpNo: CI->getArgOperandNo(U: &U))) |
2046 | return false; |
2047 | return true; |
2048 | } |
2049 | |
2050 | return isa<StoreInst>(Val: Inst) || |
2051 | // It may take too much time to track the uses. |
2052 | // Be conservative about the case the use may escape. |
2053 | isa<GetElementPtrInst>(Val: Inst) || |
2054 | // There would always be a bitcast for the promise alloca |
2055 | // before we enabled Opaque pointers. And now given |
2056 | // opaque pointers are enabled by default. This should be |
2057 | // fine. |
2058 | isa<BitCastInst>(Val: Inst); |
2059 | }); |
2060 | if (HasAccessingPromiseBeforeCB) { |
2061 | Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr()); |
2062 | auto *G = GetFramePointer(PA); |
2063 | auto *Value = Builder.CreateLoad(Ty: PA->getAllocatedType(), Ptr: PA); |
2064 | Builder.CreateStore(Val: Value, Ptr: G); |
2065 | } |
2066 | } |
2067 | } |
2068 | |
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
2070 | // PHI in InsertedBB. |
2071 | static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB, |
2072 | BasicBlock *InsertedBB, |
2073 | BasicBlock *PredBB, |
2074 | PHINode *UntilPHI = nullptr) { |
2075 | auto *PN = cast<PHINode>(Val: &SuccBB->front()); |
2076 | do { |
2077 | int Index = PN->getBasicBlockIndex(BB: InsertedBB); |
2078 | Value *V = PN->getIncomingValue(i: Index); |
2079 | PHINode *InputV = PHINode::Create( |
2080 | Ty: V->getType(), NumReservedValues: 1, NameStr: V->getName() + Twine("." ) + SuccBB->getName()); |
2081 | InputV->insertBefore(InsertPos: InsertedBB->begin()); |
2082 | InputV->addIncoming(V, BB: PredBB); |
2083 | PN->setIncomingValue(i: Index, V: InputV); |
2084 | PN = dyn_cast<PHINode>(Val: PN->getNextNode()); |
2085 | } while (PN != UntilPHI); |
2086 | } |
2087 | |
2088 | // Rewrites the PHI Nodes in a cleanuppad. |
2089 | static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB, |
2090 | CleanupPadInst *CleanupPad) { |
2091 | // For every incoming edge to a CleanupPad we will create a new block holding |
2092 | // all incoming values in single-value PHI nodes. We will then create another |
// block to act as a dispatcher (as all unwind edges for related EH blocks
2094 | // must be the same). |
2095 | // |
2096 | // cleanuppad: |
2097 | // %2 = phi i32[%0, %catchswitch], [%1, %catch.1] |
2098 | // %3 = cleanuppad within none [] |
2099 | // |
2100 | // It will create: |
2101 | // |
2102 | // cleanuppad.corodispatch |
2103 | // %2 = phi i8[0, %catchswitch], [1, %catch.1] |
2104 | // %3 = cleanuppad within none [] |
// switch i8 %2, label %unreachable
2106 | // [i8 0, label %cleanuppad.from.catchswitch |
2107 | // i8 1, label %cleanuppad.from.catch.1] |
2108 | // cleanuppad.from.catchswitch: |
2109 | // %4 = phi i32 [%0, %catchswitch] |
// br label %cleanuppad
2111 | // cleanuppad.from.catch.1: |
2112 | // %6 = phi i32 [%1, %catch.1] |
// br label %cleanuppad
2114 | // cleanuppad: |
2115 | // %8 = phi i32 [%4, %cleanuppad.from.catchswitch], |
2116 | // [%6, %cleanuppad.from.catch.1] |
2117 | |
2118 | // Unreachable BB, in case switching on an invalid value in the dispatcher. |
2119 | auto *UnreachBB = BasicBlock::Create( |
2120 | Context&: CleanupPadBB->getContext(), Name: "unreachable" , Parent: CleanupPadBB->getParent()); |
2121 | IRBuilder<> Builder(UnreachBB); |
2122 | Builder.CreateUnreachable(); |
2123 | |
2124 | // Create a new cleanuppad which will be the dispatcher. |
2125 | auto *NewCleanupPadBB = |
2126 | BasicBlock::Create(Context&: CleanupPadBB->getContext(), |
2127 | Name: CleanupPadBB->getName() + Twine(".corodispatch" ), |
2128 | Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB); |
2129 | Builder.SetInsertPoint(NewCleanupPadBB); |
2130 | auto *SwitchType = Builder.getInt8Ty(); |
2131 | auto *SetDispatchValuePN = |
2132 | Builder.CreatePHI(Ty: SwitchType, NumReservedValues: pred_size(BB: CleanupPadBB)); |
2133 | CleanupPad->removeFromParent(); |
2134 | CleanupPad->insertAfter(InsertPos: SetDispatchValuePN); |
2135 | auto *SwitchOnDispatch = Builder.CreateSwitch(V: SetDispatchValuePN, Dest: UnreachBB, |
2136 | NumCases: pred_size(BB: CleanupPadBB)); |
2137 | |
2138 | int SwitchIndex = 0; |
2139 | SmallVector<BasicBlock *, 8> Preds(predecessors(BB: CleanupPadBB)); |
2140 | for (BasicBlock *Pred : Preds) { |
2141 | // Create a new cleanuppad and move the PHI values to there. |
2142 | auto *CaseBB = BasicBlock::Create(Context&: CleanupPadBB->getContext(), |
2143 | Name: CleanupPadBB->getName() + |
2144 | Twine(".from." ) + Pred->getName(), |
2145 | Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB); |
2146 | updatePhiNodes(DestBB: CleanupPadBB, OldPred: Pred, NewPred: CaseBB); |
2147 | CaseBB->setName(CleanupPadBB->getName() + Twine(".from." ) + |
2148 | Pred->getName()); |
2149 | Builder.SetInsertPoint(CaseBB); |
2150 | Builder.CreateBr(Dest: CleanupPadBB); |
2151 | movePHIValuesToInsertedBlock(SuccBB: CleanupPadBB, InsertedBB: CaseBB, PredBB: NewCleanupPadBB); |
2152 | |
2153 | // Update this Pred to the new unwind point. |
2154 | setUnwindEdgeTo(TI: Pred->getTerminator(), Succ: NewCleanupPadBB); |
2155 | |
// Set up the switch in the dispatcher.
2157 | auto *SwitchConstant = ConstantInt::get(Ty: SwitchType, V: SwitchIndex); |
2158 | SetDispatchValuePN->addIncoming(V: SwitchConstant, BB: Pred); |
2159 | SwitchOnDispatch->addCase(OnVal: SwitchConstant, Dest: CaseBB); |
2160 | SwitchIndex++; |
2161 | } |
2162 | } |
2163 | |
2164 | static void cleanupSinglePredPHIs(Function &F) { |
2165 | SmallVector<PHINode *, 32> Worklist; |
2166 | for (auto &BB : F) { |
2167 | for (auto &Phi : BB.phis()) { |
2168 | if (Phi.getNumIncomingValues() == 1) { |
2169 | Worklist.push_back(Elt: &Phi); |
2170 | } else |
2171 | break; |
2172 | } |
2173 | } |
2174 | while (!Worklist.empty()) { |
2175 | auto *Phi = Worklist.pop_back_val(); |
2176 | auto *OriginalValue = Phi->getIncomingValue(i: 0); |
2177 | Phi->replaceAllUsesWith(V: OriginalValue); |
2178 | } |
2179 | } |
2180 | |
2181 | static void rewritePHIs(BasicBlock &BB) { |
// For every incoming edge we will create a block holding all
// incoming values in single-value PHI nodes.
2184 | // |
2185 | // loop: |
2186 | // %n.val = phi i32[%n, %entry], [%inc, %loop] |
2187 | // |
2188 | // It will create: |
2189 | // |
2190 | // loop.from.entry: |
2191 | // %n.loop.pre = phi i32 [%n, %entry] |
// br label %loop
2193 | // loop.from.loop: |
2194 | // %inc.loop.pre = phi i32 [%inc, %loop] |
// br label %loop
2196 | // |
2197 | // After this rewrite, further analysis will ignore any phi nodes with more |
2198 | // than one incoming edge. |
2199 | |
2200 | // TODO: Simplify PHINodes in the basic block to remove duplicate |
2201 | // predecessors. |
2202 | |
2203 | // Special case for CleanupPad: all EH blocks must have the same unwind edge |
2204 | // so we need to create an additional "dispatcher" block. |
2205 | if (auto *CleanupPad = |
2206 | dyn_cast_or_null<CleanupPadInst>(Val: BB.getFirstNonPHI())) { |
2207 | SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB)); |
2208 | for (BasicBlock *Pred : Preds) { |
2209 | if (CatchSwitchInst *CS = |
2210 | dyn_cast<CatchSwitchInst>(Val: Pred->getTerminator())) { |
// CleanupPad with a CatchSwitch predecessor: therefore this is an
// unwind destination that needs to be handled specially.
2213 | assert(CS->getUnwindDest() == &BB); |
2214 | (void)CS; |
2215 | rewritePHIsForCleanupPad(CleanupPadBB: &BB, CleanupPad); |
2216 | return; |
2217 | } |
2218 | } |
2219 | } |
2220 | |
2221 | LandingPadInst *LandingPad = nullptr; |
2222 | PHINode *ReplPHI = nullptr; |
2223 | if ((LandingPad = dyn_cast_or_null<LandingPadInst>(Val: BB.getFirstNonPHI()))) { |
2224 | // ehAwareSplitEdge will clone the LandingPad in all the edge blocks. |
2225 | // We replace the original landing pad with a PHINode that will collect the |
2226 | // results from all of them. |
2227 | ReplPHI = PHINode::Create(Ty: LandingPad->getType(), NumReservedValues: 1, NameStr: "" ); |
2228 | ReplPHI->insertBefore(InsertPos: LandingPad->getIterator()); |
2229 | ReplPHI->takeName(V: LandingPad); |
2230 | LandingPad->replaceAllUsesWith(V: ReplPHI); |
2231 | // We will erase the original landing pad at the end of this function after |
2232 | // ehAwareSplitEdge cloned it in the transition blocks. |
2233 | } |
2234 | |
2235 | SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB)); |
2236 | for (BasicBlock *Pred : Preds) { |
2237 | auto *IncomingBB = ehAwareSplitEdge(BB: Pred, Succ: &BB, OriginalPad: LandingPad, LandingPadReplacement: ReplPHI); |
2238 | IncomingBB->setName(BB.getName() + Twine(".from." ) + Pred->getName()); |
2239 | |
2240 | // Stop the moving of values at ReplPHI, as this is either null or the PHI |
2241 | // that replaced the landing pad. |
2242 | movePHIValuesToInsertedBlock(SuccBB: &BB, InsertedBB: IncomingBB, PredBB: Pred, UntilPHI: ReplPHI); |
2243 | } |
2244 | |
2245 | if (LandingPad) { |
// Calls to the ehAwareSplitEdge function cloned the original landing
// pad; we no longer need it.
2248 | LandingPad->eraseFromParent(); |
2249 | } |
2250 | } |
2251 | |
2252 | static void rewritePHIs(Function &F) { |
2253 | SmallVector<BasicBlock *, 8> WorkList; |
2254 | |
2255 | for (BasicBlock &BB : F) |
2256 | if (auto *PN = dyn_cast<PHINode>(Val: &BB.front())) |
2257 | if (PN->getNumIncomingValues() > 1) |
2258 | WorkList.push_back(Elt: &BB); |
2259 | |
2260 | for (BasicBlock *BB : WorkList) |
2261 | rewritePHIs(BB&: *BB); |
2262 | } |
2263 | |
/// Default materializable callback
// Check for instructions that we can recreate on resume as opposed to
// spilling the result into the coroutine frame.
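//
// For example (illustrative IR, not from a test):
//
//   %gep = getelementptr inbounds i32, ptr %p, i64 %i
//   <suspend>
//   %v = load i32, ptr %gep
//
// Rather than spilling %gep to the frame, the getelementptr can be recreated
// after the suspend point, provided its operands are available there.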
2267 | bool coro::defaultMaterializable(Instruction &V) { |
2268 | return (isa<CastInst>(Val: &V) || isa<GetElementPtrInst>(Val: &V) || |
2269 | isa<BinaryOperator>(Val: &V) || isa<CmpInst>(Val: &V) || isa<SelectInst>(Val: &V)); |
2270 | } |
2271 | |
2272 | // Check for structural coroutine intrinsics that should not be spilled into |
2273 | // the coroutine frame. |
2274 | static bool isCoroutineStructureIntrinsic(Instruction &I) { |
2275 | return isa<CoroIdInst>(Val: &I) || isa<CoroSaveInst>(Val: &I) || |
2276 | isa<CoroSuspendInst>(Val: &I); |
2277 | } |
2278 | |
2279 | // For each instruction identified as materializable across the suspend point, |
2280 | // and its associated DAG of other rematerializable instructions, |
2281 | // recreate the DAG of instructions after the suspend point. |
2282 | static void rewriteMaterializableInstructions( |
2283 | const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> |
2284 | &AllRemats) { |
// This has to be done in 2 phases:
// 1. Do the remats and record the required defs to be replaced in the
//    original use instructions.
// 2. Once all the remats are complete, replace the uses in the final
//    instructions with the new defs.
2290 | typedef struct { |
2291 | Instruction *Use; |
2292 | Instruction *Def; |
2293 | Instruction *Remat; |
2294 | } ProcessNode; |
2295 | |
2296 | SmallVector<ProcessNode> FinalInstructionsToProcess; |
2297 | |
2298 | for (const auto &E : AllRemats) { |
2299 | Instruction *Use = E.first; |
2300 | Instruction *CurrentMaterialization = nullptr; |
2301 | RematGraph *RG = E.second.get(); |
2302 | ReversePostOrderTraversal<RematGraph *> RPOT(RG); |
2303 | SmallVector<Instruction *> InstructionsToProcess; |
2304 | |
2305 | // If the target use is actually a suspend instruction then we have to |
2306 | // insert the remats into the end of the predecessor (there should only be |
2307 | // one). This is so that suspend blocks always have the suspend instruction |
2308 | // as the first instruction. |
2309 | auto InsertPoint = &*Use->getParent()->getFirstInsertionPt(); |
2310 | if (isa<AnyCoroSuspendInst>(Val: Use)) { |
2311 | BasicBlock *SuspendPredecessorBlock = |
2312 | Use->getParent()->getSinglePredecessor(); |
2313 | assert(SuspendPredecessorBlock && "malformed coro suspend instruction" ); |
2314 | InsertPoint = SuspendPredecessorBlock->getTerminator(); |
2315 | } |
2316 | |
2317 | // Note: skip the first instruction as this is the actual use that we're |
2318 | // rematerializing everything for. |
2319 | auto I = RPOT.begin(); |
2320 | ++I; |
2321 | for (; I != RPOT.end(); ++I) { |
2322 | Instruction *D = (*I)->Node; |
2323 | CurrentMaterialization = D->clone(); |
2324 | CurrentMaterialization->setName(D->getName()); |
2325 | CurrentMaterialization->insertBefore(InsertPos: InsertPoint); |
2326 | InsertPoint = CurrentMaterialization; |
2327 | |
2328 | // Replace all uses of Def in the instructions being added as part of this |
2329 | // rematerialization group |
2330 | for (auto &I : InstructionsToProcess) |
2331 | I->replaceUsesOfWith(From: D, To: CurrentMaterialization); |
2332 | |
// Don't replace the final use at this point as this can cause problems
// for other materializations. Instead, for any final use that uses a
// def that's being rematerialized, record the replacement values.
2336 | for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i) |
2337 | if (Use->getOperand(i) == D) // Is this operand pointing to oldval? |
2338 | FinalInstructionsToProcess.push_back( |
2339 | Elt: {.Use: Use, .Def: D, .Remat: CurrentMaterialization}); |
2340 | |
2341 | InstructionsToProcess.push_back(Elt: CurrentMaterialization); |
2342 | } |
2343 | } |
2344 | |
// Finally, replace the uses with the defs that we've just rematerialized.
2346 | for (auto &R : FinalInstructionsToProcess) { |
2347 | if (auto *PN = dyn_cast<PHINode>(Val: R.Use)) { |
2348 | assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " |
2349 | "values in the PHINode" ); |
2350 | PN->replaceAllUsesWith(V: R.Remat); |
2351 | PN->eraseFromParent(); |
2352 | continue; |
2353 | } |
2354 | R.Use->replaceUsesOfWith(From: R.Def, To: R.Remat); |
2355 | } |
2356 | } |
2357 | |
2358 | // Splits the block at a particular instruction unless it is the first |
2359 | // instruction in the block with a single predecessor. |
2360 | static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { |
2361 | auto *BB = I->getParent(); |
2362 | if (&BB->front() == I) { |
2363 | if (BB->getSinglePredecessor()) { |
2364 | BB->setName(Name); |
2365 | return BB; |
2366 | } |
2367 | } |
2368 | return BB->splitBasicBlock(I, BBName: Name); |
2369 | } |
2370 | |
// Split above and below a particular instruction so that it
// ends up alone in its own block.
2373 | static void splitAround(Instruction *I, const Twine &Name) { |
2374 | splitBlockIfNotFirst(I, Name); |
2375 | splitBlockIfNotFirst(I: I->getNextNode(), Name: "After" + Name); |
2376 | } |
2377 | |
2378 | static bool isSuspendBlock(BasicBlock *BB) { |
2379 | return isa<AnyCoroSuspendInst>(Val: BB->front()); |
2380 | } |
2381 | |
2382 | typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet; |
2383 | |
2384 | /// Does control flow starting at the given block ever reach a suspend |
2385 | /// instruction before reaching a block in VisitedOrFreeBBs? |
2386 | static bool isSuspendReachableFrom(BasicBlock *From, |
2387 | VisitedBlocksSet &VisitedOrFreeBBs) { |
2388 | // Eagerly try to add this block to the visited set. If it's already |
2389 | // there, stop recursing; this path doesn't reach a suspend before |
2390 | // either looping or reaching a freeing block. |
2391 | if (!VisitedOrFreeBBs.insert(Ptr: From).second) |
2392 | return false; |
2393 | |
2394 | // We assume that we'll already have split suspends into their own blocks. |
2395 | if (isSuspendBlock(BB: From)) |
2396 | return true; |
2397 | |
2398 | // Recurse on the successors. |
2399 | for (auto *Succ : successors(BB: From)) { |
2400 | if (isSuspendReachableFrom(From: Succ, VisitedOrFreeBBs)) |
2401 | return true; |
2402 | } |
2403 | |
2404 | return false; |
2405 | } |
2406 | |
2407 | /// Is the given alloca "local", i.e. bounded in lifetime to not cross a |
2408 | /// suspend point? |
2409 | static bool isLocalAlloca(CoroAllocaAllocInst *AI) { |
// Seed the visited set with all the basic blocks containing a free
// so that we won't search beyond them.
2412 | VisitedBlocksSet VisitedOrFreeBBs; |
2413 | for (auto *User : AI->users()) { |
2414 | if (auto FI = dyn_cast<CoroAllocaFreeInst>(Val: User)) |
2415 | VisitedOrFreeBBs.insert(Ptr: FI->getParent()); |
2416 | } |
2417 | |
2418 | return !isSuspendReachableFrom(From: AI->getParent(), VisitedOrFreeBBs); |
2419 | } |
2420 | |
2421 | /// After we split the coroutine, will the given basic block be along |
2422 | /// an obvious exit path for the resumption function? |
2423 | static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, |
2424 | unsigned depth = 3) { |
2425 | // If we've bottomed out our depth count, stop searching and assume |
2426 | // that the path might loop back. |
2427 | if (depth == 0) return false; |
2428 | |
2429 | // If this is a suspend block, we're about to exit the resumption function. |
2430 | if (isSuspendBlock(BB)) return true; |
2431 | |
2432 | // Recurse into the successors. |
2433 | for (auto *Succ : successors(BB)) { |
2434 | if (!willLeaveFunctionImmediatelyAfter(BB: Succ, depth: depth - 1)) |
2435 | return false; |
2436 | } |
2437 | |
2438 | // If none of the successors leads back in a loop, we're on an exit/abort. |
2439 | return true; |
2440 | } |
2441 | |
2442 | static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) { |
2443 | // Look for a free that isn't sufficiently obviously followed by |
2444 | // either a suspend or a termination, i.e. something that will leave |
2445 | // the coro resumption frame. |
2446 | for (auto *U : AI->users()) { |
2447 | auto FI = dyn_cast<CoroAllocaFreeInst>(Val: U); |
2448 | if (!FI) continue; |
2449 | |
2450 | if (!willLeaveFunctionImmediatelyAfter(BB: FI->getParent())) |
2451 | return true; |
2452 | } |
2453 | |
2454 | // If we never found one, we don't need a stack save. |
2455 | return false; |
2456 | } |
2457 | |
2458 | /// Turn each of the given local allocas into a normal (dynamic) alloca |
2459 | /// instruction. |
2460 | static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas, |
2461 | SmallVectorImpl<Instruction*> &DeadInsts) { |
2462 | for (auto *AI : LocalAllocas) { |
2463 | IRBuilder<> Builder(AI); |
2464 | |
// Save the stack depth. Try to avoid doing this if the stackrestore
// would immediately precede leaving the function (a return or suspend).
2467 | Value *StackSave = nullptr; |
2468 | if (localAllocaNeedsStackSave(AI)) |
2469 | StackSave = Builder.CreateStackSave(); |
2470 | |
2471 | // Allocate memory. |
2472 | auto Alloca = Builder.CreateAlloca(Ty: Builder.getInt8Ty(), ArraySize: AI->getSize()); |
2473 | Alloca->setAlignment(AI->getAlignment()); |
2474 | |
2475 | for (auto *U : AI->users()) { |
2476 | // Replace gets with the allocation. |
2477 | if (isa<CoroAllocaGetInst>(Val: U)) { |
2478 | U->replaceAllUsesWith(V: Alloca); |
2479 | |
2480 | // Replace frees with stackrestores. This is safe because |
2481 | // alloca.alloc is required to obey a stack discipline, although we |
2482 | // don't enforce that structurally. |
2483 | } else { |
2484 | auto FI = cast<CoroAllocaFreeInst>(Val: U); |
2485 | if (StackSave) { |
2486 | Builder.SetInsertPoint(FI); |
2487 | Builder.CreateStackRestore(Ptr: StackSave); |
2488 | } |
2489 | } |
2490 | DeadInsts.push_back(Elt: cast<Instruction>(Val: U)); |
2491 | } |
2492 | |
2493 | DeadInsts.push_back(Elt: AI); |
2494 | } |
2495 | } |
2496 | |
2497 | /// Turn the given coro.alloca.alloc call into a dynamic allocation. |
2498 | /// This happens during the all-instructions iteration, so it must not |
2499 | /// delete the call. |
2500 | static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI, |
2501 | coro::Shape &Shape, |
2502 | SmallVectorImpl<Instruction*> &DeadInsts) { |
2503 | IRBuilder<> Builder(AI); |
2504 | auto Alloc = Shape.emitAlloc(Builder, Size: AI->getSize(), CG: nullptr); |
2505 | |
2506 | for (User *U : AI->users()) { |
2507 | if (isa<CoroAllocaGetInst>(Val: U)) { |
2508 | U->replaceAllUsesWith(V: Alloc); |
2509 | } else { |
2510 | auto FI = cast<CoroAllocaFreeInst>(Val: U); |
2511 | Builder.SetInsertPoint(FI); |
2512 | Shape.emitDealloc(Builder, Ptr: Alloc, CG: nullptr); |
2513 | } |
2514 | DeadInsts.push_back(Elt: cast<Instruction>(Val: U)); |
2515 | } |
2516 | |
2517 | // Push this on last so that it gets deleted after all the others. |
2518 | DeadInsts.push_back(Elt: AI); |
2519 | |
2520 | // Return the new allocation value so that we can check for needed spills. |
2521 | return cast<Instruction>(Val: Alloc); |
2522 | } |
2523 | |
2524 | /// Get the current swifterror value. |
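///
/// The placeholder emitted here is roughly (illustrative):
///
///   %v = call <ValueTy> null()
///
/// i.e. a call through a null function pointer. It is recorded in
/// Shape.SwiftErrorOps and fixed up with the real swifterror machinery when
/// the coroutine is split.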
2525 | static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, |
2526 | coro::Shape &Shape) { |
2527 | // Make a fake function pointer as a sort of intrinsic. |
2528 | auto FnTy = FunctionType::get(Result: ValueTy, Params: {}, isVarArg: false); |
2529 | auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy()); |
2530 | |
2531 | auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: {}); |
2532 | Shape.SwiftErrorOps.push_back(Elt: Call); |
2533 | |
2534 | return Call; |
2535 | } |
2536 | |
2537 | /// Set the given value as the current swifterror value. |
2538 | /// |
2539 | /// Returns a slot that can be used as a swifterror slot. |
2540 | static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V, |
2541 | coro::Shape &Shape) { |
2542 | // Make a fake function pointer as a sort of intrinsic. |
2543 | auto FnTy = FunctionType::get(Result: Builder.getPtrTy(), |
2544 | Params: {V->getType()}, isVarArg: false); |
2545 | auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy()); |
2546 | |
2547 | auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: { V }); |
2548 | Shape.SwiftErrorOps.push_back(Elt: Call); |
2549 | |
2550 | return Call; |
2551 | } |
2552 | |
/// Set the swifterror value from the given alloca before a call,
/// then put it back in the alloca afterwards.
2555 | /// |
2556 | /// Returns an address that will stand in for the swifterror slot |
2557 | /// until splitting. |
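///
/// The rewrite around a call looks roughly like this (illustrative; the
/// null callees are the placeholder "intrinsics" emitted above):
///
///   %v0   = load <ValueTy>, ptr %alloca
///   %slot = call ptr null(<ValueTy> %v0)   ; set the swifterror value
///   call void @foo(ptr swifterror %slot)
///   %v1   = call <ValueTy> null()          ; get the swifterror value
///   store <ValueTy> %v1, ptr %alloca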
2558 | static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call, |
2559 | AllocaInst *Alloca, |
2560 | coro::Shape &Shape) { |
2561 | auto ValueTy = Alloca->getAllocatedType(); |
2562 | IRBuilder<> Builder(Call); |
2563 | |
2564 | // Load the current value from the alloca and set it as the |
2565 | // swifterror value. |
2566 | auto ValueBeforeCall = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca); |
2567 | auto Addr = emitSetSwiftErrorValue(Builder, V: ValueBeforeCall, Shape); |
2568 | |
2569 | // Move to after the call. Since swifterror only has a guaranteed |
2570 | // value on normal exits, we can ignore implicit and explicit unwind |
2571 | // edges. |
2572 | if (isa<CallInst>(Val: Call)) { |
2573 | Builder.SetInsertPoint(Call->getNextNode()); |
2574 | } else { |
2575 | auto Invoke = cast<InvokeInst>(Val: Call); |
2576 | Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg()); |
2577 | } |
2578 | |
2579 | // Get the current swifterror value and store it to the alloca. |
2580 | auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape); |
2581 | Builder.CreateStore(Val: ValueAfterCall, Ptr: Alloca); |
2582 | |
2583 | return Addr; |
2584 | } |
2585 | |
2586 | /// Eliminate a formerly-swifterror alloca by inserting the get/set |
2587 | /// intrinsics and attempting to MemToReg the alloca away. |
2588 | static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca, |
2589 | coro::Shape &Shape) { |
2590 | for (Use &Use : llvm::make_early_inc_range(Range: Alloca->uses())) { |
2591 | // swifterror values can only be used in very specific ways. |
2592 | // We take advantage of that here. |
2593 | auto User = Use.getUser(); |
2594 | if (isa<LoadInst>(Val: User) || isa<StoreInst>(Val: User)) |
2595 | continue; |
2596 | |
2597 | assert(isa<CallInst>(User) || isa<InvokeInst>(User)); |
2598 | auto Call = cast<Instruction>(Val: User); |
2599 | |
2600 | auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape); |
2601 | |
2602 | // Use the returned slot address as the call argument. |
2603 | Use.set(Addr); |
2604 | } |
2605 | |
2606 | // All the uses should be loads and stores now. |
2607 | assert(isAllocaPromotable(Alloca)); |
2608 | } |
2609 | |
2610 | /// "Eliminate" a swifterror argument by reducing it to the alloca case |
/// and then loading and storing in the prologue and epilogue.
2612 | /// |
2613 | /// The argument keeps the swifterror flag. |
2614 | static void eliminateSwiftErrorArgument(Function &F, Argument &Arg, |
2615 | coro::Shape &Shape, |
2616 | SmallVectorImpl<AllocaInst*> &AllocasToPromote) { |
2617 | IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); |
2618 | |
2619 | auto ArgTy = cast<PointerType>(Val: Arg.getType()); |
2620 | auto ValueTy = PointerType::getUnqual(C&: F.getContext()); |
2621 | |
2622 | // Reduce to the alloca case: |
2623 | |
2624 | // Create an alloca and replace all uses of the arg with it. |
2625 | auto Alloca = Builder.CreateAlloca(Ty: ValueTy, AddrSpace: ArgTy->getAddressSpace()); |
2626 | Arg.replaceAllUsesWith(V: Alloca); |
2627 | |
2628 | // Set an initial value in the alloca. swifterror is always null on entry. |
2629 | auto InitialValue = Constant::getNullValue(Ty: ValueTy); |
2630 | Builder.CreateStore(Val: InitialValue, Ptr: Alloca); |
2631 | |
2632 | // Find all the suspends in the function and save and restore around them. |
2633 | for (auto *Suspend : Shape.CoroSuspends) { |
2634 | (void) emitSetAndGetSwiftErrorValueAround(Call: Suspend, Alloca, Shape); |
2635 | } |
2636 | |
2637 | // Find all the coro.ends in the function and restore the error value. |
2638 | for (auto *End : Shape.CoroEnds) { |
2639 | Builder.SetInsertPoint(End); |
2640 | auto FinalValue = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca); |
2641 | (void) emitSetSwiftErrorValue(Builder, V: FinalValue, Shape); |
2642 | } |
2643 | |
2644 | // Now we can use the alloca logic. |
2645 | AllocasToPromote.push_back(Elt: Alloca); |
2646 | eliminateSwiftErrorAlloca(F, Alloca, Shape); |
2647 | } |

/// Eliminate all problematic uses of swifterror arguments and allocas
/// from the function. We'll fix them up later when splitting the function.
static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
  SmallVector<AllocaInst *, 4> AllocasToPromote;

  // Look for a swifterror argument.
  for (auto &Arg : F.args()) {
    if (!Arg.hasSwiftErrorAttr())
      continue;

    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
    break;
  }

  // Look for swifterror allocas.
  for (auto &Inst : F.getEntryBlock()) {
    auto Alloca = dyn_cast<AllocaInst>(&Inst);
    if (!Alloca || !Alloca->isSwiftError())
      continue;

    // Clear the swifterror flag.
    Alloca->setSwiftError(false);

    AllocasToPromote.push_back(Alloca);
    eliminateSwiftErrorAlloca(F, Alloca, Shape);
  }

  // If we have any allocas to promote, compute a dominator tree and
  // promote them en masse.
  if (!AllocasToPromote.empty()) {
    DominatorTree DT(F);
    PromoteMemToReg(AllocasToPromote, DT);
  }
}

/// The retcon and retcon.once conventions assume that all spill uses can be
/// sunk after the coro.begin intrinsic.
static void sinkSpillUsesAfterCoroBegin(Function &F,
                                        const FrameDataInfo &FrameData,
                                        CoroBeginInst *CoroBegin) {
  DominatorTree Dom(F);

  SmallSetVector<Instruction *, 32> ToMove;
  SmallVector<Instruction *, 32> Worklist;

  // Collect all users that precede coro.begin.
  for (auto *Def : FrameData.getAllDefs()) {
    for (User *U : Def->users()) {
      auto Inst = cast<Instruction>(U);
      if (Inst->getParent() != CoroBegin->getParent() ||
          Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }
  // Recursively collect users before coro.begin.
  while (!Worklist.empty()) {
    auto *Def = Worklist.pop_back_val();
    for (User *U : Def->users()) {
      auto Inst = cast<Instruction>(U);
      if (Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }

  // Sort by dominance.
  SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
  llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
    // If A dominates B, it should precede (<) B.
    return Dom.dominates(A, B);
  });

  Instruction *InsertPt = CoroBegin->getNextNode();
  for (Instruction *Inst : InsertionList)
    Inst->moveBefore(InsertPt);
}
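
// A hypothetical example of the sinking: in a retcon coroutine, a use of a
// spilled value that was emitted before coro.begin, such as
//   %gep = getelementptr i8, ptr %spill, i64 4
//   %hdl = call ptr @llvm.coro.begin(token %id, ptr %buffer)
// is moved (together with anything transitively depending on it that also
// precedes coro.begin) to just after coro.begin, in dominance order, so that
// every access to a spilled value happens once the frame is established.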

/// For each local variable whose users all live inside a single suspended
/// region, sink its lifetime.start marker into the block that follows the
/// suspend. Doing so minimizes the lifetime of each variable, hence
/// minimizing the amount of data we end up putting on the frame.
static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
                                     SuspendCrossingInfo &Checker) {
  if (F.hasOptNone())
    return;

  DominatorTree DT(F);

  // Collect all possible basic blocks which may dominate all uses of allocas.
  SmallPtrSet<BasicBlock *, 4> DomSet;
  DomSet.insert(&F.getEntryBlock());
  for (auto *CSI : Shape.CoroSuspends) {
    BasicBlock *SuspendBlock = CSI->getParent();
    assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
           "should have split coro.suspend into its own block");
    DomSet.insert(SuspendBlock->getSingleSuccessor());
  }

  for (Instruction &I : instructions(F)) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&I);
    if (!AI)
      continue;

    for (BasicBlock *DomBB : DomSet) {
      bool Valid = true;
      SmallVector<Instruction *, 1> Lifetimes;

      auto isLifetimeStart = [](Instruction *I) {
        if (auto *II = dyn_cast<IntrinsicInst>(I))
          return II->getIntrinsicID() == Intrinsic::lifetime_start;
        return false;
      };

      auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
        if (isLifetimeStart(U)) {
          Lifetimes.push_back(U);
          return true;
        }
        if (!U->hasOneUse() || U->stripPointerCasts() != AI)
          return false;
        if (isLifetimeStart(U->user_back())) {
          Lifetimes.push_back(U->user_back());
          return true;
        }
        return false;
      };

      for (User *U : AI->users()) {
        Instruction *UI = cast<Instruction>(U);
        // For all users except lifetime.start markers: if they are all
        // dominated by one of the candidate basic blocks and do not cross
        // suspend points, then there is no need to spill the instruction.
        if (!DT.dominates(DomBB, UI->getParent()) ||
            Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
          // Skip lifetime.start markers, as well as GEPs and bitcasts that
          // are only used by lifetime.start markers.
          if (collectLifetimeStart(UI, AI))
            continue;
          Valid = false;
          break;
        }
      }
      // Sink the lifetime.start markers into the dominating block when all
      // other users live inside that region.
      if (Valid && !Lifetimes.empty()) {
        auto *NewLifetime = Lifetimes[0]->clone();
        NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
        NewLifetime->insertBefore(DomBB->getTerminator());

        // The original lifetime.start markers outside the region are no
        // longer necessary.
        for (Instruction *S : Lifetimes)
          S->eraseFromParent();

        break;
      }
    }
  }
}
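
// A rough before/after sketch (assuming the alloca is used only after the
// suspend):
//   entry:
//     %a = alloca i64
//     call void @llvm.lifetime.start.p0(i64 8, ptr %a)
//     ...
//   suspend.block:
//     %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
//     ...
//   after.suspend:                 ; single successor of the suspend block
//     store i64 0, ptr %a
// The lifetime.start is re-emitted inside after.suspend (before its
// terminator) and the old marker erased, so %a is no longer considered live
// across the suspend and need not be placed on the coroutine frame.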

static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
                               const SuspendCrossingInfo &Checker,
                               SmallVectorImpl<AllocaInfo> &Allocas,
                               const DominatorTree &DT) {
  if (Shape.CoroSuspends.empty())
    return;

  // The PromiseAlloca will be specially handled since it needs to be in a
  // fixed position in the frame.
  if (AI == Shape.SwitchLowering.PromiseAlloca)
    return;

  // The __coro_gro alloca should outlive the promise, so make sure we
  // keep it outside the frame.
  if (AI->hasMetadata(LLVMContext::MD_coro_outside_frame))
    return;

  // The code that uses the lifetime.start intrinsic does not work for
  // functions with loops that never exit. Disable it on ABIs we know to
  // generate such code.
  bool ShouldUseLifetimeStartInfo =
      (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
       Shape.ABI != coro::ABI::RetconOnce);
  AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT,
                           *Shape.CoroBegin, Checker,
                           ShouldUseLifetimeStartInfo};
  Visitor.visitPtr(*AI);
  if (!Visitor.getShouldLiveOnFrame())
    return;
  Allocas.emplace_back(AI, Visitor.getAliasesCopy(),
                       Visitor.getMayWriteBeforeCoroBegin());
}

static std::optional<std::pair<Value &, DIExpression &>>
salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
                     bool OptimizeFrame, bool UseEntryValue, Function *F,
                     Value *Storage, DIExpression *Expr,
                     bool SkipOutermostLoad) {
  IRBuilder<> Builder(F->getContext());
  auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
  while (isa<IntrinsicInst>(InsertPt))
    ++InsertPt;
  Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);

  while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
    if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
      Storage = LdInst->getPointerOperand();
      // FIXME: This is a heuristic that works around the fact that
      // LLVM IR debug intrinsics cannot yet distinguish between
      // memory and value locations: Because a dbg.declare(alloca) is
      // implicitly a memory location, no DW_OP_deref operation for the
      // last direct load from an alloca is necessary. This condition
      // effectively drops the *last* DW_OP_deref in the expression.
      if (!SkipOutermostLoad)
        Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
    } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
      Storage = StInst->getValueOperand();
    } else {
      SmallVector<uint64_t, 16> Ops;
      SmallVector<Value *, 0> AdditionalValues;
      Value *Op = llvm::salvageDebugInfoImpl(
          *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
          AdditionalValues);
      if (!Op || !AdditionalValues.empty()) {
        // If salvaging failed or salvaging produced more than one location
        // operand, give up.
        break;
      }
      Storage = Op;
      Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
    }
    SkipOutermostLoad = false;
  }
  if (!Storage)
    return std::nullopt;

  auto *StorageAsArg = dyn_cast<Argument>(Storage);
  const bool IsSwiftAsyncArg =
      StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);

  // Swift async arguments are described by an entry value of the ABI-defined
  // register containing the coroutine context.
  // Entry values in variadic expressions are not supported.
  if (IsSwiftAsyncArg && UseEntryValue && !Expr->isEntryValue() &&
      Expr->isSingleLocationExpression())
    Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);

  // If the coroutine frame is an Argument, store it in an alloca to improve
  // its availability (e.g. registers may be clobbered).
  // Avoid this if optimizations are enabled (they would remove the alloca) or
  // if the value is guaranteed to be available through other means (e.g. Swift
  // ABI guarantees).
  if (StorageAsArg && !OptimizeFrame && !IsSwiftAsyncArg) {
    auto &Cached = ArgToAllocaMap[StorageAsArg];
    if (!Cached) {
      Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
                                    Storage->getName() + ".debug");
      Builder.CreateStore(Storage, Cached);
    }
    Storage = Cached;
    // FIXME: LLVM lacks nuanced semantics to differentiate between
    // memory and direct locations at the IR level. The backend will
    // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
    // location. Thus, if there are deref and offset operations in the
    // expression, we need to add a DW_OP_deref at the *start* of the
    // expression to first load the contents of the alloca before
    // adjusting it with the expression.
    Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
  }

  return {{*Storage, *Expr}};
}
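
// An illustrative sketch of the walk above: starting from a debug location
// such as
//   %addr = getelementptr inbounds %f.Frame, ptr %frameptr, i32 0, i32 4
//   %val  = load i32, ptr %addr
//   dbg.value(%val, !var, !DIExpression())
// the loop follows %val back through the load and the GEP, ending at
// %frameptr with an offset-and-deref expression along the lines of
//   !DIExpression(DW_OP_plus_uconst, <offset>, DW_OP_deref)
// (the exact operator order depends on the salvaging steps taken).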

void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableIntrinsic &DVI, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVI.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
  Value *OriginalStorage = DVI.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVI.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVI.replaceVariableLocationOp(OriginalStorage, Storage);
  DVI.setExpression(Expr);
  // We only hoist dbg.declare today; it does not make sense to hoist
  // dbg.value, which lacks the function-wide guarantees that dbg.declare
  // provides.
  if (isa<DbgDeclareInst>(DVI)) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update the DILocation only at O0, since it is easy for it to get out
      // of sync during optimizations. See
      // https://github.com/llvm/llvm-project/pull/75104 for an example.
      if (!OptimizeFrame && I->getDebugLoc())
        DVI.setDebugLoc(I->getDebugLoc());
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt)
      DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
  }
}

void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableRecord &DVR, bool OptimizeFrame, bool UseEntryValue) {

  Function *F = DVR.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = DVR.isDbgDeclare();
  Value *OriginalStorage = DVR.getVariableLocationOp(0);

  auto SalvagedInfo = ::salvageDebugInfoImpl(
      ArgToAllocaMap, OptimizeFrame, UseEntryValue, F, OriginalStorage,
      DVR.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  DVR.replaceVariableLocationOp(OriginalStorage, Storage);
  DVR.setExpression(Expr);
  // We only hoist dbg.declare today; it does not make sense to hoist
  // dbg.value, which lacks the function-wide guarantees that dbg.declare
  // provides.
  if (DVR.getType() == DbgVariableRecord::LocationType::Declare) {
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update the DILocation only at O0, since it is easy for it to get out
      // of sync during optimizations. See
      // https://github.com/llvm/llvm-project/pull/75104 for an example.
      if (!OptimizeFrame && I->getDebugLoc())
        DVR.setDebugLoc(I->getDebugLoc());
    } else if (isa<Argument>(Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt) {
      DVR.removeFromParent();
      (*InsertPt)->getParent()->insertDbgRecordBefore(&DVR, *InsertPt);
    }
  }
}

static void doRematerializations(
    Function &F, SuspendCrossingInfo &Checker,
    const std::function<bool(Instruction &)> &MaterializableCallback) {
  if (F.hasOptNone())
    return;

  SpillInfo Spills;

  // See if there are materializable instructions across suspend points. We
  // record these as the starting point to also identify materializable
  // defs of uses in these operations.
  for (Instruction &I : instructions(F)) {
    if (!MaterializableCallback(I))
      continue;
    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U))
        Spills[&I].push_back(cast<Instruction>(U));
  }

  // Process each of the identified rematerializable instructions
  // and add predecessor instructions that can also be rematerialized.
  // This is actually a graph of instructions, since we could potentially
  // have multiple uses of a def in the set of predecessor instructions.
  // The approach here is to maintain a graph of instructions for each
  // bottom-level instruction - where we have a unique set of instructions
  // (nodes) and edges between them. We then walk the graph in reverse
  // post-order to insert the nodes past the suspend point while ensuring
  // that the ordering is correct. We also rely on CSE removing duplicate
  // defs for remats of different instructions with a def in common (rather
  // than maintaining more complex graphs for each suspend point).

  // We can do this by adding new nodes to the list for each suspend
  // point. Then using standard GraphTraits to give a reverse post-order
  // traversal when we insert the nodes after the suspend.
  SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
  for (auto &E : Spills) {
    for (Instruction *U : E.second) {
      // Don't process a user twice (this can happen if the instruction uses
      // more than one rematerializable def).
      if (AllRemats.count(U))
        continue;

      // The constructor creates the whole RematGraph for the given use.
      auto RematUPtr =
          std::make_unique<RematGraph>(MaterializableCallback, U, Checker);

      LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
                 ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
                 for (auto I = RPOT.begin(); I != RPOT.end(); ++I)
                   (*I)->Node->dump();
                 dbgs() << "\n";);

      AllRemats[U] = std::move(RematUPtr);
    }
  }

  // Rewrite materializable instructions to be materialized at the use
  // point.
  LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
  rewriteMaterializableInstructions(AllRemats);
}
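
// A hedged sketch of what rematerialization buys us: given
//   %sum = add i32 %a, %b                                ; materializable
//   %save = call token @llvm.coro.save(ptr %hdl)
//   %sp = call i8 @llvm.coro.suspend(token %save, i1 false)
//   ...
//   <use of %sum>
// instead of spilling %sum to the frame, the add is re-emitted in front of
// the use after the suspend, so at most %a and %b (if they themselves cross
// the suspend) end up on the frame.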

void coro::buildCoroutineFrame(
    Function &F, Shape &Shape, TargetTransformInfo &TTI,
    const std::function<bool(Instruction &)> &MaterializableCallback) {
  // Don't eliminate swifterror in async functions that won't be split.
  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
    eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    splitAround(CE, "CoroEnd");

    // Emit the musttail call function in a new block before the CoroEnd.
    // We do this here so that the right suspend crossing info is computed for
    // the uses of the musttail call function call. (Arguments to the coro.end
    // instructions would be ignored.)
    if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
      auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
      if (!MustTailCallFn)
        continue;
      IRBuilder<> Builder(AsyncEnd);
      SmallVector<Value *, 8> Args(AsyncEnd->args());
      auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
      auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
                                      TTI, Arguments, Builder);
      splitAround(Call, "MustTailCall.Before.CoroEnd");
    }
  }

  // Later code makes structural assumptions about single-predecessor PHIs,
  // e.g. that they are not live across a suspend point.
  cleanupSinglePredPHIs(F);

  // Transform multi-edge PHI nodes so that any value feeding into a PHI never
  // has its definition separated from the PHI by a suspend point.
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  doRematerializations(F, Checker, MaterializableCallback);

  FrameDataInfo FrameData;
  SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
  SmallVector<Instruction *, 4> DeadInstructions;
  if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
      Shape.ABI != coro::ABI::RetconOnce)
    sinkLifetimeStartMarkers(F, Shape, Checker);

  // Collect the spills for arguments and other not-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        FrameData.Spills[&A].push_back(cast<Instruction>(U));

  const DominatorTree DT(F);
  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // Handle alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value. The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
      }
      continue;
    }

    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I))
      continue;

    if (auto *AI = dyn_cast<AllocaInst>(&I)) {
      collectFrameAlloca(AI, Shape, Checker, FrameData.Allocas, DT);
      continue;
    }

    for (User *U : I.users())
      if (Checker.isDefinitionAcrossSuspend(I, U)) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        FrameData.Spills[&I].push_back(cast<Instruction>(U));
      }
  }

  LLVM_DEBUG(dumpAllocas(FrameData.Allocas));

  // We don't want the layout of the coroutine frame to be affected by debug
  // information, so we only salvage dbg.value instructions whose values are
  // already in the frame. The dbg.values for allocas are handled specially.
  for (auto &Iter : FrameData.Spills) {
    auto *V = Iter.first;
    SmallVector<DbgValueInst *, 16> DVIs;
    SmallVector<DbgVariableRecord *, 16> DVRs;
    findDbgValues(DVIs, V, &DVRs);
    for (DbgValueInst *DVI : DVIs)
      if (Checker.isDefinitionAcrossSuspend(*V, DVI))
        FrameData.Spills[V].push_back(DVI);
    // Add the instructions which carry debug info that is in the frame.
    for (DbgVariableRecord *DVR : DVRs)
      if (Checker.isDefinitionAcrossSuspend(*V, DVR->Marker->MarkedInstr))
        FrameData.Spills[V].push_back(DVR->Marker->MarkedInstr);
  }

  LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async)
    sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, FrameData);
  Shape.FramePtr = Shape.CoroBegin;
  // For now, this works for C++ programs only.
  buildFrameDebugInfo(F, Shape, FrameData);
  insertSpills(FrameData, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto *I : DeadInstructions)
    I->eraseFromParent();
}
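
// For reference, an illustrative sketch (not the exact layout computed by
// buildFrameType): for the switch ABI the resulting frame type looks roughly
// like
//   %f.Frame = type { ptr,             ; resume function pointer
//                     ptr,             ; destroy function pointer
//                     <promise type>,  ; promise, kept at a fixed offset
//                     ...,             ; spilled values and frame allocas
//                     iN }             ; suspend index driving the resume switch
// with the precise field order chosen by OptimizedStructLayout.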