1 | //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets STACK_SIZE info.
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "MCTargetDesc/R600MCTargetDesc.h" |
16 | #include "R600.h" |
17 | #include "R600MachineFunctionInfo.h" |
18 | #include "R600Subtarget.h" |
19 | #include "llvm/CodeGen/MachineFunctionPass.h" |
20 | #include <set> |
21 | |
22 | using namespace llvm; |
23 | |
24 | #define DEBUG_TYPE "r600cf" |
25 | |
26 | namespace { |
27 | |
28 | struct CFStack { |
29 | enum StackItem { |
30 | ENTRY = 0, |
31 | SUB_ENTRY = 1, |
32 | FIRST_NON_WQM_PUSH = 2, |
33 | FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 |
34 | }; |
35 | |
36 | const R600Subtarget *ST; |
37 | std::vector<StackItem> BranchStack; |
38 | std::vector<StackItem> LoopStack; |
39 | unsigned MaxStackSize; |
40 | unsigned CurrentEntries = 0; |
41 | unsigned CurrentSubEntries = 0; |
42 | |
43 | CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), |
44 | // We need to reserve a stack entry for CALL_FS in vertex shaders. |
45 | MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {} |
46 | |
47 | unsigned getLoopDepth(); |
48 | bool branchStackContains(CFStack::StackItem); |
49 | bool requiresWorkAroundForInst(unsigned Opcode); |
50 | unsigned getSubEntrySize(CFStack::StackItem Item); |
51 | void updateMaxStackSize(); |
52 | void pushBranch(unsigned Opcode, bool isWQM = false); |
53 | void pushLoop(); |
54 | void popBranch(); |
55 | void popLoop(); |
56 | }; |
57 | |
58 | unsigned CFStack::getLoopDepth() { |
59 | return LoopStack.size(); |
60 | } |
61 | |
62 | bool CFStack::branchStackContains(CFStack::StackItem Item) { |
63 | return llvm::is_contained(Range&: BranchStack, Element: Item); |
64 | } |
65 | |
66 | bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { |
67 | if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && |
68 | getLoopDepth() > 1) |
69 | return true; |
70 | |
71 | if (!ST->hasCFAluBug()) |
72 | return false; |
73 | |
74 | switch(Opcode) { |
75 | default: return false; |
76 | case R600::CF_ALU_PUSH_BEFORE: |
77 | case R600::CF_ALU_ELSE_AFTER: |
78 | case R600::CF_ALU_BREAK: |
79 | case R600::CF_ALU_CONTINUE: |
80 | if (CurrentSubEntries == 0) |
81 | return false; |
82 | if (ST->getWavefrontSize() == 64) { |
83 | // We are being conservative here. We only require this work-around if |
84 | // CurrentSubEntries > 3 && |
85 | // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) |
86 | // |
87 | // We have to be conservative, because we don't know for certain that |
88 | // our stack allocation algorithm for Evergreen/NI is correct. Applying this |
89 | // work-around when CurrentSubEntries > 3 allows us to over-allocate stack |
90 | // resources without any problems. |
91 | return CurrentSubEntries > 3; |
92 | } else { |
93 | assert(ST->getWavefrontSize() == 32); |
94 | // We are being conservative here. We only require the work-around if |
95 | // CurrentSubEntries > 7 && |
96 | // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) |
97 | // See the comment on the wavefront size == 64 case for why we are |
98 | // being conservative. |
99 | return CurrentSubEntries > 7; |
100 | } |
101 | } |
102 | } |
103 | |
104 | unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { |
105 | switch(Item) { |
106 | default: |
107 | return 0; |
108 | case CFStack::FIRST_NON_WQM_PUSH: |
109 | assert(!ST->hasCaymanISA()); |
110 | if (ST->getGeneration() <= AMDGPUSubtarget::R700) { |
111 | // +1 For the push operation. |
112 | // +2 Extra space required. |
113 | return 3; |
114 | } else { |
115 | // Some documentation says that this is not necessary on Evergreen, |
116 | // but experimentation has show that we need to allocate 1 extra |
117 | // sub-entry for the first non-WQM push. |
118 | // +1 For the push operation. |
119 | // +1 Extra space required. |
120 | return 2; |
121 | } |
122 | case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: |
123 | assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); |
124 | // +1 For the push operation. |
125 | // +1 Extra space required. |
126 | return 2; |
127 | case CFStack::SUB_ENTRY: |
128 | return 1; |
129 | } |
130 | } |
131 | |
132 | void CFStack::updateMaxStackSize() { |
133 | unsigned CurrentStackSize = CurrentEntries + divideCeil(Numerator: CurrentSubEntries, Denominator: 4); |
134 | MaxStackSize = std::max(a: CurrentStackSize, b: MaxStackSize); |
135 | } |
136 | |
137 | void CFStack::pushBranch(unsigned Opcode, bool isWQM) { |
138 | CFStack::StackItem Item = CFStack::ENTRY; |
139 | switch(Opcode) { |
140 | case R600::CF_PUSH_EG: |
141 | case R600::CF_ALU_PUSH_BEFORE: |
142 | if (!isWQM) { |
143 | if (!ST->hasCaymanISA() && |
144 | !branchStackContains(Item: CFStack::FIRST_NON_WQM_PUSH)) |
145 | Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI |
146 | // See comment in |
147 | // CFStack::getSubEntrySize() |
148 | else if (CurrentEntries > 0 && |
149 | ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && |
150 | !ST->hasCaymanISA() && |
151 | !branchStackContains(Item: CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) |
152 | Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; |
153 | else |
154 | Item = CFStack::SUB_ENTRY; |
155 | } else |
156 | Item = CFStack::ENTRY; |
157 | break; |
158 | } |
159 | BranchStack.push_back(x: Item); |
160 | if (Item == CFStack::ENTRY) |
161 | CurrentEntries++; |
162 | else |
163 | CurrentSubEntries += getSubEntrySize(Item); |
164 | updateMaxStackSize(); |
165 | } |
166 | |
167 | void CFStack::pushLoop() { |
168 | LoopStack.push_back(x: CFStack::ENTRY); |
169 | CurrentEntries++; |
170 | updateMaxStackSize(); |
171 | } |
172 | |
173 | void CFStack::popBranch() { |
174 | CFStack::StackItem Top = BranchStack.back(); |
175 | if (Top == CFStack::ENTRY) |
176 | CurrentEntries--; |
177 | else |
178 | CurrentSubEntries-= getSubEntrySize(Item: Top); |
179 | BranchStack.pop_back(); |
180 | } |
181 | |
182 | void CFStack::popLoop() { |
183 | CurrentEntries--; |
184 | LoopStack.pop_back(); |
185 | } |
186 | |
187 | class R600ControlFlowFinalizer : public MachineFunctionPass { |
188 | private: |
189 | using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; |
190 | |
/// Generation-independent names for the native control-flow instructions
/// this pass emits. getHWInstrDesc() maps each one to the opcode for the
/// current subtarget.
enum ControlFlowInstruction {
  CF_TC,
  CF_VC,
  CF_CALL_FS,
  CF_WHILE_LOOP,
  CF_END_LOOP,
  CF_LOOP_BREAK,
  CF_LOOP_CONTINUE,
  CF_JUMP,
  CF_ELSE,
  CF_POP,
  CF_END
};
204 | |
205 | const R600InstrInfo *TII = nullptr; |
206 | const R600RegisterInfo *TRI = nullptr; |
207 | unsigned MaxFetchInst; |
208 | const R600Subtarget *ST = nullptr; |
209 | |
210 | bool IsTrivialInst(MachineInstr &MI) const { |
211 | switch (MI.getOpcode()) { |
212 | case R600::KILL: |
213 | case R600::RETURN: |
214 | return true; |
215 | default: |
216 | return false; |
217 | } |
218 | } |
219 | |
220 | const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { |
221 | unsigned Opcode = 0; |
222 | bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); |
223 | switch (CFI) { |
224 | case CF_TC: |
225 | Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; |
226 | break; |
227 | case CF_VC: |
228 | Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; |
229 | break; |
230 | case CF_CALL_FS: |
231 | Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; |
232 | break; |
233 | case CF_WHILE_LOOP: |
234 | Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; |
235 | break; |
236 | case CF_END_LOOP: |
237 | Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; |
238 | break; |
239 | case CF_LOOP_BREAK: |
240 | Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; |
241 | break; |
242 | case CF_LOOP_CONTINUE: |
243 | Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; |
244 | break; |
245 | case CF_JUMP: |
246 | Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; |
247 | break; |
248 | case CF_ELSE: |
249 | Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; |
250 | break; |
251 | case CF_POP: |
252 | Opcode = isEg ? R600::POP_EG : R600::POP_R600; |
253 | break; |
254 | case CF_END: |
255 | if (ST->hasCaymanISA()) { |
256 | Opcode = R600::CF_END_CM; |
257 | break; |
258 | } |
259 | Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; |
260 | break; |
261 | } |
262 | assert (Opcode && "No opcode selected" ); |
263 | return TII->get(Opcode); |
264 | } |
265 | |
266 | bool isCompatibleWithClause(const MachineInstr &MI, |
267 | std::set<unsigned> &DstRegs) const { |
268 | unsigned DstMI, SrcMI; |
269 | for (MachineInstr::const_mop_iterator I = MI.operands_begin(), |
270 | E = MI.operands_end(); |
271 | I != E; ++I) { |
272 | const MachineOperand &MO = *I; |
273 | if (!MO.isReg()) |
274 | continue; |
275 | if (MO.isDef()) { |
276 | Register Reg = MO.getReg(); |
277 | if (R600::R600_Reg128RegClass.contains(Reg)) |
278 | DstMI = Reg; |
279 | else |
280 | DstMI = TRI->getMatchingSuperReg(Reg, |
281 | R600RegisterInfo::getSubRegFromChannel(Channel: TRI->getHWRegChan(reg: Reg)), |
282 | &R600::R600_Reg128RegClass); |
283 | } |
284 | if (MO.isUse()) { |
285 | Register Reg = MO.getReg(); |
286 | if (R600::R600_Reg128RegClass.contains(Reg)) |
287 | SrcMI = Reg; |
288 | else |
289 | SrcMI = TRI->getMatchingSuperReg(Reg, |
290 | R600RegisterInfo::getSubRegFromChannel(Channel: TRI->getHWRegChan(reg: Reg)), |
291 | &R600::R600_Reg128RegClass); |
292 | } |
293 | } |
294 | if ((DstRegs.find(x: SrcMI) == DstRegs.end())) { |
295 | DstRegs.insert(x: DstMI); |
296 | return true; |
297 | } else |
298 | return false; |
299 | } |
300 | |
301 | ClauseFile |
302 | MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) |
303 | const { |
304 | MachineBasicBlock::iterator ClauseHead = I; |
305 | std::vector<MachineInstr *> ClauseContent; |
306 | unsigned AluInstCount = 0; |
307 | bool IsTex = TII->usesTextureCache(MI: *ClauseHead); |
308 | std::set<unsigned> DstRegs; |
309 | for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { |
310 | if (IsTrivialInst(MI&: *I)) |
311 | continue; |
312 | if (AluInstCount >= MaxFetchInst) |
313 | break; |
314 | if ((IsTex && !TII->usesTextureCache(MI: *I)) || |
315 | (!IsTex && !TII->usesVertexCache(MI: *I))) |
316 | break; |
317 | if (!isCompatibleWithClause(MI: *I, DstRegs)) |
318 | break; |
319 | AluInstCount ++; |
320 | ClauseContent.push_back(x: &*I); |
321 | } |
322 | MachineInstr *MIb = BuildMI(BB&: MBB, I: ClauseHead, MIMD: MBB.findDebugLoc(MBBI: ClauseHead), |
323 | MCID: getHWInstrDesc(CFI: IsTex?CF_TC:CF_VC)) |
324 | .addImm(Val: 0) // ADDR |
325 | .addImm(Val: AluInstCount - 1); // COUNT |
326 | return ClauseFile(MIb, std::move(ClauseContent)); |
327 | } |
328 | |
329 | void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { |
330 | static const unsigned LiteralRegs[] = { |
331 | R600::ALU_LITERAL_X, |
332 | R600::ALU_LITERAL_Y, |
333 | R600::ALU_LITERAL_Z, |
334 | R600::ALU_LITERAL_W |
335 | }; |
336 | const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = |
337 | TII->getSrcs(MI); |
338 | for (const auto &Src:Srcs) { |
339 | if (Src.first->getReg() != R600::ALU_LITERAL_X) |
340 | continue; |
341 | int64_t Imm = Src.second; |
342 | std::vector<MachineOperand *>::iterator It = |
343 | llvm::find_if(Range&: Lits, P: [&](MachineOperand *val) { |
344 | return val->isImm() && (val->getImm() == Imm); |
345 | }); |
346 | |
347 | // Get corresponding Operand |
348 | MachineOperand &Operand = MI.getOperand( |
349 | TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); |
350 | |
351 | if (It != Lits.end()) { |
352 | // Reuse existing literal reg |
353 | unsigned Index = It - Lits.begin(); |
354 | Src.first->setReg(LiteralRegs[Index]); |
355 | } else { |
356 | // Allocate new literal reg |
357 | assert(Lits.size() < 4 && "Too many literals in Instruction Group" ); |
358 | Src.first->setReg(LiteralRegs[Lits.size()]); |
359 | Lits.push_back(x: &Operand); |
360 | } |
361 | } |
362 | } |
363 | |
364 | MachineBasicBlock::iterator insertLiterals( |
365 | MachineBasicBlock::iterator InsertPos, |
366 | const std::vector<unsigned> &Literals) const { |
367 | MachineBasicBlock *MBB = InsertPos->getParent(); |
368 | for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { |
369 | unsigned LiteralPair0 = Literals[i]; |
370 | unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; |
371 | InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), |
372 | TII->get(R600::LITERALS)) |
373 | .addImm(LiteralPair0) |
374 | .addImm(LiteralPair1); |
375 | } |
376 | return InsertPos; |
377 | } |
378 | |
379 | ClauseFile |
380 | MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) |
381 | const { |
382 | MachineInstr &ClauseHead = *I; |
383 | std::vector<MachineInstr *> ClauseContent; |
384 | I++; |
385 | for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { |
386 | if (IsTrivialInst(MI&: *I)) { |
387 | ++I; |
388 | continue; |
389 | } |
390 | if (!I->isBundle() && !TII->isALUInstr(Opcode: I->getOpcode())) |
391 | break; |
392 | std::vector<MachineOperand *>Literals; |
393 | if (I->isBundle()) { |
394 | MachineInstr &DeleteMI = *I; |
395 | MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); |
396 | while (++BI != E && BI->isBundledWithPred()) { |
397 | BI->unbundleFromPred(); |
398 | for (MachineOperand &MO : BI->operands()) { |
399 | if (MO.isReg() && MO.isInternalRead()) |
400 | MO.setIsInternalRead(false); |
401 | } |
402 | getLiteral(MI&: *BI, Lits&: Literals); |
403 | ClauseContent.push_back(x: &*BI); |
404 | } |
405 | I = BI; |
406 | DeleteMI.eraseFromParent(); |
407 | } else { |
408 | getLiteral(MI&: *I, Lits&: Literals); |
409 | ClauseContent.push_back(x: &*I); |
410 | I++; |
411 | } |
412 | for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { |
413 | MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), |
414 | TII->get(R600::LITERALS)); |
415 | if (Literals[i]->isImm()) { |
416 | MILit.addImm(Val: Literals[i]->getImm()); |
417 | } else { |
418 | MILit.addGlobalAddress(GV: Literals[i]->getGlobal(), |
419 | Offset: Literals[i]->getOffset()); |
420 | } |
421 | if (i + 1 < e) { |
422 | if (Literals[i + 1]->isImm()) { |
423 | MILit.addImm(Val: Literals[i + 1]->getImm()); |
424 | } else { |
425 | MILit.addGlobalAddress(GV: Literals[i + 1]->getGlobal(), |
426 | Offset: Literals[i + 1]->getOffset()); |
427 | } |
428 | } else |
429 | MILit.addImm(Val: 0); |
430 | ClauseContent.push_back(x: MILit); |
431 | } |
432 | } |
433 | assert(ClauseContent.size() < 128 && "ALU clause is too big" ); |
434 | ClauseHead.getOperand(i: 7).setImm(ClauseContent.size() - 1); |
435 | return ClauseFile(&ClauseHead, std::move(ClauseContent)); |
436 | } |
437 | |
438 | void EmitFetchClause(MachineBasicBlock::iterator InsertPos, |
439 | const DebugLoc &DL, ClauseFile &Clause, |
440 | unsigned &CfCount) { |
441 | CounterPropagateAddr(MI&: *Clause.first, Addr: CfCount); |
442 | MachineBasicBlock *BB = Clause.first->getParent(); |
443 | BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); |
444 | for (MachineInstr *MI : Clause.second) |
445 | BB->splice(Where: InsertPos, Other: BB, From: MI); |
446 | CfCount += 2 * Clause.second.size(); |
447 | } |
448 | |
449 | void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, |
450 | ClauseFile &Clause, unsigned &CfCount) { |
451 | Clause.first->getOperand(i: 0).setImm(0); |
452 | CounterPropagateAddr(MI&: *Clause.first, Addr: CfCount); |
453 | MachineBasicBlock *BB = Clause.first->getParent(); |
454 | BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); |
455 | for (MachineInstr *MI : Clause.second) |
456 | BB->splice(Where: InsertPos, Other: BB, From: MI); |
457 | CfCount += Clause.second.size(); |
458 | } |
459 | |
460 | void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { |
461 | MI.getOperand(i: 0).setImm(Addr + MI.getOperand(i: 0).getImm()); |
462 | } |
463 | void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, |
464 | unsigned Addr) const { |
465 | for (MachineInstr *MI : MIs) { |
466 | CounterPropagateAddr(MI&: *MI, Addr); |
467 | } |
468 | } |
469 | |
470 | public: |
471 | static char ID; |
472 | |
473 | R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} |
474 | |
475 | bool runOnMachineFunction(MachineFunction &MF) override { |
476 | ST = &MF.getSubtarget<R600Subtarget>(); |
477 | MaxFetchInst = ST->getTexVTXClauseSize(); |
478 | TII = ST->getInstrInfo(); |
479 | TRI = ST->getRegisterInfo(); |
480 | |
481 | R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); |
482 | |
483 | CFStack CFStack(ST, MF.getFunction().getCallingConv()); |
484 | for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; |
485 | ++MB) { |
486 | MachineBasicBlock &MBB = *MB; |
487 | unsigned CfCount = 0; |
488 | std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; |
489 | std::vector<MachineInstr * > IfThenElseStack; |
490 | if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { |
491 | BuildMI(BB&: MBB, I: MBB.begin(), MIMD: MBB.findDebugLoc(MBBI: MBB.begin()), |
492 | MCID: getHWInstrDesc(CFI: CF_CALL_FS)); |
493 | CfCount++; |
494 | } |
495 | std::vector<ClauseFile> FetchClauses, AluClauses; |
496 | std::vector<MachineInstr *> LastAlu(1); |
497 | std::vector<MachineInstr *> ToPopAfter; |
498 | |
499 | for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); |
500 | I != E;) { |
501 | if (TII->usesTextureCache(MI: *I) || TII->usesVertexCache(MI: *I)) { |
502 | LLVM_DEBUG(dbgs() << CfCount << ":" ; I->dump();); |
503 | FetchClauses.push_back(x: MakeFetchClause(MBB, I)); |
504 | CfCount++; |
505 | LastAlu.back() = nullptr; |
506 | continue; |
507 | } |
508 | |
509 | MachineBasicBlock::iterator MI = I; |
510 | if (MI->getOpcode() != R600::ENDIF) |
511 | LastAlu.back() = nullptr; |
512 | if (MI->getOpcode() == R600::CF_ALU) |
513 | LastAlu.back() = &*MI; |
514 | I++; |
515 | bool RequiresWorkAround = |
516 | CFStack.requiresWorkAroundForInst(Opcode: MI->getOpcode()); |
517 | switch (MI->getOpcode()) { |
518 | case R600::CF_ALU_PUSH_BEFORE: |
519 | if (RequiresWorkAround) { |
520 | LLVM_DEBUG(dbgs() |
521 | << "Applying bug work-around for ALU_PUSH_BEFORE\n" ); |
522 | BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) |
523 | .addImm(CfCount + 1) |
524 | .addImm(1); |
525 | MI->setDesc(TII->get(R600::CF_ALU)); |
526 | CfCount++; |
527 | CFStack.pushBranch(R600::CF_PUSH_EG); |
528 | } else |
529 | CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); |
530 | [[fallthrough]]; |
531 | case R600::CF_ALU: |
532 | I = MI; |
533 | AluClauses.push_back(x: MakeALUClause(MBB, I)); |
534 | LLVM_DEBUG(dbgs() << CfCount << ":" ; MI->dump();); |
535 | CfCount++; |
536 | break; |
537 | case R600::WHILELOOP: { |
538 | CFStack.pushLoop(); |
539 | MachineInstr *MIb = BuildMI(BB&: MBB, I: MI, MIMD: MBB.findDebugLoc(MBBI: MI), |
540 | MCID: getHWInstrDesc(CFI: CF_WHILE_LOOP)) |
541 | .addImm(Val: 1); |
542 | std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, |
543 | std::set<MachineInstr *>()); |
544 | Pair.second.insert(x: MIb); |
545 | LoopStack.push_back(x: std::move(Pair)); |
546 | MI->eraseFromParent(); |
547 | CfCount++; |
548 | break; |
549 | } |
550 | case R600::ENDLOOP: { |
551 | CFStack.popLoop(); |
552 | std::pair<unsigned, std::set<MachineInstr *>> Pair = |
553 | std::move(LoopStack.back()); |
554 | LoopStack.pop_back(); |
555 | CounterPropagateAddr(MIs: Pair.second, Addr: CfCount); |
556 | BuildMI(BB&: MBB, I: MI, MIMD: MBB.findDebugLoc(MBBI: MI), MCID: getHWInstrDesc(CFI: CF_END_LOOP)) |
557 | .addImm(Val: Pair.first + 1); |
558 | MI->eraseFromParent(); |
559 | CfCount++; |
560 | break; |
561 | } |
562 | case R600::IF_PREDICATE_SET: { |
563 | LastAlu.push_back(x: nullptr); |
564 | MachineInstr *MIb = BuildMI(BB&: MBB, I: MI, MIMD: MBB.findDebugLoc(MBBI: MI), |
565 | MCID: getHWInstrDesc(CFI: CF_JUMP)) |
566 | .addImm(Val: 0) |
567 | .addImm(Val: 0); |
568 | IfThenElseStack.push_back(x: MIb); |
569 | LLVM_DEBUG(dbgs() << CfCount << ":" ; MIb->dump();); |
570 | MI->eraseFromParent(); |
571 | CfCount++; |
572 | break; |
573 | } |
574 | case R600::ELSE: { |
575 | MachineInstr * JumpInst = IfThenElseStack.back(); |
576 | IfThenElseStack.pop_back(); |
577 | CounterPropagateAddr(MI&: *JumpInst, Addr: CfCount); |
578 | MachineInstr *MIb = BuildMI(BB&: MBB, I: MI, MIMD: MBB.findDebugLoc(MBBI: MI), |
579 | MCID: getHWInstrDesc(CFI: CF_ELSE)) |
580 | .addImm(Val: 0) |
581 | .addImm(Val: 0); |
582 | LLVM_DEBUG(dbgs() << CfCount << ":" ; MIb->dump();); |
583 | IfThenElseStack.push_back(x: MIb); |
584 | MI->eraseFromParent(); |
585 | CfCount++; |
586 | break; |
587 | } |
588 | case R600::ENDIF: { |
589 | CFStack.popBranch(); |
590 | if (LastAlu.back()) { |
591 | ToPopAfter.push_back(x: LastAlu.back()); |
592 | } else { |
593 | MachineInstr *MIb = BuildMI(BB&: MBB, I: MI, MIMD: MBB.findDebugLoc(MBBI: MI), |
594 | MCID: getHWInstrDesc(CFI: CF_POP)) |
595 | .addImm(Val: CfCount + 1) |
596 | .addImm(Val: 1); |
597 | (void)MIb; |
598 | LLVM_DEBUG(dbgs() << CfCount << ":" ; MIb->dump();); |
599 | CfCount++; |
600 | } |
601 | |
602 | MachineInstr *IfOrElseInst = IfThenElseStack.back(); |
603 | IfThenElseStack.pop_back(); |
604 | CounterPropagateAddr(MI&: *IfOrElseInst, Addr: CfCount); |
605 | IfOrElseInst->getOperand(i: 1).setImm(1); |
606 | LastAlu.pop_back(); |
607 | MI->eraseFromParent(); |
608 | break; |
609 | } |
610 | case R600::BREAK: { |
611 | CfCount ++; |
612 | MachineInstr *MIb = BuildMI(BB&: MBB, I: MI, MIMD: MBB.findDebugLoc(MBBI: MI), |
613 | MCID: getHWInstrDesc(CFI: CF_LOOP_BREAK)) |
614 | .addImm(Val: 0); |
615 | LoopStack.back().second.insert(x: MIb); |
616 | MI->eraseFromParent(); |
617 | break; |
618 | } |
619 | case R600::CONTINUE: { |
620 | MachineInstr *MIb = BuildMI(BB&: MBB, I: MI, MIMD: MBB.findDebugLoc(MBBI: MI), |
621 | MCID: getHWInstrDesc(CFI: CF_LOOP_CONTINUE)) |
622 | .addImm(Val: 0); |
623 | LoopStack.back().second.insert(x: MIb); |
624 | MI->eraseFromParent(); |
625 | CfCount++; |
626 | break; |
627 | } |
628 | case R600::RETURN: { |
629 | DebugLoc DL = MBB.findDebugLoc(MBBI: MI); |
630 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: getHWInstrDesc(CFI: CF_END)); |
631 | CfCount++; |
632 | if (CfCount % 2) { |
633 | BuildMI(MBB, I, DL, TII->get(R600::PAD)); |
634 | CfCount++; |
635 | } |
636 | MI->eraseFromParent(); |
637 | for (ClauseFile &CF : FetchClauses) |
638 | EmitFetchClause(InsertPos: I, DL, Clause&: CF, CfCount); |
639 | for (ClauseFile &CF : AluClauses) |
640 | EmitALUClause(InsertPos: I, DL, Clause&: CF, CfCount); |
641 | break; |
642 | } |
643 | default: |
644 | if (TII->isExport(Opcode: MI->getOpcode())) { |
645 | LLVM_DEBUG(dbgs() << CfCount << ":" ; MI->dump();); |
646 | CfCount++; |
647 | } |
648 | break; |
649 | } |
650 | } |
651 | for (MachineInstr *Alu : ToPopAfter) { |
652 | BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), |
653 | TII->get(R600::CF_ALU_POP_AFTER)) |
654 | .addImm(Alu->getOperand(0).getImm()) |
655 | .addImm(Alu->getOperand(1).getImm()) |
656 | .addImm(Alu->getOperand(2).getImm()) |
657 | .addImm(Alu->getOperand(3).getImm()) |
658 | .addImm(Alu->getOperand(4).getImm()) |
659 | .addImm(Alu->getOperand(5).getImm()) |
660 | .addImm(Alu->getOperand(6).getImm()) |
661 | .addImm(Alu->getOperand(7).getImm()) |
662 | .addImm(Alu->getOperand(8).getImm()); |
663 | Alu->eraseFromParent(); |
664 | } |
665 | MFI->CFStackSize = CFStack.MaxStackSize; |
666 | } |
667 | |
668 | return false; |
669 | } |
670 | |
671 | StringRef getPassName() const override { |
672 | return "R600 Control Flow Finalizer Pass" ; |
673 | } |
674 | }; |
675 | |
676 | } // end anonymous namespace |
677 | |
678 | INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, |
679 | "R600 Control Flow Finalizer" , false, false) |
680 | INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, |
681 | "R600 Control Flow Finalizer" , false, false) |
682 | |
683 | char R600ControlFlowFinalizer::ID = 0; |
684 | |
685 | char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; |
686 | |
687 | FunctionPass *llvm::createR600ControlFlowFinalizer() { |
688 | return new R600ControlFlowFinalizer(); |
689 | } |
690 | |