//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or with target-specific instructions which
// implement the same semantics in a way that better fits the target backend.
// This can include the use of (intrinsic-based) load-linked/store-conditional
// loops, AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/Analysis/InstSimplifyFolder.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/CodeGen/AtomicExpand.h"
23#include "llvm/CodeGen/AtomicExpandUtils.h"
24#include "llvm/CodeGen/RuntimeLibcalls.h"
25#include "llvm/CodeGen/TargetLowering.h"
26#include "llvm/CodeGen/TargetPassConfig.h"
27#include "llvm/CodeGen/TargetSubtargetInfo.h"
28#include "llvm/CodeGen/ValueTypes.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constant.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstIterator.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
45#include "llvm/InitializePasses.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/AtomicOrdering.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/Debug.h"
50#include "llvm/Support/ErrorHandling.h"
51#include "llvm/Support/raw_ostream.h"
52#include "llvm/Target/TargetMachine.h"
53#include "llvm/Transforms/Utils/LowerAtomic.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const DataLayout *DL = nullptr;
67
68private:
69 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
70 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
71 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
72 bool tryExpandAtomicLoad(LoadInst *LI);
73 bool expandAtomicLoadToLL(LoadInst *LI);
74 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
75 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
76 bool tryExpandAtomicStore(StoreInst *SI);
77 void expandAtomicStore(StoreInst *SI);
78 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
79 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
80 Value *
81 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
82 Align AddrAlign, AtomicOrdering MemOpOrder,
83 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
84 void expandAtomicOpToLLSC(
85 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
86 AtomicOrdering MemOpOrder,
87 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
88 void expandPartwordAtomicRMW(
89 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
90 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
91 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
92 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
93 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
94
95 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
96 static Value *insertRMWCmpXchgLoop(
97 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
98 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
99 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
100 CreateCmpXchgInstFun CreateCmpXchg);
101 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
102
103 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104 bool isIdempotentRMW(AtomicRMWInst *RMWI);
105 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
106
107 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
108 Value *PointerOperand, Value *ValueOperand,
109 Value *CASExpected, AtomicOrdering Ordering,
110 AtomicOrdering Ordering2,
111 ArrayRef<RTLIB::Libcall> Libcalls);
112 void expandAtomicLoadToLibcall(LoadInst *LI);
113 void expandAtomicStoreToLibcall(StoreInst *LI);
114 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
115 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
116
117 friend bool
118 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
119 CreateCmpXchgInstFun CreateCmpXchg);
120
121public:
122 bool run(Function &F, const TargetMachine *TM);
123};
124
125class AtomicExpandLegacy : public FunctionPass {
126public:
127 static char ID; // Pass identification, replacement for typeid
128
129 AtomicExpandLegacy() : FunctionPass(ID) {
130 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
131 }
132
133 bool runOnFunction(Function &F) override;
134};
135
// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, and preserves any metadata from I that is
  // still valid on the replacement instructions. Enables StrictFP builder
  // mode when appropriate.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), DL,
                  IRBuilderCallbackInserter(
                      [this](Instruction *I) { addMMRAMD(I); })) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  void addMMRAMD(Instruction *I) {
    if (canInstructionHaveMMRAs(*I))
      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};
160
161} // end anonymous namespace
162
163char AtomicExpandLegacy::ID = 0;
164
165char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
166
167INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
168 "Expand Atomic instructions", false, false)
169INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
170INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
171 "Expand Atomic instructions", false, false)
172
173// Helper functions to retrieve the size of atomic instructions.
174static unsigned getAtomicOpSize(LoadInst *LI) {
175 const DataLayout &DL = LI->getModule()->getDataLayout();
176 return DL.getTypeStoreSize(Ty: LI->getType());
177}
178
179static unsigned getAtomicOpSize(StoreInst *SI) {
180 const DataLayout &DL = SI->getModule()->getDataLayout();
181 return DL.getTypeStoreSize(Ty: SI->getValueOperand()->getType());
182}
183
184static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
185 const DataLayout &DL = RMWI->getModule()->getDataLayout();
186 return DL.getTypeStoreSize(Ty: RMWI->getValOperand()->getType());
187}
188
189static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
190 const DataLayout &DL = CASI->getModule()->getDataLayout();
191 return DL.getTypeStoreSize(Ty: CASI->getCompareOperand()->getType());
192}
193
// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering (as opposed to being turned into a __atomic* libcall).
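//
// Illustrative example (not from the original comment): on a target whose
// getMaxAtomicSizeInBitsSupported() returns 64, an i64 atomic access with
// only 4-byte alignment, or any 16-byte atomic operation, fails this check
// and is expanded to a __atomic_* libcall by the caller instead.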
197template <typename Inst>
198static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
199 unsigned Size = getAtomicOpSize(I);
200 Align Alignment = I->getAlign();
201 return Alignment >= Size &&
202 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
203}
204
205bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
206 const auto *Subtarget = TM->getSubtargetImpl(F);
207 if (!Subtarget->enableAtomicExpand())
208 return false;
209 TLI = Subtarget->getTargetLowering();
210 DL = &F.getParent()->getDataLayout();
211
212 SmallVector<Instruction *, 1> AtomicInsts;
213
214 // Changing control-flow while iterating through it is a bad idea, so gather a
215 // list of all atomic instructions before we start.
216 for (Instruction &I : instructions(F))
217 if (I.isAtomic() && !isa<FenceInst>(Val: &I))
218 AtomicInsts.push_back(Elt: &I);
219
220 bool MadeChange = false;
221 for (auto *I : AtomicInsts) {
222 auto LI = dyn_cast<LoadInst>(Val: I);
223 auto SI = dyn_cast<StoreInst>(Val: I);
224 auto RMWI = dyn_cast<AtomicRMWInst>(Val: I);
225 auto CASI = dyn_cast<AtomicCmpXchgInst>(Val: I);
226 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
227
228 // If the Size/Alignment is not supported, replace with a libcall.
229 if (LI) {
230 if (!atomicSizeSupported(TLI, I: LI)) {
231 expandAtomicLoadToLibcall(LI);
232 MadeChange = true;
233 continue;
234 }
235 } else if (SI) {
236 if (!atomicSizeSupported(TLI, I: SI)) {
237 expandAtomicStoreToLibcall(LI: SI);
238 MadeChange = true;
239 continue;
240 }
241 } else if (RMWI) {
242 if (!atomicSizeSupported(TLI, I: RMWI)) {
243 expandAtomicRMWToLibcall(I: RMWI);
244 MadeChange = true;
245 continue;
246 }
247 } else if (CASI) {
248 if (!atomicSizeSupported(TLI, I: CASI)) {
249 expandAtomicCASToLibcall(I: CASI);
250 MadeChange = true;
251 continue;
252 }
253 }
254
255 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
256 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
257 I = LI = convertAtomicLoadToIntegerType(LI);
258 MadeChange = true;
259 } else if (SI &&
260 TLI->shouldCastAtomicStoreInIR(SI) ==
261 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
262 I = SI = convertAtomicStoreToIntegerType(SI);
263 MadeChange = true;
264 } else if (RMWI &&
265 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
266 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
267 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
268 MadeChange = true;
269 } else if (CASI) {
270 // TODO: when we're ready to make the change at the IR level, we can
271 // extend convertCmpXchgToInteger for floating point too.
272 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
273 // TODO: add a TLI hook to control this so that each target can
274 // convert to lowering the original type one at a time.
275 I = CASI = convertCmpXchgToIntegerType(CI: CASI);
276 MadeChange = true;
277 }
278 }
279
280 if (TLI->shouldInsertFencesForAtomic(I)) {
281 auto FenceOrdering = AtomicOrdering::Monotonic;
282 if (LI && isAcquireOrStronger(AO: LI->getOrdering())) {
283 FenceOrdering = LI->getOrdering();
284 LI->setOrdering(AtomicOrdering::Monotonic);
285 } else if (SI && isReleaseOrStronger(AO: SI->getOrdering())) {
286 FenceOrdering = SI->getOrdering();
287 SI->setOrdering(AtomicOrdering::Monotonic);
288 } else if (RMWI && (isReleaseOrStronger(AO: RMWI->getOrdering()) ||
289 isAcquireOrStronger(AO: RMWI->getOrdering()))) {
290 FenceOrdering = RMWI->getOrdering();
291 RMWI->setOrdering(AtomicOrdering::Monotonic);
292 } else if (CASI &&
293 TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) ==
294 TargetLoweringBase::AtomicExpansionKind::None &&
295 (isReleaseOrStronger(AO: CASI->getSuccessOrdering()) ||
296 isAcquireOrStronger(AO: CASI->getSuccessOrdering()) ||
297 isAcquireOrStronger(AO: CASI->getFailureOrdering()))) {
298 // If a compare and swap is lowered to LL/SC, we can do smarter fence
299 // insertion, with a stronger one on the success path than on the
300 // failure path. As a result, fence insertion is directly done by
301 // expandAtomicCmpXchg in that case.
302 FenceOrdering = CASI->getMergedOrdering();
303 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
304 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
305 }
306
307 if (FenceOrdering != AtomicOrdering::Monotonic) {
308 MadeChange |= bracketInstWithFences(I, Order: FenceOrdering);
309 }
310 } else if (I->hasAtomicStore() &&
311 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
312 auto FenceOrdering = AtomicOrdering::Monotonic;
313 if (SI)
314 FenceOrdering = SI->getOrdering();
315 else if (RMWI)
316 FenceOrdering = RMWI->getOrdering();
317 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) !=
318 TargetLoweringBase::AtomicExpansionKind::LLSC)
319 // LLSC is handled in expandAtomicCmpXchg().
320 FenceOrdering = CASI->getSuccessOrdering();
321
322 IRBuilder Builder(I);
323 if (auto TrailingFence =
324 TLI->emitTrailingFence(Builder, Inst: I, Ord: FenceOrdering)) {
325 TrailingFence->moveAfter(MovePos: I);
326 MadeChange = true;
327 }
328 }
329
330 if (LI)
331 MadeChange |= tryExpandAtomicLoad(LI);
332 else if (SI)
333 MadeChange |= tryExpandAtomicStore(SI);
334 else if (RMWI) {
335 // There are two different ways of expanding RMW instructions:
336 // - into a load if it is idempotent
337 // - into a Cmpxchg/LL-SC loop otherwise
338 // we try them in that order.
339
340 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
341 MadeChange = true;
342 } else {
343 MadeChange |= tryExpandAtomicRMW(AI: RMWI);
344 }
345 } else if (CASI)
346 MadeChange |= tryExpandAtomicCmpXchg(CI: CASI);
347 }
348 return MadeChange;
349}
350
351bool AtomicExpandLegacy::runOnFunction(Function &F) {
352
353 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
354 if (!TPC)
355 return false;
356 auto *TM = &TPC->getTM<TargetMachine>();
357 AtomicExpandImpl AE;
358 return AE.run(F, TM);
359}
360
361FunctionPass *llvm::createAtomicExpandLegacyPass() {
362 return new AtomicExpandLegacy();
363}
364
365PreservedAnalyses AtomicExpandPass::run(Function &F,
366 FunctionAnalysisManager &AM) {
367 AtomicExpandImpl AE;
368
369 bool Changed = AE.run(F, TM);
370 if (!Changed)
371 return PreservedAnalyses::all();
372
373 return PreservedAnalyses::none();
374}
375
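// Illustrative note (not from the original source): for a target that asks
// for explicit fences, the caller in run() first downgrades e.g. a
// `store atomic i32 %v, ptr %p seq_cst` to monotonic ordering and then calls
// this to bracket it with the target's leading and trailing fences; what
// exactly is emitted (e.g. `fence seq_cst`) depends on the TLI hooks.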
376bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
377 AtomicOrdering Order) {
378 ReplacementIRBuilder Builder(I, *DL);
379
380 auto LeadingFence = TLI->emitLeadingFence(Builder, Inst: I, Ord: Order);
381
382 auto TrailingFence = TLI->emitTrailingFence(Builder, Inst: I, Ord: Order);
383 // We have a guard here because not every atomic operation generates a
384 // trailing fence.
385 if (TrailingFence)
386 TrailingFence->moveAfter(MovePos: I);
387
388 return (LeadingFence || TrailingFence);
389}
390
/// Get the iX type with the same bitwidth as T.
IntegerType *
AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}
399
400/// Convert an atomic load of a non-integral type to an integer load of the
401/// equivalent bitwidth. See the function comment on
402/// convertAtomicStoreToIntegerType for background.
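///
/// For example (illustrative, not from the original comment):
///   %v = load atomic float, ptr %p acquire, align 4
/// becomes
///   %v.int = load atomic i32, ptr %p acquire, align 4
///   %v = bitcast i32 %v.int to float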
403LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
404 auto *M = LI->getModule();
405 Type *NewTy = getCorrespondingIntegerType(T: LI->getType(), DL: M->getDataLayout());
406
407 ReplacementIRBuilder Builder(LI, *DL);
408
409 Value *Addr = LI->getPointerOperand();
410
411 auto *NewLI = Builder.CreateLoad(Ty: NewTy, Ptr: Addr);
412 NewLI->setAlignment(LI->getAlign());
413 NewLI->setVolatile(LI->isVolatile());
414 NewLI->setAtomic(Ordering: LI->getOrdering(), SSID: LI->getSyncScopeID());
415 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
416
417 Value *NewVal = Builder.CreateBitCast(V: NewLI, DestTy: LI->getType());
418 LI->replaceAllUsesWith(V: NewVal);
419 LI->eraseFromParent();
420 return NewLI;
421}
422
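// Illustrative example (not from the original source): an xchg of a pointer
// value, e.g. `atomicrmw xchg ptr %p, ptr %v seq_cst`, becomes an i64 xchg
// (assuming 64-bit pointers) with a ptrtoint of the operand and an inttoptr
// of the result; non-pointer values are bitcast instead.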
423AtomicRMWInst *
424AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
425 auto *M = RMWI->getModule();
426 Type *NewTy =
427 getCorrespondingIntegerType(T: RMWI->getType(), DL: M->getDataLayout());
428
429 ReplacementIRBuilder Builder(RMWI, *DL);
430
431 Value *Addr = RMWI->getPointerOperand();
432 Value *Val = RMWI->getValOperand();
433 Value *NewVal = Val->getType()->isPointerTy()
434 ? Builder.CreatePtrToInt(V: Val, DestTy: NewTy)
435 : Builder.CreateBitCast(V: Val, DestTy: NewTy);
436
437 auto *NewRMWI = Builder.CreateAtomicRMW(Op: AtomicRMWInst::Xchg, Ptr: Addr, Val: NewVal,
438 Align: RMWI->getAlign(), Ordering: RMWI->getOrdering(),
439 SSID: RMWI->getSyncScopeID());
440 NewRMWI->setVolatile(RMWI->isVolatile());
441 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
442
443 Value *NewRVal = RMWI->getType()->isPointerTy()
444 ? Builder.CreateIntToPtr(V: NewRMWI, DestTy: RMWI->getType())
445 : Builder.CreateBitCast(V: NewRMWI, DestTy: RMWI->getType());
446 RMWI->replaceAllUsesWith(V: NewRVal);
447 RMWI->eraseFromParent();
448 return NewRMWI;
449}
450
451bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
452 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
453 case TargetLoweringBase::AtomicExpansionKind::None:
454 return false;
455 case TargetLoweringBase::AtomicExpansionKind::LLSC:
456 expandAtomicOpToLLSC(
457 I: LI, ResultTy: LI->getType(), Addr: LI->getPointerOperand(), AddrAlign: LI->getAlign(),
458 MemOpOrder: LI->getOrdering(),
459 PerformOp: [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
460 return true;
461 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
462 return expandAtomicLoadToLL(LI);
463 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
464 return expandAtomicLoadToCmpXchg(LI);
465 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
466 LI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
467 return true;
468 default:
469 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
470 }
471}
472
473bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
474 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
475 case TargetLoweringBase::AtomicExpansionKind::None:
476 return false;
477 case TargetLoweringBase::AtomicExpansionKind::Expand:
478 expandAtomicStore(SI);
479 return true;
480 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
481 SI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
482 return true;
483 default:
484 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
485 }
486}
487
488bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
489 ReplacementIRBuilder Builder(LI, *DL);
490
491 // On some architectures, load-linked instructions are atomic for larger
492 // sizes than normal loads. For example, the only 64-bit load guaranteed
493 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
494 Value *Val = TLI->emitLoadLinked(Builder, ValueTy: LI->getType(),
495 Addr: LI->getPointerOperand(), Ord: LI->getOrdering());
496 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
497
498 LI->replaceAllUsesWith(V: Val);
499 LI->eraseFromParent();
500
501 return true;
502}
503
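// Illustrative note (not from the original source): this turns e.g.
// `load atomic i32, ptr %p acquire, align 4` into
// `cmpxchg ptr %p, i32 0, i32 0 acquire acquire`; the first element of the
// returned pair is the value that was in memory, which serves as the result
// of the original load.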
504bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
505 ReplacementIRBuilder Builder(LI, *DL);
506 AtomicOrdering Order = LI->getOrdering();
507 if (Order == AtomicOrdering::Unordered)
508 Order = AtomicOrdering::Monotonic;
509
510 Value *Addr = LI->getPointerOperand();
511 Type *Ty = LI->getType();
512 Constant *DummyVal = Constant::getNullValue(Ty);
513
514 Value *Pair = Builder.CreateAtomicCmpXchg(
515 Ptr: Addr, Cmp: DummyVal, New: DummyVal, Align: LI->getAlign(), SuccessOrdering: Order,
516 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: Order));
517 Value *Loaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "loaded");
518
519 LI->replaceAllUsesWith(V: Loaded);
520 LI->eraseFromParent();
521
522 return true;
523}
524
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
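///
/// For example (illustrative, not from the original comment):
///   store atomic double %v, ptr %p release, align 8
/// becomes
///   %v.int = bitcast double %v to i64
///   store atomic i64 %v.int, ptr %p release, align 8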
533StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
534 ReplacementIRBuilder Builder(SI, *DL);
535 auto *M = SI->getModule();
536 Type *NewTy = getCorrespondingIntegerType(T: SI->getValueOperand()->getType(),
537 DL: M->getDataLayout());
538 Value *NewVal = Builder.CreateBitCast(V: SI->getValueOperand(), DestTy: NewTy);
539
540 Value *Addr = SI->getPointerOperand();
541
542 StoreInst *NewSI = Builder.CreateStore(Val: NewVal, Ptr: Addr);
543 NewSI->setAlignment(SI->getAlign());
544 NewSI->setVolatile(SI->isVolatile());
545 NewSI->setAtomic(Ordering: SI->getOrdering(), SSID: SI->getSyncScopeID());
546 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
547 SI->eraseFromParent();
548 return NewSI;
549}
550
551void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
552 // This function is only called on atomic stores that are too large to be
553 // atomic if implemented as a native store. So we replace them by an
554 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
555 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
556 // It is the responsibility of the target to only signal expansion via
557 // shouldExpandAtomicRMW in cases where this is required and possible.
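  // Illustrative example (not from the original comment):
  //   store atomic i64 %v, ptr %p seq_cst, align 8
  // becomes
  //   atomicrmw xchg ptr %p, i64 %v seq_cst
  // which tryExpandAtomicRMW below may then expand further.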
558 ReplacementIRBuilder Builder(SI, *DL);
559 AtomicOrdering Ordering = SI->getOrdering();
560 assert(Ordering != AtomicOrdering::NotAtomic);
561 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
562 ? AtomicOrdering::Monotonic
563 : Ordering;
564 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
565 Op: AtomicRMWInst::Xchg, Ptr: SI->getPointerOperand(), Val: SI->getValueOperand(),
566 Align: SI->getAlign(), Ordering: RMWOrdering);
567 SI->eraseFromParent();
568
569 // Now we have an appropriate swap instruction, lower it as usual.
570 tryExpandAtomicRMW(AI);
571}
572
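// Helper passed as the CreateCmpXchgInstFun callback when expanding an RMW
// into a cmpxchg loop. Illustrative note (not from the original source): for
// a floating-point RMW such as fadd on float, the expected and new values are
// bitcast to i32, the cmpxchg itself is performed on i32, and the loaded
// result is bitcast back to float.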
573static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
574 Value *Loaded, Value *NewVal, Align AddrAlign,
575 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
576 Value *&Success, Value *&NewLoaded) {
577 Type *OrigTy = NewVal->getType();
578
579 // This code can go away when cmpxchg supports FP and vector types.
580 assert(!OrigTy->isPointerTy());
581 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
582 if (NeedBitcast) {
583 IntegerType *IntTy = Builder.getIntNTy(N: OrigTy->getPrimitiveSizeInBits());
584 NewVal = Builder.CreateBitCast(V: NewVal, DestTy: IntTy);
585 Loaded = Builder.CreateBitCast(V: Loaded, DestTy: IntTy);
586 }
587
588 Value *Pair = Builder.CreateAtomicCmpXchg(
589 Ptr: Addr, Cmp: Loaded, New: NewVal, Align: AddrAlign, SuccessOrdering: MemOpOrder,
590 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
591 Success = Builder.CreateExtractValue(Agg: Pair, Idxs: 1, Name: "success");
592 NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "newloaded");
593
594 if (NeedBitcast)
595 NewLoaded = Builder.CreateBitCast(V: NewLoaded, DestTy: OrigTy);
596}
597
598bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
599 LLVMContext &Ctx = AI->getModule()->getContext();
600 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(RMW: AI);
601 switch (Kind) {
602 case TargetLoweringBase::AtomicExpansionKind::None:
603 return false;
604 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
605 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
606 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
607 if (ValueSize < MinCASSize) {
608 expandPartwordAtomicRMW(I: AI,
609 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::LLSC);
610 } else {
611 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
612 return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
613 Val: AI->getValOperand());
614 };
615 expandAtomicOpToLLSC(I: AI, ResultTy: AI->getType(), Addr: AI->getPointerOperand(),
616 AddrAlign: AI->getAlign(), MemOpOrder: AI->getOrdering(), PerformOp);
617 }
618 return true;
619 }
620 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
621 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
622 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
623 if (ValueSize < MinCASSize) {
624 expandPartwordAtomicRMW(I: AI,
625 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::CmpXChg);
626 } else {
627 SmallVector<StringRef> SSNs;
628 Ctx.getSyncScopeNames(SSNs);
629 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
630 ? "system"
631 : SSNs[AI->getSyncScopeID()];
632 OptimizationRemarkEmitter ORE(AI->getFunction());
633 ORE.emit(RemarkBuilder: [&]() {
634 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
635 << "A compare and swap loop was generated for an atomic "
636 << AI->getOperationName(Op: AI->getOperation()) << " operation at "
637 << MemScope << " memory scope";
638 });
639 expandAtomicRMWToCmpXchg(AI, CreateCmpXchg: createCmpXchgInstFun);
640 }
641 return true;
642 }
643 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
644 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
645 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
646 if (ValueSize < MinCASSize) {
647 AtomicRMWInst::BinOp Op = AI->getOperation();
648 // Widen And/Or/Xor and give the target another chance at expanding it.
649 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
650 Op == AtomicRMWInst::And) {
651 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
652 return true;
653 }
654 }
655 expandAtomicRMWToMaskedIntrinsic(AI);
656 return true;
657 }
658 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
659 TLI->emitBitTestAtomicRMWIntrinsic(AI);
660 return true;
661 }
662 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
663 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
664 return true;
665 }
666 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
667 return lowerAtomicRMWInst(RMWI: AI);
668 case TargetLoweringBase::AtomicExpansionKind::Expand:
669 TLI->emitExpandAtomicRMW(AI);
670 return true;
671 default:
672 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
673 }
674}
675
676namespace {
677
678struct PartwordMaskValues {
679 // These three fields are guaranteed to be set by createMaskInstrs.
680 Type *WordType = nullptr;
681 Type *ValueType = nullptr;
682 Type *IntValueType = nullptr;
683 Value *AlignedAddr = nullptr;
684 Align AlignedAddrAlignment;
685 // The remaining fields can be null.
686 Value *ShiftAmt = nullptr;
687 Value *Mask = nullptr;
688 Value *Inv_Mask = nullptr;
689};
690
691LLVM_ATTRIBUTE_UNUSED
692raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
693 auto PrintObj = [&O](auto *V) {
694 if (V)
695 O << *V;
696 else
697 O << "nullptr";
698 O << '\n';
699 };
700 O << "PartwordMaskValues {\n";
701 O << " WordType: ";
702 PrintObj(PMV.WordType);
703 O << " ValueType: ";
704 PrintObj(PMV.ValueType);
705 O << " AlignedAddr: ";
706 PrintObj(PMV.AlignedAddr);
707 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
708 O << " ShiftAmt: ";
709 PrintObj(PMV.ShiftAmt);
710 O << " Mask: ";
711 PrintObj(PMV.Mask);
712 O << " Inv_Mask: ";
713 PrintObj(PMV.Inv_Mask);
714 O << "}\n";
715 return O;
716}
717
718} // end anonymous namespace
719
720/// This is a helper function which builds instructions to provide
721/// values necessary for partword atomic operations. It takes an
722/// incoming address, Addr, and ValueType, and constructs the address,
723/// shift-amounts and masks needed to work with a larger value of size
724/// WordSize.
725///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignedAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignedAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
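///
/// For example (illustrative, not from the original comment): on a
/// little-endian target with a 32-bit minimum cmpxchg width, an i8 located at
/// an address with Addr % 4 == 1 yields roughly:
///   AlignedAddr = Addr with the low 2 bits cleared
///   ShiftAmt    = 8
///   Mask        = 0x0000FF00
///   Inv_Mask    = 0xFFFF00FF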
736static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
737 Instruction *I, Type *ValueType,
738 Value *Addr, Align AddrAlign,
739 unsigned MinWordSize) {
740 PartwordMaskValues PMV;
741
742 Module *M = I->getModule();
743 LLVMContext &Ctx = M->getContext();
744 const DataLayout &DL = M->getDataLayout();
745 unsigned ValueSize = DL.getTypeStoreSize(Ty: ValueType);
746
747 PMV.ValueType = PMV.IntValueType = ValueType;
748 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
749 PMV.IntValueType =
750 Type::getIntNTy(C&: Ctx, N: ValueType->getPrimitiveSizeInBits());
751
752 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(C&: Ctx, N: MinWordSize * 8)
753 : ValueType;
754 if (PMV.ValueType == PMV.WordType) {
755 PMV.AlignedAddr = Addr;
756 PMV.AlignedAddrAlignment = AddrAlign;
757 PMV.ShiftAmt = ConstantInt::get(Ty: PMV.ValueType, V: 0);
758 PMV.Mask = ConstantInt::get(Ty: PMV.ValueType, V: ~0, /*isSigned*/ IsSigned: true);
759 return PMV;
760 }
761
762 PMV.AlignedAddrAlignment = Align(MinWordSize);
763
764 assert(ValueSize < MinWordSize);
765
766 PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
767 IntegerType *IntTy = DL.getIntPtrType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
768 Value *PtrLSB;
769
770 if (AddrAlign < MinWordSize) {
771 PMV.AlignedAddr = Builder.CreateIntrinsic(
772 Intrinsic::ptrmask, {PtrTy, IntTy},
773 {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, nullptr,
774 "AlignedAddr");
775
776 Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
777 PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
778 } else {
    // If the alignment is high enough, the low bits are known to be 0.
780 PMV.AlignedAddr = Addr;
781 PtrLSB = ConstantInt::getNullValue(Ty: IntTy);
782 }
783
784 if (DL.isLittleEndian()) {
785 // turn bytes into bits
786 PMV.ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
787 } else {
788 // turn bytes into bits, and count from the other side.
789 PMV.ShiftAmt = Builder.CreateShl(
790 LHS: Builder.CreateXor(LHS: PtrLSB, RHS: MinWordSize - ValueSize), RHS: 3);
791 }
792
793 PMV.ShiftAmt = Builder.CreateTrunc(V: PMV.ShiftAmt, DestTy: PMV.WordType, Name: "ShiftAmt");
794 PMV.Mask = Builder.CreateShl(
795 LHS: ConstantInt::get(Ty: PMV.WordType, V: (1 << (ValueSize * 8)) - 1), RHS: PMV.ShiftAmt,
796 Name: "Mask");
797
798 PMV.Inv_Mask = Builder.CreateNot(V: PMV.Mask, Name: "Inv_Mask");
799
800 return PMV;
801}
802
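// Illustrative note (not from the original source): with the PMV example
// above (i8 at byte offset 1, ShiftAmt = 8), extractMaskedValue lshr's the
// wide word by 8 and truncates to i8, while insertMaskedValue zero-extends
// the i8, shifts it left by 8, and merges it into the wide word using
// Inv_Mask so the other bytes are left unchanged.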
803static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
804 const PartwordMaskValues &PMV) {
805 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
806 if (PMV.WordType == PMV.ValueType)
807 return WideWord;
808
809 Value *Shift = Builder.CreateLShr(LHS: WideWord, RHS: PMV.ShiftAmt, Name: "shifted");
810 Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: PMV.IntValueType, Name: "extracted");
811 return Builder.CreateBitCast(V: Trunc, DestTy: PMV.ValueType);
812}
813
814static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
815 Value *Updated, const PartwordMaskValues &PMV) {
816 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
817 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
818 if (PMV.WordType == PMV.ValueType)
819 return Updated;
820
821 Updated = Builder.CreateBitCast(V: Updated, DestTy: PMV.IntValueType);
822
823 Value *ZExt = Builder.CreateZExt(V: Updated, DestTy: PMV.WordType, Name: "extended");
824 Value *Shift =
825 Builder.CreateShl(LHS: ZExt, RHS: PMV.ShiftAmt, Name: "shifted", /*HasNUW*/ true);
826 Value *And = Builder.CreateAnd(LHS: WideWord, RHS: PMV.Inv_Mask, Name: "unmasked");
827 Value *Or = Builder.CreateOr(LHS: And, RHS: Shift, Name: "inserted");
828 return Or;
829}
830
831/// Emit IR to implement a masked version of a given atomicrmw
832/// operation. (That is, only the bits under the Mask should be
833/// affected by the operation)
834static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
835 IRBuilderBase &Builder, Value *Loaded,
836 Value *Shifted_Inc, Value *Inc,
837 const PartwordMaskValues &PMV) {
838 // TODO: update to use
839 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
840 // to merge bits from two values without requiring PMV.Inv_Mask.
841 switch (Op) {
842 case AtomicRMWInst::Xchg: {
843 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
844 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Shifted_Inc);
845 return FinalVal;
846 }
847 case AtomicRMWInst::Or:
848 case AtomicRMWInst::Xor:
849 case AtomicRMWInst::And:
850 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
851 case AtomicRMWInst::Add:
852 case AtomicRMWInst::Sub:
853 case AtomicRMWInst::Nand: {
854 // The other arithmetic ops need to be masked into place.
855 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Val: Shifted_Inc);
856 Value *NewVal_Masked = Builder.CreateAnd(LHS: NewVal, RHS: PMV.Mask);
857 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
858 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Masked);
859 return FinalVal;
860 }
861 case AtomicRMWInst::Max:
862 case AtomicRMWInst::Min:
863 case AtomicRMWInst::UMax:
864 case AtomicRMWInst::UMin:
865 case AtomicRMWInst::FAdd:
866 case AtomicRMWInst::FSub:
867 case AtomicRMWInst::FMin:
868 case AtomicRMWInst::FMax:
869 case AtomicRMWInst::UIncWrap:
870 case AtomicRMWInst::UDecWrap: {
871 // Finally, other ops will operate on the full value, so truncate down to
872 // the original size, and expand out again after doing the
873 // operation. Bitcasts will be inserted for FP values.
874 Value *Loaded_Extract = extractMaskedValue(Builder, WideWord: Loaded, PMV);
875 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded: Loaded_Extract, Val: Inc);
876 Value *FinalVal = insertMaskedValue(Builder, WideWord: Loaded, Updated: NewVal, PMV);
877 return FinalVal;
878 }
879 default:
880 llvm_unreachable("Unknown atomic op");
881 }
882}
883
884/// Expand a sub-word atomicrmw operation into an appropriate
885/// word-sized operation.
886///
887/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
888/// way as a typical atomicrmw expansion. The only difference here is
889/// that the operation inside of the loop may operate upon only a
890/// part of the value.
891void AtomicExpandImpl::expandPartwordAtomicRMW(
892 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
893 // Widen And/Or/Xor and give the target another chance at expanding it.
894 AtomicRMWInst::BinOp Op = AI->getOperation();
895 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
896 Op == AtomicRMWInst::And) {
897 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
898 return;
899 }
900 AtomicOrdering MemOpOrder = AI->getOrdering();
901 SyncScope::ID SSID = AI->getSyncScopeID();
902
903 ReplacementIRBuilder Builder(AI, *DL);
904
905 PartwordMaskValues PMV =
906 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
907 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
908
909 Value *ValOperand_Shifted = nullptr;
910 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
911 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
912 ValOperand_Shifted =
913 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: PMV.WordType),
914 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
915 }
916
917 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
918 return performMaskedAtomicOp(Op, Builder, Loaded, Shifted_Inc: ValOperand_Shifted,
919 Inc: AI->getValOperand(), PMV);
920 };
921
922 Value *OldResult;
923 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
924 OldResult = insertRMWCmpXchgLoop(Builder, ResultType: PMV.WordType, Addr: PMV.AlignedAddr,
925 AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder, SSID,
926 PerformOp: PerformPartwordOp, CreateCmpXchg: createCmpXchgInstFun);
927 } else {
928 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
929 OldResult = insertRMWLLSCLoop(Builder, ResultTy: PMV.WordType, Addr: PMV.AlignedAddr,
930 AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder,
931 PerformOp: PerformPartwordOp);
932 }
933
934 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
935 AI->replaceAllUsesWith(V: FinalOldResult);
936 AI->eraseFromParent();
937}
938
939// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
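// Illustrative example (not from the original comment): an i8 `atomicrmw and`
// is widened to an i32 `and` whose operand has the bytes outside the i8's
// position forced to all-ones (by OR'ing the shifted value with Inv_Mask), so
// those bytes are preserved; for `or` and `xor`, the zero-extended, shifted
// operand already leaves the other bytes unchanged.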
940AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
941 ReplacementIRBuilder Builder(AI, *DL);
942 AtomicRMWInst::BinOp Op = AI->getOperation();
943
944 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
945 Op == AtomicRMWInst::And) &&
946 "Unable to widen operation");
947
948 PartwordMaskValues PMV =
949 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
950 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
951
952 Value *ValOperand_Shifted =
953 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: PMV.WordType),
954 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
955
956 Value *NewOperand;
957
958 if (Op == AtomicRMWInst::And)
959 NewOperand =
960 Builder.CreateOr(LHS: ValOperand_Shifted, RHS: PMV.Inv_Mask, Name: "AndOperand");
961 else
962 NewOperand = ValOperand_Shifted;
963
964 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
965 Op, Ptr: PMV.AlignedAddr, Val: NewOperand, Align: PMV.AlignedAddrAlignment,
966 Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
967 // TODO: Preserve metadata
968
969 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: NewAI, PMV);
970 AI->replaceAllUsesWith(V: FinalOldResult);
971 AI->eraseFromParent();
972 return NewAI;
973}
974
975bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
976 // The basic idea here is that we're expanding a cmpxchg of a
977 // smaller memory size up to a word-sized cmpxchg. To do this, we
978 // need to add a retry-loop for strong cmpxchg, so that
979 // modifications to other parts of the word don't cause a spurious
980 // failure.
981
982 // This generates code like the following:
983 // [[Setup mask values PMV.*]]
984 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
985 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
986 // %InitLoaded = load i32* %addr
987 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
988 // br partword.cmpxchg.loop
989 // partword.cmpxchg.loop:
990 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
991 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
992 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
993 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
994 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
995 // i32 %FullWord_NewVal success_ordering failure_ordering
996 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
997 // %Success = extractvalue { i32, i1 } %NewCI, 1
998 // br i1 %Success, label %partword.cmpxchg.end,
999 // label %partword.cmpxchg.failure
1000 // partword.cmpxchg.failure:
1001 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1002 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1003 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1004 // label %partword.cmpxchg.end
1005 // partword.cmpxchg.end:
1006 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1007 // %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
1010
1011 Value *Addr = CI->getPointerOperand();
1012 Value *Cmp = CI->getCompareOperand();
1013 Value *NewVal = CI->getNewValOperand();
1014
1015 BasicBlock *BB = CI->getParent();
1016 Function *F = BB->getParent();
1017 ReplacementIRBuilder Builder(CI, *DL);
1018 LLVMContext &Ctx = Builder.getContext();
1019
1020 BasicBlock *EndBB =
1021 BB->splitBasicBlock(I: CI->getIterator(), BBName: "partword.cmpxchg.end");
1022 auto FailureBB =
1023 BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.failure", Parent: F, InsertBefore: EndBB);
1024 auto LoopBB = BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.loop", Parent: F, InsertBefore: FailureBB);
1025
1026 // The split call above "helpfully" added a branch at the end of BB
1027 // (to the wrong place).
1028 std::prev(x: BB->end())->eraseFromParent();
1029 Builder.SetInsertPoint(BB);
1030
1031 PartwordMaskValues PMV =
1032 createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
1033 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1034
1035 // Shift the incoming values over, into the right location in the word.
1036 Value *NewVal_Shifted =
1037 Builder.CreateShl(LHS: Builder.CreateZExt(V: NewVal, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
1038 Value *Cmp_Shifted =
1039 Builder.CreateShl(LHS: Builder.CreateZExt(V: Cmp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
1040
1041 // Load the entire current word, and mask into place the expected and new
1042 // values
1043 LoadInst *InitLoaded = Builder.CreateLoad(Ty: PMV.WordType, Ptr: PMV.AlignedAddr);
1044 InitLoaded->setVolatile(CI->isVolatile());
1045 Value *InitLoaded_MaskOut = Builder.CreateAnd(LHS: InitLoaded, RHS: PMV.Inv_Mask);
1046 Builder.CreateBr(Dest: LoopBB);
1047
1048 // partword.cmpxchg.loop:
1049 Builder.SetInsertPoint(LoopBB);
1050 PHINode *Loaded_MaskOut = Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: 2);
1051 Loaded_MaskOut->addIncoming(V: InitLoaded_MaskOut, BB);
1052
1053 // Mask/Or the expected and new values into place in the loaded word.
1054 Value *FullWord_NewVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Shifted);
1055 Value *FullWord_Cmp = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Cmp_Shifted);
1056 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1057 Ptr: PMV.AlignedAddr, Cmp: FullWord_Cmp, New: FullWord_NewVal, Align: PMV.AlignedAddrAlignment,
1058 SuccessOrdering: CI->getSuccessOrdering(), FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1059 NewCI->setVolatile(CI->isVolatile());
1060 // When we're building a strong cmpxchg, we need a loop, so you
1061 // might think we could use a weak cmpxchg inside. But, using strong
1062 // allows the below comparison for ShouldContinue, and we're
1063 // expecting the underlying cmpxchg to be a machine instruction,
1064 // which is strong anyways.
1065 NewCI->setWeak(CI->isWeak());
1066
1067 Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
1068 Value *Success = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);
1069
1070 if (CI->isWeak())
1071 Builder.CreateBr(Dest: EndBB);
1072 else
1073 Builder.CreateCondBr(Cond: Success, True: EndBB, False: FailureBB);
1074
1075 // partword.cmpxchg.failure:
1076 Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value has
  // been modified. If it has not, the failure must have come from the
  // masked-in part, so abort the cmpxchg; otherwise, loop and retry.
1080 Value *OldVal_MaskOut = Builder.CreateAnd(LHS: OldVal, RHS: PMV.Inv_Mask);
1081 Value *ShouldContinue = Builder.CreateICmpNE(LHS: Loaded_MaskOut, RHS: OldVal_MaskOut);
1082 Builder.CreateCondBr(Cond: ShouldContinue, True: LoopBB, False: EndBB);
1083
1084 // Add the second value to the phi from above
1085 Loaded_MaskOut->addIncoming(V: OldVal_MaskOut, BB: FailureBB);
1086
1087 // partword.cmpxchg.end:
1088 Builder.SetInsertPoint(CI);
1089
1090 Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1091 Value *Res = PoisonValue::get(T: CI->getType());
1092 Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
1093 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1094
1095 CI->replaceAllUsesWith(V: Res);
1096 CI->eraseFromParent();
1097 return true;
1098}
1099
1100void AtomicExpandImpl::expandAtomicOpToLLSC(
1101 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1102 AtomicOrdering MemOpOrder,
1103 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1104 ReplacementIRBuilder Builder(I, *DL);
1105 Value *Loaded = insertRMWLLSCLoop(Builder, ResultTy: ResultType, Addr, AddrAlign,
1106 MemOpOrder, PerformOp);
1107
1108 I->replaceAllUsesWith(V: Loaded);
1109 I->eraseFromParent();
1110}
1111
1112void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1113 ReplacementIRBuilder Builder(AI, *DL);
1114
1115 PartwordMaskValues PMV =
1116 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1117 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1118
1119 // The value operand must be sign-extended for signed min/max so that the
1120 // target's signed comparison instructions can be used. Otherwise, just
1121 // zero-ext.
1122 Instruction::CastOps CastOp = Instruction::ZExt;
1123 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1124 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1125 CastOp = Instruction::SExt;
1126
1127 Value *ValOperand_Shifted = Builder.CreateShl(
1128 LHS: Builder.CreateCast(Op: CastOp, V: AI->getValOperand(), DestTy: PMV.WordType),
1129 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
1130 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1131 Builder, AI, AlignedAddr: PMV.AlignedAddr, Incr: ValOperand_Shifted, Mask: PMV.Mask, ShiftAmt: PMV.ShiftAmt,
1132 Ord: AI->getOrdering());
1133 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
1134 AI->replaceAllUsesWith(V: FinalOldResult);
1135 AI->eraseFromParent();
1136}
1137
1138void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1139 AtomicCmpXchgInst *CI) {
1140 ReplacementIRBuilder Builder(CI, *DL);
1141
1142 PartwordMaskValues PMV = createMaskInstrs(
1143 Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr: CI->getPointerOperand(),
1144 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1145
1146 Value *CmpVal_Shifted = Builder.CreateShl(
1147 LHS: Builder.CreateZExt(V: CI->getCompareOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1148 Name: "CmpVal_Shifted");
1149 Value *NewVal_Shifted = Builder.CreateShl(
1150 LHS: Builder.CreateZExt(V: CI->getNewValOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1151 Name: "NewVal_Shifted");
1152 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1153 Builder, CI, AlignedAddr: PMV.AlignedAddr, CmpVal: CmpVal_Shifted, NewVal: NewVal_Shifted, Mask: PMV.Mask,
1154 Ord: CI->getMergedOrdering());
1155 Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1156 Value *Res = PoisonValue::get(T: CI->getType());
1157 Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
1158 Value *Success = Builder.CreateICmpEQ(
1159 LHS: CmpVal_Shifted, RHS: Builder.CreateAnd(LHS: OldVal, RHS: PMV.Mask), Name: "Success");
1160 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1161
1162 CI->replaceAllUsesWith(V: Res);
1163 CI->eraseFromParent();
1164}
1165
1166Value *AtomicExpandImpl::insertRMWLLSCLoop(
1167 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1168 AtomicOrdering MemOpOrder,
1169 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1170 LLVMContext &Ctx = Builder.getContext();
1171 BasicBlock *BB = Builder.GetInsertBlock();
1172 Function *F = BB->getParent();
1173
1174 assert(AddrAlign >=
1175 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1176 "Expected at least natural alignment at this point.");
1177
1178 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1179 //
1180 // The standard expansion we produce is:
1181 // [...]
1182 // atomicrmw.start:
1183 // %loaded = @load.linked(%addr)
1184 // %new = some_op iN %loaded, %incr
1185 // %stored = @store_conditional(%new, %addr)
1186 // %try_again = icmp i32 ne %stored, 0
1187 // br i1 %try_again, label %loop, label %atomicrmw.end
1188 // atomicrmw.end:
1189 // [...]
1190 BasicBlock *ExitBB =
1191 BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1192 BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1193
1194 // The split call above "helpfully" added a branch at the end of BB (to the
1195 // wrong place).
1196 std::prev(x: BB->end())->eraseFromParent();
1197 Builder.SetInsertPoint(BB);
1198 Builder.CreateBr(Dest: LoopBB);
1199
1200 // Start the main loop block now that we've taken care of the preliminaries.
1201 Builder.SetInsertPoint(LoopBB);
1202 Value *Loaded = TLI->emitLoadLinked(Builder, ValueTy: ResultTy, Addr, Ord: MemOpOrder);
1203
1204 Value *NewVal = PerformOp(Builder, Loaded);
1205
1206 Value *StoreSuccess =
1207 TLI->emitStoreConditional(Builder, Val: NewVal, Addr, Ord: MemOpOrder);
1208 Value *TryAgain = Builder.CreateICmpNE(
1209 LHS: StoreSuccess, RHS: ConstantInt::get(Ty: IntegerType::get(C&: Ctx, NumBits: 32), V: 0), Name: "tryagain");
1210 Builder.CreateCondBr(Cond: TryAgain, True: LoopBB, False: ExitBB);
1211
1212 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1213 return Loaded;
1214}
1215
/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg, so that we can update backends one by
/// one.
1221AtomicCmpXchgInst *
1222AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1223 auto *M = CI->getModule();
1224 Type *NewTy = getCorrespondingIntegerType(T: CI->getCompareOperand()->getType(),
1225 DL: M->getDataLayout());
1226
1227 ReplacementIRBuilder Builder(CI, *DL);
1228
1229 Value *Addr = CI->getPointerOperand();
1230
1231 Value *NewCmp = Builder.CreatePtrToInt(V: CI->getCompareOperand(), DestTy: NewTy);
1232 Value *NewNewVal = Builder.CreatePtrToInt(V: CI->getNewValOperand(), DestTy: NewTy);
1233
1234 auto *NewCI = Builder.CreateAtomicCmpXchg(
1235 Ptr: Addr, Cmp: NewCmp, New: NewNewVal, Align: CI->getAlign(), SuccessOrdering: CI->getSuccessOrdering(),
1236 FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1237 NewCI->setVolatile(CI->isVolatile());
1238 NewCI->setWeak(CI->isWeak());
1239 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1240
1241 Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
1242 Value *Succ = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);
1243
1244 OldVal = Builder.CreateIntToPtr(V: OldVal, DestTy: CI->getCompareOperand()->getType());
1245
1246 Value *Res = PoisonValue::get(T: CI->getType());
1247 Res = Builder.CreateInsertValue(Agg: Res, Val: OldVal, Idxs: 0);
1248 Res = Builder.CreateInsertValue(Agg: Res, Val: Succ, Idxs: 1);
1249
1250 CI->replaceAllUsesWith(V: Res);
1251 CI->eraseFromParent();
1252 return NewCI;
1253}
1254
1255bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1256 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1257 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1258 Value *Addr = CI->getPointerOperand();
1259 BasicBlock *BB = CI->getParent();
1260 Function *F = BB->getParent();
1261 LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
  // should preserve the ordering.
1266 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(I: CI);
1267 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1268 ? AtomicOrdering::Monotonic
1269 : CI->getMergedOrdering();
1270
1271 // In implementations which use a barrier to achieve release semantics, we can
1272 // delay emitting this barrier until we know a store is actually going to be
1273 // attempted. The cost of this delay is that we need 2 copies of the block
1274 // emitting the load-linked, affecting code size.
1275 //
1276 // Ideally, this logic would be unconditional except for the minsize check
1277 // since in other cases the extra blocks naturally collapse down to the
1278 // minimal loop. Unfortunately, this puts too much stress on later
1279 // optimisations so we avoid emitting the extra logic in those cases too.
1280 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1281 SuccessOrder != AtomicOrdering::Monotonic &&
1282 SuccessOrder != AtomicOrdering::Acquire &&
1283 !F->hasMinSize();
1284
1285 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  //     cmpxchg.start:
  //         %unreleasedload = @load.linked(%aligned.addr)
  //         %unreleasedload.extract = extract value from %unreleasedload
  //         %should_store = icmp eq %unreleasedload.extract, %desired
  //         br i1 %should_store, label %cmpxchg.releasingstore,
  //                              label %cmpxchg.nostore
  //     cmpxchg.releasingstore:
  //         fence?
  //         br label cmpxchg.trystore
  //     cmpxchg.trystore:
  //         %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                                [%releasedload, %cmpxchg.releasedload]
  //         %updated.new = insert %new into %loaded.trystore
  //         %stored = @store_conditional(%updated.new, %aligned.addr)
  //         %success = icmp eq i32 %stored, 0
  //         br i1 %success, label %cmpxchg.success,
  //                         label %cmpxchg.releasedload/%cmpxchg.failure
  //     cmpxchg.releasedload:
  //         %releasedload = @load.linked(%aligned.addr)
  //         %releasedload.extract = extract value from %releasedload
  //         %should_store = icmp eq %releasedload.extract, %desired
  //         br i1 %should_store, label %cmpxchg.trystore,
  //                              label %cmpxchg.failure
  //     cmpxchg.success:
  //         fence?
  //         br label %cmpxchg.end
  //     cmpxchg.nostore:
  //         %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                               [%releasedload,
  //                                   %cmpxchg.releasedload/%cmpxchg.trystore]
  //         @load_linked_fail_balance()?
  //         br label %cmpxchg.failure
  //     cmpxchg.failure:
  //         fence?
  //         br label %cmpxchg.end
  //     cmpxchg.end:
  //         %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                            [%loaded.trystore, %cmpxchg.trystore]
  //         %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //         %loaded = extract value from %loaded.exit
  //         %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //         %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now that we're no longer iterating through
  // them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has
    // happened, so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

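// Note (illustrative, not from the original source): an atomicrmw whose value
// operand is the identity for its operation, e.g.
//
//   %old = atomicrmw or ptr %p, i32 0 seq_cst
//
// does not change memory; it only matters for its ordering effects and the
// value it returns, so the target may be able to lower it to a suitably
// fenced atomic load via lowerIdempotentRMWIntoFencedLoad() below.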
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax as idempotent with the
  // corresponding INT_MIN/INT_MAX/... operands.
  default:
    return false;
  }
}

bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

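// Illustrative note (not part of the original source): on a target whose
// minimum cmpxchg width is 32 bits, an i8 or i16 cmpxchg hits the
// ValueSize < MinCASSize case below and is widened into a masked 32-bit
// cmpxchg by expandPartwordCmpXchg().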
bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    return expandAtomicCmpXchg(CI);
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  // FIXME: If FP exceptions are observable, we should force them off for the
  // loop for the FP atomics.
  Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined).
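//
// For example (illustrative): a naturally aligned i32 atomicrmw add can use
// i32 __atomic_fetch_add_4(i32 *, i32, int), whereas an under-aligned or
// oddly sized access has to fall back to the generic, size_t-based
// __atomic_* entry points handled further below.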
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}

void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return ArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    // No atomic libcalls are available for min/max, floating-point, or
    // wrapping increment/decrement operations.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}

void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
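  //
  // Illustrative sketch (not from the original source): "atomicrmw max" has
  // no __atomic_fetch_max_N libcall, so it becomes a loop that recomputes
  // max(%loaded, %val) and retries a cmpxchg; that cmpxchg is then itself
  // expanded into an __atomic_compare_exchange* call by the lambda below.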
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use a sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall, so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN __atomic_load_N(iN *ptr, int ordering)
  //  void __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                   int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations; the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                         int ordering)
  //  bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                 void *desired, int success_order,
  //                                 int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.
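  //
  // Illustrative example (not from the original source): with the sized path,
  // an i32 "atomicrmw add ptr %p, i32 %v" ends up roughly as
  //
  //   %r = call i32 @__atomic_fetch_add_4(ptr %p, i32 %v, i32 <order>)
  //
  // where <order> is the C ABI memory-order value produced by toCABI().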

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertible. For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  Value *PtrVal = PointerOperand;
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}
