AMDGPUPrintfRuntimeBinding.cpp source code [llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp]

1	//=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	// \file
9	//
10	// This pass binds printf calls to a kernel argument pointer that the runtime
11	// later binds to a buffer.
12	//
13	// This pass traverses the functions in the module and converts
14	// each call to printf to a sequence of operations that
15	// store the following into the printf buffer:
16	// - format string (passed as a module's metadata unique ID)
17	// - bitwise copies of printf arguments
18	// The backend passes will need to store metadata in the kernel
19	//===----------------------------------------------------------------------===//
20
21	#include "AMDGPU.h"
22	#include "llvm/ADT/StringExtras.h"
23	#include "llvm/Analysis/ValueTracking.h"
24	#include "llvm/IR/DiagnosticInfo.h"
25	#include "llvm/IR/Dominators.h"
26	#include "llvm/IR/IRBuilder.h"
27	#include "llvm/IR/Instructions.h"
28	#include "llvm/InitializePasses.h"
29	#include "llvm/Support/DataExtractor.h"
30	#include "llvm/TargetParser/Triple.h"
31	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
32
33	using namespace llvm;
34
35	#define DEBUG_TYPE "printfToRuntime"
36	#define DWORD_ALIGN 4
37
38	namespace {
39	class AMDGPUPrintfRuntimeBinding final : public ModulePass {
40
41	public:
42	static char ID;
43
44	explicit AMDGPUPrintfRuntimeBinding();
45
46	private:
47	bool runOnModule(Module &M) override;
48	};
49
50	class AMDGPUPrintfRuntimeBindingImpl {
51	public:
52	AMDGPUPrintfRuntimeBindingImpl() {}
53	bool run(Module &M);
54
55	private:
56	void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
57	StringRef fmt, size_t num_ops) const;
58
59	bool lowerPrintfForGpu(Module &M);
60
61	const DataLayout *TD;
62	SmallVector<CallInst *, `32`> Printfs;
63	};
64	} // namespace
65
// Storage for the pass identification object. It is passed to the ModulePass
// base-class constructor and re-exported below as
// llvm::AMDGPUPrintfRuntimeBindingID.
66	char AMDGPUPrintfRuntimeBinding::ID = `0`;
67
// Registers the pass under the argument string "amdgpu-printf-runtime-binding"
// with the description "AMDGPU Printf lowering".
// NOTE(review): the two trailing `false` flags are presumably the usual
// "CFG only" / "is analysis" arguments of INITIALIZE_PASS_* - confirm.
68	INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,
69	"amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
70	false, false)
// NOTE(review): no getAnalysisUsage() override is visible in this file, so
// these two declared dependencies may be vestigial - confirm before removing.
71	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
72	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
73	INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",
74	"AMDGPU Printf lowering", false, false)
75
// Public alias through which other code can refer to this pass's ID.
76	char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID;
77
78	namespace llvm {
79	ModulePass *createAMDGPUPrintfRuntimeBinding() {
80	return new AMDGPUPrintfRuntimeBinding ();
81	}
82	} // namespace llvm
83
84	AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass (ID) {
85	initializeAMDGPUPrintfRuntimeBindingPass(Registry&: *PassRegistry::getPassRegistry());
86	}
87
88	void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
89	SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
90	size_t NumOps) const {
91	// not all format characters are collected.
92	// At this time the format characters of interest
93	// are %p and %s, which use to know if we
94	// are either storing a literal string or a
95	// pointer to the printf buffer.
96	static const char ConvSpecifiers[] = "cdieEfgGaosuxXp";
97	size_t CurFmtSpecifierIdx = `0`;
98	size_t PrevFmtSpecifierIdx = `0`;
99
100	while ((CurFmtSpecifierIdx = Fmt.find_first_of(
101	Chars: ConvSpecifiers, From: CurFmtSpecifierIdx)) != StringRef::npos) {
102	bool ArgDump = false;
103	StringRef CurFmt = Fmt.substr(Start: PrevFmtSpecifierIdx,
104	N: CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
105	size_t pTag = CurFmt.find_last_of(C: `'%'`);
106	if (pTag != StringRef::npos) {
107	ArgDump = true;
108	while (pTag && CurFmt [--pTag] == `'%'`) {
109	ArgDump = !ArgDump;
110	}
111	}
112
113	if (ArgDump)
114	OpConvSpecifiers.push_back(Elt: Fmt [CurFmtSpecifierIdx]);
115
116	PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
117	}
118	}
119
120	static bool shouldPrintAsStr(char Specifier, Type *OpType) {
121	return Specifier == `'s'` && isa<PointerType>(Val: OpType);
122	}
123
124	constexpr StringLiteral NonLiteralStr("???");
125	static_assert(NonLiteralStr.size() == `3`);
126
127	static StringRef getAsConstantStr(Value *V) {
128	StringRef S;
129	if (!getConstantStringInfo(V, Str&: S))
130	S = NonLiteralStr;
131
132	return S;
133	}
134
135	static void diagnoseInvalidFormatString(const CallBase *CI) {
136	DiagnosticInfoUnsupported UnsupportedFormatStr(
137	*CI->getParent()->getParent(),
138	"printf format string must be a trivially resolved constant string "
139	"global variable",
140	CI->getDebugLoc());
141	CI->getContext().diagnose(DI: UnsupportedFormatStr);
142	}
143
// Lowers every call collected in Printfs. For each call whose format operand
// resolves to a constant string this function:
//   1. appends "<id>:<num args>:<size>:...:<escaped format>" as a new operand
//      of the "llvm.printf.fmts" named metadata,
//   2. inserts a call to __printf_alloc(<total byte count>),
//   3. replaces any uses of the printf result with a value derived from the
//      allocation's null check,
//   4. splits the block so that the stores execute only when the returned
//      pointer is non-null, and
//   5. stores the id followed by the argument values into the buffer.
// Calls with a non-constant format are skipped (and diagnosed unless the
// operand is undef or null). All collected calls, including skipped ones, are
// erased at the end. Always returns true.
144	bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
145	LLVMContext &Ctx = M.getContext();
146	IRBuilder<> Builder(Ctx);
147	Type *I32Ty = Type::getInt32Ty(C&: Ctx);
148
149	// Instead of creating global variables, the printf format strings are
150	// extracted and passed as metadata. This avoids polluting llvm's symbol
151	// tables in this module. Metadata is going to be extracted by the backend
152	// passes and inserted into the OpenCL binary as appropriate.
153	NamedMDNode *metaD = M.getOrInsertNamedMetadata(Name: "llvm.printf.fmts");
// Ids continue from the number of entries already present; the id is
// pre-incremented before use below, so the first id handed out here is
// (existing operand count + 1).
154	unsigned UniqID = metaD->getNumOperands();
155
156	for (auto *CI : Printfs) {
157	unsigned NumOps = CI->arg_size();
158
159	SmallString<`16`> OpConvSpecifiers;
160	Value *Op = CI->getArgOperand(i: `0`);
161
162	StringRef FormatStr;
163	if (!getConstantStringInfo(V: Op, Str&: FormatStr)) {
// Format is not a constant string: report it unless the operand is undef or
// null, then skip lowering. The call is still erased by the loop at the end.
164	Value *Stripped = Op->stripPointerCasts();
165	if (!isa<UndefValue>(Val: Stripped) && !isa<ConstantPointerNull>(Val: Stripped))
166	diagnoseInvalidFormatString(CI);
167	continue;
168	}
169
170	// We need this call to ascertain that we are printing a string or a
171	// pointer. It takes out the specifiers and fills up the first arg.
172	getConversionSpecifiers(OpConvSpecifiers, Fmt: FormatStr, NumOps: NumOps - `1`);
173
174	// Add metadata for the string
175	std::string AStreamHolder;
176	raw_string_ostream Sizes(AStreamHolder);
// Running total of bytes to allocate. It starts at DWORD_ALIGN (4), which
// matches the 32-bit id stored at the start of the buffer further down.
177	int Sum = DWORD_ALIGN;
178	Sizes << CI->arg_size() - `1`;
179	Sizes << `':'`;
// First pass over the arguments: compute each argument's size in the buffer,
// widening arguments whose size is not a multiple of 4 bytes. Only arguments
// that have a matching collected conversion specifier are visited.
180	for (unsigned ArgCount = `1`;
181	ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
182	ArgCount++) {
183	Value *Arg = CI->getArgOperand(i: ArgCount);
184	Type *ArgType = Arg->getType();
185	unsigned ArgSize = TD->getTypeAllocSize(Ty: ArgType);
186	//
187	// ArgSize by design should be a multiple of DWORD_ALIGN,
188	// expand the arguments that do not follow this rule.
189	//
190	if (ArgSize % DWORD_ALIGN != `0`) {
// Target type is i32, or a vector of i32 with the same element count.
191	Type *ResType = Type::getInt32Ty(C&: Ctx);
192	if (auto *VecType = dyn_cast<VectorType>(Val: ArgType))
193	ResType = VectorType::get(ElementType: ResType, EC: VecType->getElementCount());
194	Builder.SetInsertPoint(CI);
195	Builder.SetCurrentDebugLocation(CI->getDebugLoc());
196
// Scalar floating-point values are first reinterpreted as an integer of the
// same bit width so they can be integer-extended below.
197	if (ArgType->isFloatingPointTy()) {
198	Arg = Builder.CreateBitCast(
199	V: Arg,
200	DestTy: IntegerType::getIntNTy(C&: Ctx, N: ArgType->getPrimitiveSizeInBits()));
201	}
202
// x, X, u and o conversions zero-extend; every other conversion sign-extends.
203	if (OpConvSpecifiers [ArgCount - `1`] == `'x'` \|\|
204	OpConvSpecifiers [ArgCount - `1`] == `'X'` \|\|
205	OpConvSpecifiers [ArgCount - `1`] == `'u'` \|\|
206	OpConvSpecifiers [ArgCount - `1`] == `'o'`)
207	Arg = Builder.CreateZExt(V: Arg, DestTy: ResType);
208	else
209	Arg = Builder.CreateSExt(V: Arg, DestTy: ResType);
210	ArgType = Arg->getType();
211	ArgSize = TD->getTypeAllocSize(Ty: ArgType);
// The widened value replaces the call operand so the store pass below sees it.
212	CI->setOperand(i_nocapture: ArgCount, Val_nocapture: Arg);
213	}
// For an 'f' conversion, a floating-point constant or a float that was
// extended to double is counted as 4 bytes; the store pass below narrows
// such values back to float.
214	if (OpConvSpecifiers [ArgCount - `1`] == `'f'`) {
215	ConstantFP *FpCons = dyn_cast<ConstantFP>(Val: Arg);
216	if (FpCons)
217	ArgSize = `4`;
218	else {
219	FPExtInst *FpExt = dyn_cast<FPExtInst>(Val: Arg);
220	if (FpExt && FpExt->getType()->isDoubleTy() &&
221	FpExt->getOperand(i_nocapture: `0`)->getType()->isFloatTy())
222	ArgSize = `4`;
223	}
224	}
// A string argument is sized as its resolved length plus one, rounded up to
// a multiple of 4.
// NOTE(review): the store pass below emits one 32-bit word per 4 bytes of the
// string itself, without the extra byte; for a string whose length is a
// multiple of 4 the size recorded here exceeds the bytes stored - confirm
// this is what the runtime expects.
225	if (shouldPrintAsStr(Specifier: OpConvSpecifiers [ArgCount - `1`], OpType: ArgType))
226	ArgSize = alignTo(Value: getAsConstantStr(V: Arg).size() + `1`, Align: `4`);
227
228	LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
229	<< " for type: " << *ArgType << `'\n'`);
230	Sizes << ArgSize << `':'`;
231	Sum += ArgSize;
232	}
233	LLVM_DEBUG(dbgs() << "Printf format string in source = " << FormatStr
234	<< `'\n'`);
// Append the format string to the metadata text, spelling control characters
// and ':' as escape sequences.
235	for (char C : FormatStr) {
236	// Rest of the C escape sequences (e.g. \') are handled correctly
237	// by the MDParser
238	switch (C) {
239	case `'\a'`:
240	Sizes << "\\a";
241	break;
242	case `'\b'`:
243	Sizes << "\\b";
244	break;
245	case `'\f'`:
246	Sizes << "\\f";
247	break;
248	case `'\n'`:
249	Sizes << "\\n";
250	break;
251	case `'\r'`:
252	Sizes << "\\r";
253	break;
254	case `'\v'`:
255	Sizes << "\\v";
256	break;
257	case `':'`:
258	// ':' cannot be scanned by Flex, as it is defined as a delimiter
259	// Replace it with its octal representation \72
260	Sizes << "\\72";
261	break;
262	default:
263	Sizes << C;
264	break;
265	}
266	}
267
268	// Insert the printf_alloc call
269	Builder.SetInsertPoint(CI);
270	Builder.SetCurrentDebugLocation(CI->getDebugLoc());
271
272	AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex,
273	Attribute::NoUnwind);
274
// NOTE(review): SizetTy here, I32Ty above and Int32Ty below are all obtained
// from Type::getInt32Ty(Ctx).
275	Type *SizetTy = Type::getInt32Ty(C&: Ctx);
276
// __printf_alloc takes the 32-bit byte count and returns an i8 pointer in
// address space 1.
277	Type *Tys_alloc[`1`] = {SizetTy};
278	Type *I8Ty = Type::getInt8Ty(C&: Ctx);
279	Type *I8Ptr = PointerType::get(ElementType: I8Ty, AddressSpace: `1`);
280	FunctionType FTy_alloc = FunctionType::get(Result: I8Ptr, Params: Tys_alloc, isVarArg: false*);
281	FunctionCallee PrintfAllocFn =
282	M.getOrInsertFunction(Name: StringRef ("__printf_alloc"), T: FTy_alloc, AttributeList: Attr);
283
284	LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << `'\n'`);
// Metadata entry text: "<id>:" followed by the sizes and escaped format
// accumulated above. UniqID is incremented here and reused for the id store.
285	std::string fmtstr = itostr(X: ++UniqID) + ":" + Sizes.str();
286	MDString *fmtStrArray = MDString::get(Context&: Ctx, Str: fmtstr);
287
288	MDNode *myMD = MDNode::get(Context&: Ctx, MDs: fmtStrArray);
289	metaD->addOperand(M: myMD);
290	Value sumC = ConstantInt::get(Ty: SizetTy, V: Sum, IsSigned: false*);
291	SmallVector<Value *, `1`> alloc_args;
292	alloc_args.push_back(Elt: sumC);
293	CallInst *pcall = CallInst::Create(Func: PrintfAllocFn, Args: alloc_args,
294	NameStr: "printf_alloc_fn", InsertBefore: CI->getIterator());
295
296	//
297	// Insert code to split basicblock with a
298	// piece of hammock code.
299	// basicblock splits after buffer overflow check
300	//
301	ConstantPointerNull *zeroIntPtr =
302	ConstantPointerNull::get(T: PointerType::get(ElementType: I8Ty, AddressSpace: `1`));
// cmp is true when __printf_alloc returned a non-null pointer.
303	auto *cmp = cast<ICmpInst>(Val: Builder.CreateICmpNE(LHS: pcall, RHS: zeroIntPtr, Name: ""));
304	if (!CI->use_empty()) {
// Users of printf's result get sext(!cmp): 0 when the allocation succeeded,
// -1 when it returned null.
305	Value *result =
306	Builder.CreateSExt(V: Builder.CreateNot(V: cmp), DestTy: I32Ty, Name: "printf_res");
307	CI->replaceAllUsesWith(V: result);
308	}
// Split the block at cmp, then insert a conditional block after it; Brnch is
// the instruction before which all buffer stores are inserted.
// NOTE(review): presumably Brnch is the terminator of the new "then" block
// executed when cmp is true - confirm against SplitBlockAndInsertIfThen docs.
309	SplitBlock(CI->getParent(), cmp);
310	Instruction *Brnch =
311	SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false);
312	BasicBlock::iterator BrnchPoint = Brnch->getIterator();
313
314	Builder.SetInsertPoint(Brnch);
315
316	// store unique printf id in the buffer
317	//
318	GetElementPtrInst *BufferIdx = GetElementPtrInst::Create(
319	PointeeType: I8Ty, Ptr: pcall, IdxList: ConstantInt::get(Context&: Ctx, V: APInt (`32`, `0`)), NameStr: "PrintBuffID",
320	InsertBefore: BrnchPoint);
321
322	Type *idPointer = PointerType::get(ElementType: I32Ty, AddressSpace: AMDGPUAS::GLOBAL_ADDRESS);
323	Value *id_gep_cast =
324	new BitCastInst (BufferIdx, idPointer, "PrintBuffIdCast", BrnchPoint);
325
326	new StoreInst (ConstantInt::get(Ty: I32Ty, V: UniqID), id_gep_cast, BrnchPoint);
327
328	// 1st 4 bytes hold the printf_id
329	// the following GEP is the buffer pointer
330	BufferIdx = GetElementPtrInst::Create(PointeeType: I8Ty, Ptr: pcall,
331	IdxList: ConstantInt::get(Context&: Ctx, V: APInt (`32`, `4`)),
332	NameStr: "PrintBuffGep", InsertBefore: BrnchPoint);
333
334	Type *Int32Ty = Type::getInt32Ty(C&: Ctx);
// Second pass over the arguments: build the list of values to store for each
// argument (WhatToStore) and emit the stores, advancing BufferIdx in between.
335	for (unsigned ArgCount = `1`;
336	ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
337	ArgCount++) {
338	Value *Arg = CI->getArgOperand(i: ArgCount);
339	Type *ArgType = Arg->getType();
340	SmallVector<Value *, `32`> WhatToStore;
// Scalar floating point: for an 'f' conversion store a single-precision
// value when the argument is a constant or a float extended to double.
341	if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(Val: ArgType)) {
342	if (OpConvSpecifiers [ArgCount - `1`] == `'f'`) {
343	if (auto *FpCons = dyn_cast<ConstantFP>(Val: Arg)) {
344	APFloat Val(FpCons->getValueAPF());
345	bool Lost = false;
346	Val.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
347	losesInfo: &Lost);
348	Arg = ConstantFP::get(Context&: Ctx, V: Val);
349	} else if (auto *FpExt = dyn_cast<FPExtInst>(Val: Arg)) {
350	if (FpExt->getType()->isDoubleTy() &&
351	FpExt->getOperand(i_nocapture: `0`)->getType()->isFloatTy()) {
352	Arg = FpExt->getOperand(i_nocapture: `0`);
353	}
354	}
355	}
356	WhatToStore.push_back(Elt: Arg);
357	} else if (isa<PointerType>(Val: ArgType)) {
358	if (shouldPrintAsStr(Specifier: OpConvSpecifiers [ArgCount - `1`], OpType: ArgType)) {
// %s with a pointer argument: the string contents (or the placeholder for a
// non-constant string) are embedded in the buffer as 32-bit constants.
359	StringRef S = getAsConstantStr(V: Arg);
360	if (!S.empty()) {
361	const uint64_t ReadSize = `4`;
362
// Read the string little-endian, up to 4 bytes at a time; the last read
// covers the remaining 1 to 4 bytes.
363	DataExtractor Extractor(S, /IsLittleEndian=/true, `8`);
364	DataExtractor::Cursor Offset(`0`);
365	while (Offset && Offset.tell() < S.size()) {
366	uint64_t ReadNow = std::min(a: ReadSize, b: S.size() - Offset.tell());
367	uint64_t ReadBytes = `0`;
368	switch (ReadNow) {
369	default: llvm_unreachable("min(4, X) > 4?");
370	case `1`:
371	ReadBytes = Extractor.getU8(C&: Offset);
372	break;
373	case `2`:
374	ReadBytes = Extractor.getU16(C&: Offset);
375	break;
376	case `3`:
377	ReadBytes = Extractor.getU24(C&: Offset);
378	break;
379	case `4`:
380	ReadBytes = Extractor.getU32(C&: Offset);
381	break;
382	}
383
384	cantFail(Err: Offset.takeError(),
385	Msg: "failed to read bytes from constant array");
386
387	APInt IntVal(`8` * ReadSize, ReadBytes);
388
389	// TODO: Should not bother aligning up.
// NOTE(review): IntVal is constructed 8 * ReadSize bits wide just above, so
// this zext to the same width looks like a no-op.
390	if (ReadNow < ReadSize)
391	IntVal = IntVal.zext(width: `8` * ReadSize);
392
393	Type *IntTy = Type::getIntNTy(C&: Ctx, N: IntVal.getBitWidth());
394	WhatToStore.push_back(Elt: ConstantInt::get(Ty: IntTy, V: IntVal));
395	}
396	} else {
397	// Empty string, give a hint to RT it is not NULL
398	Value ANumV = ConstantInt::get(Ty: Int32Ty, V: `0xFFFFFF00`, IsSigned: false*);
399	WhatToStore.push_back(Elt: ANumV);
400	}
401	} else {
// Pointer argument with a non-'s' conversion: store the pointer value itself.
402	WhatToStore.push_back(Elt: Arg);
403	}
404	} else {
// Everything else (integers, vectors, ...) is stored as-is.
405	WhatToStore.push_back(Elt: Arg);
406	}
407	for (unsigned I = `0`, E = WhatToStore.size(); I != E; ++I) {
408	Value *TheBtCast = WhatToStore [I];
409	unsigned ArgSize = TD->getTypeAllocSize(Ty: TheBtCast->getType());
410	StoreInst StBuff = new* StoreInst (TheBtCast, BufferIdx, BrnchPoint);
411	LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
412	<< *StBuff << `'\n'`);
// StBuff is referenced only inside LLVM_DEBUG; the cast silences the
// unused-variable warning when that macro expands to nothing.
413	(void)StBuff;
// After the last value of the last call argument there is nothing left to
// store, so no further pointer advance is emitted.
414	if (I + `1` == E && ArgCount + `1` == CI->arg_size())
415	break;
// Advance the write pointer by the alloc size of the value just stored.
416	BufferIdx = GetElementPtrInst::Create(
417	PointeeType: I8Ty, Ptr: BufferIdx, IdxList: {ConstantInt::get(Ty: I32Ty, V: ArgSize)},
418	NameStr: "PrintBuffNextPtr", InsertBefore: BrnchPoint);
419	LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
420	<< *BufferIdx << `'\n'`);
421	}
422	}
423	}
424
425	// erase the printf calls
// NOTE(review): this also erases calls that were skipped above via `continue`
// without having their uses replaced - confirm such calls never have users.
426	for (auto *CI : Printfs)
427	CI->eraseFromParent();
428
429	Printfs.clear();
430	return true;
431	}
432
433	bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
434	Triple TT(M.getTargetTriple());
435	if (TT.getArch() == Triple::r600)
436	return false;
437
438	auto PrintfFunction = M.getFunction(Name: "printf");
439	if (!PrintfFunction \|\| !PrintfFunction->isDeclaration())
440	return false;
441
442	for (auto &U : PrintfFunction->uses()) {
443	if (auto *CI = dyn_cast<CallInst>(Val: U.getUser())) {
444	if (CI->isCallee(U: &U) && !CI->isNoBuiltin())
445	Printfs.push_back(Elt: CI);
446	}
447	}
448
449	if (Printfs.empty())
450	return false;
451
452	TD = &M.getDataLayout();
453
454	return lowerPrintfForGpu(M);
455	}
456
457	bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
458	return AMDGPUPrintfRuntimeBindingImpl ().run(M);
459	}
460
461	PreservedAnalyses
462	AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) {
463	bool Changed = AMDGPUPrintfRuntimeBindingImpl ().run(M);
464	return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
465	}
466

source code of llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp