Float2Int.cpp source code [llvm/lib/Transforms/Scalar/Float2Int.cpp]

1	//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the Float2Int pass, which aims to demote floating
10	// point operations to work on integers, where that is losslessly possible.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/Transforms/Scalar/Float2Int.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/APSInt.h"
17	#include "llvm/ADT/SmallVector.h"
18	#include "llvm/Analysis/GlobalsModRef.h"
19	#include "llvm/IR/Constants.h"
20	#include "llvm/IR/Dominators.h"
21	#include "llvm/IR/IRBuilder.h"
22	#include "llvm/IR/Module.h"
23	#include "llvm/Support/CommandLine.h"
24	#include "llvm/Support/Debug.h"
25	#include "llvm/Support/raw_ostream.h"
26	#include <deque>
27
28	#define DEBUG_TYPE "float2int"
29
30	using namespace llvm;
31
32	// The algorithm is simple. Start at instructions that convert from the
33	// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
34	// graph, using an equivalence datastructure to unify graphs that interfere.
35	//
36	// Mappable instructions are those with an integer corollary that, given
37	// integer domain inputs, produce an integer output; fadd, for example.
38	//
39	// If a non-mappable instruction is seen, this entire def-use graph is marked
40	// as non-transformable. If we see an instruction that converts from the
41	// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
42
43	/// The largest integer type worth dealing with.
44	static cl::opt<unsigned>
45	MaxIntegerBW("float2int-max-integer-bw", cl::init(Val: `64`), cl::Hidden,
46	cl::desc ("Max integer bitwidth to consider in float2int"
47	"(default=64)"));
48
49	// Given a FCmp predicate, return a matching ICmp predicate if one
50	// exists, otherwise return BAD_ICMP_PREDICATE.
51	static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
52	switch (P) {
53	case CmpInst::FCMP_OEQ:
54	case CmpInst::FCMP_UEQ:
55	return CmpInst::ICMP_EQ;
56	case CmpInst::FCMP_OGT:
57	case CmpInst::FCMP_UGT:
58	return CmpInst::ICMP_SGT;
59	case CmpInst::FCMP_OGE:
60	case CmpInst::FCMP_UGE:
61	return CmpInst::ICMP_SGE;
62	case CmpInst::FCMP_OLT:
63	case CmpInst::FCMP_ULT:
64	return CmpInst::ICMP_SLT;
65	case CmpInst::FCMP_OLE:
66	case CmpInst::FCMP_ULE:
67	return CmpInst::ICMP_SLE;
68	case CmpInst::FCMP_ONE:
69	case CmpInst::FCMP_UNE:
70	return CmpInst::ICMP_NE;
71	default:
72	return CmpInst::BAD_ICMP_PREDICATE;
73	}
74	}
75
76	// Given a floating point binary operator, return the matching
77	// integer version.
78	static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
79	switch (Opcode) {
80	default: llvm_unreachable("Unhandled opcode!");
81	case Instruction::FAdd: return Instruction::Add;
82	case Instruction::FSub: return Instruction::Sub;
83	case Instruction::FMul: return Instruction::Mul;
84	}
85	}
86
87	// Find the roots - instructions that convert from the FP domain to
88	// integer domain.
89	void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) {
90	for (BasicBlock &BB : F) {
91	// Unreachable code can take on strange forms that we are not prepared to
92	// handle. For example, an instruction may have itself as an operand.
93	if (!DT.isReachableFromEntry(A: &BB))
94	continue;
95
96	for (Instruction &I : BB) {
97	if (isa<VectorType>(Val: I.getType()))
98	continue;
99	switch (I.getOpcode()) {
100	default: break;
101	case Instruction::FPToUI:
102	case Instruction::FPToSI:
103	Roots.insert(X: &I);
104	break;
105	case Instruction::FCmp:
106	if (mapFCmpPred(P: cast<CmpInst>(Val: &I)->getPredicate()) !=
107	CmpInst::BAD_ICMP_PREDICATE)
108	Roots.insert(X: &I);
109	break;
110	}
111	}
112	}
113	}
114
115	// Helper - mark I as having been traversed, having range R.
116	void Float2IntPass::seen(Instruction *I, ConstantRange R) {
117	LLVM_DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
118	auto IT = SeenInsts.find(Key: I);
119	if (IT != SeenInsts.end())
120	IT->second = std::move(R);
121	else
122	SeenInsts.insert(KV: std::make_pair(x&: I, y: std::move(R)));
123	}
124
125	// Helper - get a range representing a poison value.
126	ConstantRange Float2IntPass::badRange() {
127	return ConstantRange::getFull(BitWidth: MaxIntegerBW + `1`);
128	}
129	ConstantRange Float2IntPass::unknownRange() {
130	return ConstantRange::getEmpty(BitWidth: MaxIntegerBW + `1`);
131	}
132	ConstantRange Float2IntPass::validateRange(ConstantRange R) {
133	if (R.getBitWidth() > MaxIntegerBW + `1`)
134	return badRange();
135	return R;
136	}
137
138	// The most obvious way to structure the search is a depth-first, eager
139	// search from each root. However, that requires direct recursion and so
140	// can only handle small instruction sequences. Instead, we split the search
141	// up into two phases:
142	// - walkBackwards: A breadth-first walk of the use-def graph starting from
143	// the roots. Populate "SeenInsts" with interesting
144	// instructions and poison values if they're obvious and
145	// cheap to compute. Calculate the equivalence set structure
146	// while we're here too.
147	// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
148	// defs before their uses. Calculate the real range info.
149
150	// Breadth-first walk of the use-def graph; determine the set of nodes
151	// we care about and eagerly determine if some of them are poisonous.
152	void Float2IntPass::walkBackwards() {
153	std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
154	while (!Worklist.empty()) {
155	Instruction *I = Worklist.back();
156	Worklist.pop_back();
157
158	if (SeenInsts.contains(Key: I))
159	// Seen already.
160	continue;
161
162	switch (I->getOpcode()) {
163	// FIXME: Handle select and phi nodes.
164	default:
165	// Path terminated uncleanly.
166	seen(I, R: badRange());
167	break;
168
169	case Instruction::UIToFP:
170	case Instruction::SIToFP: {
171	// Path terminated cleanly - use the type of the integer input to seed
172	// the analysis.
173	unsigned BW = I->getOperand(i: `0`)->getType()->getPrimitiveSizeInBits();
174	auto Input = ConstantRange::getFull(BitWidth: BW);
175	auto CastOp = (Instruction::CastOps)I->getOpcode();
176	seen(I, R: validateRange(R: Input.castOp(CastOp, BitWidth: MaxIntegerBW+`1`)));
177	continue;
178	}
179
180	case Instruction::FNeg:
181	case Instruction::FAdd:
182	case Instruction::FSub:
183	case Instruction::FMul:
184	case Instruction::FPToUI:
185	case Instruction::FPToSI:
186	case Instruction::FCmp:
187	seen(I, R: unknownRange());
188	break;
189	}
190
191	for (Value *O : I->operands()) {
192	if (Instruction *OI = dyn_cast<Instruction>(Val: O)) {
193	// Unify def-use chains if they interfere.
194	ECs.unionSets(V1: I, V2: OI);
195	if (SeenInsts.find(Key: I)->second != badRange())
196	Worklist.push_back(x: OI);
197	} else if (!isa<ConstantFP>(Val: O)) {
198	// Not an instruction or ConstantFP? we can't do anything.
199	seen(I, R: badRange());
200	}
201	}
202	}
203	}
204
205	// Calculate result range from operand ranges.
206	// Return std::nullopt if the range cannot be calculated yet.
207	std::optional<ConstantRange> Float2IntPass::calcRange(Instruction *I) {
208	SmallVector<ConstantRange, `4`> OpRanges;
209	for (Value *O : I->operands()) {
210	if (Instruction *OI = dyn_cast<Instruction>(Val: O)) {
211	auto OpIt = SeenInsts.find(Key: OI);
212	assert(OpIt != SeenInsts.end() && "def not seen before use!");
213	if (OpIt->second == unknownRange())
214	return std::nullopt; // Wait until operand range has been calculated.
215	OpRanges.push_back(Elt: OpIt->second);
216	} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: O)) {
217	// Work out if the floating point number can be losslessly represented
218	// as an integer.
219	// APFloat::convertToInteger(&Exact) purports to do what we want, but
220	// the exactness can be too precise. For example, negative zero can
221	// never be exactly converted to an integer.
222	//
223	// Instead, we ask APFloat to round itself to an integral value - this
224	// preserves sign-of-zero - then compare the result with the original.
225	//
226	const APFloat &F = CF->getValueAPF();
227
228	// First, weed out obviously incorrect values. Non-finite numbers
229	// can't be represented and neither can negative zero, unless
230	// we're in fast math mode.
231	if (!F.isFinite() \|\|
232	(F.isZero() && F.isNegative() && isa<FPMathOperator>(Val: I) &&
233	!I->hasNoSignedZeros()))
234	return badRange();
235
236	APFloat NewF = F;
237	auto Res = NewF.roundToIntegral(RM: APFloat::rmNearestTiesToEven);
238	if (Res != APFloat::opOK \|\| NewF != F)
239	return badRange();
240
241	// OK, it's representable. Now get it.
242	APSInt Int(MaxIntegerBW+`1`, false);
243	bool Exact;
244	CF->getValueAPF().convertToInteger(Result&: Int,
245	RM: APFloat::rmNearestTiesToEven,
246	IsExact: &Exact);
247	OpRanges.push_back(Elt: ConstantRange (Int));
248	} else {
249	llvm_unreachable("Should have already marked this as badRange!");
250	}
251	}
252
253	switch (I->getOpcode()) {
254	// FIXME: Handle select and phi nodes.
255	default:
256	case Instruction::UIToFP:
257	case Instruction::SIToFP:
258	llvm_unreachable("Should have been handled in walkForwards!");
259
260	case Instruction::FNeg: {
261	assert(OpRanges.size() == `1` && "FNeg is a unary operator!");
262	unsigned Size = OpRanges [`0`].getBitWidth();
263	auto Zero = ConstantRange (APInt::getZero(numBits: Size));
264	return Zero.sub(Other: OpRanges [`0`]);
265	}
266
267	case Instruction::FAdd:
268	case Instruction::FSub:
269	case Instruction::FMul: {
270	assert(OpRanges.size() == `2` && "its a binary operator!");
271	auto BinOp = (Instruction::BinaryOps) I->getOpcode();
272	return OpRanges [`0`].binaryOp(BinOp, Other: OpRanges [`1`]);
273	}
274
275	//
276	// Root-only instructions - we'll only see these if they're the
277	// first node in a walk.
278	//
279	case Instruction::FPToUI:
280	case Instruction::FPToSI: {
281	assert(OpRanges.size() == `1` && "FPTo[US]I is a unary operator!");
282	// Note: We're ignoring the casts output size here as that's what the
283	// caller expects.
284	auto CastOp = (Instruction::CastOps)I->getOpcode();
285	return OpRanges [`0`].castOp(CastOp, BitWidth: MaxIntegerBW+`1`);
286	}
287
288	case Instruction::FCmp:
289	assert(OpRanges.size() == `2` && "FCmp is a binary operator!");
290	return OpRanges [`0`].unionWith(CR: OpRanges [`1`]);
291	}
292	}
293
294	// Walk forwards down the list of seen instructions, so we visit defs before
295	// uses.
296	void Float2IntPass::walkForwards() {
297	std::deque<Instruction *> Worklist;
298	for (const auto &Pair : SeenInsts)
299	if (Pair.second == unknownRange())
300	Worklist.push_back(x: Pair.first);
301
302	while (!Worklist.empty()) {
303	Instruction *I = Worklist.back();
304	Worklist.pop_back();
305
306	if (std::optional<ConstantRange> Range = calcRange(I))
307	seen(I, R: *Range);
308	else
309	Worklist.push_front(x: I); // Reprocess later.
310	}
311	}
312
313	// If there is a valid transform to be done, do it.
314	bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
315	bool MadeChange = false;
316
317	// Iterate over every disjoint partition of the def-use graph.
318	for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) {
319	ConstantRange R(MaxIntegerBW + `1`, false);
320	bool Fail = false;
321	Type ConvertedToTy = nullptr*;
322
323	// For every member of the partition, union all the ranges together.
324	for (auto MI = ECs.member_begin(I: It), ME = ECs.member_end();
325	MI != ME; ++MI) {
326	Instruction I = MI;
327	auto SeenI = SeenInsts.find(Key: I);
328	if (SeenI == SeenInsts.end())
329	continue;
330
331	R = R.unionWith(CR: SeenI->second);
332	// We need to ensure I has no users that have not been seen.
333	// If it does, transformation would be illegal.
334	//
335	// Don't count the roots, as they terminate the graphs.
336	if (!Roots.contains(key: I)) {
337	// Set the type of the conversion while we're here.
338	if (!ConvertedToTy)
339	ConvertedToTy = I->getType();
340	for (User *U : I->users()) {
341	Instruction *UI = dyn_cast<Instruction>(Val: U);
342	if (!UI \|\| !SeenInsts.contains(Key: UI)) {
343	LLVM_DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
344	Fail = true;
345	break;
346	}
347	}
348	}
349	if (Fail)
350	break;
351	}
352
353	// If the set was empty, or we failed, or the range is poisonous,
354	// bail out.
355	if (ECs.member_begin(I: It) == ECs.member_end() \|\| Fail \|\|
356	R.isFullSet() \|\| R.isSignWrappedSet())
357	continue;
358	assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
359
360	// The number of bits required is the maximum of the upper and
361	// lower limits, plus one so it can be signed.
362	unsigned MinBW = R.getMinSignedBits() + `1`;
363	LLVM_DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
364
365	// If we've run off the realms of the exactly representable integers,
366	// the floating point result will differ from an integer approximation.
367
368	// Do we need more bits than are in the mantissa of the type we converted
369	// to? semanticsPrecision returns the number of mantissa bits plus one
370	// for the sign bit.
371	unsigned MaxRepresentableBits
372	= APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - `1`;
373	if (MinBW > MaxRepresentableBits) {
374	LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
375	continue;
376	}
377
378	// OK, R is known to be representable.
379	// Pick the smallest legal type that will fit.
380	Type Ty = DL.getSmallestLegalIntType(C&: Ctx, Width: MinBW);
381	if (!Ty) {
382	// Every supported target supports 64-bit and 32-bit integers,
383	// so fallback to a 32 or 64-bit integer if the value fits.
384	if (MinBW <= `32`) {
385	Ty = Type::getInt32Ty(C&: *Ctx);
386	} else if (MinBW <= `64`) {
387	Ty = Type::getInt64Ty(C&: *Ctx);
388	} else {
389	LLVM_DEBUG(dbgs() << "F2I: Value requires more bits to represent than "
390	"the target supports!\n");
391	continue;
392	}
393	}
394
395	for (auto MI = ECs.member_begin(I: It), ME = ECs.member_end();
396	MI != ME; ++MI)
397	convert(I: *MI, ToTy: Ty);
398	MadeChange = true;
399	}
400
401	return MadeChange;
402	}
403
404	Value Float2IntPass::convert(Instruction I, Type *ToTy) {
405	if (ConvertedInsts.contains(Key: I))
406	// Already converted this instruction.
407	return ConvertedInsts [I];
408
409	SmallVector<Value*,`4`> NewOperands;
410	for (Value *V : I->operands()) {
411	// Don't recurse if we're an instruction that terminates the path.
412	if (I->getOpcode() == Instruction::UIToFP \|\|
413	I->getOpcode() == Instruction::SIToFP) {
414	NewOperands.push_back(Elt: V);
415	} else if (Instruction *VI = dyn_cast<Instruction>(Val: V)) {
416	NewOperands.push_back(Elt: convert(I: VI, ToTy));
417	} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: V)) {
418	APSInt Val(ToTy->getPrimitiveSizeInBits(), /isUnsigned=/false);
419	bool Exact;
420	CF->getValueAPF().convertToInteger(Result&: Val,
421	RM: APFloat::rmNearestTiesToEven,
422	IsExact: &Exact);
423	NewOperands.push_back(Elt: ConstantInt::get(Ty: ToTy, V: Val));
424	} else {
425	llvm_unreachable("Unhandled operand type?");
426	}
427	}
428
429	// Now create a new instruction.
430	IRBuilder<> IRB(I);
431	Value NewV = nullptr*;
432	switch (I->getOpcode()) {
433	default: llvm_unreachable("Unhandled instruction!");
434
435	case Instruction::FPToUI:
436	NewV = IRB.CreateZExtOrTrunc(V: NewOperands [`0`], DestTy: I->getType());
437	break;
438
439	case Instruction::FPToSI:
440	NewV = IRB.CreateSExtOrTrunc(V: NewOperands [`0`], DestTy: I->getType());
441	break;
442
443	case Instruction::FCmp: {
444	CmpInst::Predicate P = mapFCmpPred(P: cast<CmpInst>(Val: I)->getPredicate());
445	assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
446	NewV = IRB.CreateICmp(P, LHS: NewOperands [`0`], RHS: NewOperands [`1`], Name: I->getName());
447	break;
448	}
449
450	case Instruction::UIToFP:
451	NewV = IRB.CreateZExtOrTrunc(V: NewOperands [`0`], DestTy: ToTy);
452	break;
453
454	case Instruction::SIToFP:
455	NewV = IRB.CreateSExtOrTrunc(V: NewOperands [`0`], DestTy: ToTy);
456	break;
457
458	case Instruction::FNeg:
459	NewV = IRB.CreateNeg(V: NewOperands [`0`], Name: I->getName());
460	break;
461
462	case Instruction::FAdd:
463	case Instruction::FSub:
464	case Instruction::FMul:
465	NewV = IRB.CreateBinOp(Opc: mapBinOpcode(Opcode: I->getOpcode()),
466	LHS: NewOperands [`0`], RHS: NewOperands [`1`],
467	Name: I->getName());
468	break;
469	}
470
471	// If we're a root instruction, RAUW.
472	if (Roots.count(key: I))
473	I->replaceAllUsesWith(V: NewV);
474
475	ConvertedInsts [I] = NewV;
476	return NewV;
477	}
478
479	// Perform dead code elimination on the instructions we just modified.
480	void Float2IntPass::cleanup() {
481	for (auto &I : reverse(C&: ConvertedInsts))
482	I.first->eraseFromParent();
483	}
484
485	bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
486	LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
487	// Clear out all state.
488	ECs = EquivalenceClasses<Instruction*>();
489	SeenInsts.clear();
490	ConvertedInsts.clear();
491	Roots.clear();
492
493	Ctx = &F.getParent()->getContext();
494
495	findRoots(F, DT);
496
497	walkBackwards();
498	walkForwards();
499
500	const DataLayout &DL = F.getParent()->getDataLayout();
501	bool Modified = validateAndTransform(DL);
502	if (Modified)
503	cleanup();
504	return Modified;
505	}
506
507	PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) {
508	const DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
509	if (!runImpl(F, DT))
510	return PreservedAnalyses::all();
511
512	PreservedAnalyses PA;
513	PA.preserveSet<CFGAnalyses>();
514	return PA;
515	}
516

source code of llvm/lib/Transforms/Scalar/Float2Int.cpp