//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;

char llvm::GISelKnownBitsAnalysis::ID = 0;

INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)

GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelKnownBits::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
                                       unsigned Depth) {
  // For now, we only maintain the cache during one request.
  assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");

  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  ComputeKnownBitsCache.clear();
  return Known;
}

bool GISelKnownBits::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelKnownBits::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }

LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
                                         KnownBits &Known,
                                         const APInt &DemandedElts,
                                         unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
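// For example, with BitWidth = 32 and a width known to be exactly 8, Mask.One
// covers bits 0..7 and Mask.Zero covers bits 8..31, so only the low 8 bits of
// the shifted source survive.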
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint (i.e. it has a register class constraint instead). This is
  // unlikely to occur except by looking through copies but it is possible for
  // the initial register being queried to be in this state.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelKnownBits object.
  // This may happen when, say, a generic part uses a GISelKnownBits object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr,
  // which creates a new GISelKnownBits object with a different and smaller
  // depth. If we just checked for equality, we would never exit if the depth
  // passed down to the target-specific GISelKnownBits object is already
  // bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
      if (!DemandedElts[i])
        continue;

      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
                           Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, save on compile time until we have a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // PHI's operands are a mix of registers and basic blocks interleaved.
    // We only care about the register ones.
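    // For example, in `%d:_(s32) = G_PHI %a(s32), %bb.1, %b(s32), %bb.2`,
    // operand 0 is the def and operands 1 and 3 are the incoming values
    // (register names here are purely illustrative).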
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, because
      // known-bits analysis is currently unable to determine the bit width of
      // a register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    auto CstVal = getIConstantVRegVal(R, MRI);
    if (!CstVal)
      break;
    Known = KnownBits::makeConstant(*CstVal);
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
                                        /*NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                        /*NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // A result bit is zero if it is zero in either the LHS or the RHS.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // A result bit is one if it is one in either the LHS or the RHS.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
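    // If the load carries range metadata, the loaded value is constrained to
    // that range. For example, an s8 load annotated with !range [0, 32) has
    // its top three bits known zero before any-extending to the destination
    // width (values here are purely illustrative).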
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ASSERT_ZEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned SrcBitWidth;

    // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
    if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
      SrcBitWidth = MI.getOperand(2).getImm();
    else {
      SrcBitWidth = SrcTy.isPointer()
                        ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
                        : SrcTy.getSizeInBits();
    }
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
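    // For example, an asserted alignment of 8 gives LogOfAlign = 3, so the low
    // three bits of the value are known to be zero.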
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (DstTy.isVector())
      break;
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    if (MRI.getType(SrcReg).isVector())
      return; // TODO: Handle vectors.

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
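    // For example, if at most 5 source bits can be set, the count is at most
    // 5, which fits in bit_width(5) = 3 bits, so all result bits above bit 2
    // are known zero.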
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
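    // The shift amount is BitWidth - Width; e.g. extracting an 8-bit field
    // from an s32 shifts left and then arithmetically right by 24 bits, which
    // replicates the field's sign bit into the upper 24 bits.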
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::computeForAddSub(
        /*Add=*/false, /*NSW=*/false, /*NUW=*/false, ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
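    // For example, at most 17 leading zeros means the result is at most 17,
    // which fits in bit_width(17) = 5 bits, so all higher result bits are zero.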
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  }

  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}

/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
                                               const APInt &DemandedElts,
                                               unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
  if (Src1SignBits == 1)
    return 1;
  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}

unsigned GISelKnownBits::computeNumSignBits(Register R,
                                            const APInt &DemandedElts,
                                            unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies but it is possible for the initial register being queried to be in
  // this state.
  if (!DstTy.isValid())
    return 1;

  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
    unsigned InRegBits = TyBits - SrcBits + 1;
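    // For example, a G_SEXT_INREG from 8 bits on an s32 guarantees at least
    // 32 - 8 + 1 = 25 sign bits, regardless of what is known about the input.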
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
                    InRegBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If the boolean result is 0/-1, all of its bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integers.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}

unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeNumSignBits(R, DemandedElts, Depth);
}

void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  return false;
}

GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
  if (!Info) {
    unsigned MaxDepth =
        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
  }
  return *Info.get();
}