PageRenderTime 185ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 1ms

/3rd_party/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

https://code.google.com/p/softart/
C++ | 1285 lines | 850 code | 144 blank | 291 comment | 288 complexity | bae04db3ea642572d1f62d633a49082c MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause, JSON, MPL-2.0-no-copyleft-exception, GPL-2.0, GPL-3.0, LGPL-3.0, BSD-2-Clause
  1. //===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file contains logic for simplifying instructions based on information
  11. // about how they are used.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "InstCombine.h"
  15. #include "llvm/IR/DataLayout.h"
  16. #include "llvm/IR/IntrinsicInst.h"
  17. #include "llvm/Support/PatternMatch.h"
  18. using namespace llvm;
  19. using namespace llvm::PatternMatch;
  20. /// ShrinkDemandedConstant - Check to see if the specified operand of the
  21. /// specified instruction is a constant integer. If so, check to see if there
  22. /// are any bits set in the constant that are not demanded. If so, shrink the
  23. /// constant and return true.
  24. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
  25. APInt Demanded) {
  26. assert(I && "No instruction?");
  27. assert(OpNo < I->getNumOperands() && "Operand index too large");
  28. // If the operand is not a constant integer, nothing to do.
  29. ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
  30. if (!OpC) return false;
  31. // If there are no bits set that aren't demanded, nothing to do.
  32. Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
  33. if ((~Demanded & OpC->getValue()) == 0)
  34. return false;
  35. // This instruction is producing bits that are not demanded. Shrink the RHS.
  36. Demanded &= OpC->getValue();
  37. I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
  38. return true;
  39. }
  40. /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
  41. /// SimplifyDemandedBits knows about. See if the instruction has any
  42. /// properties that allow us to simplify its operands.
  43. bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
  44. unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  45. APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  46. APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
  47. Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
  48. KnownZero, KnownOne, 0);
  49. if (V == 0) return false;
  50. if (V == &Inst) return true;
  51. ReplaceInstUsesWith(Inst, V);
  52. return true;
  53. }
  54. /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
  55. /// specified instruction operand if possible, updating it in place. It returns
  56. /// true if it made any change and false otherwise.
  57. bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
  58. APInt &KnownZero, APInt &KnownOne,
  59. unsigned Depth) {
  60. Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
  61. KnownZero, KnownOne, Depth);
  62. if (NewVal == 0) return false;
  63. U = NewVal;
  64. return true;
  65. }
  66. /// SimplifyDemandedUseBits - This function attempts to replace V with a simpler
  67. /// value based on the demanded bits. When this function is called, it is known
  68. /// that only the bits set in DemandedMask of the result of V are ever used
  69. /// downstream. Consequently, depending on the mask and V, it may be possible
  70. /// to replace V with a constant or one of its operands. In such cases, this
  71. /// function does the replacement and returns true. In all other cases, it
  72. /// returns false after analyzing the expression and setting KnownOne and
  73. /// KnownZero. KnownOne holds all bits known to be one; KnownZero all bits known
  74. /// to be zero in the expression. These are provided to potentially allow the
  75. /// caller (which might recursively be SimplifyDemandedBits itself) to simplify
  76. /// the expression. KnownOne and KnownZero always follow the invariant that
  77. /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
  78. /// the bits in KnownOne and KnownZero may only be accurate for those bits set
  79. /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
  80. /// and KnownOne must all be the same.
  81. ///
  82. /// This returns null if it did not change anything and it permits no
  83. /// simplification. This returns V itself if it did some simplification of V's
  84. /// operands based on the information about what bits are demanded. This returns
  85. /// some other non-null value if it found out that V is equal to another value
  86. /// in the context where the specified bits are demanded, but not for all users.
  87. Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
  88. APInt &KnownZero, APInt &KnownOne,
  89. unsigned Depth) {
  90. assert(V != 0 && "Null pointer of Value???");
  91. assert(Depth <= 6 && "Limit Search Depth");
  92. uint32_t BitWidth = DemandedMask.getBitWidth();
  93. Type *VTy = V->getType();
  94. assert((TD || !VTy->isPointerTy()) &&
  95. "SimplifyDemandedBits needs to know bit widths!");
  96. assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
  97. (!VTy->isIntOrIntVectorTy() ||
  98. VTy->getScalarSizeInBits() == BitWidth) &&
  99. KnownZero.getBitWidth() == BitWidth &&
  100. KnownOne.getBitWidth() == BitWidth &&
  101. "Value *V, DemandedMask, KnownZero and KnownOne "
  102. "must have same BitWidth");
  103. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
  104. // We know all of the bits for a constant!
  105. KnownOne = CI->getValue() & DemandedMask;
  106. KnownZero = ~KnownOne & DemandedMask;
  107. return 0;
  108. }
  109. if (isa<ConstantPointerNull>(V)) {
  110. // We know all of the bits for a constant!
  111. KnownOne.clearAllBits();
  112. KnownZero = DemandedMask;
  113. return 0;
  114. }
  115. KnownZero.clearAllBits();
  116. KnownOne.clearAllBits();
  117. if (DemandedMask == 0) { // Not demanding any bits from V.
  118. if (isa<UndefValue>(V))
  119. return 0;
  120. return UndefValue::get(VTy);
  121. }
  122. if (Depth == 6) // Limit search depth.
  123. return 0;
  124. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
  125. APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
  126. Instruction *I = dyn_cast<Instruction>(V);
  127. if (!I) {
  128. ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
  129. return 0; // Only analyze instructions.
  130. }
  131. // If there are multiple uses of this value and we aren't at the root, then
  132. // we can't do any simplifications of the operands, because DemandedMask
  133. // only reflects the bits demanded by *one* of the users.
  134. if (Depth != 0 && !I->hasOneUse()) {
  135. // Despite the fact that we can't simplify this instruction in all User's
  136. // context, we can at least compute the knownzero/knownone bits, and we can
  137. // do simplifications that apply to *just* the one user if we know that
  138. // this instruction has a simpler value in that context.
  139. if (I->getOpcode() == Instruction::And) {
  140. // If either the LHS or the RHS are Zero, the result is zero.
  141. ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
  142. ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
  143. // If all of the demanded bits are known 1 on one side, return the other.
  144. // These bits cannot contribute to the result of the 'and' in this
  145. // context.
  146. if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
  147. (DemandedMask & ~LHSKnownZero))
  148. return I->getOperand(0);
  149. if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
  150. (DemandedMask & ~RHSKnownZero))
  151. return I->getOperand(1);
  152. // If all of the demanded bits in the inputs are known zeros, return zero.
  153. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
  154. return Constant::getNullValue(VTy);
  155. } else if (I->getOpcode() == Instruction::Or) {
  156. // We can simplify (X|Y) -> X or Y in the user's context if we know that
  157. // only bits from X or Y are demanded.
  158. // If either the LHS or the RHS are One, the result is One.
  159. ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
  160. ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
  161. // If all of the demanded bits are known zero on one side, return the
  162. // other. These bits cannot contribute to the result of the 'or' in this
  163. // context.
  164. if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
  165. (DemandedMask & ~LHSKnownOne))
  166. return I->getOperand(0);
  167. if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
  168. (DemandedMask & ~RHSKnownOne))
  169. return I->getOperand(1);
  170. // If all of the potentially set bits on one side are known to be set on
  171. // the other side, just use the 'other' side.
  172. if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
  173. (DemandedMask & (~RHSKnownZero)))
  174. return I->getOperand(0);
  175. if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
  176. (DemandedMask & (~LHSKnownZero)))
  177. return I->getOperand(1);
  178. } else if (I->getOpcode() == Instruction::Xor) {
  179. // We can simplify (X^Y) -> X or Y in the user's context if we know that
  180. // only bits from X or Y are demanded.
  181. ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
  182. ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
  183. // If all of the demanded bits are known zero on one side, return the
  184. // other.
  185. if ((DemandedMask & RHSKnownZero) == DemandedMask)
  186. return I->getOperand(0);
  187. if ((DemandedMask & LHSKnownZero) == DemandedMask)
  188. return I->getOperand(1);
  189. }
  190. // Compute the KnownZero/KnownOne bits to simplify things downstream.
  191. ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
  192. return 0;
  193. }
  194. // If this is the root being simplified, allow it to have multiple uses,
  195. // just set the DemandedMask to all bits so that we can try to simplify the
  196. // operands. This allows visitTruncInst (for example) to simplify the
  197. // operand of a trunc without duplicating all the logic below.
  198. if (Depth == 0 && !V->hasOneUse())
  199. DemandedMask = APInt::getAllOnesValue(BitWidth);
  200. switch (I->getOpcode()) {
  201. default:
  202. ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
  203. break;
  204. case Instruction::And:
  205. // If either the LHS or the RHS are Zero, the result is zero.
  206. if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
  207. RHSKnownZero, RHSKnownOne, Depth+1) ||
  208. SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
  209. LHSKnownZero, LHSKnownOne, Depth+1))
  210. return I;
  211. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  212. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  213. // If all of the demanded bits are known 1 on one side, return the other.
  214. // These bits cannot contribute to the result of the 'and'.
  215. if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
  216. (DemandedMask & ~LHSKnownZero))
  217. return I->getOperand(0);
  218. if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
  219. (DemandedMask & ~RHSKnownZero))
  220. return I->getOperand(1);
  221. // If all of the demanded bits in the inputs are known zeros, return zero.
  222. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
  223. return Constant::getNullValue(VTy);
  224. // If the RHS is a constant, see if we can simplify it.
  225. if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
  226. return I;
  227. // Output known-1 bits are only known if set in both the LHS & RHS.
  228. KnownOne = RHSKnownOne & LHSKnownOne;
  229. // Output known-0 are known to be clear if zero in either the LHS | RHS.
  230. KnownZero = RHSKnownZero | LHSKnownZero;
  231. break;
  232. case Instruction::Or:
  233. // If either the LHS or the RHS are One, the result is One.
  234. if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
  235. RHSKnownZero, RHSKnownOne, Depth+1) ||
  236. SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
  237. LHSKnownZero, LHSKnownOne, Depth+1))
  238. return I;
  239. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  240. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  241. // If all of the demanded bits are known zero on one side, return the other.
  242. // These bits cannot contribute to the result of the 'or'.
  243. if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
  244. (DemandedMask & ~LHSKnownOne))
  245. return I->getOperand(0);
  246. if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
  247. (DemandedMask & ~RHSKnownOne))
  248. return I->getOperand(1);
  249. // If all of the potentially set bits on one side are known to be set on
  250. // the other side, just use the 'other' side.
  251. if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
  252. (DemandedMask & (~RHSKnownZero)))
  253. return I->getOperand(0);
  254. if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
  255. (DemandedMask & (~LHSKnownZero)))
  256. return I->getOperand(1);
  257. // If the RHS is a constant, see if we can simplify it.
  258. if (ShrinkDemandedConstant(I, 1, DemandedMask))
  259. return I;
  260. // Output known-0 bits are only known if clear in both the LHS & RHS.
  261. KnownZero = RHSKnownZero & LHSKnownZero;
  262. // Output known-1 are known to be set if set in either the LHS | RHS.
  263. KnownOne = RHSKnownOne | LHSKnownOne;
  264. break;
  265. case Instruction::Xor: {
  266. if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
  267. RHSKnownZero, RHSKnownOne, Depth+1) ||
  268. SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
  269. LHSKnownZero, LHSKnownOne, Depth+1))
  270. return I;
  271. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  272. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  273. // If all of the demanded bits are known zero on one side, return the other.
  274. // These bits cannot contribute to the result of the 'xor'.
  275. if ((DemandedMask & RHSKnownZero) == DemandedMask)
  276. return I->getOperand(0);
  277. if ((DemandedMask & LHSKnownZero) == DemandedMask)
  278. return I->getOperand(1);
  279. // If all of the demanded bits are known to be zero on one side or the
  280. // other, turn this into an *inclusive* or.
  281. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
  282. if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
  283. Instruction *Or =
  284. BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
  285. I->getName());
  286. return InsertNewInstWith(Or, *I);
  287. }
  288. // If all of the demanded bits on one side are known, and all of the set
  289. // bits on that side are also known to be set on the other side, turn this
  290. // into an AND, as we know the bits will be cleared.
  291. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
  292. if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
  293. // all known
  294. if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
  295. Constant *AndC = Constant::getIntegerValue(VTy,
  296. ~RHSKnownOne & DemandedMask);
  297. Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
  298. return InsertNewInstWith(And, *I);
  299. }
  300. }
  301. // If the RHS is a constant, see if we can simplify it.
  302. // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
  303. if (ShrinkDemandedConstant(I, 1, DemandedMask))
  304. return I;
  305. // If our LHS is an 'and' and if it has one use, and if any of the bits we
  306. // are flipping are known to be set, then the xor is just resetting those
  307. // bits to zero. We can just knock out bits from the 'and' and the 'xor',
  308. // simplifying both of them.
  309. if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0)))
  310. if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
  311. isa<ConstantInt>(I->getOperand(1)) &&
  312. isa<ConstantInt>(LHSInst->getOperand(1)) &&
  313. (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
  314. ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
  315. ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
  316. APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
  317. Constant *AndC =
  318. ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
  319. Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
  320. InsertNewInstWith(NewAnd, *I);
  321. Constant *XorC =
  322. ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
  323. Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
  324. return InsertNewInstWith(NewXor, *I);
  325. }
  326. // Output known-0 bits are known if clear or set in both the LHS & RHS.
  327. KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
  328. // Output known-1 are known to be set if set in only one of the LHS, RHS.
  329. KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
  330. break;
  331. }
  332. case Instruction::Select:
  333. if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
  334. RHSKnownZero, RHSKnownOne, Depth+1) ||
  335. SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
  336. LHSKnownZero, LHSKnownOne, Depth+1))
  337. return I;
  338. assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
  339. assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
  340. // If the operands are constants, see if we can simplify them.
  341. if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
  342. ShrinkDemandedConstant(I, 2, DemandedMask))
  343. return I;
  344. // Only known if known in both the LHS and RHS.
  345. KnownOne = RHSKnownOne & LHSKnownOne;
  346. KnownZero = RHSKnownZero & LHSKnownZero;
  347. break;
  348. case Instruction::Trunc: {
  349. unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
  350. DemandedMask = DemandedMask.zext(truncBf);
  351. KnownZero = KnownZero.zext(truncBf);
  352. KnownOne = KnownOne.zext(truncBf);
  353. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
  354. KnownZero, KnownOne, Depth+1))
  355. return I;
  356. DemandedMask = DemandedMask.trunc(BitWidth);
  357. KnownZero = KnownZero.trunc(BitWidth);
  358. KnownOne = KnownOne.trunc(BitWidth);
  359. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  360. break;
  361. }
  362. case Instruction::BitCast:
  363. if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
  364. return 0; // vector->int or fp->int?
  365. if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
  366. if (VectorType *SrcVTy =
  367. dyn_cast<VectorType>(I->getOperand(0)->getType())) {
  368. if (DstVTy->getNumElements() != SrcVTy->getNumElements())
  369. // Don't touch a bitcast between vectors of different element counts.
  370. return 0;
  371. } else
  372. // Don't touch a scalar-to-vector bitcast.
  373. return 0;
  374. } else if (I->getOperand(0)->getType()->isVectorTy())
  375. // Don't touch a vector-to-scalar bitcast.
  376. return 0;
  377. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
  378. KnownZero, KnownOne, Depth+1))
  379. return I;
  380. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  381. break;
  382. case Instruction::ZExt: {
  383. // Compute the bits in the result that are not present in the input.
  384. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
  385. DemandedMask = DemandedMask.trunc(SrcBitWidth);
  386. KnownZero = KnownZero.trunc(SrcBitWidth);
  387. KnownOne = KnownOne.trunc(SrcBitWidth);
  388. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
  389. KnownZero, KnownOne, Depth+1))
  390. return I;
  391. DemandedMask = DemandedMask.zext(BitWidth);
  392. KnownZero = KnownZero.zext(BitWidth);
  393. KnownOne = KnownOne.zext(BitWidth);
  394. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  395. // The top bits are known to be zero.
  396. KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
  397. break;
  398. }
  399. case Instruction::SExt: {
  400. // Compute the bits in the result that are not present in the input.
  401. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
  402. APInt InputDemandedBits = DemandedMask &
  403. APInt::getLowBitsSet(BitWidth, SrcBitWidth);
  404. APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
  405. // If any of the sign extended bits are demanded, we know that the sign
  406. // bit is demanded.
  407. if ((NewBits & DemandedMask) != 0)
  408. InputDemandedBits.setBit(SrcBitWidth-1);
  409. InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
  410. KnownZero = KnownZero.trunc(SrcBitWidth);
  411. KnownOne = KnownOne.trunc(SrcBitWidth);
  412. if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
  413. KnownZero, KnownOne, Depth+1))
  414. return I;
  415. InputDemandedBits = InputDemandedBits.zext(BitWidth);
  416. KnownZero = KnownZero.zext(BitWidth);
  417. KnownOne = KnownOne.zext(BitWidth);
  418. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  419. // If the sign bit of the input is known set or clear, then we know the
  420. // top bits of the result.
  421. // If the input sign bit is known zero, or if the NewBits are not demanded
  422. // convert this into a zero extension.
  423. if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
  424. // Convert to ZExt cast
  425. CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
  426. return InsertNewInstWith(NewCast, *I);
  427. } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
  428. KnownOne |= NewBits;
  429. }
  430. break;
  431. }
  432. case Instruction::Add: {
  433. // Figure out what the input bits are. If the top bits of the and result
  434. // are not demanded, then the add doesn't demand them from its input
  435. // either.
  436. unsigned NLZ = DemandedMask.countLeadingZeros();
  437. // If there is a constant on the RHS, there are a variety of xformations
  438. // we can do.
  439. if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
  440. // If null, this should be simplified elsewhere. Some of the xforms here
  441. // won't work if the RHS is zero.
  442. if (RHS->isZero())
  443. break;
  444. // If the top bit of the output is demanded, demand everything from the
  445. // input. Otherwise, we demand all the input bits except NLZ top bits.
  446. APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
  447. // Find information about known zero/one bits in the input.
  448. if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
  449. LHSKnownZero, LHSKnownOne, Depth+1))
  450. return I;
  451. // If the RHS of the add has bits set that can't affect the input, reduce
  452. // the constant.
  453. if (ShrinkDemandedConstant(I, 1, InDemandedBits))
  454. return I;
  455. // Avoid excess work.
  456. if (LHSKnownZero == 0 && LHSKnownOne == 0)
  457. break;
  458. // Turn it into OR if input bits are zero.
  459. if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
  460. Instruction *Or =
  461. BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
  462. I->getName());
  463. return InsertNewInstWith(Or, *I);
  464. }
  465. // We can say something about the output known-zero and known-one bits,
  466. // depending on potential carries from the input constant and the
  467. // unknowns. For example if the LHS is known to have at most the 0x0F0F0
  468. // bits set and the RHS constant is 0x01001, then we know we have a known
  469. // one mask of 0x00001 and a known zero mask of 0xE0F0E.
  470. // To compute this, we first compute the potential carry bits. These are
  471. // the bits which may be modified. I'm not aware of a better way to do
  472. // this scan.
  473. const APInt &RHSVal = RHS->getValue();
  474. APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
  475. // Now that we know which bits have carries, compute the known-1/0 sets.
  476. // Bits are known one if they are known zero in one operand and one in the
  477. // other, and there is no input carry.
  478. KnownOne = ((LHSKnownZero & RHSVal) |
  479. (LHSKnownOne & ~RHSVal)) & ~CarryBits;
  480. // Bits are known zero if they are known zero in both operands and there
  481. // is no input carry.
  482. KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
  483. } else {
  484. // If the high-bits of this ADD are not demanded, then it does not demand
  485. // the high bits of its LHS or RHS.
  486. if (DemandedMask[BitWidth-1] == 0) {
  487. // Right fill the mask of bits for this ADD to demand the most
  488. // significant bit and all those below it.
  489. APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
  490. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
  491. LHSKnownZero, LHSKnownOne, Depth+1) ||
  492. SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
  493. LHSKnownZero, LHSKnownOne, Depth+1))
  494. return I;
  495. }
  496. }
  497. break;
  498. }
  499. case Instruction::Sub:
  500. // If the high-bits of this SUB are not demanded, then it does not demand
  501. // the high bits of its LHS or RHS.
  502. if (DemandedMask[BitWidth-1] == 0) {
  503. // Right fill the mask of bits for this SUB to demand the most
  504. // significant bit and all those below it.
  505. uint32_t NLZ = DemandedMask.countLeadingZeros();
  506. APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
  507. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
  508. LHSKnownZero, LHSKnownOne, Depth+1) ||
  509. SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
  510. LHSKnownZero, LHSKnownOne, Depth+1))
  511. return I;
  512. }
  513. // Otherwise just hand the sub off to ComputeMaskedBits to fill in
  514. // the known zeros and ones.
  515. ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
  516. // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
  517. // zero.
  518. if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
  519. APInt I0 = C0->getValue();
  520. if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
  521. Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
  522. return InsertNewInstWith(Xor, *I);
  523. }
  524. }
  525. break;
  526. case Instruction::Shl:
  527. if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
  528. {
  529. Value *VarX; ConstantInt *C1;
  530. if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
  531. Instruction *Shr = cast<Instruction>(I->getOperand(0));
  532. Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
  533. KnownZero, KnownOne);
  534. if (R)
  535. return R;
  536. }
  537. }
  538. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
  539. APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
  540. // If the shift is NUW/NSW, then it does demand the high bits.
  541. ShlOperator *IOp = cast<ShlOperator>(I);
  542. if (IOp->hasNoSignedWrap())
  543. DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
  544. else if (IOp->hasNoUnsignedWrap())
  545. DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
  546. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
  547. KnownZero, KnownOne, Depth+1))
  548. return I;
  549. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  550. KnownZero <<= ShiftAmt;
  551. KnownOne <<= ShiftAmt;
  552. // low bits known zero.
  553. if (ShiftAmt)
  554. KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  555. }
  556. break;
  557. case Instruction::LShr:
  558. // For a logical shift right
  559. if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
  560. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
  561. // Unsigned shift right.
  562. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
  563. // If the shift is exact, then it does demand the low bits (and knows that
  564. // they are zero).
  565. if (cast<LShrOperator>(I)->isExact())
  566. DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  567. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
  568. KnownZero, KnownOne, Depth+1))
  569. return I;
  570. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  571. KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
  572. KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
  573. if (ShiftAmt) {
  574. // Compute the new bits that are at the top now.
  575. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
  576. KnownZero |= HighBits; // high bits known zero.
  577. }
  578. }
  579. break;
  580. case Instruction::AShr:
  581. // If this is an arithmetic shift right and only the low-bit is set, we can
  582. // always convert this into a logical shr, even if the shift amount is
  583. // variable. The low bit of the shift cannot be an input sign bit unless
  584. // the shift amount is >= the size of the datatype, which is undefined.
  585. if (DemandedMask == 1) {
  586. // Perform the logical shift right.
  587. Instruction *NewVal = BinaryOperator::CreateLShr(
  588. I->getOperand(0), I->getOperand(1), I->getName());
  589. return InsertNewInstWith(NewVal, *I);
  590. }
  591. // If the sign bit is the only bit demanded by this ashr, then there is no
  592. // need to do it, the shift doesn't change the high bit.
  593. if (DemandedMask.isSignBit())
  594. return I->getOperand(0);
  595. if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
  596. uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
  597. // Signed shift right.
  598. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
  599. // If any of the "high bits" are demanded, we should set the sign bit as
  600. // demanded.
  601. if (DemandedMask.countLeadingZeros() <= ShiftAmt)
  602. DemandedMaskIn.setBit(BitWidth-1);
  603. // If the shift is exact, then it does demand the low bits (and knows that
  604. // they are zero).
  605. if (cast<AShrOperator>(I)->isExact())
  606. DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
  607. if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
  608. KnownZero, KnownOne, Depth+1))
  609. return I;
  610. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  611. // Compute the new bits that are at the top now.
  612. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
  613. KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
  614. KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
  615. // Handle the sign bits.
  616. APInt SignBit(APInt::getSignBit(BitWidth));
  617. // Adjust to where it is now in the mask.
  618. SignBit = APIntOps::lshr(SignBit, ShiftAmt);
  619. // If the input sign bit is known to be zero, or if none of the top bits
  620. // are demanded, turn this into an unsigned shift right.
  621. if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
  622. (HighBits & ~DemandedMask) == HighBits) {
  623. // Perform the logical shift right.
  624. BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
  625. SA, I->getName());
  626. NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
  627. return InsertNewInstWith(NewVal, *I);
  628. } else if ((KnownOne & SignBit) != 0) { // New bits are known one.
  629. KnownOne |= HighBits;
  630. }
  631. }
  632. break;
  633. case Instruction::SRem:
  634. if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
  635. // X % -1 demands all the bits because we don't want to introduce
  636. // INT_MIN % -1 (== undef) by accident.
  637. if (Rem->isAllOnesValue())
  638. break;
  639. APInt RA = Rem->getValue().abs();
  640. if (RA.isPowerOf2()) {
  641. if (DemandedMask.ult(RA)) // srem won't affect demanded bits
  642. return I->getOperand(0);
  643. APInt LowBits = RA - 1;
  644. APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
  645. if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
  646. LHSKnownZero, LHSKnownOne, Depth+1))
  647. return I;
  648. // The low bits of LHS are unchanged by the srem.
  649. KnownZero = LHSKnownZero & LowBits;
  650. KnownOne = LHSKnownOne & LowBits;
  651. // If LHS is non-negative or has all low bits zero, then the upper bits
  652. // are all zero.
  653. if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
  654. KnownZero |= ~LowBits;
  655. // If LHS is negative and not all low bits are zero, then the upper bits
  656. // are all one.
  657. if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
  658. KnownOne |= ~LowBits;
  659. assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
  660. }
  661. }
  662. // The sign bit is the LHS's sign bit, except when the result of the
  663. // remainder is zero.
  664. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
  665. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
  666. ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
  667. // If it's known zero, our sign bit is also zero.
  668. if (LHSKnownZero.isNegative())
  669. KnownZero.setBit(KnownZero.getBitWidth() - 1);
  670. }
  671. break;
  672. case Instruction::URem: {
  673. APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
  674. APInt AllOnes = APInt::getAllOnesValue(BitWidth);
  675. if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
  676. KnownZero2, KnownOne2, Depth+1) ||
  677. SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
  678. KnownZero2, KnownOne2, Depth+1))
  679. return I;
  680. unsigned Leaders = KnownZero2.countLeadingOnes();
  681. Leaders = std::max(Leaders,
  682. KnownZero2.countLeadingOnes());
  683. KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
  684. break;
  685. }
  686. case Instruction::Call:
  687. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  688. switch (II->getIntrinsicID()) {
  689. default: break;
  690. case Intrinsic::bswap: {
  691. // If the only bits demanded come from one byte of the bswap result,
  692. // just shift the input byte into position to eliminate the bswap.
  693. unsigned NLZ = DemandedMask.countLeadingZeros();
  694. unsigned NTZ = DemandedMask.countTrailingZeros();
  695. // Round NTZ down to the next byte. If we have 11 trailing zeros, then
  696. // we need all the bits down to bit 8. Likewise, round NLZ. If we
  697. // have 14 leading zeros, round to 8.
  698. NLZ &= ~7;
  699. NTZ &= ~7;
  700. // If we need exactly one byte, we can do this transformation.
  701. if (BitWidth-NLZ-NTZ == 8) {
  702. unsigned ResultBit = NTZ;
  703. unsigned InputBit = BitWidth-NTZ-8;
  704. // Replace this with either a left or right shift to get the byte into
  705. // the right place.
  706. Instruction *NewVal;
  707. if (InputBit > ResultBit)
  708. NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
  709. ConstantInt::get(I->getType(), InputBit-ResultBit));
  710. else
  711. NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
  712. ConstantInt::get(I->getType(), ResultBit-InputBit));
  713. NewVal->takeName(I);
  714. return InsertNewInstWith(NewVal, *I);
  715. }
  716. // TODO: Could compute known zero/one bits based on the input.
  717. break;
  718. }
  719. case Intrinsic::x86_sse42_crc32_64_64:
  720. KnownZero = APInt::getHighBitsSet(64, 32);
  721. return 0;
  722. }
  723. }
  724. ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
  725. break;
  726. }
  727. // If the client is only demanding bits that we know, return the known
  728. // constant.
  729. if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
  730. return Constant::getIntegerValue(VTy, KnownOne);
  731. return 0;
  732. }
  733. /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
  734. /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
  735. /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
  736. /// of "C2-C1".
  737. ///
  738. /// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
  739. /// ..., bn}, without considering the specific value X is holding.
  740. /// This transformation is legal iff one of following conditions is hold:
  741. /// 1) All the bit in S are 0, in this case E1 == E2.
  742. /// 2) We don't care those bits in S, per the input DemandedMask.
  743. /// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
  744. /// rest bits.
  745. ///
  746. /// Currently we only test condition 2).
  747. ///
  748. /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
  749. /// not successful.
  750. Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
  751. Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
  752. const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
  753. const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
  754. if (!ShlOp1 || !ShrOp1)
  755. return 0; // Noop.
  756. Value *VarX = Shr->getOperand(0);
  757. Type *Ty = VarX->getType();
  758. unsigned BitWidth = Ty->getIntegerBitWidth();
  759. if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
  760. return 0; // Undef.
  761. unsigned ShlAmt = ShlOp1.getZExtValue();
  762. unsigned ShrAmt = ShrOp1.getZExtValue();
  763. KnownOne.clearAllBits();
  764. KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
  765. KnownZero &= DemandedMask;
  766. APInt BitMask1(APInt::getAllOnesValue(BitWidth));
  767. APInt BitMask2(APInt::getAllOnesValue(BitWidth));
  768. bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  769. BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
  770. (BitMask1.ashr(ShrAmt) << ShlAmt);
  771. if (ShrAmt <= ShlAmt) {
  772. BitMask2 <<= (ShlAmt - ShrAmt);
  773. } else {
  774. BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
  775. BitMask2.ashr(ShrAmt - ShlAmt);
  776. }
  777. // Check if condition-2 (see the comment to this function) is satified.
  778. if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
  779. if (ShrAmt == ShlAmt)
  780. return VarX;
  781. if (!Shr->hasOneUse())
  782. return 0;
  783. BinaryOperator *New;
  784. if (ShrAmt < ShlAmt) {
  785. Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
  786. New = BinaryOperator::CreateShl(VarX, Amt);
  787. BinaryOperator *Orig = cast<BinaryOperator>(Shl);
  788. New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
  789. New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
  790. } else {
  791. Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
  792. New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
  793. BinaryOperator::CreateAShr(VarX, Amt);
  794. if (cast<BinaryOperator>(Shr)->isExact())
  795. New->setIsExact(true);
  796. }
  797. return InsertNewInstWith(New, *Shl);
  798. }
  799. return 0;
  800. }
  801. /// SimplifyDemandedVectorElts - The specified value produces a vector with
  802. /// any number of elements. DemandedElts contains the set of elements that are
  803. /// actually used by the caller. This method analyzes which elements of the
  804. /// operand are undef and returns that information in UndefElts.
  805. ///
  806. /// If the information about demanded elements can be used to simplify the
  807. /// operation, the operation is simplified, then the resultant value is
  808. /// returned. This returns null if no change was made.
  809. Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
  810. APInt &UndefElts,
  811. unsigned Depth) {
  812. unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
  813. APInt EltMask(APInt::getAllOnesValue(VWidth));
  814. assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
  815. if (isa<UndefValue>(V)) {
  816. // If the entire vector is undefined, just return this info.
  817. UndefElts = EltMask;
  818. return 0;
  819. }
  820. if (DemandedElts == 0) { // If nothing is demanded, provide undef.
  821. UndefElts = EltMask;
  822. return UndefValue::get(V->getType());
  823. }
  824. UndefElts = 0;
  825. // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
  826. if (Constant *C = dyn_cast<Constant>(V)) {
  827. // Check if this is identity. If so, return 0 since we are not simplifying
  828. // anything.
  829. if (DemandedElts.isAllOnesValue())
  830. return 0;
  831. Type *EltTy = cast<VectorType>(V->getType())->getElementType();
  832. Constant *Undef = UndefValue::get(EltTy);
  833. SmallVector<Constant*, 16> Elts;
  834. for (unsigned i = 0; i != VWidth; ++i) {
  835. if (!DemandedElts[i]) { // If not demanded, set to undef.
  836. Elts.push_back(Undef);
  837. UndefElts.setBit(i);
  838. continue;
  839. }
  840. Constant *Elt = C->getAggregateElement(i);
  841. if (Elt == 0) return 0;
  842. if (isa<UndefValue>(Elt)) { // Already undef.
  843. Elts.push_back(Undef);
  844. UndefElts.setBit(i);
  845. } else { // Otherwise, defined.
  846. Elts.push_back(Elt);
  847. }
  848. }
  849. // If we changed the constant, return it.
  850. Constant *NewCV = ConstantVector::get(Elts);
  851. return NewCV != C ? NewCV : 0;
  852. }
  853. // Limit search depth.
  854. if (Depth == 10)
  855. return 0;
  856. // If multiple users are using the root value, proceed with
  857. // simplification conservatively assuming that all elements
  858. // are needed.
  859. if (!V->hasOneUse()) {
  860. // Quit if we find multiple users of a non-root value though.
  861. // They'll be handled when it's their turn to be visited by
  862. // the main instcombine process.
  863. if (Depth != 0)
  864. // TODO: Just compute the UndefElts information recursively.
  865. return 0;
  866. // Conservatively assume that all elements are needed.
  867. DemandedElts = EltMask;
  868. }
  869. Instruction *I = dyn_cast<Instruction>(V);
  870. if (!I) return 0; // Only analyze instructions.
  871. bool MadeChange = false;
  872. APInt UndefElts2(VWidth, 0);
  873. Value *TmpV;
  874. switch (I->getOpcode()) {
  875. default: break;
  876. case Instruction::InsertElement: {
  877. // If this is a variable index, we don't know which element it overwrites.
  878. // demand exactly the same input as we produce.
  879. ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
  880. if (Idx == 0) {
  881. // Note that we can't propagate undef elt info, because we don't know
  882. // which elt is getting updated.
  883. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
  884. UndefElts2, Depth+1);
  885. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  886. break;
  887. }
  888. // If this is inserting an element that isn't demanded, remove this
  889. // insertelement.
  890. unsigned IdxNo = Idx->getZExtValue();
  891. if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
  892. Worklist.Add(I);
  893. return I->getOperand(0);
  894. }
  895. // Otherwise, the element inserted overwrites whatever was there, so the
  896. // input demanded set is simpler than the output set.
  897. APInt DemandedElts2 = DemandedElts;
  898. DemandedElts2.clearBit(IdxNo);
  899. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
  900. UndefElts, Depth+1);
  901. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  902. // The inserted element is defined.
  903. UndefElts.clearBit(IdxNo);
  904. break;
  905. }
  906. case Instruction::ShuffleVector: {
  907. ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
  908. uint64_t LHSVWidth =
  909. cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
  910. APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
  911. for (unsigned i = 0; i < VWidth; i++) {
  912. if (DemandedElts[i]) {
  913. unsigned MaskVal = Shuffle->getMaskValue(i);
  914. if (MaskVal != -1u) {
  915. assert(MaskVal < LHSVWidth * 2 &&
  916. "shufflevector mask index out of range!");
  917. if (MaskVal < LHSVWidth)
  918. LeftDemanded.setBit(MaskVal);
  919. else
  920. RightDemanded.setBit(MaskVal - LHSVWidth);
  921. }
  922. }
  923. }
  924. APInt UndefElts4(LHSVWidth, 0);
  925. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
  926. UndefElts4, Depth+1);
  927. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  928. APInt UndefElts3(LHSVWidth, 0);
  929. TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
  930. UndefElts3, Depth+1);
  931. if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
  932. bool NewUndefElts = false;
  933. for (unsigned i = 0; i < VWidth; i++) {
  934. unsigned MaskVal = Shuffle->getMaskValue(i);
  935. if (MaskVal == -1u) {
  936. UndefElts.setBit(i);
  937. } else if (!DemandedElts[i]) {
  938. NewUndefElts = true;
  939. UndefElts.setBit(i);
  940. } else if (MaskVal < LHSVWidth) {
  941. if (UndefElts4[MaskVal]) {
  942. NewUndefElts = true;
  943. UndefElts.setBit(i);
  944. }
  945. } else {
  946. if (UndefElts3[MaskVal - LHSVWidth]) {
  947. NewUndefElts = true;
  948. UndefElts.setBit(i);
  949. }
  950. }
  951. }
  952. if (NewUndefElts) {
  953. // Add additional discovered undefs.
  954. SmallVector<Constant*, 16> Elts;
  955. for (unsigned i = 0; i < VWidth; ++i) {
  956. if (UndefElts[i])
  957. Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
  958. else
  959. Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
  960. Shuffle->getMaskValue(i)));
  961. }
  962. I->setOperand(2, ConstantVector::get(Elts));
  963. MadeChange = true;
  964. }
  965. break;
  966. }
  967. case Instruction::Select: {
  968. APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
  969. if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
  970. for (unsigned i = 0; i < VWidth; i++) {
  971. if (CV->getAggregateElement(i)->isNullValue())
  972. LeftDemanded.clearBit(i);
  973. else
  974. RightDemanded.clearBit(i);
  975. }
  976. }
  977. TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
  978. UndefElts, Depth+1);
  979. if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
  980. TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
  981. UndefElts2, Depth+1);
  982. if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
  983. // Output elements are undefined if both are undefined.
  984. UndefElts &= UndefElts2;
  985. break;
  986. }
  987. case Instruction::BitCast: {
  988. // Vector->vector casts only.
  989. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
  990. if (!VTy) break;
  991. unsigned InVWidth = VTy->getNumElements();
  992. APInt InputDemandedElts(InVWidth, 0);
  993. unsigned Ratio;
  994. if (VWidth == InVWidth) {
  995. // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
  996. // elements as are demanded of us.
  997. Ratio = 1;
  998. InputDemandedElts = DemandedElts;
  999. } else if (VWidth > InVWidth) {
  1000. // Untested so far.
  1001. break;
  1002. // If there are more elements in the result than there are in the source,
  1003. // then an input element is live if any of the corresponding output
  1004. // elements are live.
  1005. Ratio = VWidth/InVWidth;
  1006. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
  1007. if (DemandedElts[OutIdx])
  1008. InputDemandedElts.setBit(OutIdx/Ratio);
  1009. }
  1010. } else {
  1011. // Untested so far.
  1012. break;
  1013. // If there are more elements in the source than there are in the result,
  1014. // then an input element is live if the corresponding output element is
  1015. // live.
  1016. Ratio = InVWidth/VWidth;
  1017. for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
  1018. if (DemandedElts[InIdx/Ratio])
  1019. InputDemandedElts.setBit(InIdx);
  1020. }
  1021. // div/rem demand all inputs, because they don't want divide by zero.
  1022. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
  1023. UndefElts2, Depth+1);
  1024. if (TmpV) {
  1025. I->setOperand(0, TmpV);
  1026. MadeChange = true;
  1027. }
  1028. UndefElts = UndefElts2;
  1029. if (VWidth > InVWidth) {
  1030. llvm_unreachable("Unimp");
  1031. // If there are more elements in the result than there are in the source,
  1032. // then an output element is undef if the corresponding input element is
  1033. // undef.
  1034. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
  1035. if (UndefElts2[OutIdx/Ratio])
  1036. UndefElts.setBit(OutIdx);
  1037. } else if (VWidth < InVWidth) {
  1038. llvm_unreachable("Unimp");
  1039. // If there are more elements in the source than there are in the result,
  1040. // then a result element is undef if all of the corresponding input
  1041. // elements are undef.
  1042. UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
  1043. for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
  1044. if (!UndefElts2[InIdx]) // Not undef?
  1045. UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
  1046. }
  1047. break;
  1048. }
  1049. case Instruction::And:
  1050. case Instruction::Or:
  1051. case Instruction::Xor:
  1052. case Instruction::Add:
  1053. case Instruction::Sub:
  1054. case Instruction::Mul:
  1055. // div/rem demand all inputs, because they don't want divide by zero.
  1056. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
  1057. UndefElts, Depth+1);
  1058. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  1059. TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
  1060. UndefElts2, Depth+1);
  1061. if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
  1062. // Output elements are undefined if both are undefined. Consider things
  1063. // like undef&0. The result is known zero, not undef.
  1064. UndefElts &= UndefElts2;
  1065. break;
  1066. case Instruction::FPTrunc:
  1067. case Instruction::FPExt:
  1068. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
  1069. UndefElts, Depth+1);
  1070. if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
  1071. break;
  1072. case Instruction::Call: {
  1073. IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
  1074. if (!II) break;
  1075. switch (II->getIntrinsicID()) {
  1076. default: break;
  1077. // Binary vector operations that work column-wise. A dest element is a
  1078. // function of the corresponding input elements from the two inputs.
  1079. case Intrinsic::x86_sse_sub_ss:
  1080. case Intrinsic::x86_sse_mul_ss:
  1081. case Intrinsic::x86_sse_min_ss:
  1082. case Intrinsic::x86_sse_max_ss:
  1083. case Intrinsic::x86_sse2_sub_sd:
  1084. case Intrinsic::x86_sse2_mul_sd:
  1085. case Intrinsic::x86_sse2_min_sd:
  1086. case Intrinsic::x86_sse2_max_sd:
  1087. TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
  1088. UndefElts, Depth+1);
  1089. if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
  1090. TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
  1091. UndefElts2, Depth+1);
  1092. if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
  1093. // If only the low elt is demanded and this is a scalarizable intrinsic,
  1094. // scalarize it now.
  1095. if (DemandedElts == 1) {
  1096. switch (II->getIntrinsicID()) {
  1097. default: break;
  1098. case Intrinsic::x86_sse_sub_ss:
  1099. case Intrinsic::x86_sse_mul_ss:
  1100. case Intrinsic::x86_sse2_sub_sd:
  1101. case Intrinsic::x86_sse2_mul_sd:
  1102. // TODO: Lower MIN/MAX/ABS/etc
  1103. Value *LHS = II->getArgOperand(0);
  1104. Value *RHS = II->getArgOperand(1);
  1105. // Extract the element as scalars.
  1106. LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
  1107. ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
  1108. RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
  1109. ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
  1110. switch (II->getIntrinsicID()) {
  1111. default: llvm_unreachable("Case stmts out of sync!");
  1112. case Intrinsic::x86_sse_sub_ss:
  1113. case Intrinsic::x86_sse2_sub_sd:
  1114. TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
  1115. II->getName()), *II);
  1116. break;
  1117. case Intrinsic::x86_sse_mul_ss:
  1118. case Intrinsic::x86_sse2_mul_sd:
  1119. TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
  1120. II->getName()), *II);
  1121. break;
  1122. }
  1123. Instruction *New =
  1124. InsertElementInst::Create(
  1125. UndefValue::get(II->getType()), TmpV,
  1126. ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
  1127. II->getName());
  1128. InsertNewInstWith(New, *II);
  1129. return New;
  1130. }
  1131. }
  1132. // Output elements are undefined if both are undefined. Consider things
  1133. // like undef&0. The result is known zero, not undef.
  1134. UndefElts &= UndefElts2;
  1135. break;
  1136. }
  1137. break;
  1138. }
  1139. }
  1140. return MadeChange ? I : 0;
  1141. }