
/3rd_party/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

https://code.google.com/p/softart/
//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

namespace {
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Turn on alias analysis during testing"));

  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                     cl::desc("Include global information in alias analysis"));

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
    StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                      cl::desc("Bypass the profitability model of load "
                               "slicing"),
                      cl::init(false));

//------------------------------ DAGCombiner ---------------------------------//

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    bool LegalOperations;
    bool LegalTypes;
    bool ForCodeSize;

    // Worklist of all of the nodes that need to be simplified.
    //
    // This has the semantics that when adding to the worklist,
    // the item added must be next to be processed. It should
    // also only appear once. The naive approach to this takes
    // linear time.
    //
    // To reduce the insert/remove time to logarithmic, we use
    // a set and a vector to maintain our worklist.
    //
    // The set contains the items on the worklist, but does not
    // maintain the order they should be visited.
    //
    // The vector maintains the order nodes should be visited, but may
    // contain duplicate or removed nodes. When choosing a node to
    // visit, we pop off the order stack until we find an item that is
    // also in the contents set. All operations are O(log N).
    SmallPtrSet<SDNode*, 64> WorkListContents;
    SmallVector<SDNode*, 64> WorkListOrder;
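
    // For illustration: with this scheme, choosing the next node to visit is
    // the short loop used in DAGCombiner::Run() below; entries popped off the
    // vector that are no longer in the set are stale and simply skipped:
    //
    //   do {
    //     N = WorkListOrder.pop_back_val();
    //   } while (!WorkListContents.erase(N));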

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// AddUsersToWorkList - When an instruction is simplified, add all users of
    /// the instruction to the work lists because they might get more simplified
    /// now.
    ///
    void AddUsersToWorkList(SDNode *N) {
      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
           UI != UE; ++UI)
        AddToWorkList(*UI);
    }

    /// visit - call the node-specific routine that knows how to fold each
    /// particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// AddToWorkList - Add to the work list, making sure its instance is at
    /// the back (next to be processed).
    void AddToWorkList(SDNode *N) {
      WorkListContents.insert(N);
      WorkListOrder.push_back(N);
    }

    /// removeFromWorkList - remove all instances of N from the worklist.
    ///
    void removeFromWorkList(SDNode *N) {
      WorkListContents.erase(N);
    }

    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// SimplifyDemandedBits - Check the specified integer node value to see if
    /// it can be simplified or if things it uses can be simplified by bit
    /// propagation. If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    bool SliceUpLoad(SDNode *N);

    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                         ISD::NodeType ExtType);

    /// combine - call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types. The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitSREM(SDNode *N);
    SDValue visitUREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitSDIVREM(SDNode *N);
    SDValue visitUDIVREM(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue SimplifyVUnaryOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);
    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, unsigned Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
                             SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          SDLoc DL, bool foldBooleans = true);
    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// isAlias - Return true if there is any possibility that the two addresses
    /// overlap.
    bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
                 const Value *SrcValue1, int SrcValueOffset1,
                 unsigned SrcValueAlign1,
                 const MDNode *TBAAInfo1,
                 SDValue Ptr2, int64_t Size2, bool IsVolatile2,
                 const Value *SrcValue2, int SrcValueOffset2,
                 unsigned SrcValueAlign2,
                 const MDNode *TBAAInfo2) const;

    /// isAlias - Return true if there is any possibility that the two addresses
    /// overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);

    /// FindAliasInfo - Extracts the relevant alias information from the memory
    /// node. Returns true if the operand was a load.
    bool FindAliasInfo(SDNode *N,
                       SDValue &Ptr, int64_t &Size, bool &IsVolatile,
                       const Value *&SrcValue, int &SrcValueOffset,
                       unsigned &SrcValueAlignment,
                       const MDNode *&TBAAInfo) const;

    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
    /// looking for a better chain (aliasing node).
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      AttributeSet FnAttrs =
        DAG.getMachineFunction().getFunction()->getAttributes();
      ForCodeSize =
        FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                             Attribute::OptimizeForSize) ||
        FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
    }

    /// Run - runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// getShiftAmountTy - Returns a type large enough to hold any valid
    /// shift amount - before type legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
                        : TLI.getPointerTy();
    }

    /// isTypeLegal - This method returns true if we are running before type
    /// legalization or if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// getSetCCResultType - Convenience wrapper around
    /// TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(*DAG.getContext(), VT);
    }
  };
}

namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;
public:
  explicit WorkListRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  virtual void NodeDeleted(SDNode *N, SDNode *E) {
    DC.removeFromWorkList(N);
  }
};
}

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorkList(N);
}

void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->removeFromWorkList(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

/// isNegatibleForFree - Return 1 if we can compute the negated form of the
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses.
  if (!Op.hasOneUse()) return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return false;
  case ISD::ConstantFP:
    // Don't invert constant FP values after legalize. The negated constant
    // isn't necessarily legal.
    return LegalOperations ? 0 : 1;
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    if (!Options->UnsafeFPMath) return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations &&
        !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->UnsafeFPMath) return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    if (Options->HonorSignDependentRoundingFPMath()) return 0;

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              Depth + 1);
  }
}
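
// Worked example (assuming UnsafeFPMath and, after legalization, a legal
// FSUB): for Op = (fadd A, (fneg B)), operand 0 is not free to negate, but
// operand 1 is, since its FNEG just peels off. This returns 2, and
// GetNegatedExpression below rewrites the whole expression to (fsub B, A),
// i.e. -(A + -B) == B - A.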

/// GetNegatedExpression - If isNegatibleForFree returns true, this function
/// returns the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(DAG.getTarget().Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(),
                           &DAG.getTarget().Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));

    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0));
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    assert(DAG.getTarget().Options.UnsafeFPMath);

    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->getValueAPF().isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0));

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(),
                           &DAG.getTarget().Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1));

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(1));
  }
}
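
// The two helpers above form a query/commit pair: callers are expected to
// check isNegatibleForFree(Op, ...) first and only then call
// GetNegatedExpression(Op, ...), note the matching Depth asserts. For
// example, negating (fmul X, (fneg Y)) is free: the FMUL case above rewrites
// it to (fmul X, Y).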

// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
// that selects between the values 1 and 0, making it equivalent to a setcc.
// Also, set the incoming LHS, RHS, and CC references to the appropriate
// nodes based on the type of node we are checking. This simplifies life a
// bit for the callers.
static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                              SDValue &CC) {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }
  if (N.getOpcode() == ISD::SELECT_CC &&
      N.getOperand(2).getOpcode() == ISD::Constant &&
      N.getOperand(3).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(4);
    return true;
  }
  return false;
}

// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
// one use. If this is true, it allows the users to invert the operation for
// free when it is profitable to do so.
static bool isOneUseSetCC(SDValue N) {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                    SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
    if (isa<ConstantSDNode>(N1)) {
      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
      SDValue OpNode =
        DAG.FoldConstantArithmetic(Opc, VT,
                                   cast<ConstantSDNode>(N0.getOperand(1)),
                                   cast<ConstantSDNode>(N1));
      return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
    }
    if (N0.hasOneUse()) {
      // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
                                   N0.getOperand(0), N1);
      AddToWorkList(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }

  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
    if (isa<ConstantSDNode>(N0)) {
      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
      SDValue OpNode =
        DAG.FoldConstantArithmetic(Opc, VT,
                                   cast<ConstantSDNode>(N1.getOperand(1)),
                                   cast<ConstantSDNode>(N0));
      return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
    }
    if (N1.hasOneUse()) {
      // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
                                   N1.getOperand(0), N0);
      AddToWorkList(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n";
        for (unsigned i = 0, e = NumTo; i != e; ++i)
          assert((!To[i].getNode() ||
                  N->getValueType(i) == To[i].getValueType()) &&
                 "Cannot combine value to value of different type!"));
  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorkList(To[i].getNode());
        AddUsersToWorkList(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty()) {
    // Nodes can be reintroduced into the worklist. Make sure we do not
    // process a node that has been replaced.
    removeFromWorkList(N);

    // Finally, since the node is now dead, remove it from the graph.
    DAG.DeleteNode(N);
  }
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorkList(TLO.New.getNode());
  AddUsersToWorkList(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty()) {
    removeFromWorkList(TLO.Old.getNode());

    // If the operands of this node are only used by the node, they will now
    // be dead. Make sure to visit them first to delete dead nodes early.
    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
      if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
        AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());

    DAG.DeleteNode(TLO.Old.getNode());
  }
}

/// SimplifyDemandedBits - Check the specified integer node value to see if
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorkList(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  removeFromWorkList(Load);
  DAG.DeleteNode(Load);
  AddToWorkList(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
                                                  : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (NewOp.getNode() == 0)
    return SDValue();
  AddToWorkList(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (NewOp.getNode() == 0)
    return SDValue();
  AddToWorkList(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}

/// PromoteIntBinOp - Promote the specified integer binary operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (NN0.getNode() == 0)
      return SDValue();

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    if (N0 == N1)
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (NN1.getNode() == 0)
        return SDValue();
    }

    AddToWorkList(NN0.getNode());
    if (NN1.getNode())
      AddToWorkList(NN1.getNode());

    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}
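
// For instance, on a target where i16 arithmetic is undesirable, the hooks
// above rewrite (add i16 a, b) as
//   (i16 (truncate (add i32 (any_extend a), (any_extend b))))
// so the addition itself runs in the wider, cheaper type.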

/// PromoteIntShiftOp - Promote the specified integer shift operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (N0.getNode() == 0)
      return SDValue();

    AddToWorkList(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc dl(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
                                                  : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorkListRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    removeFromWorkList(N);
    DAG.DeleteNode(N);
    AddToWorkList(Result.getNode());
    return true;
  }
  return false;
}
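
// Concretely: when i16 is an undesirable type, an (i16 (load p)) becomes
//   (i16 (truncate (i32 (zextload p))))
// (or an EXTLOAD when ZEXTLOAD from the memory type is not legal), and both
// the value and the chain uses of the old load are rewired to the new nodes.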

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I)
    AddToWorkList(I);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // The root of the dag may dangle to deleted nodes until the dag combiner is
  // done. Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorkListContents.empty()) {
    SDNode *N;
    // The WorkListOrder holds the SDNodes in order, but it may contain
    // duplicates.
    // In order to avoid a linear scan, we use a set (O(log N)) to hold what
    // the worklist *should* contain, and check that the node we want to visit
    // should actually be visited.
    do {
      N = WorkListOrder.pop_back_val();
    } while (!WorkListContents.erase(N));

    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (N->use_empty() && N != &Dummy) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        AddToWorkList(N->getOperand(i).getNode());

      DAG.DeleteNode(N);
      continue;
    }

    SDValue RV = combine(N);

    if (RV.getNode() == 0)
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << "\nReplacing.3 ";
          N->dump(&DAG);
          dbgs() << "\nWith: ";
          RV.getNode()->dump(&DAG);
          dbgs() << '\n');

    // Transfer debug value.
    DAG.TransferDbgValues(SDValue(N, 0), RV);
    WorkListRemover DeadNodes(*this);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorkList(RV.getNode());
    AddUsersToWorkList(RV.getNode());

    // Add any uses of the old node to the worklist in case this node is the
    // last one that uses them. They may become dead after this node is
    // deleted.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      AddToWorkList(N->getOperand(i).getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
    if (N->use_empty()) {
      // Nodes can be reintroduced into the worklist. Make sure we do not
      // process a node that has been replaced.
      removeFromWorkList(N);

      // Finally, since the node is now dead, remove it from the graph.
      DAG.DeleteNode(N);
    }
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  }
  return SDValue();
}

SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (RV.getNode() == 0) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (RV.getNode() == 0) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (RV.getNode() == 0 &&
      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = { N1, N0 };
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
                                            Ops, 2);
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// getInputChainForNode - Given a node, return its input chain if it has one,
/// otherwise return a null sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. The TFs list grows when new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorkList(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()))
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around, replace the token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                           MVT::Other, &Ops[0], Ops.size());
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  return Result;
}
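
// Example: a chain such as (TokenFactor (TokenFactor ch1, ch2), ch2), where
// the inner TokenFactor has a single use, is flattened here: the inner node
// is inlined into the TFs list and the duplicate ch2 operand is dropped via
// SeenOps, leaving a single TokenFactor over ch1 and ch2.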

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorkListRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorkList(N);
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  removeFromWorkList(N);
  DAG.DeleteNode(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

static
SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
                              SelectionDAG &DAG) {
  EVT VT = N0.getValueType();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);

  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N00.getOperand(1))) {
    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
    N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
                     DAG.getNode(ISD::SHL, SDLoc(N00), VT,
                                 N00.getOperand(0), N01),
                     DAG.getNode(ISD::SHL, SDLoc(N01), VT,
                                 N00.getOperand(1), N01));
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
  }

  return SDValue();
}
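
// A numeric instance of the fold above, with c1 = 1 and c2 = 2:
//   (add (shl (add x, 1), 2), y) -> (add (add (shl x, 2), 4), y)
// since (x + 1) << 2 == (x << 2) + (1 << 2).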
  1225. SDValue DAGCombiner::visitADD(SDNode *N) {
  1226. SDValue N0 = N->getOperand(0);
  1227. SDValue N1 = N->getOperand(1);
  1228. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  1229. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1230. EVT VT = N0.getValueType();
  1231. // fold vector ops
  1232. if (VT.isVector()) {
  1233. SDValue FoldedVOp = SimplifyVBinOp(N);
  1234. if (FoldedVOp.getNode()) return FoldedVOp;
  1235. // fold (add x, 0) -> x, vector edition
  1236. if (ISD::isBuildVectorAllZeros(N1.getNode()))
  1237. return N0;
  1238. if (ISD::isBuildVectorAllZeros(N0.getNode()))
  1239. return N1;
  1240. }
  1241. // fold (add x, undef) -> undef
  1242. if (N0.getOpcode() == ISD::UNDEF)
  1243. return N0;
  1244. if (N1.getOpcode() == ISD::UNDEF)
  1245. return N1;
  1246. // fold (add c1, c2) -> c1+c2
  1247. if (N0C && N1C)
  1248. return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  1249. // canonicalize constant to RHS
  1250. if (N0C && !N1C)
  1251. return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
  1252. // fold (add x, 0) -> x
  1253. if (N1C && N1C->isNullValue())
  1254. return N0;
  1255. // fold (add Sym, c) -> Sym+c
  1256. if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
  1257. if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
  1258. GA->getOpcode() == ISD::GlobalAddress)
  1259. return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
  1260. GA->getOffset() +
  1261. (uint64_t)N1C->getSExtValue());
  1262. // fold ((c1-A)+c2) -> (c1+c2)-A
  1263. if (N1C && N0.getOpcode() == ISD::SUB)
  1264. if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
  1265. return DAG.getNode(ISD::SUB, SDLoc(N), VT,
  1266. DAG.getConstant(N1C->getAPIntValue()+
  1267. N0C->getAPIntValue(), VT),
  1268. N0.getOperand(1));
  1269. // reassociate add
  1270. SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
  1271. if (RADD.getNode() != 0)
  1272. return RADD;
  1273. // fold ((0-A) + B) -> B-A
  1274. if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
  1275. cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
  1276. return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  1277. // fold (A + (0-B)) -> A-B
  1278. if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
  1279. cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
  1280. return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  1281. // fold (A+(B-A)) -> B
  1282. if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
  1283. return N1.getOperand(0);
  1284. // fold ((B-A)+A) -> B
  1285. if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
  1286. return N0.getOperand(0);
  1287. // fold (A+(B-(A+C))) to (B-C)
  1288. if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
  1289. N0 == N1.getOperand(1).getOperand(0))
  1290. return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
  1291. N1.getOperand(1).getOperand(1));
  1292. // fold (A+(B-(C+A))) to (B-C)
  1293. if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
  1294. N0 == N1.getOperand(1).getOperand(1))
  1295. return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
  1296. N1.getOperand(1).getOperand(0));
  1297. // fold (A+((B-A)+or-C)) to (B+or-C)
  1298. if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
  1299. N1.getOperand(0).getOpcode() == ISD::SUB &&
  1300. N0 == N1.getOperand(0).getOperand(1))
  1301. return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
  1302. N1.getOperand(0).getOperand(0), N1.getOperand(1));
  1303. // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  1304. if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
  1305. SDValue N00 = N0.getOperand(0);
  1306. SDValue N01 = N0.getOperand(1);
  1307. SDValue N10 = N1.getOperand(0);
  1308. SDValue N11 = N1.getOperand(1);
  1309. if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
  1310. return DAG.getNode(ISD::SUB, SDLoc(N), VT,
  1311. DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
  1312. DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  1313. }
  1314. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
  1315. return SDValue(N, 0);
  1316. // fold (a+b) -> (a|b) iff a and b share no bits.
  1317. if (VT.isInteger() && !VT.isVector()) {
  1318. APInt LHSZero, LHSOne;
  1319. APInt RHSZero, RHSOne;
  1320. DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
  1321. if (LHSZero.getBoolValue()) {
  1322. DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
  1323. // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
  1324. // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
  1325. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
  1326. return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
  1327. }
  1328. }
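// Example: for i8, (add (shl x, 4), (and y, 15)) has disjoint possibly-set
// bits (the shl clears the low four bits, the and clears the high four),
// so it can be emitted as (or (shl x, 4), (and y, 15)).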
1329. // fold (add (shl (add x, c1), c2), y) -> (add (add (shl x, c2), c1<<c2), y)
  1330. if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
  1331. SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
  1332. if (Result.getNode()) return Result;
  1333. }
  1334. if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
  1335. SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
  1336. if (Result.getNode()) return Result;
  1337. }
  1338. // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  1339. if (N1.getOpcode() == ISD::SHL &&
  1340. N1.getOperand(0).getOpcode() == ISD::SUB)
  1341. if (ConstantSDNode *C =
  1342. dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
  1343. if (C->getAPIntValue() == 0)
  1344. return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
  1345. DAG.getNode(ISD::SHL, SDLoc(N), VT,
  1346. N1.getOperand(0).getOperand(1),
  1347. N1.getOperand(1)));
  1348. if (N0.getOpcode() == ISD::SHL &&
  1349. N0.getOperand(0).getOpcode() == ISD::SUB)
  1350. if (ConstantSDNode *C =
  1351. dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
  1352. if (C->getAPIntValue() == 0)
  1353. return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
  1354. DAG.getNode(ISD::SHL, SDLoc(N), VT,
  1355. N0.getOperand(0).getOperand(1),
  1356. N0.getOperand(1)));
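// In both shl(0 - y, n) cases above, (0 - y) << n == 0 - (y << n) in
// two's-complement arithmetic, which lets the outer add absorb the
// negation as a sub of the plain shift.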
  1357. if (N1.getOpcode() == ISD::AND) {
  1358. SDValue AndOp0 = N1.getOperand(0);
  1359. ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
  1360. unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
  1361. unsigned DestBits = VT.getScalarType().getSizeInBits();
  1362. // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
  1363. // and similar xforms where the inner op is either ~0 or 0.
  1364. if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
  1365. SDLoc DL(N);
  1366. return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
  1367. }
  1368. }
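// Rationale: NumSignBits == DestBits means AndOp0 is known to be either
// all-ones or all-zero, so (and AndOp0, 1) is 1 exactly when AndOp0 is -1;
// adding that bit is the same as subtracting the all-ones value itself.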
  1369. // add (sext i1), X -> sub X, (zext i1)
  1370. if (N0.getOpcode() == ISD::SIGN_EXTEND &&
  1371. N0.getOperand(0).getValueType() == MVT::i1 &&
  1372. !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
  1373. SDLoc DL(N);
  1374. SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
  1375. return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  1376. }
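// A sign-extended i1 is 0 or -1 while a zero-extended i1 is 0 or 1, and
// adding -1 is the same as subtracting 1, so the two forms are equivalent.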
  1377. return SDValue();
  1378. }
  1379. SDValue DAGCombiner::visitADDC(SDNode *N) {
  1380. SDValue N0 = N->getOperand(0);
  1381. SDValue N1 = N->getOperand(1);
  1382. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  1383. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1384. EVT VT = N0.getValueType();
  1385. // If the flag result is dead, turn this into an ADD.
  1386. if (!N->hasAnyUseOfValue(1))
  1387. return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
  1388. DAG.getNode(ISD::CARRY_FALSE,
  1389. SDLoc(N), MVT::Glue));
  1390. // canonicalize constant to RHS.
  1391. if (N0C && !N1C)
  1392. return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
  1393. // fold (addc x, 0) -> x + no carry out
  1394. if (N1C && N1C->isNullValue())
  1395. return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
  1396. SDLoc(N), MVT::Glue));
  1397. // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  1398. APInt LHSZero, LHSOne;
  1399. APInt RHSZero, RHSOne;
  1400. DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
  1401. if (LHSZero.getBoolValue()) {
  1402. DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
  1403. // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
  1404. // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
  1405. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
  1406. return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
  1407. DAG.getNode(ISD::CARRY_FALSE,
  1408. SDLoc(N), MVT::Glue));
  1409. }
  1410. return SDValue();
  1411. }
  1412. SDValue DAGCombiner::visitADDE(SDNode *N) {
  1413. SDValue N0 = N->getOperand(0);
  1414. SDValue N1 = N->getOperand(1);
  1415. SDValue CarryIn = N->getOperand(2);
  1416. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  1417. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1418. // canonicalize constant to RHS
  1419. if (N0C && !N1C)
  1420. return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
  1421. N1, N0, CarryIn);
  1422. // fold (adde x, y, false) -> (addc x, y)
  1423. if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
  1424. return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
  1425. return SDValue();
  1426. }
1427. // It may not be valid to emit a fold to zero for vector initializers,
1428. // so check whether we can before folding.
  1429. static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
  1430. SelectionDAG &DAG,
  1431. bool LegalOperations, bool LegalTypes) {
  1432. if (!VT.isVector())
  1433. return DAG.getConstant(0, VT);
  1434. if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
  1435. return DAG.getConstant(0, VT);
  1436. return SDValue();
  1437. }
  1438. SDValue DAGCombiner::visitSUB(SDNode *N) {
  1439. SDValue N0 = N->getOperand(0);
  1440. SDValue N1 = N->getOperand(1);
  1441. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  1442. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  1443. ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
  1444. dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  1445. EVT VT = N0.getValueType();
  1446. // fold vector ops
  1447. if (VT.isVector()) {
  1448. SDValue FoldedVOp = SimplifyVBinOp(N);
  1449. if (FoldedVOp.getNode()) return FoldedVOp;
  1450. // fold (sub x, 0) -> x, vector edition
  1451. if (ISD::isBuildVectorAllZeros(N1.getNode()))
  1452. return N0;
  1453. }
  1454. // fold (sub x, x) -> 0
  1455. // FIXME: Refactor this and xor and other similar operations together.
  1456. if (N0 == N1)
  1457. return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  1458. // fold (sub c1, c2) -> c1-c2
  1459. if (N0C && N1C)
  1460. return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  1461. // fold (sub x, c) -> (add x, -c)
  1462. if (N1C)
  1463. return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
  1464. DAG.getConstant(-N1C->getAPIntValue(), VT));
  1465. // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  1466. if (N0C && N0C->isAllOnesValue())
  1467. return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  1468. // fold A-(A-B) -> B
  1469. if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
  1470. return N1.getOperand(1);
  1471. // fold (A+B)-A -> B
  1472. if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
  1473. return N0.getOperand(1);
  1474. // fold (A+B)-B -> A
  1475. if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
  1476. return N0.getOperand(0);
  1477. // fold C2-(A+C1) -> (C2-C1)-A
  1478. if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
  1479. SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
  1480. VT);
  1481. return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
  1482. N1.getOperand(0));
  1483. }
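// e.g. (sub 10, (add x, 3)) becomes (sub 7, x).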
  1484. // fold ((A+(B+or-C))-B) -> A+or-C
  1485. if (N0.getOpcode() == ISD::ADD &&
  1486. (N0.getOperand(1).getOpcode() == ISD::SUB ||
  1487. N0.getOperand(1).getOpcode() == ISD::ADD) &&
  1488. N0.getOperand(1).getOperand(0) == N1)
  1489. return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
  1490. N0.getOperand(0), N0.getOperand(1).getOperand(1));
  1491. // fold ((A+(C+B))-B) -> A+C
  1492. if (N0.getOpcode() == ISD::ADD &&
  1493. N0.getOperand(1).getOpcode() == ISD::ADD &&
  1494. N0.getOperand(1).getOperand(1) == N1)
  1495. return DAG.getNode(ISD::ADD, SDLoc(N), VT,
  1496. N0.getOperand(0), N0.getOperand(1).getOperand(0));
  1497. // fold ((A-(B-C))-C) -> A-B
  1498. if (N0.getOpcode() == ISD::SUB &&
  1499. N0.getOperand(1).getOpcode() == ISD::SUB &&
  1500. N0.getOperand(1).getOperand(1) == N1)
  1501. return DAG.getNode(ISD::SUB, SDLoc(N), VT,
  1502. N0.getOperand(0), N0.getOperand(1).getOperand(0));
  1503. // If either operand of a sub is undef, the result is undef
  1504. if (N0.getOpcode() == ISD::UNDEF)
  1505. return N0;
  1506. if (N1.getOpcode() == ISD::UNDEF)
  1507. return N1;
  1508. // If the relocation model supports it, consider symbol offsets.
  1509. if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
  1510. if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
  1511. // fold (sub Sym, c) -> Sym-c
  1512. if (N1C && GA->getOpcode() == ISD::GlobalAddress)
  1513. return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
  1514. GA->getOffset() -
  1515. (uint64_t)N1C->getSExtValue());
  1516. // fold (sub Sym+c1, Sym+c2) -> c1-c2
  1517. if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
  1518. if (GA->getGlobal() == GB->getGlobal())
  1519. return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
  1520. VT);
  1521. }
  1522. return SDValue();
  1523. }
  1524. SDValue DAGCombiner::visitSUBC(SDNode *N) {
  1525. SDValue N0 = N->getOperand(0);
  1526. SDValue N1 = N->getOperand(1);
  1527. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  1528. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1529. EVT VT = N0.getValueType();
  1530. // If the flag result is dead, turn this into an SUB.
  1531. if (!N->hasAnyUseOfValue(1))
  1532. return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
  1533. DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
  1534. MVT::Glue));
  1535. // fold (subc x, x) -> 0 + no borrow
  1536. if (N0 == N1)
  1537. return CombineTo(N, DAG.getConstant(0, VT),
  1538. DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
  1539. MVT::Glue));
  1540. // fold (subc x, 0) -> x + no borrow
  1541. if (N1C && N1C->isNullValue())
  1542. return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
  1543. MVT::Glue));
  1544. // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  1545. if (N0C && N0C->isAllOnesValue())
  1546. return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
  1547. DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
  1548. MVT::Glue));
  1549. return SDValue();
  1550. }
  1551. SDValue DAGCombiner::visitSUBE(SDNode *N) {
  1552. SDValue N0 = N->getOperand(0);
  1553. SDValue N1 = N->getOperand(1);
  1554. SDValue CarryIn = N->getOperand(2);
  1555. // fold (sube x, y, false) -> (subc x, y)
  1556. if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
  1557. return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
  1558. return SDValue();
  1559. }
  1560. /// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
  1561. /// elements are all the same constant or undefined.
  1562. static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
  1563. BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
  1564. if (!C)
  1565. return false;
  1566. APInt SplatUndef;
  1567. unsigned SplatBitSize;
  1568. bool HasAnyUndefs;
  1569. EVT EltVT = N->getValueType(0).getVectorElementType();
  1570. return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
  1571. HasAnyUndefs) &&
  1572. EltVT.getSizeInBits() >= SplatBitSize);
  1573. }
  1574. SDValue DAGCombiner::visitMUL(SDNode *N) {
  1575. SDValue N0 = N->getOperand(0);
  1576. SDValue N1 = N->getOperand(1);
  1577. EVT VT = N0.getValueType();
  1578. // fold (mul x, undef) -> 0
  1579. if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
  1580. return DAG.getConstant(0, VT);
  1581. bool N0IsConst = false;
  1582. bool N1IsConst = false;
  1583. APInt ConstValue0, ConstValue1;
  1584. // fold vector ops
  1585. if (VT.isVector()) {
  1586. SDValue FoldedVOp = SimplifyVBinOp(N);
  1587. if (FoldedVOp.getNode()) return FoldedVOp;
  1588. N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
  1589. N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  1590. } else {
  1591. N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
  1592. ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
  1593. : APInt();
  1594. N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
  1595. ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
  1596. : APInt();
  1597. }
  1598. // fold (mul c1, c2) -> c1*c2
  1599. if (N0IsConst && N1IsConst)
  1600. return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
  1601. // canonicalize constant to RHS
  1602. if (N0IsConst && !N1IsConst)
  1603. return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  1604. // fold (mul x, 0) -> 0
  1605. if (N1IsConst && ConstValue1 == 0)
  1606. return N1;
  1607. // We require a splat of the entire scalar bit width for non-contiguous
  1608. // bit patterns.
  1609. bool IsFullSplat =
  1610. ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  1611. // fold (mul x, 1) -> x
  1612. if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
  1613. return N0;
  1614. // fold (mul x, -1) -> 0-x
  1615. if (N1IsConst && ConstValue1.isAllOnesValue())
  1616. return DAG.getNode(ISD::SUB, SDLoc(N), VT,
  1617. DAG.getConstant(0, VT), N0);
  1618. // fold (mul x, (1 << c)) -> x << c
  1619. if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
  1620. return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
  1621. DAG.getConstant(ConstValue1.logBase2(),
  1622. getShiftAmountTy(N0.getValueType())));
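// e.g. (mul x, 16) becomes (shl x, 4).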
  1623. // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  1624. if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
  1625. unsigned Log2Val = (-ConstValue1).logBase2();
  1626. // FIXME: If the input is something that is easily negated (e.g. a
  1627. // single-use add), we should put the negate there.
  1628. return DAG.getNode(ISD::SUB, SDLoc(N), VT,
  1629. DAG.getConstant(0, VT),
  1630. DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
  1631. DAG.getConstant(Log2Val,
  1632. getShiftAmountTy(N0.getValueType()))));
  1633. }
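// e.g. (mul x, -8) becomes (sub 0, (shl x, 3)).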
  1634. APInt Val;
  1635. // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  1636. if (N1IsConst && N0.getOpcode() == ISD::SHL &&
  1637. (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
  1638. isa<ConstantSDNode>(N0.getOperand(1)))) {
  1639. SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
  1640. N1, N0.getOperand(1));
  1641. AddToWorkList(C3.getNode());
  1642. return DAG.getNode(ISD::MUL, SDLoc(N), VT,
  1643. N0.getOperand(0), C3);
  1644. }
  1645. // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  1646. // use.
  1647. {
  1648. SDValue Sh(0,0), Y(0,0);
  1649. // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
  1650. if (N0.getOpcode() == ISD::SHL &&
  1651. (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
  1652. isa<ConstantSDNode>(N0.getOperand(1))) &&
  1653. N0.getNode()->hasOneUse()) {
  1654. Sh = N0; Y = N1;
  1655. } else if (N1.getOpcode() == ISD::SHL &&
  1656. isa<ConstantSDNode>(N1.getOperand(1)) &&
  1657. N1.getNode()->hasOneUse()) {
  1658. Sh = N1; Y = N0;
  1659. }
  1660. if (Sh.getNode()) {
  1661. SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
  1662. Sh.getOperand(0), Y);
  1663. return DAG.getNode(ISD::SHL, SDLoc(N), VT,
  1664. Mul, Sh.getOperand(1));
  1665. }
  1666. }
  1667. // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  1668. if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
  1669. (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
  1670. isa<ConstantSDNode>(N0.getOperand(1))))
  1671. return DAG.getNode(ISD::ADD, SDLoc(N), VT,
  1672. DAG.getNode(ISD::MUL, SDLoc(N0), VT,
  1673. N0.getOperand(0), N1),
  1674. DAG.getNode(ISD::MUL, SDLoc(N1), VT,
  1675. N0.getOperand(1), N1));
  1676. // reassociate mul
  1677. SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
  1678. if (RMUL.getNode() != 0)
  1679. return RMUL;
  1680. return SDValue();
  1681. }
  1682. SDValue DAGCombiner::visitSDIV(SDNode *N) {
  1683. SDValue N0 = N->getOperand(0);
  1684. SDValue N1 = N->getOperand(1);
  1685. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  1686. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  1687. EVT VT = N->getValueType(0);
  1688. // fold vector ops
  1689. if (VT.isVector()) {
  1690. SDValue FoldedVOp = SimplifyVBinOp(N);
  1691. if (FoldedVOp.getNode()) return FoldedVOp;
  1692. }
  1693. // fold (sdiv c1, c2) -> c1/c2
  1694. if (N0C && N1C && !N1C->isNullValue())
  1695. return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  1696. // fold (sdiv X, 1) -> X
  1697. if (N1C && N1C->getAPIntValue() == 1LL)
  1698. return N0;
  1699. // fold (sdiv X, -1) -> 0-X
  1700. if (N1C && N1C->isAllOnesValue())
  1701. return DAG.getNode(ISD::SUB, SDLoc(N), VT,
  1702. DAG.getConstant(0, VT), N0);
  1703. // If we know the sign bits of both operands are zero, strength reduce to a
  1704. // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
  1705. if (!VT.isVector()) {
  1706. if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
  1707. return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
  1708. N0, N1);
  1709. }
  1710. // fold (sdiv X, pow2) -> simple ops after legalize
  1711. if (N1C && !N1C->isNullValue() &&
  1712. (N1C->getAPIntValue().isPowerOf2() ||
  1713. (-N1C->getAPIntValue()).isPowerOf2())) {
  1714. // If dividing by powers of two is cheap, then don't perform the following
  1715. // fold.
  1716. if (TLI.isPow2DivCheap())
  1717. return SDValue();
  1718. unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
  1719. // Splat the sign bit into the register
  1720. SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
  1721. DAG.getConstant(VT.getSizeInBits()-1,
  1722. getShiftAmountTy(N0.getValueType())));
  1723. AddToWorkList(SGN.getNode());
1724. // Add (N0 < 0) ? abs2 - 1 : 0, where abs2 == 1 << lg2.
  1725. SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
  1726. DAG.getConstant(VT.getSizeInBits() - lg2,
  1727. getShiftAmountTy(SGN.getValueType())));
  1728. SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
  1729. AddToWorkList(SRL.getNode());
  1730. AddToWorkList(ADD.getNode()); // Divide by pow2
  1731. SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
  1732. DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
  1733. // If we're dividing by a positive value, we're done. Otherwise, we must
  1734. // negate the result.
  1735. if (N1C->getAPIntValue().isNonNegative())
  1736. return SRA;
  1737. AddToWorkList(SRA.getNode());
  1738. return DAG.getNode(ISD::SUB, SDLoc(N), VT,
  1739. DAG.getConstant(0, VT), SRA);
  1740. }
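// Worked example for i32 (sdiv x, 4): SGN = x >> 31, SRL = SGN >>u 30
// (0 or 3), ADD = x + SRL, result = ADD >> 2; rounding toward zero is
// achieved by biasing negative dividends by abs2 - 1 before the shift.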
  1741. // if integer divide is expensive and we satisfy the requirements, emit an
  1742. // alternate sequence.
  1743. if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
  1744. SDValue Op = BuildSDIV(N);
  1745. if (Op.getNode()) return Op;
  1746. }
  1747. // undef / X -> 0
  1748. if (N0.getOpcode() == ISD::UNDEF)
  1749. return DAG.getConstant(0, VT);
  1750. // X / undef -> undef
  1751. if (N1.getOpcode() == ISD::UNDEF)
  1752. return N1;
  1753. return SDValue();
  1754. }
  1755. SDValue DAGCombiner::visitUDIV(SDNode *N) {
  1756. SDValue N0 = N->getOperand(0);
  1757. SDValue N1 = N->getOperand(1);
  1758. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  1759. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  1760. EVT VT = N->getValueType(0);
  1761. // fold vector ops
  1762. if (VT.isVector()) {
  1763. SDValue FoldedVOp = SimplifyVBinOp(N);
  1764. if (FoldedVOp.getNode()) return FoldedVOp;
  1765. }
  1766. // fold (udiv c1, c2) -> c1/c2
  1767. if (N0C && N1C && !N1C->isNullValue())
  1768. return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  1769. // fold (udiv x, (1 << c)) -> x >>u c
  1770. if (N1C && N1C->getAPIntValue().isPowerOf2())
  1771. return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
  1772. DAG.getConstant(N1C->getAPIntValue().logBase2(),
  1773. getShiftAmountTy(N0.getValueType())));
  1774. // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  1775. if (N1.getOpcode() == ISD::SHL) {
  1776. if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
  1777. if (SHC->getAPIntValue().isPowerOf2()) {
  1778. EVT ADDVT = N1.getOperand(1).getValueType();
  1779. SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
  1780. N1.getOperand(1),
  1781. DAG.getConstant(SHC->getAPIntValue()
  1782. .logBase2(),
  1783. ADDVT));
  1784. AddToWorkList(Add.getNode());
  1785. return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
  1786. }
  1787. }
  1788. }
  1789. // fold (udiv x, c) -> alternate
  1790. if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
  1791. SDValue Op = BuildUDIV(N);
  1792. if (Op.getNode()) return Op;
  1793. }
  1794. // undef / X -> 0
  1795. if (N0.getOpcode() == ISD::UNDEF)
  1796. return DAG.getConstant(0, VT);
  1797. // X / undef -> undef
  1798. if (N1.getOpcode() == ISD::UNDEF)
  1799. return N1;
  1800. return SDValue();
  1801. }
  1802. SDValue DAGCombiner::visitSREM(SDNode *N) {
  1803. SDValue N0 = N->getOperand(0);
  1804. SDValue N1 = N->getOperand(1);
  1805. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  1806. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1807. EVT VT = N->getValueType(0);
  1808. // fold (srem c1, c2) -> c1%c2
  1809. if (N0C && N1C && !N1C->isNullValue())
  1810. return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
  1811. // If we know the sign bits of both operands are zero, strength reduce to a
  1812. // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  1813. if (!VT.isVector()) {
  1814. if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
  1815. return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
  1816. }
  1817. // If X/C can be simplified by the division-by-constant logic, lower
  1818. // X%C to the equivalent of X-X/C*C.
  1819. if (N1C && !N1C->isNullValue()) {
  1820. SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
  1821. AddToWorkList(Div.getNode());
  1822. SDValue OptimizedDiv = combine(Div.getNode());
  1823. if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
  1824. SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
  1825. OptimizedDiv, N1);
  1826. SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
  1827. AddToWorkList(Mul.getNode());
  1828. return Sub;
  1829. }
  1830. }
  1831. // undef % X -> 0
  1832. if (N0.getOpcode() == ISD::UNDEF)
  1833. return DAG.getConstant(0, VT);
  1834. // X % undef -> undef
  1835. if (N1.getOpcode() == ISD::UNDEF)
  1836. return N1;
  1837. return SDValue();
  1838. }
  1839. SDValue DAGCombiner::visitUREM(SDNode *N) {
  1840. SDValue N0 = N->getOperand(0);
  1841. SDValue N1 = N->getOperand(1);
  1842. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  1843. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1844. EVT VT = N->getValueType(0);
  1845. // fold (urem c1, c2) -> c1%c2
  1846. if (N0C && N1C && !N1C->isNullValue())
  1847. return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  1848. // fold (urem x, pow2) -> (and x, pow2-1)
  1849. if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
  1850. return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
  1851. DAG.getConstant(N1C->getAPIntValue()-1,VT));
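// e.g. (urem x, 8) becomes (and x, 7).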
  1852. // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  1853. if (N1.getOpcode() == ISD::SHL) {
  1854. if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
  1855. if (SHC->getAPIntValue().isPowerOf2()) {
  1856. SDValue Add =
  1857. DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
  1858. DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
  1859. VT));
  1860. AddToWorkList(Add.getNode());
  1861. return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
  1862. }
  1863. }
  1864. }
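// The add of all-ones (-1) computes (shl pow2, y) - 1, which is the mask
// for the modulus because the shifted power of two is itself a power of two.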
  1865. // If X/C can be simplified by the division-by-constant logic, lower
  1866. // X%C to the equivalent of X-X/C*C.
  1867. if (N1C && !N1C->isNullValue()) {
  1868. SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
  1869. AddToWorkList(Div.getNode());
  1870. SDValue OptimizedDiv = combine(Div.getNode());
  1871. if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
  1872. SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
  1873. OptimizedDiv, N1);
  1874. SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
  1875. AddToWorkList(Mul.getNode());
  1876. return Sub;
  1877. }
  1878. }
  1879. // undef % X -> 0
  1880. if (N0.getOpcode() == ISD::UNDEF)
  1881. return DAG.getConstant(0, VT);
  1882. // X % undef -> undef
  1883. if (N1.getOpcode() == ISD::UNDEF)
  1884. return N1;
  1885. return SDValue();
  1886. }
  1887. SDValue DAGCombiner::visitMULHS(SDNode *N) {
  1888. SDValue N0 = N->getOperand(0);
  1889. SDValue N1 = N->getOperand(1);
  1890. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1891. EVT VT = N->getValueType(0);
  1892. SDLoc DL(N);
  1893. // fold (mulhs x, 0) -> 0
  1894. if (N1C && N1C->isNullValue())
  1895. return N1;
  1896. // fold (mulhs x, 1) -> (sra x, size(x)-1)
  1897. if (N1C && N1C->getAPIntValue() == 1)
  1898. return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
  1899. DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
  1900. getShiftAmountTy(N0.getValueType())));
  1901. // fold (mulhs x, undef) -> 0
  1902. if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
  1903. return DAG.getConstant(0, VT);
  1904. // If the type twice as wide is legal, transform the mulhs to a wider multiply
  1905. // plus a shift.
  1906. if (VT.isSimple() && !VT.isVector()) {
  1907. MVT Simple = VT.getSimpleVT();
  1908. unsigned SimpleSize = Simple.getSizeInBits();
  1909. EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
  1910. if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
  1911. N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
  1912. N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
  1913. N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
  1914. N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
  1915. DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
  1916. return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
  1917. }
  1918. }
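// e.g. an i16 mulhs where i32 MUL is legal: sign-extend both operands to
// i32, multiply, shift the product right by 16, and truncate back to i16.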
  1919. return SDValue();
  1920. }
  1921. SDValue DAGCombiner::visitMULHU(SDNode *N) {
  1922. SDValue N0 = N->getOperand(0);
  1923. SDValue N1 = N->getOperand(1);
  1924. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  1925. EVT VT = N->getValueType(0);
  1926. SDLoc DL(N);
  1927. // fold (mulhu x, 0) -> 0
  1928. if (N1C && N1C->isNullValue())
  1929. return N1;
  1930. // fold (mulhu x, 1) -> 0
  1931. if (N1C && N1C->getAPIntValue() == 1)
  1932. return DAG.getConstant(0, N0.getValueType());
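// The high half of an unsigned multiply by 1 is always zero.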
  1933. // fold (mulhu x, undef) -> 0
  1934. if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
  1935. return DAG.getConstant(0, VT);
  1936. // If the type twice as wide is legal, transform the mulhu to a wider multiply
  1937. // plus a shift.
  1938. if (VT.isSimple() && !VT.isVector()) {
  1939. MVT Simple = VT.getSimpleVT();
  1940. unsigned SimpleSize = Simple.getSizeInBits();
  1941. EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
  1942. if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
  1943. N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
  1944. N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
  1945. N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
  1946. N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
  1947. DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
  1948. return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
  1949. }
  1950. }
  1951. return SDValue();
  1952. }
  1953. /// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
  1954. /// compute two values. LoOp and HiOp give the opcodes for the two computations
1955. /// that are being performed. Returns the simplified value, or a null SDValue if no simplification was made.
  1956. ///
  1957. SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
  1958. unsigned HiOp) {
  1959. // If the high half is not needed, just compute the low half.
  1960. bool HiExists = N->hasAnyUseOfValue(1);
  1961. if (!HiExists &&
  1962. (!LegalOperations ||
  1963. TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
  1964. SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
  1965. N->op_begin(), N->getNumOperands());
  1966. return CombineTo(N, Res, Res);
  1967. }
  1968. // If the low half is not needed, just compute the high half.
  1969. bool LoExists = N->hasAnyUseOfValue(0);
  1970. if (!LoExists &&
  1971. (!LegalOperations ||
  1972. TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
  1973. SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
  1974. N->op_begin(), N->getNumOperands());
  1975. return CombineTo(N, Res, Res);
  1976. }
1977. // If both halves are used, there is nothing to simplify; return as is.
  1978. if (LoExists && HiExists)
  1979. return SDValue();
  1980. // If the two computed results can be simplified separately, separate them.
  1981. if (LoExists) {
  1982. SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
  1983. N->op_begin(), N->getNumOperands());
  1984. AddToWorkList(Lo.getNode());
  1985. SDValue LoOpt = combine(Lo.getNode());
  1986. if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
  1987. (!LegalOperations ||
  1988. TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
  1989. return CombineTo(N, LoOpt, LoOpt);
  1990. }
  1991. if (HiExists) {
  1992. SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
  1993. N->op_begin(), N->getNumOperands());
  1994. AddToWorkList(Hi.getNode());
  1995. SDValue HiOpt = combine(Hi.getNode());
  1996. if (HiOpt.getNode() && HiOpt != Hi &&
  1997. (!LegalOperations ||
  1998. TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
  1999. return CombineTo(N, HiOpt, HiOpt);
  2000. }
  2001. return SDValue();
  2002. }
  2003. SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  2004. SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
  2005. if (Res.getNode()) return Res;
  2006. EVT VT = N->getValueType(0);
  2007. SDLoc DL(N);
2008. // If the type twice as wide is legal, transform the SMUL_LOHI to a wider
2009. // multiply plus a shift.
  2010. if (VT.isSimple() && !VT.isVector()) {
  2011. MVT Simple = VT.getSimpleVT();
  2012. unsigned SimpleSize = Simple.getSizeInBits();
  2013. EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
  2014. if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
  2015. SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
  2016. SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
  2017. Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
  2018. // Compute the high part as N1.
  2019. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
  2020. DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
  2021. Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
  2022. // Compute the low part as N0.
  2023. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
  2024. return CombineTo(N, Lo, Hi);
  2025. }
  2026. }
  2027. return SDValue();
  2028. }
  2029. SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  2030. SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
  2031. if (Res.getNode()) return Res;
  2032. EVT VT = N->getValueType(0);
  2033. SDLoc DL(N);
2034. // If the type twice as wide is legal, transform the UMUL_LOHI to a wider
2035. // multiply plus a shift.
  2036. if (VT.isSimple() && !VT.isVector()) {
  2037. MVT Simple = VT.getSimpleVT();
  2038. unsigned SimpleSize = Simple.getSizeInBits();
  2039. EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
  2040. if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
  2041. SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
  2042. SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
  2043. Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
  2044. // Compute the high part as N1.
  2045. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
  2046. DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
  2047. Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
  2048. // Compute the low part as N0.
  2049. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
  2050. return CombineTo(N, Lo, Hi);
  2051. }
  2052. }
  2053. return SDValue();
  2054. }
  2055. SDValue DAGCombiner::visitSMULO(SDNode *N) {
  2056. // (smulo x, 2) -> (saddo x, x)
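// Doubling a value overflows exactly when x + x overflows, so the
// multiply-with-overflow can be replaced by an add-with-overflow.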
  2057. if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
  2058. if (C2->getAPIntValue() == 2)
  2059. return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
  2060. N->getOperand(0), N->getOperand(0));
  2061. return SDValue();
  2062. }
  2063. SDValue DAGCombiner::visitUMULO(SDNode *N) {
  2064. // (umulo x, 2) -> (uaddo x, x)
  2065. if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
  2066. if (C2->getAPIntValue() == 2)
  2067. return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
  2068. N->getOperand(0), N->getOperand(0));
  2069. return SDValue();
  2070. }
  2071. SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
  2072. SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
  2073. if (Res.getNode()) return Res;
  2074. return SDValue();
  2075. }
  2076. SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  2077. SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
  2078. if (Res.getNode()) return Res;
  2079. return SDValue();
  2080. }
  2081. /// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
  2082. /// two operands of the same opcode, try to simplify it.
  2083. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  2084. SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  2085. EVT VT = N0.getValueType();
  2086. assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
  2087. // Bail early if none of these transforms apply.
  2088. if (N0.getNode()->getNumOperands() == 0) return SDValue();
  2089. // For each of OP in AND/OR/XOR:
  2090. // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  2091. // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  2092. // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  2093. // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  2094. //
  2095. // do not sink logical op inside of a vector extend, since it may combine
  2096. // into a vsetcc.
  2097. EVT Op0VT = N0.getOperand(0).getValueType();
  2098. if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
  2099. N0.getOpcode() == ISD::SIGN_EXTEND ||
  2100. // Avoid infinite looping with PromoteIntBinOp.
  2101. (N0.getOpcode() == ISD::ANY_EXTEND &&
  2102. (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
  2103. (N0.getOpcode() == ISD::TRUNCATE &&
  2104. (!TLI.isZExtFree(VT, Op0VT) ||
  2105. !TLI.isTruncateFree(Op0VT, VT)) &&
  2106. TLI.isTypeLegal(Op0VT))) &&
  2107. !VT.isVector() &&
  2108. Op0VT == N1.getOperand(0).getValueType() &&
  2109. (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
  2110. SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
  2111. N0.getOperand(0).getValueType(),
  2112. N0.getOperand(0), N1.getOperand(0));
  2113. AddToWorkList(ORNode.getNode());
  2114. return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  2115. }
  2116. // For each of OP in SHL/SRL/SRA/AND...
  2117. // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  2118. // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
  2119. // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  2120. if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
  2121. N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
  2122. N0.getOperand(1) == N1.getOperand(1)) {
  2123. SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
  2124. N0.getOperand(0).getValueType(),
  2125. N0.getOperand(0), N1.getOperand(0));
  2126. AddToWorkList(ORNode.getNode());
  2127. return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
  2128. ORNode, N0.getOperand(1));
  2129. }
  2130. // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  2131. // Only perform this optimization after type legalization and before
2132. // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  2133. // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  2134. // we don't want to undo this promotion.
  2135. // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  2136. // on scalars.
  2137. if ((N0.getOpcode() == ISD::BITCAST ||
  2138. N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
  2139. Level == AfterLegalizeTypes) {
  2140. SDValue In0 = N0.getOperand(0);
  2141. SDValue In1 = N1.getOperand(0);
  2142. EVT In0Ty = In0.getValueType();
  2143. EVT In1Ty = In1.getValueType();
  2144. SDLoc DL(N);
  2145. // If both incoming values are integers, and the original types are the
  2146. // same.
  2147. if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
  2148. SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
  2149. SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
  2150. AddToWorkList(Op.getNode());
  2151. return BC;
  2152. }
  2153. }
  2154. // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  2155. // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  2156. // If both shuffles use the same mask, and both shuffle within a single
  2157. // vector, then it is worthwhile to move the swizzle after the operation.
  2158. // The type-legalizer generates this pattern when loading illegal
  2159. // vector types from memory. In many cases this allows additional shuffle
  2160. // optimizations.
  2161. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
  2162. N0.getOperand(1).getOpcode() == ISD::UNDEF &&
  2163. N1.getOperand(1).getOpcode() == ISD::UNDEF) {
  2164. ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
  2165. ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
2166. assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
  2167. "Inputs to shuffles are not the same type");
  2168. unsigned NumElts = VT.getVectorNumElements();
  2169. // Check that both shuffles use the same mask. The masks are known to be of
  2170. // the same length because the result vector type is the same.
  2171. bool SameMask = true;
  2172. for (unsigned i = 0; i != NumElts; ++i) {
  2173. int Idx0 = SVN0->getMaskElt(i);
  2174. int Idx1 = SVN1->getMaskElt(i);
  2175. if (Idx0 != Idx1) {
  2176. SameMask = false;
  2177. break;
  2178. }
  2179. }
  2180. if (SameMask) {
  2181. SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
  2182. N0.getOperand(0), N1.getOperand(0));
  2183. AddToWorkList(Op.getNode());
  2184. return DAG.getVectorShuffle(VT, SDLoc(N), Op,
  2185. DAG.getUNDEF(VT), &SVN0->getMask()[0]);
  2186. }
  2187. }
  2188. return SDValue();
  2189. }
  2190. SDValue DAGCombiner::visitAND(SDNode *N) {
  2191. SDValue N0 = N->getOperand(0);
  2192. SDValue N1 = N->getOperand(1);
  2193. SDValue LL, LR, RL, RR, CC0, CC1;
  2194. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  2195. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  2196. EVT VT = N1.getValueType();
  2197. unsigned BitWidth = VT.getScalarType().getSizeInBits();
  2198. // fold vector ops
  2199. if (VT.isVector()) {
  2200. SDValue FoldedVOp = SimplifyVBinOp(N);
  2201. if (FoldedVOp.getNode()) return FoldedVOp;
  2202. // fold (and x, 0) -> 0, vector edition
  2203. if (ISD::isBuildVectorAllZeros(N0.getNode()))
  2204. return N0;
  2205. if (ISD::isBuildVectorAllZeros(N1.getNode()))
  2206. return N1;
  2207. // fold (and x, -1) -> x, vector edition
  2208. if (ISD::isBuildVectorAllOnes(N0.getNode()))
  2209. return N1;
  2210. if (ISD::isBuildVectorAllOnes(N1.getNode()))
  2211. return N0;
  2212. }
  2213. // fold (and x, undef) -> 0
  2214. if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
  2215. return DAG.getConstant(0, VT);
  2216. // fold (and c1, c2) -> c1&c2
  2217. if (N0C && N1C)
  2218. return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
  2219. // canonicalize constant to RHS
  2220. if (N0C && !N1C)
  2221. return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  2222. // fold (and x, -1) -> x
  2223. if (N1C && N1C->isAllOnesValue())
  2224. return N0;
  2225. // if (and x, c) is known to be zero, return 0
  2226. if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
  2227. APInt::getAllOnesValue(BitWidth)))
  2228. return DAG.getConstant(0, VT);
  2229. // reassociate and
  2230. SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
  2231. if (RAND.getNode() != 0)
  2232. return RAND;
  2233. // fold (and (or x, C), D) -> D if (C & D) == D
  2234. if (N1C && N0.getOpcode() == ISD::OR)
  2235. if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
  2236. if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
  2237. return N1;
  2238. // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  2239. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
  2240. SDValue N0Op0 = N0.getOperand(0);
  2241. APInt Mask = ~N1C->getAPIntValue();
  2242. Mask = Mask.trunc(N0Op0.getValueSizeInBits());
  2243. if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
  2244. SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
  2245. N0.getValueType(), N0Op0);
  2246. // Replace uses of the AND with uses of the Zero extend node.
  2247. CombineTo(N, Zext);
  2248. // We actually want to replace all uses of the any_extend with the
  2249. // zero_extend, to avoid duplicating things. This will later cause this
  2250. // AND to be folded.
  2251. CombineTo(N0.getNode(), Zext);
  2252. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  2253. }
  2254. }
  2255. // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  2256. // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  2257. // already be zero by virtue of the width of the base type of the load.
  2258. //
  2259. // the 'X' node here can either be nothing or an extract_vector_elt to catch
  2260. // more cases.
  2261. if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
  2262. N0.getOperand(0).getOpcode() == ISD::LOAD) ||
  2263. N0.getOpcode() == ISD::LOAD) {
  2264. LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
  2265. N0 : N0.getOperand(0) );
  2266. // Get the constant (if applicable) the zero'th operand is being ANDed with.
  2267. // This can be a pure constant or a vector splat, in which case we treat the
  2268. // vector as a scalar and use the splat value.
  2269. APInt Constant = APInt::getNullValue(1);
  2270. if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
  2271. Constant = C->getAPIntValue();
  2272. } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
  2273. APInt SplatValue, SplatUndef;
  2274. unsigned SplatBitSize;
  2275. bool HasAnyUndefs;
  2276. bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
  2277. SplatBitSize, HasAnyUndefs);
  2278. if (IsSplat) {
  2279. // Undef bits can contribute to a possible optimisation if set, so
  2280. // set them.
  2281. SplatValue |= SplatUndef;
  2282. // The splat value may be something like "0x00FFFFFF", which means 0 for
  2283. // the first vector value and FF for the rest, repeating. We need a mask
  2284. // that will apply equally to all members of the vector, so AND all the
  2285. // lanes of the constant together.
  2286. EVT VT = Vector->getValueType(0);
  2287. unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
  2288. // If the splat value has been compressed to a bitlength lower
  2289. // than the size of the vector lane, we need to re-expand it to
  2290. // the lane size.
  2291. if (BitWidth > SplatBitSize)
  2292. for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
  2293. SplatBitSize < BitWidth;
  2294. SplatBitSize = SplatBitSize * 2)
  2295. SplatValue |= SplatValue.shl(SplatBitSize);
  2296. Constant = APInt::getAllOnesValue(BitWidth);
  2297. for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
  2298. Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
  2299. }
  2300. }
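// e.g. a v4i8 splat whose 32-bit image is 0x00FFFFFF yields lanes
// 0xFF, 0xFF, 0xFF and 0x00; ANDing them gives Constant == 0x00, so the
// mask is not all-ones and the AND must stay.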
  2301. // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
  2302. // actually legal and isn't going to get expanded, else this is a false
  2303. // optimisation.
  2304. bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
  2305. Load->getMemoryVT());
  2306. // Resize the constant to the same size as the original memory access before
  2307. // extension. If it is still the AllOnesValue then this AND is completely
  2308. // unneeded.
  2309. Constant =
  2310. Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
  2311. bool B;
  2312. switch (Load->getExtensionType()) {
  2313. default: B = false; break;
  2314. case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
  2315. case ISD::ZEXTLOAD:
  2316. case ISD::NON_EXTLOAD: B = true; break;
  2317. }
  2318. if (B && Constant.isAllOnesValue()) {
  2319. // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
  2320. // preserve semantics once we get rid of the AND.
  2321. SDValue NewLoad(Load, 0);
  2322. if (Load->getExtensionType() == ISD::EXTLOAD) {
  2323. NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
  2324. Load->getValueType(0), SDLoc(Load),
  2325. Load->getChain(), Load->getBasePtr(),
  2326. Load->getOffset(), Load->getMemoryVT(),
  2327. Load->getMemOperand());
  2328. // Replace uses of the EXTLOAD with the new ZEXTLOAD.
  2329. if (Load->getNumValues() == 3) {
  2330. // PRE/POST_INC loads have 3 values.
  2331. SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
  2332. NewLoad.getValue(2) };
  2333. CombineTo(Load, To, 3, true);
  2334. } else {
  2335. CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
  2336. }
  2337. }
  2338. // Fold the AND away, taking care not to fold to the old load node if we
  2339. // replaced it.
  2340. CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
  2341. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  2342. }
  2343. }
  2344. // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
  2345. if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
  2346. ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
  2347. ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
  2348. if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
  2349. LL.getValueType().isInteger()) {
  2350. // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
  2351. if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
  2352. SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
  2353. LR.getValueType(), LL, RL);
  2354. AddToWorkList(ORNode.getNode());
  2355. return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
  2356. }
  2357. // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
  2358. if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
  2359. SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
  2360. LR.getValueType(), LL, RL);
  2361. AddToWorkList(ANDNode.getNode());
  2362. return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
  2363. }
  2364. // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
  2365. if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
  2366. SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
  2367. LR.getValueType(), LL, RL);
  2368. AddToWorkList(ORNode.getNode());
  2369. return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
  2370. }
  2371. }
  2372. // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
  2373. if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
  2374. Op0 == Op1 && LL.getValueType().isInteger() &&
  2375. Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
  2376. cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
  2377. (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
  2378. cast<ConstantSDNode>(RR)->isNullValue()))) {
  2379. SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
  2380. LL, DAG.getConstant(1, LL.getValueType()));
  2381. AddToWorkList(ADDNode.getNode());
  2382. return DAG.getSetCC(SDLoc(N), VT, ADDNode,
  2383. DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
  2384. }
2385. // canonicalize the second setcc so that equivalent operands line up (LL == RL)
  2386. if (LL == RR && LR == RL) {
  2387. Op1 = ISD::getSetCCSwappedOperands(Op1);
  2388. std::swap(RL, RR);
  2389. }
  2390. if (LL == RL && LR == RR) {
  2391. bool isInteger = LL.getValueType().isInteger();
  2392. ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
  2393. if (Result != ISD::SETCC_INVALID &&
  2394. (!LegalOperations ||
  2395. (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
  2396. TLI.isOperationLegal(ISD::SETCC,
  2397. getSetCCResultType(N0.getSimpleValueType())))))
  2398. return DAG.getSetCC(SDLoc(N), N0.getValueType(),
  2399. LL, LR, Result);
  2400. }
  2401. }
  2402. // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  2403. if (N0.getOpcode() == N1.getOpcode()) {
  2404. SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
  2405. if (Tmp.getNode()) return Tmp;
  2406. }
  2407. // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  2408. // fold (and (sra)) -> (and (srl)) when possible.
  2409. if (!VT.isVector() &&
  2410. SimplifyDemandedBits(SDValue(N, 0)))
  2411. return SDValue(N, 0);
  2412. // fold (zext_inreg (extload x)) -> (zextload x)
  2413. if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
  2414. LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  2415. EVT MemVT = LN0->getMemoryVT();
  2416. // If we zero all the possible extended bits, then we can turn this into
  2417. // a zextload if we are running before legalize or the operation is legal.
  2418. unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
  2419. if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
  2420. BitWidth - MemVT.getScalarType().getSizeInBits())) &&
  2421. ((!LegalOperations && !LN0->isVolatile()) ||
  2422. TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
  2423. SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
  2424. LN0->getChain(), LN0->getBasePtr(),
  2425. MemVT, LN0->getMemOperand());
  2426. AddToWorkList(N);
  2427. CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
  2428. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  2429. }
  2430. }
  2431. // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  2432. if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
  2433. N0.hasOneUse()) {
  2434. LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  2435. EVT MemVT = LN0->getMemoryVT();
  2436. // If we zero all the possible extended bits, then we can turn this into
  2437. // a zextload if we are running before legalize or the operation is legal.
  2438. unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
  2439. if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
  2440. BitWidth - MemVT.getScalarType().getSizeInBits())) &&
  2441. ((!LegalOperations && !LN0->isVolatile()) ||
  2442. TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
  2443. SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
  2444. LN0->getChain(), LN0->getBasePtr(),
  2445. MemVT, LN0->getMemOperand());
  2446. AddToWorkList(N);
  2447. CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
  2448. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  2449. }
  2450. }
  2451. // fold (and (load x), 255) -> (zextload x, i8)
  2452. // fold (and (extload x, i16), 255) -> (zextload x, i8)
  2453. // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  2454. if (N1C && (N0.getOpcode() == ISD::LOAD ||
  2455. (N0.getOpcode() == ISD::ANY_EXTEND &&
  2456. N0.getOperand(0).getOpcode() == ISD::LOAD))) {
  2457. bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
  2458. LoadSDNode *LN0 = HasAnyExt
  2459. ? cast<LoadSDNode>(N0.getOperand(0))
  2460. : cast<LoadSDNode>(N0);
  2461. if (LN0->getExtensionType() != ISD::SEXTLOAD &&
  2462. LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
  2463. uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
  2464. if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
  2465. EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  2466. EVT LoadedVT = LN0->getMemoryVT();
  2467. if (ExtVT == LoadedVT &&
  2468. (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
  2469. EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
  2470. SDValue NewLoad =
  2471. DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
  2472. LN0->getChain(), LN0->getBasePtr(), ExtVT,
  2473. LN0->getMemOperand());
  2474. AddToWorkList(N);
  2475. CombineTo(LN0, NewLoad, NewLoad.getValue(1));
  2476. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  2477. }
  2478. // Do not change the width of a volatile load.
  2479. // Do not generate loads of non-round integer types since these can
  2480. // be expensive (and would be wrong if the type is not byte sized).
  2481. if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
  2482. (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
  2483. EVT PtrType = LN0->getOperand(1).getValueType();
  2484. unsigned Alignment = LN0->getAlignment();
  2485. SDValue NewPtr = LN0->getBasePtr();
  2486. // For big endian targets, we need to add an offset to the pointer
  2487. // to load the correct bytes. For little endian systems, we merely
  2488. // need to read fewer bytes from the same pointer.
  2489. if (TLI.isBigEndian()) {
  2490. unsigned LVTStoreBytes = LoadedVT.getStoreSize();
  2491. unsigned EVTStoreBytes = ExtVT.getStoreSize();
  2492. unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
  2493. NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
  2494. NewPtr, DAG.getConstant(PtrOff, PtrType));
  2495. Alignment = MinAlign(Alignment, PtrOff);
  2496. }
  2497. AddToWorkList(NewPtr.getNode());
  2498. EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
  2499. SDValue Load =
  2500. DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
  2501. LN0->getChain(), NewPtr,
  2502. LN0->getPointerInfo(),
  2503. ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
  2504. Alignment, LN0->getTBAAInfo());
  2505. AddToWorkList(N);
  2506. CombineTo(LN0, Load, Load.getValue(1));
  2507. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  2508. }
  2509. }
  2510. }
  2511. }
  2512. if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
  2513. VT.getSizeInBits() <= 64) {
  2514. if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
  2515. APInt ADDC = ADDI->getAPIntValue();
  2516. if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
2517. // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
2518. // immediate for an add, but it becomes legal if its top c2 bits are set,
  2519. // transform the ADD so the immediate doesn't need to be materialized
  2520. // in a register.
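// Example (i32, c2 = 16): c1 = 0x0000FFF5 is an awkward immediate, but the
// AND with (srl y, 16) clears the top 16 bits of the sum anyway, so we may
// use c1 | 0xFFFF0000 = -11 instead, which most targets encode directly.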
  2521. if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
  2522. APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
  2523. SRLI->getZExtValue());
  2524. if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
  2525. ADDC |= Mask;
  2526. if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
  2527. SDValue NewAdd =
  2528. DAG.getNode(ISD::ADD, SDLoc(N0), VT,
  2529. N0.getOperand(0), DAG.getConstant(ADDC, VT));
  2530. CombineTo(N0.getNode(), NewAdd);
  2531. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  2532. }
  2533. }
  2534. }
  2535. }
  2536. }
  2537. }
  2538. // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  2539. if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
  2540. SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
  2541. N0.getOperand(1), false);
  2542. if (BSwap.getNode())
  2543. return BSwap;
  2544. }
  2545. return SDValue();
  2546. }
2547. /// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
  2548. ///
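/// e.g. for i32: ((a & 0xff00) >> 8) | ((a & 0xff) << 8) == (bswap a) >> 16.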
  2549. SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
  2550. bool DemandHighBits) {
  2551. if (!LegalOperations)
  2552. return SDValue();
  2553. EVT VT = N->getValueType(0);
  2554. if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
  2555. return SDValue();
  2556. if (!TLI.isOperationLegal(ISD::BSWAP, VT))
  2557. return SDValue();
2558. // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  2559. bool LookPassAnd0 = false;
  2560. bool LookPassAnd1 = false;
  2561. if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
  2562. std::swap(N0, N1);
  2563. if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
  2564. std::swap(N0, N1);
  2565. if (N0.getOpcode() == ISD::AND) {
  2566. if (!N0.getNode()->hasOneUse())
  2567. return SDValue();
  2568. ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  2569. if (!N01C || N01C->getZExtValue() != 0xFF00)
  2570. return SDValue();
  2571. N0 = N0.getOperand(0);
  2572. LookPassAnd0 = true;
  2573. }
  2574. if (N1.getOpcode() == ISD::AND) {
  2575. if (!N1.getNode()->hasOneUse())
  2576. return SDValue();
  2577. ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  2578. if (!N11C || N11C->getZExtValue() != 0xFF)
  2579. return SDValue();
  2580. N1 = N1.getOperand(0);
  2581. LookPassAnd1 = true;
  2582. }
  2583. if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
  2584. std::swap(N0, N1);
  2585. if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
  2586. return SDValue();
  2587. if (!N0.getNode()->hasOneUse() ||
  2588. !N1.getNode()->hasOneUse())
  2589. return SDValue();
  2590. ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  2591. ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  2592. if (!N01C || !N11C)
  2593. return SDValue();
  2594. if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
  2595. return SDValue();
  2596. // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  2597. SDValue N00 = N0->getOperand(0);
  2598. if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
  2599. if (!N00.getNode()->hasOneUse())
  2600. return SDValue();
  2601. ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
  2602. if (!N001C || N001C->getZExtValue() != 0xFF)
  2603. return SDValue();
  2604. N00 = N00.getOperand(0);
  2605. LookPassAnd0 = true;
  2606. }
  2607. SDValue N10 = N1->getOperand(0);
  2608. if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
  2609. if (!N10.getNode()->hasOneUse())
  2610. return SDValue();
  2611. ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
  2612. if (!N101C || N101C->getZExtValue() != 0xFF00)
  2613. return SDValue();
  2614. N10 = N10.getOperand(0);
  2615. LookPassAnd1 = true;
  2616. }
  2617. if (N00 != N10)
  2618. return SDValue();
  2619. // Make sure everything beyond the low halfword gets set to zero since the SRL
  2620. // 16 will clear the top bits.
  2621. unsigned OpSizeInBits = VT.getSizeInBits();
  2622. if (DemandHighBits && OpSizeInBits > 16) {
  2623. // If the left-shift isn't masked out then the only way this is a bswap is
  2624. // if all bits beyond the low 8 are 0. In that case the entire pattern
  2625. // reduces to a left shift anyway: leave it for other parts of the combiner.
  2626. if (!LookPassAnd0)
  2627. return SDValue();
  2628. // However, if the right shift isn't masked out then it might be because
  2629. // it's not needed. See if we can spot that too.
  2630. if (!LookPassAnd1 &&
  2631. !DAG.MaskedValueIsZero(
  2632. N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
  2633. return SDValue();
  2634. }
  2635. SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  2636. if (OpSizeInBits > 16)
  2637. Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
  2638. DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  2639. return Res;
  2640. }
  2641. /// isBSwapHWordElement - Return true if the specified node is an element
  2642. /// that makes up a 32-bit packed halfword byteswap. i.e.
  2643. /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
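/// Parts[] is indexed by the mask constant: 0xFF -> 0, 0xFF00 -> 1,
/// 0xFF0000 -> 2, 0xFF000000 -> 3. Each entry records the node feeding that
/// byte, so the caller can verify all four bytes come from the same value.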
  2644. static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
  2645. if (!N.getNode()->hasOneUse())
  2646. return false;
  2647. unsigned Opc = N.getOpcode();
  2648. if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
  2649. return false;
  2650. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  2651. if (!N1C)
  2652. return false;
  2653. unsigned Num;
  2654. switch (N1C->getZExtValue()) {
  2655. default:
  2656. return false;
  2657. case 0xFF: Num = 0; break;
  2658. case 0xFF00: Num = 1; break;
  2659. case 0xFF0000: Num = 2; break;
  2660. case 0xFF000000: Num = 3; break;
  2661. }
  2662. // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  2663. SDValue N0 = N.getOperand(0);
  2664. if (Opc == ISD::AND) {
  2665. if (Num == 0 || Num == 2) {
  2666. // (x >> 8) & 0xff
  2667. // (x >> 8) & 0xff0000
  2668. if (N0.getOpcode() != ISD::SRL)
  2669. return false;
  2670. ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  2671. if (!C || C->getZExtValue() != 8)
  2672. return false;
  2673. } else {
  2674. // (x << 8) & 0xff00
  2675. // (x << 8) & 0xff000000
  2676. if (N0.getOpcode() != ISD::SHL)
  2677. return false;
  2678. ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  2679. if (!C || C->getZExtValue() != 8)
  2680. return false;
  2681. }
  2682. } else if (Opc == ISD::SHL) {
  2683. // (x & 0xff) << 8
  2684. // (x & 0xff0000) << 8
  2685. if (Num != 0 && Num != 2)
  2686. return false;
  2687. ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  2688. if (!C || C->getZExtValue() != 8)
  2689. return false;
  2690. } else { // Opc == ISD::SRL
  2691. // (x & 0xff00) >> 8
  2692. // (x & 0xff000000) >> 8
  2693. if (Num != 1 && Num != 3)
  2694. return false;
  2695. ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  2696. if (!C || C->getZExtValue() != 8)
  2697. return false;
  2698. }
  2699. if (Parts[Num])
  2700. return false;
  2701. Parts[Num] = N0.getOperand(0).getNode();
  2702. return true;
  2703. }
  2704. /// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
  2705. /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
  2706. /// => (rotl (bswap x), 16)
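/// e.g. for x = 0xAABBCCDD the pattern evaluates to 0xBBAADDCC, and
/// rotl(bswap(x), 16) = rotl(0xDDCCBBAA, 16) = 0xBBAADDCC as well.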
  2707. SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  2708. if (!LegalOperations)
  2709. return SDValue();
  2710. EVT VT = N->getValueType(0);
  2711. if (VT != MVT::i32)
  2712. return SDValue();
  2713. if (!TLI.isOperationLegal(ISD::BSWAP, VT))
  2714. return SDValue();
  2715. SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
  2716. // Look for either
  2717. // (or (or (and), (and)), (or (and), (and)))
  2718. // (or (or (or (and), (and)), (and)), (and))
  2719. if (N0.getOpcode() != ISD::OR)
  2720. return SDValue();
  2721. SDValue N00 = N0.getOperand(0);
  2722. SDValue N01 = N0.getOperand(1);
  2723. if (N1.getOpcode() == ISD::OR &&
  2724. N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
  2725. // (or (or (and), (and)), (or (and), (and)))
  2726. SDValue N000 = N00.getOperand(0);
  2727. if (!isBSwapHWordElement(N000, Parts))
  2728. return SDValue();
  2729. SDValue N001 = N00.getOperand(1);
  2730. if (!isBSwapHWordElement(N001, Parts))
  2731. return SDValue();
  2732. SDValue N010 = N01.getOperand(0);
  2733. if (!isBSwapHWordElement(N010, Parts))
  2734. return SDValue();
  2735. SDValue N011 = N01.getOperand(1);
  2736. if (!isBSwapHWordElement(N011, Parts))
  2737. return SDValue();
  2738. } else {
  2739. // (or (or (or (and), (and)), (and)), (and))
  2740. if (!isBSwapHWordElement(N1, Parts))
  2741. return SDValue();
  2742. if (!isBSwapHWordElement(N01, Parts))
  2743. return SDValue();
  2744. if (N00.getOpcode() != ISD::OR)
  2745. return SDValue();
  2746. SDValue N000 = N00.getOperand(0);
  2747. if (!isBSwapHWordElement(N000, Parts))
  2748. return SDValue();
  2749. SDValue N001 = N00.getOperand(1);
  2750. if (!isBSwapHWordElement(N001, Parts))
  2751. return SDValue();
  2752. }
  2753. // Make sure the parts are all coming from the same node.
  2754. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
  2755. return SDValue();
  2756. SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
  2757. SDValue(Parts[0],0));
  2758. // Result of the bswap should be rotated by 16. If it's not legal, then
  2759. // do (x << 16) | (x >> 16).
  2760. SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
  2761. if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
  2762. return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
  2763. if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
  2764. return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
  2765. return DAG.getNode(ISD::OR, SDLoc(N), VT,
  2766. DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
  2767. DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
  2768. }
  2769. SDValue DAGCombiner::visitOR(SDNode *N) {
  2770. SDValue N0 = N->getOperand(0);
  2771. SDValue N1 = N->getOperand(1);
  2772. SDValue LL, LR, RL, RR, CC0, CC1;
  2773. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  2774. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  2775. EVT VT = N1.getValueType();
  2776. // fold vector ops
  2777. if (VT.isVector()) {
  2778. SDValue FoldedVOp = SimplifyVBinOp(N);
  2779. if (FoldedVOp.getNode()) return FoldedVOp;
  2780. // fold (or x, 0) -> x, vector edition
  2781. if (ISD::isBuildVectorAllZeros(N0.getNode()))
  2782. return N1;
  2783. if (ISD::isBuildVectorAllZeros(N1.getNode()))
  2784. return N0;
  2785. // fold (or x, -1) -> -1, vector edition
  2786. if (ISD::isBuildVectorAllOnes(N0.getNode()))
  2787. return N0;
  2788. if (ISD::isBuildVectorAllOnes(N1.getNode()))
  2789. return N1;
  2790. }
  2791. // fold (or x, undef) -> -1
  2792. if (!LegalOperations &&
  2793. (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
  2794. EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
  2795. return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  2796. }
  2797. // fold (or c1, c2) -> c1|c2
  2798. if (N0C && N1C)
  2799. return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  2800. // canonicalize constant to RHS
  2801. if (N0C && !N1C)
  2802. return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  2803. // fold (or x, 0) -> x
  2804. if (N1C && N1C->isNullValue())
  2805. return N0;
  2806. // fold (or x, -1) -> -1
  2807. if (N1C && N1C->isAllOnesValue())
  2808. return N1;
  2809. // fold (or x, c) -> c iff (x & ~c) == 0
  2810. if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
  2811. return N1;
  2812. // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  2813. SDValue BSwap = MatchBSwapHWord(N, N0, N1);
  2814. if (BSwap.getNode() != 0)
  2815. return BSwap;
  2816. BSwap = MatchBSwapHWordLow(N, N0, N1);
  2817. if (BSwap.getNode() != 0)
  2818. return BSwap;
  2819. // reassociate or
  2820. SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
  2821. if (ROR.getNode() != 0)
  2822. return ROR;
  2823. // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
2824. // iff (c1 & c2) != 0.
  2825. if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
  2826. isa<ConstantSDNode>(N0.getOperand(1))) {
  2827. ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
  2828. if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
  2829. return DAG.getNode(ISD::AND, SDLoc(N), VT,
  2830. DAG.getNode(ISD::OR, SDLoc(N0), VT,
  2831. N0.getOperand(0), N1),
  2832. DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
  2833. }
  2834. // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  2835. if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
  2836. ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
  2837. ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
  2838. if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
  2839. LL.getValueType().isInteger()) {
  2840. // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
  2841. // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
  2842. if (cast<ConstantSDNode>(LR)->isNullValue() &&
  2843. (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
  2844. SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
  2845. LR.getValueType(), LL, RL);
  2846. AddToWorkList(ORNode.getNode());
  2847. return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
  2848. }
  2849. // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
2850. // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
  2851. if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
  2852. (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
  2853. SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
  2854. LR.getValueType(), LL, RL);
  2855. AddToWorkList(ANDNode.getNode());
  2856. return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
  2857. }
  2858. }
  2859. // canonicalize equivalent to ll == rl
  2860. if (LL == RR && LR == RL) {
  2861. Op1 = ISD::getSetCCSwappedOperands(Op1);
  2862. std::swap(RL, RR);
  2863. }
  2864. if (LL == RL && LR == RR) {
  2865. bool isInteger = LL.getValueType().isInteger();
  2866. ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
  2867. if (Result != ISD::SETCC_INVALID &&
  2868. (!LegalOperations ||
  2869. (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
  2870. TLI.isOperationLegal(ISD::SETCC,
  2871. getSetCCResultType(N0.getValueType())))))
  2872. return DAG.getSetCC(SDLoc(N), N0.getValueType(),
  2873. LL, LR, Result);
  2874. }
  2875. }
  2876. // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  2877. if (N0.getOpcode() == N1.getOpcode()) {
  2878. SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
  2879. if (Tmp.getNode()) return Tmp;
  2880. }
  2881. // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  2882. if (N0.getOpcode() == ISD::AND &&
  2883. N1.getOpcode() == ISD::AND &&
  2884. N0.getOperand(1).getOpcode() == ISD::Constant &&
  2885. N1.getOperand(1).getOpcode() == ISD::Constant &&
  2886. // Don't increase # computations.
  2887. (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
  2888. // We can only do this xform if we know that bits from X that are set in C2
  2889. // but not in C1 are already zero. Likewise for Y.
  2890. const APInt &LHSMask =
  2891. cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  2892. const APInt &RHSMask =
  2893. cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
  2894. if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
  2895. DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
  2896. SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
  2897. N0.getOperand(0), N1.getOperand(0));
  2898. return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
  2899. DAG.getConstant(LHSMask | RHSMask, VT));
  2900. }
  2901. }
  2902. // See if this is some rotate idiom.
  2903. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
  2904. return SDValue(Rot, 0);
  2905. // Simplify the operands using demanded-bits information.
  2906. if (!VT.isVector() &&
  2907. SimplifyDemandedBits(SDValue(N, 0)))
  2908. return SDValue(N, 0);
  2909. return SDValue();
  2910. }
  2911. /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
  2912. static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  2913. if (Op.getOpcode() == ISD::AND) {
  2914. if (isa<ConstantSDNode>(Op.getOperand(1))) {
  2915. Mask = Op.getOperand(1);
  2916. Op = Op.getOperand(0);
  2917. } else {
  2918. return false;
  2919. }
  2920. }
  2921. if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
  2922. Shift = Op;
  2923. return true;
  2924. }
  2925. return false;
  2926. }
  2927. // MatchRotate - Handle an 'or' of two operands. If this is one of the many
  2928. // idioms for rotate, and if the target supports rotation instructions, generate
  2929. // a rot[lr].
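// e.g. for i32, (or (shl x, 3), (srl x, 29)) becomes (rotl x, 3), or
// equivalently (rotr x, 29), whichever flavor the target provides.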
  2930. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  2931. // Must be a legal type. Expanded 'n promoted things won't work with rotates.
  2932. EVT VT = LHS.getValueType();
  2933. if (!TLI.isTypeLegal(VT)) return 0;
  2934. // The target must have at least one rotate flavor.
  2935. bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  2936. bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  2937. if (!HasROTL && !HasROTR) return 0;
  2938. // Match "(X shl/srl V1) & V2" where V2 may not be present.
  2939. SDValue LHSShift; // The shift.
  2940. SDValue LHSMask; // AND value if any.
  2941. if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
  2942. return 0; // Not part of a rotate.
  2943. SDValue RHSShift; // The shift.
  2944. SDValue RHSMask; // AND value if any.
  2945. if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
  2946. return 0; // Not part of a rotate.
  2947. if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
  2948. return 0; // Not shifting the same value.
  2949. if (LHSShift.getOpcode() == RHSShift.getOpcode())
  2950. return 0; // Shifts must disagree.
  2951. // Canonicalize shl to left side in a shl/srl pair.
  2952. if (RHSShift.getOpcode() == ISD::SHL) {
  2953. std::swap(LHS, RHS);
  2954. std::swap(LHSShift, RHSShift);
  2955. std::swap(LHSMask , RHSMask );
  2956. }
  2957. unsigned OpSizeInBits = VT.getSizeInBits();
  2958. SDValue LHSShiftArg = LHSShift.getOperand(0);
  2959. SDValue LHSShiftAmt = LHSShift.getOperand(1);
  2960. SDValue RHSShiftAmt = RHSShift.getOperand(1);
  2961. // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  2962. // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  2963. if (LHSShiftAmt.getOpcode() == ISD::Constant &&
  2964. RHSShiftAmt.getOpcode() == ISD::Constant) {
  2965. uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
  2966. uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
  2967. if ((LShVal + RShVal) != OpSizeInBits)
  2968. return 0;
  2969. SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
  2970. LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
  2971. // If there is an AND of either shifted operand, apply it to the result.
  2972. if (LHSMask.getNode() || RHSMask.getNode()) {
  2973. APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
  2974. if (LHSMask.getNode()) {
  2975. APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
  2976. Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
  2977. }
  2978. if (RHSMask.getNode()) {
  2979. APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
  2980. Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
  2981. }
  2982. Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
  2983. }
  2984. return Rot.getNode();
  2985. }
  2986. // If there is a mask here, and we have a variable shift, we can't be sure
  2987. // that we're masking out the right stuff.
  2988. if (LHSMask.getNode() || RHSMask.getNode())
  2989. return 0;
  2990. // If the shift amount is sign/zext/any-extended just peel it off.
  2991. SDValue LExtOp0 = LHSShiftAmt;
  2992. SDValue RExtOp0 = RHSShiftAmt;
  2993. if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
  2994. LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
  2995. LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
  2996. LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
  2997. (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
  2998. RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
  2999. RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
  3000. RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
  3001. LExtOp0 = LHSShiftAmt.getOperand(0);
  3002. RExtOp0 = RHSShiftAmt.getOperand(0);
  3003. }
  3004. if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
  3005. // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
  3006. // (rotl x, y)
  3007. // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
  3008. // (rotr x, (sub 32, y))
  3009. if (ConstantSDNode *SUBC =
  3010. dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
  3011. if (SUBC->getAPIntValue() == OpSizeInBits)
  3012. return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
  3013. HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
  3014. } else if (LExtOp0.getOpcode() == ISD::SUB &&
  3015. RExtOp0 == LExtOp0.getOperand(1)) {
  3016. // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
  3017. // (rotr x, y)
  3018. // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
  3019. // (rotl x, (sub 32, y))
  3020. if (ConstantSDNode *SUBC =
  3021. dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
  3022. if (SUBC->getAPIntValue() == OpSizeInBits)
  3023. return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
  3024. HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
  3025. }
  3026. return 0;
  3027. }
  3028. SDValue DAGCombiner::visitXOR(SDNode *N) {
  3029. SDValue N0 = N->getOperand(0);
  3030. SDValue N1 = N->getOperand(1);
  3031. SDValue LHS, RHS, CC;
  3032. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  3033. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  3034. EVT VT = N0.getValueType();
  3035. // fold vector ops
  3036. if (VT.isVector()) {
  3037. SDValue FoldedVOp = SimplifyVBinOp(N);
  3038. if (FoldedVOp.getNode()) return FoldedVOp;
  3039. // fold (xor x, 0) -> x, vector edition
  3040. if (ISD::isBuildVectorAllZeros(N0.getNode()))
  3041. return N1;
  3042. if (ISD::isBuildVectorAllZeros(N1.getNode()))
  3043. return N0;
  3044. }
  3045. // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  3046. if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
  3047. return DAG.getConstant(0, VT);
  3048. // fold (xor x, undef) -> undef
  3049. if (N0.getOpcode() == ISD::UNDEF)
  3050. return N0;
  3051. if (N1.getOpcode() == ISD::UNDEF)
  3052. return N1;
  3053. // fold (xor c1, c2) -> c1^c2
  3054. if (N0C && N1C)
  3055. return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  3056. // canonicalize constant to RHS
  3057. if (N0C && !N1C)
  3058. return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  3059. // fold (xor x, 0) -> x
  3060. if (N1C && N1C->isNullValue())
  3061. return N0;
  3062. // reassociate xor
  3063. SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
  3064. if (RXOR.getNode() != 0)
  3065. return RXOR;
  3066. // fold !(x cc y) -> (x !cc y)
  3067. if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
  3068. bool isInt = LHS.getValueType().isInteger();
  3069. ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
  3070. isInt);
  3071. if (!LegalOperations ||
  3072. TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
  3073. switch (N0.getOpcode()) {
  3074. default:
  3075. llvm_unreachable("Unhandled SetCC Equivalent!");
  3076. case ISD::SETCC:
  3077. return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
  3078. case ISD::SELECT_CC:
  3079. return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
  3080. N0.getOperand(3), NotCC);
  3081. }
  3082. }
  3083. }
  3084. // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  3085. if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
  3086. N0.getNode()->hasOneUse() &&
  3087. isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
  3088. SDValue V = N0.getOperand(0);
  3089. V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
  3090. DAG.getConstant(1, V.getValueType()));
  3091. AddToWorkList(V.getNode());
  3092. return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  3093. }
  3094. // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  3095. if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
  3096. (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
  3097. SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
  3098. if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
  3099. unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
  3100. LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
  3101. RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
  3102. AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
  3103. return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
  3104. }
  3105. }
  3106. // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  3107. if (N1C && N1C->isAllOnesValue() &&
  3108. (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
  3109. SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
  3110. if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
  3111. unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
  3112. LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
  3113. RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
  3114. AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
  3115. return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
  3116. }
  3117. }
  3118. // fold (xor (and x, y), y) -> (and (not x), y)
  3119. if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
  3120. N0->getOperand(1) == N1) {
  3121. SDValue X = N0->getOperand(0);
  3122. SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
  3123. AddToWorkList(NotX.getNode());
  3124. return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  3125. }
  3126. // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  3127. if (N1C && N0.getOpcode() == ISD::XOR) {
  3128. ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
  3129. ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  3130. if (N00C)
  3131. return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
  3132. DAG.getConstant(N1C->getAPIntValue() ^
  3133. N00C->getAPIntValue(), VT));
  3134. if (N01C)
  3135. return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
  3136. DAG.getConstant(N1C->getAPIntValue() ^
  3137. N01C->getAPIntValue(), VT));
  3138. }
  3139. // fold (xor x, x) -> 0
  3140. if (N0 == N1)
  3141. return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  3142. // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  3143. if (N0.getOpcode() == N1.getOpcode()) {
  3144. SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
  3145. if (Tmp.getNode()) return Tmp;
  3146. }
  3147. // Simplify the expression using non-local knowledge.
  3148. if (!VT.isVector() &&
  3149. SimplifyDemandedBits(SDValue(N, 0)))
  3150. return SDValue(N, 0);
  3151. return SDValue();
  3152. }
  3153. /// visitShiftByConstant - Handle transforms common to the three shifts, when
  3154. /// the shift amount is a constant.
  3155. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
  3156. SDNode *LHS = N->getOperand(0).getNode();
  3157. if (!LHS->hasOneUse()) return SDValue();
  3158. // We want to pull some binops through shifts, so that we have (and (shift))
  3159. // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
  3160. // thing happens with address calculations, so it's important to canonicalize
  3161. // it.
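// e.g. it turns (shl (add x, 400), 2) into (add (shl x, 2), 1600), exposing
// the scaled offset 1600 to addressing-mode matching (subject to the
// constant-shift restriction below).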
  3162. bool HighBitSet = false; // Can we transform this if the high bit is set?
  3163. switch (LHS->getOpcode()) {
  3164. default: return SDValue();
  3165. case ISD::OR:
  3166. case ISD::XOR:
  3167. HighBitSet = false; // We can only transform sra if the high bit is clear.
  3168. break;
  3169. case ISD::AND:
  3170. HighBitSet = true; // We can only transform sra if the high bit is set.
  3171. break;
  3172. case ISD::ADD:
  3173. if (N->getOpcode() != ISD::SHL)
  3174. return SDValue(); // only shl(add) not sr[al](add).
  3175. HighBitSet = false; // We can only transform sra if the high bit is clear.
  3176. break;
  3177. }
  3178. // We require the RHS of the binop to be a constant as well.
  3179. ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  3180. if (!BinOpCst) return SDValue();
  3181. // FIXME: disable this unless the input to the binop is a shift by a constant.
  3182. // If it is not a shift, it pessimizes some common cases like:
  3183. //
  3184. // void foo(int *X, int i) { X[i & 1235] = 1; }
  3185. // int bar(int *X, int i) { return X[i & 255]; }
  3186. SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  3187. if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
  3188. BinOpLHSVal->getOpcode() != ISD::SRA &&
  3189. BinOpLHSVal->getOpcode() != ISD::SRL) ||
  3190. !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
  3191. return SDValue();
  3192. EVT VT = N->getValueType(0);
  3193. // If this is a signed shift right, and the high bit is modified by the
3194. // logical operation, do not perform the transformation. The HighBitSet
  3195. // boolean indicates the value of the high bit of the constant which would
  3196. // cause it to be modified for this operation.
  3197. if (N->getOpcode() == ISD::SRA) {
  3198. bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
  3199. if (BinOpRHSSignSet != HighBitSet)
  3200. return SDValue();
  3201. }
  3202. // Fold the constants, shifting the binop RHS by the shift amount.
  3203. SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
  3204. N->getValueType(0),
  3205. LHS->getOperand(1), N->getOperand(1));
  3206. // Create the new shift.
  3207. SDValue NewShift = DAG.getNode(N->getOpcode(),
  3208. SDLoc(LHS->getOperand(0)),
  3209. VT, LHS->getOperand(0), N->getOperand(1));
  3210. // Create the new binop.
  3211. return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
  3212. }
  3213. SDValue DAGCombiner::visitSHL(SDNode *N) {
  3214. SDValue N0 = N->getOperand(0);
  3215. SDValue N1 = N->getOperand(1);
  3216. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  3217. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  3218. EVT VT = N0.getValueType();
  3219. unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
  3220. // fold vector ops
  3221. if (VT.isVector()) {
  3222. SDValue FoldedVOp = SimplifyVBinOp(N);
  3223. if (FoldedVOp.getNode()) return FoldedVOp;
  3224. }
  3225. // fold (shl c1, c2) -> c1<<c2
  3226. if (N0C && N1C)
  3227. return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  3228. // fold (shl 0, x) -> 0
  3229. if (N0C && N0C->isNullValue())
  3230. return N0;
  3231. // fold (shl x, c >= size(x)) -> undef
  3232. if (N1C && N1C->getZExtValue() >= OpSizeInBits)
  3233. return DAG.getUNDEF(VT);
  3234. // fold (shl x, 0) -> x
  3235. if (N1C && N1C->isNullValue())
  3236. return N0;
  3237. // fold (shl undef, x) -> 0
  3238. if (N0.getOpcode() == ISD::UNDEF)
  3239. return DAG.getConstant(0, VT);
  3240. // if (shl x, c) is known to be zero, return 0
  3241. if (DAG.MaskedValueIsZero(SDValue(N, 0),
  3242. APInt::getAllOnesValue(OpSizeInBits)))
  3243. return DAG.getConstant(0, VT);
  3244. // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  3245. if (N1.getOpcode() == ISD::TRUNCATE &&
  3246. N1.getOperand(0).getOpcode() == ISD::AND &&
  3247. N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
  3248. SDValue N101 = N1.getOperand(0).getOperand(1);
  3249. if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
  3250. EVT TruncVT = N1.getValueType();
  3251. SDValue N100 = N1.getOperand(0).getOperand(0);
  3252. APInt TruncC = N101C->getAPIntValue();
  3253. TruncC = TruncC.trunc(TruncVT.getSizeInBits());
  3254. return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
  3255. DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
  3256. DAG.getNode(ISD::TRUNCATE,
  3257. SDLoc(N),
  3258. TruncVT, N100),
  3259. DAG.getConstant(TruncC, TruncVT)));
  3260. }
  3261. }
  3262. if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
  3263. return SDValue(N, 0);
  3264. // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  3265. if (N1C && N0.getOpcode() == ISD::SHL &&
  3266. N0.getOperand(1).getOpcode() == ISD::Constant) {
  3267. uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
  3268. uint64_t c2 = N1C->getZExtValue();
  3269. if (c1 + c2 >= OpSizeInBits)
  3270. return DAG.getConstant(0, VT);
  3271. return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
  3272. DAG.getConstant(c1 + c2, N1.getValueType()));
  3273. }
  3274. // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  3275. // For this to be valid, the second form must not preserve any of the bits
  3276. // that are shifted out by the inner shift in the first form. This means
  3277. // the outer shift size must be >= the number of bits added by the ext.
  3278. // As a corollary, we don't care what kind of ext it is.
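// e.g. (shl (zext (shl x:i16, c1) to i32), c2): once c2 >= 32 - 16 = 16,
// every bit the inner i16 shift discards is also discarded by the outer
// shift, so the two shift amounts can simply be added.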
  3279. if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
  3280. N0.getOpcode() == ISD::ANY_EXTEND ||
  3281. N0.getOpcode() == ISD::SIGN_EXTEND) &&
  3282. N0.getOperand(0).getOpcode() == ISD::SHL &&
  3283. isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
  3284. uint64_t c1 =
  3285. cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
  3286. uint64_t c2 = N1C->getZExtValue();
  3287. EVT InnerShiftVT = N0.getOperand(0).getValueType();
  3288. uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
  3289. if (c2 >= OpSizeInBits - InnerShiftSize) {
  3290. if (c1 + c2 >= OpSizeInBits)
  3291. return DAG.getConstant(0, VT);
  3292. return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
  3293. DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
  3294. N0.getOperand(0)->getOperand(0)),
  3295. DAG.getConstant(c1 + c2, N1.getValueType()));
  3296. }
  3297. }
  3298. // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  3299. // Only fold this if the inner zext has no other uses to avoid increasing
  3300. // the total number of instructions.
  3301. if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
  3302. N0.getOperand(0).getOpcode() == ISD::SRL &&
  3303. isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
  3304. uint64_t c1 =
  3305. cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
  3306. if (c1 < VT.getSizeInBits()) {
  3307. uint64_t c2 = N1C->getZExtValue();
  3308. if (c1 == c2) {
  3309. SDValue NewOp0 = N0.getOperand(0);
  3310. EVT CountVT = NewOp0.getOperand(1).getValueType();
  3311. SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
  3312. NewOp0, DAG.getConstant(c2, CountVT));
  3313. AddToWorkList(NewSHL.getNode());
  3314. return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
  3315. }
  3316. }
  3317. }
3318. // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
3319. // (and (srl x, (sub c1, c2)), MASK)
  3320. // Only fold this if the inner shift has no other uses -- if it does, folding
  3321. // this will increase the total number of instructions.
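// e.g. for i32: (shl (srl x, 4), 6) -> (and (shl x, 2), 0xFFFFFFC0), and
// (shl (srl x, 6), 4) -> (and (srl x, 2), 0x3FFFFFF0).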
  3322. if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
  3323. N0.getOperand(1).getOpcode() == ISD::Constant) {
  3324. uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
  3325. if (c1 < VT.getSizeInBits()) {
  3326. uint64_t c2 = N1C->getZExtValue();
  3327. APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
  3328. VT.getSizeInBits() - c1);
  3329. SDValue Shift;
  3330. if (c2 > c1) {
  3331. Mask = Mask.shl(c2-c1);
  3332. Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
  3333. DAG.getConstant(c2-c1, N1.getValueType()));
  3334. } else {
  3335. Mask = Mask.lshr(c1-c2);
  3336. Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
  3337. DAG.getConstant(c1-c2, N1.getValueType()));
  3338. }
  3339. return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
  3340. DAG.getConstant(Mask, VT));
  3341. }
  3342. }
  3343. // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
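// e.g. for i32, (shl (sra x, 4), 4) just clears the low four bits:
// (and x, 0xFFFFFFF0).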
  3344. if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
  3345. SDValue HiBitsMask =
  3346. DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
  3347. VT.getSizeInBits() -
  3348. N1C->getZExtValue()),
  3349. VT);
  3350. return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
  3351. HiBitsMask);
  3352. }
  3353. if (N1C) {
  3354. SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
  3355. if (NewSHL.getNode())
  3356. return NewSHL;
  3357. }
  3358. return SDValue();
  3359. }
  3360. SDValue DAGCombiner::visitSRA(SDNode *N) {
  3361. SDValue N0 = N->getOperand(0);
  3362. SDValue N1 = N->getOperand(1);
  3363. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  3364. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  3365. EVT VT = N0.getValueType();
  3366. unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
  3367. // fold vector ops
  3368. if (VT.isVector()) {
  3369. SDValue FoldedVOp = SimplifyVBinOp(N);
  3370. if (FoldedVOp.getNode()) return FoldedVOp;
  3371. }
3372. // fold (sra c1, c2) -> c1 >>s c2
  3373. if (N0C && N1C)
  3374. return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  3375. // fold (sra 0, x) -> 0
  3376. if (N0C && N0C->isNullValue())
  3377. return N0;
  3378. // fold (sra -1, x) -> -1
  3379. if (N0C && N0C->isAllOnesValue())
  3380. return N0;
3381. // fold (sra x, c >= size(x)) -> undef
  3382. if (N1C && N1C->getZExtValue() >= OpSizeInBits)
  3383. return DAG.getUNDEF(VT);
  3384. // fold (sra x, 0) -> x
  3385. if (N1C && N1C->isNullValue())
  3386. return N0;
3387. // fold (sra (shl x, c1), c1) -> sext_inreg, provided the target supports
3388. // sign_extend_inreg on the implied narrower type.
  3389. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
  3390. unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
  3391. EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
  3392. if (VT.isVector())
  3393. ExtVT = EVT::getVectorVT(*DAG.getContext(),
  3394. ExtVT, VT.getVectorNumElements());
  3395. if ((!LegalOperations ||
  3396. TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
  3397. return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
  3398. N0.getOperand(0), DAG.getValueType(ExtVT));
  3399. }
  3400. // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  3401. if (N1C && N0.getOpcode() == ISD::SRA) {
  3402. if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
  3403. unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
  3404. if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
  3405. return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
  3406. DAG.getConstant(Sum, N1C->getValueType(0)));
  3407. }
  3408. }
3409. // fold (sra (shl X, m), (sub result_size, n))
3410. // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
3411. // result_size - n != m.
  3412. // If truncate is free for the target sext(shl) is likely to result in better
  3413. // code.
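// e.g. i32 with m = 8, n = 16: (sra (shl x, 8), 16) becomes
// (sign_extend (trunc i16 (srl x, 8))).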
  3414. if (N0.getOpcode() == ISD::SHL) {
3415. // Get the two constants of the shifts, CN0 = m, CN = n.
  3416. const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  3417. if (N01C && N1C) {
  3418. // Determine what the truncate's result bitsize and type would be.
  3419. EVT TruncVT =
  3420. EVT::getIntegerVT(*DAG.getContext(),
  3421. OpSizeInBits - N1C->getZExtValue());
  3422. // Determine the residual right-shift amount.
  3423. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
  3424. // If the shift is not a no-op (in which case this should be just a sign
3425. // extend already), the truncate-to type is legal, sign_extend is legal
  3426. // on that type, and the truncate to that type is both legal and free,
  3427. // perform the transform.
  3428. if ((ShiftAmt > 0) &&
  3429. TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
  3430. TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
  3431. TLI.isTruncateFree(VT, TruncVT)) {
  3432. SDValue Amt = DAG.getConstant(ShiftAmt,
  3433. getShiftAmountTy(N0.getOperand(0).getValueType()));
  3434. SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
  3435. N0.getOperand(0), Amt);
  3436. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
  3437. Shift);
  3438. return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
  3439. N->getValueType(0), Trunc);
  3440. }
  3441. }
  3442. }
  3443. // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  3444. if (N1.getOpcode() == ISD::TRUNCATE &&
  3445. N1.getOperand(0).getOpcode() == ISD::AND &&
  3446. N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
  3447. SDValue N101 = N1.getOperand(0).getOperand(1);
  3448. if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
  3449. EVT TruncVT = N1.getValueType();
  3450. SDValue N100 = N1.getOperand(0).getOperand(0);
  3451. APInt TruncC = N101C->getAPIntValue();
  3452. TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
  3453. return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
  3454. DAG.getNode(ISD::AND, SDLoc(N),
  3455. TruncVT,
  3456. DAG.getNode(ISD::TRUNCATE,
  3457. SDLoc(N),
  3458. TruncVT, N100),
  3459. DAG.getConstant(TruncC, TruncVT)));
  3460. }
  3461. }
3462. // fold (sra (trunc (sr[al] x, c1)), c2) -> (trunc (sra x, c1+c2))
  3463. // if c1 is equal to the number of bits the trunc removes
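// e.g. (sra (trunc (srl x:i64, 32) to i32), 5) -> (trunc (sra x, 37)).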
  3464. if (N0.getOpcode() == ISD::TRUNCATE &&
  3465. (N0.getOperand(0).getOpcode() == ISD::SRL ||
  3466. N0.getOperand(0).getOpcode() == ISD::SRA) &&
  3467. N0.getOperand(0).hasOneUse() &&
  3468. N0.getOperand(0).getOperand(1).hasOneUse() &&
  3469. N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
  3470. EVT LargeVT = N0.getOperand(0).getValueType();
  3471. ConstantSDNode *LargeShiftAmt =
  3472. cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
  3473. if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
  3474. LargeShiftAmt->getZExtValue()) {
  3475. SDValue Amt =
  3476. DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
  3477. getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
  3478. SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
  3479. N0.getOperand(0).getOperand(0), Amt);
  3480. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
  3481. }
  3482. }
  3483. // Simplify, based on bits shifted out of the LHS.
  3484. if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
  3485. return SDValue(N, 0);
  3486. // If the sign bit is known to be zero, switch this to a SRL.
  3487. if (DAG.SignBitIsZero(N0))
  3488. return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
  3489. if (N1C) {
  3490. SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
  3491. if (NewSRA.getNode())
  3492. return NewSRA;
  3493. }
  3494. return SDValue();
  3495. }
  3496. SDValue DAGCombiner::visitSRL(SDNode *N) {
  3497. SDValue N0 = N->getOperand(0);
  3498. SDValue N1 = N->getOperand(1);
  3499. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  3500. ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  3501. EVT VT = N0.getValueType();
  3502. unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
  3503. // fold vector ops
  3504. if (VT.isVector()) {
  3505. SDValue FoldedVOp = SimplifyVBinOp(N);
  3506. if (FoldedVOp.getNode()) return FoldedVOp;
  3507. }
  3508. // fold (srl c1, c2) -> c1 >>u c2
  3509. if (N0C && N1C)
  3510. return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  3511. // fold (srl 0, x) -> 0
  3512. if (N0C && N0C->isNullValue())
  3513. return N0;
  3514. // fold (srl x, c >= size(x)) -> undef
  3515. if (N1C && N1C->getZExtValue() >= OpSizeInBits)
  3516. return DAG.getUNDEF(VT);
  3517. // fold (srl x, 0) -> x
  3518. if (N1C && N1C->isNullValue())
  3519. return N0;
  3520. // if (srl x, c) is known to be zero, return 0
  3521. if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
  3522. APInt::getAllOnesValue(OpSizeInBits)))
  3523. return DAG.getConstant(0, VT);
  3524. // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  3525. if (N1C && N0.getOpcode() == ISD::SRL &&
  3526. N0.getOperand(1).getOpcode() == ISD::Constant) {
  3527. uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
  3528. uint64_t c2 = N1C->getZExtValue();
  3529. if (c1 + c2 >= OpSizeInBits)
  3530. return DAG.getConstant(0, VT);
  3531. return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
  3532. DAG.getConstant(c1 + c2, N1.getValueType()));
  3533. }
  3534. // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  3535. if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
  3536. N0.getOperand(0).getOpcode() == ISD::SRL &&
  3537. isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
  3538. uint64_t c1 =
  3539. cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
  3540. uint64_t c2 = N1C->getZExtValue();
  3541. EVT InnerShiftVT = N0.getOperand(0).getValueType();
  3542. EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
  3543. uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
  3544. // This is only valid if the OpSizeInBits + c1 = size of inner shift.
  3545. if (c1 + OpSizeInBits == InnerShiftSize) {
  3546. if (c1 + c2 >= InnerShiftSize)
  3547. return DAG.getConstant(0, VT);
  3548. return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
  3549. DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
  3550. N0.getOperand(0)->getOperand(0),
  3551. DAG.getConstant(c1 + c2, ShiftCountVT)));
  3552. }
  3553. }
  3554. // fold (srl (shl x, c), c) -> (and x, cst2)
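// e.g. for i32 with c = 24: ShAmt = 24 + 64 - 32 = 56, so cst2 =
// ~0ULL >> 56 = 0xFF; (srl (shl x, 24), 24) == (and x, 0xFF).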
  3555. if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
  3556. N0.getValueSizeInBits() <= 64) {
  3557. uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
  3558. return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
  3559. DAG.getConstant(~0ULL >> ShAmt, VT));
  3560. }
  3561. // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  3562. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
  3563. // Shifting in all undef bits?
  3564. EVT SmallVT = N0.getOperand(0).getValueType();
  3565. if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
  3566. return DAG.getUNDEF(VT);
  3567. if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
  3568. uint64_t ShiftAmt = N1C->getZExtValue();
  3569. SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
  3570. N0.getOperand(0),
  3571. DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
  3572. AddToWorkList(SmallShift.getNode());
  3573. APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
  3574. return DAG.getNode(ISD::AND, SDLoc(N), VT,
  3575. DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
  3576. DAG.getConstant(Mask, VT));
  3577. }
  3578. }
  3579. // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
  3580. // bit, which is unmodified by sra.
  3581. if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
  3582. if (N0.getOpcode() == ISD::SRA)
  3583. return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  3584. }
3585. // fold (srl (ctlz x), log2(size(x))) -> x iff x has one bit set (the low bit).
  3586. if (N1C && N0.getOpcode() == ISD::CTLZ &&
  3587. N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
  3588. APInt KnownZero, KnownOne;
  3589. DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
  3590. // If any of the input bits are KnownOne, then the input couldn't be all
  3591. // zeros, thus the result of the srl will always be zero.
  3592. if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
3593. // If all of the bits input to the ctlz node are known to be zero, then
  3594. // the result of the ctlz is "32" and the result of the shift is one.
  3595. APInt UnknownBits = ~KnownZero;
  3596. if (UnknownBits == 0) return DAG.getConstant(1, VT);
  3597. // Otherwise, check to see if there is exactly one bit input to the ctlz.
  3598. if ((UnknownBits & (UnknownBits - 1)) == 0) {
3599. // Okay, we know that only the single bit specified by UnknownBits
  3600. // could be set on input to the CTLZ node. If this bit is set, the SRL
  3601. // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
  3602. // to an SRL/XOR pair, which is likely to simplify more.
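// e.g. if only bit 3 of x can be nonzero, (srl (ctlz x), 5) on i32 is
// equivalent to (xor (srl x, 3), 1).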
  3603. unsigned ShAmt = UnknownBits.countTrailingZeros();
  3604. SDValue Op = N0.getOperand(0);
  3605. if (ShAmt) {
  3606. Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
  3607. DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
  3608. AddToWorkList(Op.getNode());
  3609. }
  3610. return DAG.getNode(ISD::XOR, SDLoc(N), VT,
  3611. Op, DAG.getConstant(1, VT));
  3612. }
  3613. }
  3614. // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  3615. if (N1.getOpcode() == ISD::TRUNCATE &&
  3616. N1.getOperand(0).getOpcode() == ISD::AND &&
  3617. N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
  3618. SDValue N101 = N1.getOperand(0).getOperand(1);
  3619. if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
  3620. EVT TruncVT = N1.getValueType();
  3621. SDValue N100 = N1.getOperand(0).getOperand(0);
  3622. APInt TruncC = N101C->getAPIntValue();
  3623. TruncC = TruncC.trunc(TruncVT.getSizeInBits());
  3624. return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
  3625. DAG.getNode(ISD::AND, SDLoc(N),
  3626. TruncVT,
  3627. DAG.getNode(ISD::TRUNCATE,
  3628. SDLoc(N),
  3629. TruncVT, N100),
  3630. DAG.getConstant(TruncC, TruncVT)));
  3631. }
  3632. }
  3633. // fold operands of srl based on knowledge that the low bits are not
  3634. // demanded.
  3635. if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
  3636. return SDValue(N, 0);
  3637. if (N1C) {
  3638. SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
  3639. if (NewSRL.getNode())
  3640. return NewSRL;
  3641. }
  3642. // Attempt to convert a srl of a load into a narrower zero-extending load.
  3643. SDValue NarrowLoad = ReduceLoadWidth(N);
  3644. if (NarrowLoad.getNode())
  3645. return NarrowLoad;
  3646. // Here is a common situation. We want to optimize:
  3647. //
  3648. // %a = ...
  3649. // %b = and i32 %a, 2
  3650. // %c = srl i32 %b, 1
  3651. // brcond i32 %c ...
  3652. //
  3653. // into
  3654. //
  3655. // %a = ...
  3656. // %b = and %a, 2
  3657. // %c = setcc eq %b, 0
  3658. // brcond %c ...
  3659. //
3660. // However, once the source operand of the SRL has been optimized into an
3661. // AND, the SRL itself may not be optimized further. Look for it and add
3662. // the BRCOND into the worklist.
  3663. if (N->hasOneUse()) {
  3664. SDNode *Use = *N->use_begin();
  3665. if (Use->getOpcode() == ISD::BRCOND)
  3666. AddToWorkList(Use);
  3667. else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
3668. // Also look past the truncate.
  3669. Use = *Use->use_begin();
  3670. if (Use->getOpcode() == ISD::BRCOND)
  3671. AddToWorkList(Use);
  3672. }
  3673. }
  3674. return SDValue();
  3675. }
  3676. SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  3677. SDValue N0 = N->getOperand(0);
  3678. EVT VT = N->getValueType(0);
  3679. // fold (ctlz c1) -> c2
  3680. if (isa<ConstantSDNode>(N0))
  3681. return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  3682. return SDValue();
  3683. }
  3684. SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  3685. SDValue N0 = N->getOperand(0);
  3686. EVT VT = N->getValueType(0);
  3687. // fold (ctlz_zero_undef c1) -> c2
  3688. if (isa<ConstantSDNode>(N0))
  3689. return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  3690. return SDValue();
  3691. }
  3692. SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  3693. SDValue N0 = N->getOperand(0);
  3694. EVT VT = N->getValueType(0);
  3695. // fold (cttz c1) -> c2
  3696. if (isa<ConstantSDNode>(N0))
  3697. return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  3698. return SDValue();
  3699. }
  3700. SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  3701. SDValue N0 = N->getOperand(0);
  3702. EVT VT = N->getValueType(0);
  3703. // fold (cttz_zero_undef c1) -> c2
  3704. if (isa<ConstantSDNode>(N0))
  3705. return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  3706. return SDValue();
  3707. }
  3708. SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  3709. SDValue N0 = N->getOperand(0);
  3710. EVT VT = N->getValueType(0);
  3711. // fold (ctpop c1) -> c2
  3712. if (isa<ConstantSDNode>(N0))
  3713. return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  3714. return SDValue();
  3715. }
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  if (VT.isInteger() &&
      (VT0 == MVT::i1 ||
       (VT0.isInteger() &&
        TLI.getBooleanContents(false) ==
          TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
                         N0, DAG.getConstant(1, VT0));
    XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorkList(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // FIXME:
    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
    // having to say they don't support SELECT_CC on every type the DAG knows
    // about, since there is no way to mark an opcode illegal at all value types
    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}
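
// Editorial example (not part of the original source): SplitVSETCC halves a
// vector setcc whose result type must be split.  A v8i32 setcc, for instance,
// becomes two v4i32 setcc nodes, each comparing one half of the original
// operands and reusing the original condition code operand.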
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
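  // Editorial worked example (not part of the original source): for i32
  // lanes, Y = X >>s 31 is 0 when X >= 0 and -1 (all ones) when X < 0.  Then
  // (X + Y) ^ Y is X ^ 0 = X for non-negative X, and (X - 1) ^ -1 = -X for
  // negative X, so the three-node sequence computes |X| per lane.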
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorkList(Shift.getNode());
      AddToWorkList(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
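  // Editorial example (not part of the original source): on a hypothetical
  // target where v8i32 is the widest legal vector, a v16i32 vselect fed by a
  // v16i32 setcc would be split here into two v8i32 vselect/setcc pairs and
  // reassembled with CONCAT_VECTORS, instead of letting the type legalizer
  // scalarize the compare into sixteen scalar setcc nodes.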
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorkList(Lo.getNode());
    AddToWorkList(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, SDLoc(N), false);
  if (SCC.getNode()) {
    AddToWorkList(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    }

    // Fold to a simpler select_cc
    if (SCC.getOpcode() == ISD::SETCC)
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}

SDValue DAGCombiner::visitSETCC(SDNode *N) {
  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
                       SDLoc(N));
}

// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
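//
// Editorial example (not part of the original source): suppose an i8 load
// feeds both (sext i32 (load x)) and a (setcc (load x), c) user.  Turning the
// load into an i32 sextload only pays off if every other user can be rewritten
// to work on the extended value; this helper collects the setcc users that can
// be extended (into ExtendNodes) and reports whether the rewrite is worthwhile.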
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      return ExtendNodes.size();
  }
  return true;
}

void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                                  SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                                  ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    SDNode *SetCC = SetCCs[i];
    SmallVector<SDValue, 4> Ops;
    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Trunc)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }
    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
                                 &Ops[0], Ops.size()));
  }
}
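
// Editorial summary (not part of the original source): visitSIGN_EXTEND walks
// the standard sext folds in order: constant folding, collapsing nested
// extends, eliminating trunc/sext pairs when enough sign bits are known,
// converting loads into sextloads (rewriting setcc users via the two helpers
// above), and finally turning sext into zext when the sign bit is provably
// zero.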
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (sext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // None of the supported targets knows how to perform load and sign extend
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(true) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
    if (!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
      return DAG.getSelect(SDLoc(N), VT,
                           DAG.getSetCC(SDLoc(N),
                                        getSetCCResultType(VT),
                                        N0.getOperand(0), N0.getOperand(1),
                                        cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           NegOne, DAG.getConstant(0, VT));
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in KnownZero.
// This function computes KnownZero to avoid a duplicated call to
// ComputeMaskedBits in the caller.
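//
// Editorial note (not part of the original source): besides a literal
// TRUNCATE, (setne X, 0) is treated as a truncate to i1 when X is known to be
// 0 or 1, since the setcc then reproduces X's low bit exactly; the KnownZero
// check below (all bits above bit 0 known zero) is what establishes that.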
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         APInt &KnownZero) {
  APInt KnownOne;
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
    return true;
  }

  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
  ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
  if (COp0 && COp0->isNullValue())
    Op = Op1;
  else if (COp1 && COp1->isNullValue())
    Op = Op0;
  else
    return false;

  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);

  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
    return false;

  return true;
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
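  // Editorial worked example (not part of the original source): let Op be an
  // i32 value whose bits above bit 7 are known zero, N0 = (trunc i8 Op), and
  // VT = i64.  TruncatedBits below covers Op's bits [8, 32); they are known
  // zero, so (zext i64 (trunc i8 Op)) can become (zext i64 Op) directly,
  // skipping the truncate.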
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
      AddToWorkList(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      AddToWorkList(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector()) {
      // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
      // Only do this before legalize for now.
      EVT N0VT = N0.getOperand(0).getValueType();
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the zext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                        cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
                                       &OneOps[0], OneOps.size()));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
                                     &OneOps[0], OneOps.size()));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
                                     VT, LN0->getChain(), LN0->getBasePtr(),
                                     MemVT, LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                          N0.getValueType(), ExtLoad),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      else {
        EVT MatchingElementType =
          EVT::getIntegerVT(*DAG.getContext(),
                            N0VT.getScalarType().getSizeInBits());
        EVT MatchingVectorType =
          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                           N0VT.getVectorNumElements());
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// GetDemandedBits - See if the specified operand can be simplified with the
/// knowledge that only the bits specified by Mask are used.  If so, return the
/// simpler operand, otherwise return a null SDValue.
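///
/// Editorial example (not part of the original source): with Mask = 0xFF,
/// GetDemandedBits((or (shl y, 8), x), Mask) returns x, because (shl y, 8)
/// contributes nothing to the low 8 bits; for a constant it returns the
/// constant re-masked, and for a single-use (srl x, c) it recurses into x
/// with the mask shifted left by c.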
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
  switch (V.getOpcode()) {
  default: break;
  case ISD::Constant: {
    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
    assert(CV != 0 && "Const value should be ConstSDNode.");
    const APInt &CVal = CV->getAPIntValue();
    APInt NewVal = CVal & Mask;
    if (NewVal != CVal)
      return DAG.getConstant(NewVal, V.getValueType());
    break;
  }
  case ISD::OR:
  case ISD::XOR:
    // If the LHS or RHS don't contribute bits to the or, drop them.
    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
      return V.getOperand(1);
    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
      return V.getOperand(0);
    break;
  case ISD::SRL:
    // Only look at single-use SRLs.
    if (!V.getNode()->hasOneUse())
      break;
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
      // See if we can recursively simplify the LHS.
      unsigned Amt = RHSC->getZExtValue();

      // Watch out for shift count overflow though.
      if (Amt >= Mask.getBitWidth()) break;
      APInt NewMask = Mask << Amt;
      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
      if (SimplifyLHS.getNode())
        return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
                           SimplifyLHS, V.getOperand(1));
    }
  }
  return SDValue();
}

/// ReduceLoadWidth - If the result of a wider load is shifted right by N bits
/// and then truncated to a narrower type, where N is a multiple of the number
/// of bits in the narrower type, transform it to a narrower load from
/// address + N / (number of bits in the new type). If the result is to be
/// extended, also fold the extension to form an extending load.
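///
/// Editorial example (not part of the original source): on a little-endian
/// target, (i16 (trunc (srl (i32 (load p)), 16))) reads only the upper half of
/// the 32-bit value, so it becomes (i16 (load p+2)); big-endian targets adjust
/// the offset the other way, as handled below.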
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorkList(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          NewAlign, LN0->getTBAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, VT);
    else
      Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
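
  // Editorial note (not part of the original source): sext_in_reg
  // re-interprets the low EVTBits bits of N0 as signed and smears that sign
  // bit across the rest of VT; e.g. sext_in_reg(x:i32, i8) copies bit 7 into
  // bits 8-31.  The folds below fire when that smearing is already a no-op or
  // can be done by a cheaper node (sextload, sra, or zext_in_reg).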
  4787. // fold (sext_in_reg c1) -> c1
  4788. if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
  4789. return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
  4790. // If the input is already sign extended, just drop the extension.
  4791. if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
  4792. return N0;
  4793. // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  4794. if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
  4795. EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
  4796. return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
  4797. N0.getOperand(0), N1);
  4798. // fold (sext_in_reg (sext x)) -> (sext x)
  4799. // fold (sext_in_reg (aext x)) -> (sext x)
  4800. // if x is small enough.
  4801. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
  4802. SDValue N00 = N0.getOperand(0);
  4803. if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
  4804. (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
  4805. return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  4806. }
  4807. // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  4808. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
  4809. return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
  4810. // fold operands of sext_in_reg based on knowledge that the top bits are not
  4811. // demanded.
  4812. if (SimplifyDemandedBits(SDValue(N, 0)))
  4813. return SDValue(N, 0);
  4814. // fold (sext_in_reg (load x)) -> (smaller sextload x)
  4815. // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  4816. SDValue NarrowLoad = ReduceLoadWidth(N);
  4817. if (NarrowLoad.getNode())
  4818. return NarrowLoad;
  4819. // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  4820. // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  4821. // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  4822. if (N0.getOpcode() == ISD::SRL) {
  4823. if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
  4824. if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
  4825. // We can turn this into an SRA iff the input to the SRL is already sign
  4826. // extended enough.
  4827. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
  4828. if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
  4829. return DAG.getNode(ISD::SRA, SDLoc(N), VT,
  4830. N0.getOperand(0), N0.getOperand(1));
  4831. }
  4832. }
  4833. // fold (sext_inreg (extload x)) -> (sextload x)
  4834. if (ISD::isEXTLoad(N0.getNode()) &&
  4835. ISD::isUNINDEXEDLoad(N0.getNode()) &&
  4836. EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
  4837. ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
  4838. TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
  4839. LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  4840. SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
  4841. LN0->getChain(),
  4842. LN0->getBasePtr(), EVT,
  4843. LN0->getMemOperand());
  4844. CombineTo(N, ExtLoad);
  4845. CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
  4846. AddToWorkList(ExtLoad.getNode());
  4847. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  4848. }
  4849. // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  4850. if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
  4851. N0.hasOneUse() &&
  4852. EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
  4853. ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
  4854. TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
  4855. LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  4856. SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
  4857. LN0->getChain(),
  4858. LN0->getBasePtr(), EVT,
  4859. LN0->getMemOperand());
  4860. CombineTo(N, ExtLoad);
  4861. CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
  4862. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  4863. }
  4864. // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  4865. if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
  4866. SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
  4867. N0.getOperand(1), false);
  4868. if (BSwap.getNode() != 0)
  4869. return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
  4870. BSwap, N1);
  4871. }
  4872. return SDValue();
  4873. }
  4874. SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  4875. SDValue N0 = N->getOperand(0);
  4876. EVT VT = N->getValueType(0);
  4877. bool isLE = TLI.isLittleEndian();
  4878. // noop truncate
  4879. if (N0.getValueType() == N->getValueType(0))
  4880. return N0;
  4881. // fold (truncate c1) -> c1
  4882. if (isa<ConstantSDNode>(N0))
  4883. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  4884. // fold (truncate (truncate x)) -> (truncate x)
  4885. if (N0.getOpcode() == ISD::TRUNCATE)
  4886. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  4887. // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  4888. if (N0.getOpcode() == ISD::ZERO_EXTEND ||
  4889. N0.getOpcode() == ISD::SIGN_EXTEND ||
  4890. N0.getOpcode() == ISD::ANY_EXTEND) {
  4891. if (N0.getOperand(0).getValueType().bitsLT(VT))
  4892. // if the source is smaller than the dest, we still need an extend
  4893. return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
  4894. N0.getOperand(0));
  4895. if (N0.getOperand(0).getValueType().bitsGT(VT))
  4896. // if the source is larger than the dest, than we just need the truncate
  4897. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  4898. // if the source and dest are the same type, we can drop both the extend
  4899. // and the truncate.
  4900. return N0.getOperand(0);
  4901. }
  4902. // Fold extract-and-trunc into a narrow extract. For example:
  4903. // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  4904. // i32 y = TRUNCATE(i64 x)
  4905. // -- becomes --
  4906. // v16i8 b = BITCAST (v2i64 val)
  4907. // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  4908. //
  4909. // Note: We only run this optimization after type legalization (which often
  4910. // creates this pattern) and before operation legalization after which
  4911. // we need to be more careful about the vector instructions that we generate.
  4912. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
  4913. LegalTypes && !LegalOperations && N0->hasOneUse()) {
  4914. EVT VecTy = N0.getOperand(0).getValueType();
  4915. EVT ExTy = N0.getValueType();
  4916. EVT TrTy = N->getValueType(0);
  4917. unsigned NumElem = VecTy.getVectorNumElements();
  4918. unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
  4919. EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
  4920. assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
  4921. SDValue EltNo = N0->getOperand(1);
  4922. if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
  4923. int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  4924. EVT IndexTy = TLI.getVectorIdxTy();
  4925. int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
  4926. SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
  4927. NVT, N0.getOperand(0));
  4928. return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
  4929. SDLoc(N), TrTy, V,
  4930. DAG.getConstant(Index, IndexTy));
  4931. }
  4932. }
  4933. // Fold a series of buildvector, bitcast, and truncate if possible.
  4934. // For example fold
  4935. // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  4936. // (2xi32 (buildvector x, y)).
  4937. if (Level == AfterLegalizeVectorOps && VT.isVector() &&
  4938. N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
  4939. N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
  4940. N0.getOperand(0).hasOneUse()) {
  4941. SDValue BuildVect = N0.getOperand(0);
  4942. EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
  4943. EVT TruncVecEltTy = VT.getVectorElementType();
  4944. // Check that the element types match.
  4945. if (BuildVectEltTy == TruncVecEltTy) {
  4946. // Now we only need to compute the offset of the truncated elements.
  4947. unsigned BuildVecNumElts = BuildVect.getNumOperands();
  4948. unsigned TruncVecNumElts = VT.getVectorNumElements();
  4949. unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
  4950. assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
  4951. "Invalid number of elements");
  4952. SmallVector<SDValue, 8> Opnds;
  4953. for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
  4954. Opnds.push_back(BuildVect.getOperand(i));
  4955. return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
  4956. Opnds.size());
  4957. }
  4958. }
  4959. // See if we can simplify the input to this truncate through knowledge that
  4960. // only the low bits are being used.
  4961. // For example "trunc (or (shl x, 8), y)" // -> trunc y
  4962. // Currently we only perform this optimization on scalars because vectors
  4963. // may have different active low bits.
  4964. if (!VT.isVector()) {
  4965. SDValue Shorter =
  4966. GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
  4967. VT.getSizeInBits()));
  4968. if (Shorter.getNode())
  4969. return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  4970. }
  4971. // fold (truncate (load x)) -> (smaller load x)
  4972. // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  4973. if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
  4974. SDValue Reduced = ReduceLoadWidth(N);
  4975. if (Reduced.getNode())
  4976. return Reduced;
  4977. }
  4978. // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
  4979. // where ... are all 'undef'.
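// For example:
//   (v4i16 trunc (v4i32 concat (v2i32 x), (v2i32 undef)))
//     -> (v4i16 concat (v2i16 (trunc x)), (v2i16 undef))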
  4980. if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
  4981. SmallVector<EVT, 8> VTs;
  4982. SDValue V;
  4983. unsigned Idx = 0;
  4984. unsigned NumDefs = 0;
  4985. for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
  4986. SDValue X = N0.getOperand(i);
  4987. if (X.getOpcode() != ISD::UNDEF) {
  4988. V = X;
  4989. Idx = i;
  4990. NumDefs++;
  4991. }
  4992. // Stop if more than one member is non-undef.
  4993. if (NumDefs > 1)
  4994. break;
  4995. VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
  4996. VT.getVectorElementType(),
  4997. X.getValueType().getVectorNumElements()));
  4998. }
  4999. if (NumDefs == 0)
  5000. return DAG.getUNDEF(VT);
  5001. if (NumDefs == 1) {
  5002. assert(V.getNode() && "The single defined operand is empty!");
  5003. SmallVector<SDValue, 8> Opnds;
  5004. for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
  5005. if (i != Idx) {
  5006. Opnds.push_back(DAG.getUNDEF(VTs[i]));
  5007. continue;
  5008. }
  5009. SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
  5010. AddToWorkList(NV.getNode());
  5011. Opnds.push_back(NV);
  5012. }
  5013. return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
  5014. &Opnds[0], Opnds.size());
  5015. }
  5016. }
  5017. // Simplify the operands using demanded-bits information.
  5018. if (!VT.isVector() &&
  5019. SimplifyDemandedBits(SDValue(N, 0)))
  5020. return SDValue(N, 0);
  5021. return SDValue();
  5022. }
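/// getBuildPairElt - Return the SDNode for operand i of the BUILD_PAIR node
/// N, looking through any intervening MERGE_VALUES node.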
  5023. static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  5024. SDValue Elt = N->getOperand(i);
  5025. if (Elt.getOpcode() != ISD::MERGE_VALUES)
  5026. return Elt.getNode();
  5027. return Elt.getOperand(Elt.getResNo()).getNode();
  5028. }
  5029. /// CombineConsecutiveLoads - build_pair (load, load) -> load
  5030. /// if load locations are consecutive.
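/// For example, (i64 build_pair (i32 load [p]), (i32 load [p+4])) can become
/// a single i64 load from [p] when the alignment and legality checks pass.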
  5031. SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  5032. assert(N->getOpcode() == ISD::BUILD_PAIR);
  5033. LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  5034. LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  5035. if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
  5036. LD1->getPointerInfo().getAddrSpace() !=
  5037. LD2->getPointerInfo().getAddrSpace())
  5038. return SDValue();
  5039. EVT LD1VT = LD1->getValueType(0);
  5040. if (ISD::isNON_EXTLoad(LD2) &&
  5041. LD2->hasOneUse() &&
  5042. // If both are volatile this would reduce the number of volatile loads.
  5043. // If one is volatile it might be OK, but be conservative and bail out.
  5044. !LD1->isVolatile() &&
  5045. !LD2->isVolatile() &&
  5046. DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
  5047. unsigned Align = LD1->getAlignment();
  5048. unsigned NewAlign = TLI.getDataLayout()->
  5049. getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
  5050. if (NewAlign <= Align &&
  5051. (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
  5052. return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
  5053. LD1->getBasePtr(), LD1->getPointerInfo(),
  5054. false, false, false, Align);
  5055. }
  5056. return SDValue();
  5057. }
  5058. SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  5059. SDValue N0 = N->getOperand(0);
  5060. EVT VT = N->getValueType(0);
  5061. // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  5062. // Only do this before legalize, since afterward the target may be depending
  5063. // on the bitconvert.
  5064. // First check to see if this is all constant.
  5065. if (!LegalTypes &&
  5066. N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
  5067. VT.isVector()) {
  5068. bool isSimple = true;
  5069. for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
  5070. if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
  5071. N0.getOperand(i).getOpcode() != ISD::Constant &&
  5072. N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
  5073. isSimple = false;
  5074. break;
  5075. }
  5076. EVT DestEltVT = N->getValueType(0).getVectorElementType();
  5077. assert(!DestEltVT.isVector() &&
  5078. "Element type of vector ValueType must not be vector!");
  5079. if (isSimple)
  5080. return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  5081. }
  5082. // If the input is a constant, let getNode fold it.
  5083. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
  5084. SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
  5085. if (Res.getNode() != N) {
  5086. if (!LegalOperations ||
  5087. TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
  5088. return Res;
  5089. // Folding it resulted in an illegal node, and it's too late to
  5090. // do that. Clean up the old node and forego the transformation.
  5091. // Ideally this won't happen very often, because instcombine
  5092. // and the earlier dagcombine runs (where illegal nodes are
  5093. // permitted) should have folded most of them already.
  5094. DAG.DeleteNode(Res.getNode());
  5095. }
  5096. }
  5097. // (conv (conv x, t1), t2) -> (conv x, t2)
  5098. if (N0.getOpcode() == ISD::BITCAST)
  5099. return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
  5100. N0.getOperand(0));
  5101. // fold (conv (load x)) -> (load (conv*)x)
  5102. // If the resultant load doesn't need a higher alignment than the original.
  5103. if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
  5104. // Do not change the width of a volatile load.
  5105. !cast<LoadSDNode>(N0)->isVolatile() &&
  5106. (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
  5107. TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
  5108. LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  5109. unsigned Align = TLI.getDataLayout()->
  5110. getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
  5111. unsigned OrigAlign = LN0->getAlignment();
  5112. if (Align <= OrigAlign) {
  5113. SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
  5114. LN0->getBasePtr(), LN0->getPointerInfo(),
  5115. LN0->isVolatile(), LN0->isNonTemporal(),
  5116. LN0->isInvariant(), OrigAlign,
  5117. LN0->getTBAAInfo());
  5118. AddToWorkList(N);
  5119. CombineTo(N0.getNode(),
  5120. DAG.getNode(ISD::BITCAST, SDLoc(N0),
  5121. N0.getValueType(), Load),
  5122. Load.getValue(1));
  5123. return Load;
  5124. }
  5125. }
  5126. // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  5127. // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  5128. // This often reduces constant pool loads.
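// For example, (i32 bitcast (fneg f32:x)) becomes
// (xor (i32 bitcast x), 0x80000000).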
  5129. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
  5130. (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
  5131. N0.getNode()->hasOneUse() && VT.isInteger() &&
  5132. !VT.isVector() && !N0.getValueType().isVector()) {
  5133. SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
  5134. N0.getOperand(0));
  5135. AddToWorkList(NewConv.getNode());
  5136. APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
  5137. if (N0.getOpcode() == ISD::FNEG)
  5138. return DAG.getNode(ISD::XOR, SDLoc(N), VT,
  5139. NewConv, DAG.getConstant(SignBit, VT));
  5140. assert(N0.getOpcode() == ISD::FABS);
  5141. return DAG.getNode(ISD::AND, SDLoc(N), VT,
  5142. NewConv, DAG.getConstant(~SignBit, VT));
  5143. }
  5144. // fold (bitconvert (fcopysign cst, x)) ->
  5145. // (or (and (bitconvert x), sign), (and cst, (not sign)))
  5146. // Note that we don't handle (copysign x, cst) because this can always be
  5147. // folded to an fneg or fabs.
  5148. if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
  5149. isa<ConstantFPSDNode>(N0.getOperand(0)) &&
  5150. VT.isInteger() && !VT.isVector()) {
  5151. unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
  5152. EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
  5153. if (isTypeLegal(IntXVT)) {
  5154. SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
  5155. IntXVT, N0.getOperand(1));
  5156. AddToWorkList(X.getNode());
  5157. // If X has a different width than the result/lhs, sext it or truncate it.
  5158. unsigned VTWidth = VT.getSizeInBits();
  5159. if (OrigXWidth < VTWidth) {
  5160. X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
  5161. AddToWorkList(X.getNode());
  5162. } else if (OrigXWidth > VTWidth) {
  5163. // To get the sign bit in the right place, we have to shift it right
  5164. // before truncating.
  5165. X = DAG.getNode(ISD::SRL, SDLoc(X),
  5166. X.getValueType(), X,
  5167. DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
  5168. AddToWorkList(X.getNode());
  5169. X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
  5170. AddToWorkList(X.getNode());
  5171. }
  5172. APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
  5173. X = DAG.getNode(ISD::AND, SDLoc(X), VT,
  5174. X, DAG.getConstant(SignBit, VT));
  5175. AddToWorkList(X.getNode());
  5176. SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
  5177. VT, N0.getOperand(0));
  5178. Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
  5179. Cst, DAG.getConstant(~SignBit, VT));
  5180. AddToWorkList(Cst.getNode());
  5181. return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
  5182. }
  5183. }
  5184. // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  5185. if (N0.getOpcode() == ISD::BUILD_PAIR) {
  5186. SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
  5187. if (CombineLD.getNode())
  5188. return CombineLD;
  5189. }
  5190. return SDValue();
  5191. }
  5192. SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  5193. EVT VT = N->getValueType(0);
  5194. return CombineConsecutiveLoads(N, VT);
  5195. }
  5196. /// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
  5197. /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
  5198. /// destination element value type.
  5199. SDValue DAGCombiner::
  5200. ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  5201. EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
  5202. // If this is already the right type, we're done.
  5203. if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
  5204. unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  5205. unsigned DstBitSize = DstEltVT.getSizeInBits();
  5206. // If this is a conversion of N elements of one type to N elements of another
  5207. // type, convert each element. This handles FP<->INT cases.
  5208. if (SrcBitSize == DstBitSize) {
  5209. EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
  5210. BV->getValueType(0).getVectorNumElements());
  5211. // Due to the FP element handling below calling this routine recursively,
  5212. // we can end up with a scalar-to-vector node here.
  5213. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
  5214. return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
  5215. DAG.getNode(ISD::BITCAST, SDLoc(BV),
  5216. DstEltVT, BV->getOperand(0)));
  5217. SmallVector<SDValue, 8> Ops;
  5218. for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
  5219. SDValue Op = BV->getOperand(i);
  5220. // If the vector element type is not legal, the BUILD_VECTOR operands
  5221. // are promoted and implicitly truncated. Make that explicit here.
  5222. if (Op.getValueType() != SrcEltVT)
  5223. Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
  5224. Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
  5225. DstEltVT, Op));
  5226. AddToWorkList(Ops.back().getNode());
  5227. }
  5228. return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
  5229. &Ops[0], Ops.size());
  5230. }
  5231. // Otherwise, we're growing or shrinking the elements. To avoid having to
  5232. // handle annoying details of growing/shrinking FP values, we convert them to
  5233. // int first.
  5234. if (SrcEltVT.isFloatingPoint()) {
  5235. // Convert the input float vector to an int vector where the elements are
  5236. // the same size.
  5237. assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
  5238. EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
  5239. BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
  5240. SrcEltVT = IntVT;
  5241. }
  5242. // Now we know the input is an integer vector. If the output is a FP type,
  5243. // convert to integer first, then to FP of the right size.
  5244. if (DstEltVT.isFloatingPoint()) {
  5245. assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
  5246. EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
  5247. SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
  5248. // Next, convert to FP elements of the same size.
  5249. return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  5250. }
  5251. // Okay, we know the src/dst types are both integers of differing widths.
  5252. // Handle growing first.
  5253. assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  5254. if (SrcBitSize < DstBitSize) {
  5255. unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
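// Pack NumInputsPerOutput source constants into each destination constant.
// For example, growing v4i16 into v2i32 combines consecutive i16 elements
// e0 and e1 into the i32 value (e1 << 16) | e0 on little-endian targets.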
  5256. SmallVector<SDValue, 8> Ops;
  5257. for (unsigned i = 0, e = BV->getNumOperands(); i != e;
  5258. i += NumInputsPerOutput) {
  5259. bool isLE = TLI.isLittleEndian();
  5260. APInt NewBits = APInt(DstBitSize, 0);
  5261. bool EltIsUndef = true;
  5262. for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
  5263. // Shift the previously computed bits over.
  5264. NewBits <<= SrcBitSize;
  5265. SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
  5266. if (Op.getOpcode() == ISD::UNDEF) continue;
  5267. EltIsUndef = false;
  5268. NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
  5269. zextOrTrunc(SrcBitSize).zext(DstBitSize);
  5270. }
  5271. if (EltIsUndef)
  5272. Ops.push_back(DAG.getUNDEF(DstEltVT));
  5273. else
  5274. Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
  5275. }
  5276. EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
  5277. return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
  5278. &Ops[0], Ops.size());
  5279. }
  5280. // Finally, this must be the case where we are shrinking elements: each input
  5281. // turns into multiple outputs.
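// For example, shrinking v2i32 into v4i16 splits each i32 constant into
// two i16 pieces, low piece first on little-endian targets; the
// std::reverse below swaps the pieces for big-endian targets.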
  5282. bool isS2V = ISD::isScalarToVector(BV);
  5283. unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  5284. EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
  5285. NumOutputsPerInput*BV->getNumOperands());
  5286. SmallVector<SDValue, 8> Ops;
  5287. for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
  5288. if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
  5289. for (unsigned j = 0; j != NumOutputsPerInput; ++j)
  5290. Ops.push_back(DAG.getUNDEF(DstEltVT));
  5291. continue;
  5292. }
  5293. APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
  5294. getAPIntValue().zextOrTrunc(SrcBitSize);
  5295. for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
  5296. APInt ThisVal = OpVal.trunc(DstBitSize);
  5297. Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
  5298. if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
  5299. // Simply turn this into a SCALAR_TO_VECTOR of the new type.
  5300. return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
  5301. Ops[0]);
  5302. OpVal = OpVal.lshr(DstBitSize);
  5303. }
  5304. // For big endian targets, swap the order of the pieces of each element.
  5305. if (TLI.isBigEndian())
  5306. std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  5307. }
  5308. return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
  5309. &Ops[0], Ops.size());
  5310. }
  5311. SDValue DAGCombiner::visitFADD(SDNode *N) {
  5312. SDValue N0 = N->getOperand(0);
  5313. SDValue N1 = N->getOperand(1);
  5314. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5315. ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  5316. EVT VT = N->getValueType(0);
  5317. // fold vector ops
  5318. if (VT.isVector()) {
  5319. SDValue FoldedVOp = SimplifyVBinOp(N);
  5320. if (FoldedVOp.getNode()) return FoldedVOp;
  5321. }
  5322. // fold (fadd c1, c2) -> c1 + c2
  5323. if (N0CFP && N1CFP)
  5324. return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
  5325. // canonicalize constant to RHS
  5326. if (N0CFP && !N1CFP)
  5327. return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
  5328. // fold (fadd A, 0) -> A
  5329. if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
  5330. N1CFP->getValueAPF().isZero())
  5331. return N0;
  5332. // fold (fadd A, (fneg B)) -> (fsub A, B)
  5333. if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
  5334. isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
  5335. return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
  5336. GetNegatedExpression(N1, DAG, LegalOperations));
  5337. // fold (fadd (fneg A), B) -> (fsub B, A)
  5338. if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
  5339. isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
  5340. return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
  5341. GetNegatedExpression(N0, DAG, LegalOperations));
  5342. // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
  5343. if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
  5344. N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
  5345. isa<ConstantFPSDNode>(N0.getOperand(1)))
  5346. return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
  5347. DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5348. N0.getOperand(1), N1));
  5349. // No FP constant should be created after legalization, as the Instruction
  5350. // Selection pass has a hard time dealing with FP constants.
  5351. //
  5352. // We don't need to test this condition for transformations like the
  5353. // following, as the DAG being transformed implies it is legal to take an
  5354. // FP constant as an operand.
  5355. //
  5356. // (fadd (fmul c, x), x) -> (fmul c+1, x)
  5357. //
  5358. bool AllowNewFpConst = (Level < AfterLegalizeDAG);
  5359. // If allowed, fold (fadd (fneg x), x) -> 0.0
  5360. if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
  5361. N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
  5362. return DAG.getConstantFP(0.0, VT);
  5363. // If allowed, fold (fadd x, (fneg x)) -> 0.0
  5364. if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
  5365. N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
  5366. return DAG.getConstantFP(0.0, VT);
  5367. // In unsafe math mode, we can fold chains of FADD's of the same value
  5368. // into multiplications. This transform is not safe in general because
  5369. // we are reducing the number of rounding steps.
  5370. if (DAG.getTarget().Options.UnsafeFPMath &&
  5371. TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
  5372. !N0CFP && !N1CFP) {
  5373. if (N0.getOpcode() == ISD::FMUL) {
  5374. ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
  5375. ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
  5376. // (fadd (fmul c, x), x) -> (fmul x, c+1)
  5377. if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
  5378. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5379. SDValue(CFP00, 0),
  5380. DAG.getConstantFP(1.0, VT));
  5381. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5382. N1, NewCFP);
  5383. }
  5384. // (fadd (fmul x, c), x) -> (fmul x, c+1)
  5385. if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
  5386. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5387. SDValue(CFP01, 0),
  5388. DAG.getConstantFP(1.0, VT));
  5389. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5390. N1, NewCFP);
  5391. }
  5392. // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
  5393. if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
  5394. N1.getOperand(0) == N1.getOperand(1) &&
  5395. N0.getOperand(1) == N1.getOperand(0)) {
  5396. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5397. SDValue(CFP00, 0),
  5398. DAG.getConstantFP(2.0, VT));
  5399. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5400. N0.getOperand(1), NewCFP);
  5401. }
  5402. // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
  5403. if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
  5404. N1.getOperand(0) == N1.getOperand(1) &&
  5405. N0.getOperand(0) == N1.getOperand(0)) {
  5406. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5407. SDValue(CFP01, 0),
  5408. DAG.getConstantFP(2.0, VT));
  5409. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5410. N0.getOperand(0), NewCFP);
  5411. }
  5412. }
  5413. if (N1.getOpcode() == ISD::FMUL) {
  5414. ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
  5415. ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
  5416. // (fadd x, (fmul c, x)) -> (fmul x, c+1)
  5417. if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
  5418. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5419. SDValue(CFP10, 0),
  5420. DAG.getConstantFP(1.0, VT));
  5421. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5422. N0, NewCFP);
  5423. }
  5424. // (fadd x, (fmul x, c)) -> (fmul x, c+1)
  5425. if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
  5426. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5427. SDValue(CFP11, 0),
  5428. DAG.getConstantFP(1.0, VT));
  5429. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5430. N0, NewCFP);
  5431. }
  5432. // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
  5433. if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
  5434. N0.getOperand(0) == N0.getOperand(1) &&
  5435. N1.getOperand(1) == N0.getOperand(0)) {
  5436. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5437. SDValue(CFP10, 0),
  5438. DAG.getConstantFP(2.0, VT));
  5439. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5440. N1.getOperand(1), NewCFP);
  5441. }
  5442. // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
  5443. if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
  5444. N0.getOperand(0) == N0.getOperand(1) &&
  5445. N1.getOperand(0) == N0.getOperand(0)) {
  5446. SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
  5447. SDValue(CFP11, 0),
  5448. DAG.getConstantFP(2.0, VT));
  5449. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5450. N1.getOperand(0), NewCFP);
  5451. }
  5452. }
  5453. if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
  5454. ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
  5455. // (fadd (fadd x, x), x) -> (fmul x, 3.0)
  5456. if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
  5457. (N0.getOperand(0) == N1))
  5458. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5459. N1, DAG.getConstantFP(3.0, VT));
  5460. }
  5461. if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
  5462. ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
  5463. // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
  5464. if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
  5465. N1.getOperand(0) == N0)
  5466. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5467. N0, DAG.getConstantFP(3.0, VT));
  5468. }
  5469. // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
  5470. if (AllowNewFpConst &&
  5471. N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
  5472. N0.getOperand(0) == N0.getOperand(1) &&
  5473. N1.getOperand(0) == N1.getOperand(1) &&
  5474. N0.getOperand(0) == N1.getOperand(0))
  5475. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5476. N0.getOperand(0),
  5477. DAG.getConstantFP(4.0, VT));
  5478. }
  5479. // FADD -> FMA combines:
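// Fusing the multiply and add removes an intermediate rounding step, so
// these folds are only done when fast FP contraction or unsafe FP math is
// enabled and the target reports that FMA is the faster option.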
  5480. if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
  5481. DAG.getTarget().Options.UnsafeFPMath) &&
  5482. DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
  5483. (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
  5484. // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  5485. if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
  5486. return DAG.getNode(ISD::FMA, SDLoc(N), VT,
  5487. N0.getOperand(0), N0.getOperand(1), N1);
  5488. // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  5489. // Note: Commutes FADD operands.
  5490. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
  5491. return DAG.getNode(ISD::FMA, SDLoc(N), VT,
  5492. N1.getOperand(0), N1.getOperand(1), N0);
  5493. }
  5494. return SDValue();
  5495. }
  5496. SDValue DAGCombiner::visitFSUB(SDNode *N) {
  5497. SDValue N0 = N->getOperand(0);
  5498. SDValue N1 = N->getOperand(1);
  5499. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5500. ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  5501. EVT VT = N->getValueType(0);
  5502. SDLoc dl(N);
  5503. // fold vector ops
  5504. if (VT.isVector()) {
  5505. SDValue FoldedVOp = SimplifyVBinOp(N);
  5506. if (FoldedVOp.getNode()) return FoldedVOp;
  5507. }
  5508. // fold (fsub c1, c2) -> c1-c2
  5509. if (N0CFP && N1CFP)
  5510. return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
  5511. // fold (fsub A, 0) -> A
  5512. if (DAG.getTarget().Options.UnsafeFPMath &&
  5513. N1CFP && N1CFP->getValueAPF().isZero())
  5514. return N0;
  5515. // fold (fsub 0, B) -> -B
  5516. if (DAG.getTarget().Options.UnsafeFPMath &&
  5517. N0CFP && N0CFP->getValueAPF().isZero()) {
  5518. if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
  5519. return GetNegatedExpression(N1, DAG, LegalOperations);
  5520. if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
  5521. return DAG.getNode(ISD::FNEG, dl, VT, N1);
  5522. }
  5523. // fold (fsub A, (fneg B)) -> (fadd A, B)
  5524. if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
  5525. return DAG.getNode(ISD::FADD, dl, VT, N0,
  5526. GetNegatedExpression(N1, DAG, LegalOperations));
  5527. // If 'unsafe math' is enabled, fold
  5528. //   (fsub x, x) -> 0.0,
  5529. //   (fsub x, (fadd x, y)) -> (fneg y), and
  5530. //   (fsub x, (fadd y, x)) -> (fneg y)
  5531. if (DAG.getTarget().Options.UnsafeFPMath) {
  5532. if (N0 == N1)
  5533. return DAG.getConstantFP(0.0f, VT);
  5534. if (N1.getOpcode() == ISD::FADD) {
  5535. SDValue N10 = N1->getOperand(0);
  5536. SDValue N11 = N1->getOperand(1);
  5537. if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
  5538. &DAG.getTarget().Options))
  5539. return GetNegatedExpression(N11, DAG, LegalOperations);
  5540. if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
  5541. &DAG.getTarget().Options))
  5542. return GetNegatedExpression(N10, DAG, LegalOperations);
  5543. }
  5544. }
  5545. // FSUB -> FMA combines:
  5546. if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
  5547. DAG.getTarget().Options.UnsafeFPMath) &&
  5548. DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
  5549. (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
  5550. // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  5551. if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
  5552. return DAG.getNode(ISD::FMA, dl, VT,
  5553. N0.getOperand(0), N0.getOperand(1),
  5554. DAG.getNode(ISD::FNEG, dl, VT, N1));
  5555. // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  5556. // Note: Commutes FSUB operands.
  5557. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
  5558. return DAG.getNode(ISD::FMA, dl, VT,
  5559. DAG.getNode(ISD::FNEG, dl, VT,
  5560. N1.getOperand(0)),
  5561. N1.getOperand(1), N0);
  5562. // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  5563. if (N0.getOpcode() == ISD::FNEG &&
  5564. N0.getOperand(0).getOpcode() == ISD::FMUL &&
  5565. N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
  5566. SDValue N00 = N0.getOperand(0).getOperand(0);
  5567. SDValue N01 = N0.getOperand(0).getOperand(1);
  5568. return DAG.getNode(ISD::FMA, dl, VT,
  5569. DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
  5570. DAG.getNode(ISD::FNEG, dl, VT, N1));
  5571. }
  5572. }
  5573. return SDValue();
  5574. }
  5575. SDValue DAGCombiner::visitFMUL(SDNode *N) {
  5576. SDValue N0 = N->getOperand(0);
  5577. SDValue N1 = N->getOperand(1);
  5578. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5579. ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  5580. EVT VT = N->getValueType(0);
  5581. const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  5582. // fold vector ops
  5583. if (VT.isVector()) {
  5584. SDValue FoldedVOp = SimplifyVBinOp(N);
  5585. if (FoldedVOp.getNode()) return FoldedVOp;
  5586. }
  5587. // fold (fmul c1, c2) -> c1*c2
  5588. if (N0CFP && N1CFP)
  5589. return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
  5590. // canonicalize constant to RHS
  5591. if (N0CFP && !N1CFP)
  5592. return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
  5593. // fold (fmul A, 0) -> 0
  5594. if (DAG.getTarget().Options.UnsafeFPMath &&
  5595. N1CFP && N1CFP->getValueAPF().isZero())
  5596. return N1;
  5597. // fold (fmul A, 0) -> 0, vector edition.
  5598. if (DAG.getTarget().Options.UnsafeFPMath &&
  5599. ISD::isBuildVectorAllZeros(N1.getNode()))
  5600. return N1;
  5601. // fold (fmul A, 1.0) -> A
  5602. if (N1CFP && N1CFP->isExactlyValue(1.0))
  5603. return N0;
  5604. // fold (fmul X, 2.0) -> (fadd X, X)
  5605. if (N1CFP && N1CFP->isExactlyValue(+2.0))
  5606. return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
  5607. // fold (fmul X, -1.0) -> (fneg X)
  5608. if (N1CFP && N1CFP->isExactlyValue(-1.0))
  5609. if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
  5610. return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
  5611. // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  5612. if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
  5613. &DAG.getTarget().Options)) {
  5614. if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
  5615. &DAG.getTarget().Options)) {
  5616. // Both can be negated for free, check to see if at least one is cheaper
  5617. // negated.
  5618. if (LHSNeg == 2 || RHSNeg == 2)
  5619. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5620. GetNegatedExpression(N0, DAG, LegalOperations),
  5621. GetNegatedExpression(N1, DAG, LegalOperations));
  5622. }
  5623. }
  5624. // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
  5625. if (DAG.getTarget().Options.UnsafeFPMath &&
  5626. N1CFP && N0.getOpcode() == ISD::FMUL &&
  5627. N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
  5628. return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
  5629. DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  5630. N0.getOperand(1), N1));
  5631. return SDValue();
  5632. }
  5633. SDValue DAGCombiner::visitFMA(SDNode *N) {
  5634. SDValue N0 = N->getOperand(0);
  5635. SDValue N1 = N->getOperand(1);
  5636. SDValue N2 = N->getOperand(2);
  5637. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5638. ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  5639. EVT VT = N->getValueType(0);
  5640. SDLoc dl(N);
  5641. if (DAG.getTarget().Options.UnsafeFPMath) {
  5642. if (N0CFP && N0CFP->isZero())
  5643. return N2;
  5644. if (N1CFP && N1CFP->isZero())
  5645. return N2;
  5646. }
  5647. if (N0CFP && N0CFP->isExactlyValue(1.0))
  5648. return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  5649. if (N1CFP && N1CFP->isExactlyValue(1.0))
  5650. return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
  5651. // Canonicalize (fma c, x, y) -> (fma x, c, y)
  5652. if (N0CFP && !N1CFP)
  5653. return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
  5654. // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  5655. if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
  5656. N2.getOpcode() == ISD::FMUL &&
  5657. N0 == N2.getOperand(0) &&
  5658. N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
  5659. return DAG.getNode(ISD::FMUL, dl, VT, N0,
  5660. DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
  5661. }
  5662. // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
  5663. if (DAG.getTarget().Options.UnsafeFPMath &&
  5664. N0.getOpcode() == ISD::FMUL && N1CFP &&
  5665. N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
  5666. return DAG.getNode(ISD::FMA, dl, VT,
  5667. N0.getOperand(0),
  5668. DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
  5669. N2);
  5670. }
  5671. // (fma x, 1, y) -> (fadd x, y)
  5672. // (fma x, -1, y) -> (fadd (fneg x), y)
  5673. if (N1CFP) {
  5674. if (N1CFP->isExactlyValue(1.0))
  5675. return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
  5676. if (N1CFP->isExactlyValue(-1.0) &&
  5677. (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
  5678. SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
  5679. AddToWorkList(RHSNeg.getNode());
  5680. return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
  5681. }
  5682. }
  5683. // (fma x, c, x) -> (fmul x, (c+1))
  5684. if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
  5685. return DAG.getNode(ISD::FMUL, dl, VT, N0,
  5686. DAG.getNode(ISD::FADD, dl, VT,
  5687. N1, DAG.getConstantFP(1.0, VT)));
  5688. // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  5689. if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
  5690. N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
  5691. return DAG.getNode(ISD::FMUL, dl, VT, N0,
  5692. DAG.getNode(ISD::FADD, dl, VT,
  5693. N1, DAG.getConstantFP(-1.0, VT)));
  5694. return SDValue();
  5695. }
  5696. SDValue DAGCombiner::visitFDIV(SDNode *N) {
  5697. SDValue N0 = N->getOperand(0);
  5698. SDValue N1 = N->getOperand(1);
  5699. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5700. ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  5701. EVT VT = N->getValueType(0);
  5702. const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  5703. // fold vector ops
  5704. if (VT.isVector()) {
  5705. SDValue FoldedVOp = SimplifyVBinOp(N);
  5706. if (FoldedVOp.getNode()) return FoldedVOp;
  5707. }
  5708. // fold (fdiv c1, c2) -> c1/c2
  5709. if (N0CFP && N1CFP)
  5710. return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
  5711. // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
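// For example, (fdiv X, 4.0) becomes (fmul X, 0.25).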
  5712. if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
  5713. // Compute the reciprocal 1.0 / c2.
  5714. APFloat N1APF = N1CFP->getValueAPF();
  5715. APFloat Recip(N1APF.getSemantics(), 1); // 1.0
  5716. APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
  5717. // Only do the transform if the reciprocal is a legal fp immediate that
  5718. // isn't too nasty (e.g. NaN, denormal, ...).
  5719. if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
  5720. (!LegalOperations ||
  5721. // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
  5722. // backend)... we should handle this gracefully after Legalize.
  5723. // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
  5724. TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
  5725. TLI.isFPImmLegal(Recip, VT)))
  5726. return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
  5727. DAG.getConstantFP(Recip, VT));
  5728. }
  5729. // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  5730. if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
  5731. &DAG.getTarget().Options)) {
  5732. if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
  5733. &DAG.getTarget().Options)) {
  5734. // Both can be negated for free, check to see if at least one is cheaper
  5735. // negated.
  5736. if (LHSNeg == 2 || RHSNeg == 2)
  5737. return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
  5738. GetNegatedExpression(N0, DAG, LegalOperations),
  5739. GetNegatedExpression(N1, DAG, LegalOperations));
  5740. }
  5741. }
  5742. return SDValue();
  5743. }
  5744. SDValue DAGCombiner::visitFREM(SDNode *N) {
  5745. SDValue N0 = N->getOperand(0);
  5746. SDValue N1 = N->getOperand(1);
  5747. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5748. ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  5749. EVT VT = N->getValueType(0);
  5750. // fold (frem c1, c2) -> fmod(c1,c2)
  5751. if (N0CFP && N1CFP)
  5752. return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
  5753. return SDValue();
  5754. }
  5755. SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  5756. SDValue N0 = N->getOperand(0);
  5757. SDValue N1 = N->getOperand(1);
  5758. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5759. ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  5760. EVT VT = N->getValueType(0);
  5761. if (N0CFP && N1CFP) // Constant fold
  5762. return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
  5763. if (N1CFP) {
  5764. const APFloat& V = N1CFP->getValueAPF();
  5765. // copysign(x, c1) -> fabs(x) iff ispos(c1)
  5766. // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
  5767. if (!V.isNegative()) {
  5768. if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
  5769. return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
  5770. } else {
  5771. if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
  5772. return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
  5773. DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
  5774. }
  5775. }
  5776. // copysign(fabs(x), y) -> copysign(x, y)
  5777. // copysign(fneg(x), y) -> copysign(x, y)
  5778. // copysign(copysign(x,z), y) -> copysign(x, y)
  5779. if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
  5780. N0.getOpcode() == ISD::FCOPYSIGN)
  5781. return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
  5782. N0.getOperand(0), N1);
  5783. // copysign(x, abs(y)) -> abs(x)
  5784. if (N1.getOpcode() == ISD::FABS)
  5785. return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
  5786. // copysign(x, copysign(y,z)) -> copysign(x, z)
  5787. if (N1.getOpcode() == ISD::FCOPYSIGN)
  5788. return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
  5789. N0, N1.getOperand(1));
  5790. // copysign(x, fp_extend(y)) -> copysign(x, y)
  5791. // copysign(x, fp_round(y)) -> copysign(x, y)
  5792. if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
  5793. return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
  5794. N0, N1.getOperand(0));
  5795. return SDValue();
  5796. }
  5797. SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  5798. SDValue N0 = N->getOperand(0);
  5799. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  5800. EVT VT = N->getValueType(0);
  5801. EVT OpVT = N0.getValueType();
  5802. // fold (sint_to_fp c1) -> c1fp
  5803. if (N0C &&
  5804. // ...but only if the target supports immediate floating-point values
  5805. (!LegalOperations ||
  5806. TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
  5807. return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  5808. // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  5809. // but UINT_TO_FP is legal on this target, try to convert.
  5810. if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
  5811. TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
  5812. // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
  5813. if (DAG.SignBitIsZero(N0))
  5814. return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  5815. }
  5816. // The next optimizations are desirable only if SELECT_CC can be lowered.
  5817. // Check against MVT::Other for SELECT_CC, which is a workaround for targets
  5818. // having to say they don't support SELECT_CC on every type the DAG knows
  5819. // about, since there is no way to mark an opcode illegal at all value types
  5820. // (See also visitSELECT)
  5821. if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
  5822. // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
  5823. if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
  5824. !VT.isVector() &&
  5825. (!LegalOperations ||
  5826. TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
  5827. SDValue Ops[] =
  5828. { N0.getOperand(0), N0.getOperand(1),
  5829. DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
  5830. N0.getOperand(2) };
  5831. return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
  5832. }
  5833. // fold (sint_to_fp (zext (setcc x, y, cc))) ->
  5834. // (select_cc x, y, 1.0, 0.0, cc)
  5835. if (N0.getOpcode() == ISD::ZERO_EXTEND &&
  5836. N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
  5837. (!LegalOperations ||
  5838. TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
  5839. SDValue Ops[] =
  5840. { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
  5841. DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
  5842. N0.getOperand(0).getOperand(2) };
  5843. return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
  5844. }
  5845. }
  5846. return SDValue();
  5847. }
  5848. SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  5849. SDValue N0 = N->getOperand(0);
  5850. ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  5851. EVT VT = N->getValueType(0);
  5852. EVT OpVT = N0.getValueType();
  5853. // fold (uint_to_fp c1) -> c1fp
  5854. if (N0C &&
  5855. // ...but only if the target supports immediate floating-point values
  5856. (!LegalOperations ||
  5857. TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
  5858. return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  5859. // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  5860. // but SINT_TO_FP is legal on this target, try to convert.
  5861. if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
  5862. TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
  5863. // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
  5864. if (DAG.SignBitIsZero(N0))
  5865. return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  5866. }
  5867. // The next optimizations are desirable only if SELECT_CC can be lowered.
  5868. // Check against MVT::Other for SELECT_CC, which is a workaround for targets
  5869. // having to say they don't support SELECT_CC on every type the DAG knows
  5870. // about, since there is no way to mark an opcode illegal at all value types
  5871. // (See also visitSELECT)
  5872. if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
  5873. // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
  5874. if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
  5875. (!LegalOperations ||
  5876. TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
  5877. SDValue Ops[] =
  5878. { N0.getOperand(0), N0.getOperand(1),
  5879. DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
  5880. N0.getOperand(2) };
  5881. return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
  5882. }
  5883. }
  5884. return SDValue();
  5885. }
  5886. SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  5887. SDValue N0 = N->getOperand(0);
  5888. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5889. EVT VT = N->getValueType(0);
  5890. // fold (fp_to_sint c1fp) -> c1
  5891. if (N0CFP)
  5892. return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
  5893. return SDValue();
  5894. }
  5895. SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  5896. SDValue N0 = N->getOperand(0);
  5897. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5898. EVT VT = N->getValueType(0);
  5899. // fold (fp_to_uint c1fp) -> c1
  5900. if (N0CFP)
  5901. return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
  5902. return SDValue();
  5903. }
  5904. SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  5905. SDValue N0 = N->getOperand(0);
  5906. SDValue N1 = N->getOperand(1);
  5907. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5908. EVT VT = N->getValueType(0);
  5909. // fold (fp_round c1fp) -> c1fp
  5910. if (N0CFP)
  5911. return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
  5912. // fold (fp_round (fp_extend x)) -> x
  5913. if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
  5914. return N0.getOperand(0);
  5915. // fold (fp_round (fp_round x)) -> (fp_round x)
  5916. if (N0.getOpcode() == ISD::FP_ROUND) {
  5917. // This is a value-preserving truncation if both rounds are.
  5918. bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
  5919. N0.getNode()->getConstantOperandVal(1) == 1;
  5920. return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
  5921. DAG.getIntPtrConstant(IsTrunc));
  5922. }
  5923. // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  5924. if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
  5925. SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
  5926. N0.getOperand(0), N1);
  5927. AddToWorkList(Tmp.getNode());
  5928. return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
  5929. Tmp, N0.getOperand(1));
  5930. }
  5931. return SDValue();
  5932. }
  5933. SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  5934. SDValue N0 = N->getOperand(0);
  5935. EVT VT = N->getValueType(0);
  5936. EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  5937. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5938. // fold (fp_round_inreg c1fp) -> c1fp
  5939. if (N0CFP && isTypeLegal(EVT)) {
  5940. SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
  5941. return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
  5942. }
  5943. return SDValue();
  5944. }
  5945. SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  5946. SDValue N0 = N->getOperand(0);
  5947. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  5948. EVT VT = N->getValueType(0);
  5949. // If this is fp_round(fp_extend(x)), don't fold it; allow ourselves to be folded.
  5950. if (N->hasOneUse() &&
  5951. N->use_begin()->getOpcode() == ISD::FP_ROUND)
  5952. return SDValue();
  5953. // fold (fp_extend c1fp) -> c1fp
  5954. if (N0CFP)
  5955. return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
  5956. // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  5957. // value of X.
  5958. if (N0.getOpcode() == ISD::FP_ROUND
  5959. && N0.getNode()->getConstantOperandVal(1) == 1) {
  5960. SDValue In = N0.getOperand(0);
  5961. if (In.getValueType() == VT) return In;
  5962. if (VT.bitsLT(In.getValueType()))
  5963. return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
  5964. In, N0.getOperand(1));
  5965. return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  5966. }
  5967. // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  5968. if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
  5969. ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
  5970. TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
  5971. LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  5972. SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
  5973. LN0->getChain(),
  5974. LN0->getBasePtr(), N0.getValueType(),
  5975. LN0->getMemOperand());
  5976. CombineTo(N, ExtLoad);
  5977. CombineTo(N0.getNode(),
  5978. DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
  5979. N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
  5980. ExtLoad.getValue(1));
  5981. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  5982. }
  5983. return SDValue();
  5984. }
  5985. SDValue DAGCombiner::visitFNEG(SDNode *N) {
  5986. SDValue N0 = N->getOperand(0);
  5987. EVT VT = N->getValueType(0);
  5988. if (VT.isVector()) {
  5989. SDValue FoldedVOp = SimplifyVUnaryOp(N);
  5990. if (FoldedVOp.getNode()) return FoldedVOp;
  5991. }
  5992. if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
  5993. &DAG.getTarget().Options))
  5994. return GetNegatedExpression(N0, DAG, LegalOperations);
  5995. // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
  5996. // constant pool values.
  5997. if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
  5998. !VT.isVector() &&
  5999. N0.getNode()->hasOneUse() &&
  6000. N0.getOperand(0).getValueType().isInteger()) {
  6001. SDValue Int = N0.getOperand(0);
  6002. EVT IntVT = Int.getValueType();
  6003. if (IntVT.isInteger() && !IntVT.isVector()) {
  6004. Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
  6005. DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
  6006. AddToWorkList(Int.getNode());
  6007. return DAG.getNode(ISD::BITCAST, SDLoc(N),
  6008. VT, Int);
  6009. }
  6010. }
  6011. // (fneg (fmul c, x)) -> (fmul -c, x)
  6012. if (N0.getOpcode() == ISD::FMUL) {
  6013. ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
  6014. if (CFP1)
  6015. return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
  6016. N0.getOperand(0),
  6017. DAG.getNode(ISD::FNEG, SDLoc(N), VT,
  6018. N0.getOperand(1)));
  6019. }
  6020. return SDValue();
  6021. }
  6022. SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  6023. SDValue N0 = N->getOperand(0);
  6024. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  6025. EVT VT = N->getValueType(0);
  6026. // fold (fceil c1) -> fceil(c1)
  6027. if (N0CFP)
  6028. return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
  6029. return SDValue();
  6030. }
  6031. SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  6032. SDValue N0 = N->getOperand(0);
  6033. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  6034. EVT VT = N->getValueType(0);
  6035. // fold (ftrunc c1) -> ftrunc(c1)
  6036. if (N0CFP)
  6037. return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
  6038. return SDValue();
  6039. }
  6040. SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  6041. SDValue N0 = N->getOperand(0);
  6042. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  6043. EVT VT = N->getValueType(0);
  6044. // fold (ffloor c1) -> ffloor(c1)
  6045. if (N0CFP)
  6046. return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
  6047. return SDValue();
  6048. }
  6049. SDValue DAGCombiner::visitFABS(SDNode *N) {
  6050. SDValue N0 = N->getOperand(0);
  6051. ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  6052. EVT VT = N->getValueType(0);
  6053. if (VT.isVector()) {
  6054. SDValue FoldedVOp = SimplifyVUnaryOp(N);
  6055. if (FoldedVOp.getNode()) return FoldedVOp;
  6056. }
  6057. // fold (fabs c1) -> fabs(c1)
  6058. if (N0CFP)
  6059. return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
  6060. // fold (fabs (fabs x)) -> (fabs x)
  6061. if (N0.getOpcode() == ISD::FABS)
  6062. return N->getOperand(0);
  6063. // fold (fabs (fneg x)) -> (fabs x)
  6064. // fold (fabs (fcopysign x, y)) -> (fabs x)
  6065. if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
  6066. return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
  6067. // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
  6068. // constant pool values.
  6069. if (!TLI.isFAbsFree(VT) &&
  6070. N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
  6071. N0.getOperand(0).getValueType().isInteger() &&
  6072. !N0.getOperand(0).getValueType().isVector()) {
  6073. SDValue Int = N0.getOperand(0);
  6074. EVT IntVT = Int.getValueType();
  6075. if (IntVT.isInteger() && !IntVT.isVector()) {
  6076. Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
  6077. DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
  6078. AddToWorkList(Int.getNode());
  6079. return DAG.getNode(ISD::BITCAST, SDLoc(N),
  6080. N->getValueType(0), Int);
  6081. }
  6082. }
  6083. return SDValue();
  6084. }
  6085. SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  6086. SDValue Chain = N->getOperand(0);
  6087. SDValue N1 = N->getOperand(1);
  6088. SDValue N2 = N->getOperand(2);
  6089. // If N is a constant we could fold this into a fallthrough or unconditional
  6090. // branch. However that doesn't happen very often in normal code, because
  6091. // Instcombine/SimplifyCFG should have handled the available opportunities.
  6092. // If we did this folding here, it would be necessary to update the
  6093. // MachineBasicBlock CFG, which is awkward.
  6094. // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  6095. // on the target.
  6096. if (N1.getOpcode() == ISD::SETCC &&
  6097. TLI.isOperationLegalOrCustom(ISD::BR_CC,
  6098. N1.getOperand(0).getValueType())) {
  6099. return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
  6100. Chain, N1.getOperand(2),
  6101. N1.getOperand(0), N1.getOperand(1), N2);
  6102. }
  6103. if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
  6104. ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
  6105. (N1.getOperand(0).hasOneUse() &&
  6106. N1.getOperand(0).getOpcode() == ISD::SRL))) {
  6107. SDNode *Trunc = 0;
  6108. if (N1.getOpcode() == ISD::TRUNCATE) {
  6109. // Look past the truncate.
  6110. Trunc = N1.getNode();
  6111. N1 = N1.getOperand(0);
  6112. }
  6113. // Match this pattern so that we can generate simpler code:
  6114. //
  6115. // %a = ...
  6116. // %b = and i32 %a, 2
  6117. // %c = srl i32 %b, 1
  6118. // brcond i32 %c ...
  6119. //
  6120. // into
  6121. //
  6122. // %a = ...
  6123. // %b = and i32 %a, 2
  6124. // %c = setcc eq %b, 0
  6125. // brcond %c ...
  6126. //
  6127. // This applies only when the AND constant value has one bit set and the
  6128. // SRL constant is equal to the log2 of the AND constant. The back-end is
  6129. // smart enough to convert the result into a TEST/JMP sequence.
  6130. SDValue Op0 = N1.getOperand(0);
  6131. SDValue Op1 = N1.getOperand(1);
  6132. if (Op0.getOpcode() == ISD::AND &&
  6133. Op1.getOpcode() == ISD::Constant) {
  6134. SDValue AndOp1 = Op0.getOperand(1);
  6135. if (AndOp1.getOpcode() == ISD::Constant) {
  6136. const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
  6137. if (AndConst.isPowerOf2() &&
  6138. cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
  6139. SDValue SetCC =
  6140. DAG.getSetCC(SDLoc(N),
  6141. getSetCCResultType(Op0.getValueType()),
  6142. Op0, DAG.getConstant(0, Op0.getValueType()),
  6143. ISD::SETNE);
  6144. SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
  6145. MVT::Other, Chain, SetCC, N2);
  6146. // Don't add the new BRCond into the worklist or else SimplifySelectCC
  6147. // will convert it back to (X & C1) >> C2.
  6148. CombineTo(N, NewBRCond, false);
  6149. // Truncate is dead.
  6150. if (Trunc) {
  6151. removeFromWorkList(Trunc);
  6152. DAG.DeleteNode(Trunc);
  6153. }
  6154. // Replace the uses of SRL with SETCC
  6155. WorkListRemover DeadNodes(*this);
  6156. DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
  6157. removeFromWorkList(N1.getNode());
  6158. DAG.DeleteNode(N1.getNode());
  6159. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  6160. }
  6161. }
  6162. }
  6163. if (Trunc)
  6164. // Restore N1 if the above transformation doesn't match.
  6165. N1 = N->getOperand(1);
  6166. }
  6167. // Transform br(xor(x, y)) -> br(x != y)
  6168. // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  6169. if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
  6170. SDNode *TheXor = N1.getNode();
  6171. SDValue Op0 = TheXor->getOperand(0);
  6172. SDValue Op1 = TheXor->getOperand(1);
  6173. if (Op0.getOpcode() == Op1.getOpcode()) {
  6174. // Avoid missing important xor optimizations.
  6175. SDValue Tmp = visitXOR(TheXor);
  6176. if (Tmp.getNode()) {
  6177. if (Tmp.getNode() != TheXor) {
  6178. DEBUG(dbgs() << "\nReplacing.8 ";
  6179. TheXor->dump(&DAG);
  6180. dbgs() << "\nWith: ";
  6181. Tmp.getNode()->dump(&DAG);
  6182. dbgs() << '\n');
  6183. WorkListRemover DeadNodes(*this);
  6184. DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
  6185. removeFromWorkList(TheXor);
  6186. DAG.DeleteNode(TheXor);
  6187. return DAG.getNode(ISD::BRCOND, SDLoc(N),
  6188. MVT::Other, Chain, Tmp, N2);
  6189. }
  6190. // visitXOR has changed XOR's operands or replaced the XOR completely,
  6191. // bail out.
  6192. return SDValue(N, 0);
  6193. }
  6194. }
  6195. if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
  6196. bool Equal = false;
  6197. if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
  6198. if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
  6199. Op0.getOpcode() == ISD::XOR) {
  6200. TheXor = Op0.getNode();
  6201. Equal = true;
  6202. }
  6203. EVT SetCCVT = N1.getValueType();
  6204. if (LegalTypes)
  6205. SetCCVT = getSetCCResultType(SetCCVT);
  6206. SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
  6207. SetCCVT,
  6208. Op0, Op1,
  6209. Equal ? ISD::SETEQ : ISD::SETNE);
  6210. // Replace the uses of XOR with SETCC
  6211. WorkListRemover DeadNodes(*this);
  6212. DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
  6213. removeFromWorkList(N1.getNode());
  6214. DAG.DeleteNode(N1.getNode());
  6215. return DAG.getNode(ISD::BRCOND, SDLoc(N),
  6216. MVT::Other, Chain, SetCC, N2);
  6217. }
  6218. }
  6219. return SDValue();
  6220. }
  6221. // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
  6222. //
  6223. SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  6224. CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  6225. SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
  6226. // If N is a constant we could fold this into a fallthrough or unconditional
  6227. // branch. However that doesn't happen very often in normal code, because
  6228. // Instcombine/SimplifyCFG should have handled the available opportunities.
  6229. // If we did this folding here, it would be necessary to update the
  6230. // MachineBasicBlock CFG, which is awkward.
  6231. // Use SimplifySetCC to simplify SETCC's.
  6232. SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
  6233. CondLHS, CondRHS, CC->get(), SDLoc(N),
  6234. false);
  6235. if (Simp.getNode()) AddToWorkList(Simp.getNode());
  6236. // fold to a simpler setcc
  6237. if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
  6238. return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
  6239. N->getOperand(0), Simp.getOperand(2),
  6240. Simp.getOperand(0), Simp.getOperand(1),
  6241. N->getOperand(4));
  6242. return SDValue();
  6243. }
  6244. /// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
  6245. /// uses N as its base pointer and that N may be folded in the load / store
  6246. /// addressing mode.
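/// For illustration (not from the original source): if N is (add x, 8) and
/// Use is a load whose base pointer is N, this returns true on targets whose
/// addressing modes accept [reg + 8], since the add can then be folded away.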
  6247. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
  6248. SelectionDAG &DAG,
  6249. const TargetLowering &TLI) {
  6250. EVT VT;
  6251. if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
  6252. if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
  6253. return false;
  6254. VT = Use->getValueType(0);
  6255. } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
  6256. if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
  6257. return false;
  6258. VT = ST->getValue().getValueType();
  6259. } else
  6260. return false;
  6261. TargetLowering::AddrMode AM;
  6262. if (N->getOpcode() == ISD::ADD) {
  6263. ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
  6264. if (Offset)
  6265. // [reg +/- imm]
  6266. AM.BaseOffs = Offset->getSExtValue();
  6267. else
  6268. // [reg +/- reg]
  6269. AM.Scale = 1;
  6270. } else if (N->getOpcode() == ISD::SUB) {
  6271. ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
  6272. if (Offset)
  6273. // [reg +/- imm]
  6274. AM.BaseOffs = -Offset->getSExtValue();
  6275. else
  6276. // [reg +/- reg]
  6277. AM.Scale = 1;
  6278. } else
  6279. return false;
  6280. return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
  6281. }
  6282. /// CombineToPreIndexedLoadStore - Try turning a load / store into a
  6283. /// pre-indexed load / store when the base pointer is an add or subtract
  6284. /// and it has other uses besides the load / store. After the
  6285. /// transformation, the new indexed load / store has effectively folded
  6286. /// the add / subtract in and all of its other uses are redirected to the
  6287. /// new load / store.
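/// For illustration (ARM-style syntax assumed, not from the original
/// source): a load from (add r0, #4), where the add has other uses, can
/// become the pre-indexed "ldr r2, [r0, #4]!", which writes r0+4 back to
/// r0 so the other uses of the add read the updated base register.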
  6288. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  6289. if (Level < AfterLegalizeDAG)
  6290. return false;
  6291. bool isLoad = true;
  6292. SDValue Ptr;
  6293. EVT VT;
  6294. if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
  6295. if (LD->isIndexed())
  6296. return false;
  6297. VT = LD->getMemoryVT();
  6298. if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
  6299. !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
  6300. return false;
  6301. Ptr = LD->getBasePtr();
  6302. } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
  6303. if (ST->isIndexed())
  6304. return false;
  6305. VT = ST->getMemoryVT();
  6306. if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
  6307. !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
  6308. return false;
  6309. Ptr = ST->getBasePtr();
  6310. isLoad = false;
  6311. } else {
  6312. return false;
  6313. }
  6314. // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  6315. // out. There is no reason to make this a preinc/predec.
  6316. if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
  6317. Ptr.getNode()->hasOneUse())
  6318. return false;
  6319. // Ask the target to do addressing mode selection.
  6320. SDValue BasePtr;
  6321. SDValue Offset;
  6322. ISD::MemIndexedMode AM = ISD::UNINDEXED;
  6323. if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
  6324. return false;
  6325. // Backends without true r+i pre-indexed forms may need to pass a
  6326. // constant base with a variable offset so that constant coercion
  6327. // will work with the patterns in canonical form.
  6328. bool Swapped = false;
  6329. if (isa<ConstantSDNode>(BasePtr)) {
  6330. std::swap(BasePtr, Offset);
  6331. Swapped = true;
  6332. }
// Don't create an indexed load / store with zero offset.
  6334. if (isa<ConstantSDNode>(Offset) &&
  6335. cast<ConstantSDNode>(Offset)->isNullValue())
  6336. return false;
  6337. // Try turning it into a pre-indexed load / store except when:
  6338. // 1) The new base ptr is a frame index.
  6339. // 2) If N is a store and the new base ptr is either the same as or is a
  6340. // predecessor of the value being stored.
  6341. // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  6342. // that would create a cycle.
  6343. // 4) All uses are load / store ops that use it as old base ptr.
  6344. // Check #1. Preinc'ing a frame index would require copying the stack pointer
  6345. // (plus the implicit offset) to a register to preinc anyway.
  6346. if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
  6347. return false;
  6348. // Check #2.
  6349. if (!isLoad) {
  6350. SDValue Val = cast<StoreSDNode>(N)->getValue();
  6351. if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
  6352. return false;
  6353. }
  6354. // If the offset is a constant, there may be other adds of constants that
  6355. // can be folded with this one. We should do this to avoid having to keep
  6356. // a copy of the original base pointer.
  6357. SmallVector<SDNode *, 16> OtherUses;
  6358. if (isa<ConstantSDNode>(Offset))
  6359. for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
  6360. E = BasePtr.getNode()->use_end(); I != E; ++I) {
  6361. SDNode *Use = *I;
  6362. if (Use == Ptr.getNode())
  6363. continue;
  6364. if (Use->isPredecessorOf(N))
  6365. continue;
  6366. if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
  6367. OtherUses.clear();
  6368. break;
  6369. }
  6370. SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
  6371. if (Op1.getNode() == BasePtr.getNode())
  6372. std::swap(Op0, Op1);
  6373. assert(Op0.getNode() == BasePtr.getNode() &&
  6374. "Use of ADD/SUB but not an operand");
  6375. if (!isa<ConstantSDNode>(Op1)) {
  6376. OtherUses.clear();
  6377. break;
  6378. }
  6379. // FIXME: In some cases, we can be smarter about this.
  6380. if (Op1.getValueType() != Offset.getValueType()) {
  6381. OtherUses.clear();
  6382. break;
  6383. }
  6384. OtherUses.push_back(Use);
  6385. }
  6386. if (Swapped)
  6387. std::swap(BasePtr, Offset);
  6388. // Now check for #3 and #4.
  6389. bool RealUse = false;
  6390. // Caches for hasPredecessorHelper
  6391. SmallPtrSet<const SDNode *, 32> Visited;
  6392. SmallVector<const SDNode *, 16> Worklist;
  6393. for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
  6394. E = Ptr.getNode()->use_end(); I != E; ++I) {
  6395. SDNode *Use = *I;
  6396. if (Use == N)
  6397. continue;
  6398. if (N->hasPredecessorHelper(Use, Visited, Worklist))
  6399. return false;
// If Ptr may be folded into the addressing mode of another use, then it's
// not profitable to do this transformation.
  6402. if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
  6403. RealUse = true;
  6404. }
  6405. if (!RealUse)
  6406. return false;
  6407. SDValue Result;
  6408. if (isLoad)
  6409. Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
  6410. BasePtr, Offset, AM);
  6411. else
  6412. Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
  6413. BasePtr, Offset, AM);
  6414. ++PreIndexedNodes;
  6415. ++NodesCombined;
  6416. DEBUG(dbgs() << "\nReplacing.4 ";
  6417. N->dump(&DAG);
  6418. dbgs() << "\nWith: ";
  6419. Result.getNode()->dump(&DAG);
  6420. dbgs() << '\n');
  6421. WorkListRemover DeadNodes(*this);
  6422. if (isLoad) {
  6423. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
  6424. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  6425. } else {
  6426. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  6427. }
  6428. // Finally, since the node is now dead, remove it from the graph.
  6429. DAG.DeleteNode(N);
  6430. if (Swapped)
  6431. std::swap(BasePtr, Offset);
  6432. // Replace other uses of BasePtr that can be updated to use Ptr
  6433. for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
  6434. unsigned OffsetIdx = 1;
  6435. if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
  6436. OffsetIdx = 0;
  6437. assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
  6438. BasePtr.getNode() && "Expected BasePtr operand");
  6439. // We need to replace ptr0 in the following expression:
  6440. // x0 * offset0 + y0 * ptr0 = t0
  6441. // knowing that
  6442. // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
  6443. //
// where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
// indexed load/store and the expression that needs to be rewritten.
//
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
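// For illustration (assumed values, not from the original source): for a
// pre-increment with offset1 = 4 (x1 = y1 = 1) and another use
// t0 = ptr0 + 12 (x0 = y0 = 1), this gives t0 = (12 - 4) + t1 = t1 + 8,
// i.e. the old add becomes an ADD of 8 to the updated base value t1.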
  6449. ConstantSDNode *CN =
  6450. cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
  6451. int X0, X1, Y0, Y1;
  6452. APInt Offset0 = CN->getAPIntValue();
  6453. APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
  6454. X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
  6455. Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
  6456. X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
  6457. Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
  6458. unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
  6459. APInt CNV = Offset0;
  6460. if (X0 < 0) CNV = -CNV;
  6461. if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
  6462. else CNV = CNV - Offset1;
  6463. // We can now generate the new expression.
  6464. SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
  6465. SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
  6466. SDValue NewUse = DAG.getNode(Opcode,
  6467. SDLoc(OtherUses[i]),
  6468. OtherUses[i]->getValueType(0), NewOp1, NewOp2);
  6469. DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
  6470. removeFromWorkList(OtherUses[i]);
  6471. DAG.DeleteNode(OtherUses[i]);
  6472. }
  6473. // Replace the uses of Ptr with uses of the updated base value.
  6474. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  6475. removeFromWorkList(Ptr.getNode());
  6476. DAG.DeleteNode(Ptr.getNode());
  6477. return true;
  6478. }
/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation effectively folds the add / subtract into the new
/// indexed load / store, and all uses of the add / subtract are redirected
/// to the new load / store.
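/// For illustration (ARM-style syntax assumed, not from the original
/// source): a load from r0 followed by (add r0, #4) can become the
/// post-indexed "ldr r2, [r0], #4", which loads from r0 and then updates
/// r0 to r0+4, replacing the uses of the separate add.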
  6484. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  6485. if (Level < AfterLegalizeDAG)
  6486. return false;
  6487. bool isLoad = true;
  6488. SDValue Ptr;
  6489. EVT VT;
  6490. if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
  6491. if (LD->isIndexed())
  6492. return false;
  6493. VT = LD->getMemoryVT();
  6494. if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
  6495. !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
  6496. return false;
  6497. Ptr = LD->getBasePtr();
  6498. } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
  6499. if (ST->isIndexed())
  6500. return false;
  6501. VT = ST->getMemoryVT();
  6502. if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
  6503. !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
  6504. return false;
  6505. Ptr = ST->getBasePtr();
  6506. isLoad = false;
  6507. } else {
  6508. return false;
  6509. }
  6510. if (Ptr.getNode()->hasOneUse())
  6511. return false;
  6512. for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
  6513. E = Ptr.getNode()->use_end(); I != E; ++I) {
  6514. SDNode *Op = *I;
  6515. if (Op == N ||
  6516. (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
  6517. continue;
  6518. SDValue BasePtr;
  6519. SDValue Offset;
  6520. ISD::MemIndexedMode AM = ISD::UNINDEXED;
  6521. if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
// Don't create an indexed load / store with zero offset.
  6523. if (isa<ConstantSDNode>(Offset) &&
  6524. cast<ConstantSDNode>(Offset)->isNullValue())
  6525. continue;
// Try turning it into a post-indexed load / store except when
// 1) All uses are load / store ops that use it as base ptr (and
// it may be folded into the addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
  6532. if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
  6533. continue;
  6534. // Check for #1.
  6535. bool TryNext = false;
  6536. for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
  6537. EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
  6538. SDNode *Use = *II;
  6539. if (Use == Ptr.getNode())
  6540. continue;
  6541. // If all the uses are load / store addresses, then don't do the
  6542. // transformation.
  6543. if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
  6544. bool RealUse = false;
  6545. for (SDNode::use_iterator III = Use->use_begin(),
  6546. EEE = Use->use_end(); III != EEE; ++III) {
  6547. SDNode *UseUse = *III;
  6548. if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
  6549. RealUse = true;
  6550. }
  6551. if (!RealUse) {
  6552. TryNext = true;
  6553. break;
  6554. }
  6555. }
  6556. }
  6557. if (TryNext)
  6558. continue;
  6559. // Check for #2
  6560. if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
  6561. SDValue Result = isLoad
  6562. ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
  6563. BasePtr, Offset, AM)
  6564. : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
  6565. BasePtr, Offset, AM);
  6566. ++PostIndexedNodes;
  6567. ++NodesCombined;
  6568. DEBUG(dbgs() << "\nReplacing.5 ";
  6569. N->dump(&DAG);
  6570. dbgs() << "\nWith: ";
  6571. Result.getNode()->dump(&DAG);
  6572. dbgs() << '\n');
  6573. WorkListRemover DeadNodes(*this);
  6574. if (isLoad) {
  6575. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
  6576. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  6577. } else {
  6578. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  6579. }
  6580. // Finally, since the node is now dead, remove it from the graph.
  6581. DAG.DeleteNode(N);
  6582. // Replace the uses of Use with uses of the updated base value.
  6583. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
  6584. Result.getValue(isLoad ? 1 : 0));
  6585. removeFromWorkList(Op);
  6586. DAG.DeleteNode(Op);
  6587. return true;
  6588. }
  6589. }
  6590. }
  6591. return false;
  6592. }
  6593. SDValue DAGCombiner::visitLOAD(SDNode *N) {
  6594. LoadSDNode *LD = cast<LoadSDNode>(N);
  6595. SDValue Chain = LD->getChain();
  6596. SDValue Ptr = LD->getBasePtr();
  6597. // If load is not volatile and there are no uses of the loaded value (and
  6598. // the updated indexed value in case of indexed loads), change uses of the
  6599. // chain value into uses of the chain input (i.e. delete the dead load).
  6600. if (!LD->isVolatile()) {
  6601. if (N->getValueType(1) == MVT::Other) {
  6602. // Unindexed loads.
  6603. if (!N->hasAnyUseOfValue(0)) {
  6604. // It's not safe to use the two value CombineTo variant here. e.g.
  6605. // v1, chain2 = load chain1, loc
  6606. // v2, chain3 = load chain2, loc
  6607. // v3 = add v2, c
  6608. // Now we replace use of chain2 with chain1. This makes the second load
  6609. // isomorphic to the one we are deleting, and thus makes this load live.
  6610. DEBUG(dbgs() << "\nReplacing.6 ";
  6611. N->dump(&DAG);
  6612. dbgs() << "\nWith chain: ";
  6613. Chain.getNode()->dump(&DAG);
  6614. dbgs() << "\n");
  6615. WorkListRemover DeadNodes(*this);
  6616. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  6617. if (N->use_empty()) {
  6618. removeFromWorkList(N);
  6619. DAG.DeleteNode(N);
  6620. }
  6621. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  6622. }
  6623. } else {
  6624. // Indexed loads.
  6625. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
  6626. if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
  6627. SDValue Undef = DAG.getUNDEF(N->getValueType(0));
  6628. DEBUG(dbgs() << "\nReplacing.7 ";
  6629. N->dump(&DAG);
  6630. dbgs() << "\nWith: ";
  6631. Undef.getNode()->dump(&DAG);
  6632. dbgs() << " and 2 other values\n");
  6633. WorkListRemover DeadNodes(*this);
  6634. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
  6635. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
  6636. DAG.getUNDEF(N->getValueType(1)));
  6637. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
  6638. removeFromWorkList(N);
  6639. DAG.DeleteNode(N);
  6640. return SDValue(N, 0); // Return N so it doesn't get rechecked!
  6641. }
  6642. }
  6643. }
  6644. // If this load is directly stored, replace the load value with the stored
  6645. // value.
  6646. // TODO: Handle store large -> read small portion.
  6647. // TODO: Handle TRUNCSTORE/LOADEXT
  6648. if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
  6649. if (ISD::isNON_TRUNCStore(Chain.getNode())) {
  6650. StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
  6651. if (PrevST->getBasePtr() == Ptr &&
  6652. PrevST->getValue().getValueType() == N->getValueType(0))
  6653. return CombineTo(N, Chain.getOperand(1), Chain);
  6654. }
  6655. }
  6656. // Try to infer better alignment information than the load already has.
  6657. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
  6658. if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
  6659. if (Align > LD->getMemOperand()->getBaseAlignment()) {
  6660. SDValue NewLoad =
  6661. DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
  6662. LD->getValueType(0),
  6663. Chain, Ptr, LD->getPointerInfo(),
  6664. LD->getMemoryVT(),
  6665. LD->isVolatile(), LD->isNonTemporal(), Align,
  6666. LD->getTBAAInfo());
  6667. return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
  6668. }
  6669. }
  6670. }
  6671. bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
  6672. TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
  6673. if (UseAA) {
  6674. // Walk up chain skipping non-aliasing memory nodes.
  6675. SDValue BetterChain = FindBetterChain(N, Chain);
  6676. // If there is a better chain.
  6677. if (Chain != BetterChain) {
  6678. SDValue ReplLoad;
// Replace the chain to avoid the dependency.
  6680. if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
  6681. ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
  6682. BetterChain, Ptr, LD->getMemOperand());
  6683. } else {
  6684. ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
  6685. LD->getValueType(0),
  6686. BetterChain, Ptr, LD->getMemoryVT(),
  6687. LD->getMemOperand());
  6688. }
  6689. // Create token factor to keep old chain connected.
  6690. SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
  6691. MVT::Other, Chain, ReplLoad.getValue(1));
  6692. // Make sure the new and old chains are cleaned up.
  6693. AddToWorkList(Token.getNode());
  6694. // Replace uses with load result and token factor. Don't add users
  6695. // to work list.
  6696. return CombineTo(N, ReplLoad.getValue(0), Token, false);
  6697. }
  6698. }
  6699. // Try transforming N to an indexed load.
  6700. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
  6701. return SDValue(N, 0);
// Try to slice N up into smaller, more direct loads if the slices are
// mapped to different register banks or pairing can take place.
  6704. if (SliceUpLoad(N))
  6705. return SDValue(N, 0);
  6706. return SDValue();
  6707. }
  6708. namespace {
/// \brief Helper structure used to slice a load into smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
  6718. ///
  6719. /// SliceTy is deduced from the number of bits that are actually used to
  6720. /// build Inst.
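/// For illustration (little-endian i64 load assumed, not from the original
/// source): with Ty1 = i64, Amount = 32 and Ty2 = i32, only bits [32, 63]
/// are used, so SliceTy = i32 and the slice loads from Base + 4.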
  6721. struct LoadedSlice {
  6722. /// \brief Helper structure used to compute the cost of a slice.
  6723. struct Cost {
  6724. /// Are we optimizing for code size.
  6725. bool ForCodeSize;
/// Various costs.
  6727. unsigned Loads;
  6728. unsigned Truncates;
  6729. unsigned CrossRegisterBanksCopies;
  6730. unsigned ZExts;
  6731. unsigned Shift;
  6732. Cost(bool ForCodeSize = false)
  6733. : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
  6734. CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
  6735. /// \brief Get the cost of one isolated slice.
  6736. Cost(const LoadedSlice &LS, bool ForCodeSize = false)
  6737. : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
  6738. CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
  6739. EVT TruncType = LS.Inst->getValueType(0);
  6740. EVT LoadedType = LS.getLoadedType();
  6741. if (TruncType != LoadedType &&
  6742. !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
  6743. ZExts = 1;
  6744. }
/// \brief Account for slicing gain in the current cost.
/// Slicing provides a few gains, like removing a shift or a
/// truncate. This method allows growing the cost of the original
/// load by the gain from this slice.
  6749. void addSliceGain(const LoadedSlice &LS) {
  6750. // Each slice saves a truncate.
  6751. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
  6752. if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
  6753. LS.Inst->getOperand(0).getValueType()))
  6754. ++Truncates;
  6755. // If there is a shift amount, this slice gets rid of it.
  6756. if (LS.Shift)
  6757. ++Shift;
  6758. // If this slice can merge a cross register bank copy, account for it.
  6759. if (LS.canMergeExpensiveCrossRegisterBankCopy())
  6760. ++CrossRegisterBanksCopies;
  6761. }
  6762. Cost &operator+=(const Cost &RHS) {
  6763. Loads += RHS.Loads;
  6764. Truncates += RHS.Truncates;
  6765. CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
  6766. ZExts += RHS.ZExts;
  6767. Shift += RHS.Shift;
  6768. return *this;
  6769. }
  6770. bool operator==(const Cost &RHS) const {
  6771. return Loads == RHS.Loads && Truncates == RHS.Truncates &&
  6772. CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
  6773. ZExts == RHS.ZExts && Shift == RHS.Shift;
  6774. }
  6775. bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
  6776. bool operator<(const Cost &RHS) const {
// Assume cross-register-bank copies are as expensive as loads.
  6778. // FIXME: Do we want some more target hooks?
  6779. unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
  6780. unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
  6781. // Unless we are optimizing for code size, consider the
  6782. // expensive operation first.
  6783. if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
  6784. return ExpensiveOpsLHS < ExpensiveOpsRHS;
  6785. return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
  6786. (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
  6787. }
  6788. bool operator>(const Cost &RHS) const { return RHS < *this; }
  6789. bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
  6790. bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  6791. };
// The last instruction that represents the slice. This should be a
// truncate instruction.
  6794. SDNode *Inst;
  6795. // The original load instruction.
  6796. LoadSDNode *Origin;
  6797. // The right shift amount in bits from the original load.
  6798. unsigned Shift;
// The DAG from which Origin comes.
// This is used to get some contextual information about legal types, etc.
  6801. SelectionDAG *DAG;
  6802. LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
  6803. unsigned Shift = 0, SelectionDAG *DAG = NULL)
  6804. : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
  6805. LoadedSlice(const LoadedSlice &LS)
  6806. : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
/// \brief Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth bits wide, with used bits set to 1 and
/// unused bits set to 0.
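/// For example (illustrative): an i16 truncate of an i64 load with
/// Shift = 16 yields 0x00000000FFFF0000.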
  6810. APInt getUsedBits() const {
  6811. // Reproduce the trunc(lshr) sequence:
  6812. // - Start from the truncated value.
  6813. // - Zero extend to the desired bit width.
  6814. // - Shift left.
  6815. assert(Origin && "No original load to compare against.");
  6816. unsigned BitWidth = Origin->getValueSizeInBits(0);
  6817. assert(Inst && "This slice is not bound to an instruction");
  6818. assert(Inst->getValueSizeInBits(0) <= BitWidth &&
  6819. "Extracted slice is bigger than the whole type!");
  6820. APInt UsedBits(Inst->getValueSizeInBits(0), 0);
  6821. UsedBits.setAllBits();
  6822. UsedBits = UsedBits.zext(BitWidth);
  6823. UsedBits <<= Shift;
  6824. return UsedBits;
  6825. }
  6826. /// \brief Get the size of the slice to be loaded in bytes.
  6827. unsigned getLoadedSize() const {
  6828. unsigned SliceSize = getUsedBits().countPopulation();
  6829. assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
  6830. return SliceSize / 8;
  6831. }
  6832. /// \brief Get the type that will be loaded for this slice.
  6833. /// Note: This may not be the final type for the slice.
  6834. EVT getLoadedType() const {
  6835. assert(DAG && "Missing context");
  6836. LLVMContext &Ctxt = *DAG->getContext();
  6837. return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  6838. }
  6839. /// \brief Get the alignment of the load used for this slice.
  6840. unsigned getAlignment() const {
  6841. unsigned Alignment = Origin->getAlignment();
  6842. unsigned Offset = getOffsetFromBase();
  6843. if (Offset != 0)
  6844. Alignment = MinAlign(Alignment, Alignment + Offset);
  6845. return Alignment;
  6846. }
  6847. /// \brief Check if this slice can be rewritten with legal operations.
  6848. bool isLegal() const {
  6849. // An invalid slice is not legal.
  6850. if (!Origin || !Inst || !DAG)
  6851. return false;
// Offsets are for indexed loads only; we do not handle that.
  6853. if (Origin->getOffset().getOpcode() != ISD::UNDEF)
  6854. return false;
  6855. const TargetLowering &TLI = DAG->getTargetLoweringInfo();
  6856. // Check that the type is legal.
  6857. EVT SliceType = getLoadedType();
  6858. if (!TLI.isTypeLegal(SliceType))
  6859. return false;
  6860. // Check that the load is legal for this type.
  6861. if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
  6862. return false;
  6863. // Check that the offset can be computed.
  6864. // 1. Check its type.
  6865. EVT PtrType = Origin->getBasePtr().getValueType();
  6866. if (PtrType == MVT::Untyped || PtrType.isExtended())
  6867. return false;
  6868. // 2. Check that it fits in the immediate.
  6869. if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
  6870. return false;
  6871. // 3. Check that the computation is legal.
  6872. if (!TLI.isOperationLegal(ISD::ADD, PtrType))
  6873. return false;
  6874. // Check that the zext is legal if it needs one.
  6875. EVT TruncateType = Inst->getValueType(0);
  6876. if (TruncateType != SliceType &&
  6877. !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
  6878. return false;
  6879. return true;
  6880. }
  6881. /// \brief Get the offset in bytes of this slice in the original chunk of
  6882. /// bits.
  6883. /// \pre DAG != NULL.
  6884. uint64_t getOffsetFromBase() const {
  6885. assert(DAG && "Missing context.");
  6886. bool IsBigEndian =
  6887. DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
  6888. assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
  6889. uint64_t Offset = Shift / 8;
  6890. unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
  6891. assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
  6892. "The size of the original loaded type is not a multiple of a"
  6893. " byte.");
// If Offset is bigger than TySizeInBytes, it means we are loading all
// zeros. This should have been optimized away earlier in the pipeline.
  6896. assert(TySizeInBytes > Offset &&
  6897. "Invalid shift amount for given loaded size");
  6898. if (IsBigEndian)
  6899. Offset = TySizeInBytes - Offset - getLoadedSize();
  6900. return Offset;
  6901. }
  6902. /// \brief Generate the sequence of instructions to load the slice
  6903. /// represented by this object and redirect the uses of this slice to
  6904. /// this new sequence of instructions.
  6905. /// \pre this->Inst && this->Origin are valid Instructions and this
  6906. /// object passed the legal check: LoadedSlice::isLegal returned true.
  6907. /// \return The last instruction of the sequence used to load the slice.
  6908. SDValue loadSlice() const {
  6909. assert(Inst && Origin && "Unable to replace a non-existing slice.");
  6910. const SDValue &OldBaseAddr = Origin->getBasePtr();
  6911. SDValue BaseAddr = OldBaseAddr;
// Get the offset into that chunk of bytes w.r.t. the endianness.
  6913. int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
  6914. assert(Offset >= 0 && "Offset too big to fit in int64_t!");
  6915. if (Offset) {
  6916. // BaseAddr = BaseAddr + Offset.
  6917. EVT ArithType = BaseAddr.getValueType();
  6918. BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
  6919. DAG->getConstant(Offset, ArithType));
  6920. }
  6921. // Create the type of the loaded slice according to its size.
  6922. EVT SliceType = getLoadedType();
  6923. // Create the load for the slice.
  6924. SDValue LastInst = DAG->getLoad(
  6925. SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
  6926. Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
  6927. Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
// If the final type is not the same as the loaded type, this means that
// we have to pad with zeros. Create a zero extend for that.
  6930. EVT FinalType = Inst->getValueType(0);
  6931. if (SliceType != FinalType)
  6932. LastInst =
  6933. DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
  6934. return LastInst;
  6935. }
  6936. /// \brief Check if this slice can be merged with an expensive cross register
  6937. /// bank copy. E.g.,
  6938. /// i = load i32
  6939. /// f = bitcast i32 i to float
  6940. bool canMergeExpensiveCrossRegisterBankCopy() const {
  6941. if (!Inst || !Inst->hasOneUse())
  6942. return false;
  6943. SDNode *Use = *Inst->use_begin();
  6944. if (Use->getOpcode() != ISD::BITCAST)
  6945. return false;
  6946. assert(DAG && "Missing context");
  6947. const TargetLowering &TLI = DAG->getTargetLoweringInfo();
  6948. EVT ResVT = Use->getValueType(0);
  6949. const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
  6950. const TargetRegisterClass *ArgRC =
  6951. TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
  6952. if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
  6953. return false;
  6954. // At this point, we know that we perform a cross-register-bank copy.
  6955. // Check if it is expensive.
  6956. const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
// Assume bitcasts are cheap unless the two register classes do not
// share a common subclass.
  6959. if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
  6960. return false;
  6961. // Check if it will be merged with the load.
  6962. // 1. Check the alignment constraint.
  6963. unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
  6964. ResVT.getTypeForEVT(*DAG->getContext()));
  6965. if (RequiredAlignment > getAlignment())
  6966. return false;
  6967. // 2. Check that the load is a legal operation for that type.
  6968. if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
  6969. return false;
  6970. // 3. Check that we do not have a zext in the way.
  6971. if (Inst->getValueType(0) != getLoadedType())
  6972. return false;
  6973. return true;
  6974. }
  6975. };
  6976. }
/// \brief Sorts LoadedSlice objects according to their offsets.
  6978. struct LoadedSliceSorter {
  6979. bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
  6980. assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
  6981. return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  6982. }
  6983. };
  6984. /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
  6985. /// \p UsedBits looks like 0..0 1..1 0..0.
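/// For example (illustrative): 0x0FF0 is dense, whereas 0x0F0F is not.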
  6986. static bool areUsedBitsDense(const APInt &UsedBits) {
  6987. // If all the bits are one, this is dense!
  6988. if (UsedBits.isAllOnesValue())
  6989. return true;
  6990. // Get rid of the unused bits on the right.
  6991. APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  6992. // Get rid of the unused bits on the left.
  6993. if (NarrowedUsedBits.countLeadingZeros())
  6994. NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  6995. // Check that the chunk of bits is completely used.
  6996. return NarrowedUsedBits.isAllOnesValue();
  6997. }
  6998. /// \brief Check whether or not \p First and \p Second are next to each other
  6999. /// in memory. This means that there is no hole between the bits loaded
  7000. /// by \p First and the bits loaded by \p Second.
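/// For example (illustrative): slices covering bits [0, 15] and [16, 31]
/// of an i32 load are next to each other; slices covering [0, 7] and
/// [16, 23] leave a hole and are not.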
  7001. static bool areSlicesNextToEachOther(const LoadedSlice &First,
  7002. const LoadedSlice &Second) {
  7003. assert(First.Origin == Second.Origin && First.Origin &&
  7004. "Unable to match different memory origins.");
  7005. APInt UsedBits = First.getUsedBits();
  7006. assert((UsedBits & Second.getUsedBits()) == 0 &&
  7007. "Slices are not supposed to overlap.");
  7008. UsedBits |= Second.getUsedBits();
  7009. return areUsedBitsDense(UsedBits);
  7010. }
/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
  7013. /// \pre \p GlobalLSCost should account for at least as many loads as
  7014. /// there is in the slices in \p LoadedSlices.
  7015. static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
  7016. LoadedSlice::Cost &GlobalLSCost) {
  7017. unsigned NumberOfSlices = LoadedSlices.size();
// If there are fewer than 2 elements, no pairing is possible.
  7019. if (NumberOfSlices < 2)
  7020. return;
  7021. // Sort the slices so that elements that are likely to be next to each
  7022. // other in memory are next to each other in the list.
  7023. std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
  7024. const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. second) potential candidate
// to be placed in a paired load.
  7027. const LoadedSlice *First = NULL;
  7028. const LoadedSlice *Second = NULL;
  7029. for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
  7030. // Set the beginning of the pair.
  7031. First = Second) {
  7032. Second = &LoadedSlices[CurrSlice];
  7033. // If First is NULL, it means we start a new pair.
  7034. // Get to the next slice.
  7035. if (!First)
  7036. continue;
  7037. EVT LoadedType = First->getLoadedType();
  7038. // If the types of the slices are different, we cannot pair them.
  7039. if (LoadedType != Second->getLoadedType())
  7040. continue;
  7041. // Check if the target supplies paired loads for this type.
  7042. unsigned RequiredAlignment = 0;
  7043. if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// Move to the next pair; this type is hopeless.
  7045. Second = NULL;
  7046. continue;
  7047. }
  7048. // Check if we meet the alignment requirement.
  7049. if (RequiredAlignment > First->getAlignment())
  7050. continue;
  7051. // Check that both loads are next to each other in memory.
  7052. if (!areSlicesNextToEachOther(*First, *Second))
  7053. continue;
  7054. assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
  7055. --GlobalLSCost.Loads;
  7056. // Move to the next pair.
  7057. Second = NULL;
  7058. }
  7059. }
  7060. /// \brief Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
  7062. /// involved slices (1) which are (2) next to each other in memory, and
  7063. /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
  7064. ///
  7065. /// Note: The order of the elements in \p LoadedSlices may be modified, but not
  7066. /// the elements themselves.
  7067. ///
  7068. /// FIXME: When the cost model will be mature enough, we can relax
  7069. /// constraints (1) and (2).
  7070. static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
  7071. const APInt &UsedBits, bool ForCodeSize) {
  7072. unsigned NumberOfSlices = LoadedSlices.size();
  7073. if (StressLoadSlicing)
  7074. return NumberOfSlices > 1;
  7075. // Check (1).
  7076. if (NumberOfSlices != 2)
  7077. return false;
  7078. // Check (2).
  7079. if (!areUsedBitsDense(UsedBits))
  7080. return false;
  7081. // Check (3).
  7082. LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  7083. // The original code has one big load.
  7084. OrigCost.Loads = 1;
  7085. for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
  7086. const LoadedSlice &LS = LoadedSlices[CurrSlice];
  7087. // Accumulate the cost of all the slices.
  7088. LoadedSlice::Cost SliceCost(LS, ForCodeSize);
  7089. GlobalSlicingCost += SliceCost;
  7090. // Account as cost in the original configuration the gain obtained
  7091. // with the current slices.
  7092. OrigCost.addSliceGain(LS);
  7093. }
  7094. // If the target supports paired load, adjust the cost accordingly.
  7095. adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  7096. return OrigCost > GlobalSlicingCost;
  7097. }
/// \brief If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
  7104. bool DAGCombiner::SliceUpLoad(SDNode *N) {
  7105. if (Level < AfterLegalizeDAG)
  7106. return false;
  7107. LoadSDNode *LD = cast<LoadSDNode>(N);
  7108. if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
  7109. !LD->getValueType(0).isInteger())
  7110. return false;
  7111. // Keep track of already used bits to detect overlapping values.
  7112. // In that case, we will just abort the transformation.
  7113. APInt UsedBits(LD->getValueSizeInBits(0), 0);
  7114. SmallVector<LoadedSlice, 4> LoadedSlices;
  7115. // Check if this load is used as several smaller chunks of bits.
  7116. // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  7117. // of computation for each trunc.
  7118. for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
  7119. UI != UIEnd; ++UI) {
  7120. // Skip the uses of the chain.
  7121. if (UI.getUse().getResNo() != 0)
  7122. continue;
  7123. SDNode *User = *UI;
  7124. unsigned Shift = 0;
  7125. // Check if this is a trunc(lshr).
  7126. if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
  7127. isa<ConstantSDNode>(User->getOperand(1))) {
  7128. Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
  7129. User = *User->use_begin();
  7130. }
// At this point, User is a TRUNCATE, i.e., we encountered either trunc
// or trunc(lshr).
  7133. if (User->getOpcode() != ISD::TRUNCATE)
  7134. return false;
// The width of the type must be a power of 2 and at least 8 bits.
// Otherwise the load cannot be represented in LLVM IR.
// Moreover, if we shifted by an amount that is not a multiple of 8 bits,
// the slice would not start on a byte boundary. We do not support that.
unsigned Width = User->getValueSizeInBits(0);
if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
return false;
  7142. // Build the slice for this chain of computations.
  7143. LoadedSlice LS(User, LD, Shift, &DAG);
  7144. APInt CurrentUsedBits = LS.getUsedBits();
  7145. // Check if this slice overlaps with another.
  7146. if ((CurrentUsedBits & UsedBits) != 0)
  7147. return false;
  7148. // Update the bits used globally.
  7149. UsedBits |= CurrentUsedBits;
  7150. // Check if the new slice would be legal.
  7151. if (!LS.isLegal())
  7152. return false;
  7153. // Record the slice.
  7154. LoadedSlices.push_back(LS);
  7155. }
  7156. // Abort slicing if it does not seem to be profitable.
  7157. if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
  7158. return false;
  7159. ++SlicedLoads;
  7160. // Rewrite each chain to use an independent load.
  7161. // By construction, each chain can be represented by a unique load.
  7162. // Prepare the argument for the new token factor for all the slices.
  7163. SmallVector<SDValue, 8> ArgChains;
  7164. for (SmallVectorImpl<LoadedSlice>::const_iterator
  7165. LSIt = LoadedSlices.begin(),
  7166. LSItEnd = LoadedSlices.end();
  7167. LSIt != LSItEnd; ++LSIt) {
  7168. SDValue SliceInst = LSIt->loadSlice();
  7169. CombineTo(LSIt->Inst, SliceInst, true);
  7170. if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
  7171. SliceInst = SliceInst.getOperand(0);
  7172. assert(SliceInst->getOpcode() == ISD::LOAD &&
  7173. "It takes more than a zext to get to the loaded slice!!");
  7174. ArgChains.push_back(SliceInst.getValue(1));
  7175. }
  7176. SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
  7177. &ArgChains[0], ArgChains.size());
  7178. DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  7179. return true;
  7180. }
/// CheckForMaskedLoad - Check to see if V is (and (load ptr), imm), where
/// the load has specific bytes cleared out. If so, return the byte size
/// being masked out and the shift amount, both in bytes.
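/// For illustration (i32 value assumed, not from the original source):
/// V = (and (load ptr), 0xFFFF00FF) clears only byte 1, so the result is
/// {1, 1}: one byte masked out, at a shift of one byte.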
  7184. static std::pair<unsigned, unsigned>
  7185. CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  7186. std::pair<unsigned, unsigned> Result(0, 0);
  7187. // Check for the structure we're looking for.
  7188. if (V->getOpcode() != ISD::AND ||
  7189. !isa<ConstantSDNode>(V->getOperand(1)) ||
  7190. !ISD::isNormalLoad(V->getOperand(0).getNode()))
  7191. return Result;
  7192. // Check the chain and pointer.
  7193. LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  7194. if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
  7195. // The store should be chained directly to the load or be an operand of a
  7196. // tokenfactor.
  7197. if (LD == Chain.getNode())
  7198. ; // ok.
  7199. else if (Chain->getOpcode() != ISD::TokenFactor)
  7200. return Result; // Fail.
  7201. else {
  7202. bool isOk = false;
  7203. for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
  7204. if (Chain->getOperand(i).getNode() == LD) {
  7205. isOk = true;
  7206. break;
  7207. }
  7208. if (!isOk) return Result;
  7209. }
  7210. // This only handles simple types.
  7211. if (V.getValueType() != MVT::i16 &&
  7212. V.getValueType() != MVT::i32 &&
  7213. V.getValueType() != MVT::i64)
  7214. return Result;
// Check the constant mask. Invert it so that the bits being masked out
// are 1 and the bits being kept are 0. Use getSExtValue so that leading
// bits follow the sign bit for uniformity.
  7218. uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  7219. unsigned NotMaskLZ = countLeadingZeros(NotMask);
  7220. if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
  7221. unsigned NotMaskTZ = countTrailingZeros(NotMask);
  7222. if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
  7223. if (NotMaskLZ == 64) return Result; // All zero mask.
  7224. // See if we have a continuous run of bits. If so, we have 0*1+0*
  7225. if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
  7226. return Result;
  7227. // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  7228. if (V.getValueType() != MVT::i64 && NotMaskLZ)
  7229. NotMaskLZ -= 64-V.getValueSizeInBits();
  7230. unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  7231. switch (MaskedBytes) {
  7232. case 1:
  7233. case 2:
  7234. case 4: break;
  7235. default: return Result; // All one mask, or 5-byte mask.
  7236. }
// Verify that the masked-out region starts at a byte offset that is a
// multiple of MaskedBytes, so the narrow access is aligned the same as
// the access width.
  7239. if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
  7240. Result.first = MaskedBytes;
  7241. Result.second = NotMaskTZ/8;
  7242. return Result;
  7243. }
  7244. /// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
  7245. /// provides a value as specified by MaskInfo. If so, replace the specified
  7246. /// store with a narrower store of truncated IVal.
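/// For illustration (assumed values, not from the original source): with
/// MaskInfo = {1, 1} and IVal known to be zero outside byte 1, the wide
/// store can be replaced by a one-byte store of (trunc (srl IVal, 8)) at
/// byte offset 1 on a little-endian target.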
  7247. static SDNode *
  7248. ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
  7249. SDValue IVal, StoreSDNode *St,
  7250. DAGCombiner *DC) {
  7251. unsigned NumBytes = MaskInfo.first;
  7252. unsigned ByteShift = MaskInfo.second;
  7253. SelectionDAG &DAG = DC->getDAG();
  7254. // Check to see if IVal is all zeros in the part being masked in by the 'or'
  7255. // that uses this. If not, this is not a replacement.
  7256. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
  7257. ByteShift*8, (ByteShift+NumBytes)*8);
  7258. if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
  7259. // Check that it is legal on the target to do this. It is legal if the new
  7260. // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  7261. // legalization.
  7262. MVT VT = MVT::getIntegerVT(NumBytes*8);
  7263. if (!DC->isTypeLegal(VT))
  7264. return 0;
  7265. // Okay, we can do this! Replace the 'St' store with a store of IVal that is
  7266. // shifted by ByteShift and truncated down to NumBytes.
  7267. if (ByteShift)
  7268. IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
  7269. DAG.getConstant(ByteShift*8,
  7270. DC->getShiftAmountTy(IVal.getValueType())));
  7271. // Figure out the offset for the store and the alignment of the access.
  7272. unsigned StOffset;
  7273. unsigned NewAlign = St->getAlignment();
  7274. if (DAG.getTargetLoweringInfo().isLittleEndian())
  7275. StOffset = ByteShift;
  7276. else
  7277. StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
  7278. SDValue Ptr = St->getBasePtr();
  7279. if (StOffset) {
  7280. Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
  7281. Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
  7282. NewAlign = MinAlign(NewAlign, StOffset);
  7283. }
  7284. // Truncate down to the new size.
  7285. IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
  7286. ++OpsNarrowed;
  7287. return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
  7288. St->getPointerInfo().getWithOffset(StOffset),
  7289. false, false, NewAlign).getNode();
  7290. }
  7291. /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
  7292. /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
  7293. /// of the loaded bits, try narrowing the load and store if it would end up
  7294. /// being a win for performance or code size.
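/// For illustration (little-endian target assumed, not from the original
/// source): "store (or (load p), 0x00FF0000), p" only touches byte 2, so
/// the sequence can be narrowed to an i8 load / or / store at p + 2.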
  7295. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  7296. StoreSDNode *ST = cast<StoreSDNode>(N);
  7297. if (ST->isVolatile())
  7298. return SDValue();
  7299. SDValue Chain = ST->getChain();
  7300. SDValue Value = ST->getValue();
  7301. SDValue Ptr = ST->getBasePtr();
  7302. EVT VT = Value.getValueType();
  7303. if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
  7304. return SDValue();
  7305. unsigned Opc = Value.getOpcode();
  7306. // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  7307. // is a byte mask indicating a consecutive number of bytes, check to see if
  7308. // Y is known to provide just those bytes. If so, we try to replace the
  7309. // load + replace + store sequence with a single (narrower) store, which makes
  7310. // the load dead.
  7311. if (Opc == ISD::OR) {
  7312. std::pair<unsigned, unsigned> MaskedLoad;
  7313. MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
  7314. if (MaskedLoad.first)
  7315. if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
  7316. Value.getOperand(1), ST,this))
  7317. return SDValue(NewST, 0);
  7318. // Or is commutative, so try swapping X and Y.
  7319. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
  7320. if (MaskedLoad.first)
  7321. if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
  7322. Value.getOperand(0), ST,this))
  7323. return SDValue(NewST, 0);
  7324. }
  7325. if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
  7326. Value.getOperand(1).getOpcode() != ISD::Constant)
  7327. return SDValue();
  7328. SDValue N0 = Value.getOperand(0);
  7329. if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
  7330. Chain == SDValue(N0.getNode(), 1)) {
  7331. LoadSDNode *LD = cast<LoadSDNode>(N0);
  7332. if (LD->getBasePtr() != Ptr ||
  7333. LD->getPointerInfo().getAddrSpace() !=
  7334. ST->getPointerInfo().getAddrSpace())
  7335. return SDValue();
  7336. // Find the type to narrow it the load / op / store to.
  7337. SDValue N1 = Value.getOperand(1);
  7338. unsigned BitWidth = N1.getValueSizeInBits();
  7339. APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
  7340. if (Opc == ISD::AND)
  7341. Imm ^= APInt::getAllOnesValue(BitWidth);
  7342. if (Imm == 0 || Imm.isAllOnesValue())
  7343. return SDValue();
  7344. unsigned ShAmt = Imm.countTrailingZeros();
  7345. unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
  7346. unsigned NewBW = NextPowerOf2(MSB - ShAmt);
  7347. EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
  7348. while (NewBW < BitWidth &&
  7349. !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
  7350. TLI.isNarrowingProfitable(VT, NewVT))) {
  7351. NewBW = NextPowerOf2(NewBW);
  7352. NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
  7353. }
  7354. if (NewBW >= BitWidth)
  7355. return SDValue();
// If the changed lsb does not start at a boundary of the new type's
// bitwidth, start at the previous boundary.
  7358. if (ShAmt % NewBW)
  7359. ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
  7360. APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
  7361. std::min(BitWidth, ShAmt + NewBW));
  7362. if ((Imm & Mask) == Imm) {
  7363. APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
  7364. if (Opc == ISD::AND)
  7365. NewImm ^= APInt::getAllOnesValue(NewBW);
  7366. uint64_t PtrOff = ShAmt / 8;
  7367. // For big endian targets, we need to adjust the offset to the pointer to
  7368. // load the correct bytes.
  7369. if (TLI.isBigEndian())
  7370. PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
  7371. unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
  7372. Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
  7373. if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
  7374. return SDValue();
  7375. SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
  7376. Ptr.getValueType(), Ptr,
  7377. DAG.getConstant(PtrOff, Ptr.getValueType()));
  7378. SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
  7379. LD->getChain(), NewPtr,
  7380. LD->getPointerInfo().getWithOffset(PtrOff),
  7381. LD->isVolatile(), LD->isNonTemporal(),
  7382. LD->isInvariant(), NewAlign,
  7383. LD->getTBAAInfo());
  7384. SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
  7385. DAG.getConstant(NewImm, NewVT));
  7386. SDValue NewST = DAG.getStore(Chain, SDLoc(N),
  7387. NewVal, NewPtr,
  7388. ST->getPointerInfo().getWithOffset(PtrOff),
  7389. false, false, NewAlign);
  7390. AddToWorkList(NewPtr.getNode());
  7391. AddToWorkList(NewLD.getNode());
  7392. AddToWorkList(NewVal.getNode());
  7393. WorkListRemover DeadNodes(*this);
  7394. DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
  7395. ++OpsNarrowed;
  7396. return NewST;
  7397. }
  7398. }
  7399. return SDValue();
  7400. }
  7401. /// TransformFPLoadStorePair - For a given floating point load / store pair,
  7402. /// if the load value isn't used by any other operations, then consider
  7403. /// transforming the pair to integer load / store operations if the target
  7404. /// deems the transformation profitable.
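/// For illustration (not from the original source): a float that is loaded
/// and immediately stored ("store (load p), q") can be performed as an i32
/// load / store, avoiding a round trip through the FP register file.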
  7405. SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  7406. StoreSDNode *ST = cast<StoreSDNode>(N);
  7407. SDValue Chain = ST->getChain();
  7408. SDValue Value = ST->getValue();
  7409. if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
  7410. Value.hasOneUse() &&
  7411. Chain == SDValue(Value.getNode(), 1)) {
  7412. LoadSDNode *LD = cast<LoadSDNode>(Value);
  7413. EVT VT = LD->getMemoryVT();
  7414. if (!VT.isFloatingPoint() ||
  7415. VT != ST->getMemoryVT() ||
  7416. LD->isNonTemporal() ||
  7417. ST->isNonTemporal() ||
  7418. LD->getPointerInfo().getAddrSpace() != 0 ||
  7419. ST->getPointerInfo().getAddrSpace() != 0)
  7420. return SDValue();
  7421. EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
  7422. if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
  7423. !TLI.isOperationLegal(ISD::STORE, IntVT) ||
  7424. !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
  7425. !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
  7426. return SDValue();
  7427. unsigned LDAlign = LD->getAlignment();
  7428. unsigned STAlign = ST->getAlignment();
  7429. Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
  7430. unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
  7431. if (LDAlign < ABIAlign || STAlign < ABIAlign)
  7432. return SDValue();
  7433. SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
  7434. LD->getChain(), LD->getBasePtr(),
  7435. LD->getPointerInfo(),
  7436. false, false, false, LDAlign);
  7437. SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
  7438. NewLD, ST->getBasePtr(),
  7439. ST->getPointerInfo(),
  7440. false, false, STAlign);
  7441. AddToWorkList(NewLD.getNode());
  7442. AddToWorkList(NewST.getNode());
  7443. WorkListRemover DeadNodes(*this);
  7444. DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
  7445. ++LdStFP2Int;
  7446. return NewST;
  7447. }
  7448. return SDValue();
  7449. }
  7450. /// Helper struct to parse and store a memory address as base + index + offset.
  7451. /// We ignore sign extensions when it is safe to do so.
  7452. /// The following two expressions are not equivalent. To differentiate we need
  7453. /// to store whether there was a sign extension involved in the index
  7454. /// computation.
  7455. /// (load (i64 add (i64 copyfromreg %c)
  7456. /// (i64 signextend (add (i8 load %index)
  7457. /// (i8 1))))
  7458. /// vs
  7459. ///
  7460. /// (load (i64 add (i64 copyfromreg %c)
  7461. /// (i64 signextend (i32 add (i32 signextend (i8 load %index))
  7462. /// (i32 1)))))
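/// For illustration (hypothetical shapes), match() below decomposes:
///   (add %base, 16)                --> Base = %base, Index = none, Offset = 16
///   (add %base, (add %idx, 8))     --> Base = %base, Index = %idx, Offset = 8
///   (add %base, (mul %iv, %size))  --> Base = the whole ADD, Offset = 0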
  7463. struct BaseIndexOffset {
  7464. SDValue Base;
  7465. SDValue Index;
  7466. int64_t Offset;
  7467. bool IsIndexSignExt;
  7468. BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
  7469. BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
  7470. bool IsIndexSignExt) :
  7471. Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
  7472. bool equalBaseIndex(const BaseIndexOffset &Other) {
  7473. return Other.Base == Base && Other.Index == Index &&
  7474. Other.IsIndexSignExt == IsIndexSignExt;
  7475. }
7476. /// Parses the tree rooted at Ptr into base, index, and offset addresses.
  7477. static BaseIndexOffset match(SDValue Ptr) {
  7478. bool IsIndexSignExt = false;
7479. // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
7480. // instruction, then it could be just the BASE, or anything else we don't
7481. // know how to handle. Just use Ptr as BASE and give up.
  7482. if (Ptr->getOpcode() != ISD::ADD)
  7483. return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
  7484. // We know that we have at least an ADD instruction. Try to pattern match
  7485. // the simple case of BASE + OFFSET.
  7486. if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
  7487. int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
  7488. return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
  7489. IsIndexSignExt);
  7490. }
  7491. // Inside a loop the current BASE pointer is calculated using an ADD and a
  7492. // MUL instruction. In this case Ptr is the actual BASE pointer.
  7493. // (i64 add (i64 %array_ptr)
  7494. // (i64 mul (i64 %induction_var)
  7495. // (i64 %element_size)))
  7496. if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
  7497. return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
  7498. // Look at Base + Index + Offset cases.
  7499. SDValue Base = Ptr->getOperand(0);
  7500. SDValue IndexOffset = Ptr->getOperand(1);
  7501. // Skip signextends.
  7502. if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
  7503. IndexOffset = IndexOffset->getOperand(0);
  7504. IsIndexSignExt = true;
  7505. }
  7506. // Either the case of Base + Index (no offset) or something else.
  7507. if (IndexOffset->getOpcode() != ISD::ADD)
  7508. return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
  7509. // Now we have the case of Base + Index + offset.
  7510. SDValue Index = IndexOffset->getOperand(0);
  7511. SDValue Offset = IndexOffset->getOperand(1);
  7512. if (!isa<ConstantSDNode>(Offset))
  7513. return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
  7514. // Ignore signextends.
  7515. if (Index->getOpcode() == ISD::SIGN_EXTEND) {
  7516. Index = Index->getOperand(0);
  7517. IsIndexSignExt = true;
  7518. } else IsIndexSignExt = false;
  7519. int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
  7520. return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  7521. }
  7522. };
  7523. /// Holds a pointer to an LSBaseSDNode as well as information on where it
  7524. /// is located in a sequence of memory operations connected by a chain.
  7525. struct MemOpLink {
7526. MemOpLink(LSBaseSDNode *N, int64_t Offset, unsigned Seq):
  7527. MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
  7528. // Ptr to the mem node.
  7529. LSBaseSDNode *MemNode;
  7530. // Offset from the base ptr.
  7531. int64_t OffsetFromBase;
7532. // The sequence number of this mem node.
7533. // The lowest mem operand in the DAG starts at zero.
  7534. unsigned SequenceNum;
  7535. };
7536. /// Sorts store nodes (MemOpLinks) according to their offset from a shared
7537. /// base ptr.
  7538. struct ConsecutiveMemoryChainSorter {
  7539. bool operator()(MemOpLink LHS, MemOpLink RHS) {
  7540. return LHS.OffsetFromBase < RHS.OffsetFromBase;
  7541. }
  7542. };
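/// Merges consecutive, same-sized, equally-aligned stores of constants or of
/// loads into one wide store (plus one wide load in the load case) when the
/// target has a suitable legal wide type. Returns true if the DAG changed.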
  7543. bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
  7544. EVT MemVT = St->getMemoryVT();
  7545. int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
  7546. bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
  7547. hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
  7548. // Don't merge vectors into wider inputs.
  7549. if (MemVT.isVector() || !MemVT.isSimple())
  7550. return false;
  7551. // Perform an early exit check. Do not bother looking at stored values that
  7552. // are not constants or loads.
  7553. SDValue StoredVal = St->getValue();
  7554. bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  7555. if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
  7556. !IsLoadSrc)
  7557. return false;
  7558. // Only look at ends of store sequences.
  7559. SDValue Chain = SDValue(St, 1);
  7560. if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
  7561. return false;
  7562. // This holds the base pointer, index, and the offset in bytes from the base
  7563. // pointer.
  7564. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
  7565. // We must have a base and an offset.
  7566. if (!BasePtr.Base.getNode())
  7567. return false;
  7568. // Do not handle stores to undef base pointers.
  7569. if (BasePtr.Base.getOpcode() == ISD::UNDEF)
  7570. return false;
  7571. // Save the LoadSDNodes that we find in the chain.
  7572. // We need to make sure that these nodes do not interfere with
  7573. // any of the store nodes.
  7574. SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
  7575. // Save the StoreSDNodes that we find in the chain.
  7576. SmallVector<MemOpLink, 8> StoreNodes;
7577. // Walk up the chain and look for nodes with offsets from the same
7578. // base pointer. Stop when reaching an instruction of a different kind
7579. // or one which has a different base pointer.
  7580. unsigned Seq = 0;
  7581. StoreSDNode *Index = St;
  7582. while (Index) {
  7583. // If the chain has more than one use, then we can't reorder the mem ops.
  7584. if (Index != St && !SDValue(Index, 1)->hasOneUse())
  7585. break;
  7586. // Find the base pointer and offset for this memory node.
  7587. BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
  7588. // Check that the base pointer is the same as the original one.
  7589. if (!Ptr.equalBaseIndex(BasePtr))
  7590. break;
  7591. // Check that the alignment is the same.
  7592. if (Index->getAlignment() != St->getAlignment())
  7593. break;
  7594. // The memory operands must not be volatile.
  7595. if (Index->isVolatile() || Index->isIndexed())
  7596. break;
  7597. // No truncation.
  7598. if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
  7599. if (St->isTruncatingStore())
  7600. break;
  7601. // The stored memory type must be the same.
  7602. if (Index->getMemoryVT() != MemVT)
  7603. break;
7604. // We do not allow unaligned stores because we want to prevent stores from
7605. // partially overwriting one another.
  7606. if (Index->getAlignment()*8 != MemVT.getSizeInBits())
  7607. break;
  7608. // We found a potential memory operand to merge.
  7609. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
  7610. // Find the next memory operand in the chain. If the next operand in the
  7611. // chain is a store then move up and continue the scan with the next
  7612. // memory operand. If the next operand is a load save it and use alias
  7613. // information to check if it interferes with anything.
  7614. SDNode *NextInChain = Index->getChain().getNode();
  7615. while (1) {
  7616. if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
  7617. // We found a store node. Use it for the next iteration.
  7618. Index = STn;
  7619. break;
  7620. } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
  7621. if (Ldn->isVolatile()) {
  7622. Index = NULL;
  7623. break;
  7624. }
  7625. // Save the load node for later. Continue the scan.
  7626. AliasLoadNodes.push_back(Ldn);
  7627. NextInChain = Ldn->getChain().getNode();
  7628. continue;
  7629. } else {
  7630. Index = NULL;
  7631. break;
  7632. }
  7633. }
  7634. }
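// At this point, for a hypothetical chain ... -> ld -> st2 -> st1 -> St,
// StoreNodes holds {St, st1, st2} tagged with their byte offsets from the
// shared base, and AliasLoadNodes holds {ld} for the alias checks below.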
  7635. // Check if there is anything to merge.
  7636. if (StoreNodes.size() < 2)
  7637. return false;
  7638. // Sort the memory operands according to their distance from the base pointer.
  7639. std::sort(StoreNodes.begin(), StoreNodes.end(),
  7640. ConsecutiveMemoryChainSorter());
  7641. // Scan the memory operations on the chain and find the first non-consecutive
  7642. // store memory address.
  7643. unsigned LastConsecutiveStore = 0;
  7644. int64_t StartAddress = StoreNodes[0].OffsetFromBase;
  7645. for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
  7646. // Check that the addresses are consecutive starting from the second
  7647. // element in the list of stores.
  7648. if (i > 0) {
  7649. int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
  7650. if (CurrAddress - StartAddress != (ElementSizeBytes * i))
  7651. break;
  7652. }
  7653. bool Alias = false;
  7654. // Check if this store interferes with any of the loads that we found.
  7655. for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
  7656. if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
  7657. Alias = true;
  7658. break;
  7659. }
7660. // We found a load that aliases with this store. Stop the sequence.
  7661. if (Alias)
  7662. break;
  7663. // Mark this node as useful.
  7664. LastConsecutiveStore = i;
  7665. }
  7666. // The node with the lowest store address.
  7667. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  7668. // Store the constants into memory as one consecutive store.
  7669. if (!IsLoadSrc) {
  7670. unsigned LastLegalType = 0;
  7671. unsigned LastLegalVectorType = 0;
  7672. bool NonZero = false;
7673. for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
  7674. StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
  7675. SDValue StoredVal = St->getValue();
  7676. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
  7677. NonZero |= !C->isNullValue();
  7678. } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
  7679. NonZero |= !C->getConstantFPValue()->isNullValue();
  7680. } else {
  7681. // Non constant.
  7682. break;
  7683. }
  7684. // Find a legal type for the constant store.
  7685. unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
  7686. EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
  7687. if (TLI.isTypeLegal(StoreTy))
  7688. LastLegalType = i+1;
  7689. // Or check whether a truncstore is legal.
  7690. else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
  7691. TargetLowering::TypePromoteInteger) {
  7692. EVT LegalizedStoredValueTy =
  7693. TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
  7694. if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
  7695. LastLegalType = i+1;
  7696. }
  7697. // Find a legal type for the vector store.
  7698. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
  7699. if (TLI.isTypeLegal(Ty))
  7700. LastLegalVectorType = i + 1;
  7701. }
  7702. // We only use vectors if the constant is known to be zero and the
  7703. // function is not marked with the noimplicitfloat attribute.
  7704. if (NonZero || NoVectors)
  7705. LastLegalVectorType = 0;
  7706. // Check if we found a legal integer type to store.
  7707. if (LastLegalType == 0 && LastLegalVectorType == 0)
  7708. return false;
  7709. bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
  7710. unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
  7711. // Make sure we have something to merge.
  7712. if (NumElem < 2)
  7713. return false;
  7714. unsigned EarliestNodeUsed = 0;
7715. for (unsigned i = 0; i < NumElem; ++i) {
  7716. // Find a chain for the new wide-store operand. Notice that some
  7717. // of the store nodes that we found may not be selected for inclusion
  7718. // in the wide store. The chain we use needs to be the chain of the
  7719. // earliest store node which is *used* and replaced by the wide store.
  7720. if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
  7721. EarliestNodeUsed = i;
  7722. }
  7723. // The earliest Node in the DAG.
  7724. LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
  7725. SDLoc DL(StoreNodes[0].MemNode);
  7726. SDValue StoredVal;
  7727. if (UseVector) {
  7728. // Find a legal type for the vector store.
  7729. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
  7730. assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
  7731. StoredVal = DAG.getConstant(0, Ty);
  7732. } else {
  7733. unsigned StoreBW = NumElem * ElementSizeBytes * 8;
  7734. APInt StoreInt(StoreBW, 0);
  7735. // Construct a single integer constant which is made of the smaller
  7736. // constant inputs.
  7737. bool IsLE = TLI.isLittleEndian();
7738. for (unsigned i = 0; i < NumElem; ++i) {
7739. unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
7740. StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
7741. SDValue Val = St->getValue();
7742. StoreInt <<= ElementSizeBytes * 8;
7743. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
7744. StoreInt |= C->getAPIntValue().zext(StoreBW);
7745. } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
7746. StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
7747. } else {
7748. llvm_unreachable("Invalid constant element type");
7749. }
  7750. }
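// For example (little-endian, hypothetical values): merging four i8 stores
// of 0x01, 0x02, 0x03, 0x04 at offsets 0..3 builds the i32 constant
// 0x04030201, which places 0x01 at offset 0 and 0x04 at offset 3 in memory.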
  7751. // Create the new Load and Store operations.
  7752. EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
  7753. StoredVal = DAG.getConstant(StoreInt, StoreTy);
  7754. }
  7755. SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
  7756. FirstInChain->getBasePtr(),
  7757. FirstInChain->getPointerInfo(),
  7758. false, false,
  7759. FirstInChain->getAlignment());
  7760. // Replace the first store with the new store
  7761. CombineTo(EarliestOp, NewStore);
  7762. // Erase all other stores.
7763. for (unsigned i = 0; i < NumElem; ++i) {
  7764. if (StoreNodes[i].MemNode == EarliestOp)
  7765. continue;
  7766. StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
  7767. // ReplaceAllUsesWith will replace all uses that existed when it was
  7768. // called, but graph optimizations may cause new ones to appear. For
  7769. // example, the case in pr14333 looks like
  7770. //
  7771. // St's chain -> St -> another store -> X
  7772. //
  7773. // And the only difference from St to the other store is the chain.
7774. // When we change its chain to be St's chain they become identical,
  7775. // get CSEed and the net result is that X is now a use of St.
  7776. // Since we know that St is redundant, just iterate.
  7777. while (!St->use_empty())
  7778. DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
  7779. removeFromWorkList(St);
  7780. DAG.DeleteNode(St);
  7781. }
  7782. return true;
  7783. }
  7784. // Below we handle the case of multiple consecutive stores that
  7785. // come from multiple consecutive loads. We merge them into a single
  7786. // wide load and a single wide store.
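// For illustration (hypothetical): four i32 loads from a[0..3], each stored
// to b[0..3], can become one wide load from &a[0] and one wide store to
// &b[0] (v4i32 or i128, whichever the target legally supports).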
  7787. // Look for load nodes which are used by the stored values.
  7788. SmallVector<MemOpLink, 8> LoadNodes;
  7789. // Find acceptable loads. Loads need to have the same chain (token factor),
  7790. // must not be zext, volatile, indexed, and they must be consecutive.
  7791. BaseIndexOffset LdBasePtr;
7792. for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
  7793. StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
  7794. LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
  7795. if (!Ld) break;
  7796. // Loads must only have one use.
  7797. if (!Ld->hasNUsesOfValue(1, 0))
  7798. break;
  7799. // Check that the alignment is the same as the stores.
  7800. if (Ld->getAlignment() != St->getAlignment())
  7801. break;
  7802. // The memory operands must not be volatile.
  7803. if (Ld->isVolatile() || Ld->isIndexed())
  7804. break;
  7805. // We do not accept ext loads.
  7806. if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
  7807. break;
  7808. // The stored memory type must be the same.
  7809. if (Ld->getMemoryVT() != MemVT)
  7810. break;
  7811. BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
  7812. // If this is not the first ptr that we check.
  7813. if (LdBasePtr.Base.getNode()) {
  7814. // The base ptr must be the same.
  7815. if (!LdPtr.equalBaseIndex(LdBasePtr))
  7816. break;
  7817. } else {
7818. // Remember the first base ptr; all later base pointers must match it.
  7819. LdBasePtr = LdPtr;
  7820. }
  7821. // We found a potential memory operand to merge.
  7822. LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
  7823. }
  7824. if (LoadNodes.size() < 2)
  7825. return false;
  7826. // Scan the memory operations on the chain and find the first non-consecutive
  7827. // load memory address. These variables hold the index in the store node
  7828. // array.
  7829. unsigned LastConsecutiveLoad = 0;
  7830. // This variable refers to the size and not index in the array.
  7831. unsigned LastLegalVectorType = 0;
  7832. unsigned LastLegalIntegerType = 0;
  7833. StartAddress = LoadNodes[0].OffsetFromBase;
  7834. SDValue FirstChain = LoadNodes[0].MemNode->getChain();
  7835. for (unsigned i = 1; i < LoadNodes.size(); ++i) {
7836. // All loads must share the same chain.
  7837. if (LoadNodes[i].MemNode->getChain() != FirstChain)
  7838. break;
  7839. int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
  7840. if (CurrAddress - StartAddress != (ElementSizeBytes * i))
  7841. break;
  7842. LastConsecutiveLoad = i;
  7843. // Find a legal type for the vector store.
  7844. EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
  7845. if (TLI.isTypeLegal(StoreTy))
  7846. LastLegalVectorType = i + 1;
  7847. // Find a legal type for the integer store.
  7848. unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
  7849. StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
  7850. if (TLI.isTypeLegal(StoreTy))
  7851. LastLegalIntegerType = i + 1;
  7852. // Or check whether a truncstore and extload is legal.
  7853. else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
  7854. TargetLowering::TypePromoteInteger) {
  7855. EVT LegalizedStoredValueTy =
  7856. TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
  7857. if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
  7858. TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
  7859. TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
  7860. TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
  7861. LastLegalIntegerType = i+1;
  7862. }
  7863. }
  7864. // Only use vector types if the vector type is larger than the integer type.
  7865. // If they are the same, use integers.
  7866. bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
  7867. unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
7868. // We add +1 here because the LastXXX variables refer to an index (the last
7869. // valid location) while NumElem refers to a count of elements.
  7870. unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
  7871. NumElem = std::min(LastLegalType, NumElem);
  7872. if (NumElem < 2)
  7873. return false;
  7874. // The earliest Node in the DAG.
  7875. unsigned EarliestNodeUsed = 0;
  7876. LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
7877. for (unsigned i = 1; i < NumElem; ++i) {
  7878. // Find a chain for the new wide-store operand. Notice that some
  7879. // of the store nodes that we found may not be selected for inclusion
  7880. // in the wide store. The chain we use needs to be the chain of the
  7881. // earliest store node which is *used* and replaced by the wide store.
  7882. if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
  7883. EarliestNodeUsed = i;
  7884. }
  7885. // Find if it is better to use vectors or integers to load and store
  7886. // to memory.
  7887. EVT JointMemOpVT;
  7888. if (UseVectorTy) {
  7889. JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
  7890. } else {
  7891. unsigned StoreBW = NumElem * ElementSizeBytes * 8;
  7892. JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
  7893. }
  7894. SDLoc LoadDL(LoadNodes[0].MemNode);
  7895. SDLoc StoreDL(StoreNodes[0].MemNode);
  7896. LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
  7897. SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
  7898. FirstLoad->getChain(),
  7899. FirstLoad->getBasePtr(),
  7900. FirstLoad->getPointerInfo(),
  7901. false, false, false,
  7902. FirstLoad->getAlignment());
  7903. SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
  7904. FirstInChain->getBasePtr(),
  7905. FirstInChain->getPointerInfo(), false, false,
  7906. FirstInChain->getAlignment());
  7907. // Replace one of the loads with the new load.
  7908. LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
  7909. DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
  7910. SDValue(NewLoad.getNode(), 1));
  7911. // Remove the rest of the load chains.
7912. for (unsigned i = 1; i < NumElem; ++i) {
  7913. // Replace all chain users of the old load nodes with the chain of the new
  7914. // load node.
  7915. LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
  7916. DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
  7917. }
  7918. // Replace the first store with the new store.
  7919. CombineTo(EarliestOp, NewStore);
  7920. // Erase all other stores.
7921. for (unsigned i = 0; i < NumElem; ++i) {
  7922. // Remove all Store nodes.
  7923. if (StoreNodes[i].MemNode == EarliestOp)
  7924. continue;
  7925. StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
  7926. DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
  7927. removeFromWorkList(St);
  7928. DAG.DeleteNode(St);
  7929. }
  7930. return true;
  7931. }
  7932. SDValue DAGCombiner::visitSTORE(SDNode *N) {
  7933. StoreSDNode *ST = cast<StoreSDNode>(N);
  7934. SDValue Chain = ST->getChain();
  7935. SDValue Value = ST->getValue();
  7936. SDValue Ptr = ST->getBasePtr();
  7937. // If this is a store of a bit convert, store the input value if the
  7938. // resultant store does not need a higher alignment than the original.
  7939. if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
  7940. ST->isUnindexed()) {
  7941. unsigned OrigAlign = ST->getAlignment();
  7942. EVT SVT = Value.getOperand(0).getValueType();
  7943. unsigned Align = TLI.getDataLayout()->
  7944. getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
  7945. if (Align <= OrigAlign &&
  7946. ((!LegalOperations && !ST->isVolatile()) ||
  7947. TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
  7948. return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
  7949. Ptr, ST->getPointerInfo(), ST->isVolatile(),
  7950. ST->isNonTemporal(), OrigAlign,
  7951. ST->getTBAAInfo());
  7952. }
  7953. // Turn 'store undef, Ptr' -> nothing.
  7954. if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
  7955. return Chain;
  7956. // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  7957. if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
  7958. // NOTE: If the original store is volatile, this transform must not increase
  7959. // the number of stores. For example, on x86-32 an f64 can be stored in one
  7960. // processor operation but an i64 (which is not legal) requires two. So the
  7961. // transform should not be done in this case.
  7962. if (Value.getOpcode() != ISD::TargetConstantFP) {
  7963. SDValue Tmp;
  7964. switch (CFP->getSimpleValueType(0).SimpleTy) {
  7965. default: llvm_unreachable("Unknown FP type");
  7966. case MVT::f16: // We don't do this for these yet.
  7967. case MVT::f80:
  7968. case MVT::f128:
  7969. case MVT::ppcf128:
  7970. break;
  7971. case MVT::f32:
  7972. if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
  7973. TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
  7974. Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
  7975. bitcastToAPInt().getZExtValue(), MVT::i32);
  7976. return DAG.getStore(Chain, SDLoc(N), Tmp,
  7977. Ptr, ST->getMemOperand());
  7978. }
  7979. break;
  7980. case MVT::f64:
  7981. if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
  7982. !ST->isVolatile()) ||
  7983. TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
  7984. Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
  7985. getZExtValue(), MVT::i64);
  7986. return DAG.getStore(Chain, SDLoc(N), Tmp,
  7987. Ptr, ST->getMemOperand());
  7988. }
  7989. if (!ST->isVolatile() &&
  7990. TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
  7991. // Many FP stores are not made apparent until after legalize, e.g. for
  7992. // argument passing. Since this is so common, custom legalize the
  7993. // 64-bit integer store into two 32-bit stores.
  7994. uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
  7995. SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
  7996. SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
  7997. if (TLI.isBigEndian()) std::swap(Lo, Hi);
  7998. unsigned Alignment = ST->getAlignment();
  7999. bool isVolatile = ST->isVolatile();
  8000. bool isNonTemporal = ST->isNonTemporal();
  8001. const MDNode *TBAAInfo = ST->getTBAAInfo();
  8002. SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
  8003. Ptr, ST->getPointerInfo(),
  8004. isVolatile, isNonTemporal,
  8005. ST->getAlignment(), TBAAInfo);
  8006. Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
  8007. DAG.getConstant(4, Ptr.getValueType()));
  8008. Alignment = MinAlign(Alignment, 4U);
  8009. SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
  8010. Ptr, ST->getPointerInfo().getWithOffset(4),
  8011. isVolatile, isNonTemporal,
  8012. Alignment, TBAAInfo);
  8013. return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
  8014. St0, St1);
  8015. }
  8016. break;
  8017. }
  8018. }
  8019. }
  8020. // Try to infer better alignment information than the store already has.
  8021. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
  8022. if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
  8023. if (Align > ST->getAlignment())
  8024. return DAG.getTruncStore(Chain, SDLoc(N), Value,
  8025. Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
  8026. ST->isVolatile(), ST->isNonTemporal(), Align,
  8027. ST->getTBAAInfo());
  8028. }
  8029. }
8030. // Try transforming a pair of floating point load / store ops to integer
8031. // load / store ops.
  8032. SDValue NewST = TransformFPLoadStorePair(N);
  8033. if (NewST.getNode())
  8034. return NewST;
  8035. bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
  8036. TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
  8037. if (UseAA) {
  8038. // Walk up chain skipping non-aliasing memory nodes.
  8039. SDValue BetterChain = FindBetterChain(N, Chain);
  8040. // If there is a better chain.
  8041. if (Chain != BetterChain) {
  8042. SDValue ReplStore;
  8043. // Replace the chain to avoid dependency.
  8044. if (ST->isTruncatingStore()) {
  8045. ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
  8046. ST->getMemoryVT(), ST->getMemOperand());
  8047. } else {
  8048. ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
  8049. ST->getMemOperand());
  8050. }
  8051. // Create token to keep both nodes around.
  8052. SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
  8053. MVT::Other, Chain, ReplStore);
  8054. // Make sure the new and old chains are cleaned up.
  8055. AddToWorkList(Token.getNode());
  8056. // Don't add users to work list.
  8057. return CombineTo(N, Token, false);
  8058. }
  8059. }
  8060. // Try transforming N to an indexed store.
  8061. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
  8062. return SDValue(N, 0);
  8063. // FIXME: is there such a thing as a truncating indexed store?
  8064. if (ST->isTruncatingStore() && ST->isUnindexed() &&
  8065. Value.getValueType().isInteger()) {
  8066. // See if we can simplify the input to this truncstore with knowledge that
  8067. // only the low bits are being used. For example:
  8068. // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
  8069. SDValue Shorter =
  8070. GetDemandedBits(Value,
  8071. APInt::getLowBitsSet(
  8072. Value.getValueType().getScalarType().getSizeInBits(),
  8073. ST->getMemoryVT().getScalarType().getSizeInBits()));
  8074. AddToWorkList(Value.getNode());
  8075. if (Shorter.getNode())
  8076. return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
  8077. Ptr, ST->getMemoryVT(), ST->getMemOperand());
  8078. // Otherwise, see if we can simplify the operation with
  8079. // SimplifyDemandedBits, which only works if the value has a single use.
  8080. if (SimplifyDemandedBits(Value,
  8081. APInt::getLowBitsSet(
  8082. Value.getValueType().getScalarType().getSizeInBits(),
  8083. ST->getMemoryVT().getScalarType().getSizeInBits())))
  8084. return SDValue(N, 0);
  8085. }
  8086. // If this is a load followed by a store to the same location, then the store
  8087. // is dead/noop.
  8088. if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
  8089. if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
  8090. ST->isUnindexed() && !ST->isVolatile() &&
  8091. // There can't be any side effects between the load and store, such as
  8092. // a call or store.
  8093. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
  8094. // The store is dead, remove it.
  8095. return Chain;
  8096. }
  8097. }
  8098. // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  8099. // truncating store. We can do this even if this is already a truncstore.
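// For example (hypothetical): (store (i16 (trunc i32 %x)), %p) becomes
// (truncstore i32 %x, %p, i16) when an i32 -> i16 truncating store is legal.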
  8100. if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
  8101. && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
  8102. TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
  8103. ST->getMemoryVT())) {
  8104. return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
  8105. Ptr, ST->getMemoryVT(), ST->getMemOperand());
  8106. }
  8107. // Only perform this optimization before the types are legal, because we
  8108. // don't want to perform this optimization on every DAGCombine invocation.
  8109. if (!LegalTypes) {
  8110. bool EverChanged = false;
  8111. do {
  8112. // There can be multiple store sequences on the same chain.
  8113. // Keep trying to merge store sequences until we are unable to do so
  8114. // or until we merge the last store on the chain.
  8115. bool Changed = MergeConsecutiveStores(ST);
  8116. EverChanged |= Changed;
  8117. if (!Changed) break;
  8118. } while (ST->getOpcode() != ISD::DELETED_NODE);
  8119. if (EverChanged)
  8120. return SDValue(N, 0);
  8121. }
  8122. return ReduceLoadOpStoreWidth(N);
  8123. }
  8124. SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  8125. SDValue InVec = N->getOperand(0);
  8126. SDValue InVal = N->getOperand(1);
  8127. SDValue EltNo = N->getOperand(2);
  8128. SDLoc dl(N);
  8129. // If the inserted element is an UNDEF, just use the input vector.
  8130. if (InVal.getOpcode() == ISD::UNDEF)
  8131. return InVec;
  8132. EVT VT = InVec.getValueType();
  8133. // If we can't generate a legal BUILD_VECTOR, exit
  8134. if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
  8135. return SDValue();
  8136. // Check that we know which element is being inserted
  8137. if (!isa<ConstantSDNode>(EltNo))
  8138. return SDValue();
  8139. unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  8140. // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  8141. // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
  8142. // vector elements.
  8143. SmallVector<SDValue, 8> Ops;
  8144. // Do not combine these two vectors if the output vector will not replace
  8145. // the input vector.
  8146. if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
  8147. Ops.append(InVec.getNode()->op_begin(),
  8148. InVec.getNode()->op_end());
  8149. } else if (InVec.getOpcode() == ISD::UNDEF) {
  8150. unsigned NElts = VT.getVectorNumElements();
  8151. Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  8152. } else {
  8153. return SDValue();
  8154. }
  8155. // Insert the element
  8156. if (Elt < Ops.size()) {
  8157. // All the operands of BUILD_VECTOR must have the same type;
  8158. // we enforce that here.
  8159. EVT OpVT = Ops[0].getValueType();
  8160. if (InVal.getValueType() != OpVT)
  8161. InVal = OpVT.bitsGT(InVal.getValueType()) ?
  8162. DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
  8163. DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
  8164. Ops[Elt] = InVal;
  8165. }
  8166. // Return the new vector
  8167. return DAG.getNode(ISD::BUILD_VECTOR, dl,
  8168. VT, &Ops[0], Ops.size());
  8169. }
  8170. SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  8171. // (vextract (scalar_to_vector val, 0) -> val
  8172. SDValue InVec = N->getOperand(0);
  8173. EVT VT = InVec.getValueType();
  8174. EVT NVT = N->getValueType(0);
  8175. if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
  8176. // Check if the result type doesn't match the inserted element type. A
  8177. // SCALAR_TO_VECTOR may truncate the inserted element and the
  8178. // EXTRACT_VECTOR_ELT may widen the extracted vector.
  8179. SDValue InOp = InVec.getOperand(0);
  8180. if (InOp.getValueType() != NVT) {
  8181. assert(InOp.getValueType().isInteger() && NVT.isInteger());
  8182. return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
  8183. }
  8184. return InOp;
  8185. }
  8186. SDValue EltNo = N->getOperand(1);
  8187. bool ConstEltNo = isa<ConstantSDNode>(EltNo);
  8188. // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  8189. // We only perform this optimization before the op legalization phase because
8190. // we may introduce new vector instructions which are not backed by TD
8191. // patterns (for example, on AVX, extracting elements from a wide vector
8192. // without using extract_subvector).
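// For example (hypothetical, 4-element inputs): extracting lane 0 of
// (vector_shuffle v1, v2, <5, u, u, u>) becomes (extract_vector_elt v2, 1),
// since mask entry 5 names element 1 of the second input vector.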
  8193. if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
  8194. && ConstEltNo && !LegalOperations) {
  8195. int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  8196. int NumElem = VT.getVectorNumElements();
  8197. ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
  8198. // Find the new index to extract from.
  8199. int OrigElt = SVOp->getMaskElt(Elt);
  8200. // Extracting an undef index is undef.
  8201. if (OrigElt == -1)
  8202. return DAG.getUNDEF(NVT);
  8203. // Select the right vector half to extract from.
  8204. if (OrigElt < NumElem) {
  8205. InVec = InVec->getOperand(0);
  8206. } else {
  8207. InVec = InVec->getOperand(1);
  8208. OrigElt -= NumElem;
  8209. }
  8210. EVT IndexTy = TLI.getVectorIdxTy();
  8211. return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
  8212. InVec, DAG.getConstant(OrigElt, IndexTy));
  8213. }
  8214. // Perform only after legalization to ensure build_vector / vector_shuffle
  8215. // optimizations have already been done.
  8216. if (!LegalOperations) return SDValue();
  8217. // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  8218. // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  8219. // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
  8220. if (ConstEltNo) {
  8221. int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  8222. bool NewLoad = false;
  8223. bool BCNumEltsChanged = false;
  8224. EVT ExtVT = VT.getVectorElementType();
  8225. EVT LVT = ExtVT;
8226. // If the result of the load has to be truncated, then it's not necessarily
8227. // profitable.
  8228. if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
  8229. return SDValue();
  8230. if (InVec.getOpcode() == ISD::BITCAST) {
  8231. // Don't duplicate a load with other uses.
  8232. if (!InVec.hasOneUse())
  8233. return SDValue();
  8234. EVT BCVT = InVec.getOperand(0).getValueType();
  8235. if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
  8236. return SDValue();
  8237. if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
  8238. BCNumEltsChanged = true;
  8239. InVec = InVec.getOperand(0);
  8240. ExtVT = BCVT.getVectorElementType();
  8241. NewLoad = true;
  8242. }
  8243. LoadSDNode *LN0 = NULL;
  8244. const ShuffleVectorSDNode *SVN = NULL;
  8245. if (ISD::isNormalLoad(InVec.getNode())) {
  8246. LN0 = cast<LoadSDNode>(InVec);
  8247. } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
  8248. InVec.getOperand(0).getValueType() == ExtVT &&
  8249. ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
  8250. // Don't duplicate a load with other uses.
  8251. if (!InVec.hasOneUse())
  8252. return SDValue();
  8253. LN0 = cast<LoadSDNode>(InVec.getOperand(0));
  8254. } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
  8255. // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
  8256. // =>
  8257. // (load $addr+1*size)
  8258. // Don't duplicate a load with other uses.
  8259. if (!InVec.hasOneUse())
  8260. return SDValue();
  8261. // If the bit convert changed the number of elements, it is unsafe
  8262. // to examine the mask.
  8263. if (BCNumEltsChanged)
  8264. return SDValue();
  8265. // Select the input vector, guarding against out of range extract vector.
  8266. unsigned NumElems = VT.getVectorNumElements();
8267. int Idx = (Elt >= (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
  8268. InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
  8269. if (InVec.getOpcode() == ISD::BITCAST) {
  8270. // Don't duplicate a load with other uses.
  8271. if (!InVec.hasOneUse())
  8272. return SDValue();
  8273. InVec = InVec.getOperand(0);
  8274. }
  8275. if (ISD::isNormalLoad(InVec.getNode())) {
  8276. LN0 = cast<LoadSDNode>(InVec);
  8277. Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
  8278. }
  8279. }
  8280. // Make sure we found a non-volatile load and the extractelement is
  8281. // the only use.
  8282. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
  8283. return SDValue();
  8284. // If Idx was -1 above, Elt is going to be -1, so just return undef.
  8285. if (Elt == -1)
  8286. return DAG.getUNDEF(LVT);
  8287. unsigned Align = LN0->getAlignment();
  8288. if (NewLoad) {
  8289. // Check the resultant load doesn't need a higher alignment than the
  8290. // original load.
  8291. unsigned NewAlign =
  8292. TLI.getDataLayout()
  8293. ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
  8294. if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
  8295. return SDValue();
  8296. Align = NewAlign;
  8297. }
  8298. SDValue NewPtr = LN0->getBasePtr();
  8299. unsigned PtrOff = 0;
  8300. if (Elt) {
  8301. PtrOff = LVT.getSizeInBits() * Elt / 8;
  8302. EVT PtrType = NewPtr.getValueType();
  8303. if (TLI.isBigEndian())
  8304. PtrOff = VT.getSizeInBits() / 8 - PtrOff;
  8305. NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
  8306. DAG.getConstant(PtrOff, PtrType));
  8307. }
  8308. // The replacement we need to do here is a little tricky: we need to
  8309. // replace an extractelement of a load with a load.
  8310. // Use ReplaceAllUsesOfValuesWith to do the replacement.
8311. // Note that this replacement assumes that the extractelement is the only
8312. // use of the load; that's okay because we don't want to perform this
  8313. // transformation in other cases anyway.
  8314. SDValue Load;
  8315. SDValue Chain;
  8316. if (NVT.bitsGT(LVT)) {
  8317. // If the result type of vextract is wider than the load, then issue an
  8318. // extending load instead.
  8319. ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
  8320. ? ISD::ZEXTLOAD : ISD::EXTLOAD;
  8321. Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
  8322. NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
  8323. LVT, LN0->isVolatile(), LN0->isNonTemporal(),
  8324. Align, LN0->getTBAAInfo());
  8325. Chain = Load.getValue(1);
  8326. } else {
  8327. Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
  8328. LN0->getPointerInfo().getWithOffset(PtrOff),
  8329. LN0->isVolatile(), LN0->isNonTemporal(),
  8330. LN0->isInvariant(), Align, LN0->getTBAAInfo());
  8331. Chain = Load.getValue(1);
  8332. if (NVT.bitsLT(LVT))
  8333. Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
  8334. else
  8335. Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
  8336. }
  8337. WorkListRemover DeadNodes(*this);
  8338. SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
  8339. SDValue To[] = { Load, Chain };
  8340. DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
8341. // Since we're explicitly calling ReplaceAllUses, add the new node to the
  8342. // worklist explicitly as well.
  8343. AddToWorkList(Load.getNode());
  8344. AddUsersToWorkList(Load.getNode()); // Add users too
  8345. // Make sure to revisit this node to clean it up; it will usually be dead.
  8346. AddToWorkList(N);
  8347. return SDValue(N, 0);
  8348. }
  8349. return SDValue();
  8350. }
  8351. // Simplify (build_vec (ext )) to (bitcast (build_vec ))
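// For example (little-endian, hypothetical):
//   (v4i32 build_vector (zext i16 %a), (zext i16 %b), (zext i16 %c), (zext i16 %d))
// becomes
//   (v4i32 bitcast (v8i16 build_vector %a, 0, %b, 0, %c, 0, %d, 0))
// where the zero fillers reproduce the effect of the zero-extension.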
  8352. SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  8353. // We perform this optimization post type-legalization because
  8354. // the type-legalizer often scalarizes integer-promoted vectors.
8355. // Performing this optimization earlier may create bit-casts which
8356. // will be type-legalized into complex code sequences.
  8357. // We perform this optimization only before the operation legalizer because we
  8358. // may introduce illegal operations.
  8359. if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
  8360. return SDValue();
  8361. unsigned NumInScalars = N->getNumOperands();
  8362. SDLoc dl(N);
  8363. EVT VT = N->getValueType(0);
  8364. // Check to see if this is a BUILD_VECTOR of a bunch of values
  8365. // which come from any_extend or zero_extend nodes. If so, we can create
  8366. // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  8367. // optimizations. We do not handle sign-extend because we can't fill the sign
  8368. // using shuffles.
  8369. EVT SourceType = MVT::Other;
  8370. bool AllAnyExt = true;
  8371. for (unsigned i = 0; i != NumInScalars; ++i) {
  8372. SDValue In = N->getOperand(i);
  8373. // Ignore undef inputs.
  8374. if (In.getOpcode() == ISD::UNDEF) continue;
  8375. bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
  8376. bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
  8377. // Abort if the element is not an extension.
  8378. if (!ZeroExt && !AnyExt) {
  8379. SourceType = MVT::Other;
  8380. break;
  8381. }
  8382. // The input is a ZeroExt or AnyExt. Check the original type.
  8383. EVT InTy = In.getOperand(0).getValueType();
  8384. // Check that all of the widened source types are the same.
  8385. if (SourceType == MVT::Other)
  8386. // First time.
  8387. SourceType = InTy;
  8388. else if (InTy != SourceType) {
8389. // Multiple incoming types. Abort.
  8390. SourceType = MVT::Other;
  8391. break;
  8392. }
  8393. // Check if all of the extends are ANY_EXTENDs.
  8394. AllAnyExt &= AnyExt;
  8395. }
  8396. // In order to have valid types, all of the inputs must be extended from the
  8397. // same source type and all of the inputs must be any or zero extend.
  8398. // Scalar sizes must be a power of two.
  8399. EVT OutScalarTy = VT.getScalarType();
  8400. bool ValidTypes = SourceType != MVT::Other &&
  8401. isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
  8402. isPowerOf2_32(SourceType.getSizeInBits());
  8403. // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  8404. // turn into a single shuffle instruction.
  8405. if (!ValidTypes)
  8406. return SDValue();
  8407. bool isLE = TLI.isLittleEndian();
  8408. unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  8409. assert(ElemRatio > 1 && "Invalid element size ratio");
  8410. SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
  8411. DAG.getConstant(0, SourceType);
  8412. unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  8413. SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
  8414. // Populate the new build_vector
  8415. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
  8416. SDValue Cast = N->getOperand(i);
  8417. assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
  8418. Cast.getOpcode() == ISD::ZERO_EXTEND ||
  8419. Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
  8420. SDValue In;
  8421. if (Cast.getOpcode() == ISD::UNDEF)
  8422. In = DAG.getUNDEF(SourceType);
  8423. else
  8424. In = Cast->getOperand(0);
  8425. unsigned Index = isLE ? (i * ElemRatio) :
  8426. (i * ElemRatio + (ElemRatio - 1));
  8427. assert(Index < Ops.size() && "Invalid index");
  8428. Ops[Index] = In;
  8429. }
  8430. // The type of the new BUILD_VECTOR node.
  8431. EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  8432. assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
  8433. "Invalid vector size");
  8434. // Check if the new vector type is legal.
  8435. if (!isTypeLegal(VecVT)) return SDValue();
  8436. // Make the new BUILD_VECTOR.
  8437. SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
  8438. // The new BUILD_VECTOR node has the potential to be further optimized.
  8439. AddToWorkList(BV.getNode());
  8440. // Bitcast to the desired type.
  8441. return DAG.getNode(ISD::BITCAST, dl, VT, BV);
  8442. }
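// Turns (build_vector (uint_to_fp %a), (uint_to_fp %b), ...) into
// (uint_to_fp (build_vector %a, %b, ...)), and likewise for sint_to_fp, when
// the vectorized conversion is legal or custom for the target.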
  8443. SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  8444. EVT VT = N->getValueType(0);
  8445. unsigned NumInScalars = N->getNumOperands();
  8446. SDLoc dl(N);
  8447. EVT SrcVT = MVT::Other;
  8448. unsigned Opcode = ISD::DELETED_NODE;
  8449. unsigned NumDefs = 0;
  8450. for (unsigned i = 0; i != NumInScalars; ++i) {
  8451. SDValue In = N->getOperand(i);
  8452. unsigned Opc = In.getOpcode();
  8453. if (Opc == ISD::UNDEF)
  8454. continue;
8455. // Record the conversion opcode if the scalar values are floats converted from integers.
  8456. if (Opcode == ISD::DELETED_NODE &&
  8457. (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
  8458. Opcode = Opc;
  8459. }
  8460. if (Opc != Opcode)
  8461. return SDValue();
  8462. EVT InVT = In.getOperand(0).getValueType();
8463. // If the scalar values have different source types, bail out. Requiring a
8464. // single source type simplifies the handling of BUILD_VECTOR.
  8465. if (SrcVT == MVT::Other)
  8466. SrcVT = InVT;
  8467. if (SrcVT != InVT)
  8468. return SDValue();
  8469. NumDefs++;
  8470. }
8471. // If the vector has just one element defined, it's not worth folding it
8472. // into a vectorized one.
  8473. if (NumDefs < 2)
  8474. return SDValue();
  8475. assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
  8476. && "Should only handle conversion from integer to float.");
  8477. assert(SrcVT != MVT::Other && "Cannot determine source type!");
  8478. EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
  8479. if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
  8480. return SDValue();
  8481. SmallVector<SDValue, 8> Opnds;
  8482. for (unsigned i = 0; i != NumInScalars; ++i) {
  8483. SDValue In = N->getOperand(i);
  8484. if (In.getOpcode() == ISD::UNDEF)
  8485. Opnds.push_back(DAG.getUNDEF(SrcVT));
  8486. else
  8487. Opnds.push_back(In.getOperand(0));
  8488. }
  8489. SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
  8490. &Opnds[0], Opnds.size());
  8491. AddToWorkList(BV.getNode());
  8492. return DAG.getNode(Opcode, dl, VT, BV);
  8493. }
  8494. SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  8495. unsigned NumInScalars = N->getNumOperands();
  8496. SDLoc dl(N);
  8497. EVT VT = N->getValueType(0);
  8498. // A vector built entirely of undefs is undef.
  8499. if (ISD::allOperandsUndef(N))
  8500. return DAG.getUNDEF(VT);
  8501. SDValue V = reduceBuildVecExtToExtBuildVec(N);
  8502. if (V.getNode())
  8503. return V;
  8504. V = reduceBuildVecConvertToConvertBuildVec(N);
  8505. if (V.getNode())
  8506. return V;
  8507. // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  8508. // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  8509. // at most two distinct vectors, turn this into a shuffle node.
  8510. // May only combine to shuffle after legalize if shuffle is legal.
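// For example (hypothetical, 4 elements):
//   (build_vector (extract v1, 0), (extract v2, 1), (extract v1, 2), undef)
// can become (vector_shuffle v1, v2, <0, 5, 2, -1>), where mask entries >= 4
// select from the second input and -1 marks an undef lane.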
  8511. if (LegalOperations &&
  8512. !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
  8513. return SDValue();
  8514. SDValue VecIn1, VecIn2;
  8515. for (unsigned i = 0; i != NumInScalars; ++i) {
  8516. // Ignore undef inputs.
  8517. if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
  8518. // If this input is something other than a EXTRACT_VECTOR_ELT with a
  8519. // constant index, bail out.
  8520. if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
  8521. !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
  8522. VecIn1 = VecIn2 = SDValue(0, 0);
  8523. break;
  8524. }
  8525. // We allow up to two distinct input vectors.
  8526. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
  8527. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
  8528. continue;
  8529. if (VecIn1.getNode() == 0) {
  8530. VecIn1 = ExtractedFromVec;
  8531. } else if (VecIn2.getNode() == 0) {
  8532. VecIn2 = ExtractedFromVec;
  8533. } else {
  8534. // Too many inputs.
  8535. VecIn1 = VecIn2 = SDValue(0, 0);
  8536. break;
  8537. }
  8538. }
  8539. // If everything is good, we can make a shuffle operation.
  8540. if (VecIn1.getNode()) {
  8541. SmallVector<int, 8> Mask;
  8542. for (unsigned i = 0; i != NumInScalars; ++i) {
  8543. if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
  8544. Mask.push_back(-1);
  8545. continue;
  8546. }
  8547. // If extracting from the first vector, just use the index directly.
  8548. SDValue Extract = N->getOperand(i);
  8549. SDValue ExtVal = Extract.getOperand(1);
  8550. if (Extract.getOperand(0) == VecIn1) {
  8551. unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
8552. if (ExtIndex >= VT.getVectorNumElements())
  8553. return SDValue();
  8554. Mask.push_back(ExtIndex);
  8555. continue;
  8556. }
  8557. // Otherwise, use InIdx + VecSize
  8558. unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
  8559. Mask.push_back(Idx+NumInScalars);
  8560. }
  8561. // We can't generate a shuffle node with mismatched input and output types.
  8562. // Attempt to transform a single input vector to the correct type.
  8563. if ((VT != VecIn1.getValueType())) {
8564. // We don't support shuffling between two values of different types.
  8565. if (VecIn2.getNode() != 0)
  8566. return SDValue();
  8567. // We only support widening of vectors which are half the size of the
  8568. // output registers. For example XMM->YMM widening on X86 with AVX.
  8569. if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
  8570. return SDValue();
  8571. // If the input vector type has a different base type to the output
  8572. // vector type, bail out.
  8573. if (VecIn1.getValueType().getVectorElementType() !=
  8574. VT.getVectorElementType())
  8575. return SDValue();
  8576. // Widen the input vector by adding undef values.
  8577. VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
  8578. VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
  8579. }
  8580. // If VecIn2 is unused then change it to undef.
  8581. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
  8582. // Check that we were able to transform all incoming values to the same
  8583. // type.
  8584. if (VecIn2.getValueType() != VecIn1.getValueType() ||
  8585. VecIn1.getValueType() != VT)
  8586. return SDValue();
  8587. // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  8588. if (!isTypeLegal(VT))
  8589. return SDValue();
  8590. // Return the new VECTOR_SHUFFLE node.
  8591. SDValue Ops[2];
  8592. Ops[0] = VecIn1;
  8593. Ops[1] = VecIn2;
  8594. return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
  8595. }
  8596. return SDValue();
  8597. }
  8598. SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  8599. // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
  8600. // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
  8601. // inputs come from at most two distinct vectors, turn this into a shuffle
  8602. // node.
  8603. // If we only have one input vector, we don't need to do any concatenation.
  8604. if (N->getNumOperands() == 1)
  8605. return N->getOperand(0);
  8606. // Check if all of the operands are undefs.
  8607. EVT VT = N->getValueType(0);
  8608. if (ISD::allOperandsUndef(N))
  8609. return DAG.getUNDEF(VT);
  8610. // Optimize concat_vectors where one of the vectors is undef.
  8611. if (N->getNumOperands() == 2 &&
  8612. N->getOperand(1)->getOpcode() == ISD::UNDEF) {
  8613. SDValue In = N->getOperand(0);
  8614. assert(In.getValueType().isVector() && "Must concat vectors");
8615. // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
  8616. if (In->getOpcode() == ISD::BITCAST &&
  8617. !In->getOperand(0)->getValueType(0).isVector()) {
  8618. SDValue Scalar = In->getOperand(0);
  8619. EVT SclTy = Scalar->getValueType(0);
  8620. if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
  8621. return SDValue();
  8622. EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
  8623. VT.getSizeInBits() / SclTy.getSizeInBits());
  8624. if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
  8625. return SDValue();
  8626. SDLoc dl = SDLoc(N);
  8627. SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
  8628. return DAG.getNode(ISD::BITCAST, dl, VT, Res);
  8629. }
  8630. }
  8631. // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  8632. // nodes often generate nop CONCAT_VECTOR nodes.
8633. // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
  8634. // place the incoming vectors at the exact same location.
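// For example (hypothetical): with v4i32 operands,
//   (concat_vectors (extract_subvector %src, 0), (extract_subvector %src, 4))
// of a v8i32 %src is a nop and folds to %src.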
  8635. SDValue SingleSource = SDValue();
  8636. unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
  8637. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
  8638. SDValue Op = N->getOperand(i);
  8639. if (Op.getOpcode() == ISD::UNDEF)
  8640. continue;
  8641. // Check if this is the identity extract:
  8642. if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
  8643. return SDValue();
  8644. // Find the single incoming vector for the extract_subvector.
  8645. if (SingleSource.getNode()) {
  8646. if (Op.getOperand(0) != SingleSource)
  8647. return SDValue();
  8648. } else {
  8649. SingleSource = Op.getOperand(0);
8650. // Check that the source type is the same as the type of the result.
8651. // If not, this concat may extend the vector, so we cannot
8652. // optimize it away.
  8653. if (SingleSource.getValueType() != N->getValueType(0))
  8654. return SDValue();
  8655. }
  8656. unsigned IdentityIndex = i * PartNumElem;
  8657. ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  8658. // The extract index must be constant.
  8659. if (!CS)
  8660. return SDValue();
  8661. // Check that we are reading from the identity index.
  8662. if (CS->getZExtValue() != IdentityIndex)
  8663. return SDValue();
  8664. }
  8665. if (SingleSource.getNode())
  8666. return SingleSource;
  8667. return SDValue();
  8668. }
  8669. SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  8670. EVT NVT = N->getValueType(0);
  8671. SDValue V = N->getOperand(0);
  8672. if (V->getOpcode() == ISD::CONCAT_VECTORS) {
  8673. // Combine:
  8674. // (extract_subvec (concat V1, V2, ...), i)
  8675. // Into:
  8676. // Vi if possible
  8677. // Only operand 0 is checked as 'concat' assumes all inputs of the same
  8678. // type.
  8679. if (V->getOperand(0).getValueType() != NVT)
  8680. return SDValue();
8681. unsigned Idx = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  8682. unsigned NumElems = NVT.getVectorNumElements();
  8683. assert((Idx % NumElems) == 0 &&
  8684. "IDX in concat is not a multiple of the result vector length.");
  8685. return V->getOperand(Idx / NumElems);
  8686. }
  8687. // Skip bitcasting
  8688. if (V->getOpcode() == ISD::BITCAST)
  8689. V = V.getOperand(0);
  8690. if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
  8691. SDLoc dl(N);
8692. // Handle only the simple case where the vectors being inserted and
8693. // extracted are of the same type, and are half the size of the larger vector.
  8694. EVT BigVT = V->getOperand(0).getValueType();
  8695. EVT SmallVT = V->getOperand(1).getValueType();
  8696. if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
  8697. return SDValue();
  8698. // Only handle cases where both indexes are constants with the same type.
  8699. ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
  8700. ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
  8701. if (InsIdx && ExtIdx &&
  8702. InsIdx->getValueType(0).getSizeInBits() <= 64 &&
  8703. ExtIdx->getValueType(0).getSizeInBits() <= 64) {
  8704. // Combine:
  8705. // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
  8706. // Into:
  8707. // indices are equal or bit offsets are equal => V1
  8708. // otherwise => (extract_subvec V1, ExtIdx)
  8709. if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
  8710. ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
  8711. return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
  8712. return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
  8713. DAG.getNode(ISD::BITCAST, dl,
  8714. N->getOperand(0).getValueType(),
  8715. V->getOperand(0)), N->getOperand(1));
  8716. }
  8717. }
  8718. return SDValue();
  8719. }
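
// Illustrative sketch of the insert/extract fold above (hypothetical types):
// with V1 : v8i32 and V2 : v4i32,
//   (extract_subvector (insert_subvector V1, V2, 4), 4) --> V2
//   (extract_subvector (insert_subvector V1, V2, 4), 0)
//     --> (extract_subvector V1, 0)
// Since both subvectors are half the width of V1, the two halves are either
// the same half (return the inserted V2) or disjoint halves (read from V1).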
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(),
                     Ops.size());
}
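
// Illustrative sketch of the partitioning above (hypothetical types): with
// N0 = concat_vectors(A, B) and N1 = concat_vectors(C, D), all v2i32,
//   shuffle(N0, N1, <2, 3, 4, 5>)
// copies whole subvectors (mask chunk <2,3> selects B, chunk <4,5> selects C),
// so it becomes concat_vectors(B, C) with no shuffle at all.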
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.getOpcode() == ISD::UNDEF) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= 0) {
        if (Idx >= (int)NumElts)
          Idx -= NumElts;
        else
          Idx = -1; // remove reference to lhs
      }
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Remove references to rhs if it is undef
  if (N1.getOpcode() == ISD::UNDEF) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector with all scalar elements the same.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;
    }
  }

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.getOpcode() == ISD::UNDEF ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    SDValue V = partitionShuffleOfConcats(N, DAG);

    if (V.getNode())
      return V;
  }

  // If this shuffle node is simply a swizzle of another shuffle node,
  // and it reverses the swizzle of the previous shuffle then we can
  // optimize shuffle(shuffle(x, undef), undef) -> x.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      N1.getOpcode() == ISD::UNDEF) {

    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Shuffle nodes can only reverse shuffles with a single non-undef value.
    if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      assert(Idx < (int)NumElts && "Index references undef operand");
      // Next, this index comes from the first value, which is the incoming
      // shuffle. Adopt the incoming index.
      if (Idx >= 0)
        Idx = OtherSV->getMaskElt(Idx);

      // The combined shuffle must map each index to itself.
      if (Idx >= 0 && (unsigned)Idx != i)
        return SDValue();
    }

    return OtherSV->getOperand(0);
  }

  return SDValue();
}
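
// Illustrative sketch of the swizzle-reversal fold above (hypothetical mask):
// for a v4i32 value X,
//   T = shuffle(X, undef, <2, 3, 0, 1>)
//   shuffle(T, undef, <2, 3, 0, 1>)
// composes to the identity mask <0, 1, 2, 3>, so the outer shuffle folds to X.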
/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
/// transform an AND to a vector_shuffle with the destination vector and a
/// zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if (N->getOpcode() == ISD::AND) {
    if (RHS.getOpcode() == ISD::BITCAST)
      RHS = RHS.getOperand(0);
    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
      SmallVector<int, 8> Indices;
      unsigned NumElts = RHS.getNumOperands();
      for (unsigned i = 0; i != NumElts; ++i) {
        SDValue Elt = RHS.getOperand(i);
        if (!isa<ConstantSDNode>(Elt))
          return SDValue();

        if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
          Indices.push_back(i);
        else if (cast<ConstantSDNode>(Elt)->isNullValue())
          Indices.push_back(NumElts);
        else
          return SDValue();
      }

      // Let's see if the target supports this vector_shuffle.
      EVT RVT = RHS.getValueType();
      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
        return SDValue();

      // Return the new VECTOR_SHUFFLE node.
      EVT EltVT = RVT.getVectorElementType();
      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                     DAG.getConstant(0, EltVT));
      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
                                 RVT, &ZeroOps[0], ZeroOps.size());
      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
    }
  }

  return SDValue();
}
/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Shuffle = XformToShuffleWithZero(N);
  if (Shuffle.getNode()) return Shuffle;

  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
  // this operation.
  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
      RHS.getOpcode() == ISD::BUILD_VECTOR) {
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
      SDValue LHSOp = LHS.getOperand(i);
      SDValue RHSOp = RHS.getOperand(i);

      // If these two elements can't be folded, bail out.
      if ((LHSOp.getOpcode() != ISD::UNDEF &&
           LHSOp.getOpcode() != ISD::Constant &&
           LHSOp.getOpcode() != ISD::ConstantFP) ||
          (RHSOp.getOpcode() != ISD::UNDEF &&
           RHSOp.getOpcode() != ISD::Constant &&
           RHSOp.getOpcode() != ISD::ConstantFP))
        break;

      // Can't fold divide by zero.
      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
          N->getOpcode() == ISD::FDIV) {
        if ((RHSOp.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
            (RHSOp.getOpcode() == ISD::ConstantFP &&
             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
          break;
      }

      EVT VT = LHSOp.getValueType();
      EVT RVT = RHSOp.getValueType();
      if (RVT != VT) {
        // Integer BUILD_VECTOR operands may have types larger than the element
        // size (e.g., when the element type is not legal).  Prior to type
        // legalization, the types may not match between the two BUILD_VECTORS.
        // Truncate one of the operands to make them match.
        if (RVT.getSizeInBits() > VT.getSizeInBits()) {
          RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
        } else {
          LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
          VT = RVT;
        }
      }

      SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
                                   LHSOp, RHSOp);
      if (FoldOp.getOpcode() != ISD::UNDEF &&
          FoldOp.getOpcode() != ISD::Constant &&
          FoldOp.getOpcode() != ISD::ConstantFP)
        break;
      Ops.push_back(FoldOp);
      AddToWorkList(FoldOp.getNode());
    }

    if (Ops.size() == LHS.getNumOperands())
      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
                         LHS.getValueType(), &Ops[0], Ops.size());
  }

  return SDValue();
}
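
// Illustrative sketch of the element-wise fold above (hypothetical values):
//   add (build_vector 1, 2, 3, 4), (build_vector 10, 20, 30, 40)
// folds per element to
//   build_vector 11, 22, 33, 44
// as long as every per-element fold itself yields a constant (or undef).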
/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVUnaryOp only works on vectors!");

  SDValue N0 = N->getOperand(0);

  if (N0.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
    SDValue Op = N0.getOperand(i);
    if (Op.getOpcode() != ISD::UNDEF &&
        Op.getOpcode() != ISD::ConstantFP)
      break;
    EVT EltVT = Op.getValueType();
    SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
    if (FoldOp.getOpcode() != ISD::UNDEF &&
        FoldOp.getOpcode() != ISD::ConstantFP)
      break;
    Ops.push_back(FoldOp);
    AddToWorkList(FoldOp.getNode());
  }

  if (Ops.size() != N0.getNumOperands())
    return SDValue();

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
                     N0.getValueType(), &Ops[0], Ops.size());
}
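
// Illustrative sketch of the unary fold above (hypothetical values):
//   fneg (build_vector 1.0, -2.0, undef, 4.0)
// folds per element to
//   build_vector -1.0, 2.0, undef, -4.0
// and bails out entirely if any element is neither ConstantFP nor undef.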
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorkList(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
                           SCC.getOperand(2), SCC.getOperand(3), SETCC);
    }

    return SCC;
  }
  return SDValue();
}
/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
/// are the two values being selected between, see if we can simplify the
/// select.  Callers of this should assume that TheSelect is deleted if this
/// returns true.  As such, they should return the appropriate thing (e.g. the
/// node) back to the top-level of the DAG combiner loop to avoid it being
/// looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {

  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;
      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      Load = DAG.getLoad(TheSelect->getValueType(0),
                         SDLoc(TheSelect),
                         // FIXME: Discards pointer and TBAA info.
                         LLD->getChain(), Addr, MachinePointerInfo(),
                         LLD->isVolatile(), LLD->isNonTemporal(),
                         LLD->isInvariant(), LLD->getAlignment());
    } else {
      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
                            RLD->getExtensionType() : LLD->getExtensionType(),
                            SDLoc(TheSelect),
                            TheSelect->getValueType(0),
                            // FIXME: Discards pointer and TBAA info.
                            LLD->getChain(), Addr, MachinePointerInfo(),
                            LLD->getMemoryVT(), LLD->isVolatile(),
                            LLD->isNonTemporal(), LLD->getAlignment());
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
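
// Illustrative sketch of the select-of-loads fold above (hypothetical IR):
//   select C, (load P), (load Q)
// becomes, when both loads share a token chain, neither is volatile, and the
// condition C does not depend on either load,
//   load (select C, P, Q)
// so only one memory operation survives and the select moves to the address.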
/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3,
                                      ISD::CondCode CC, bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());
  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());

  // fold select_cc true, x, y -> x
  if (SCCC && !SCCC->isNullValue())
    return N2;
  // fold select_cc false, x, y -> y
  if (SCCC && SCCC->isNullValue())
    return N3;

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->getValueAPF().isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
           TargetLowering::Legal) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = *TLI.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
                                            TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize);

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorkList(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorkList(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorkList(CPIdx.getNode());
        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                           MachinePointerInfo::getConstantPool(), false,
                           false, false, Alignment);
      }
    }
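
  // Illustrative sketch of the layout above (hypothetical values): for
  // "(a < b) ? 1.0f : 2.0f" the pool entry holds { 2.0f, 1.0f } (false value
  // first), EltSize is 4, and the select picks byte offset 4 when the
  // comparison is true and 0 otherwise, so a single indexed load replaces
  // two constant pool loads.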
  // Check to see if we can perform the "gzip trick", transforming
  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
    EVT XType = N0.getValueType();
    EVT AType = N2.getValueType();
    if (XType.bitsGE(AType)) {
      // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
      // single-bit constant.
      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
        unsigned ShCtV = N2C->getAPIntValue().logBase2();
        ShCtV = XType.getSizeInBits()-ShCtV-1;
        SDValue ShCt = DAG.getConstant(ShCtV,
                                       getShiftAmountTy(N0.getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
                                    XType, N0, ShCt);
        AddToWorkList(Shift.getNode());

        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
          AddToWorkList(Shift.getNode());
        }

        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
      }

      SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
                                  XType, N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(N0.getValueType())));
      AddToWorkList(Shift.getNode());

      if (XType.bitsGT(AType)) {
        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
        AddToWorkList(Shift.getNode());
      }

      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
    }
  }
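
  // Illustrative sketch of the gzip trick above (hypothetical values): on i32,
  //   (select_cc setlt X, 0, A, 0)
  // becomes
  //   (and (sra X, 31), A)
  // since sra X, 31 is all-ones exactly when X is negative; and when A = 8
  // (a single bit), the cheaper form (and (srl X, 28), 8) is used instead.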
  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set. A plaintext description would be: we can
  // turn the SELECT_CC into an AND when the condition can be materialized as
  // an all-ones register.  Any single bit-test can be materialized as an
  // all-ones register with shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT &&
      N1C && N1C->isNullValue() &&
      N2C && N2C->isNullValue()) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      APInt AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is
      // either all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth()-1,
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType().isVector()) ==
      TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->getAPIntValue() == 1)
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC,
          LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorkList(SCC.getNode());
      AddToWorkList(Temp.getNode());

      if (N2C->getAPIntValue() == 1)
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is the equivalent of setcc
  // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
  // ahead with the folds.  (This block is currently disabled by the
  // "if (0 && ...)" guard.)
  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
    EVT XType = N0.getValueType();
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
      SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
      if (Res.getValueType() != VT)
        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
      return Res;
    }

    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::CTLZ, XType))) {
      SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
                                       getShiftAmountTy(Ctlz.getValueType())));
    }
    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
      SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
                                  XType, DAG.getConstant(0, XType), N0);
      SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
      return DAG.getNode(ISD::SRL, DL, XType,
                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
                         DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(XType)));
    }
    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
      SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
                                 DAG.getConstant(XType.getSizeInBits()-1,
                                        getShiftAmountTy(N0.getValueType())));
      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  //   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    ConstantSDNode *SubC = NULL;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
                                XType, N0, Shift);
      AddToWorkList(Shift.getNode());
      AddToWorkList(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  return SDValue();
}
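
// Illustrative sketch of the integer-abs lowering above (hypothetical values):
// on i32,
//   select_cc setlt X, 0, (sub 0, X), X
// becomes the branch-free sequence
//   Y = sra X, 31            ; 0 for non-negative X, -1 for negative X
//   abs = xor (add X, Y), Y  ; (X + 0) ^ 0, or (X - 1) ^ -1 == -X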
/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
                                   SDValue N1, ISD::CondCode Cond,
                                   SDLoc DL, bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}

/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  std::vector<SDNode*> Built;
  SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);

  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
       ii != ee; ++ii)
    AddToWorkList(*ii);
  return S;
}

/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  std::vector<SDNode*> Built;
  SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);

  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
       ii != ee; ++ii)
    AddToWorkList(*ii);
  return S;
}
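
// Illustrative sketch of the magic-number expansion these helpers request
// (hypothetical constants, per Hacker's Delight-style reciprocals): on i32,
//   sdiv X, 3
// can be selected roughly as
//   Q = mulhs X, 0x55555556   ; high half of the widened product
//   Q = add Q, (srl X, 31)    ; correct toward zero for negative X
// so the divide becomes a multiply, a shift, and an add.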
/// FindBaseOffset - Return true if base is a frame index, which is known not
/// to alias with anything but itself.  Provides base object and offset as
/// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                           const GlobalValue *&GV, const void *&CV) {
  // Assume it is a primitive operation.
  Base = Ptr; Offset = 0; GV = 0; CV = 0;

  // If it's adding a simple constant then integrate the offset.
  if (Base.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
      Base = Base.getOperand(0);
      Offset += C->getZExtValue();
    }
  }

  // Return the underlying GlobalValue, and update the Offset.  Return false
  // for GlobalAddressSDNode since the same GlobalAddress may be represented
  // by multiple nodes with different offsets.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
    GV = G->getGlobal();
    Offset += G->getOffset();
    return false;
  }

  // Return the underlying Constant value, and update the Offset.  Return false
  // for ConstantSDNodes since the same constant pool entry may be represented
  // by multiple nodes with different offsets.
  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
    CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
                                         : (const void *)C->getConstVal();
    Offset += C->getOffset();
    return false;
  }
  // If it's any of the following then it can't alias with anything but itself.
  return isa<FrameIndexSDNode>(Base);
}

/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
                          const Value *SrcValue1, int SrcValueOffset1,
                          unsigned SrcValueAlign1,
                          const MDNode *TBAAInfo1,
                          SDValue Ptr2, int64_t Size2, bool IsVolatile2,
                          const Value *SrcValue2, int SrcValueOffset2,
                          unsigned SrcValueAlign2,
                          const MDNode *TBAAInfo2) const {
  // If they are the same then they must be aliases.
  if (Ptr1 == Ptr2) return true;

  // If they are both volatile then they cannot be reordered.
  if (IsVolatile1 && IsVolatile2) return true;

  // Gather base node and offset information.
  SDValue Base1, Base2;
  int64_t Offset1, Offset2;
  const GlobalValue *GV1, *GV2;
  const void *CV1, *CV2;
  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);

  // If they have the same base address then check to see if they overlap.
  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);

  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (isFrameIndex1 && isFrameIndex2) {
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
    return false;

  // If we know both SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove
  // they do not alias.  This check is conservative for now to catch cases
  // created by splitting vector types.
  if ((SrcValueAlign1 == SrcValueAlign2) &&
      (SrcValueOffset1 != SrcValueOffset2) &&
      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;

    // There is no overlap between these relatively aligned accesses of
    // similar size, return no alias.
    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
      return false;
  }

  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
    TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
  if (UseAA && SrcValue1 && SrcValue2) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
    AliasAnalysis::AliasResult AAResult =
      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
    if (AAResult == AliasAnalysis::NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
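
// Worked instance of the interval overlap test above (hypothetical values):
// two 4-byte accesses off the same base at offsets 0 and 8 satisfy
// (0 + 4) <= 8, so they cannot overlap and may be reordered; offsets 0 and 2
// fail both interval checks and are conservatively treated as aliasing.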
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
  SDValue Ptr0, Ptr1;
  int64_t Size0, Size1;
  bool IsVolatile0, IsVolatile1;
  const Value *SrcValue0, *SrcValue1;
  int SrcValueOffset0, SrcValueOffset1;
  unsigned SrcValueAlign0, SrcValueAlign1;
  const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
  FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
                SrcValueAlign0, SrcTBAAInfo0);
  FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
                SrcValueAlign1, SrcTBAAInfo1);
  return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
                 SrcValueAlign0, SrcTBAAInfo0,
                 Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
                 SrcValueAlign1, SrcTBAAInfo1);
}

/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node.  Returns true if the operand was a nonvolatile load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
                                SDValue &Ptr, int64_t &Size, bool &IsVolatile,
                                const Value *&SrcValue,
                                int &SrcValueOffset,
                                unsigned &SrcValueAlign,
                                const MDNode *&TBAAInfo) const {
  LSBaseSDNode *LS = cast<LSBaseSDNode>(N);

  Ptr = LS->getBasePtr();
  Size = LS->getMemoryVT().getSizeInBits() >> 3;
  IsVolatile = LS->isVolatile();
  SrcValue = LS->getSrcValue();
  SrcValueOffset = LS->getSrcValueOffset();
  SrcValueAlign = LS->getOriginalAlignment();
  TBAAInfo = LS->getTBAAInfo();
  return isa<LoadSDNode>(LS) && !IsVolatile;
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  SDValue Ptr;
  int64_t Size;
  bool IsVolatile;
  const Value *SrcValue;
  int SrcValueOffset;
  unsigned SrcValueAlign;
  const MDNode *SrcTBAAInfo;
  bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
                              SrcValueOffset, SrcValueAlign, SrcTBAAInfo);

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.back();
    Chains.pop_back();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we find two aliases.  If we've seen two aliases, assume we'll
    // find more and revert to original chain since the xform is unlikely to be
    // profitable.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > 6 || Aliases.size() == 2) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      break;
    }

    // Don't bother if we've been here before.
    if (!Visited.insert(Chain.getNode()))
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      SDValue OpPtr;
      int64_t OpSize;
      bool OpIsVolatile;
      const Value *OpSrcValue;
      int OpSrcValueOffset;
      unsigned OpSrcValueAlign;
      const MDNode *OpSrcTBAAInfo;
      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
                                    OpIsVolatile, OpSrcValue, OpSrcValueOffset,
                                    OpSrcValueAlign,
                                    OpSrcTBAAInfo);

      // If chain is alias then stop here.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
                  SrcValueAlign, SrcTBAAInfo,
                  OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
                  OpSrcValueAlign, OpSrcTBAAInfo)) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }

      // Queue up the chain's operands to be visited.
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
/// for a better chain (aliasing node).
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                     &Aliases[0], Aliases.size());
}

// SelectionDAG::Combine - This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                           CodeGenOpt::Level OptLevel) {
  // Construct and run the DAG combiner; this is the main entry point for
  // this file.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}