PageRenderTime 53ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

https://gitlab.com/storedmirrors/minix
C++ | 432 lines | 288 code | 80 blank | 64 comment | 96 complexity | 8d5f15bccaa78a76afa08cd43d34c79a MD5 | raw file
  1. //===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. /// \file
  10. /// This file implements a TargetTransformInfo analysis pass specific to the
  11. /// PPC target machine. It uses the target's detailed information to provide
  12. /// more precise answers to certain TTI queries, while letting the target
  13. /// independent and default TTI implementations handle the rest.
  14. ///
  15. //===----------------------------------------------------------------------===//
  16. #include "PPC.h"
  17. #include "PPCTargetMachine.h"
  18. #include "llvm/Analysis/TargetTransformInfo.h"
  19. #include "llvm/Support/CommandLine.h"
  20. #include "llvm/Support/Debug.h"
  21. #include "llvm/Target/CostTable.h"
  22. #include "llvm/Target/TargetLowering.h"
  23. using namespace llvm;
  24. #define DEBUG_TYPE "ppctti"
  25. static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
  26. cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
  27. // Declare the pass initialization routine locally as target-specific passes
  28. // don't have a target-wide initialization entry point, and so we rely on the
  29. // pass constructor initialization.
  30. namespace llvm {
  31. void initializePPCTTIPass(PassRegistry &);
  32. }
  33. namespace {
  34. class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
  35. const TargetMachine *TM;
  36. const PPCSubtarget *ST;
  37. const PPCTargetLowering *TLI;
  38. public:
  39. PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
  40. llvm_unreachable("This pass cannot be directly constructed");
  41. }
  42. PPCTTI(const PPCTargetMachine *TM)
  43. : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
  44. TLI(TM->getSubtargetImpl()->getTargetLowering()) {
  45. initializePPCTTIPass(*PassRegistry::getPassRegistry());
  46. }
  47. void initializePass() override {
  48. pushTTIStack(this);
  49. }
  50. void getAnalysisUsage(AnalysisUsage &AU) const override {
  51. TargetTransformInfo::getAnalysisUsage(AU);
  52. }
  53. /// Pass identification.
  54. static char ID;
  55. /// Provide necessary pointer adjustments for the two base classes.
  56. void *getAdjustedAnalysisPointer(const void *ID) override {
  57. if (ID == &TargetTransformInfo::ID)
  58. return (TargetTransformInfo*)this;
  59. return this;
  60. }
  61. /// \name Scalar TTI Implementations
  62. /// @{
  63. unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
  64. unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
  65. Type *Ty) const override;
  66. unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
  67. Type *Ty) const override;
  68. PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
  69. void getUnrollingPreferences(const Function *F, Loop *L,
  70. UnrollingPreferences &UP) const override;
  71. /// @}
  72. /// \name Vector TTI Implementations
  73. /// @{
  74. unsigned getNumberOfRegisters(bool Vector) const override;
  75. unsigned getRegisterBitWidth(bool Vector) const override;
  76. unsigned getMaxInterleaveFactor() const override;
  77. unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
  78. OperandValueKind, OperandValueProperties,
  79. OperandValueProperties) const override;
  80. unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
  81. int Index, Type *SubTp) const override;
  82. unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
  83. Type *Src) const override;
  84. unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
  85. Type *CondTy) const override;
  86. unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
  87. unsigned Index) const override;
  88. unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
  89. unsigned AddressSpace) const override;
  90. /// @}
  91. };
  92. } // end anonymous namespace
  93. INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
  94. "PPC Target Transform Info", true, true, false)
  95. char PPCTTI::ID = 0;
  96. ImmutablePass *
  97. llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
  98. return new PPCTTI(TM);
  99. }
  100. //===----------------------------------------------------------------------===//
  101. //
  102. // PPC cost model.
  103. //
  104. //===----------------------------------------------------------------------===//
  105. PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
  106. assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  107. if (ST->hasPOPCNTD() && TyWidth <= 64)
  108. return PSK_FastHardware;
  109. return PSK_Software;
  110. }
  111. unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
  112. if (DisablePPCConstHoist)
  113. return TargetTransformInfo::getIntImmCost(Imm, Ty);
  114. assert(Ty->isIntegerTy());
  115. unsigned BitSize = Ty->getPrimitiveSizeInBits();
  116. if (BitSize == 0)
  117. return ~0U;
  118. if (Imm == 0)
  119. return TCC_Free;
  120. if (Imm.getBitWidth() <= 64) {
  121. if (isInt<16>(Imm.getSExtValue()))
  122. return TCC_Basic;
  123. if (isInt<32>(Imm.getSExtValue())) {
  124. // A constant that can be materialized using lis.
  125. if ((Imm.getZExtValue() & 0xFFFF) == 0)
  126. return TCC_Basic;
  127. return 2 * TCC_Basic;
  128. }
  129. }
  130. return 4 * TCC_Basic;
  131. }
  132. unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
  133. const APInt &Imm, Type *Ty) const {
  134. if (DisablePPCConstHoist)
  135. return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
  136. assert(Ty->isIntegerTy());
  137. unsigned BitSize = Ty->getPrimitiveSizeInBits();
  138. if (BitSize == 0)
  139. return ~0U;
  140. switch (IID) {
  141. default: return TCC_Free;
  142. case Intrinsic::sadd_with_overflow:
  143. case Intrinsic::uadd_with_overflow:
  144. case Intrinsic::ssub_with_overflow:
  145. case Intrinsic::usub_with_overflow:
  146. if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
  147. return TCC_Free;
  148. break;
  149. case Intrinsic::experimental_stackmap:
  150. if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  151. return TCC_Free;
  152. break;
  153. case Intrinsic::experimental_patchpoint_void:
  154. case Intrinsic::experimental_patchpoint_i64:
  155. if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  156. return TCC_Free;
  157. break;
  158. }
  159. return PPCTTI::getIntImmCost(Imm, Ty);
  160. }
  161. unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
  162. Type *Ty) const {
  163. if (DisablePPCConstHoist)
  164. return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
  165. assert(Ty->isIntegerTy());
  166. unsigned BitSize = Ty->getPrimitiveSizeInBits();
  167. if (BitSize == 0)
  168. return ~0U;
  169. unsigned ImmIdx = ~0U;
  170. bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
  171. ZeroFree = false;
  172. switch (Opcode) {
  173. default: return TCC_Free;
  174. case Instruction::GetElementPtr:
  175. // Always hoist the base address of a GetElementPtr. This prevents the
  176. // creation of new constants for every base constant that gets constant
  177. // folded with the offset.
  178. if (Idx == 0)
  179. return 2 * TCC_Basic;
  180. return TCC_Free;
  181. case Instruction::And:
  182. RunFree = true; // (for the rotate-and-mask instructions)
  183. // Fallthrough...
  184. case Instruction::Add:
  185. case Instruction::Or:
  186. case Instruction::Xor:
  187. ShiftedFree = true;
  188. // Fallthrough...
  189. case Instruction::Sub:
  190. case Instruction::Mul:
  191. case Instruction::Shl:
  192. case Instruction::LShr:
  193. case Instruction::AShr:
  194. ImmIdx = 1;
  195. break;
  196. case Instruction::ICmp:
  197. UnsignedFree = true;
  198. ImmIdx = 1;
  199. // Fallthrough... (zero comparisons can use record-form instructions)
  200. case Instruction::Select:
  201. ZeroFree = true;
  202. break;
  203. case Instruction::PHI:
  204. case Instruction::Call:
  205. case Instruction::Ret:
  206. case Instruction::Load:
  207. case Instruction::Store:
  208. break;
  209. }
  210. if (ZeroFree && Imm == 0)
  211. return TCC_Free;
  212. if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
  213. if (isInt<16>(Imm.getSExtValue()))
  214. return TCC_Free;
  215. if (RunFree) {
  216. if (Imm.getBitWidth() <= 32 &&
  217. (isShiftedMask_32(Imm.getZExtValue()) ||
  218. isShiftedMask_32(~Imm.getZExtValue())))
  219. return TCC_Free;
  220. if (ST->isPPC64() &&
  221. (isShiftedMask_64(Imm.getZExtValue()) ||
  222. isShiftedMask_64(~Imm.getZExtValue())))
  223. return TCC_Free;
  224. }
  225. if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
  226. return TCC_Free;
  227. if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
  228. return TCC_Free;
  229. }
  230. return PPCTTI::getIntImmCost(Imm, Ty);
  231. }
  232. void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
  233. UnrollingPreferences &UP) const {
  234. if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
  235. // The A2 is in-order with a deep pipeline, and concatenation unrolling
  236. // helps expose latency-hiding opportunities to the instruction scheduler.
  237. UP.Partial = UP.Runtime = true;
  238. }
  239. TargetTransformInfo::getUnrollingPreferences(F, L, UP);
  240. }
  241. unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
  242. if (Vector && !ST->hasAltivec())
  243. return 0;
  244. return ST->hasVSX() ? 64 : 32;
  245. }
  246. unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
  247. if (Vector) {
  248. if (ST->hasAltivec()) return 128;
  249. return 0;
  250. }
  251. if (ST->isPPC64())
  252. return 64;
  253. return 32;
  254. }
  255. unsigned PPCTTI::getMaxInterleaveFactor() const {
  256. unsigned Directive = ST->getDarwinDirective();
  257. // The 440 has no SIMD support, but floating-point instructions
  258. // have a 5-cycle latency, so unroll by 5x for latency hiding.
  259. if (Directive == PPC::DIR_440)
  260. return 5;
  261. // The A2 has no SIMD support, but floating-point instructions
  262. // have a 6-cycle latency, so unroll by 6x for latency hiding.
  263. if (Directive == PPC::DIR_A2)
  264. return 6;
  265. // FIXME: For lack of any better information, do no harm...
  266. if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
  267. return 1;
  268. // For most things, modern systems have two execution units (and
  269. // out-of-order execution).
  270. return 2;
  271. }
  272. unsigned PPCTTI::getArithmeticInstrCost(
  273. unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
  274. OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
  275. OperandValueProperties Opd2PropInfo) const {
  276. assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
  277. // Fallback to the default implementation.
  278. return TargetTransformInfo::getArithmeticInstrCost(
  279. Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
  280. }
  281. unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
  282. Type *SubTp) const {
  283. return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
  284. }
  285. unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
  286. assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
  287. return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
  288. }
  289. unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
  290. Type *CondTy) const {
  291. return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
  292. }
  293. unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
  294. unsigned Index) const {
  295. assert(Val->isVectorTy() && "This must be a vector type");
  296. int ISD = TLI->InstructionOpcodeToISD(Opcode);
  297. assert(ISD && "Invalid opcode");
  298. if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
  299. // Double-precision scalars are already located in index #0.
  300. if (Index == 0)
  301. return 0;
  302. return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
  303. }
  304. // Estimated cost of a load-hit-store delay. This was obtained
  305. // experimentally as a minimum needed to prevent unprofitable
  306. // vectorization for the paq8p benchmark. It may need to be
  307. // raised further if other unprofitable cases remain.
  308. unsigned LHSPenalty = 2;
  309. if (ISD == ISD::INSERT_VECTOR_ELT)
  310. LHSPenalty += 7;
  311. // Vector element insert/extract with Altivec is very expensive,
  312. // because they require store and reload with the attendant
  313. // processor stall for load-hit-store. Until VSX is available,
  314. // these need to be estimated as very costly.
  315. if (ISD == ISD::EXTRACT_VECTOR_ELT ||
  316. ISD == ISD::INSERT_VECTOR_ELT)
  317. return LHSPenalty +
  318. TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
  319. return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
  320. }
  321. unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
  322. unsigned AddressSpace) const {
  323. // Legalize the type.
  324. std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
  325. assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
  326. "Invalid Opcode");
  327. unsigned Cost =
  328. TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  329. // VSX loads/stores support unaligned access.
  330. if (ST->hasVSX()) {
  331. if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
  332. return Cost;
  333. }
  334. bool UnalignedAltivec =
  335. Src->isVectorTy() &&
  336. Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
  337. LT.second.getSizeInBits() == 128 &&
  338. Opcode == Instruction::Load;
  339. // PPC in general does not support unaligned loads and stores. They'll need
  340. // to be decomposed based on the alignment factor.
  341. unsigned SrcBytes = LT.second.getStoreSize();
  342. if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
  343. Cost += LT.first*(SrcBytes/Alignment-1);
  344. // For a vector type, there is also scalarization overhead (only for
  345. // stores, loads are expanded using the vector-load + permutation sequence,
  346. // which is much less expensive).
  347. if (Src->isVectorTy() && Opcode == Instruction::Store)
  348. for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
  349. Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
  350. }
  351. return Cost;
  352. }