PageRenderTime 65ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/Target/R600/R600Packetizer.cpp

https://gitlab.com/wustl-pctg-pub/llvm-cilk
C++ | 404 lines | 313 code | 40 blank | 51 comment | 82 complexity | 3a4f7fcd80a8f506497ce574dd1c97b6 MD5 | raw file
  1. //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. /// \file
/// This pass implements instruction packetization for R600. It unsets the
/// isLast bit of instructions inside a bundle and substitutes src registers
/// with PreviousVector when applicable.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #define DEBUG_TYPE "packets"
  17. #include "llvm/Support/Debug.h"
  18. #include "AMDGPU.h"
  19. #include "R600InstrInfo.h"
  20. #include "llvm/CodeGen/DFAPacketizer.h"
  21. #include "llvm/CodeGen/MachineDominators.h"
  22. #include "llvm/CodeGen/MachineFunctionPass.h"
  23. #include "llvm/CodeGen/MachineLoopInfo.h"
  24. #include "llvm/CodeGen/Passes.h"
  25. #include "llvm/CodeGen/ScheduleDAG.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. using namespace llvm;
  28. namespace {
  29. class R600Packetizer : public MachineFunctionPass {
  30. public:
  31. static char ID;
  32. R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
  33. void getAnalysisUsage(AnalysisUsage &AU) const {
  34. AU.setPreservesCFG();
  35. AU.addRequired<MachineDominatorTree>();
  36. AU.addPreserved<MachineDominatorTree>();
  37. AU.addRequired<MachineLoopInfo>();
  38. AU.addPreserved<MachineLoopInfo>();
  39. MachineFunctionPass::getAnalysisUsage(AU);
  40. }
  41. const char *getPassName() const {
  42. return "R600 Packetizer";
  43. }
  44. bool runOnMachineFunction(MachineFunction &Fn);
  45. };
  46. char R600Packetizer::ID = 0;
  47. class R600PacketizerList : public VLIWPacketizerList {
  48. private:
  49. const R600InstrInfo *TII;
  50. const R600RegisterInfo &TRI;
  51. bool VLIW5;
  52. bool ConsideredInstUsesAlreadyWrittenVectorElement;
  53. unsigned getSlot(const MachineInstr *MI) const {
  54. return TRI.getHWRegChan(MI->getOperand(0).getReg());
  55. }
  56. /// \returns register to PV chan mapping for bundle/single instructions that
  57. /// immediatly precedes I.
  58. DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
  59. const {
  60. DenseMap<unsigned, unsigned> Result;
  61. I--;
  62. if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
  63. return Result;
  64. MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
  65. if (I->isBundle())
  66. BI++;
  67. int LastDstChan = -1;
  68. do {
  69. bool isTrans = false;
  70. int BISlot = getSlot(BI);
  71. if (LastDstChan >= BISlot)
  72. isTrans = true;
  73. LastDstChan = BISlot;
  74. if (TII->isPredicated(BI))
  75. continue;
  76. int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
  77. if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
  78. continue;
  79. int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
  80. if (DstIdx == -1) {
  81. continue;
  82. }
  83. unsigned Dst = BI->getOperand(DstIdx).getReg();
  84. if (isTrans || TII->isTransOnly(BI)) {
  85. Result[Dst] = AMDGPU::PS;
  86. continue;
  87. }
  88. if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
  89. BI->getOpcode() == AMDGPU::DOT4_eg) {
  90. Result[Dst] = AMDGPU::PV_X;
  91. continue;
  92. }
  93. if (Dst == AMDGPU::OQAP) {
  94. continue;
  95. }
  96. unsigned PVReg = 0;
  97. switch (TRI.getHWRegChan(Dst)) {
  98. case 0:
  99. PVReg = AMDGPU::PV_X;
  100. break;
  101. case 1:
  102. PVReg = AMDGPU::PV_Y;
  103. break;
  104. case 2:
  105. PVReg = AMDGPU::PV_Z;
  106. break;
  107. case 3:
  108. PVReg = AMDGPU::PV_W;
  109. break;
  110. default:
  111. llvm_unreachable("Invalid Chan");
  112. }
  113. Result[Dst] = PVReg;
  114. } while ((++BI)->isBundledWithPred());
  115. return Result;
  116. }
  117. void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
  118. const {
  119. unsigned Ops[] = {
  120. AMDGPU::OpName::src0,
  121. AMDGPU::OpName::src1,
  122. AMDGPU::OpName::src2
  123. };
  124. for (unsigned i = 0; i < 3; i++) {
  125. int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
  126. if (OperandIdx < 0)
  127. continue;
  128. unsigned Src = MI->getOperand(OperandIdx).getReg();
  129. const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
  130. if (It != PVs.end())
  131. MI->getOperand(OperandIdx).setReg(It->second);
  132. }
  133. }
  134. public:
  135. // Ctor.
  136. R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
  137. MachineDominatorTree &MDT)
  138. : VLIWPacketizerList(MF, MLI, MDT, true),
  139. TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
  140. TRI(TII->getRegisterInfo()) {
  141. VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
  142. }
  143. // initPacketizerState - initialize some internal flags.
  144. void initPacketizerState() {
  145. ConsideredInstUsesAlreadyWrittenVectorElement = false;
  146. }
  147. // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
  148. bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
  149. return false;
  150. }
  151. // isSoloInstruction - return true if instruction MI can not be packetized
  152. // with any other instruction, which means that MI itself is a packet.
  153. bool isSoloInstruction(MachineInstr *MI) {
  154. if (TII->isVector(*MI))
  155. return true;
  156. if (!TII->isALUInstr(MI->getOpcode()))
  157. return true;
  158. if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
  159. return true;
  160. // XXX: This can be removed once the packetizer properly handles all the
  161. // LDS instruction group restrictions.
  162. if (TII->isLDSInstr(MI->getOpcode()))
  163. return true;
  164. return false;
  165. }
  166. // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
  167. // together.
  168. bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
  169. MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
  170. if (getSlot(MII) == getSlot(MIJ))
  171. ConsideredInstUsesAlreadyWrittenVectorElement = true;
  172. // Does MII and MIJ share the same pred_sel ?
  173. int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
  174. OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
  175. unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
  176. PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
  177. if (PredI != PredJ)
  178. return false;
  179. if (SUJ->isSucc(SUI)) {
  180. for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
  181. const SDep &Dep = SUJ->Succs[i];
  182. if (Dep.getSUnit() != SUI)
  183. continue;
  184. if (Dep.getKind() == SDep::Anti)
  185. continue;
  186. if (Dep.getKind() == SDep::Output)
  187. if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
  188. continue;
  189. return false;
  190. }
  191. }
  192. bool ARDef = TII->definesAddressRegister(MII) ||
  193. TII->definesAddressRegister(MIJ);
  194. bool ARUse = TII->usesAddressRegister(MII) ||
  195. TII->usesAddressRegister(MIJ);
  196. if (ARDef && ARUse)
  197. return false;
  198. return true;
  199. }
  200. // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
  201. // and SUJ.
  202. bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;}
  203. void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
  204. unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
  205. MI->getOperand(LastOp).setImm(Bit);
  206. }
  207. bool isBundlableWithCurrentPMI(MachineInstr *MI,
  208. const DenseMap<unsigned, unsigned> &PV,
  209. std::vector<R600InstrInfo::BankSwizzle> &BS,
  210. bool &isTransSlot) {
  211. isTransSlot = TII->isTransOnly(MI);
  212. assert (!isTransSlot || VLIW5);
  213. // Is the dst reg sequence legal ?
  214. if (!isTransSlot && !CurrentPacketMIs.empty()) {
  215. if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
  216. if (ConsideredInstUsesAlreadyWrittenVectorElement &&
  217. !TII->isVectorOnly(MI) && VLIW5) {
  218. isTransSlot = true;
  219. DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
  220. }
  221. else
  222. return false;
  223. }
  224. }
  225. // Are the Constants limitations met ?
  226. CurrentPacketMIs.push_back(MI);
  227. if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
  228. DEBUG(
  229. dbgs() << "Couldn't pack :\n";
  230. MI->dump();
  231. dbgs() << "with the following packets :\n";
  232. for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
  233. CurrentPacketMIs[i]->dump();
  234. dbgs() << "\n";
  235. }
  236. dbgs() << "because of Consts read limitations\n";
  237. );
  238. CurrentPacketMIs.pop_back();
  239. return false;
  240. }
  241. // Is there a BankSwizzle set that meet Read Port limitations ?
  242. if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
  243. PV, BS, isTransSlot)) {
  244. DEBUG(
  245. dbgs() << "Couldn't pack :\n";
  246. MI->dump();
  247. dbgs() << "with the following packets :\n";
  248. for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
  249. CurrentPacketMIs[i]->dump();
  250. dbgs() << "\n";
  251. }
  252. dbgs() << "because of Read port limitations\n";
  253. );
  254. CurrentPacketMIs.pop_back();
  255. return false;
  256. }
  257. // We cannot read LDS source registrs from the Trans slot.
  258. if (isTransSlot && TII->readsLDSSrcReg(MI))
  259. return false;
  260. CurrentPacketMIs.pop_back();
  261. return true;
  262. }
  263. MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
  264. MachineBasicBlock::iterator FirstInBundle =
  265. CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
  266. const DenseMap<unsigned, unsigned> &PV =
  267. getPreviousVector(FirstInBundle);
  268. std::vector<R600InstrInfo::BankSwizzle> BS;
  269. bool isTransSlot;
  270. if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
  271. for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
  272. MachineInstr *MI = CurrentPacketMIs[i];
  273. unsigned Op = TII->getOperandIdx(MI->getOpcode(),
  274. AMDGPU::OpName::bank_swizzle);
  275. MI->getOperand(Op).setImm(BS[i]);
  276. }
  277. unsigned Op = TII->getOperandIdx(MI->getOpcode(),
  278. AMDGPU::OpName::bank_swizzle);
  279. MI->getOperand(Op).setImm(BS.back());
  280. if (!CurrentPacketMIs.empty())
  281. setIsLastBit(CurrentPacketMIs.back(), 0);
  282. substitutePV(MI, PV);
  283. MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
  284. if (isTransSlot) {
  285. endPacket(llvm::next(It)->getParent(), llvm::next(It));
  286. }
  287. return It;
  288. }
  289. endPacket(MI->getParent(), MI);
  290. if (TII->isTransOnly(MI))
  291. return MI;
  292. return VLIWPacketizerList::addToPacket(MI);
  293. }
  294. };
  295. bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
  296. const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
  297. MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  298. MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  299. // Instantiate the packetizer.
  300. R600PacketizerList Packetizer(Fn, MLI, MDT);
  301. // DFA state table should not be empty.
  302. assert(Packetizer.getResourceTracker() && "Empty DFA table!");
  303. //
  304. // Loop over all basic blocks and remove KILL pseudo-instructions
  305. // These instructions confuse the dependence analysis. Consider:
  306. // D0 = ... (Insn 0)
  307. // R0 = KILL R0, D0 (Insn 1)
  308. // R0 = ... (Insn 2)
  309. // Here, Insn 1 will result in the dependence graph not emitting an output
  310. // dependence between Insn 0 and Insn 2. This can lead to incorrect
  311. // packetization
  312. //
  313. for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
  314. MBB != MBBe; ++MBB) {
  315. MachineBasicBlock::iterator End = MBB->end();
  316. MachineBasicBlock::iterator MI = MBB->begin();
  317. while (MI != End) {
  318. if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
  319. (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
  320. MachineBasicBlock::iterator DeleteMI = MI;
  321. ++MI;
  322. MBB->erase(DeleteMI);
  323. End = MBB->end();
  324. continue;
  325. }
  326. ++MI;
  327. }
  328. }
  329. // Loop over all of the basic blocks.
  330. for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
  331. MBB != MBBe; ++MBB) {
  332. // Find scheduling regions and schedule / packetize each region.
  333. unsigned RemainingCount = MBB->size();
  334. for(MachineBasicBlock::iterator RegionEnd = MBB->end();
  335. RegionEnd != MBB->begin();) {
  336. // The next region starts above the previous region. Look backward in the
  337. // instruction stream until we find the nearest boundary.
  338. MachineBasicBlock::iterator I = RegionEnd;
  339. for(;I != MBB->begin(); --I, --RemainingCount) {
  340. if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
  341. break;
  342. }
  343. I = MBB->begin();
  344. // Skip empty scheduling regions.
  345. if (I == RegionEnd) {
  346. RegionEnd = llvm::prior(RegionEnd);
  347. --RemainingCount;
  348. continue;
  349. }
  350. // Skip regions with one instruction.
  351. if (I == llvm::prior(RegionEnd)) {
  352. RegionEnd = llvm::prior(RegionEnd);
  353. continue;
  354. }
  355. Packetizer.PacketizeMIs(MBB, I, RegionEnd);
  356. RegionEnd = I;
  357. }
  358. }
  359. return true;
  360. }
  361. } // end anonymous namespace
  362. llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
  363. return new R600Packetizer(tm);
  364. }