PageRenderTime 26ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/sStruct/ForwardAccessor_impl.h

https://bitbucket.org/jake_lever/sstruct
C Header | 289 lines | 125 code | 47 blank | 117 comment | 24 complexity | dbed045360e8a7fc9965c9cf55970661 MD5 | raw file
  1. namespace hg
  2. {
  3. template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
  4. template < AccessorBehaviour otherAB, AccessorType otherAT, size_t otherMAX_MEM, size_t otherBLOCK_SIZE >
  5. __offload inline vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::vector(__outer vector<T,otherAB,otherAT,otherMAX_MEM,otherBLOCK_SIZE,PPU> *vec)
  6. {
  7. Accessor<T,MAX_MEM,BLOCK_SIZE>::setup(vec);
  8. _currentBlock = -Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  9. Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = 0;
  10. }
  11. template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
  12. __offload inline vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::vector(unsigned long long ea)
  13. {
  14. Accessor<T,MAX_MEM,BLOCK_SIZE>::setup(ea);
  15. /**
  16. * So, the code that loads more blocks assumes that there are already
  17. * MAX_BLOCK_COUNT blocks loaded. So it works from the "earliest" block
  18. * loaded (_currentBlock) plus MAX_BLOCK_COUNT to find out which block is
  19. * the next to be loaded. So if we offset _currentBlock back to negative
  20. * MAX_BLOCK_COUNT, the first block loaded will be index 0.
  21. */
  22. _currentBlock = -Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  23. Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = 0;
  24. }
  25. template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
  26. __offload inline vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::~vector()
  27. {
  28. DEBUG(printf("Flushing %i\n", Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount));
  29. if (AT == READ_WRITE || AT == WRITE_ONLY)
  30. {
  31. for (size_t b=0; b<Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount; b++)
  32. {
  33. //setBlock(i);
  34. size_t spuStart = b * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  35. size_t ppuStart = (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[b]-1) * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  36. DEBUG(printf("mfc_put %p to %p\n", &Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart], &Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]));
  37. mfc_put(&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart]), (unsigned long long)&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]), BLOCK_SIZE, b+1, 0, 0);
  38. }
  39. mfc_write_tag_mask((1<<(Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount+1))-1);
  40. mfc_read_tag_status_all();
  41. }
  42. //setBlock(0);
  43. //setBlock(1);
  44. Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = 0;
  45. Accessor<T,MAX_MEM,BLOCK_SIZE>::destroy();
  46. }
  47. template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
  48. __offload inline size_t vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::getBlock(size_t b)
  49. {
  50. DEBUG(printf("\nBLOCK REQUEST:%i\n",b));
  51. size_t blockTarget = 0;
  52. if (__builtin_expect(b == _currentBlock,1))
  53. {
  54. DEBUG(printf("CURRENT block %i at %i\n\n", _currentBlock, _currentBlockLocation));
  55. // Block requested is the current one, so no changes are needed
  56. return _currentBlockLocation;
  57. }
  58. else
  59. {
  60. /**
  61. * The requested block is not the current block. Hence, some changes
  62. * to the blocks will need to be made on the SPU. The first thing
  63. * is to decide how many blocks need to be changed.
  64. *
  65. * If the next block is simply the next block sequentially, then
  66. * only one block will need to be changed (the current one), but if
  67. * blocks have been skipped, then more blocks will need to be reloaded.
  68. *
  69. * So if blocks have been skipped, then needChanged will be positive and
  70. * perhaps only a few blocks will need to be changed. But if the access
  71. * is to a previous block, then needChanged will be negative. And the
  72. * current behaviour for that is basically just a flush and a reload.
  73. * Obviously there is a better way to do it.
  74. */
  75. int needChanged = b - _currentBlock;
  76. /*unsigned int compare1 = greaterThanEqual(needChanged, Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT);
  77. unsigned int compare2 = lessThan(needChanged, 0);
  78. unsigned int compare = compare1 | compare2;
  79. needChanged = (compare & Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) | (~compare & needChanged);
  80. _currentBlockLocation = (compare & 0) | (~compare & _currentBlockLocation);
  81. //if (blockTargetPlus >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) blockTargetPlus -= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  82. int blockTargetPlus = _currentBlockLocation + needChanged;
  83. unsigned plusCompare = greaterThanEqual(blockTargetPlus, Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT);
  84. blockTargetPlus = (plusCompare & (blockTargetPlus-Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT)) | (~plusCompare & blockTargetPlus);
  85. blockTarget = (compare & 0) | (~compare & blockTargetPlus);
  86. int startBlock = b - Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  87. _currentBlock = (compare & startBlock) | (~compare & _currentBlock);*/
  88. if (needChanged < 0 || needChanged >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT)
  89. {
  90. needChanged = Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  91. _currentBlockLocation = 0;
  92. blockTarget = 0;
  93. /**
  94. * This is similar to the technique used in the constructor.
  95. *
  96. * The code below which actually async loads future blocks assumes
  97. * that the block list is already full (e.g. there are MAX_BLOCK_COUNT
  98. * blocks on the SPU). In this case, we're wanting to reset all the blocks
  99. * and start again from the requested block. E.g. if the block 5 is
  100. * requested and for some reason we're ditching everything (could be we're
  101. * past block 5 already, or block 5 hasn't been loaded yet). Hence we want to
  102. * load from block 5 onwards, e.g. blocks 5,6,7,etc.
  103. *
  104. * So we set _currentBlock to be MAX_BLOCK_COUNT elements earlier than the first
  105. * block we want to load. This is because the code below calculates the next block
  106. * to load as _currentBlock + MAX_BLOCK_COUNT (assuming that the block list is full).
  107. * For instance, if the _currentBlock is 6, and we have 4 blocks loaded, then we will
  108. * have loaded blocks 6,7,8 and 9. Hence the next block to load would be 10.
  109. */
  110. _currentBlock = b - Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  111. }
  112. else
  113. {
  114. blockTarget = _currentBlockLocation + needChanged;
  115. if (blockTarget >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) blockTarget -= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  116. }
  117. /**
  118. * Another key point is to make sure that the PPU block to load
  119. * actually exists and a copy from illegal memory isn't made.
  120. * So the block number for the final element is calculated.
  121. */
  122. int finalBlockNo = Accessor<T,MAX_MEM,BLOCK_SIZE>::_size / Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  123. /**
  124. * The final block number is then used to calculate the number
  125. * of remaining blocks to be loaded. If need be, this is used
  126. * to reduce the number of blocks to be loaded.
  127. *
  128. * Example: currentBlock = 0. finalBlock = 10. We want block 9.
  129. * Should we also load blocks 10,11,12,etc to fill all the blocks.
  130. * Or maybe just stop at 10?
  131. */
  132. int diff = (_currentBlock + Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT + needChanged-1) - finalBlockNo;
  133. DEBUG(printf("ToRead: %i\n", (_currentBlock + Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT + needChanged-1)));
  134. DEBUG(printf("FinalBlock: %i\n", finalBlockNo));
  135. DEBUG(printf("Diff: %i\n", diff));
  136. if (diff > 0)
  137. needChanged -= diff;
  138. //unsigned int diffCompare = greaterThan(diff, 0);
  139. //needChanged = (diffCompare & (needChanged-diff)) | (~diffCompare & needChanged);
  140. DEBUG(printf("Need to change %i blocks\n", needChanged));
  141. DEBUG(printf("BlockTarget: %i\n", blockTarget));
  142. DEBUG(printf("CurrentBlockLocation: %i\n", _currentBlockLocation));
  143. //unsigned int blockCompare = lessThan(((int)Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount), needChanged);
  144. //Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = (blockCompare & needChanged) | (~blockCompare & Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount);
  145. /**
  146. * BlockCount is used to keep track of the number of blocks that are
  147. * currently loaded. This is need for flushing the blocks at the end.
  148. *
  149. * Hence, if more blocks are going to be changed, than are currently
  150. * loaded, the number of blocks loaded should be updated.
  151. */
  152. if (((int)Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount) < needChanged)
  153. {
  154. Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = needChanged;
  155. DEBUG(printf("Set Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = %i\n", Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount));
  156. }
  157. /**
  158. * This loops through all the blocks to be changed (from currentBlock
  159. * onwards) and replaces them with future blocks. When needed the blocks
  160. * are written back to the PPU.
  161. */
  162. if (needChanged > 0)
  163. {
  164. int i = 0;
  165. //for (int i=0; i<needChanged; i++)
  166. do
  167. {
  168. /**
  169. * Get the SPU location of the block to be played with (and wrap
  170. * it around if needed so the index is always of a valid block)
  171. */
  172. int location = _currentBlockLocation + i;
  173. //unsigned int locationCompare = greaterThanEqual(location, Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT);
  174. //location = (locationCompare & (location - Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT)) | (~locationCompare & location);
  175. if (location >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) location -= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
  176. DEBUG(printf("Changing:%i Location:%i\n", i, location));
  177. size_t spuStart = location * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  178. /**
  179. * If the block needs to be written back, and it currently
  180. * actually has data loaded into it, then write
  181. * it back to the appropriate block on the PPU.
  182. */
  183. if (AT == READ_WRITE || AT == WRITE_ONLY)
  184. {
  185. if (__builtin_expect((Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]) > 0, 1))
  186. {
  187. size_t ppuStart = (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1) * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  188. DEBUG(printf("Saving block SPU %i to PPU %i\n", location, (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1)));
  189. DEBUG(printf("mfc_put SPU:%p PPU:%p\n", &Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart], &(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart])));
  190. mfc_put(&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart]), (unsigned long long)&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]), BLOCK_SIZE, location+1, 0, 0);
  191. //mfc_write_tag_mask(1<<(location+1));
  192. //mfc_read_tag_status_all();
  193. }
  194. }
  195. /**
  196. * Calculate the next block to be loaded, simply by adding up
  197. * the current block + the entire number of blocks that can be held
  198. * + which block of the loop we're on
  199. */
  200. Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location] = _currentBlock + Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT + i + 1;
  201. /**
  202. * If we're loading blocks (probably are), copy it from the PPU
  203. * to this SPU block.
  204. */
  205. if (AT == READ_WRITE || AT == READ_ONLY)
  206. {
  207. size_t ppuStart = (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1) * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  208. DEBUG(printf("Fetching block PPU %i to SPU %i\n", (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1), location));
  209. DEBUG(printf("mfc_get SPU:%p PPU:%p\n", &Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart], &(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart])));
  210. mfc_get(&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart]), (unsigned long long)&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]), BLOCK_SIZE, location+1, 0, 0);
  211. }
  212. //mfc_write_tag_mask(1<<(location+1));
  213. //mfc_read_tag_status_all();
  214. i++;
  215. } while (i < needChanged);
  216. }
  217. // Set the current blocks
  218. _currentBlock = b;
  219. _currentBlockLocation = blockTarget;
  220. DEBUG(printf("Current set to %i at %i\n", _currentBlock, _currentBlockLocation));
  221. // Wait until needed block is copied (hopefully already done)
  222. mfc_write_tag_mask(1<<(blockTarget+1));
  223. mfc_read_tag_status_all();
  224. DEBUG(printf("BLOCK AT %i\n\n", blockTarget));
  225. return blockTarget;
  226. }
  227. }
  228. template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
  229. __offload inline T& vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::operator[](size_t n)
  230. {
  231. size_t blockNo = n / Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  232. size_t blockID = getBlock(blockNo);
  233. n -= blockNo * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  234. n += blockID * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
  235. //return ((int*)Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu)[n];
  236. return Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[n];
  237. //return _container->Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[n];
  238. }
  239. }