/sStruct/ForwardAccessor_impl.h
C Header | 289 lines | 125 code | 47 blank | 117 comment | 24 complexity | dbed045360e8a7fc9965c9cf55970661 MD5 | raw file
-
- namespace hg
- {
-
- template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
- template < AccessorBehaviour otherAB, AccessorType otherAT, size_t otherMAX_MEM, size_t otherBLOCK_SIZE >
- __offload inline vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::vector(__outer vector<T,otherAB,otherAT,otherMAX_MEM,otherBLOCK_SIZE,PPU> *vec)
- {
- Accessor<T,MAX_MEM,BLOCK_SIZE>::setup(vec);
-
- _currentBlock = -Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = 0;
- }
-
-
- template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
- __offload inline vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::vector(unsigned long long ea)
- {
- Accessor<T,MAX_MEM,BLOCK_SIZE>::setup(ea);
-
- /**
- * So, the code that loads more blocks assumes that there are already
- * MAX_BLOCK_COUNT blocks loaded. So it works from the "earliest" block
- * loaded (_currentBlock) plus MAX_BLOCK_COUNT to find out which block is
- * the next to be loaded. So if we offset _currentBlock back to negative
- * MAX_BLOCK_COUNT, the first block loaded will be index 0.
- */
- _currentBlock = -Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = 0;
- }
-
- template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
- __offload inline vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::~vector()
- {
- DEBUG(printf("Flushing %i\n", Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount));
-
- if (AT == READ_WRITE || AT == WRITE_ONLY)
- {
- for (size_t b=0; b<Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount; b++)
- {
- //setBlock(i);
- size_t spuStart = b * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
- size_t ppuStart = (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[b]-1) * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
- DEBUG(printf("mfc_put %p to %p\n", &Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart], &Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]));
- mfc_put(&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart]), (unsigned long long)&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]), BLOCK_SIZE, b+1, 0, 0);
- }
- mfc_write_tag_mask((1<<(Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount+1))-1);
- mfc_read_tag_status_all();
- }
-
- //setBlock(0);
- //setBlock(1);
-
- Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = 0;
-
- Accessor<T,MAX_MEM,BLOCK_SIZE>::destroy();
- }
-
- template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
- __offload inline size_t vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::getBlock(size_t b)
- {
-
- DEBUG(printf("\nBLOCK REQUEST:%i\n",b));
-
- size_t blockTarget = 0;
-
- if (__builtin_expect(b == _currentBlock,1))
- {
- DEBUG(printf("CURRENT block %i at %i\n\n", _currentBlock, _currentBlockLocation));
- // Block requested is the current one, so no changes are needed
- return _currentBlockLocation;
- }
- else
- {
- /**
- * The requested block is not the current block. Hence, some changes
- * to the blocks will need to be made on the SPU. The first thing
- * is to decide how many blocks need to be changed.
- *
- * If the next block is simply the next block sequentially, then
- * only one block will need to be changed (the current one), but if
- * blocks have been skipped, then more blocks will need to be reloaded.
- *
- * So if blocks have been skipped, then needChanged will be positive and
- * perhaps only a few blocks will need to be changed. But if the access
- * is to a previous block, then needChanged will be negative. And the
- * current behaviour for that is basically just a flush and a reload.
- * Obviously there is a better way to do it.
- */
- int needChanged = b - _currentBlock;
-
-
- /*unsigned int compare1 = greaterThanEqual(needChanged, Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT);
- unsigned int compare2 = lessThan(needChanged, 0);
- unsigned int compare = compare1 | compare2;
-
- needChanged = (compare & Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) | (~compare & needChanged);
- _currentBlockLocation = (compare & 0) | (~compare & _currentBlockLocation);
-
- //if (blockTargetPlus >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) blockTargetPlus -= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- int blockTargetPlus = _currentBlockLocation + needChanged;
- unsigned plusCompare = greaterThanEqual(blockTargetPlus, Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT);
- blockTargetPlus = (plusCompare & (blockTargetPlus-Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT)) | (~plusCompare & blockTargetPlus);
-
- blockTarget = (compare & 0) | (~compare & blockTargetPlus);
- int startBlock = b - Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- _currentBlock = (compare & startBlock) | (~compare & _currentBlock);*/
-
- if (needChanged < 0 || needChanged >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT)
- {
- needChanged = Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- _currentBlockLocation = 0;
- blockTarget = 0;
-
- /**
- * This is similar to the technique used in the constructor.
- *
- * The code below which actually async loads future blocks assumes
- * that the block list is already full (e.g. there are MAX_BLOCK_COUNT
- * blocks on the SPU). In this case, we're wanting to reset all the blocks
- * and start again from the requested block. E.g. if the block 5 is
- * requested and for some reason we're ditching everything (could be we're
- * past block 5 already, or block 5 hasn't been loaded yet). Hence we want to
- * load from block 5 onwards, e.g. blocks 5,6,7,etc.
- *
- * So we set _currentBlock to be MAX_BLOCK_COUNT elements earlier than the first
- * block we want to load. This is because the code below calculates the next block
- * to load as _currentBlock + MAX_BLOCK_COUNT (assuming that the block list is full).
- * For instance, if the _currentBlock is 6, and we have 4 blocks loaded, then we will
- * have loaded blocks 6,7,8 and 9. Hence the next block to load would be 10.
- */
- _currentBlock = b - Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- }
- else
- {
- blockTarget = _currentBlockLocation + needChanged;
- if (blockTarget >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) blockTarget -= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- }
-
-
- /**
- * Another key point is to make sure that the PPU block to load
- * actually exists and a copy from illegal memory isn't made.
- * So the block number for the final element is calculated.
- */
- int finalBlockNo = Accessor<T,MAX_MEM,BLOCK_SIZE>::_size / Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
-
- /**
- * The final block number is then used to calculate the number
- * of remaining blocks to be loaded. If need be, this is used
- * to reduce the number of blocks to be loaded.
- *
- * Example: currentBlock = 0. finalBlock = 10. We want block 9.
- * Should we also load blocks 10,11,12,etc to fill all the blocks.
- * Or maybe just stop at 10?
- */
- int diff = (_currentBlock + Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT + needChanged-1) - finalBlockNo;
- DEBUG(printf("ToRead: %i\n", (_currentBlock + Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT + needChanged-1)));
- DEBUG(printf("FinalBlock: %i\n", finalBlockNo));
- DEBUG(printf("Diff: %i\n", diff));
-
- if (diff > 0)
- needChanged -= diff;
-
-
- //unsigned int diffCompare = greaterThan(diff, 0);
- //needChanged = (diffCompare & (needChanged-diff)) | (~diffCompare & needChanged);
-
- DEBUG(printf("Need to change %i blocks\n", needChanged));
- DEBUG(printf("BlockTarget: %i\n", blockTarget));
- DEBUG(printf("CurrentBlockLocation: %i\n", _currentBlockLocation));
-
- //unsigned int blockCompare = lessThan(((int)Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount), needChanged);
- //Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = (blockCompare & needChanged) | (~blockCompare & Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount);
-
- /**
- * BlockCount is used to keep track of the number of blocks that are
- * currently loaded. This is need for flushing the blocks at the end.
- *
- * Hence, if more blocks are going to be changed, than are currently
- * loaded, the number of blocks loaded should be updated.
- */
- if (((int)Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount) < needChanged)
- {
- Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = needChanged;
- DEBUG(printf("Set Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount = %i\n", Accessor<T,MAX_MEM,BLOCK_SIZE>::_loadedBlockCount));
- }
-
- /**
- * This loops through all the blocks to be changed (from currentBlock
- * onwards) and replaces them with future blocks. When needed the blocks
- * are written back to the PPU.
- */
- if (needChanged > 0)
- {
- int i = 0;
- //for (int i=0; i<needChanged; i++)
- do
- {
- /**
- * Get the SPU location of the block to be played with (and wrap
- * it around if needed so the index is always of a valid block)
- */
- int location = _currentBlockLocation + i;
- //unsigned int locationCompare = greaterThanEqual(location, Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT);
- //location = (locationCompare & (location - Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT)) | (~locationCompare & location);
- if (location >= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT) location -= Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT;
- DEBUG(printf("Changing:%i Location:%i\n", i, location));
-
- size_t spuStart = location * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
-
- /**
- * If the block needs to be written back, and it currently
- * actually has data loaded into it, then write
- * it back to the appropriate block on the PPU.
- */
- if (AT == READ_WRITE || AT == WRITE_ONLY)
- {
- if (__builtin_expect((Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]) > 0, 1))
- {
- size_t ppuStart = (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1) * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
- DEBUG(printf("Saving block SPU %i to PPU %i\n", location, (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1)));
- DEBUG(printf("mfc_put SPU:%p PPU:%p\n", &Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart], &(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart])));
- mfc_put(&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart]), (unsigned long long)&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]), BLOCK_SIZE, location+1, 0, 0);
-
- //mfc_write_tag_mask(1<<(location+1));
- //mfc_read_tag_status_all();
- }
- }
-
- /**
- * Calculate the next block to be loaded, simply by adding up
- * the current block + the entire number of blocks that can be held
- * + which block of the loop we're on
- */
- Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location] = _currentBlock + Accessor<T,MAX_MEM,BLOCK_SIZE>::MAX_BLOCK_COUNT + i + 1;
-
- /**
- * If we're loading blocks (probably are), copy it from the PPU
- * to this SPU block.
- */
- if (AT == READ_WRITE || AT == READ_ONLY)
- {
- size_t ppuStart = (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1) * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
- DEBUG(printf("Fetching block PPU %i to SPU %i\n", (Accessor<T,MAX_MEM,BLOCK_SIZE>::_blockList[location]-1), location));
- DEBUG(printf("mfc_get SPU:%p PPU:%p\n", &Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart], &(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart])));
- mfc_get(&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[spuStart]), (unsigned long long)&(Accessor<T,MAX_MEM,BLOCK_SIZE>::_ppu[ppuStart]), BLOCK_SIZE, location+1, 0, 0);
- }
-
- //mfc_write_tag_mask(1<<(location+1));
- //mfc_read_tag_status_all();
- i++;
- } while (i < needChanged);
- }
-
- // Set the current blocks
- _currentBlock = b;
- _currentBlockLocation = blockTarget;
-
- DEBUG(printf("Current set to %i at %i\n", _currentBlock, _currentBlockLocation));
-
- // Wait until needed block is copied (hopefully already done)
- mfc_write_tag_mask(1<<(blockTarget+1));
- mfc_read_tag_status_all();
-
-
- DEBUG(printf("BLOCK AT %i\n\n", blockTarget));
- return blockTarget;
- }
-
- }
-
- template < class T, AccessorType AT, size_t MAX_MEM, size_t BLOCK_SIZE >
- __offload inline T& vector<T,ACCESS_FORWARD,AT,MAX_MEM,BLOCK_SIZE,SPU>::operator[](size_t n)
- {
- size_t blockNo = n / Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
-
- size_t blockID = getBlock(blockNo);
-
- n -= blockNo * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
- n += blockID * Accessor<T,MAX_MEM,BLOCK_SIZE>::ELE_PER_BLOCK;
-
- //return ((int*)Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu)[n];
- return Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[n];
- //return _container->Accessor<T,MAX_MEM,BLOCK_SIZE>::_spu[n];
- }
-
-
- }