PageRenderTime 49ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/src/utf8.cpp

http://ollie.googlecode.com/
C++ | 1344 lines | 570 code | 342 blank | 432 comment | 128 complexity | d2e7400eb28b2de0da69023e5aa01d29 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /* This file is part of the Ollie libraries
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public License
  14. * along with this library; see the file COPYING. If not, write to
  15. * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  16. * Boston, MA 02110-1301, USA.
  17. *
  18. * Copyright (C) 2007 Derrick J. Wippler <thrawn01@gmail.com>
  19. **/
  20. #include <utf8.h>
  21. /*!
  22. * Initalize class variables
  23. */
  24. void Utf8Buffer::init( OffSet offPageSize ) {
  25. _boolModified = false;
  26. _fileHandle = 0;
  27. _boolEntireFileLoaded = false;
  28. _offMaxBufferSize = DEFAULT_MAX_BUF_SIZE;
  29. _offBufferSize = 0;
  30. _currentTask = 0;
  31. _offTargetPageSize = offPageSize;
  32. _offCurSaveOffSet = 0;
  33. _offCurLoadOffSet = 0;
  34. _longCurProgress = 0;
  35. // Append an empty page to the buffer
  36. Utf8Page *page = new Utf8Page();
  37. // If the buffer wants to overide the default page size
  38. if( _offTargetPageSize ) {
  39. page->mSetTargetPageSize( offPageSize );
  40. }
  41. // Append a new empty block to the page
  42. page->mAppendBlock( Utf8Block() );
  43. page->mSetOffSet( 0 );
  44. // Append to the page to the container
  45. _pageContainer.mAppendPage( page );
  46. }
  47. /**
  48. * Construct a buffer with empty pages
  49. */
  50. Utf8Buffer::Utf8Buffer( OffSet offPageSize ) {
  51. // Initialize class variables
  52. init( offPageSize );
  53. }
  54. /**
  55. * Construct a buffer with a name
  56. */
  57. Utf8Buffer::Utf8Buffer( const std::string& strName, OffSet offPageSize ) {
  58. // Initialize class variables
  59. init( offPageSize );
  60. // Set the buffer name
  61. _strName = strName;
  62. };
  63. /*!
  64. * Construct a buffer from a file and give it a name
  65. */
  66. Utf8Buffer::Utf8Buffer( File* const file, OffSet offPageSize ) {
  67. // Initialize class variables
  68. init( offPageSize );
  69. // Check for valid handle
  70. if( ! file ) {
  71. fatalError("Internal Error: Cannot create buffer from null file pointer");
  72. }
  73. // Assign the file handler
  74. _fileHandle = file;
  75. // Set the buffer name from the filename
  76. _strName = file->mGetFileName();
  77. }
  78. /*!
  79. * Buffer Destructor
  80. */
  81. Utf8Buffer::~Utf8Buffer( void ) {
  82. // Unallocate the file
  83. if( _fileHandle ) {
  84. delete (_fileHandle);
  85. }
  86. }
  87. /*!
  88. * Assign a File to be represented by this buffer
  89. * The file passed must be an open valid file
  90. */
  91. bool Utf8Buffer::mAssignFile( File* const file ) {
  92. assert( file != 0 );
  93. _strName = file->mGetFileName();
  94. _fileHandle = file;
  95. return true;
  96. }
  97. /*!
  98. * Returns the File Name the current buffer represents
  99. */
  100. std::string Utf8Buffer::mGetFileName( void ) {
  101. if( _fileHandle ) {
  102. return _fileHandle->mGetFileName();
  103. }
  104. return _strName;
  105. }
  106. /*!
  107. * Return true if the buffer has room to add data
  108. */
  109. bool Utf8Buffer::mBufferFull( void ) {
  110. // If the buffer size is not greater than the Max Buffer size
  111. if( _offBufferSize >= _offMaxBufferSize ) {
  112. return true;
  113. }
  114. return false;
  115. }
  116. /*!
  117. * Convienience function for loading the file
  118. * into the buffer for the first time
  119. */
  120. bool Utf8Buffer::mIsBufferReady( void ) {
  121. // Are there any active tasks?
  122. if( _currentTask ) {
  123. return false;
  124. }
  125. return true;
  126. }
  127. /*!
  128. * Call the Task method, Returns true if the operation was a success
  129. * false if there was an error
  130. */
  131. bool Utf8Buffer::mPreformTask( void ) {
  132. assert( _currentTask != 0 );
  133. return (this->*_currentTask)();
  134. }
  135. /*
  136. * Returns the progress of the current task if there is one.
  137. * If not returns false
  138. */
  139. bool Utf8Buffer::mGetProgress( long* longProgress ) {
  140. *longProgress = _longCurProgress;
  141. if( _currentTask ) return true;
  142. return false;
  143. }
  144. /*
  145. * Write out a block of text at a specific offset
  146. */
  147. OffSet Utf8File::mWriteBlock( OffSet offset, const char* arrBlockData, OffSet offBlockSize, Attributes& attr ) {
  148. // Set the current offset
  149. if( mSetOffSet( offset ) == -1 ) {
  150. return -1;
  151. }
  152. // Record our offset
  153. _offCurrent = offset;
  154. return mWriteNextBlock( arrBlockData, offBlockSize, attr );
  155. }
  156. /*
  157. * Read in the next block of text starting at the last read offset
  158. *
  159. * Since utf8 files have no additional attributes, we ignore the
  160. * attributes reference passed
  161. */
  162. OffSet Utf8File::mWriteNextBlock( const char* arrBlockData, OffSet offBlockSize, Attributes &attr ) {
  163. assert( _ioHandle != 0 );
  164. // If we timeout waiting on clear to read
  165. if( _ioHandle->mWaitForClearToWrite( _intTimeout ) ) {
  166. mSetError( _ioHandle->mGetError() );
  167. return -1;
  168. }
  169. OffSet offLen = 0;
  170. // Write out the block of data
  171. if( ( offLen = _ioHandle->mWrite( arrBlockData, offBlockSize ) ) < 0 ) {
  172. mSetError( _ioHandle->mGetError() );
  173. return -1;
  174. }
  175. // Keep track of where in the file we are
  176. _offCurrent += offLen;
  177. // Tell the caller how many bytes we wrote
  178. return offLen;
  179. }
  180. /*!
  181. * Return the size of the next block read will return
  182. * Utf8File will always return the max block size because
  183. * Utf8File does not assign attributes so all the block sizes
  184. * are the same, unless it is the end of the file ( Last block read )
  185. */
  186. OffSet Utf8File::mPeekNextBlock( void ) {
  187. // If we were to read the next block would we hit the end of the file?
  188. if( ( _ioHandle->mGetFileSize() - _offCurrent ) < _offBlockSize ) {
  189. // If so, return how many bytes we would read
  190. return _ioHandle->mGetFileSize() - _offCurrent;
  191. }
  192. return _offBlockSize;
  193. }
  194. /*
  195. * Read in the next block of text starting at the last read offset
  196. *
  197. * Since utf8 files have no additional attributes, we ignore the
  198. * attributes reference passed
  199. */
  200. OffSet Utf8File::mReadNextBlock( char* arrBlockData, Attributes &attr ) {
  201. assert( _ioHandle != 0 );
  202. // If we timeout waiting on clear to read
  203. if( _ioHandle->mWaitForClearToRead( _intTimeout ) ) {
  204. mSetError( _ioHandle->mGetError() );
  205. return -1;
  206. }
  207. OffSet offLen = 0;
  208. // Read in the block from the IO
  209. if( ( offLen = _ioHandle->mRead( arrBlockData, _offBlockSize ) ) < 0 ) {
  210. mSetError( _ioHandle->mGetError() );
  211. return -1;
  212. }
  213. // Keep track of where in the file we are
  214. _offCurrent += offLen;
  215. // Tell the caller how many bytes are in the block of data
  216. return offLen;
  217. }
  218. /*
  219. * Read in a block of text at specific offset
  220. */
  221. OffSet Utf8File::mReadBlock( OffSet offset, char* arrBlockData, Attributes& attr ) {
  222. // Set the current offset
  223. if( mSetOffSet( offset ) == -1 ) {
  224. return -1;
  225. }
  226. return mReadNextBlock( arrBlockData, attr );
  227. }
  228. /*
  229. * Seek to the required offset and remeber where we are
  230. */
  231. OffSet Utf8File::mSetOffSet( OffSet offset ) {
  232. assert( _ioHandle != 0 );
  233. // Return if the requested location is the same
  234. if( _offCurrent == offset ) return _offCurrent;
  235. // If the IO handle we are using does not offer Seek
  236. // and we are not asking to seek the begining of the file
  237. if( ! _ioHandle->mOffersSeek() && offset != 0 ) {
  238. mSetError("Current IO Device does not support file seeks");
  239. return -1;
  240. }
  241. // Attempt to seek to the correct offset
  242. if( _ioHandle->mSeek(offset) == -1 ) {
  243. mSetError( _ioHandle->mGetError() );
  244. return -1;
  245. }
  246. // Record our offset
  247. _offCurrent = offset;
  248. return _offCurrent;
  249. }
  250. /**
  251. * Prepare to save a file
  252. */
  253. bool Utf8File::mPrepareSave( void ) {
  254. // Set the file offset to the begining of the file
  255. if( mSetOffSet( 0 ) != 0 ) return false;
  256. return true;
  257. }
  258. /**
  259. * Prepare to load a file
  260. */
  261. bool Utf8File::mPrepareLoad( void ) {
  262. // Set the file offset to the begining of the file
  263. if( mSetOffSet( 0 ) != 0 ) return false;
  264. return true;
  265. }
  266. /**
  267. * Truncate the file to the offset of _offCurrent
  268. * This is needed to shorten the file if wrote a smaller
  269. * file in place of a larger file
  270. */
  271. bool Utf8File::mFinalizeSave( void ) {
  272. //Truncate the file up to our last offset
  273. if( _ioHandle->mTruncate( _offCurrent) == false ){
  274. mSetError( _ioHandle->mGetError() );
  275. return false;
  276. }
  277. return true;
  278. }
  279. /**
  280. * Finalize the load
  281. */
  282. bool Utf8File::mFinalizeLoad( void ) {
  283. return true;
  284. }
  285. /**
  286. * Returns a itertor to the begining of this this buffer
  287. */
  288. BufferIterator Utf8Buffer::mBegin( void ) {
  289. Utf8BufferIterator *it = new Utf8BufferIterator( this );
  290. Utf8Page::Iterator itPage = _pageContainer.mBegin();
  291. Utf8Block::Iterator itBlock = itPage->mBegin();
  292. it->mSetPage( itPage );
  293. it->mSetBlock( itBlock );
  294. it->mSetPos( 0 );
  295. BufferIterator itBuf( it );
  296. return itBuf;
  297. }
  298. /**
  299. * Returns a itertor to the end of this this buffer
  300. */
  301. BufferIterator Utf8Buffer::mEnd( void ) {
  302. Utf8BufferIterator *it = new Utf8BufferIterator( this );
  303. Utf8Page::Iterator itPage = (--_pageContainer.mEnd());
  304. Utf8Block::Iterator itBlock = (--itPage->mEnd());
  305. it->mSetPage( itPage );
  306. it->mSetBlock( itBlock );
  307. it->mSetPos( itBlock->mGetSize() );
  308. BufferIterator itBuf( it );
  309. return itBuf;
  310. }
  311. /*!
  312. * Saves 1 page of data to a file
  313. */
  314. OffSet Utf8Buffer::mSavePage( Utf8Page::Iterator &itPage, OffSet offSet ) {
  315. OffSet offLen = 0;
  316. assert( _fileHandle != 0 );
  317. // Seek to the requested offset
  318. if( _fileHandle->mSetOffSet( offSet ) == -1 ) {
  319. mSetError( _fileHandle->mGetError() );
  320. return -1;
  321. }
  322. Utf8Block::Iterator it = itPage->mBegin();
  323. // Foreach block in the page write it 2 the file
  324. for( it = itPage->mBegin() ; it != itPage->mEnd() ; it++ ) {
  325. // Attempt to write the block
  326. if( ( offLen = _fileHandle->mWriteNextBlock( it->mGetBlockData().c_str(), it->mGetSize(), it->mGetAttributes() ) ) == -1 ) {
  327. mSetError( _fileHandle->mGetError() );
  328. return -1;
  329. }
  330. // Did we write all the bytes we expected?
  331. if( offLen != it->mGetSize() ) {
  332. mSetError() << "Buffer Error: Attempted to write '" << it->mGetSize() << "' bytes, however '"
  333. << offLen << "' where actually written";
  334. return -1;
  335. }
  336. }
  337. return _fileHandle->mGetOffSet();
  338. }
  339. /*!
  340. * Set the offset to the begining of the file
  341. * in prep for saving the file
  342. */
  343. bool Utf8Buffer::mSaveBuffer( void ) {
  344. // Get the first page of the buffer
  345. _itCurSavePage = _pageContainer.mBegin();
  346. if( _itCurSavePage == _pageContainer.mEnd() ) {
  347. mSetError("Buffer Error: Can not save empty buffer, Atleast not yet");
  348. return false;
  349. }
  350. // Assign the load Page task and set the status message
  351. mSetTaskStatus() << "Saving " << _fileHandle->mGetFileName() << "..." ;
  352. _currentTask = &Utf8Buffer::mSaveFileTask;
  353. // Prepare the file class for save operation
  354. _fileHandle->mPrepareSave();
  355. _offCurSaveOffSet = 0;
  356. _longCurProgress = 0;
  357. return true;
  358. }
  359. bool Utf8Buffer::mSaveFileTask( void ) {
  360. OffSet offset = 0;
  361. assert( _fileHandle != 0 );
  362. // Ensure we don't do anything stupid
  363. if( _itCurSavePage == _pageContainer.mEnd() ) {
  364. mSetError("Buffer Error: Attempted to save pass last page of data");
  365. return false;
  366. }
  367. // Attempt to Save 1 page of data
  368. if( ( offset = mSavePage( _itCurSavePage, _offCurSaveOffSet ) ) == -1 ) return false;
  369. _offCurSaveOffSet = offset;
  370. // Is the file save complete?
  371. if( _offBufferSize == offset ) {
  372. // Finalize the save
  373. _fileHandle->mFinalizeSave();
  374. // We are no longer in a modified state
  375. _boolModified = false;
  376. // Clear our task
  377. mSetTaskStatus();
  378. _currentTask = 0;
  379. _longCurProgress = 100L;
  380. return true;
  381. }
  382. // Record the current progress
  383. _longCurProgress = long( offset / float( _offBufferSize / 100 ) );
  384. // Move to the next page
  385. _itCurSavePage++;
  386. return true;
  387. }
  388. /*!
  389. * Load 1 page of data from a file
  390. */
  391. OffSet Utf8Buffer::mLoadPage( OffSet offSet ) {
  392. OffSet offLen = 0;
  393. Attributes attr;
  394. assert( _fileHandle != 0 );
  395. // Seek to the requested offset
  396. if( _fileHandle->mSetOffSet( offSet ) == -1 ) {
  397. mSetError( _fileHandle->mGetError() );
  398. return -1;
  399. }
  400. Utf8Page *page = new Utf8Page();
  401. // Record the offset for this page
  402. page->mSetFileOffSet( offSet );
  403. page->mSetOffSet( offSet );
  404. // Create an array of data to read to
  405. char* arrBlockData = new char[ _fileHandle->mGetBlockSize() ];
  406. // As long as the next read will return a block greater than 0 bytes
  407. while( ( offLen = _fileHandle->mPeekNextBlock() ) != 0 ) {
  408. // Clear the BlockData for next read
  409. memset(arrBlockData, 0, _fileHandle->mGetBlockSize() );
  410. // Will the next read fit into the current page?
  411. if( ! page->mCanAcceptBytes( offLen ) ) break;
  412. // Read in the next block
  413. if( ( offLen = _fileHandle->mReadNextBlock( arrBlockData, attr ) ) == -1 ) {
  414. mSetError( _fileHandle->mGetError() );
  415. delete page; return -1;
  416. }
  417. // Create a new block of data
  418. Utf8Block block(arrBlockData, offLen );
  419. // Add the attributes to the block
  420. block.mSetAttributes( attr );
  421. // Add the block to the page
  422. page->mAppendBlock( block );
  423. }
  424. delete arrBlockData;
  425. // Only append a new page if the page has some data
  426. if( page->mGetPageSize() != 0 ) {
  427. // Append the page to the page container
  428. _pageContainer.mAppendPage( page );
  429. // Update the buffer size
  430. _offBufferSize += page->mGetPageSize() ;
  431. return _fileHandle->mGetOffSet();
  432. }
  433. // Empty Page, Discard
  434. delete page;
  435. // Return ok, No Errors right?
  436. // The last read might have read 0 bytes ( EOF )
  437. return _fileHandle->mGetOffSet();
  438. }
  439. bool Utf8Buffer::mLoadBuffer( void ) {
  440. // Sanity check!
  441. if( ! _fileHandle ) {
  442. mSetError("Buffer Error: Can't load buffer without a file handle");
  443. return false;
  444. }
  445. // Assign the load Page task and set the status message
  446. mSetTaskStatus() << "Loading " << _fileHandle->mGetFileName() << "..." ;
  447. _currentTask = &Utf8Buffer::mLoadFileTask;
  448. // If the buffer contains pages already
  449. if( _pageContainer.mBegin() != _pageContainer.mEnd() ) {
  450. // Clear all the currently loaded pages
  451. _pageContainer.mClear();
  452. }
  453. // Start the load at the begnning of the file
  454. _offCurLoadOffSet = 0;
  455. _longCurProgress = 0;
  456. return true;
  457. }
  458. /*!
  459. * This task loads the entire file into memory
  460. */
  461. bool Utf8Buffer::mLoadFileTask( void ) {
  462. OffSet offset = 0;
  463. assert( _fileHandle != 0 );
  464. // Attempt to load 1 page at the current offset
  465. if( ( offset = mLoadPage( _offCurLoadOffSet ) ) == -1 ) return false;
  466. _offCurLoadOffSet = offset;
  467. // Is the file loaded completely?
  468. if( _offBufferSize == _fileHandle->mGetFileSize() ) {
  469. // Finalize the load
  470. _fileHandle->mFinalizeLoad();
  471. // Report the entire file loaded into memory
  472. _boolEntireFileLoaded = true;
  473. // Clear our task
  474. mSetTaskStatus();
  475. _currentTask = 0;
  476. _longCurProgress = 100L;
  477. return true;
  478. }
  479. // Record the current progress
  480. _longCurProgress = long( _offBufferSize / float( _fileHandle->mGetFileSize() / 100 ) );
  481. return true;
  482. }
  483. /**
  484. * Move the iterator to a specific offset in the buffer,
  485. * TODO: Improve preformance for large buffers
  486. */
  487. bool Utf8BufferIterator::mSetOffSet( OffSet offset ) {
  488. // Sanity Check, Are we asking for an offset bigger than the buffer?
  489. if( offset > _buf->mGetBufferSize() ) {
  490. mSetError("Buffer Error: Requested OffSet in buffer out of bounds" );
  491. return false;
  492. }
  493. // Get an iterator to the begining of the page containers
  494. Utf8Page::Iterator itPage = _buf->_pageContainer.mBegin();
  495. // Searching thru the buffer
  496. while( itPage != _buf->_pageContainer.mEnd() ) {
  497. // If the current page contains our offset, Stop searching
  498. if( ( itPage->mGetOffSet() + itPage->mGetPageSize() ) > offset ) break;
  499. // Move to the next page
  500. itPage++;
  501. }
  502. // This should never happen... right?
  503. assert( itPage != _buf->_pageContainer.mEnd() );
  504. // Update the page iterator, block iterator and pos
  505. _itPage = itPage;
  506. _itBlock = itPage->mBegin();
  507. _intPos = 0;
  508. // Figure out how far into the page our offset is
  509. offset -= itPage->mGetOffSet();
  510. // Set our offset to the begining of the page, mNext() will update it
  511. // once it finds the correct block
  512. _offCurrent = itPage->mGetOffSet();
  513. // Move to the appropriate offset
  514. return mNext( offset );
  515. }
  516. /**
  517. * Move the iterator back intCount number of characters
  518. */
  519. bool Utf8BufferIterator::mPrev( int intCount ) {
  520. // Remeber the requested positions
  521. int intRequested = intCount;
  522. // If we are asking to move to the previous block
  523. while( intCount > _intPos ) {
  524. // if this is the first block in the page
  525. if( _itBlock == _itPage->mBegin() ) {
  526. // if this is the first page in the buffer
  527. if( _itPage == _buf->_pageContainer.mBegin() ) {
  528. // If this move puts us before the begining of the buffer
  529. if( intCount > _intPos ) {
  530. // ignore the move command and set an error
  531. mSetError("Buffer Error: Requested OffSet in buffer out of bounds");
  532. return false;
  533. }
  534. // Update our location in the block to the begining of the block
  535. _intPos = 0;
  536. return true;
  537. }
  538. // Move back 1 page
  539. _itPage--;
  540. // Set the block to the end of the page
  541. _itBlock = _itPage->mEnd();
  542. }
  543. // Move back 1 block
  544. _itBlock--;
  545. // Subtract the number of positions to move back
  546. intCount -= _intPos;
  547. // Set the new position to the end of the new block
  548. _intPos = _itBlock->mGetSize();
  549. }
  550. // Update our position in the block
  551. _intPos -= intCount;
  552. // Update our offset
  553. _offCurrent -= intRequested;
  554. return true;
  555. }
  556. /**
  557. * Move the iterator over intCount number of blocks
  558. */
  559. bool Utf8BufferIterator::mNextBlock( int intCount ) {
  560. return false;
  561. }
  562. /**
  563. * Move the iterator over intCount number of characters
  564. */
  565. bool Utf8BufferIterator::mNext( int intCount ) {
  566. assert( intCount > 0 );
  567. // Remeber the requested positions
  568. int intRequested = intCount;
  569. // Figure out how many positions we have left till the end of the block
  570. int intPosLeft = _itBlock->mGetSize() - _intPos;
  571. // If we are asking to move past the current block
  572. while( intCount > intPosLeft ) {
  573. // if this is the last block in the page
  574. if( _itBlock == (--( _itPage->mEnd() ) ) ) {
  575. // if this is the last page in the buffer
  576. if( _itPage == (--( _buf->_pageContainer.mEnd() ) ) ) {
  577. // If this move puts us at the end of the buffer
  578. if( ( _intPos + intCount ) == _itBlock->mGetBlockData().size() ) {
  579. // Update our location in the block to the end of the block
  580. _intPos = _itBlock->mGetBlockData().size();
  581. return true;
  582. }
  583. // Else, ignore the move command and throw an error
  584. mSetError("Buffer Error: Requested OffSet in buffer out of bounds");
  585. return false;
  586. }
  587. // Move to the next page
  588. _itPage++;
  589. // Set the block to the begining block
  590. _itBlock = _itPage->mBegin();
  591. } else {
  592. // Move to the next block
  593. _itBlock++;
  594. }
  595. // Subtract the number of positions to move forward
  596. intCount -= intPosLeft;
  597. // Set the new position to the end of the new block
  598. intPosLeft = _itBlock->mGetSize();
  599. }
  600. // Update our position in the block
  601. _intPos += intCount;
  602. // Update our offset
  603. _offCurrent += intRequested;
  604. return true;
  605. }
  606. /**
  607. * Return the charater the iterator points to in utf8 encoding
  608. */
  609. char Utf8BufferIterator::mGetUtf8Char( void ) {
  610. // the block should never never point to the end
  611. assert( _itBlock != _itPage->mEnd() );
  612. // The iterator should never point to a position
  613. // greater than the size of the current block
  614. assert( _intPos <= _itBlock->mGetSize() );
  615. // If the block is empty
  616. if( _itBlock->mIsEmpty() ) {
  617. return 0;
  618. }
  619. // If the pos is at the end of the buffer return 0
  620. if( _intPos == _itBlock->mGetSize() ) {
  621. return 0;
  622. }
  623. // Return the char at the position requested
  624. return _itBlock->mGetBlockData().at(_intPos);
  625. }
  626. /**
  627. * Convienince function to get a string in the buffer
  628. */
  629. const char* Utf8BufferIterator::mGetUtf8String( int intLen, bool boolReverse ) {
  630. // the block should never never point to the end
  631. assert( _itBlock != _itPage->mEnd() );
  632. if( boolReverse ) {
  633. assert( ( _intPos - intLen ) > 0 );
  634. // Get a substring of the datablock and return a const char* to it
  635. return (_strTemp = _itBlock->mGetBlockData().substr(_intPos-intLen, intLen)).c_str();
  636. }
  637. assert( _intPos <= _itBlock->mGetSize() );
  638. // Get a substring of the datablock and return a const char* to it
  639. return (_strTemp = _itBlock->mGetBlockData().substr(_intPos, intLen)).c_str();
  640. }
  641. /**
  642. * Constructor
  643. */
  644. Utf8BufferIterator::Utf8BufferIterator( const Utf8BufferIterator* it ) {
  645. _itPage = it->_itPage;
  646. _itBlock = it->_itBlock;
  647. _offset = it->_offset;
  648. _intPos = it->_intPos;
  649. _buf = it->_buf;
  650. }
  651. /**
  652. * Create a new copy of the Utf8BufferItertor from the an existing instance
  653. */
  654. boost::shared_ptr<BufferIterator> Utf8BufferIterator::copy( ) const {
  655. boost::shared_ptr<Utf8BufferIterator> ptr( new Utf8BufferIterator( this ) );
  656. return ptr;
  657. }
  658. /**
  659. * Returns 1 if the Iterators are equal
  660. */
  661. int Utf8BufferIterator::mEqual( boost::shared_ptr<BufferIterator> sharedLeft, boost::shared_ptr<BufferIterator> sharedRight ){
  662. // Grab the pointers
  663. Utf8BufferIterator* itLeft = boost::polymorphic_downcast<Utf8BufferIterator*>( sharedLeft.get() );
  664. Utf8BufferIterator* itRight = boost::polymorphic_downcast<Utf8BufferIterator*>( sharedRight.get() );
  665. // If the both point to the same address they are equal!
  666. if( itLeft == itRight ) return 1;
  667. // Are these iterators pointing to the same page, block, pos?
  668. if( ( itLeft->_itPage == itRight->_itPage ) and
  669. ( itLeft->_itBlock == itRight->_itBlock ) and
  670. ( itLeft->_intPos == itRight->_intPos ) ) return 1;
  671. return 0;
  672. }
  673. /**
  674. * Insert a char* array into the buffer at the BufferIterator position
  675. */
  676. BufferIterator Utf8Buffer::mInsert( BufferIterator& itBuffer, const char* cstrBuffer, int intBufSize, Attributes &attr ) {
  677. // Ask Buffer Iterator for a pointer to our implementation specific iterator
  678. Utf8BufferIterator* it = itBuffer.mGetPtrAs<Utf8BufferIterator*>();
  679. // Get the block the iterator points to
  680. Utf8Page::Iterator itPage = it->mGetPage();
  681. // Do the attributes of this insert match the block the iterator points to ?
  682. //if( itBlock->mGetAttributes() == attr ) { TODO
  683. // Then insert a new block into the page
  684. // Assign the attributes to the new block
  685. // Spliting the current block if nessary
  686. //}
  687. // Insert the data into the page at this block
  688. itPage->mInsert( it->mGetBlock() , it->mGetPos(), cstrBuffer, intBufSize );
  689. // Get the target page size
  690. OffSet intTargetPageSize = itPage->mGetTargetPageSize();
  691. // Will this insert mean we will need to split the page ?
  692. // ( We Split the page if the page size is twice that of the target page size )
  693. // If we inserted more than 1 page of data, keep spliting
  694. while( itPage->mGetPageSize() >= ( intTargetPageSize * 2 ) ) {
  695. // Split the page, and return an iterator to the new page
  696. itPage = _pageContainer.mSplitPage( it, itPage );
  697. }
  698. // Now that we inserted new data the offsets for the pages need to be adjusted
  699. _pageContainer.mUpdateOffSets( itPage );
  700. // Create a new iterator
  701. Utf8BufferIterator* itNew = new Utf8BufferIterator( it );
  702. // Update the position in the new iterator
  703. itNew->mSetPos( it->mGetPos() + intBufSize );
  704. // Update the size of the buffer
  705. _offBufferSize += intBufSize;
  706. // Notify the buffer was modified
  707. _boolModified = true;
  708. return BufferIterator( itNew );
  709. }
  710. /**
  711. * Delete a range of characters in the buffer starting at the BufferIterator position
  712. * Return true if the delete was successful
  713. */
  714. bool Utf8Buffer::mDelete( BufferIterator& itBuffer, const OffSet offLen ) {
  715. // Create a copy of the iterator
  716. BufferIterator itEnd( itBuffer );
  717. // Advance the buffer by offLen
  718. itEnd.mNext( offLen );
  719. return mDelete(itBuffer, itEnd);
  720. }
  721. /**
  722. * Delete a range of characters in the buffer starting at the BufferIterator position
  723. * and ending at the second BufferIterator, Return true if the delete was successful
  724. * TODO: Think about reverse iterators, maybe 1 method for reverse and 1 for normal
  725. */
  726. bool Utf8Buffer::mDelete( BufferIterator& itBuffer, BufferIterator& itBufferEnd ) {
  727. return false;
  728. // Make a copy of the starting location, incase something goes wrong
  729. // we don't want to in-validate the users iterators
  730. BufferIterator itBufferStart ( itBuffer );
  731. // Ask Buffer Iterators for a pointer to our implementation specific iterators
  732. Utf8BufferIterator* itUtf8Buffer = itBuffer.mGetPtrAs<Utf8BufferIterator*>();
  733. Utf8BufferIterator* itStart = itBufferStart.mGetPtrAs<Utf8BufferIterator*>();
  734. Utf8BufferIterator* itEnd = itBufferEnd.mGetPtrAs<Utf8BufferIterator*>();
  735. // Ensure these iterators belong to us
  736. assert( itEnd->_buf == this );
  737. assert( itStart->_buf == this );
  738. // Delete blocks and pages until Start and
  739. // End iterators point to the same block
  740. while( itStart->mGetBlock() != itEnd->mGetBlock() ) {
  741. // If the iterator doesn't point to the
  742. // begining of the block
  743. if( itStart->mGetPos() != 0 ) {
  744. // Truncate the block starting at intPos
  745. // and return a copy of the bytes that were truncated
  746. Utf8Block deletedChars = itStart->mGetBlock()->mTruncate( itStart->mGetPos() );
  747. // TODO: Append the deleted characters to the change set
  748. // Set the pos to the begining of the new block
  749. itStart->mSetPos( 0 );
  750. // Move the iterator to the next block
  751. itStart->mNextBlock();
  752. continue;
  753. }
  754. // if the iterator now points to a different page
  755. if( itStart->mGetPage() != itUtf8Buffer->mGetPage() ) {
  756. // And the ending iterator doesn't point to this page
  757. if( itStart->mGetPage() != itEnd->mGetPage() ) {
  758. // TODO: Add the page to the change set
  759. // Delete the entire page
  760. continue;
  761. }
  762. }
  763. // TODO: Append the blocks to the changeset
  764. // Delete the block, Updating the block iterator with the next block
  765. itStart->mSetBlock( itStart->mGetPage()->mDeleteBlock( itStart->mGetBlock() ) );
  766. // If we deleted the last block on the page
  767. if( itStart->mGetBlock() == itStart->mGetPage()->mEnd() ) {
  768. // If the page is empty
  769. if( itStart->mGetPage()->mGetPageSize() == 0 ) {
  770. // Remove the page from the page container
  771. itStart->_buf->_pageContainer.mDeletePage( itStart->mGetPage() );
  772. }
  773. // TODO: If we want to merge small pages into other pages
  774. // we should do it here
  775. }
  776. }
  777. // If the iterator doesn't point to the begining of the block
  778. // Delete characters from this block
  779. return true;
  780. }
  781. /*!
  782. * Append a block to the page and return an iterator to the block
  783. */
  784. Utf8Block::Iterator Utf8Page::mAppendBlock( const Utf8Block &block ) {
  785. // Add the block to our page
  786. _blockContainer.push_back( block );
  787. // Record Incr the Cur size of our page
  788. _offPageSize += block.mGetSize();
  789. // Return an iterator to the last element
  790. return --(_blockContainer.end());
  791. }
  792. /*!
  793. * Remove a block from the page
  794. */
  795. Utf8Block::Iterator Utf8Page::mDeleteBlock( const Utf8Block::Iterator& itBlock ) {
  796. // Shrink the size of the page by the block size removed
  797. _offPageSize -= itBlock->mGetSize();
  798. // Remove this block from the container
  799. return _blockContainer.erase( itBlock );
  800. }
  801. /**
  802. * Insert Data into the page
  803. */
  804. void Utf8Page::mInsert( const Utf8Block::Iterator& it, int intPos, const char* cstrBuffer, int intLen ) {
  805. // Insert the data into the buffer
  806. it->mInsert(intPos, cstrBuffer, intLen);
  807. // Update the page size
  808. _offPageSize += intLen;
  809. }
  810. /*!
  811. * Return true if the page size is greater or equal to the max page size
  812. */
  813. bool Utf8Page::mIsFull( void ) const {
  814. if (_offPageSize >= _offTargetPageSize ) { return true; }
  815. return false;
  816. }
  817. /*!
  818. * Return false if adding offBytes to the page will put it over the TargetPageSize
  819. * Return true otherwise
  820. */
  821. bool Utf8Page::mCanAcceptBytes( OffSet offBytes ) const {
  822. // If the num of bytes will put us over the max page size
  823. if( ( offBytes + _offPageSize) > _offTargetPageSize ) {
  824. return false;
  825. }
  826. return true;
  827. }
  828. /*!
  829. * Assign the char* of data to the internal structure of the block
  830. */
  831. void Utf8Block::mSetBlockData( const char* cstrData, OffSet offLen ) {
  832. // Assign the new data
  833. _strBlockData.assign( cstrData, offLen );
  834. // Update the size
  835. _sizeBlockSize = offLen;
  836. }
  837. /*!
  838. * Assign the char* of data to the internal structure of the block
  839. */
  840. void Utf8Block::mSetBlockData( const std::string& string ) {
  841. // Assign the new data
  842. _strBlockData.assign( string );
  843. // Update the size
  844. _sizeBlockSize = string.size();
  845. }
  846. /**
  847. * TODO: Consider using std::string::iterators instead of intPos
  848. */
  849. void Utf8Block::mInsert( int intPos, const char* cstrData, int intSize ) {
  850. // If the intPos requested is larger than the
  851. // size of the string append the data
  852. if( _strBlockData.size() < intPos ) {
  853. // Append the data
  854. _strBlockData.append(cstrData, intSize);
  855. }else {
  856. // Insert more data
  857. _strBlockData.insert(intPos, cstrData, intSize);
  858. }
  859. // Update the size
  860. _sizeBlockSize += intSize;
  861. }
  862. /**
  863. * Split this block starting at intPos, The characters after intPos
  864. * will remain in the block, the characters before intPos will make
  865. * up the new block
  866. */
  867. Utf8Block Utf8Block::mSplit( int intPos ) {
  868. Utf8Block newBlock;
  869. // Set the new block data
  870. newBlock.mSetBlockData( _strBlockData.substr( 0, intPos ) );
  871. // Erase the copied block data
  872. _strBlockData.erase( 0, intPos );
  873. // Update the block size
  874. _sizeBlockSize = _strBlockData.size();
  875. // Copy the attributes from this block into the new block
  876. newBlock.mSetAttributes( mGetAttributes() );
  877. return newBlock;
  878. }
  879. /**
  880. * Truncates the characters starting at intPos till the end of the block
  881. * This method is just like mSplit() except it returns the truncated data
  882. */
  883. Utf8Block Utf8Block::mTruncate( int intPos ) {
  884. Utf8Block newBlock;
  885. // Set the new block data
  886. newBlock.mSetBlockData( _strBlockData.substr( intPos, std::string::npos ) );
  887. // Erase the copied block data
  888. _strBlockData.erase( intPos, std::string::npos );
  889. // Update the block size
  890. _sizeBlockSize = _strBlockData.size();
  891. // Copy the attributes from this block into the new block
  892. newBlock.mSetAttributes( mGetAttributes() );
  893. return newBlock;
  894. }
  895. Utf8Block::Utf8Block( char* cstrData, OffSet offLen ) {
  896. _offOffSet = 0;
  897. _sizeBlockSize = 0;
  898. mSetBlockData( cstrData, offLen );
  899. }
  900. /*!
  901. * Add a page to the container
  902. */
  903. Utf8Page::Iterator Utf8PageContainer::mAppendPage( Utf8Page *page ) {
  904. // Add the new page to the list
  905. _listContainer.push_back( page );
  906. // Incr the size of the page container
  907. _longSize++;
  908. // Return an iterator to the last element
  909. return --(_listContainer.end());
  910. }
  911. /**
  912. * Split the page the iterator points to and update the iterator before returning
  913. */
  914. Utf8Page::Iterator Utf8PageContainer::mSplitPage( Utf8BufferIterator *itBuffer, Utf8Page::Iterator &itOldPage ) {
  915. // Get the target page size
  916. OffSet intTargetSize = itOldPage->mGetTargetPageSize();
  917. OffSet intCurSize = 0;
  918. // Can not split unless the page is over the target page size
  919. assert( itOldPage->mGetPageSize() > itOldPage->mGetTargetPageSize() );
  920. // Insert a new page right before the old page
  921. Utf8Page::Iterator itNewPage = mInsertPage( itOldPage, new Utf8Page() );
  922. // Move blocks into the new page until we hit our target size
  923. Utf8Block::Iterator itBlock;
  924. for( itBlock = itOldPage->mBegin() ; itBlock != itOldPage->mEnd() ; ) {
  925. int intSplitPos = 0;
  926. Utf8Block::Iterator itNewBlock;
  927. // If we have hit our target size, break
  928. if( intCurSize >= intTargetSize ) break;
  929. // If this block will put us over the target block size
  930. if( ( intCurSize + itBlock->mGetSize() ) > intTargetSize ) {
  931. // Figure out where to split the block
  932. intSplitPos = intTargetSize - intCurSize;
  933. // Split the block, appending the new block to the page
  934. itNewBlock = itNewPage->mAppendBlock( itBlock->mSplit( intSplitPos ) );
  935. } else {
  936. // Append the block to the new page
  937. itNewBlock = itNewPage->mAppendBlock( *itBlock );
  938. }
  939. // If the buffer iterator points to the block just copied
  940. if( itBlock == itBuffer->mGetBlock() ) {
  941. // And we split this block
  942. if( intSplitPos ) {
  943. // If the pos points to the new block
  944. if( itBuffer->mGetPos() < intSplitPos ) {
  945. // Update the iterator to point to the new block
  946. itBuffer->mSetBlock( itNewBlock );
  947. } else {
  948. // Adjust the pos for the split block
  949. itBuffer->mSetPos( intSplitPos - itBuffer->mGetPos() );
  950. }
  951. }
  952. }
  953. // If we split the block, don't delete it
  954. if( ! intSplitPos ) {
  955. // Remove the old block
  956. itBlock = itOldPage->mDeleteBlock( itBlock );
  957. }
  958. // Update our current size
  959. intCurSize += itNewBlock->mGetSize();
  960. }
  961. // Update the offsets of the pages
  962. itNewPage->mSetOffSet( itOldPage->mGetOffSet() );
  963. itNewPage->mSetFileOffSet( itOldPage->mGetFileOffSet() );
  964. itOldPage->mSetOffSet( intCurSize + itNewPage->mGetOffSet() );
  965. itOldPage->mSetFileOffSet( -1 );
  966. return itNewPage;
  967. }
  968. Utf8Page::Iterator Utf8PageContainer::mDeletePage( Utf8Page::Iterator const &it ) {
  969. assert( it != _listContainer.end() );
  970. // Update the length of the container
  971. --_longSize;
  972. return _listContainer.erase( it );
  973. }
  974. /*!
  975. * Insert a page to the container
  976. */
  977. Utf8Page::Iterator Utf8PageContainer::mInsertPage( Utf8Page::Iterator const &it, Utf8Page *page) {
  978. // Did we mean append?
  979. if( ! _longSize ) {
  980. return mAppendPage( page );
  981. }
  982. // Update the length of the container
  983. ++_longSize;
  984. return _listContainer.insert( it , page );
  985. }
  986. /**
  987. * Update all the offsets for the pages that follow
  988. * one the iterator points to
  989. */
  990. void Utf8PageContainer::mUpdateOffSets( Utf8Page::Iterator const &it ) {
  991. assert( it != mEnd() );
  992. // Get the offset of the first page ( Should be correct )
  993. OffSet offset = it->mGetOffSet() + it->mGetPageSize();
  994. Utf8Page::Iterator i = it;
  995. // Update the pages until we hit the last page
  996. while( ++i != mEnd() ) {
  997. // Set the offset from the previous page
  998. i->mSetOffSet( offset );
  999. // Calculate the offset of the next page from the size of the previous page
  1000. offset += i->mGetPageSize();
  1001. }
  1002. }