table.h | searchcode

/trunk/src/kangmodb/table.h

# · C Header · 249 lines · 113 code · 21 blank · 115 comment · 4 complexity · 8750903ef59496bb4e775f2e0478f99a MD5 · raw file

/**
 *  table.h
 *  kangmodb
 *
 *  Created by 강모 김 on 11. 5. 1..
 *  Copyright 2011 강모소프트. All rights reserved.
 *
 * Design of "Restart Recovery":
 * (1) Restart Recovery Speed Optimization
 *     - To boost the restart recovery process, stgTable keeps (key, data) pairs in shared memory chunks allocated from stgSharedRegion.
 *     - Log records of active transactions are also kept in stgTransLogBuffer which allocates shared memory chunks from stgSharedRegion.
 *       c.f.> stgTableMgr manages the stgSharedRegion object for all tables.
 *
 * (2) Power Failure or OS crash - We don't have shared memory regions.
 *     - Redo all log records in the log stream, to which only committed transactions send their own log buffers.
 *     - No need to undo any log records, because all transaction logs in the log stream are from committed transactions only.
 *
 * (3) Process Failure - We have shared memory regions.
 *     (3.1) Phase I - Reconstruct stgTableMgr and stgTable objects from the shared memory region.
 *     - Iterate shared memory chunks for stgTable objects to find out root chunk of each table.
 *     - Root chunk is one of shared memory chunks allocated by stgSharedRegion, and becomes the start point for searching (key,data) pair.
 *     - At this point, we can open a cursor for each table, but some tables may have uncommitted modifications by active transactions.
 *     (3.2) Phase II - Reconstruct stgTransLogBuffer objects of active transactions from the shared memory region, rollback all logs.
 *     - Iterate shared memory chunks for stgTransLogBuffer to find out root chunk of each transaction log buffer.
 *     - Root chunk is one of shared memory chunks allocated by stgSharedRegion, and becomes the start point for traversing log records.
 *     - Revert all new versions of (key, data) pairs made by these active transactions by traversing keys stored in log records.
 *     (3.3) Phase III - Get the maxCommitVersion from the shared memory region, set it to maxCommitVersion_ member in stgTransMgr.
 *     - stgTransMgr stores maxCommitVersion in the shared memory region.
 *
 * Design of "DATA Versioning on a KEY" :
 *     - stgTable stores (KEY, DATA) pair. Duplicate key value is not allowed, so a KEY value is unique in a table.
 *     - Each DATA has following flags
 *       - (01bit) deleted 
 *         - Indicates the (KEY, DATA) pair is deleted
 *       - (32bit) savepointID
 *         - The savepoint ID. A new version of DATA is created only when the savepoint ID changes.
 *         - Update/Delete with the same savepoint ID does not create a new version, but it is in-place updated.
 *       - (64bit) commitVersion
 *         - The commit version number. 0 indicates it is not committed yet. A number greater than 0 indicates it is committed.
 *         - The version number increases monotonically. Transactions allocate the commit version number from stgTransMgr.
 *         - savepointID can share data storage with commitVersion to optimize memory space.
 *
 *     - Insertion : (KEY, DATA).
 *       - stgTable inserts the KEY into access methods such as skip lists, and then dangles the DATA onto the key with the default savepoint 0(s0), 
 *         but the commit version is set to 0(c0) indicating it is not committed yet.
 *         - (KEY, DATA-s0-c0)
 *       - Upon transaction commit, the transaction allocates a commit version(say 1234) from stgTransMgr, sets it to DATA indicating it is committed.
 *       - After setting commit version to all (KEY,DATA) pairs that the transaction modified, call stgTransMgr::commitVersion to notify the transaction completed setting the commit version to all modified (KEY,DATA) pairs.
 *       - This allows other transactions to read the committed (KEY,DATA) pairs. 
 *         - (KEY, DATA-c1234)
 *     - When the key is updated, a new version of data, data_s0 is added. (key, data-s0-c0, data )
 *       - When the transaction commits, it sets the commit version to the new version (key, data-c1234, data )
 *     - When the key is deleted, a new version of data, data_s0 is added with its delete bit set. (key, data-deleted-s0-c0, data)
 *       - When the transaction commits, it sets the commit version to the new version (key, data-deleted-c1234, data )
 *
 * Design of "Rollback to Savepoint" :
 *     - Only one transaction can create a new version of DATA on a KEY. (Say, modifying transaction)
 *     - In case the modifying transaction updates, deletes, or inserts DATA on a KEY multiple times, 
 *       no new version is created, but in-place update is done on the first new version of the data.
 *     - However, when a savepoint number of a transaction increases because the transaction allocated a new savepoint, 
 *       a new version of data is created for the savepoint when the transaction changes DATA on the KEY.
 *       - (KEY, DATA-s1-c0, DATA-s0-c0 ) ; Savepoint number increase from s0 to s1. A dedicated new version for s1, DATA-s1-c0 is created.
 *     - This is to help "Rollback to Savepoint". Rolling back to a savepoint simply removes all new versions created after the savepoint.
 *       - (KEY, DATA-s0-c0) ; Rollback to Savepoint s0 removed the version DATA-s1-c0.
 *     - How to iterate all KEYs that the transaction touched? 
 *       - We can iterate log records in stgTransLogBuffer in reverse order until we meet the log for Savepoint s1.
 *       - Each INSERT, UPDATE, DELETE log has the KEY value, so we can search the access method with the KEY.
 *     - After removing all versions created since Savepoint s1, stgTransLogBuffer is truncated at the position where the Savepoint s1 log exists.
 *
 * Design of "Concurrency control for updating transactions on the same KEY " :
 *     - Other transactions that want to access the same KEY whose DATA is modified by another transaction need to rollback.
 *       - After the rollback, they need to wait until the modifying transaction does commit or rollback. 
 *       - And then, they get the new viewVersion, start accessing the (key, data) pair again.
 *       - If no transaction created a new version of the pair yet, a transaction can create one for it. 
 *       - CAS(Compare and Swap) operation is used for implementing the concurrency control 
 *         to check whether another transaction has created a new version. 
 *       - Applications need to be aware of this process. 
 *         - Application programmers need to write the code to begin the transaction again, modify the same set of tables and (key, value) pairs again.
 */

#ifndef _KD_TABLE_H_
#define _KD_TABLE_H_ (1)

#include "kdInfra.h"
#include "types.h"

#include "transMgr.h"
#include "transaction.h"
#include "set.h"
#include "data.h"
#include "chunkList.h"

/** @brief The table of DATA versions on each KEY pairs. This is the interface called by both normal processing and restart recovery.
 */
class stgTable
{
private :
	set_t * set_;
	stgChunkList chunks_;
public :
	stgTable()
	{
	}
	~stgTable()
	{
	}
	
	/** @brief Initialize the table object with the given tableId.
	 */
	KD_VOID initialize(int tableId)
	{
		KD_TRY
		{
			set_ = set_alloc();
			// TODO : Check if set allocation failed.
		}
		KD_CATCH
		KD_FINALLY
		KD_END
	}

	/** @brief Destroy the table object.
	 */
	KD_VOID destroy()
	{
		KD_TRY
		{
			// TODO : Destroy set_ object.
		}
		KD_CATCH
		KD_FINALLY
		KD_END
	}

	/** @brief Find the data descriptor associated with the given key.
	 * @param dataDesc *dataDesc is set to the found data descriptor. Set *dataDesc to NULL if the key is not found.
	 */
	KD_VOID getDataDesc(const stgKey & key, stgDataDesc ** dataDesc ) const
	{
		KD_TRY
		{
			stgDataDesc * foundData;

			foundData = (stgDataDesc*) set_lookup(set_, key);
			
			*dataDesc = foundData;
		}
		KD_CATCH
		KD_FINALLY
		KD_END
	}

	/** @brief If the key exists, simply return the associated data descritor. Otherwise allocate a new data descriptor, associate it with the given key, put it into an access method. 
	 * @param dataDesc The data descriptor associated with the given key.
	 */ 
	KD_VOID putDataDesc(const stgKey & key, stgDataDesc ** dataDesc )
	{
		KD_TRY
		{
			stgDataDesc * newDataDesc ;
			stgDataDesc * existingDataDesc ;

			// TODO : Allocated newDataDesc from the shared memory chunk.
			KD_ASSERT(0);

			// TODO : Need allocate memory for stgKey::key_, copy the key content. Need to think about where we can free the memory.
			KD_ASSERT(0);
			
			// TODO : Think about what to do when two transactions concurrently try to call this function.
			existingDataDesc = (stgDataDesc*) set_update( set_, key, newDataDesc, 0 /* overwrite */ );

			// If the key already exists, throw KD_EXCP_KEY_EXISTS.
			if ( existingDataDesc )
			{
				// TODO : Free newDataDesc from the shared memory chunk.
				KD_ASSERT(0);
				*dataDesc = existingDataDesc;
			}
			else
			{
				*dataDesc = newDataDesc;
			}
		}
		KD_CATCH
		KD_FINALLY
		KD_END
	}
	
	/** @brief Find the latest data version whose version is less than or equal to the given viewVersion.
	 * @param viewVersion : The maximum version that the transaction can view. Set it to MAX_DATA_VERSION to see uncomitted changes.
	 */
	KD_VOID seekData(const stgVersion viewVersion, stgDataDesc * dataDesc, stgData * data )
	{
		KD_TRY
		{
			stgDataVersion * dataVersion;
			
			KD_CALL( dataDesc->findLatestVersion( viewVersion, & dataVersion) );
			if ( dataVersion == NULL ) 
				// The key is found, but this transaction can't see it or it is marked as deleted. 
				KD_THROW( KD_EXCP_KEY_NOT_FOUND );
			if ( dataVersion->deleted )
				// The latest data version is marked as deleted.
				KD_THROW( KD_EXCP_KEY_NOT_FOUND );
			
			// No data copy happens. Simply copy the data pointer and set the length.
			*data = stgData( dataVersion->data, dataVersion->dataLength );
		}
		KD_CATCH
		KD_FINALLY
		KD_END
	}

	/** @brief Update the data latest data version within the given data descriptor.
	 * If the latest uncomitted data version matches the current savepoint ID in tx, in-place update the data.
	 * Otherwise create a new data version, set savepoint ID of the new version to the current one of tx, copy the data into the new version. 
	 */
	KD_VOID updateData(const stgVersion viewVersion, const stgSavepointId spID, stgDataDesc * dataDesc, const stgData & data )
	{
		KD_TRY
		{
			// TODO : Continue to implement.
			KD_ASSERT(0);
		}
		KD_CATCH
		KD_FINALLY
		KD_END
	}

	/** @brief Set the deleted bit of the latest data version within the given data descriptor.
	 * If the latest uncomitted data version matches the current savepoint ID in tx, set the deleted bit.
	 * Otherwise create a new data version, set savepoint ID of the new version to the current one of tx, set the deleted bit. 
	 * @deletedBit : true if deleted, false otherwise.
	 */
	KD_VOID setDeletedBit(const stgVersion viewVersion, const stgSavepointId spID, stgDataDesc * dataDesc, bool deletedBit )
	{
		KD_TRY
		{
			// TODO : Implement
			KD_ASSERT(0);
		}
		KD_CATCH
		KD_FINALLY
		KD_END
	}

};

#endif /* _KD_TABLE_H_ */