svm_struct_cpp.cpp | searchcode

/SVM_Multiclass/svm_struct_cpp.cpp

https://github.com/wxiang7/iPM3F
C++ | 1070 lines | 889 code | 108 blank | 73 comment | 181 complexity | 3f36364cebef82b298fccae14ab1ac01 MD5 | raw file

// SVMStruct_CPlus.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

/***********************************************************************/
/*                                                                     */
/*   svm_struct_main.c                                                 */
/*                                                                     */
/*   Command line interface to the alignment learning module of the    */
/*   Support Vector Machine.                                           */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 03.07.04                                                    */
/*                                                                     */
/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved       */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/***********************************************************************/


/* the following enables you to use svm-learn out of C++ */
//#ifdef __cplusplus
//extern "C" {
//#endif
#include "../SVMLight/svm_common.h"
#include "../SVMLight/svm_learn.h"
//#ifdef __cplusplus
//}
//#endif
# include "svm_struct_learn.h"
# include "svm_struct_classify.h"
# include "svm_struct_common.h"
# include "svm_struct_api.h"

#include <stdio.h>
#include <string>
#include <assert.h>
#include <windows.h>
#include <fstream>
#include <queue>
using namespace std;
/* } */

/* File-scope state shared by main(), loadConfig() and the evaluation helpers below. */
char trainfile[200];           /* file with training examples */
char modelfile[200];           /* file for resulting classifier */
/* Run-mode flags: loadConfig() resets each of them to false and then sets it
   from the matching line of the configuration file. */
bool g_bClassify;              /* true -> main() calls classify() and returns */
bool g_bCrossValidation;       /* true -> main() runs eval_CV(), otherwise eval_Ratio() */
bool g_bInnerCV;               /* set by an "Inner-CV" config line */
bool g_bWarmStart;             /* set by a "Warm-Start" config line */
int g_nStopStep;			  /* the number of iterations to early stop (loadConfig() default: 200; "EarlyStopStep" key). */
string g_strTrainDataDir;      /* "TrainDataDir" config value; main() scans this directory for *.data files */
int g_nDim;                    /* "Total Dimension" config value */
int g_nStateNum;               /* "State Number" config value */
int g_nFoldNum;                /* "FoldNumber" config value; fold count handed to eval_CV() */

/* Forward declarations of the helpers that are defined after main(). */

/* Cross-validation over every data file; results are written to ofs. */
void eval_CV(vector<wstring>&vecFileName, const int &nFoldNum, 
			 STRUCTMODEL &structmodel,
			 STRUCT_LEARN_PARM &struct_parm,
			 LEARN_PARM &learn_parm, 
			 KERNEL_PARM &kernel_parm, 
			 int alg_type, ofstream &ofs);
/* Train on the leading vecRatio[i] percent of every data file and test on the rest. */
void eval_Ratio(vector<wstring>&vecFileName, vector<int> &vecRatio, 
				STRUCTMODEL &structmodel,
				STRUCT_LEARN_PARM &struct_parm,
				LEARN_PARM &learn_parm, 
				KERNEL_PARM &kernel_parm, 
				int alg_type, ofstream &ofs);
/* 5-fold search over a fixed list of candidate C values; returns the selected value. */
double inner_CV(SAMPLE *sample, STRUCTMODEL &structmodel,
			 STRUCT_LEARN_PARM &struct_parm,
			 LEARN_PARM &learn_parm, 
			 KERNEL_PARM &kernel_parm, 
			 int alg_type, ofstream &ofs);
/* Command-line parsing: argc, argv, trainfile, modelfile, verbosity, struct_verbosity,
   struct_parm, learn_parm, kernel_parm, alg_type (see the definition for the order). */
void   read_input_parameters(int, char **, char *, char *,long *, long *,
							 STRUCT_LEARN_PARM *, LEARN_PARM *, KERNEL_PARM *, int *);
/* Prints "(more)" and blocks until a character is read from stdin. */
void   wait_any_key();
/* Prints the usage text. */
void   print_help();

void partitionData(SAMPLE *sample, const int &nFoldNum, vector<int> &vecSampleNum)
{
	if ( sample->examples[0].m_nFoldIx == -1 ) {
		vecSampleNum.assign( nFoldNum, 0 );
		int nUnitNum = sample->n / nFoldNum;
		for ( int i=0; i<nUnitNum*nFoldNum; i++ ) {
			sample->examples[i].m_nFoldIx = i / nUnitNum;
			vecSampleNum[sample->examples[i].m_nFoldIx] ++;
		}
		for ( int i=nUnitNum*nFoldNum; i<sample->n; i++ ) {
			sample->examples[i].m_nFoldIx = nFoldNum - 1;
			vecSampleNum[nFoldNum-1] ++;
		}
	}
}

/*
 * Builds a newly malloc'ed SAMPLE holding every example of 'c' that lies
 * outside the held-out window of fold 'foldix' (1-based). The window covers
 * n / nfold consecutive examples; for the last fold it extends to the end of
 * the data. Examples are copied by plain assignment, so the returned sample
 * shares whatever the EXAMPLE members point to with 'c'.
 */
SAMPLE* get_traindata(SAMPLE* c, const int&nfold, const int &foldix)
{
	const int foldSize = c->n / nfold;
	const int heldOutBegin = (foldix - 1) * foldSize;
	const int heldOutEnd = ( foldix < nfold ) ? foldix * foldSize : c->n;

	SAMPLE *subset = (SAMPLE*)malloc(sizeof(SAMPLE));
	subset->examples = 0;
	subset->n = 0;

	int kept = 0;
	for ( int src=0; src<c->n; src++ )
	{
		const bool heldOut = ( src >= heldOutBegin ) && ( src < heldOutEnd );
		if ( !heldOut ) {
			/* grow the array by one slot and append the example */
			subset->examples = (EXAMPLE*) realloc(subset->examples, sizeof(EXAMPLE)*(kept+1));
			subset->examples[kept] = c->examples[src];
			kept++;
		}
	}

	subset->n = kept;
	return subset;
}

/*
 * Counterpart of get_traindata(): builds a newly malloc'ed SAMPLE holding
 * only the examples of 'c' inside the held-out window of fold 'foldix'
 * (1-based). The window covers n / nfold consecutive examples; for the last
 * fold it extends to the end of the data. Examples are copied by plain
 * assignment.
 */
SAMPLE* get_testdata(SAMPLE* c, const int&nfold, const int &foldix)
{
	const int foldSize = c->n / nfold;
	const int windowBegin = (foldix - 1) * foldSize;
	const int windowEnd = ( foldix < nfold ) ? foldix * foldSize : c->n;

	SAMPLE *subset = (SAMPLE*)malloc(sizeof(SAMPLE));
	subset->examples = 0;
	subset->n = 0;

	int taken = 0;
	for ( int src=0; src<c->n; src++ )
	{
		const bool inWindow = ( src >= windowBegin ) && ( src < windowEnd );
		if ( inWindow ) {
			/* grow the array by one slot and append the example */
			subset->examples = (EXAMPLE*) realloc(subset->examples, sizeof(EXAMPLE)*(taken+1));
			subset->examples[taken] = c->examples[src];
			taken++;
		}
	}

	subset->n = taken;
	return subset;
}

/*
 * Reorders sample->examples according to a list of indices read from
 * 'filename': afterwards position i holds the example that was previously at
 * position order[i] (0-based, as the values are used directly as indices).
 *
 * The sample is left untouched, and a message is printed to stderr, when the
 * file cannot be opened, memory cannot be allocated, an index lies outside
 * [0, n) or the file supplies fewer than sample->n indices. Indices beyond
 * the first sample->n are ignored.
 */
void reorder(SAMPLE* sample, char *filename)
{
	if ( sample == NULL || sample->n <= 0 || filename == NULL ) return;

	FILE *fileptr = fopen(filename, "r");
	if ( fileptr == NULL ) {
		fprintf(stderr, "reorder: cannot open order file '%s'\n", filename);
		return;
	}

	int *order = (int*)malloc(sizeof(int)*sample->n);
	EXAMPLE *docs = (EXAMPLE*)malloc(sizeof(EXAMPLE) * sample->n);
	bool bValid = ( order != NULL && docs != NULL );

	/* Stop on the first token that is not a number (fscanf returns 0 there,
	   not EOF) and never write past the end of order[]. */
	int num = 0, ix = 0;
	while ( bValid && ix < sample->n && fscanf(fileptr, "%10d", &num) == 1 ) {
		if ( num < 0 || num >= sample->n ) bValid = false;
		else order[ix++] = num;
	}
	fclose(fileptr);

	if ( bValid && ix == sample->n ) {
		for ( int i=0; i<sample->n; i++ )
			docs[i] = sample->examples[i];
		for ( int i=0; i<sample->n; i++ )
			sample->examples[i] = docs[order[i]];
	} else {
		fprintf(stderr, "reorder: order file '%s' is unusable, sample left unchanged\n", filename);
	}

	free(docs);
	free(order);
}


double evaluation(SAMPLE *testsample, STRUCTMODEL &model, ofstream &ofs, bool bWrite)
{
	double avgloss = 0;
	for( int i=0; i<testsample->n; i++) 
	{
		LABEL y = classify_struct_example(testsample->examples[i].x, &model, NULL);
		double l = loss(testsample->examples[i].y, y, NULL);

		if ( bWrite ) {
			ofs << y.classlabel << "\t" << testsample->examples[i].y.classlabel << endl;
		}

		avgloss += l;
		/*if(l == 0) correct++;
		else incorrect++;*/
		//eval_prediction(i, testsample.examples[i], y, &model, &sparm, &teststats);

		if(empty_label(testsample->examples[i].y)) 
		//{ no_accuracy=1; } /* test data is not labeled */
		if(verbosity>=2) {
			if((i+1) % 100 == 0) {
				printf("%ld..",i+1); fflush(stdout);
			}
		}
		free_label(y);
	}

	avgloss /= testsample.n;
	return avgloss;
}
/*
 * Reads a list of training ratios from a text file, one integer per line,
 * and appends every value greater than zero to trainScheme. Lines that do not
 * start with a positive integer (blank lines, text, zero, negatives) are
 * skipped; lines may be of any length.
 *
 * trainScheme - output vector; existing entries are kept.
 * pFileName   - path of the scheme file. If it cannot be opened, a message is
 *               printed to stderr and the program exits with status 1.
 */
void loadTrainScheme(std::vector<int> &trainScheme, const char *pFileName)
{
	std::ifstream ifs;
	if ( pFileName != NULL ) ifs.open(pFileName, std::ios_base::in);
	if ( !ifs.is_open() ) {
		fprintf(stderr, "loadTrainScheme: cannot open '%s'\n", pFileName != NULL ? pFileName : "(null)");
		exit(1);
	}

	/* std::getline has no length limit, so an over-long line can no longer
	   put the stream into a failed state and cut the list short. */
	std::string strLine;
	while ( std::getline(ifs, strLine) ) {
		const int nTrainingDataRatio = atoi( strLine.c_str() );
		if ( nTrainingDataRatio <= 0 ) continue;

		trainScheme.push_back(nTrainingDataRatio);
	}
	ifs.close();
}
/*
 * Resets the run-mode globals to their defaults (all flags false,
 * g_nStopStep = 200) and then overrides them from the configuration file.
 * A missing or unreadable file leaves the defaults in place.
 *
 * File format, one entry per line:
 *   - empty lines and lines containing "%%" are ignored;
 *   - a line containing "classify" sets g_bClassify; lines that are exactly
 *     "Cross-Validation", "Inner-CV" or "Warm-Start" set the matching flag;
 *   - "Key = value" lines are recognised for the keys TrainDataDir,
 *     Total Dimension, State Number, FoldNumber and EarlyStopStep.
 *     Whitespace around the value is removed; a recognised key without '='
 *     is ignored.
 */
void loadConfig(const char* pFileName)
{
	g_bClassify = false;
	g_bCrossValidation = false;
	g_bInnerCV = false;
	g_bWarmStart = false;
	g_nStopStep = 200;
	if ( pFileName == NULL ) return;
	std::ifstream ifs(pFileName, std::ios_base::in);
	if ( !ifs.is_open() ) return;

	/* std::getline ends the loop on any read failure; the former
	   "while(!eof())" with a fixed buffer never terminated once a line of
	   512 or more characters set the fail bit. */
	std::string str;
	while ( std::getline(ifs, str) ) {
		/* drop a trailing carriage return so the exact-match flags still
		   compare equal on files with CRLF line ends */
		if ( !str.empty() && str[str.size()-1] == '\r' ) str.erase(str.size()-1);
		if ( str.empty() || str.find("%%") != str.npos ) continue;

		if ( str.find("classify") != str.npos) {
			g_bClassify = true;
		} else if ( str.compare("Cross-Validation") == 0 ) {
			g_bCrossValidation = true;
		} else if ( str.compare("Inner-CV") == 0 ) {
			g_bInnerCV = true;
		} else if ( str.compare("Warm-Start") == 0 ) {
			g_bWarmStart = true;
		}

		/* without '=' there is no value to extract */
		const size_t stPos = str.find("=");
		if ( stPos == str.npos ) continue;

		std::string strValue = str.substr(stPos+1);
		const size_t stFirst = strValue.find_first_not_of(" \t");
		if ( stFirst == strValue.npos ) {
			strValue.clear();
		} else {
			const size_t stLast = strValue.find_last_not_of(" \t\r");
			strValue = strValue.substr(stFirst, stLast - stFirst + 1);
		}

		if ( str.find("TrainDataDir") != str.npos ) {
			g_strTrainDataDir = strValue;
		} else if ( str.find("Total Dimension") != str.npos ) {
			g_nDim = atoi(strValue.c_str());
		} else if (str.find("State Number") != str.npos ) {
			g_nStateNum = atoi(strValue.c_str());
		} else if ( str.find("FoldNumber") != str.npos ) {
			g_nFoldNum = atoi(strValue.c_str());
		} else if ( str.find("EarlyStopStep") != str.npos ) {
			g_nStopStep = atoi(strValue.c_str());
		}
	}
	ifs.close();
}
/*
 * Converts nLen bytes of UTF-8 text at pszSrc to a wide string using
 * MultiByteToWideChar(CP_UTF8). A leading U+FEFF (byte-order mark) is
 * dropped. Returns an empty string when the conversion reports no output.
 * The result ends at the first NUL character of the converted text.
 */
wstring UTF82WChar(const BYTE * pszSrc, int nLen)
{
	LPCSTR pszUtf8 = (LPCSTR)pszSrc;

	/* first pass: ask for the required number of wide characters */
	const int cchWide = MultiByteToWideChar(CP_UTF8, 0, pszUtf8, nLen, 0, 0);
	if (cchWide <= 0) 
		return L"";

	WCHAR * pBuffer = new WCHAR[cchWide + 1];
	if (!pBuffer) 
		return L"";

	/* second pass: do the conversion and terminate the buffer */
	MultiByteToWideChar(CP_UTF8, 0, pszUtf8, nLen, pBuffer, cchWide);
	pBuffer[cchWide] = L'\0';

	/* skip the byte-order mark, if any, by starting one character later */
	const WCHAR * pText = (pBuffer[0] == 0xFEFF) ? pBuffer + 1 : pBuffer;

	wstring wstrResult = pText;
	delete[] pBuffer;

	return wstrResult;
}

string WChar2Ansi(LPCWSTR pwszSrc)
{
	int nLen = WideCharToMultiByte(CP_ACP, 
		0,
		pwszSrc,
		-1,
		NULL, 
		0,
		NULL,
		NULL);
	if (nLen<= 0)
	{
		return NULL;
	}
	char* pszDst = new char[nLen];
	if (NULL == pszDst)
	{
		return NULL;
	}
	WideCharToMultiByte(CP_ACP, 
		0,
		pwszSrc,
		-1,
		pszDst,
		nLen,
		NULL,
		NULL);
	pszDst[nLen -1] = 0;

	string strTemp = pszDst;
	delete [] pszDst;

	return strTemp;
}
/*
 * Lists the files directly inside 'Dir' (sub-directories are not entered)
 * whose extension equals 'Suffix' and stores their full paths
 * ("<Dir>\<name>") in vecFileName, replacing its previous contents.
 *
 * Dir         - directory to scan, without trailing backslash.
 * Suffix      - extension to match, without the leading dot; the comparison
 *               is case-sensitive (wcscmp).
 * vecFileName - output list of matching paths.
 *
 * Always returns ERROR_SUCCESS; a directory that cannot be enumerated simply
 * yields an empty list. The directory and the search pattern are echoed to
 * stdout.
 */
int TraverseDirectory(IN const std::wstring &Dir,IN const std::wstring &Suffix, OUT std::vector<std::wstring> &vecFileName)
{
	vecFileName.clear();

	wprintf(L"%s\n", Dir.c_str());
	const std::wstring strPattern = Dir + std::wstring(L"\\*.*");
	wprintf(L"%s\n\n", strPattern.c_str());

	WIN32_FIND_DATA FindFileData;
	HANDLE hFind = FindFirstFile(strPattern.c_str(), &FindFileData);
	if ( hFind == INVALID_HANDLE_VALUE ) return ERROR_SUCCESS;

	wchar_t szExt[MAX_PATH];
	do {
		/* directories (including "." and "..") are never data files */
		if ( ( FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) != 0 ) continue;

		/* Reset the buffer and require the leading dot: for a name without
		   extension szExt is empty, and comparing szExt+1 would look at
		   whatever an earlier entry left behind. */
		szExt[0] = L'\0';
		_wsplitpath(FindFileData.cFileName, NULL, NULL, NULL, szExt);
		if ( szExt[0] == L'.' && wcscmp(szExt+1, Suffix.c_str()) == 0 )
		{
			vecFileName.push_back(Dir + std::wstring(L"\\") + std::wstring(FindFileData.cFileName));
		}
	} while ( FindNextFile(hFind, &FindFileData) != FALSE );

	/* release the search handle, which was leaked before */
	FindClose(hFind);

	return ERROR_SUCCESS;
}


int main (int argc, char* argv[])
{  
	SAMPLE sample;  /* training sample */
	LEARN_PARM learn_parm;
	KERNEL_PARM kernel_parm;
	STRUCT_LEARN_PARM struct_parm;
	STRUCTMODEL structmodel;
	int alg_type;

	LABEL *pLabel = new LABEL();
	LABEL lb;
	lb.m_labels = NULL;
	EXAMPLE *pEx = new EXAMPLE();
	pEx->y.scores = NULL;


	svm_struct_learn_api_init(argc,argv);

	read_input_parameters(argc, argv, trainfile, modelfile, &verbosity,
		&struct_verbosity, &struct_parm, &learn_parm, &kernel_parm,&alg_type);

	loadConfig(learn_parm.addConfigFile);
	if ( g_bClassify ) { classify(argc, argv); return 0; }


	/* do evaluation */
	vector<wstring> vecFileName;
	TraverseDirectory(UTF82WChar((unsigned char *)g_strTrainDataDir.c_str(), g_strTrainDataDir.size()), L"data", vecFileName);

	ofstream ofs("evRes.txt", ios_base::out | ios_base::app);
	ofs.seekp(ios_base::end);
	if ( learn_parm.m_bQPSolver ) ofs << "L2-norm penalty" << endl;
	else ofs << "LP Solver for L1-norm penalty" << endl;
	ofs << "\t -- C: " << struct_parm.C;
	if ( g_bCrossValidation ) {
		ofs << "\tCV: " << g_nFoldNum << endl;
		eval_CV(vecFileName, g_nFoldNum, structmodel, struct_parm, learn_parm, kernel_parm, alg_type, ofs);
	} else {
		ofs << "\tSampling" << endl;
		vector<int> vecRatio;
		loadTrainScheme(vecRatio, "trainScheme.txt");

		eval_Ratio(vecFileName, vecRatio, structmodel, struct_parm, learn_parm, kernel_parm, alg_type, ofs);
	}

	ofs.close();

	svm_struct_learn_api_exit();

	return 0;
}

void eval_CV(vector<wstring> &vecFileName, const int &nFoldNum, 
			 STRUCTMODEL &structmodel,
			 STRUCT_LEARN_PARM &struct_parm,
			 LEARN_PARM &learn_parm, 
			 KERNEL_PARM &kernel_parm, 
			 int alg_type, ofstream &ofs)
{
	if ( vecFileName.empty() ) return;

	vector<double> totalAvg( vecFileName.size() );
	vector<double> totalVar( vecFileName.size() );
	vector<double> arryAvgLoss;
	for ( int fIx=0; fIx<vecFileName.size(); fIx++ ) 
	{
		arryAvgLoss.assign(nFoldNum, 0);
		strcpy(trainfile, WChar2Ansi(vecFileName[fIx].c_str()).c_str());
		ofs << "   ****** file: " << trainfile << " ********" << endl;
		if(struct_verbosity >=1 ) { printf("Reading training examples..."); fflush(stdout); }

		/* read the training examples */
		SAMPLE sample = read_struct_examples(trainfile, &struct_parm);
		if(struct_verbosity>=1) { printf("done\n"); fflush(stdout); }

		// 10 fold CV for the dataset
		vector<int> vecSampleNum;
		partitionData(&sample, nFoldNum, vecSampleNum);

		// for each partition of the data
		for ( int it=0; it<nFoldNum; it++ )
		{
			ofs << "\tfold: " << it;
			//char buff[512];
			sprintf(modelfile, "parm_f%d_cv%d.txt", fIx, it+1);

			// split the data into training & testing sets
			SAMPLE trSample, tsSample;
			trSample.n = vecSampleNum[it];
			trSample.examples = (EXAMPLE*)malloc( sizeof(EXAMPLE) * vecSampleNum[it] );
			tsSample.n = sample.n - vecSampleNum[it];
			tsSample.examples = (EXAMPLE*)malloc( sizeof(EXAMPLE) * tsSample.n );
			int trIx=0, tsIx=0;
			for ( int k=0; k<sample.n; k++ ) {
				if ( it == sample.examples[k].m_nFoldIx ) {
					trSample.examples[trIx].x.SetHead( sample.examples[k].x.m_pHeadNode );
					trSample.examples[trIx].x.m_pEndNode = sample.examples[k].x.m_pEndNode;
					trSample.examples[trIx].y.m_nSize = sample.examples[k].y.m_nSize;
					trSample.examples[trIx ++].y.m_labels = sample.examples[k].y.m_labels;
				} else {
					tsSample.examples[tsIx].x.SetHead( sample.examples[k].x.m_pHeadNode );
					tsSample.examples[tsIx].x.m_pEndNode = sample.examples[k].x.m_pEndNode;
					tsSample.examples[tsIx].y.m_nSize = sample.examples[k].y.m_nSize;
					tsSample.examples[tsIx ++].y.m_labels = sample.examples[k].y.m_labels;
				}
			}

			if ( learn_parm.m_bInnerCV ) {
				struct_parm.C = inner_CV( &trSample, structmodel, 
					struct_parm, learn_parm, kernel_parm, alg_type, ofs);
			}

			/* Do the learning and return structmodel. */
			long runtime_start, runtime_end;
			runtime_start = get_runtime();

			bool bSucc = true;
			if(alg_type == 1) {
				if ( learn_parm.m_bQPSolver )
					svm_learn_struct(trSample, &struct_parm, &learn_parm, &kernel_parm, &structmodel);
				else 
					bSucc = svm_learn_struct_lp(trSample, &struct_parm, &learn_parm, &kernel_parm, &structmodel, g_nStopStep, true);
			} else exit(1);
			runtime_end = get_runtime();

			/* Warning: The model contains references to the original data 'docs'.
			If you want to free the original data, and only keep the model, you 
			have to make a deep copy of 'model'. */
			if(struct_verbosity>=1) { printf("Writing learned model...");fflush(stdout);}
			write_struct_model(modelfile, &structmodel, &struct_parm);
			if(struct_verbosity>=1) {printf("done\n");fflush(stdout); }

			// do classification on the testing data
			arryAvgLoss[it] = evaluation(&tsSample, structmodel, ofs, false);
			ofs << "\tAvgLoss: " << arryAvgLoss[it] << "  [" << bSucc << "]" 
				<< "\tCPU Seconds: " << (runtime_end-runtime_start)/100.0 << endl;

			// free memory
			for ( int k=0; k<trSample.n; k++ ) {
				trSample.examples[k].x.SetHead(NULL);
				trSample.examples[k].y.m_labels = NULL;
			}
			for ( int k=0; k<tsSample.n; k++ ) {
				tsSample.examples[k].x.SetHead(NULL);
				tsSample.examples[k].y.m_labels = NULL;
			}
			free_struct_sample(trSample);
			free_struct_sample(tsSample);
			free_struct_model(structmodel);
		}

		free_struct_sample(sample);


		ofs << "\tAverage Loss: ";
		totalAvg[fIx] = 0;
		for ( int i=0; i<nFoldNum; i++ ) totalAvg[fIx] += arryAvgLoss[i] / nFoldNum;
		totalVar[fIx] = 0;
		for ( int i=0; i<nFoldNum; i++ ) totalVar[fIx] += (arryAvgLoss[i] - totalAvg[fIx]) * (arryAvgLoss[i] - totalAvg[fIx]) / nFoldNum;
		totalVar[fIx] = sqrt( totalVar[fIx] );
		ofs << "\t  Avg: " << totalAvg[fIx] << "\tVar: " << totalVar[fIx] << endl;
	}

	ofs << "\tTotal Avg over " << vecFileName.size() << " files: " << endl;
	double dAvgVal = 0, dAvgVar = 0;
	for ( int i=0; i<vecFileName.size(); i++ ) {
		dAvgVal += totalAvg[i] / vecFileName.size();
		dAvgVar += totalVar[i] / vecFileName.size();
	}
	ofs << "\t  Avg: " << dAvgVal << "\tVar: " << dAvgVar << endl;
}

void eval_Ratio(vector<wstring>&vecFileName, vector<int> &vecRatio, 
				STRUCTMODEL &structmodel,
				STRUCT_LEARN_PARM &struct_parm,
				LEARN_PARM &learn_parm, 
				KERNEL_PARM &kernel_parm, 
				int alg_type, ofstream &ofs)
{
	vector<vector<double> > arryAvgLoss( vecFileName.size() );
	for ( int db=0; db<vecFileName.size(); db++ )
	{
		strcpy(trainfile, WChar2Ansi(vecFileName[db].c_str()).c_str());
		ofs << "   ****** file: " << trainfile << " ********" << endl;
		if(struct_verbosity >=1 ) { printf("Reading training examples..."); fflush(stdout); }

		/* read the training examples */
		SAMPLE sample = read_struct_examples(trainfile,&struct_parm);
		if(struct_verbosity>=1) { printf("done\n"); fflush(stdout); }

		arryAvgLoss[db].assign(vecRatio.size(), 0);
		// for each partition of the data
		for ( int it=0; it<vecRatio.size(); it++ )
		{
			ofs << "\tRatio: " << vecRatio[it] << endl;
			// split the data into training & testing sets
			int nTrainSampleNum = sample.n * vecRatio[it] / 100;
			SAMPLE trSample;
			trSample.n = nTrainSampleNum;
			trSample.examples = (EXAMPLE*)malloc( sizeof(EXAMPLE) * nTrainSampleNum );
			for ( int k=0; k<nTrainSampleNum; k++ ) {
				trSample.examples[k].x.SetHead( sample.examples[k].x.m_pHeadNode );
				trSample.examples[k].x.m_pEndNode = sample.examples[k].x.m_pEndNode;
				trSample.examples[k].y.m_nSize = sample.examples[k].y.m_nSize;
				trSample.examples[k].y.m_labels = sample.examples[k].y.m_labels;
			}
			SAMPLE tsSample;
			tsSample.n = sample.n - nTrainSampleNum;
			tsSample.examples = (EXAMPLE*)malloc( sizeof(EXAMPLE) * tsSample.n );
			for ( int k=0; k<tsSample.n; k++ ) {
				tsSample.examples[k].x.SetHead( sample.examples[nTrainSampleNum+k].x.m_pHeadNode );
				tsSample.examples[k].x.m_pEndNode = sample.examples[nTrainSampleNum+k].x.m_pEndNode;
				tsSample.examples[k].y.m_nSize = sample.examples[nTrainSampleNum+k].y.m_nSize;
				tsSample.examples[k].y.m_labels = sample.examples[nTrainSampleNum+k].y.m_labels;
			}

			/* perfrom inner-cv to select good param. */
			if ( learn_parm.m_bInnerCV ) {
				struct_parm.C = inner_CV( &trSample, structmodel, 
					struct_parm, learn_parm, kernel_parm, alg_type, ofs);
			}

			/* Do the learning and return structmodel. */
			long runtime_start, runtime_end;
			runtime_start = get_runtime();

			bool bSucc = true;
			if(alg_type == 1) {
				if ( learn_parm.m_bQPSolver )
					svm_learn_struct(trSample, &struct_parm, &learn_parm, &kernel_parm, &structmodel);
				else 
					bSucc = svm_learn_struct_lp(trSample, &struct_parm, &learn_parm, &kernel_parm, &structmodel, g_nStopStep, true);
			} else exit(1);
			runtime_end = get_runtime();


			/* Warning: The model contains references to the original data 'docs'.
			If you want to free the original data, and only keep the model, you 
			have to make a deep copy of 'model'. */
			if(struct_verbosity>=1) { printf("Writing learned model...");fflush(stdout);}
			write_struct_model(modelfile, &structmodel, &struct_parm);
			if(struct_verbosity>=1) {printf("done\n");fflush(stdout); }

			// do classification on the testing data
			arryAvgLoss[db][it] = evaluation(&tsSample, structmodel, ofs, false);
			ofs << "\tAvgLoss: " << arryAvgLoss[db][it] << "  [" << bSucc << "]" 
				<< "\tCPU Seconds: " << (runtime_end-runtime_start)/100.0 << endl;

			// free memory
			for ( int k=0; k<trSample.n; k++ ) {
				trSample.examples[k].x.SetHead(NULL);
				trSample.examples[k].y.m_labels = NULL;
			}
			for ( int k=0; k<tsSample.n; k++ ) {
				tsSample.examples[k].x.SetHead(NULL);
				tsSample.examples[k].y.m_labels = NULL;
			}
			free_struct_sample(trSample);
			free_struct_sample(tsSample);
			free_struct_model(structmodel);
		}

		free_struct_sample(sample);
		//free_struct_model(structmodel);
	}
	ofs << "\tAverage Loss over the " << vecFileName.size() << " files" << endl;
	vector<double> vecDBAvgLoss(vecRatio.size(), 0);
	for ( int i=0; i<vecRatio.size(); i++ ) {
		for ( int db=0; db<vecFileName.size(); db++ )
			vecDBAvgLoss[i] += arryAvgLoss[db][i] / vecFileName.size();
		ofs << "\t" << vecRatio[i] << "\t: " << vecDBAvgLoss[i] << endl;
	}
}

/* inner cross-validation to select good parameters. */
double inner_CV(SAMPLE *sample, STRUCTMODEL &structmodel,
			 STRUCT_LEARN_PARM &struct_parm,
			 LEARN_PARM &learn_parm, 
			 KERNEL_PARM &kernel_parm, 
			 int alg_type, ofstream &ofs)
{
	/* the candidate lambda. */
	vector<double> vecLambda;
	vecLambda.push_back(0.01);
	vecLambda.push_back(0.1);
	vecLambda.push_back(1);
	vecLambda.push_back(4);
	vecLambda.push_back(9);
	vecLambda.push_back(16);

	vector<double> arryAvgLoss( vecLambda.size(), 0 );
	int nUnitNum = sample->n / 5;
	for ( int cv=0; cv<5; cv++ ) /* 5-fold inner cv. */
	{
		// split the data into training & testing sets
		SAMPLE trSample, tsSample;
		trSample.n = nUnitNum;
		if ( cv == 4 ) trSample.n = sample->n - nUnitNum * 4;
		trSample.examples = (EXAMPLE*)malloc( sizeof(EXAMPLE) * trSample.n );
		tsSample.n = sample->n - trSample.n;
		tsSample.examples = (EXAMPLE*)malloc( sizeof(EXAMPLE) * tsSample.n );
		for ( int k=0; k<trSample.n; k++ ) {
			trSample.examples[k].x.SetHead( sample->examples[k+nUnitNum*cv].x.m_pHeadNode );
			trSample.examples[k].x.m_pEndNode = sample->examples[k+nUnitNum*cv].x.m_pEndNode;
			trSample.examples[k].y.m_nSize = sample->examples[k+nUnitNum*cv].y.m_nSize;
			trSample.examples[k].y.m_labels = sample->examples[k+nUnitNum*cv].y.m_labels;
		}
		int tsIx=0;
		for ( int k=0; k<sample->n; k++ ) {
			if ( k >= nUnitNum*cv && k < nUnitNum*cv+trSample.n ) continue;
			tsSample.examples[tsIx].x.SetHead( sample->examples[k].x.m_pHeadNode );
			tsSample.examples[tsIx].x.m_pEndNode = sample->examples[k].x.m_pEndNode;
			tsSample.examples[tsIx].y.m_nSize = sample->examples[k].y.m_nSize;
			tsSample.examples[tsIx ++].y.m_labels = sample->examples[k].y.m_labels;
		}

		for ( int k=0; k<vecLambda.size(); k++ ) {
			struct_parm.C = vecLambda[k];
			/* Do the learning and return structmodel. */
			bool bSucc = true;
			if(alg_type == 1) {
				if ( learn_parm.m_bQPSolver )
					svm_learn_struct(trSample, &struct_parm, &learn_parm, &kernel_parm, &structmodel);
				else 
					bSucc = svm_learn_struct_lp(trSample, &struct_parm, &learn_parm, &kernel_parm, &structmodel, g_nStopStep, true);
			} else exit(1);

			/* Warning: The model contains references to the original data 'docs'.
			If you want to free the original data, and only keep the model, you 
			have to make a deep copy of 'model'. */
			write_struct_model(modelfile, &structmodel, &struct_parm);

			arryAvgLoss[k] += evaluation(&tsSample, structmodel, ofs, false) / 5;

			// free memory
			for ( int k=0; k<trSample.n; k++ ) {
				trSample.examples[k].x.SetHead(NULL);
				trSample.examples[k].y.m_labels = NULL;
			}
			for ( int k=0; k<tsSample.n; k++ ) {
				tsSample.examples[k].x.SetHead(NULL);
				tsSample.examples[k].y.m_labels = NULL;
			}
			free_struct_sample(trSample);
			free_struct_sample(tsSample);
			free_struct_model(structmodel);
		}
	}

	double dBestLambda;
	double dMinErrRate = 1;
	for ( int k=0; k<vecLambda.size(); k++ ) {
		if ( arryAvgLoss[k] < dMinErrRate )
		{
			dMinErrRate = arryAvgLoss[k];
			dBestLambda = vecLambda[k];
		}
	}

	return dBestLambda;
}


/*---------------------------------------------------------------------------*/

void read_input_parameters(int argc,char *argv[],char *trainfile,
						   char *modelfile,
						   long *verbosity,long *struct_verbosity, 
						   STRUCT_LEARN_PARM *struct_parm,
						   LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm,
						   int *alg_type)
{
	long i;
	char type[100];

	/* set default */
	(*alg_type)=DEFAULT_ALG_TYPE;
	struct_parm->C=-0.01;
	struct_parm->slack_norm=1;
	struct_parm->epsilon=DEFAULT_EPS;
	struct_parm->custom_argc=0;
	struct_parm->loss_function=DEFAULT_LOSS_FCT;
	struct_parm->loss_type=DEFAULT_RESCALING;
	struct_parm->newconstretrain=100;
	struct_parm->ccache_size=5;

	strcpy (modelfile, "svm_struct_model");
	strcpy (learn_parm->predfile, "trans_predictions");
	strcpy (learn_parm->alphafile, "");
	(*verbosity)=0;/*verbosity for svm_light*/
	(*struct_verbosity)=1; /*verbosity for struct learning portion*/
	learn_parm->biased_hyperplane=1;
	learn_parm->remove_inconsistent=0;
	learn_parm->skip_final_opt_check=0;
	learn_parm->svm_maxqpsize=10;
	learn_parm->svm_newvarsinqp=0;
	learn_parm->svm_iter_to_shrink=-9999;
	learn_parm->maxiter=100000;
	learn_parm->kernel_cache_size=40;
	learn_parm->svm_c=99999999;  /* overridden by struct_parm->C */
	learn_parm->eps=0.001;       /* overridden by struct_parm->epsilon */
	learn_parm->transduction_posratio=-1.0;
	learn_parm->svm_costratio=1.0;
	learn_parm->svm_costratio_unlab=1.0;
	learn_parm->svm_unlabbound=1E-5;
	learn_parm->epsilon_crit=0.001;
	learn_parm->epsilon_a=1E-10;  /* changed from 1e-15 */
	learn_parm->compute_loo=0;
	learn_parm->rho=1.0;
	learn_parm->xa_depth=0;
	learn_parm->m_bQPSolver=true; /* default to be QP solver*/
	kernel_parm->kernel_type=0;
	kernel_parm->poly_degree=3;
	kernel_parm->rbf_gamma=1.0;
	kernel_parm->coef_lin=1;
	kernel_parm->coef_const=1;
	strcpy(kernel_parm->custom,"empty");
	strcpy(type,"c");

	for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
		switch ((argv[i])[1]) 
		{ 
		case '?': print_help(); exit(0);
		case 'a': i++; strcpy(learn_parm->alphafile,argv[i]); break;
		case 'c': i++; struct_parm->C=atof(argv[i]); break;
		case 'p': i++; struct_parm->slack_norm=atol(argv[i]); break;
		case 'e': i++; struct_parm->epsilon=atof(argv[i]); break;
		case 'k': i++; struct_parm->newconstretrain=atol(argv[i]); break;
		case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break;
		case '#': i++; learn_parm->maxiter=atol(argv[i]); break;
		case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break;
		case 'w': i++; (*alg_type)=atol(argv[i]); break;
		case 'o': i++; struct_parm->loss_type=atol(argv[i]); break;
		case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break;
		case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break;
		case 'l': i++; struct_parm->loss_function=atol(argv[i]); break;
		case 'f': i++; struct_parm->ccache_size=atol(argv[i]); break;
		case 't': i++; kernel_parm->kernel_type=atol(argv[i]); break;
		case 'd': i++; kernel_parm->poly_degree=atol(argv[i]); break;
		case 'g': i++; kernel_parm->rbf_gamma=atof(argv[i]); break;
		case 's': i++; kernel_parm->coef_lin=atof(argv[i]); break;
		case 'r': i++; kernel_parm->coef_const=atof(argv[i]); break;
		case 'u': i++; strcpy(kernel_parm->custom,argv[i]); break;
		case '-': strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);i++; strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);break; 
		case 'v': i++; (*struct_verbosity)=atol(argv[i]); break;
		case 'y': i++; (*verbosity)=atol(argv[i]); break;
		case 'b': i++; strcpy(learn_parm->addConfigFile,argv[i]); break;
		case 'x': learn_parm->m_bQPSolver=false; break;
		default: printf("\nUnrecognized option %s!\n\n",argv[i]);
			print_help();
			exit(0);
		}
	}
	if(i>=argc) {
		printf("\nNot enough input parameters!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	strcpy (trainfile, argv[i]);
	if((i+1)<argc) {
		strcpy (modelfile, argv[i+1]);
	}
	if(learn_parm->svm_iter_to_shrink == -9999) {
		learn_parm->svm_iter_to_shrink=100;
	}

	if((learn_parm->skip_final_opt_check) 
		&& (kernel_parm->kernel_type == LINEAR)) {
			printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n");
			learn_parm->skip_final_opt_check=0;
	}    
	if((learn_parm->skip_final_opt_check) 
		&& (learn_parm->remove_inconsistent)) {
			printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n");
			wait_any_key();
			print_help();
			exit(0);
	}    
	if((learn_parm->svm_maxqpsize<2)) {
		printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize); 
		wait_any_key();
		print_help();
		exit(0);
	}
	if((learn_parm->svm_maxqpsize<learn_parm->svm_newvarsinqp)) {
		printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize); 
		printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp); 
		wait_any_key();
		print_help();
		exit(0);
	}
	if(learn_parm->svm_iter_to_shrink<1) {
		printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink);
		wait_any_key();
		print_help();
		exit(0);
	}
	if(struct_parm->C<0) {
		printf("\nYou have to specify a value for the parameter '-c' (C>0)!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	if(((*alg_type) < 1) || ((*alg_type) > 4)) {
		printf("\nAlgorithm type must be either '1', '2', '3', or '4'!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	if(learn_parm->transduction_posratio>1) {
		printf("\nThe fraction of unlabeled examples to classify as positives must\n");
		printf("be less than 1.0 !!!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	if(learn_parm->svm_costratio<=0) {
		printf("\nThe COSTRATIO parameter must be greater than zero!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	if(struct_parm->epsilon<=0) {
		printf("\nThe epsilon parameter must be greater than zero!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	if((struct_parm->slack_norm<1) || (struct_parm->slack_norm>2)) {
		printf("\nThe norm of the slacks must be either 1 (L1-norm) or 2 (L2-norm)!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	if((struct_parm->loss_type != SLACK_RESCALING) 
		&& (struct_parm->loss_type != MARGIN_RESCALING)) {
			printf("\nThe loss type must be either 1 (slack rescaling) or 2 (margin rescaling)!\n\n");
			wait_any_key();
			print_help();
			exit(0);
	}
	if(learn_parm->rho<0) {
		printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n");
		printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n");
		printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n");
		wait_any_key();
		print_help();
		exit(0);
	}
	if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) {
		printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n");
		printf("for switching to the conventional xa/estimates described in T. Joachims,\n");
		printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n");
		wait_any_key();
		print_help();
		exit(0);
	}

	parse_struct_parameters(struct_parm);
}

/* Pagination helper for the help output: writes a "(more)" prompt to
   stdout and then consumes a single character from stdin. The character
   that was read (or EOF) is discarded. */
void wait_any_key()
{
	fputs("\n(more)\n", stdout);
	(void)fgetc(stdin);
}

/* Writes the full command-line help of the learning module to stdout.
   Order of the output: version banner, copyright notice, usage line,
   the option reference grouped by category, whatever print_struct_help()
   emits under the "Structure learning options" heading, a "(more)" pause,
   and finally the literature list that the [n] markers point to.
   Text without format arguments is emitted as concatenated literals via
   fputs(); only lines that embed a macro value go through printf(). */
void print_help()
{
	/* Version banner followed by the copyright notice. */
	printf("\nSVM-struct learning module: %s, %s, %s\n",
		INST_NAME, INST_VERSION, INST_VERSION_DATE);
	printf("   includes SVM-struct %s for learning complex outputs, %s\n",
		STRUCT_VERSION, STRUCT_VERSION_DATE);
	printf("   includes SVM-light %s quadratic optimizer, %s\n",
		VERSION, VERSION_DATE);
	copyright_notice();

	/* Usage, arguments, general options and the first learning options. */
	fputs(
		"   usage: svm_struct_learn [options] example_file model_file\n\n"
		"Arguments:\n"
		"         example_file-> file with training data\n"
		"         model_file  -> file to store learned decision rule in\n"
		"General options:\n"
		"         -?          -> this help\n"
		"         -v [0..3]   -> verbosity level (default 1)\n"
		"         -y [0..3]   -> verbosity level for svm_light (default 0)\n"
		"Learning options:\n"
		"         -c float    -> C: trade-off between training error\n"
		"                        and margin (default 0.01)\n"
		"         -p [1,2]    -> L-norm to use for slack variables. Use 1 for L1-norm,\n"
		"                        use 2 for squared slacks. (default 1)\n"
		"         -o [1,2]    -> Rescaling method to use for loss.\n"
		"                        1: slack rescaling\n"
		"                        2: margin rescaling\n",
		stdout);
	printf("                        (default %d)\n", DEFAULT_RESCALING);
	fputs(
		"         -l [0..]    -> Loss function to use.\n"
		"                        0: zero/one loss\n",
		stdout);
	printf("                        (default %d)\n", DEFAULT_LOSS_FCT);

	/* Kernel options and the heading of the optimization section. */
	fputs(
		"Kernel options:\n"
		"         -t int      -> type of kernel function:\n"
		"                        0: linear (default)\n"
		"                        1: polynomial (s a*b+c)^d\n"
		"                        2: radial basis function exp(-gamma ||a-b||^2)\n"
		"                        3: sigmoid tanh(s a*b + c)\n"
		"                        4: user defined kernel from kernel.h\n"
		"         -d int      -> parameter d in polynomial kernel\n"
		"         -g float    -> parameter gamma in rbf kernel\n"
		"         -s float    -> parameter s in sigmoid/poly kernel\n"
		"         -r float    -> parameter c in sigmoid/poly kernel\n"
		"         -u string   -> parameter of user defined kernel\n"
		"Optimization options (see [2][3]):\n",
		stdout);
	printf("         -w [1,2,3,4]-> choice of structural learning algorithm (default %d):\n",
		(int)DEFAULT_ALG_TYPE);
	fputs(
		"                        1: algorithm described in [2]\n"
		"                        2: joint constraint algorithm (primal) [to be published]\n"
		"                        3: joint constraint algorithm (dual) [to be published]\n"
		"                        4: joint constraint algorithm (dual) with constr. cache\n"
		"         -q [2..]    -> maximum size of QP-subproblems (default 10)\n"
		"         -n [2..q]   -> number of new variables entering the working set\n"
		"                        in each iteration (default n = q). Set n<q to prevent\n"
		"                        zig-zagging.\n"
		"         -m [5..]    -> size of cache for kernel evaluations in MB (default 40)\n"
		"                        (used only for -w 1 with kernels)\n"
		"         -f [5..]    -> number of constraints to cache for each example\n"
		"                        (default 5) (used with -w 4)\n"
		"         -e float    -> eps: Allow that error for termination criterion\n",
		stdout);
	printf("                        (default %f)\n", DEFAULT_EPS);

	/* Remaining optimization options, output options, structure heading. */
	fputs(
		"         -h [5..]    -> number of iterations a variable needs to be\n"
		"                        optimal before considered for shrinking (default 100)\n"
		"         -k [1..]    -> number of new constraints to accumulate before\n"
		"                        recomputing the QP solution (default 100) (-w 1 only)\n"
		"         -# int      -> terminate QP subproblem optimization, if no progress\n"
		"                        after this number of iterations. (default 100000)\n"
		"Output options:\n"
		"         -a string   -> write all alphas to this file after learning\n"
		"                        (in the same order as in the training set)\n"
		"Structure learning options:\n",
		stdout);
	print_struct_help();

	/* Pause before the reference list is printed. */
	wait_any_key();

	fputs(
		"\nMore details in:\n"
		"[1] T. Joachims, Learning to Align Sequences: A Maximum Margin Aproach.\n"
		"    Technical Report, September, 2003.\n"
		"[2] I. Tsochantaridis, T. Joachims, T. Hofmann, and Y. Altun, Large Margin\n"
		"    Methods for Structured and Interdependent Output Variables, Journal\n"
		"    of Machine Learning Research (JMLR), Vol. 6(Sep):1453-1484, 2005.\n"
		"[3] T. Joachims, Making Large-Scale SVM Learning Practical. Advances in\n"
		"    Kernel Methods - Support Vector Learning, B. Schölkopf and C. Burges and\n"
		"    A. Smola (ed.), MIT Press, 1999.\n"
		"[4] T. Joachims, Learning to Classify Text Using Support Vector\n"
		"    Machines: Methods, Theory, and Algorithms. Dissertation, Kluwer,\n"
		"    2002.\n\n",
		stdout);
}