PageRenderTime 275ms CodeModel.GetById 67ms app.highlight 190ms RepoModel.GetById 1ms app.codeStats 1ms

/code/application_core/Parameters.cpp

http://github.com/sebhtml/ray
C++ | 1791 lines | 1624 code | 120 blank | 47 comment | 35 complexity | d4c5437bd621df343c8a1b8fe701764f MD5 | raw file
   1/*
   2 	Ray
   3    Copyright (C) 2010, 2011  Sébastien Boisvert
   4
   5	http://DeNovoAssembler.SourceForge.Net/
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation, version 3 of the License.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You have received a copy of the GNU General Public License
  17    along with this program (gpl-3.0.txt).  
  18	see <http://www.gnu.org/licenses/>
  19
  20*/
  21
  22/* 
  23 * TODO: add option -minimumScaffoldLength
  24 */
  25
  26#include <core/OperatingSystem.h>
  27#include<application_core/common_functions.h>
  28#include<application_core/Parameters.h>
  29#include <plugin_Library/LibraryPeakFinder.h>
  30#include<plugin_SequencesLoader/Read.h>
  31#include<plugin_SequencesLoader/Loader.h>
  32#include <memory/MyAllocator.h>
  33
  34#include<string>
  35#include<sstream>
  36#include<iostream>
  37#include<vector>
  38#include<cstdlib>
  39#include<fstream>
  40#include<assert.h>
  41#include<math.h>
  42using namespace std;
  43
  44void Parameters::getIndexes(int count,vector<int>*out){
  45	vector<int> numbers;
  46	for(int i=0;i<count;i++)
  47		numbers.push_back(i);
  48
  49	srand(99);
  50	while(numbers.size()>0){
  51		int randomIndex=rand()%numbers.size();
  52		int index=numbers[randomIndex];
  53		out->push_back(index);
  54		vector<int> newNumbers;
  55		for(int i=0;i<(int)numbers.size();i++){
  56			if(randomIndex==i)
  57				continue;
  58			newNumbers.push_back(numbers[i]);
  59		}
  60		numbers=newNumbers;
  61	}
  62}
  63
  64Parameters::Parameters(){
  65	m_providedPeakCoverage=false;
  66	m_providedRepeatCoverage=false;
  67	m_providedMinimumCoverage=false;
  68	m_prefix="RayOutput";
  69	m_initiated=false;
  70	m_showMemoryAllocations=false;
  71	m_directory="assembly";
  72	m_minimumContigLength=100;
  73	m_wordSize=21;
  74	m_colorSpaceMode=false;
  75	m_reducerIsActivated=false;
  76	m_amos=false;
  77	m_error=false;
  78	m_memoryFilePrefix=m_prefix;
  79	m_profiler=false;
  80	m_debugBubbles=false;
  81	m_debugSeeds=false;
  82	m_showMemoryUsage=true;
  83
  84	m_showEndingContext=false;
  85	m_writeKmers=false;
  86	m_showExtensionChoice=false;
  87	m_showReadPlacement=false;
  88
  89	/** use the new NovaEngine (TM) */
  90	m_options.insert("-use-NovaEngine");
  91
  92	/** the default is to not use a default value */
  93	m_degree=2;
  94
  95	m_checkpointDirectory="Checkpoints";
  96	m_hasCheckpointDirectory=false;
  97}
  98
  99bool Parameters::showExtensionChoice(){
 100	return m_showExtensionChoice;
 101}
 102
 103bool Parameters::showEndingContext(){
 104	return m_showEndingContext;
 105}
 106
 107bool Parameters::debugBubbles(){
 108	return m_debugBubbles;
 109}
 110
 111bool Parameters::runProfiler(){
 112	return m_profiler;
 113}
 114
 115bool Parameters::debugSeeds(){
 116	return m_debugSeeds;
 117}
 118
 119int Parameters::getWordSize(){
 120	return m_wordSize;
 121}
 122
 123void Parameters::loadCommandsFromFile(char*file){
 124	ifstream f(file);
 125	while(!f.eof()){
 126		string token="";
 127		f>>token;
 128		if(token!=""){
 129			m_commands.push_back(token);
 130		}
 131	}
 132	f.close();
 133}
 134
 135void Parameters::loadCommandsFromArguments(int argc,char**argv){
 136	for(int i=1;i<argc;i++){
 137		m_commands.push_back(argv[i]);
 138	}
 139}
 140
 141/* parse commands */
 142void Parameters::parseCommands(){
 143
 144	m_initiated=true;
 145	set<string> commands;
 146
 147	for(int i=0;i<(int)m_commands.size();i++)
 148		m_options.insert(m_commands[i]);
 149
 150	m_showCommunicationEvents=false;
 151
 152	if(hasOption("-show-communication-events"))
 153		m_showCommunicationEvents=true;
 154
 155/*
 156	if(hasOption("-test-network-only")){
 157		m_options.insert("-write-network-test-raw-data");
 158	}
 159*/
 160
 161	if(hasOption("-show-read-placement")){
 162		m_showReadPlacement=true;
 163	}
 164
 165	m_originalCommands=m_commands;
 166
 167	/* shuffle randomly arguments */
 168
 169	vector<vector<string> > opCodes;
 170	int i=0;
 171	while(i<(int)m_commands.size()){
 172		int j=i;
 173		while(j+1<(int)m_commands.size() && m_commands[j+1][0]!='-'){
 174			j++;
 175		}
 176		vector<string> opCode;
 177		opCode.push_back(m_commands[i]);
 178		for(int k=i+1;k<=j;k++){
 179			opCode.push_back(m_commands[k]);
 180		}
 181		opCodes.push_back(opCode);
 182		i=j+1;
 183	}
 184
 185	vector<int> indexes;
 186	getIndexes(opCodes.size(),&indexes);
 187
 188	vector<string> newCommands;
 189
 190	for(int i=0;i<(int)indexes.size();i++){
 191		for(int j=0;j<(int)opCodes[indexes[i]].size();j++){
 192			newCommands.push_back(opCodes[indexes[i]][j]);
 193		}
 194	}
 195
 196	#ifdef ASSERT
 197	assert(newCommands.size()==m_commands.size());
 198	#endif
 199
 200	m_commands=newCommands;
 201	newCommands.clear();
 202	opCodes.clear();
 203
 204	if(getRank() == MASTER_RANK){
 205		cout<<"Rank 0: Shuffled opcodes"<<endl;
 206		for(int i=0;i<(int)m_commands.size();i++){
 207			cout<<" "<<m_commands[i]<<" \\"<<endl;
 208		}
 209		cout<<endl;
 210	}
 211
 212	set<string> singleReadsCommands;
 213	singleReadsCommands.insert("-s");
 214	singleReadsCommands.insert("LoadSingleEndReads");
 215	singleReadsCommands.insert("-LoadSingleEndReads");
 216	singleReadsCommands.insert("--LoadSingleEndReads");
 217
 218	set<string> pairedReadsCommands;
 219	pairedReadsCommands.insert("-p");
 220	pairedReadsCommands.insert("LoadPairedEndReads");
 221	pairedReadsCommands.insert("-LoadPairedEndReads");
 222	pairedReadsCommands.insert("--LoadPairedEndReads");
 223	
 224	set<string> interleavedCommands;
 225	interleavedCommands.insert("-i");
 226
 227	set<string> colorSpaceMode;
 228	colorSpaceMode.insert("-color-space");
 229	set<string> outputAmosCommands;
 230	outputAmosCommands.insert("-a");
 231	outputAmosCommands.insert("-amos");
 232	outputAmosCommands.insert("--amos");
 233	outputAmosCommands.insert("--output-amos");
 234	outputAmosCommands.insert("-OutputAmosFile");
 235	outputAmosCommands.insert("--OutputAmosFile");
 236
 237	set<string> showMalloc;
 238	showMalloc.insert("-show-memory-allocations");
 239
 240	set<string> outputFileCommands;
 241	outputFileCommands.insert("-o");
 242	outputFileCommands.insert("-OutputFile");
 243	outputFileCommands.insert("--OutputFile");
 244	
 245	set<string> memoryMappedFileCommands;
 246	memoryMappedFileCommands.insert("-MemoryPrefix");
 247
 248	set<string> kmerSetting;
 249	kmerSetting.insert("-k");
 250
 251	set<string> routingDegree;
 252	routingDegree.insert("-routing-graph-degree");
 253
 254	set<string> connectionType;
 255	connectionType.insert("-connection-type");
 256
 257	set<string> reduceMemoryUsage;
 258	reduceMemoryUsage.insert("-r");
 259
 260	set<string> showMemory;
 261	showMemory.insert("-show-memory-usage");
 262	showMemory.insert("--show-memory-usage");
 263	set<string> debugBubbles;
 264	debugBubbles.insert("-debug-bubbles");
 265	debugBubbles.insert("--debug-bubbles");
 266
 267	set<string> debugSeeds;
 268	debugSeeds.insert("-debug-seeds");
 269	debugSeeds.insert("--debug-seeds");
 270
 271	set<string> runProfiler;
 272	runProfiler.insert("-run-profiler");
 273	runProfiler.insert("--run-profiler");
 274
 275	set<string> showContext;
 276	showContext.insert("-show-ending-context");
 277	showContext.insert("--show-ending-context");
 278
 279	set<string> showExtensionChoiceOption;
 280	showExtensionChoiceOption.insert("-show-extension-choice");
 281
 282	set<string> writeKmers;
 283	writeKmers.insert("-write-kmers");
 284
 285	set<string> setMinimumCoverage;
 286	set<string> setPeakCoverage;
 287	set<string> setRepeatCoverage;
 288	setMinimumCoverage.insert("-minimumCoverage");
 289	setPeakCoverage.insert("-peakCoverage");
 290	setRepeatCoverage.insert("-repeatCoverage");
 291
 292	set<string> minimumContigLength;
 293	minimumContigLength.insert("-minimum-contig-length");
 294
 295	set<string> searchOption;
 296	searchOption.insert("-search");
 297
 298	set<string> phylogeny;
 299	phylogeny.insert("-with-taxonomy");
 300
 301	set<string> coloringOneColor;
 302	coloringOneColor.insert("-one-color-per-file");
 303
 304	set<string> ontology;
 305	ontology.insert("-gene-ontology");
 306
 307	set<string> checkpoints;
 308	checkpoints.insert("-read-write-checkpoints");
 309	checkpoints.insert("-write-checkpoints");
 310	checkpoints.insert("-read-checkpoints");
 311
 312	vector<set<string> > toAdd;
 313	toAdd.push_back(checkpoints);
 314	toAdd.push_back(coloringOneColor);
 315	toAdd.push_back(ontology);
 316	toAdd.push_back(phylogeny);
 317	toAdd.push_back(minimumContigLength);
 318	toAdd.push_back(showExtensionChoiceOption);
 319	toAdd.push_back(setRepeatCoverage);
 320	toAdd.push_back(setPeakCoverage);
 321	toAdd.push_back(setMinimumCoverage);
 322	toAdd.push_back(singleReadsCommands);
 323	toAdd.push_back(pairedReadsCommands);
 324	toAdd.push_back(outputAmosCommands);
 325	toAdd.push_back(outputFileCommands);
 326	toAdd.push_back(kmerSetting);
 327	toAdd.push_back(routingDegree);
 328	toAdd.push_back(interleavedCommands);
 329	toAdd.push_back(searchOption);
 330	toAdd.push_back(reduceMemoryUsage);
 331	toAdd.push_back(memoryMappedFileCommands);
 332	toAdd.push_back(connectionType);
 333	toAdd.push_back(showMemory);	
 334	toAdd.push_back(debugBubbles);
 335	toAdd.push_back(debugSeeds);
 336	toAdd.push_back(runProfiler);
 337	toAdd.push_back(showContext);
 338	toAdd.push_back(showMalloc);
 339	toAdd.push_back(writeKmers);
 340	toAdd.push_back(colorSpaceMode);
 341
 342	for(int i=0;i<(int)toAdd.size();i++){
 343		for(set<string>::iterator j=toAdd[i].begin();j!=toAdd[i].end();j++){
 344			commands.insert(*j);
 345		}
 346	}
 347
 348	m_numberOfLibraries=0;
 349
 350	bool providedMemoryPrefix=false;
 351
 352	for(int i=0;i<(int)m_commands.size();i++){
 353		string token=m_commands[i];
 354		if(singleReadsCommands.count(token)>0){
 355			i++;
 356			int items=m_commands.size()-i;
 357
 358			if(items<1){
 359				if(m_rank==MASTER_RANK){
 360					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 361				}
 362				m_error=true;
 363				return;
 364			}
 365			token=m_commands[i];
 366			m_singleEndReadsFile.push_back(token);
 367
 368			fileNameHook(token);
 369
 370			if(m_rank==MASTER_RANK){
 371				cout<<endl;
 372				cout<<"-s (single sequences)"<<endl;
 373				cout<<" Sequences: "<<token<<endl;
 374			}
 375		}else if(memoryMappedFileCommands.count(token)>0){
 376			i++;
 377			int items=m_commands.size()-i;
 378			if(items<1){
 379				if(m_rank==MASTER_RANK){
 380					cout<<"Error: "<<token<<" needs 1 item, you provided "<<items<<endl;
 381				}
 382				m_error=true;
 383				return;
 384			}
 385			token=m_commands[i];
 386			m_memoryFilePrefix=token;
 387			providedMemoryPrefix=true;
 388
 389		}else if(checkpoints.count(token)>0){
 390			i++;
 391			int items=m_commands.size()-i;
 392			if(items<1){
 393				if(m_rank==MASTER_RANK){
 394					cout<<"Error: "<<token<<" needs 1 item, you provided "<<items<<endl;
 395				}
 396				m_error=true;
 397				return;
 398			}
 399			token=m_commands[i];
 400
 401			if(m_hasCheckpointDirectory){
 402				cout<<"Warning: can not set already set checkpoint directory."<<endl;
 403				continue;
 404			}
 405
 406			m_checkpointDirectory=token;
 407			m_hasCheckpointDirectory=true;
 408
 409			cout<<"Rank "<<m_rank<<" checkpoint directory: "<<m_checkpointDirectory;
 410
 411			if(m_rank==MASTER_RANK){
 412
 413				if(!fileExists(m_checkpointDirectory.c_str())){
 414					createDirectory(m_checkpointDirectory.c_str());
 415				}
 416			}
 417
 418		}else if(outputFileCommands.count(token)>0){
 419			i++;
 420			int items=m_commands.size()-i;
 421			if(items<1){
 422				if(m_rank==MASTER_RANK){
 423					cout<<"Error: "<<token<<" needs 1 item, you provided "<<items<<endl;
 424				}
 425				m_error=true;
 426				return;
 427			}
 428			token=m_commands[i];
 429			m_prefix=token;
 430			if(!providedMemoryPrefix){
 431				m_memoryFilePrefix=m_prefix;
 432			}
 433		}else if(interleavedCommands.count(token)>0){
 434			// make sure there is at least 4 elements left.
 435			int items=0;
 436			int k=0;
 437			for(int j=i+1;j<(int)m_commands.size();j++){
 438				string cmd=m_commands[j];
 439				if(commands.count(cmd)==0 && cmd[0]!='-'){
 440					items++;
 441				}else{
 442					break;
 443				}
 444				k++;
 445			}
 446			if(items!=1 && items!=3){
 447				if(m_rank==MASTER_RANK){
 448					cout<<"Error: "<<token<<" needs 1 or 3 items, you provided "<<items<<endl;
 449				}
 450				m_error=true;
 451				return;
 452			}
 453			i++;
 454			token=m_commands[i];
 455
 456			string interleavedFile=token;
 457			int interleavedFileIndex=m_singleEndReadsFile.size();
 458			m_interleavedFiles.insert(interleavedFileIndex);
 459			m_singleEndReadsFile.push_back(interleavedFile);
 460
 461			fileNameHook(interleavedFile);
 462
 463			int meanFragmentLength=0;
 464			int standardDeviation=0;
 465			#ifdef ASSERT
 466			assert(items==1 or items==3);
 467			#endif
 468
 469			if(m_rank==MASTER_RANK){
 470				cout<<endl;
 471				cout<<"Paired library # "<<m_numberOfLibraries<<endl;
 472				cout<<" -i (paired-end interleaved sequences)"<<endl;
 473				cout<<" Sequences: "<<token<<endl;
 474			}
 475			if(items==3){
 476				i++;
 477				token=m_commands[i];
 478				meanFragmentLength=atoi(token.c_str());
 479				i++;
 480				token=m_commands[i];
 481				standardDeviation=atoi(token.c_str());
 482				if(m_rank==MASTER_RANK){
 483					cout<<" Average length: "<<meanFragmentLength<<endl;
 484					cout<<" Standard deviation: "<<standardDeviation<<endl;
 485				}
 486				int distance=meanFragmentLength+standardDeviation;
 487				if(distance>m_maximumDistance){
 488					m_maximumDistance=distance;
 489				}
 490			}else if(items==1){// automatic detection.
 491				map<int,int> t;
 492				m_automaticLibraries.insert(m_numberOfLibraries);
 493				if(m_rank==MASTER_RANK){
 494					cout<<" Average length: automatic detection"<<endl;
 495					cout<<" Standard deviation: automatic detection"<<endl;
 496				}
 497			}else{
 498				#ifdef ASSERT
 499				assert(false);
 500				#endif
 501			}
 502
 503			m_fileLibrary[interleavedFileIndex]=m_numberOfLibraries;
 504			vector<int> files;
 505			files.push_back(interleavedFileIndex);
 506			m_libraryFiles.push_back(files);
 507
 508			addLibraryData(m_numberOfLibraries,meanFragmentLength,standardDeviation);
 509
 510			m_numberOfLibraries++;
 511		}else if(pairedReadsCommands.count(token)>0){
 512			// make sure there is at least 4 elements left.
 513			int items=0;
 514			int k=0;
 515			for(int j=i+1;j<(int)m_commands.size();j++){
 516				string cmd=m_commands[j];
 517				if(commands.count(cmd)==0 && cmd[0]!='-'){
 518					items++;
 519				}else{
 520					break;
 521				}
 522				k++;
 523			}
 524			if(items!=2 && items!=4){
 525				if(m_rank==MASTER_RANK){
 526					cout<<"Error: "<<token<<" needs 2 or 4 items, you provided "<<items<<endl;
 527				}
 528				m_error=true;
 529				return;
 530			}
 531			i++;
 532			token=m_commands[i];
 533			string left=token;
 534			// add left file
 535			int leftFile=m_singleEndReadsFile.size();
 536			m_leftFiles.insert(leftFile);
 537			m_singleEndReadsFile.push_back(left);
 538			i++;
 539			token=m_commands[i];
 540			
 541			// add right file
 542			string right=token;
 543			int rightFile=m_singleEndReadsFile.size();
 544			m_rightFiles.insert(rightFile);
 545			m_singleEndReadsFile.push_back(right);
 546
 547			fileNameHook(left);
 548			fileNameHook(right);
 549
 550			int meanFragmentLength=0;
 551			int standardDeviation=0;
 552			#ifdef ASSERT
 553			assert(items==4 or items==2);
 554			#endif
 555
 556			if(m_rank==MASTER_RANK){
 557				cout<<endl;
 558				cout<<"Paired library # "<<m_numberOfLibraries<<endl;
 559				cout<<" -p (paired-end sequences)"<<endl;
 560				cout<<" Left sequences: "<<left<<endl;
 561				cout<<" Right sequences: "<<right<<endl;
 562			}
 563
 564			if(items==4){
 565				i++;
 566				token=m_commands[i];
 567				meanFragmentLength=atoi(token.c_str());
 568				i++;
 569				token=m_commands[i];
 570				standardDeviation=atoi(token.c_str());
 571				if(m_rank==MASTER_RANK){
 572					cout<<" Average length: "<<meanFragmentLength<<endl;
 573					cout<<" Standard deviation: "<<standardDeviation<<endl;
 574				}
 575			}else if(items==2){// automatic detection.
 576				m_automaticLibraries.insert(m_numberOfLibraries);
 577				if(m_rank==MASTER_RANK){
 578					cout<<" Average length: automatic detection"<<endl;
 579					cout<<" Standard deviation: automatic detection"<<endl;
 580				}
 581			}
 582
 583			m_fileLibrary[rightFile]=m_numberOfLibraries;
 584			m_fileLibrary[leftFile]=m_numberOfLibraries;
 585			vector<int> files;
 586			files.push_back(leftFile);
 587			files.push_back(rightFile);
 588			m_libraryFiles.push_back(files);
 589
 590			addLibraryData(m_numberOfLibraries,meanFragmentLength,standardDeviation);
 591
 592			m_numberOfLibraries++;
 593		}else if(outputAmosCommands.count(token)>0){
 594			m_amos=true;
 595		}else if(showExtensionChoiceOption.count(token)>0){
 596			m_showExtensionChoice=true;
 597		}else if(showMalloc.count(token)>0){
 598			m_showMemoryAllocations=true;
 599		}else if(reduceMemoryUsage.count(token)>0){
 600			int items=0;
 601			for(int j=i+1;j<(int)m_commands.size();j++){
 602				string cmd=m_commands[j];
 603				if(commands.count(cmd)==0){
 604					items++;
 605				}else{
 606					break;
 607				}
 608			}
 609
 610			if(!(items==0||items==1)){
 611				if(m_rank==MASTER_RANK){
 612					cout<<"Error: "<<token<<" needs 0 or 1 item, you provided "<<items<<endl;
 613				}
 614				m_error=true;
 615				return;
 616			}
 617
 618			m_reducerIsActivated=true;
 619			m_reducerPeriod=1000000;
 620			
 621			if(items==1){
 622				m_reducerPeriod=atoi(m_commands[i+1].c_str());
 623			}
 624		}else if(setRepeatCoverage.count(token)>0){
 625			i++;
 626			int items=m_commands.size()-i;
 627
 628			if(items<1){
 629				if(m_rank==MASTER_RANK){
 630					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 631				}
 632				m_error=true;
 633				return;
 634			}
 635			token=m_commands[i];
 636			m_repeatCoverage=atoi(token.c_str());
 637			m_providedRepeatCoverage=true;
 638		}else if(setMinimumCoverage.count(token)>0){
 639			i++;
 640			int items=m_commands.size()-i;
 641
 642			if(items<1){
 643				if(m_rank==MASTER_RANK){
 644					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 645				}
 646				m_error=true;
 647				return;
 648			}
 649			token=m_commands[i];
 650			m_minimumCoverage=atoi(token.c_str());
 651			m_providedMinimumCoverage=true;
 652		}else if(setPeakCoverage.count(token)>0){
 653			i++;
 654			int items=m_commands.size()-i;
 655
 656			if(items<1){
 657				if(m_rank==MASTER_RANK){
 658					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 659				}
 660				m_error=true;
 661				return;
 662			}
 663			token=m_commands[i];
 664			m_peakCoverage=atoi(token.c_str());
 665			m_providedPeakCoverage=true;
 666	
 667		}else if(connectionType.count(token)>0){
 668			i++;
 669			int items=m_commands.size()-i;
 670
 671			if(items<1){
 672				if(m_rank==MASTER_RANK){
 673					cout<<"Error: "<<token<<" needs 1 item but you provided only "<<items<<endl;
 674				}
 675				m_error=true;
 676				return;
 677			}
 678			token=m_commands[i];
 679			m_connectionType=token;
 680
 681		}else if(routingDegree.count(token)>0){
 682			i++;
 683			int items=m_commands.size()-i;
 684
 685			if(items<1){
 686				if(m_rank==MASTER_RANK){
 687					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 688				}
 689				m_error=true;
 690				return;
 691			}
 692			token=m_commands[i];
 693			m_degree=atoi(token.c_str());
 694
 695		}else if(phylogeny.count(token)>0){
 696
 697			cout<<"Enabling CorePlugin 'PhylogenyViewer'"<<endl;
 698
 699			i++;
 700			int items=m_commands.size()-i;
 701
 702			if(items<3){
 703				if(m_rank==MASTER_RANK){
 704					cout<<"Error: "<<token<<" needs 3 item, but you provided "<<items<<endl;
 705				}
 706
 707				m_error=true;
 708				return;
 709			}
 710
 711			m_genomeToTaxonFile=m_commands[i];
 712			i++;
 713			m_treeFile=m_commands[i];
 714			i++;
 715			m_taxonNameFile=m_commands[i];
 716
 717		}else if(searchOption.count(token)>0){
 718
 719			i++;
 720			int items=m_commands.size()-i;
 721
 722			if(items<1){
 723				if(m_rank==MASTER_RANK){
 724					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 725				}
 726				m_error=true;
 727				return;
 728			}
 729			token=m_commands[i];
 730
 731			m_searchDirectories.push_back(token);
 732
 733		}else if(minimumContigLength.count(token)>0){
 734			i++;
 735			int items=m_commands.size()-i;
 736
 737			if(items<1){
 738				if(m_rank==MASTER_RANK){
 739					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 740				}
 741				m_error=true;
 742				return;
 743			}
 744			token=m_commands[i];
 745			m_minimumContigLength=atoi(token.c_str());
 746
 747			if(m_rank==MASTER_RANK){
 748				cout<<endl;
 749				cout<<"Rank "<<MASTER_RANK<<" the minimum contig length is "<<m_minimumContigLength<<endl;
 750				cout<<endl;
 751			}
 752
 753		}else if(kmerSetting.count(token)>0){
 754			i++;
 755			int items=m_commands.size()-i;
 756
 757			if(items<1){
 758				if(m_rank==MASTER_RANK){
 759					cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
 760				}
 761				m_error=true;
 762				return;
 763			}
 764			token=m_commands[i];
 765			m_wordSize=atoi(token.c_str());
 766			if(m_wordSize<15){
 767				m_wordSize=15;
 768			}
 769			if(m_wordSize>MAXKMERLENGTH){
 770				if(m_rank==MASTER_RANK){
 771					cout<<endl;
 772					cout<<"Rank "<<MASTER_RANK<<": Warning, k > MAXKMERLENGTH"<<endl;
 773					cout<<"Rank "<<MASTER_RANK<<": Change MAXKMERLENGTH in the Makefile and recompile Ray."<<endl;
 774				}
 775				m_wordSize=MAXKMERLENGTH;
 776			}
 777
 778			if(m_wordSize%2==0){
 779				m_wordSize--;
 780			}
 781			if(m_rank==MASTER_RANK){
 782				cout<<endl;
 783				cout<<"-k (to set the k-mer size)"<<endl;
 784				cout<<" Value: "<<m_wordSize<<endl;
 785				cout<<endl;
 786			}
 787		}else if(writeKmers.count(token)>0){
 788			m_writeKmers=true;
 789			if(m_rank==MASTER_RANK){
 790				cout<<endl;
 791				cout<<"Ray will write k-mers ("<<token<<")"<<endl;
 792
 793			}
 794		}else if(runProfiler.count(token)>0){
 795			m_profiler=true;
 796			if(m_rank==MASTER_RANK){
 797				printf("Enabling profiler!\n");
 798			}
 799		}else if(debugBubbles.count(token)>0){
 800			m_debugBubbles=true;
 801			if(m_rank==MASTER_RANK){
 802				printf("Enabling bubble debug mode.\n");
 803			}
 804		}else if(colorSpaceMode.count(token)>0){
 805			m_colorSpaceMode=true;
 806			if(m_rank==MASTER_RANK){
 807				cout<<endl;
 808				cout<<"Enabling color-space mode"<<endl;
 809				cout<<"All reads should be in color space."<<endl;
 810			}
 811		}else if(debugSeeds.count(token)>0){
 812			m_debugSeeds=true;
 813			if(m_rank==MASTER_RANK){
 814				printf("Enabling seed debug mode.\n");
 815			}
 816		}else if(showMemory.count(token)>0){
 817			m_showMemoryUsage=true;
 818			if(m_rank==MASTER_RANK){
 819				printf("Enabling memory usage reporting.\n");
 820			}
 821		}else if(showContext.count(token)>0){
 822			m_showEndingContext=true;
 823			if(m_rank==MASTER_RANK){
 824				printf("Ray will show the ending context of extensions.\n");
 825			}
 826		}
 827	}
 828
 829	int maximumNumberOfFiles=MAXIMUM_MESSAGE_SIZE_IN_BYTES/sizeof(uint32_t);
 830	
 831	assert((int)m_singleEndReadsFile.size()<=maximumNumberOfFiles);
 832
 833	LargeCount result=1;
 834	for(int p=0;p<m_wordSize;p++){
 835		result*=4;
 836	}
 837}
 838
 839void Parameters::writeCommandFile(){
 840	ostringstream commandFile;
 841	commandFile<<getPrefix()<<"RayCommand.txt";
 842	ofstream f(commandFile.str().c_str());
 843	f<<"mpiexec -n "<<getSize()<<" Ray \\"<<endl;
 844	for(int i=0;i<(int)m_originalCommands.size();i++){
 845		if(i!=(int)m_originalCommands.size()-1){
 846			f<<" "<<m_originalCommands[i]<<" \\"<<endl;
 847		}else{
 848			f<<" "<<m_originalCommands[i]<<endl;
 849		}
 850	}
 851	f.close();
 852	cout<<"Rank "<<MASTER_RANK<<" wrote "<<commandFile.str()<<endl;
 853	cout<<endl;
 854
 855	cout<<"k-mer length: "<<m_wordSize<<endl;
 856	
 857	if(m_reducerIsActivated){
 858		cout<<"Memory Consumption Reducer is enabled, threshold="<<m_reducerPeriod<<endl;
 859	}
 860	cout<<endl;
 861	cout<<"Output files will be prefixed with "<<getPrefix()<<endl;
 862	cout<<endl;
 863
 864	ostringstream rayRuntime;
 865	rayRuntime<<getPrefix()<<"RayVersion.txt";
 866	ofstream f2(rayRuntime.str().c_str());
 867	f2<<"Ray version: "<<RAY_VERSION<<endl;
 868	f2.close();
 869}
 870
 871void Parameters::constructor(int argc,char**argv,int rank,int size){
 872	m_maximumDistance=0;
 873	m_totalNumberOfSequences=0;
 874
 875	m_rank=rank;
 876	m_size=size;
 877
 878	bool hasCommandFile=false;
 879	if(argc==2){
 880		ifstream f(argv[1]);
 881		hasCommandFile=f;
 882		f.close();
 883	}
 884
 885	if(argc==2&&hasCommandFile){
 886		m_input=argv[1];
 887		loadCommandsFromFile(argv[1]);
 888	}else{
 889		loadCommandsFromArguments(argc,argv);
 890	}
 891	parseCommands();
 892}
 893
 894int Parameters::getRank(){
 895	return m_rank;
 896}
 897
 898bool Parameters::isInitiated(){
 899	return m_initiated;
 900}
 901
 902vector<string> Parameters::getAllFiles(){
 903	vector<string> l;
 904	for(int i=0;i<(int)m_singleEndReadsFile.size();i++)
 905		l.push_back(m_singleEndReadsFile[i]);
 906	return l;
 907}
 908
 909string Parameters::getFile(int file){
 910	return m_singleEndReadsFile[file];
 911}
 912
 913string Parameters::getDirectory(){
 914	return m_directory;
 915}
 916
 917string Parameters::getOutputFile(){
 918	return getPrefix()+"Contigs.fasta";
 919}
 920
 921int Parameters::getMinimumContigLength(){
 922	return m_minimumContigLength;
 923}
 924
 925bool Parameters::isLeftFile(int i){
 926	return m_leftFiles.count(i)>0;
 927}
 928
 929bool Parameters::isRightFile(int i){
 930	return m_rightFiles.count(i)>0;
 931}
 932
 933int Parameters::getLibraryAverageLength(int i,int j){
 934	return m_libraryAverageLength[i][j];
 935}
 936
 937int Parameters::getLibraryStandardDeviation(int i,int j){
 938	return m_libraryDeviation[i][j];
 939}
 940
 941int Parameters::getLibraryMaxAverageLength(int i){
 942	if(m_libraryAverageLength[i].size()==1)
 943		return m_libraryAverageLength[i][0];
 944
 945	int max=0;
 946	for(int j=0;j<(int)m_libraryAverageLength[i].size();j++)
 947		if(m_libraryAverageLength[i][j]>max)
 948			max=m_libraryAverageLength[i][j];
 949	return max;
 950}
 951
 952int Parameters::getLibraryMaxStandardDeviation(int i){
 953	if(m_libraryDeviation[i].size()==1)
 954		return m_libraryDeviation[i][0];
 955
 956	int max=0;
 957	for(int j=0;j<(int)m_libraryDeviation[i].size();j++)
 958		if(m_libraryDeviation[i][j]>max)
 959			max=m_libraryDeviation[i][j];
 960	return max;
 961
 962}
 963
 964bool Parameters::getColorSpaceMode(){
 965	return m_colorSpaceMode;
 966}
 967
 968bool Parameters::useAmos(){
 969	return m_amos;
 970}
 971
 972string Parameters::getInputFile(){
 973	return m_input;
 974}
 975
 976string Parameters::getParametersFile(){
 977	return "Ray-Parameters.txt";
 978}
 979
 980string Parameters::getPrefix(){
 981	ostringstream directory;
 982	directory<<m_prefix<<"/";
 983	return directory.str();
 984}
 985
 986string Parameters::getCoverageDistributionFile(){
 987	return getPrefix()+"CoverageDistribution.txt";
 988}
 989
 990string Parameters::getAmosFile(){
 991	return getPrefix()+"AMOS.afg";
 992}
 993
 994vector<string> Parameters::getCommands(){
 995	return m_commands;
 996}
 997
 998bool Parameters::getError(){
 999	return m_error;
1000}
1001
1002void Parameters::addDistance(int library,int distance,int count){
1003	m_observedDistances[library][distance]+=count;
1004}
1005
1006string Parameters::getLibraryFile(int library){
1007	ostringstream s;
1008	s<<getPrefix();
1009	s<<""<<"Library"<<library<<".txt";
1010	return s.str();
1011}
1012
1013#define WRITE_LIBRARY_OBSERVATIONS
1014
1015void Parameters::computeAverageDistances(){
1016	cout<<endl;
1017	for(map<int,map<int,int> >::iterator i=m_observedDistances.begin();
1018		i!=m_observedDistances.end();i++){
1019		int library=i->first;
1020
1021		if(!isAutomatic(library))
1022			continue;
1023
1024		vector<int> x;
1025		vector<int> y;
1026		string fileName=getLibraryFile(library);
1027		#ifdef WRITE_LIBRARY_OBSERVATIONS
1028		ofstream f(fileName.c_str());
1029		#endif
1030		for(map<int,int>::iterator j=m_observedDistances[library].begin();
1031			j!=m_observedDistances[library].end();j++){
1032			int d=j->first;
1033			int count=j->second;
1034			#ifdef WRITE_LIBRARY_OBSERVATIONS
1035			f<<d<<"\t"<<count<<endl;
1036			#endif
1037			x.push_back(d);
1038			y.push_back(count);
1039		}
1040		#ifdef WRITE_LIBRARY_OBSERVATIONS
1041		f.close();
1042		#endif
1043
1044		vector<int> averages;
1045		vector<int> deviations;
1046		LibraryPeakFinder finder;
1047		finder.findPeaks(&x,&y,&averages,&deviations);
1048	
1049		for(int i=0;i<(int)averages.size();i++)
1050			addLibraryData(library,averages[i],deviations[i]);
1051
1052	}	
1053	cout<<endl;
1054	cout<<endl;
1055
1056	ostringstream fileName;
1057	fileName<<getPrefix();
1058	fileName<<"LibraryStatistics.txt";
1059	ofstream f2(fileName.str().c_str());
1060
1061	f2<<"NumberOfPairedLibraries: "<<m_numberOfLibraries<<endl;
1062	f2<<endl;
1063	for(int i=0;i<(int)m_numberOfLibraries;i++){
1064		int library=i;
1065		string type="Manual";
1066		if(m_automaticLibraries.count(library)>0){
1067			type="Automatic";
1068		}
1069		f2<<"LibraryNumber: "<<library<<endl;
1070		string format="Interleaved,Paired";
1071		vector<int> files=m_libraryFiles[i];
1072		if(files.size()==2){
1073			format="TwoFiles,Paired";
1074		}
1075		f2<<" InputFormat: "<<format<<endl;
1076		f2<<" DetectionType: "<<type<<endl;
1077		f2<<" File: "<<m_singleEndReadsFile[files[0]]<<endl;
1078		f2<<"  NumberOfSequences: "<<m_numberOfSequencesInFile[files[0]]<<endl;
1079		if(files.size()>1){
1080			f2<<" File: "<<m_singleEndReadsFile[files[1]]<<endl;
1081			f2<<"  NumberOfSequences: "<<m_numberOfSequencesInFile[files[1]]<<endl;
1082		}
1083		f2<<" Distribution: "<<getLibraryFile(library)<<endl;
1084		for(int j=0;j<getLibraryPeaks(library);j++){
1085			int average=getLibraryAverageLength(library,j);
1086			int standardDeviation=getLibraryStandardDeviation(library,j);
1087			cout<<"Library # "<<library<<" ("<<type<<") -> average length: "<<average<<" and standard deviation: "<<standardDeviation<<endl;
1088			f2<<" Peak "<<j<<endl;
1089			f2<<"  AverageOuterDistance: "<<average<<endl;
1090			f2<<"  StandardDeviation: "<<standardDeviation<<endl;
1091			if(standardDeviation*2>average){
1092				f2<<"  DetectionFailure: Yes"<<endl;
1093			}
1094		}
1095		f2<<endl;
1096	}
1097	f2.close();
1098}
1099
1100void Parameters::addLibraryData(int library,int average,int deviation){
1101	if(average==0)
1102		return;
1103
1104	m_libraryAverageLength[library].push_back(average);
1105	m_libraryDeviation[library].push_back(deviation);
1106	
1107	int distance=average+4*deviation;
1108	if(distance>m_maximumDistance){
1109		m_maximumDistance=distance;
1110	}
1111}
1112
1113void Parameters::setNumberOfSequences(int file,LargeCount n){
1114	if(m_numberOfSequencesInFile.count(file)==0){
1115		m_numberOfSequencesInFile[file]=n;
1116		m_totalNumberOfSequences+=n;
1117	}
1118}
1119
1120int Parameters::getNumberOfLibraries(){
1121	return m_numberOfLibraries;
1122}
1123
1124LargeCount Parameters::getNumberOfSequences(int file){
1125	#ifdef ASSERT
1126	if(file>=(int)m_numberOfSequencesInFile.size())
1127		cout<<"Error File= "<<file<<" Files: "<<m_numberOfSequencesInFile.size()<<endl;
1128
1129	assert(file<(int)m_numberOfSequencesInFile.size());
1130	#endif
1131	return m_numberOfSequencesInFile[file];
1132}
1133
1134int Parameters::getNumberOfFiles(){
1135	return m_singleEndReadsFile.size();
1136}
1137
1138bool Parameters::isAutomatic(int library){
1139	return m_automaticLibraries.count(library)>0;
1140}
1141
1142int Parameters::getLibrary(int file){
1143	return m_fileLibrary[file];
1144}
1145
1146bool Parameters::isInterleavedFile(int i){
1147	return m_interleavedFiles.count(i)>0;
1148}
1149
1150bool Parameters::showMemoryUsage(){
1151	return m_showMemoryUsage;
1152}
1153
1154string Parameters::getReceivedMessagesFile(){
1155	string outputForMessages=getPrefix()+"ReceivedMessages.txt";
1156	return outputForMessages;
1157}
1158
1159void Parameters::printFinalMessage(){
1160	cout<<"Rank "<<MASTER_RANK<<" wrote library statistics"<<endl;
1161}
1162
1163CoverageDepth Parameters::getMaximumAllowedCoverage(){
1164	CoverageDepth a=0;
1165	a--;
1166	return a;
1167}
1168
1169void Parameters::setPeakCoverage(CoverageDepth a){
1170	if(!m_providedPeakCoverage){
1171		m_peakCoverage=a;
1172	}
1173}
1174
1175void Parameters::setRepeatCoverage(CoverageDepth a){
1176	if(!m_providedRepeatCoverage){
1177		m_repeatCoverage=a;
1178	}
1179}
1180
1181CoverageDepth Parameters::getPeakCoverage(){
1182	return m_peakCoverage;
1183}
1184
1185CoverageDepth Parameters::getRepeatCoverage(){
1186	return m_repeatCoverage;
1187}
1188
1189int Parameters::getSize(){
1190	return m_size;
1191}
1192
1193void Parameters::setSize(int a){
1194	m_size=a;
1195}
1196
1197bool Parameters::runReducer(){
1198	return m_reducerIsActivated;
1199}
1200
1201void Parameters::showOption(string option,string description){
1202	string spacesBeforeOption="       ";
1203	cout<<spacesBeforeOption<<option<<endl;
1204	showOptionDescription(description);
1205}
1206
1207void Parameters::showOptionDescription(string description){
1208	string spacesBeforeDescription="              ";
1209	cout<<spacesBeforeDescription<<description<<endl;
1210}
1211
1212/* obviously shows usage */
1213void Parameters::showUsage(){
1214	string basicSpaces="       ";
1215	cout<<"NAME"<<endl<<basicSpaces<<"Ray - assemble genomes in parallel using the message-passing interface"<<endl<<endl;
1216
1217	cout<<"SYNOPSIS"<<endl;
1218	cout<<basicSpaces<<"mpiexec -np NUMBER_OF_RANKS Ray -k KMERLENGTH -p l1_1.fastq l1_2.fastq -p l2_1.fastq l2_2.fastq -o test"<<endl;
1219	cout<<endl;
1220	cout<<"DESCRIPTION:"<<endl;
1221
1222	cout<<endl;
1223	cout<<"  The Ray genome assembler is built on top of the RayPlatform, a generic plugin-based"<<endl;
1224	cout<<"  distributed and parallel compute engine that uses the message-passing interface"<<endl;
1225	cout<<"  for passing messages."<<endl;
1226	cout<<endl;
1227	cout<<"  Ray targets several applications:"<<endl;
1228	cout<<endl;
1229	cout<<"    - de novo genome assembly"<<endl;
1230	cout<<"    - de novo meta-genome assembly"<<endl;
1231	cout<<"    - de novo transcriptome assembly (works, but not tested a lot)"<<endl;
1232	cout<<"    - quantification of contig abundances"<<endl;
1233	cout<<"    - quantification of microbiome consortia members"<<endl;
1234	cout<<"    - quantification of transcript expression"<<endl;
1235	cout<<"    - taxonomy profiling of samples"<<endl;
1236	cout<<"    - gene ontology profiling of samples"<<endl;
1237	cout<<endl;
1238	showOption("-help","Displays this help page.");
1239	cout<<endl;
1240	showOption("-version","Displays Ray version and compilation options.");
1241	cout<<endl;
1242
1243	cout<<"  K-mer length"<<endl;
1244	cout<<endl;
1245	showOption("-k kmerLength","Selects the length of k-mers. The default value is 21. ");
1246	showOptionDescription("It must be odd because reverse-complement vertices are stored together.");
1247	showOptionDescription("The maximum length is defined at compilation by MAXKMERLENGTH");
1248	showOptionDescription("Larger k-mers utilise more memory.");
1249	cout<<endl;
1250
1251
1252	cout<<"  Inputs"<<endl;
1253	cout<<endl;
1254	showOption("-p leftSequenceFile rightSequenceFile [averageOuterDistance standardDeviation]","Provides two files containing paired-end reads.");
1255	showOptionDescription("averageOuterDistance and standardDeviation are automatically computed if not provided.");
1256	cout<<endl;
1257	showOption("-i interleavedSequenceFile [averageOuterDistance standardDeviation]","Provides one file containing interleaved paired-end reads.");
1258
1259	showOptionDescription("averageOuterDistance and standardDeviation are automatically computed if not provided.");
1260	cout<<endl;
1261
1262	showOption("-s sequenceFile","Provides a file containing single-end reads.");
1263	cout<<endl;
1264
1265	cout<<"  Biological abundances"<<endl;
1266	cout<<endl;
1267	showOption("-search searchDirectory","Provides a directory containing fasta files to be searched in the de Bruijn graph.");
1268	showOptionDescription("Biological abundances will be written to RayOutput/BiologicalAbundances");
1269	showOptionDescription("See Documentation/BiologicalAbundances.txt");
1270	cout<<endl;
1271
1272	showOption("-one-color-per-file", "Sets one color per file instead of one per sequence.");
1273	showOptionDescription("By default, each sequence in each file has a different color.");
1274	showOptionDescription("For files with large numbers of sequences, using one single color per file may be more efficient.");
1275	cout<<endl;
1276
1277	cout<<"  Taxonomic profiling with colored de Bruijn graphs"<<endl;
1278	cout<<endl;
1279	showOption("-with-taxonomy Genome-to-Taxon.tsv TreeOfLife-Edges.tsv Taxon-Names.tsv","Provides a taxonomy.");
1280	showOptionDescription("Computes and writes detailed taxonomic profiles.");
1281	showOptionDescription("See Documentation/Taxonomy.txt for details.");
1282
1283	cout<<endl;
1284	showOption("-gene-ontology OntologyTerms.txt  Annotations.txt","Provides an ontology and annotations.");
1285	showOptionDescription("OntologyTerms.txt is fetched from http://geneontology.org");
1286	showOptionDescription("Annotations.txt is a 2-column file (EMBL_CDS handle	&	gene ontology identifier)");
1287	showOptionDescription("See Documentation/GeneOntology.txt");
1288	
1289
1290	cout<<"  Outputs"<<endl;
1291	cout<<endl;
1292	showOption("-o outputDirectory","Specifies the directory for outputted files. Default is RayOutput");
1293	cout<<endl;
1294
1295	cout<<"  Other outputs"<<endl;
1296	cout<<endl;
1297	showOption("-amos","Writes the AMOS file called RayOutput/AMOS.afg");
1298	showOptionDescription("An AMOS file contains read positions on contigs.");
1299	showOptionDescription("Can be opened with software with graphical user interface.");
1300	cout<<endl;
1301	showOption("-write-kmers","Writes k-mer graph to RayOutput/kmers.txt");
1302	showOptionDescription("The resulting file is not utilised by Ray.");
1303	showOptionDescription("The resulting file is very large.");
1304	cout<<endl;
1305	showOption("-write-read-markers","Writes read markers to disk.");
1306	cout<<endl;
1307	showOption("-write-seeds","Writes seed DNA sequences to RayOutput/Rank<rank>.RaySeeds.fasta");
1308	cout<<endl;
1309	showOption("-write-extensions","Writes extension DNA sequences to RayOutput/Rank<rank>.RayExtensions.fasta");
1310	cout<<endl;
1311	showOption("-write-contig-paths","Writes contig paths with coverage values");
1312	showOptionDescription("to RayOutput/Rank<rank>.RayContigPaths.txt");
1313	cout<<endl;
1314	showOption("-write-marker-summary","Writes marker statistics.");
1315	cout<<endl;
1316
1317	cout<<"  Memory usage"<<endl;
1318	cout<<endl;
1319	showOption("-show-memory-usage","Shows memory usage. Data is fetched from /proc on GNU/Linux");
1320	showOptionDescription("Needs __linux__");
1321	cout<<endl;
1322	showOption("-show-memory-allocations","Shows memory allocation events");
1323	cout<<endl;
1324
1325	cout<<"  Algorithm verbosity"<<endl;
1326	cout<<endl;
1327	showOption("-show-extension-choice","Shows the choice made (with other choices) during the extension.");
1328	cout<<endl;
1329	showOption("-show-ending-context","Shows the ending context of each extension.");
1330	showOptionDescription("Shows the children of the vertex where extension was too difficult.");
1331	cout<<endl;
1332	showOption("-show-distance-summary","Shows summary of outer distances used for an extension path.");
1333	cout<<endl;
1334	showOption("-show-consensus","Shows the consensus when a choice is done.");
1335	cout<<endl;
1336
1337	cout<<"  Assembly options (defaults work well)"<<endl;
1338	cout<<endl;
1339	
1340	showOption("-disable-recycling","Disables read recycling during the assembly");
1341	showOptionDescription("reads will be set free in 3 cases:");
1342	showOptionDescription("1. the distance did not match for a pair");
1343	showOptionDescription("2. the read has not met its mate");
1344	showOptionDescription("3. the library population indicates a wrong placement");
1345	showOptionDescription("see Constrained traversal of repeats with paired sequences.");
1346	showOptionDescription("Sébastien Boisvert, Élénie Godzaridis, François Laviolette & Jacques Corbeil.");
1347	showOptionDescription("First Annual RECOMB Satellite Workshop on Massively Parallel Sequencing, March 26-27 2011, Vancouver, BC, Canada.");
1348	cout<<endl;
1349
1350	showOption("-minimum-contig-length","Changes the minimum contig length, default is 100");
1351	cout<<endl;
1352
1353	showOption("-color-space","Runs in color-space");
1354	showOptionDescription("Needs csfasta files. Activated automatically if csfasta files are provided.");
1355	cout<<endl;
1356
1357/*
1358	showOption("-minimumCoverage minimumCoverage","Sets manually the minimum coverage.");
1359	showOptionDescription("If not provided, it is computed by Ray automatically.");
1360	cout<<endl;
1361	showOption("-peakCoverage peakCoverage","Sets manually the peak coverage.");
1362	showOptionDescription("If not provided, it is computed by Ray automatically.");
1363	cout<<endl;
1364	showOption("-repeatCoverage repeatCoverage","Sets manually the repeat coverage.");
1365	showOptionDescription("If not provided, it is computed by Ray automatically.");
1366	cout<<endl;
1367*/
1368
1369	cout<<"  Checkpointing"<<endl;
1370	cout<<endl;
1371
1372	showOption("-write-checkpoints checkpointDirectory","Write checkpoint files");
1373	cout<<endl;
1374	showOption("-read-checkpoints checkpointDirectory","Read checkpoint files");
1375	cout<<endl;
1376	showOption("-read-write-checkpoints checkpointDirectory","Read and write checkpoint files");
1377	cout<<endl;
1378
1379	cout<<"  Message routing for large number of cores"<<endl;
1380	cout<<endl;
1381	showOption("-route-messages","Enables the Ray message router. Disabled by default.");
1382	showOptionDescription("Messages will be routed accordingly so that any rank can communicate directly with only a few others.");
1383	showOptionDescription("Without -route-messages, any rank can communicate directly with any other rank.");
1384	showOptionDescription("Files generated: Routing/Connections.txt, Routing/Routes.txt and Routing/RelayEvents.txt");
1385	showOptionDescription("and Routing/Summary.txt");
1386	cout<<endl;
1387	showOption("-connection-type type","Sets the connection type for routes.");
1388	showOptionDescription("Accepted values are debruijn, group, random, kautz and complete. Default is debruijn.");
1389	showOptionDescription("With the type debruijn, the number of ranks must be a power of something.");
1390	showOptionDescription("Examples: 256 = 16*16, 512=8*8*8, 49=7*7, and so on.");
1391	showOptionDescription("Otherwise, don't use debruijn routing but use another one");
1392	showOptionDescription("With the type kautz, the number of ranks n must be n=(k+1)*k^(d-1) for some k and d");
1393	cout<<endl;
1394
1395	showOption("-routing-graph-degree degree","Specifies the outgoing degree for the routing graph.");
1396	cout<<endl;
1397
1398	cout<<"  Hardware testing"<<endl;
1399	cout<<endl;
1400	showOption("-test-network-only","Tests the network and returns.");
1401	cout<<endl;
1402	showOption("-write-network-test-raw-data","Writes one additional file per rank detailing the network test.");
1403	cout<<endl;
1404
1405	cout<<"  Debugging"<<endl;
1406	cout<<endl;
1407	showOption("-run-profiler","Runs the profiler as the code runs. By default, only show granularity warnings.");
1408	showOptionDescription("Running the profiler increases running times.");
1409	cout<<endl;
1410	showOption("-with-profiler-details","Shows number of messages sent and received in each methods during in each time slices (epochs). Needs -run-profiler.");
1411	cout<<endl;
1412	showOption("-show-communication-events","Shows all messages sent and received.");
1413	cout<<endl;
1414	showOption("-show-read-placement","Shows read placement in the graph during the extension.");
1415	cout<<endl;
1416	showOption("-debug-bubbles","Debugs bubble code.");
1417	showOptionDescription("Bubbles can be due to heterozygous sites or sequencing errors or other (unknown) events");
1418	cout<<endl;
1419	showOption("-debug-seeds","Debugs seed code.");
1420	showOptionDescription("Seeds are paths in the graph that are likely unique.");
1421	cout<<endl;
1422	showOption("-debug-fusions","Debugs fusion code.");
1423	cout<<endl;
1424	showOption("-debug-scaffolder","Debug the scaffolder.");
1425	cout<<endl;
1426	cout<<endl;
1427
1428
1429	cout<<"FILES"<<endl;
1430	cout<<endl;
1431	cout<<"  Input files"<<endl;
1432	cout<<endl;
1433	cout<<"     Note: file format is determined with file extension."<<endl;
1434	cout<<endl;
1435	cout<<"     .fasta"<<endl;
1436	cout<<"     .fasta.gz (needs HAVE_LIBZ=y at compilation)"<<endl;
1437	cout<<"     .fasta.bz2 (needs HAVE_LIBBZ2=y at compilation)"<<endl;
1438	cout<<"     .fastq"<<endl;
1439	cout<<"     .fastq.gz (needs HAVE_LIBZ=y at compilation)"<<endl;
1440	cout<<"     .fastq.bz2 (needs HAVE_LIBBZ2=y at compilation)"<<endl;
1441	cout<<"     .sff (paired reads must be extracted manually)"<<endl;
1442	cout<<"     .csfasta (color-space reads)"<<endl;
1443
1444
1445	cout<<endl;
1446	cout<<"  Outputted files"<<endl;
1447	cout<<endl;
1448
1449	cout<<"  Scaffolds"<<endl;
1450	cout<<endl;
1451	cout<<"     RayOutput/Scaffolds.fasta"<<endl;
1452	cout<<"     	The scaffold sequences in FASTA format"<<endl;
1453	cout<<"     RayOutput/ScaffoldComponents.txt"<<endl;
1454	cout<<"     	The components of each scaffold"<<endl;
1455	cout<<"     RayOutput/ScaffoldLengths.txt"<<endl;
1456	cout<<"     	The length of each scaffold"<<endl;
1457	cout<<"     RayOutput/ScaffoldLinks.txt"<<endl;
1458	cout<<"     	Scaffold links"<<endl;
1459	cout<<endl;
1460
1461	cout<<"  Contigs"<<endl;
1462	cout<<endl;
1463	cout<<"     RayOutput/Contigs.fasta"<<endl;
1464	cout<<"     	Contiguous sequences in FASTA format"<<endl;
1465	cout<<"     RayOutput/ContigLengths.txt"<<endl;
1466	cout<<"     	The lengths of contiguous sequences"<<endl;
1467	cout<<endl;
1468
1469	cout<<"  Summary"<<endl;
1470	cout<<endl;
1471	cout<<"     RayOutput/OutputNumbers.txt"<<endl;
1472	cout<<"     	Overall numbers for the assembly"<<endl;
1473	cout<<endl;
1474
1475	cout<<"  de Bruijn graph"<<endl;
1476	cout<<endl;
1477	cout<<"     RayOutput/CoverageDistribution.txt"<<endl;
1478	cout<<"     	The distribution of coverage values"<<endl;
1479	cout<<"     RayOutput/CoverageDistributionAnalysis.txt"<<endl;
1480	cout<<"     	Analysis of the coverage distribution"<<endl;
1481	cout<<"     RayOutput/degreeDistribution.txt"<<endl;
1482	cout<<"     	Distribution of ingoing and outgoing degrees"<<endl;
1483	cout<<"     RayOutput/kmers.txt"<<endl;
1484	cout<<"     	k-mer graph, required option: -write-kmers"<<endl;
1485	cout<<"         The resulting file is not utilised by Ray."<<endl;
1486	cout<<"         The resulting file is very large."<<endl;
1487	cout<<endl;
1488	
1489	cout<<"  Assembly steps"<<endl;
1490	cout<<endl;
1491	cout<<"     RayOutput/SeedLengthDistribution.txt"<<endl;
1492	cout<<"         Distribution of seed length"<<endl;
1493	cout<<"     RayOutput/Rank<rank>.OptimalReadMarkers.txt"<<endl;
1494	cout<<"         Read markers."<<endl;
1495	cout<<"     RayOutput/Rank<rank>.RaySeeds.fasta"<<endl;
1496	cout<<"         Seed DNA sequences, required option: -write-seeds"<<endl;
1497	cout<<"     RayOutput/Rank<rank>.RayExtensions.fasta"<<endl;
1498	cout<<"         Extension DNA sequences, required option: -write-extensions"<<endl;
1499	cout<<"     RayOutput/Rank<rank>.RayContigPaths.txt"<<endl;
1500	cout<<"         Contig paths with coverage values, required option: -write-contig-paths"<<endl;
1501	cout<<endl;
1502
1503	cout<<"  Paired reads"<<endl;
1504	cout<<endl;
1505	cout<<"     RayOutput/LibraryStatistics.txt"<<endl;
1506	cout<<"     	Estimation of outer distances for paired reads"<<endl;
1507	cout<<"     RayOutput/Library<LibraryNumber>.txt"<<endl;
1508	cout<<"         Frequencies for observed outer distances (insert size + read lengths)"<<endl;
1509	cout<<endl;
1510
1511	cout<<"  Partition"<<endl;
1512	cout<<endl;
1513	cout<<"     RayOutput/NumberOfSequences.txt"<<endl;
1514	cout<<"         Number of reads in each file"<<endl;
1515	cout<<"     RayOutput/SequencePartition.txt"<<endl;
1516	cout<<"     	Sequence partition"<<endl;
1517	cout<<endl;
1518
1519	cout<<"  Ray software"<<endl;
1520	cout<<endl;
1521	cout<<"     RayOutput/RayVersion.txt"<<endl;
1522	cout<<"     	The version of Ray"<<endl;
1523	cout<<"     RayOutput/RayCommand.txt"<<endl;
1524	cout<<"     	The exact same command provided "<<endl;
1525	cout<<endl;
1526
1527	cout<<"  AMOS"<<endl;
1528	cout<<endl;
1529	cout<<"     RayOutput/AMOS.afg"<<endl;
1530	cout<<"     	Assembly representation in AMOS format, required option: -amos"<<endl;
1531	cout<<endl;
1532
1533
1534	cout<<"  Communication"<<endl;
1535	cout<<endl;
1536	cout<<"     RayOutput/MessagePassingInterface.txt"<<endl;
1537	cout<<"	    	Number of messages sent"<<endl;
1538	cout<<"     RayOutput/NetworkTest.txt"<<endl;
1539	cout<<"	    	Latencies in microseconds"<<endl;
1540	cout<<"     RayOutput/Rank<rank>NetworkTestData.txt"<<endl;
1541	cout<<"	    	Network test raw data"<<endl;
1542	cout<<endl;
1543
1544	cout<<"DOCUMENTATION"<<endl;
1545	cout<<endl;
1546	cout<<basicSpaces<<"This help page (always up-to-date)"<<endl;
1547	cout<<basicSpaces<<"Manual (Portable Document Format): InstructionManual.pdf"<<endl;
1548	cout<<basicSpaces<<"Mailing list archives: http://sourceforge.net/mailarchive/forum.php?forum_name=denovoassembler-users"<<endl;
1549	cout<<endl;
1550	cout<<"AUTHOR"<<endl;
1551	cout<<basicSpaces<<"Written by Sébastien Boisvert."<<endl;
1552	cout<<endl;
1553	cout<<"REPORTING BUGS"<<endl;
1554	cout<<basicSpaces<<"Report bugs to denovoassembler-users@lists.sourceforge.net"<<endl;
1555	cout<<basicSpaces<<"Home page: <http://denovoassembler.sourceforge.net/>"<<endl;
1556	cout<<endl;
1557	cout<<"COPYRIGHT"<<endl;
1558	cout<<basicSpaces<<"This program is free software: you can redistribute it and/or modify"<<endl;
1559	cout<<basicSpaces<<"it under the terms of the GNU General Public License as published by"<<endl;
1560	cout<<basicSpaces<<"the Free Software Foundation, version 3 of the License."<<endl;
1561
1562	cout<<endl;
1563	cout<<basicSpaces<<"This program is distributed in the hope that it will be useful,"<<endl;
1564	cout<<basicSpaces<<"but WITHOUT ANY WARRANTY; without even the implied warranty of"<<endl;
1565	cout<<basicSpaces<<"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<endl;
1566	cout<<basicSpaces<<"GNU General Public License for more details."<<endl;
1567
1568	cout<<endl;
1569	cout<<basicSpaces<<"You have received a copy of the GNU General Public License"<<endl;
1570	cout<<basicSpaces<<"along with this program (see LICENSE)."<<endl;
1571
1572	cout<<endl;
1573	cout<<"Ray "<<RAY_VERSION<<endl;
1574}
1575
1576/* get the prefix for mmap'ed files 
1577 * currently not used. */
1578string Parameters::getMemoryPrefix(){
1579	return m_memoryFilePrefix;
1580}
1581
1582int Parameters::getReducerValue(){
1583	return m_reducerPeriod;
1584}
1585
1586Rank Parameters::getRankFromGlobalId(ReadHandle a){
1587	uint64_t elementsPerRank=m_totalNumberOfSequences/m_size;
1588	Rank rank=a/elementsPerRank;
1589
1590	if(rank >= m_size){
1591		rank=m_size-1;
1592	}
1593
1594	#ifdef ASSERT
1595
1596	assert(rank>=0);
1597
1598	if(rank>=m_size){
1599		cout<<"GlobalIdentifier="<<a<<" Total="<<m_totalNumberOfSequences<<" Size="<<m_size<<" Rank="<<rank<<endl;
1600	}
1601
1602	assert(rank<m_size);
1603	#endif
1604
1605	return rank;
1606}
1607
1608int Parameters::getIdFromGlobalId(ReadHandle a){
1609	int bin=getRankFromGlobalId(a);
1610	LargeCount x=m_totalNumberOfSequences/m_size;
1611	return a-bin*x;
1612}
1613
1614int Parameters::getMaximumDistance(){
1615	return m_maximumDistance;
1616}
1617
1618uint64_t Parameters::getGlobalIdFromRankAndLocalId(Rank rank,int id){
1619	uint64_t x=m_totalNumberOfSequences/m_size;
1620	return rank*x+id;
1621}
1622
1623CoverageDepth Parameters::getMinimumCoverage(){
1624	return m_minimumCoverage;
1625}
1626
1627void Parameters::setMinimumCoverage(CoverageDepth a){
1628	if(!m_providedMinimumCoverage)
1629		m_minimumCoverage=a;
1630}
1631
1632Kmer Parameters::_complementVertex(Kmer*a){
1633	return a->complementVertex(m_wordSize,m_colorSpaceMode);
1634}
1635
1636bool Parameters::hasPairedReads(){
1637	return m_numberOfLibraries!=0;
1638}
1639
1640int Parameters::_vertexRank(Kmer*a){
1641	return a->vertexRank(m_size,m_wordSize,m_colorSpaceMode);
1642}
1643
1644string Parameters::getScaffoldFile(){
1645	ostringstream a;
1646	a<<getPrefix()<<"Scaffolds.fasta";
1647	return a.str();
1648}
1649
1650int Parameters::getColumns(){
1651	return 60;
1652}
1653
1654int Parameters::getLargeContigThreshold(){
1655	return 500;
1656}
1657
1658bool Parameters::showMemoryAllocations(){
1659	return m_showMemoryAllocations;
1660}
1661
1662bool Parameters::writeKmers(){
1663	return m_writeKmers;
1664}
1665
1666void Parameters::fileNameHook(string fileName){
1667	if(fileName.find(".csfasta")!=string::npos){
1668		if(!m_colorSpaceMode&&m_rank==MASTER_RANK){
1669			cout<<endl;
1670			cout<<"Enabling color-space mode"<<endl;
1671			cout<<"All reads should be in color space."<<endl;
1672		}
1673		m_colorSpaceMode=true;
1674	}
1675}
1676
1677CoverageDepth Parameters::getMinimumCoverageToStore(){
1678	return 2;
1679}
1680
1681int Parameters::getLibraryPeaks(int library){
1682	return m_libraryAverageLength[library].size();
1683}
1684
1685bool Parameters::hasOption(string a){
1686	return m_options.count(a)>0;
1687}
1688
1689bool Parameters::hasFile(const char*file){
1690	ifstream f(file);
1691	bool fileIsOk=f.good();
1692	f.close();
1693	return fileIsOk;
1694}
1695
1696string Parameters::getCheckpointFile(const char*checkpointName){
1697	ostringstream a;
1698	a<<m_checkpointDirectory<<"/";
1699	a<<"Rank"<<getRank()<<".Checkpoint."<<checkpointName<<".ray";
1700	return a.str();
1701}
1702
1703bool Parameters::hasCheckpoint(const char*checkpointName){
1704	//cout<<"hasCheckpoint? "<<checkpointName<<endl;
1705
1706	if(!readCheckpoints())
1707		return false;
1708
1709	return hasFile(getCheckpointFile(checkpointName).c_str());
1710}
1711
1712bool Parameters::writeCheckpoints(){
1713	if(hasOption("-write-checkpoints"))
1714		return true;
1715
1716	if(hasOption("-read-write-checkpoints"))
1717		return true;
1718	return false;
1719}
1720
1721bool Parameters::readCheckpoints(){
1722	if(hasOption("-read-checkpoints"))
1723		return true;
1724	if(hasOption("-read-write-checkpoints"))
1725		return true;
1726	return false;
1727}
1728
1729bool Parameters::showCommunicationEvents(){
1730	return m_showCommunicationEvents;
1731}
1732
1733bool Parameters::showReadPlacement(){
1734	return m_showReadPlacement;
1735}
1736
1737string Parameters::getConnectionType(){
1738	return m_connectionType;
1739}
1740
1741int Parameters::getRoutingDegree(){
1742	return m_degree;
1743}
1744
1745vector<string>*Parameters::getSearchDirectories(){
1746	return &m_searchDirectories;
1747}
1748
1749string Parameters::getGenomeToTaxonFile(){
1750	return m_genomeToTaxonFile;
1751}
1752
1753string Parameters::getTreeFile(){
1754	return m_treeFile;
1755}
1756
1757string Parameters::getTaxonNameFile(){
1758	return m_taxonNameFile;
1759}
1760
1761string Parameters::getSampleName(){
1762	string sample=getPrefix();
1763
1764	return getBaseName(sample);
1765}
1766
1767string Parameters::getBaseName(string directory){
1768
1769	int last=directory.length()-1;
1770	while(last>0 && isDirectorySeparator(directory[last])){
1771		last--;
1772	}
1773
1774	int first=last;
1775	while(first>0 && !isDirectorySeparator(directory[first])){
1776		first--;
1777	}
1778
1779	if(isDirectorySeparator(directory[first])){
1780		first++;
1781	}
1782
1783	int length=last-first+1;
1784
1785	return directory.substr(first,length);
1786}
1787
1788bool Parameters::isDirectorySeparator(char a){
1789	/* POSIX or Redmond separator */
1790	return a=='/' || a=='\\';
1791}