PageRenderTime 67ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/code/Mock/Parameters.cpp

https://bitbucket.org/sebhtml/ray
C++ | 2404 lines | 1845 code | 478 blank | 81 comment | 193 complexity | 66b2b2a3ee1127d81cdee009aaa2eede MD5 | raw file
Possible License(s): GPL-3.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. Ray -- Parallel genome assemblies for parallel DNA sequencing
  3. Copyright (C) 2010, 2011, 2012, 2013 Sébastien Boisvert
  4. http://DeNovoAssembler.SourceForge.Net/
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, version 3 of the License.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You have received a copy of the GNU General Public License
  13. along with this program (gpl-3.0.txt).
  14. see <http://www.gnu.org/licenses/>
  15. */
  16. /*
  17. * TODO: add option -minimumScaffoldLength
  18. */
  19. #include "Parameters.h"
  20. #include "common_functions.h"
  21. #include <code/Library/LibraryPeakFinder.h>
  22. #include <code/SequencesLoader/Read.h>
  23. #include <code/SequencesLoader/Loader.h>
  24. #include <code/SequencesLoader/ReadHandle.h>
  25. #include <code/SequencesLoader/SequenceFileDetector.h>
  26. #include <RayPlatform/memory/MyAllocator.h>
  27. #include <RayPlatform/core/OperatingSystem.h>
  28. #include <RayPlatform/core/types.h> /* for CONFIG_MINI_RANKS */
  29. #include <string>
  30. #include <sstream>
  31. #include <iostream>
  32. #include <vector>
  33. #include <cstdlib>
  34. #include <fstream>
  35. #include <assert.h>
  36. #include <math.h>
  37. using namespace std;
  38. void Parameters::getIndexes(int count,vector<int>*out){
  39. vector<int> numbers;
  40. for(int i=0;i<count;i++)
  41. numbers.push_back(i);
  42. srand(99);
  43. while(numbers.size()>0){
  44. int randomIndex=rand()%numbers.size();
  45. int index=numbers[randomIndex];
  46. out->push_back(index);
  47. vector<int> newNumbers;
  48. for(int i=0;i<(int)numbers.size();i++){
  49. if(randomIndex==i)
  50. continue;
  51. newNumbers.push_back(numbers[i]);
  52. }
  53. numbers=newNumbers;
  54. }
  55. }
  56. Parameters::Parameters(){
  57. m_providedPeakCoverage=false;
  58. m_providedRepeatCoverage=false;
  59. m_providedMinimumCoverage=false;
  60. m_prefix="RayOutput";
  61. m_initiated=false;
  62. m_showMemoryAllocations=false;
  63. m_directory="assembly";
  64. m_minimumContigLength=100;
  65. m_wordSize=21;
  66. m_colorSpaceMode=false;
  67. m_reducerIsActivated=false;
  68. m_amos=false;
  69. m_error=false;
  70. m_memoryFilePrefix=m_prefix;
  71. m_profiler=false;
  72. m_debugBubbles=false;
  73. m_debugSeeds=false;
  74. m_showMemoryUsage=true;
  75. m_showEndingContext=false;
  76. m_writeKmers=false;
  77. m_showExtensionChoice=false;
  78. m_showReadPlacement=false;
  79. /** use the new NovaEngine (TM) */
  80. m_options.insert("-use-NovaEngine");
  81. m_checkpointDirectory="Checkpoints";
  82. m_hasCheckpointDirectory=false;
  83. m_maximumSeedCoverage=getMaximumAllowedCoverage();
  84. }
  85. bool Parameters::showExtensionChoice(){
  86. return m_showExtensionChoice;
  87. }
  88. bool Parameters::showEndingContext(){
  89. return m_showEndingContext;
  90. }
  91. bool Parameters::debugBubbles(){
  92. return m_debugBubbles;
  93. }
  94. bool Parameters::runProfiler(){
  95. return m_profiler;
  96. }
  97. int Parameters::getWordSize(){
  98. return m_wordSize;
  99. }
  100. void Parameters::loadCommandsFromFile(char*file){
  101. // first, load the configuration into a stream
  102. ostringstream content;
  103. #define __BLOCK_SIZE 4096
  104. ifstream f(file);
  105. while(!f.eof()){
  106. char buffer[__BLOCK_SIZE];
  107. f.read(buffer,__BLOCK_SIZE);
  108. int i=0;
  109. int characterRead=f.gcount();
  110. while(i<characterRead)
  111. content<<buffer[i++];
  112. }
  113. f.close();
  114. #undef __BLOCK_SIZE
  115. ostringstream withoutComments;
  116. m_configurationContent=content.str();
  117. bool insideComment=false;
  118. int total=m_configurationContent.length();
  119. int i=0;
  120. char beginComment='#';
  121. char endComment='\n';
  122. while(i<total){
  123. char data=m_configurationContent[i];
  124. if(data==beginComment && !insideComment){
  125. insideComment=true;
  126. }else if(data==endComment && insideComment){
  127. insideComment=false;
  128. }else if(!insideComment){
  129. withoutComments<<data;
  130. }
  131. i++;
  132. }
  133. string withoutCommentsString=withoutComments.str();
  134. //#define __debug_comments
  135. #ifdef __debug_comments
  136. cout<<"<Original>"<<endl;
  137. cout<<m_configurationContent;
  138. cout<<"</Original>"<<endl;
  139. cout<<"<Without comments>"<<endl;
  140. cout<<withoutCommentsString;
  141. cout<<"</Without comments>"<<endl;
  142. #endif
  143. istringstream realData(withoutCommentsString);
  144. while(1){
  145. string token="";
  146. realData>>token;
  147. if(token=="")
  148. break;
  149. m_commands.push_back(token);
  150. }
  151. }
  152. void Parameters::loadCommandsFromArguments(int argc,char**argv){
  153. for(int i=1;i<argc;i++){
  154. m_commands.push_back(argv[i]);
  155. }
  156. }
  157. /* parse commands */
  158. void Parameters::parseCommands(){
  159. //cout << "DEBUG Parameters::parseCommands" << endl;
  160. m_initiated=true;
  161. set<string> commands;
  162. for(int i=0;i<(int)m_commands.size();i++)
  163. m_options.insert(m_commands[i]);
  164. m_showCommunicationEvents=false;
  165. if(hasOption("-show-communication-events"))
  166. m_showCommunicationEvents=true;
  167. /*
  168. if(hasOption("-test-network-only")){
  169. m_options.insert("-write-network-test-raw-data");
  170. }
  171. */
  172. if(hasOption("-show-read-placement")){
  173. m_showReadPlacement=true;
  174. }
  175. m_originalCommands=m_commands;
  176. #if 0
  177. __shuffleOperationCodes();
  178. #endif
  179. set<string> singleReadsCommands;
  180. singleReadsCommands.insert("-s");
  181. singleReadsCommands.insert("LoadSingleEndReads");
  182. singleReadsCommands.insert("-LoadSingleEndReads");
  183. singleReadsCommands.insert("--LoadSingleEndReads");
  184. set<string> pairedReadsCommands;
  185. pairedReadsCommands.insert("-p");
  186. pairedReadsCommands.insert("LoadPairedEndReads");
  187. pairedReadsCommands.insert("-LoadPairedEndReads");
  188. pairedReadsCommands.insert("--LoadPairedEndReads");
  189. set<string> interleavedCommands;
  190. interleavedCommands.insert("-i");
  191. set<string> colorSpaceMode;
  192. colorSpaceMode.insert("-color-space");
  193. set<string> outputAmosCommands;
  194. outputAmosCommands.insert("-a");
  195. outputAmosCommands.insert("-amos");
  196. outputAmosCommands.insert("--amos");
  197. outputAmosCommands.insert("--output-amos");
  198. outputAmosCommands.insert("-OutputAmosFile");
  199. outputAmosCommands.insert("--OutputAmosFile");
  200. set<string> showMalloc;
  201. showMalloc.insert("-show-memory-allocations");
  202. set<string> outputFileCommands;
  203. outputFileCommands.insert("-o");
  204. outputFileCommands.insert("-output");
  205. outputFileCommands.insert("-OutputFile");
  206. outputFileCommands.insert("--OutputFile");
  207. set<string> memoryMappedFileCommands;
  208. memoryMappedFileCommands.insert("-MemoryPrefix");
  209. set<string> kmerSetting;
  210. kmerSetting.insert("-k");
  211. set<string> routingDegree;
  212. routingDegree.insert("-routing-graph-degree");
  213. set<string> connectionType;
  214. connectionType.insert("-connection-type");
  215. set<string> reduceMemoryUsage;
  216. reduceMemoryUsage.insert("-r");
  217. set<string> showMemory;
  218. showMemory.insert("-show-memory-usage");
  219. showMemory.insert("--show-memory-usage");
  220. set<string> debugBubbles;
  221. debugBubbles.insert("-debug-bubbles");
  222. debugBubbles.insert("--debug-bubbles");
  223. set<string> runProfiler;
  224. runProfiler.insert("-run-profiler");
  225. runProfiler.insert("--run-profiler");
  226. set<string> showContext;
  227. showContext.insert("-show-ending-context");
  228. showContext.insert("--show-ending-context");
  229. set<string> showExtensionChoiceOption;
  230. showExtensionChoiceOption.insert("-show-extension-choice");
  231. set<string> writeKmers;
  232. writeKmers.insert("-write-kmers");
  233. set<string> setMinimumCoverage;
  234. set<string> setPeakCoverage;
  235. set<string> setRepeatCoverage;
  236. setMinimumCoverage.insert("-minimumCoverage");
  237. setPeakCoverage.insert("-peakCoverage");
  238. setRepeatCoverage.insert("-repeatCoverage");
  239. set<string> minimumContigLength;
  240. minimumContigLength.insert("-minimum-contig-length");
  241. set<string> searchOption;
  242. searchOption.insert("-search");
  243. set<string> phylogeny;
  244. phylogeny.insert("-with-taxonomy");
  245. set<string> coloringOneColor;
  246. coloringOneColor.insert("-one-color-per-file");
  247. set<string> ontology;
  248. ontology.insert("-gene-ontology");
  249. set<string> checkpoints;
  250. checkpoints.insert("-read-write-checkpoints");
  251. checkpoints.insert("-write-checkpoints");
  252. checkpoints.insert("-read-checkpoints");
  253. set<string> maximumSeedCoverage;
  254. maximumSeedCoverage.insert("-use-maximum-seed-coverage");
  255. set<string> detectSequenceFiles;
  256. detectSequenceFiles.insert("-detect-sequence-files");
  257. vector<set<string> > toAdd;
  258. toAdd.push_back(checkpoints);
  259. toAdd.push_back(coloringOneColor);
  260. toAdd.push_back(ontology);
  261. toAdd.push_back(phylogeny);
  262. toAdd.push_back(minimumContigLength);
  263. toAdd.push_back(showExtensionChoiceOption);
  264. toAdd.push_back(setRepeatCoverage);
  265. toAdd.push_back(setPeakCoverage);
  266. toAdd.push_back(setMinimumCoverage);
  267. toAdd.push_back(singleReadsCommands);
  268. toAdd.push_back(pairedReadsCommands);
  269. toAdd.push_back(outputAmosCommands);
  270. toAdd.push_back(outputFileCommands);
  271. toAdd.push_back(kmerSetting);
  272. toAdd.push_back(routingDegree);
  273. toAdd.push_back(interleavedCommands);
  274. toAdd.push_back(searchOption);
  275. toAdd.push_back(reduceMemoryUsage);
  276. toAdd.push_back(memoryMappedFileCommands);
  277. toAdd.push_back(connectionType);
  278. toAdd.push_back(showMemory);
  279. toAdd.push_back(debugBubbles);
  280. toAdd.push_back(runProfiler);
  281. toAdd.push_back(showContext);
  282. toAdd.push_back(showMalloc);
  283. toAdd.push_back(writeKmers);
  284. toAdd.push_back(colorSpaceMode);
  285. toAdd.push_back(maximumSeedCoverage);
  286. toAdd.push_back(detectSequenceFiles);
  287. for(int i=0;i<(int)toAdd.size();i++){
  288. for(set<string>::iterator j=toAdd[i].begin();j!=toAdd[i].end();j++){
  289. commands.insert(*j);
  290. }
  291. }
  292. m_numberOfLibraries=0;
  293. bool providedMemoryPrefix=false;
  294. // expand user command
  295. vector<string> & extraCommands = m_commands;
  296. //cout << "DEBUG adding extraCommands" << endl;
  297. for(int i=0;i<(int)m_commands.size();i++){
  298. string token=m_commands[i];
  299. if(detectSequenceFiles.count(token) > 0) {
  300. i++;
  301. int items = m_commands.size() - i;
  302. if(items < 1) {
  303. if(m_rank == MASTER_RANK)
  304. cout << "Error: " << token << " needs 1 item, you provided " << items << endl;
  305. m_error = true;
  306. return;
  307. }
  308. string directory = m_commands[i];
  309. SequenceFileDetector sequenceFileDetector;
  310. sequenceFileDetector.detectSequenceFiles(directory);
  311. for(int i = 0 ; i < (int)sequenceFileDetector.getLeftFiles().size() ; ++i) {
  312. string & leftFile = sequenceFileDetector.getLeftFiles()[i];
  313. string & rightFile = sequenceFileDetector.getRightFiles()[i];
  314. extraCommands.push_back("LoadPairedEndReads");
  315. extraCommands.push_back(leftFile);
  316. extraCommands.push_back(rightFile);
  317. }
  318. for(int i = 0; i < (int)sequenceFileDetector.getSingleFiles().size() ; ++i) {
  319. string & singleFile = sequenceFileDetector.getSingleFiles()[i];
  320. extraCommands.push_back("LoadSingleEndReads");
  321. extraCommands.push_back(singleFile);
  322. }
  323. }
  324. }
  325. // now parse the commands.
  326. for(int i=0;i<(int)m_commands.size();i++){
  327. string token=m_commands[i];
  328. if(singleReadsCommands.count(token)>0){
  329. i++;
  330. int items=m_commands.size()-i;
  331. if(items<1){
  332. if(m_rank==MASTER_RANK){
  333. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  334. }
  335. m_error=true;
  336. return;
  337. }
  338. token=m_commands[i];
  339. m_singleEndReadsFile.push_back(token);
  340. fileNameHook(token);
  341. if(m_rank==MASTER_RANK){
  342. cout<<endl;
  343. cout<<"-s (single sequences)"<<endl;
  344. cout<<" Sequences: "<<token<<endl;
  345. }
  346. }else if(memoryMappedFileCommands.count(token)>0){
  347. i++;
  348. int items=m_commands.size()-i;
  349. if(items<1){
  350. if(m_rank==MASTER_RANK){
  351. cout<<"Error: "<<token<<" needs 1 item, you provided "<<items<<endl;
  352. }
  353. m_error=true;
  354. return;
  355. }
  356. token=m_commands[i];
  357. m_memoryFilePrefix=token;
  358. providedMemoryPrefix=true;
  359. }else if(checkpoints.count(token)>0){
  360. i++;
  361. int items=m_commands.size()-i;
  362. if(items<1){
  363. if(m_rank==MASTER_RANK){
  364. cout<<"Error: "<<token<<" needs 1 item, you provided "<<items<<endl;
  365. }
  366. m_error=true;
  367. return;
  368. }
  369. token=m_commands[i];
  370. if(m_hasCheckpointDirectory){
  371. cout<<"Warning: can not set already set checkpoint directory."<<endl;
  372. continue;
  373. }
  374. m_checkpointDirectory=token;
  375. m_hasCheckpointDirectory=true;
  376. cout<<"Rank "<<m_rank<<" checkpoint directory: "<<m_checkpointDirectory;
  377. cout << endl;
  378. if(m_rank==MASTER_RANK){
  379. if(!fileExists(m_checkpointDirectory.c_str())){
  380. createDirectory(m_checkpointDirectory.c_str());
  381. }
  382. }
  383. }else if(outputFileCommands.count(token)>0){
  384. i++;
  385. int items=m_commands.size()-i;
  386. if(items<1){
  387. if(m_rank==MASTER_RANK){
  388. cout<<"Error: "<<token<<" needs 1 item, you provided "<<items<<endl;
  389. }
  390. m_error=true;
  391. return;
  392. }
  393. token=m_commands[i];
  394. m_prefix=token;
  395. if(!providedMemoryPrefix){
  396. m_memoryFilePrefix=m_prefix;
  397. }
  398. }else if(interleavedCommands.count(token)>0){
  399. // make sure there is at least 4 elements left.
  400. int items=0;
  401. int k=0;
  402. for(int j=i+1;j<(int)m_commands.size();j++){
  403. string cmd=m_commands[j];
  404. if(commands.count(cmd)==0 && cmd[0]!='-'){
  405. items++;
  406. }else{
  407. break;
  408. }
  409. k++;
  410. }
  411. if(items!=1 && items!=3){
  412. if(m_rank==MASTER_RANK){
  413. cout<<"Error: "<<token<<" needs 1 or 3 items, you provided "<<items<<endl;
  414. }
  415. m_error=true;
  416. return;
  417. }
  418. i++;
  419. token=m_commands[i];
  420. string interleavedFile=token;
  421. int interleavedFileIndex=m_singleEndReadsFile.size();
  422. m_interleavedFiles.insert(interleavedFileIndex);
  423. m_singleEndReadsFile.push_back(interleavedFile);
  424. fileNameHook(interleavedFile);
  425. int meanFragmentLength=0;
  426. int standardDeviation=0;
  427. #ifdef ASSERT
  428. assert(items==1 or items==3);
  429. #endif
  430. if(m_rank==MASTER_RANK){
  431. cout<<endl;
  432. cout<<"Paired library # "<<m_numberOfLibraries<<endl;
  433. cout<<" -i (paired-end interleaved sequences)"<<endl;
  434. cout<<" Sequences: "<<token<<endl;
  435. }
  436. if(items==3){
  437. i++;
  438. token=m_commands[i];
  439. meanFragmentLength=atoi(token.c_str());
  440. i++;
  441. token=m_commands[i];
  442. standardDeviation=atoi(token.c_str());
  443. if(m_rank==MASTER_RANK){
  444. cout<<" Average length: "<<meanFragmentLength<<endl;
  445. cout<<" Standard deviation: "<<standardDeviation<<endl;
  446. }
  447. int distance=meanFragmentLength+standardDeviation;
  448. if(distance>m_maximumDistance){
  449. m_maximumDistance=distance;
  450. }
  451. }else if(items==1){// automatic detection.
  452. map<int,int> t;
  453. m_automaticLibraries.insert(m_numberOfLibraries);
  454. if(m_rank==MASTER_RANK){
  455. cout<<" Average length: automatic detection"<<endl;
  456. cout<<" Standard deviation: automatic detection"<<endl;
  457. }
  458. }else{
  459. #ifdef ASSERT
  460. assert(false);
  461. #endif
  462. }
  463. m_fileLibrary[interleavedFileIndex]=m_numberOfLibraries;
  464. vector<int> files;
  465. files.push_back(interleavedFileIndex);
  466. m_libraryFiles.push_back(files);
  467. addLibraryData(m_numberOfLibraries,meanFragmentLength,standardDeviation);
  468. m_numberOfLibraries++;
  469. }else if(maximumSeedCoverage.count(token)>0){
  470. i++;
  471. int items=m_commands.size()-i;
  472. if(items<1){
  473. if(m_rank==MASTER_RANK){
  474. cout<<"Error: "<<token<<" needs 1 item, you provided "<<items<<endl;
  475. }
  476. m_error=true;
  477. return;
  478. }
  479. token=m_commands[i];
  480. m_maximumSeedCoverage=atoi(token.c_str());
  481. cout<<"[Parameters] changed the maximum seed coverage depth to ";
  482. cout<<m_maximumSeedCoverage<<endl;
  483. }else if(pairedReadsCommands.count(token)>0){
  484. // make sure there is at least 4 elements left.
  485. int items=0;
  486. int k=0;
  487. for(int j=i+1;j<(int)m_commands.size();j++){
  488. string cmd=m_commands[j];
  489. if(commands.count(cmd)==0 && cmd[0]!='-'){
  490. items++;
  491. }else{
  492. break;
  493. }
  494. k++;
  495. }
  496. if(items!=2 && items!=4){
  497. if(m_rank==MASTER_RANK){
  498. cout<<"Error: "<<token<<" needs 2 or 4 items, you provided "<<items<<endl;
  499. }
  500. m_error=true;
  501. return;
  502. }
  503. i++;
  504. token=m_commands[i];
  505. string left=token;
  506. // add left file
  507. int leftFile=m_singleEndReadsFile.size();
  508. m_leftFiles.insert(leftFile);
  509. m_singleEndReadsFile.push_back(left);
  510. i++;
  511. token=m_commands[i];
  512. // add right file
  513. string right=token;
  514. int rightFile=m_singleEndReadsFile.size();
  515. m_rightFiles.insert(rightFile);
  516. m_singleEndReadsFile.push_back(right);
  517. fileNameHook(left);
  518. fileNameHook(right);
  519. int meanFragmentLength=0;
  520. int standardDeviation=0;
  521. #ifdef ASSERT
  522. assert(items==4 or items==2);
  523. #endif
  524. if(m_rank==MASTER_RANK){
  525. cout<<endl;
  526. cout<<"Paired library # "<<m_numberOfLibraries<<endl;
  527. cout<<" -p (paired-end sequences)"<<endl;
  528. cout<<" Left sequences: "<<left<<endl;
  529. cout<<" Right sequences: "<<right<<endl;
  530. }
  531. bool gotValuesFromEndUser=false;
  532. /* try to use what the user provided */
  533. if(items==4){
  534. if(!isValidInteger(m_commands[i+1].c_str())
  535. || !isValidInteger(m_commands[i+2].c_str())){
  536. if(m_rank==MASTER_RANK){
  537. cout<<"Warning: invalid integer values for distances, you provided: -p ";
  538. cout<<left<<" "<<right<<" "<<m_commands[i+1];
  539. cout<<" "<<m_commands[i+2];
  540. cout<<endl;
  541. }
  542. i+=2; /* consume the data */
  543. }else{
  544. i++;
  545. token=m_commands[i];
  546. meanFragmentLength=atoi(token.c_str());
  547. i++;
  548. token=m_commands[i];
  549. standardDeviation=atoi(token.c_str());
  550. if(m_rank==MASTER_RANK){
  551. cout<<" Average length: "<<meanFragmentLength<<endl;
  552. cout<<" Standard deviation: "<<standardDeviation<<endl;
  553. }
  554. gotValuesFromEndUser=true;
  555. }
  556. }
  557. if(!gotValuesFromEndUser){// automatic detection.
  558. m_automaticLibraries.insert(m_numberOfLibraries);
  559. if(m_rank==MASTER_RANK){
  560. cout<<" Average length: automatic detection"<<endl;
  561. cout<<" Standard deviation: automatic detection"<<endl;
  562. }
  563. }
  564. m_fileLibrary[rightFile]=m_numberOfLibraries;
  565. m_fileLibrary[leftFile]=m_numberOfLibraries;
  566. vector<int> files;
  567. files.push_back(leftFile);
  568. files.push_back(rightFile);
  569. m_libraryFiles.push_back(files);
  570. addLibraryData(m_numberOfLibraries,meanFragmentLength,standardDeviation);
  571. m_numberOfLibraries++;
  572. }else if(outputAmosCommands.count(token)>0){
  573. m_amos=true;
  574. }else if(showExtensionChoiceOption.count(token)>0){
  575. m_showExtensionChoice=true;
  576. }else if(showMalloc.count(token)>0){
  577. m_showMemoryAllocations=true;
  578. }else if(reduceMemoryUsage.count(token)>0){
  579. int items=0;
  580. for(int j=i+1;j<(int)m_commands.size();j++){
  581. string cmd=m_commands[j];
  582. if(commands.count(cmd)==0){
  583. items++;
  584. }else{
  585. break;
  586. }
  587. }
  588. if(!(items==0||items==1)){
  589. if(m_rank==MASTER_RANK){
  590. cout<<"Error: "<<token<<" needs 0 or 1 item, you provided "<<items<<endl;
  591. }
  592. m_error=true;
  593. return;
  594. }
  595. m_reducerIsActivated=true;
  596. m_reducerPeriod=1000000;
  597. if(items==1){
  598. m_reducerPeriod=atoi(m_commands[i+1].c_str());
  599. }
  600. }else if(setRepeatCoverage.count(token)>0){
  601. i++;
  602. int items=m_commands.size()-i;
  603. if(items<1){
  604. if(m_rank==MASTER_RANK){
  605. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  606. }
  607. m_error=true;
  608. return;
  609. }
  610. token=m_commands[i];
  611. m_repeatCoverage=atoi(token.c_str());
  612. m_providedRepeatCoverage=true;
  613. }else if(setMinimumCoverage.count(token)>0){
  614. i++;
  615. int items=m_commands.size()-i;
  616. if(items<1){
  617. if(m_rank==MASTER_RANK){
  618. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  619. }
  620. m_error=true;
  621. return;
  622. }
  623. token=m_commands[i];
  624. m_minimumCoverage=atoi(token.c_str());
  625. m_providedMinimumCoverage=true;
  626. }else if(setPeakCoverage.count(token)>0){
  627. i++;
  628. int items=m_commands.size()-i;
  629. if(items<1){
  630. if(m_rank==MASTER_RANK){
  631. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  632. }
  633. m_error=true;
  634. return;
  635. }
  636. token=m_commands[i];
  637. m_peakCoverage=atoi(token.c_str());
  638. m_providedPeakCoverage=true;
  639. }else if(connectionType.count(token)>0){
  640. i++;
  641. int items=m_commands.size()-i;
  642. if(items<1){
  643. if(m_rank==MASTER_RANK){
  644. cout<<"Error: "<<token<<" needs 1 item but you provided only "<<items<<endl;
  645. }
  646. m_error=true;
  647. return;
  648. }
  649. token=m_commands[i];
  650. m_connectionType=token;
  651. }else if(routingDegree.count(token)>0){
  652. i++;
  653. int items=m_commands.size()-i;
  654. if(items<1){
  655. if(m_rank==MASTER_RANK){
  656. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  657. }
  658. m_error=true;
  659. return;
  660. }
  661. token=m_commands[i];
  662. m_degree=atoi(token.c_str());
  663. }else if(phylogeny.count(token)>0){
  664. cout<<"Enabling CorePlugin 'TaxonomyViewer'"<<endl;
  665. i++;
  666. int items=m_commands.size()-i;
  667. if(items<3){
  668. if(m_rank==MASTER_RANK){
  669. cout<<"Error: "<<token<<" needs 3 item, but you provided "<<items<<endl;
  670. }
  671. m_error=true;
  672. return;
  673. }
  674. m_genomeToTaxonFile=m_commands[i];
  675. i++;
  676. m_treeFile=m_commands[i];
  677. i++;
  678. m_taxonNameFile=m_commands[i];
  679. }else if(searchOption.count(token)>0){
  680. i++;
  681. int items=m_commands.size()-i;
  682. if(items<1){
  683. if(m_rank==MASTER_RANK){
  684. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  685. }
  686. m_error=true;
  687. return;
  688. }
  689. token=m_commands[i];
  690. m_searchDirectories.push_back(token);
  691. }else if(minimumContigLength.count(token)>0){
  692. i++;
  693. int items=m_commands.size()-i;
  694. if(items<1){
  695. if(m_rank==MASTER_RANK){
  696. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  697. }
  698. m_error=true;
  699. return;
  700. }
  701. token=m_commands[i];
  702. m_minimumContigLength=atoi(token.c_str());
  703. if(m_rank==MASTER_RANK){
  704. cout<<endl;
  705. cout<<"Rank "<<MASTER_RANK<<" the minimum contig length is "<<m_minimumContigLength<<endl;
  706. cout<<endl;
  707. }
  708. }else if(kmerSetting.count(token)>0){
  709. i++;
  710. int items=m_commands.size()-i;
  711. if(items<1){
  712. if(m_rank==MASTER_RANK){
  713. cout<<"Error: "<<token<<" needs 1 item, you provided only "<<items<<endl;
  714. }
  715. m_error=true;
  716. return;
  717. }
  718. token=m_commands[i];
  719. m_wordSize=atoi(token.c_str());
  720. if(m_wordSize<15){
  721. m_wordSize=15;
  722. }
  723. if(m_wordSize>CONFIG_MAXKMERLENGTH){
  724. if(m_rank==MASTER_RANK){
  725. cout<<endl;
  726. cout<<"Rank "<<MASTER_RANK<<": Warning, k > CONFIG_MAXKMERLENGTH"<<endl;
  727. cout<<"Rank "<<MASTER_RANK<<": Change CONFIG_MAXKMERLENGTH in the Makefile and recompile Ray."<<endl;
  728. }
  729. m_wordSize=CONFIG_MAXKMERLENGTH;
  730. }
  731. if(m_wordSize%2==0){
  732. m_wordSize--;
  733. }
  734. if(m_rank==MASTER_RANK){
  735. cout<<endl;
  736. cout<<"-k (to set the k-mer size)"<<endl;
  737. cout<<" Value: "<<m_wordSize<<endl;
  738. cout<<endl;
  739. }
  740. }else if(writeKmers.count(token)>0){
  741. m_writeKmers=true;
  742. if(m_rank==MASTER_RANK){
  743. cout<<endl;
  744. cout<<"Ray will write k-mers ("<<token<<")"<<endl;
  745. }
  746. }else if(runProfiler.count(token)>0){
  747. m_profiler=true;
  748. if(m_rank==MASTER_RANK){
  749. printf("Enabling profiler!\n");
  750. }
  751. }else if(debugBubbles.count(token)>0){
  752. m_debugBubbles=true;
  753. if(m_rank==MASTER_RANK){
  754. printf("Enabling bubble debug mode.\n");
  755. }
  756. }else if(colorSpaceMode.count(token)>0){
  757. m_colorSpaceMode=true;
  758. if(m_rank==MASTER_RANK){
  759. cout<<endl;
  760. cout<<"Enabling color-space mode"<<endl;
  761. cout<<"All reads should be in color space."<<endl;
  762. }
  763. }else if(showMemory.count(token)>0){
  764. m_showMemoryUsage=true;
  765. if(m_rank==MASTER_RANK){
  766. printf("Enabling memory usage reporting.\n");
  767. }
  768. }else if(showContext.count(token)>0){
  769. m_showEndingContext=true;
  770. if(m_rank==MASTER_RANK){
  771. printf("Ray will show the ending context of extensions.\n");
  772. }
  773. }
  774. }
  775. /*
  776. * This formula is only valid when using paired reads.
  777. * There is not really any limit on the number of files.
  778. * The limit is on the number of paired libraries (2^sizeof(uint16_t)).
  779. */
  780. #ifdef ASSERT
  781. LibraryHandle maximumLibraryIndex=0;
  782. maximumLibraryIndex--;
  783. int maximumNumberOfFiles=(maximumLibraryIndex+1)*2;
  784. assert((int)m_singleEndReadsFile.size()<=maximumNumberOfFiles);
  785. #endif
  786. LargeCount result=1;
  787. for(int p=0;p<m_wordSize;p++){
  788. result*=4;
  789. }
  790. }
  791. void Parameters::writeCommandFile(){
  792. //cout << "DEBUG Parameters::writeCommandFile" << endl;
  793. cout<<endl;
  794. cout<<"Ray command: "<<endl;
  795. ostringstream commandFile;
  796. commandFile<<getPrefix()<<"RayCommand.txt";
  797. ofstream f(commandFile.str().c_str());
  798. int numberOfRays=getSize();
  799. #ifdef ASSERT
  800. assert(numberOfRays%m_numberOfMiniRanksPerRank==0);
  801. #endif
  802. numberOfRays/=m_numberOfMiniRanksPerRank;
  803. f<<"mpiexec -n "<<numberOfRays<<" Ray \\"<<endl;
  804. cout<<"mpiexec -n "<<numberOfRays<<" Ray \\"<<endl;
  805. for(int i=0;i<(int)m_originalCommands.size();i++){
  806. if(i!=(int)m_originalCommands.size()-1){
  807. f<<" "<<m_originalCommands[i]<<" \\"<<endl;
  808. cout<<" "<<m_originalCommands[i]<<" \\"<<endl;
  809. }else{
  810. f<<" "<<m_originalCommands[i]<<endl;
  811. cout<<" "<<m_originalCommands[i]<<endl;
  812. }
  813. }
  814. f.close();
  815. cout<<endl;
  816. cout<<"Rank "<<MASTER_RANK<<" wrote "<<commandFile.str()<<endl;
  817. cout<<endl;
  818. cout<<"k-mer length: "<<m_wordSize<<endl;
  819. ostringstream rayRuntime;
  820. rayRuntime<<getPrefix()<<"RayVersion.txt";
  821. ofstream f2(rayRuntime.str().c_str());
  822. f2<<"Ray "<<CONFIG_RAY_VERSION<<endl;
  823. f2.close();
  824. writeConfigurationFile();
  825. writeSmartCommand();
  826. }
  827. void Parameters::writeSmartCommand() {
  828. ostringstream commandFile;
  829. commandFile<<getPrefix()<<"RaySmartCommand.txt";
  830. ofstream f(commandFile.str().c_str());
  831. int numberOfRays=getSize();
  832. #ifdef ASSERT
  833. assert(numberOfRays%m_numberOfMiniRanksPerRank==0);
  834. #endif
  835. numberOfRays/=m_numberOfMiniRanksPerRank;
  836. f<<"mpiexec -n "<<numberOfRays<<" Ray \\"<<endl;
  837. for(int i=0;i<(int)m_commands.size();i++){
  838. if(i!=(int)m_commands.size()-1){
  839. f<<" "<<m_commands[i]<<" \\"<<endl;
  840. }else{
  841. f<<" "<<m_commands[i]<<endl;
  842. }
  843. }
  844. f.close();
  845. }
  846. void Parameters::writeConfigurationFile(){
  847. if(m_configurationContent=="")
  848. return;
  849. ostringstream file;
  850. file<<getPrefix()<<"/Ray.conf";
  851. ofstream f(file.str().c_str());
  852. f<<m_configurationContent;
  853. f.close();
  854. }
  855. void Parameters::constructor(int argc,char**argv,int rank,int size,
  856. int miniRanksPerRank){
  857. m_numberOfMiniRanksPerRank=miniRanksPerRank;
  858. m_maximumDistance=0;
  859. m_totalNumberOfSequences=0;
  860. m_rank=rank;
  861. m_size=size;
  862. bool hasCommandFile=false;
  863. if(argc==2){
  864. ifstream f(argv[1]);
  865. hasCommandFile=f;
  866. f.close();
  867. }
  868. if(argc==2&&hasCommandFile){
  869. m_input=argv[1];
  870. loadCommandsFromFile(argv[1]);
  871. }else{
  872. loadCommandsFromArguments(argc,argv);
  873. }
  874. setDefaultRoutingOptions();
  875. parseCommands();
  876. }
  877. void Parameters::setDefaultRoutingOptions() {
  878. //cout << "[DEBUG] setDefaultRoutingOptions m_size " << m_size << endl;
  879. /**
  880. * if the empty string is passed to the RayPlatform API,
  881. * the debruijn model is used.
  882. * however, the polytope model is better.
  883. */
  884. m_connectionType = "debruijn";
  885. /** the default is to not use a default value */
  886. m_degree=2;
  887. // preconfigure with a degree of 2
  888. // or at least try to configure...
  889. /*
  890. * Vertices:= Radix^Dimension
  891. *
  892. * Degree:= (Radix-1)*Dimension
  893. */
  894. // we want the biggest radix possible
  895. int actualVertices = getSize();
  896. int radix = actualVertices - 1;
  897. while(1) {
  898. bool foundSolution = false;
  899. for(int dimension = 2 ; dimension < 32 ; ++dimension) {
  900. int computedVertices = (int)pow(radix, dimension);
  901. if(computedVertices == actualVertices) {
  902. int degree = (radix - 1) * dimension;
  903. m_connectionType = "polytope";
  904. m_degree = degree;
  905. foundSolution = true;
  906. break;
  907. }
  908. }
  909. radix--;
  910. if(radix == 1)
  911. break;
  912. if(foundSolution)
  913. break;
  914. }
  915. }
  916. int Parameters::getRank(){
  917. return m_rank;
  918. }
  919. bool Parameters::isInitiated(){
  920. return m_initiated;
  921. }
  922. vector<string> Parameters::getAllFiles(){
  923. vector<string> l;
  924. for(int i=0;i<(int)m_singleEndReadsFile.size();i++)
  925. l.push_back(m_singleEndReadsFile[i]);
  926. return l;
  927. }
  928. string Parameters::getFile(int file){
  929. return m_singleEndReadsFile[file];
  930. }
  931. string Parameters::getDirectory(){
  932. return m_directory;
  933. }
  934. string Parameters::getOutputFile(){
  935. return getPrefix()+"Contigs.fasta";
  936. }
  937. int Parameters::getMinimumContigLength(){
  938. return m_minimumContigLength;
  939. }
  940. bool Parameters::isLeftFile(int i){
  941. return m_leftFiles.count(i)>0;
  942. }
  943. bool Parameters::isRightFile(int i){
  944. return m_rightFiles.count(i)>0;
  945. }
  946. int Parameters::getLibraryAverageLength(int i,int j){
  947. return m_libraryAverageLength[i][j];
  948. }
  949. int Parameters::getLibraryStandardDeviation(int i,int j){
  950. return m_libraryDeviation[i][j];
  951. }
  952. int Parameters::getLibraryMaxAverageLength(int i){
  953. if(m_libraryAverageLength[i].size()==1)
  954. return m_libraryAverageLength[i][0];
  955. int max=0;
  956. for(int j=0;j<(int)m_libraryAverageLength[i].size();j++)
  957. if(m_libraryAverageLength[i][j]>max)
  958. max=m_libraryAverageLength[i][j];
  959. return max;
  960. }
  961. int Parameters::getLibraryMaxStandardDeviation(int i){
  962. if(m_libraryDeviation[i].size()==1)
  963. return m_libraryDeviation[i][0];
  964. int max=0;
  965. for(int j=0;j<(int)m_libraryDeviation[i].size();j++)
  966. if(m_libraryDeviation[i][j]>max)
  967. max=m_libraryDeviation[i][j];
  968. return max;
  969. }
  970. bool Parameters::getColorSpaceMode(){
  971. return m_colorSpaceMode;
  972. }
  973. bool Parameters::useAmos(){
  974. return m_amos;
  975. }
  976. string Parameters::getInputFile(){
  977. return m_input;
  978. }
  979. string Parameters::getParametersFile(){
  980. return "Ray-Parameters.txt";
  981. }
  982. string Parameters::getPrefix(){
  983. ostringstream directory;
  984. directory<<m_prefix<<"/";
  985. return directory.str();
  986. }
  987. string Parameters::getCoverageDistributionFile(){
  988. return getPrefix()+"CoverageDistribution.txt";
  989. }
  990. string Parameters::getAmosFile(){
  991. return getPrefix()+"AMOS.afg";
  992. }
  993. vector<string>*Parameters::getCommands(){
  994. return &m_commands;
  995. }
  996. bool Parameters::getError(){
  997. return m_error;
  998. }
  999. void Parameters::addDistance(int library,int distance,int count){
  1000. m_observedDistances[library][distance]+=count;
  1001. }
  1002. string Parameters::getLibraryGlobalFile(){
  1003. ostringstream s;
  1004. s<<getPrefix();
  1005. s<<""<<"LibraryData"<<".xml";
  1006. return s.str();
  1007. }
  1008. string Parameters::getLibraryFile(int library){
  1009. ostringstream s;
  1010. s<<getPrefix();
  1011. //s<<""<<"Library"<<library<<".txt";
  1012. // everything is now in one single file:
  1013. s << "LibraryData.xml";
  1014. return s.str();
  1015. }
  1016. #define WRITE_LIBRARY_OBSERVATIONS
  1017. void Parameters::computeAverageDistances(){
  1018. cout<<endl;
  1019. #ifdef WRITE_LIBRARY_OBSERVATIONS
  1020. string globalFileName=getLibraryGlobalFile();
  1021. ofstream f(globalFileName.c_str());
  1022. f<<"<libraryData>"<<endl;
  1023. #endif
  1024. for(map<int,map<int,int> >::iterator i=m_observedDistances.begin();
  1025. i!=m_observedDistances.end();i++){
  1026. int library=i->first;
  1027. if(!isAutomatic(library))
  1028. continue;
  1029. vector<int> x;
  1030. vector<int> y;
  1031. #ifdef WRITE_LIBRARY_OBSERVATIONS
  1032. f<<"<library><handle>"<<library<<"</handle>"<<endl;
  1033. f<<"<data>"<<endl;
  1034. f<<"# OuterDistanceInNucleotides Frequency"<<endl;
  1035. f<<"# OuterDistanceInNucleotides includes the gap length and lengths of left and right reads"<<endl;
  1036. #endif
  1037. for(map<int,int>::iterator j=m_observedDistances[library].begin();
  1038. j!=m_observedDistances[library].end();j++){
  1039. int d=j->first;
  1040. int count=j->second;
  1041. #ifdef WRITE_LIBRARY_OBSERVATIONS
  1042. f<<d<<"\t"<<count<<endl;
  1043. #endif
  1044. x.push_back(d);
  1045. y.push_back(count);
  1046. }
  1047. vector<int> averages;
  1048. vector<int> deviations;
  1049. LibraryPeakFinder finder;
  1050. finder.findPeaks(&x,&y,&averages,&deviations);
  1051. for(int i=0;i<(int)averages.size();i++)
  1052. addLibraryData(library,averages[i],deviations[i]);
  1053. #ifdef WRITE_LIBRARY_OBSERVATIONS
  1054. f<<"</data></library>"<<endl;
  1055. #endif
  1056. }
  1057. #ifdef WRITE_LIBRARY_OBSERVATIONS
  1058. f<<"</libraryData>"<<endl;
  1059. f.close();
  1060. #endif
  1061. cout<<endl;
  1062. cout<<endl;
  1063. ostringstream fileName;
  1064. fileName<<getPrefix();
  1065. fileName<<"LibraryStatistics.txt";
  1066. ofstream f2(fileName.str().c_str());
  1067. f2<<"NumberOfPairedLibraries: "<<m_numberOfLibraries<<endl;
  1068. f2<<endl;
  1069. for(int i=0;i<(int)m_numberOfLibraries;i++){
  1070. int library=i;
  1071. string type="Manual";
  1072. if(m_automaticLibraries.count(library)>0){
  1073. type="Automatic";
  1074. }
  1075. f2<<"LibraryNumber: "<<library<<endl;
  1076. string format="Interleaved,Paired";
  1077. vector<int> files=m_libraryFiles[i];
  1078. if(files.size()==2){
  1079. format="TwoFiles,Paired";
  1080. }
  1081. f2<<" InputFormat: "<<format<<endl;
  1082. f2<<" DetectionType: "<<type<<endl;
  1083. f2<<" File: "<<m_singleEndReadsFile[files[0]]<<endl;
  1084. f2<<" NumberOfSequences: "<<m_numberOfSequencesInFile[files[0]]<<endl;
  1085. if(files.size()>1){
  1086. f2<<" File: "<<m_singleEndReadsFile[files[1]]<<endl;
  1087. f2<<" NumberOfSequences: "<<m_numberOfSequencesInFile[files[1]]<<endl;
  1088. }
  1089. f2<<" Distribution: "<<getLibraryFile(library)<<endl;
  1090. for(int j=0;j<getLibraryPeaks(library);j++){
  1091. int average=getLibraryAverageLength(library,j);
  1092. int standardDeviation=getLibraryStandardDeviation(library,j);
  1093. cout<<"Library # "<<library<<" ("<<type<<") -> average length: "<<average<<" and standard deviation: "<<standardDeviation<<endl;
  1094. f2<<" Peak "<<j<<endl;
  1095. f2<<" AverageOuterDistance: "<<average<<endl;
  1096. f2<<" StandardDeviation: "<<standardDeviation<<endl;
  1097. if(standardDeviation*2>average){
  1098. f2<<" DetectionFailure: Yes"<<endl;
  1099. }
  1100. }
  1101. f2<<endl;
  1102. }
  1103. f2.close();
  1104. }
  1105. void Parameters::addLibraryData(int library,int average,int deviation){
  1106. if(average==0)
  1107. return;
  1108. m_libraryAverageLength[library].push_back(average);
  1109. m_libraryDeviation[library].push_back(deviation);
  1110. int distance=average+4*deviation;
  1111. if(distance>m_maximumDistance){
  1112. m_maximumDistance=distance;
  1113. }
  1114. }
  1115. void Parameters::setNumberOfSequences(int file,LargeCount n){
  1116. if(m_numberOfSequencesInFile.count(file)==0){
  1117. m_numberOfSequencesInFile[file]=n;
  1118. m_totalNumberOfSequences+=n;
  1119. }
  1120. }
  1121. int Parameters::getNumberOfLibraries(){
  1122. return m_numberOfLibraries;
  1123. }
  1124. LargeCount Parameters::getNumberOfSequences(int file){
  1125. #ifdef ASSERT
  1126. if(file>=(int)m_numberOfSequencesInFile.size())
  1127. cout<<"Error File= "<<file<<" Files: "<<m_numberOfSequencesInFile.size()<<endl;
  1128. assert(file<(int)m_numberOfSequencesInFile.size());
  1129. #endif
  1130. return m_numberOfSequencesInFile[file];
  1131. }
  1132. int Parameters::getNumberOfFiles(){
  1133. return m_singleEndReadsFile.size();
  1134. }
  1135. bool Parameters::isAutomatic(int library){
  1136. return m_automaticLibraries.count(library)>0;
  1137. }
  1138. int Parameters::getLibrary(int file){
  1139. return m_fileLibrary[file];
  1140. }
  1141. bool Parameters::isInterleavedFile(int i){
  1142. return m_interleavedFiles.count(i)>0;
  1143. }
  1144. bool Parameters::showMemoryUsage(){
  1145. return m_showMemoryUsage;
  1146. }
  1147. string Parameters::getReceivedMessagesFile(){
  1148. string outputForMessages=getPrefix()+"ReceivedMessages.txt";
  1149. return outputForMessages;
  1150. }
  1151. void Parameters::printFinalMessage(){
  1152. cout<<"Rank "<<MASTER_RANK<<" wrote library statistics"<<endl;
  1153. }
  1154. CoverageDepth Parameters::getMaximumAllowedCoverage(){
  1155. CoverageDepth a=0;
  1156. a--;
  1157. return a;
  1158. }
  1159. void Parameters::setPeakCoverage(CoverageDepth a){
  1160. if(!m_providedPeakCoverage){
  1161. m_peakCoverage=a;
  1162. }
  1163. }
  1164. void Parameters::setRepeatCoverage(CoverageDepth a){
  1165. if(!m_providedRepeatCoverage){
  1166. m_repeatCoverage=a;
  1167. }
  1168. }
  1169. CoverageDepth Parameters::getPeakCoverage(){
  1170. return m_peakCoverage;
  1171. }
  1172. CoverageDepth Parameters::getRepeatCoverage(){
  1173. return m_repeatCoverage;
  1174. }
  1175. int Parameters::getSize(){
  1176. return m_size;
  1177. }
  1178. void Parameters::setSize(int a){
  1179. m_size=a;
  1180. }
  1181. bool Parameters::runReducer(){
  1182. return m_reducerIsActivated;
  1183. }
  1184. void Parameters::showOption(string option,string description){
  1185. string spacesBeforeOption=" ";
  1186. cout<<spacesBeforeOption<<option<<endl;
  1187. showOptionDescription(description);
  1188. }
  1189. void Parameters::showOptionDescription(string description){
  1190. string spacesBeforeDescription=" ";
  1191. cout<<spacesBeforeDescription<<description<<endl;
  1192. }
  1193. /* obviously shows usage */
  1194. void Parameters::showUsage(){
  1195. string basicSpaces=" ";
  1196. cout<<"NAME"<<endl<<basicSpaces<<"Ray - assemble genomes in parallel using the message-passing interface"<<endl<<endl;
  1197. cout<<"SYNOPSIS"<<endl;
  1198. cout<<basicSpaces<<"mpiexec -n 80 Ray -k 31 -p l1_1.fastq l1_2.fastq -p l2_1.fastq l2_2.fastq -o test"<<endl;
  1199. cout<<endl;
  1200. cout<<basicSpaces<<"mpiexec -n 80 Ray Ray.conf # with commands in a file"<<endl;
  1201. cout<<endl;
  1202. cout<<basicSpaces<<"mpiexec -n 80 Ray -k 31 -detect-sequence-files SampleDirectory # auto-detection"<<endl;
  1203. cout<<endl;
  1204. #ifdef CONFIG_MINI_RANKS
  1205. cout<<basicSpaces<<"mpiexec -n 10 Ray -mini-ranks-per-rank 7 Ray.conf # with mini-ranks"<<endl;
  1206. cout<<endl;
  1207. #endif /* CONFIG_MINI_RANKS */
  1208. cout<<"DESCRIPTION:"<<endl;
  1209. cout<<endl;
  1210. cout<<" The Ray genome assembler is built on top of the RayPlatform, a generic plugin-based"<<endl;
  1211. cout<<" distributed and parallel compute engine that uses the message-passing interface"<<endl;
  1212. cout<<" for passing messages."<<endl;
  1213. cout<<endl;
  1214. cout<<" Ray targets several applications:"<<endl;
  1215. cout<<endl;
  1216. cout<<" - de novo genome assembly (with Ray vanilla)"<<endl;
  1217. cout<<" - de novo meta-genome assembly (with Ray Méta)"<<endl;
  1218. cout<<" - de novo transcriptome assembly (works, but not tested a lot)"<<endl;
  1219. cout<<" - quantification of contig abundances"<<endl;
  1220. cout<<" - quantification of microbiome consortia members (with Ray Communities)"<<endl;
  1221. cout<<" - quantification of transcript expression"<<endl;
  1222. cout<<" - taxonomy profiling of samples (with Ray Communities)"<<endl;
  1223. cout<<" - gene ontology profiling of samples (with Ray Ontologies)"<<endl;
  1224. cout<<endl;
  1225. showOption("-help","Displays this help page.");
  1226. cout<<endl;
  1227. showOption("-version","Displays Ray version and compilation options.");
  1228. cout<<endl;
  1229. cout<<" Run Ray in pure MPI mode"<<endl;
  1230. cout<<endl;
  1231. cout<<" mpiexec -n 80 Ray ..."<<endl;
  1232. cout<<endl;
  1233. #ifdef CONFIG_MINI_RANKS
  1234. cout<<" Run Ray with mini-ranks on 10 machines, 8 cores / machine (MPI and IEEE POSIX threads)"<<endl;
  1235. cout<<endl;
  1236. cout<<" mpiexec -n 10 Ray -mini-ranks-per-rank 7 ..."<<endl;
  1237. cout<<endl;
  1238. #endif /* CONFIG_MINI_RANKS */
  1239. cout<<" Run Ray on one core only (still needs MPI)"<<endl;
  1240. cout<<endl;
  1241. cout<<" Ray ..."<<endl;
  1242. cout<<endl;
  1243. cout<<endl;
  1244. cout<<" Using a configuration file"<<endl;
  1245. cout<<endl;
  1246. cout<<" Ray can be launched with"<<endl;
  1247. cout<<" mpiexec -n 16 Ray Ray.conf"<<endl;
  1248. cout<<" The configuration file can include comments (starting with #)."<<endl;
  1249. cout<<endl;
  1250. cout<<" K-mer length"<<endl;
  1251. cout<<endl;
  1252. showOption("-k kmerLength","Selects the length of k-mers. The default value is 21. ");
  1253. showOptionDescription("It must be odd because reverse-complement vertices are stored together.");
  1254. showOptionDescription("The maximum length is defined at compilation by CONFIG_MAXKMERLENGTH");
  1255. showOptionDescription("Larger k-mers utilise more memory.");
  1256. cout<<endl;
  1257. cout<<" Inputs"<<endl;
  1258. cout << endl;
  1259. showOption("-detect-sequence-files SampleDirectory", "Detects files in a directory automatically.");
  1260. showOptionDescription("This option can generate these commands automatically for you: LoadPairedEndReads (-p) and LoadSingleEndReads (-s)");
  1261. cout<<endl;
  1262. showOption("-p leftSequenceFile rightSequenceFile [averageOuterDistance standardDeviation]","Provides two files containing paired-end reads.");
  1263. showOptionDescription("averageOuterDistance and standardDeviation are automatically computed if not provided.");
  1264. showOptionDescription("LoadPairedEndReads is equivalent to -p");
  1265. cout<<endl;
  1266. showOption("-i interleavedSequenceFile [averageOuterDistance standardDeviation]","Provides one file containing interleaved paired-end reads.");
  1267. showOptionDescription("averageOuterDistance and standardDeviation are automatically computed if not provided.");
  1268. cout<<endl;
  1269. showOption("-s sequenceFile","Provides a file containing single-end reads.");
  1270. showOptionDescription("LoadSingleEndReads is equivalent to -s");
  1271. cout<<endl;
  1272. cout<<" Outputs"<<endl;
  1273. cout<<endl;
  1274. showOption("-o outputDirectory","Specifies the directory for outputted files. Default is RayOutput");
  1275. showOptionDescription("Other name: -output");
  1276. cout<<endl;
  1277. cout<<" Assembly options (defaults work well)"<<endl;
  1278. cout<<endl;
  1279. showOption("-disable-recycling","Disables read recycling during the assembly");
  1280. showOptionDescription("reads will be set free in 3 cases:");
  1281. showOptionDescription("1. the distance did not match for a pair");
  1282. showOptionDescription("2. the read has not met its mate");
  1283. showOptionDescription("3. the library population indicates a wrong placement");
  1284. showOptionDescription("see Constrained traversal of repeats with paired sequences.");
  1285. showOptionDescription("Sébastien Boisvert, Élénie Godzaridis, François Laviolette & Jacques Corbeil.");
  1286. showOptionDescription("First Annual RECOMB Satellite Workshop on Massively Parallel Sequencing, March 26-27 2011, Vancouver, BC, Canada.");
  1287. cout<<endl;
  1288. showOption("-debug-recycling", "Debugs the recycling events");
  1289. cout<<endl;
  1290. showOption("-ignore-seeds", "Disables assembly by ignoring seeds.");
  1291. cout<<endl;
  1292. showOption("-merge-seeds", "Merges seeds initially to reduce running time.");
  1293. cout << endl;
  1294. showOption("-disable-scaffolder","Disables the scaffolder.");
  1295. cout<<endl;
  1296. showOption("-minimum-contig-length minimumContigLength",
  1297. "Changes the minimum contig length, default is 100 nucleotides");
  1298. cout<<endl;
  1299. showOption("-color-space","Runs in color-space");
  1300. showOptionDescription("Needs csfasta files. Activated automatically if csfasta files are provided.");
  1301. cout<<endl;
  1302. ostringstream buffer3;
  1303. buffer3<<"The default is "<<getMaximumAllowedCoverage()<<".";
  1304. showOption("-use-maximum-seed-coverage maximumSeedCoverageDepth",
  1305. "Ignores any seed with a coverage depth above this threshold.");
  1306. showOptionDescription(buffer3.str());
  1307. cout<<endl;
  1308. showOption("-use-minimum-seed-coverage minimumSeedCoverageDepth",
  1309. "Sets the minimum seed coverage depth.");
  1310. showOptionDescription("Any path with a coverage depth lower than this will be discarded. The default is 0.");
  1311. cout<<endl;
  1312. cout<<" Distributed storage engine (all these values are for each MPI rank)"<<endl;
  1313. cout<<endl;
  1314. ostringstream text;
  1315. showOption("-bloom-filter-bits bits","Sets the number of bits for the Bloom filter");
  1316. text<<"Default is "<<"auto"<<" bits (adaptive), 0 bits disables the Bloom filter.";
  1317. showOptionDescription(text.str());
  1318. cout<<endl;
  1319. text.str("");
  1320. text<<"Default value: "<<__DEFAULT_BUCKETS;
  1321. showOption("-hash-table-buckets buckets","Sets the initial number of buckets. Must be a power of 2 !");
  1322. showOptionDescription(text.str());
  1323. text.str("");
  1324. cout<<endl;
  1325. text<<"Default value: "<<__DEFAULT_BUCKETS_PER_GROUP<<", Must be between >=1 and <= 64";
  1326. showOption("-hash-table-buckets-per-group buckets",
  1327. "Sets the number of buckets per group for sparse storage");
  1328. showOptionDescription(text.str());
  1329. text.str("");
  1330. cout<<endl;
  1331. showOption("-hash-table-load-factor-threshold threshold","Sets the load factor threshold for real-time resizing");
  1332. text<<"Default value: "<< __DEFAULT_LOAD_FACTOR_THRESHOLD<<", must be >= 0.5 and < 1";
  1333. showOptionDescription(text.str());
  1334. cout<<endl;
  1335. showOption("-hash-table-verbosity","Activates verbosity for the distributed storage engine");
  1336. cout<<endl;
  1337. cout<<" Biological abundances"<<endl;
  1338. cout<<endl;
  1339. showOption("-search searchDirectory","Provides a directory containing fasta files to be searched in the de Bruijn graph.");
  1340. showOptionDescription("Biological abundances will be written to RayOutput/BiologicalAbundances");
  1341. showOptionDescription("See Documentation/BiologicalAbundances.txt");
  1342. cout<<endl;
  1343. showOption("-one-color-per-file", "Sets one color per file instead of one per sequence.");
  1344. showOptionDescription("By default, each sequence in each file has a different color.");
  1345. showOptionDescription("For files with large numbers of sequences, using one single color per file may be more efficient.");
  1346. cout<<endl;
  1347. cout<<" Taxonomic profiling with colored de Bruijn graphs"<<endl;
  1348. cout<<endl;
  1349. showOption("-with-taxonomy Genome-to-Taxon.tsv TreeOfLife-Edges.tsv Taxon-Names.tsv","Provides a taxonomy.");
  1350. showOptionDescription("Computes and writes detailed taxonomic profiles.");
  1351. showOptionDescription("See Documentation/Taxonomy.txt for details.");
  1352. cout<<endl;
  1353. showOption("-gene-ontology OntologyTerms.txt Annotations.txt","Provides an ontology and annotations.");
  1354. showOptionDescription("OntologyTerms.txt is fetched from http://geneontology.org");
  1355. showOptionDescription("Annotations.txt is a 2-column file (EMBL_CDS handle & gene ontology identifier)");
  1356. showOptionDescription("See Documentation/GeneOntology.txt");
  1357. cout<<" Other outputs"<<endl;
  1358. cout<<endl;
  1359. showOption("-enable-neighbourhoods","Computes contig neighborhoods in the de Bruijn graph");
  1360. showOptionDescription("Output file: RayOutput/NeighbourhoodRelations.txt");
  1361. cout<<endl;
  1362. showOption("-amos","Writes the AMOS file called RayOutput/AMOS.afg");
  1363. showOptionDescription("An AMOS file contains read positions on contigs.");
  1364. showOptionDescription("Can be opened with software with graphical user interface.");
  1365. cout<<endl;
  1366. showOption("-write-kmers","Writes k-mer graph to RayOutput/kmers.txt");
  1367. showOptionDescription("The resulting file is not utilised by Ray.");
  1368. showOptionDescription("The resulting file is very large.");
  1369. cout<<endl;
  1370. showOption("-write-read-markers","Writes read markers to disk.");
  1371. cout<<endl;
  1372. showOption("-write-seeds","Writes seed DNA sequences to RayOutput/Rank<rank>.RaySeeds.fasta");
  1373. cout<<endl;
  1374. showOption("-write-extensions","Writes extension DNA sequences to RayOutput/Rank<rank>.RayExtensions.fasta");
  1375. cout<<endl;
  1376. showOption("-write-contig-paths","Writes contig paths with coverage values");
  1377. showOptionDescription("to RayOutput/Rank<rank>.RayContigPaths.txt");
  1378. cout<<endl;
  1379. showOption("-write-marker-summary","Writes marker statistics.");
  1380. cout<<endl;
  1381. cout<<" Memory usage"<<endl;
  1382. cout<<endl;
  1383. showOption("-show-memory-usage","Shows memory usage. Data is fetched from /proc on GNU/Linux");
  1384. showOptionDescription("Needs __linux__");
  1385. cout<<endl;
  1386. showOption("-show-memory-allocations","Shows memory allocation events");
  1387. cout<<endl;
  1388. cout<<" Algorithm verbosity"<<endl;
  1389. cout<<endl;
  1390. showOption("-show-extension-choice","Shows the choice made (with other choices) during the extension.");
  1391. cout<<endl;
  1392. showOption("-show-ending-context","Shows the ending context of each extension.");
  1393. showOptionDescription("Shows the children of the vertex where extension was too difficult.");
  1394. cout<<endl;
  1395. showOption("-show-distance-summary","Shows summary of outer distances used for an extension path.");
  1396. cout<<endl;
  1397. showOption("-show-consensus","Shows the consensus when a choice is done.");
  1398. cout<<endl;
  1399. /*
  1400. showOption("-minimumCoverage minimumCoverage","Sets manually the minimum coverage.");
  1401. showOptionDescription("If not provided, it is computed by Ray automatically.");
  1402. cout<<endl;
  1403. showOption("-peakCoverage peakCoverage","Sets manually the peak coverage.");
  1404. showOptionDescription("If not provided, it is computed by Ray automatically.");
  1405. cout<<endl;
  1406. showOption("-repeatCoverage repeatCoverage","Sets manually the repeat coverage.");
  1407. showOptionDescription("If not provided, it is computed by Ray automatically.");
  1408. cout<<endl;
  1409. */
  1410. cout<<" Checkpointing"<<endl;
  1411. cout<<endl;
  1412. showOption("-write-checkpoints checkpointDirectory","Write checkpoint files");
  1413. cout<<endl;
  1414. showOption("-read-checkpoints checkpointDirectory","Read checkpoint files");
  1415. cout<<endl;
  1416. showOption("-read-write-checkpoints checkpointDirectory","Read and write checkpoint files");
  1417. cout<<endl;
  1418. cout<<" Message routing for large number of cores"<<endl;
  1419. cout<<endl;
  1420. showOption("-route-messages","Enables the Ray message router. Disabled by default.");
  1421. showOptionDescription("Messages will be routed accordingly so that any rank can communicate directly with only a few others.");
  1422. showOptionDescription("Without -route-messages, any rank can communicate directly with any other rank.");
  1423. showOptionDescription("Files generated: Routing/Connections.txt, Routing/Routes.txt and Routing/RelayEvents.txt");
  1424. showOptionDescription("and Routing/Summary.txt");
  1425. cout<<endl;
  1426. showOption("-connection-type type","Sets the connection type for routes.");
  1427. showOptionDescription("Accepted values are debruijn, hypercube, polytope, group, random, kautz and complete. Default is debruijn.");
  1428. showOptionDescription(" torus: a k-ary n-cube, radix: k, dimension: n, degree: 2*dimension, vertices: radix^dimension");
  1429. showOptionDescription(" polytope: a convex regular polytope, alphabet is {0,1,...,B-1} and the vertices is a power of B");
  1430. showOptionDescription(" hypercube: a hypercube, alphabet is {0,1} and the vertices is a power of 2");
  1431. showOptionDescription(" debruijn: a full de Bruijn graph a given alphabet and diameter");
  1432. showOptionDescription(" kautz: a full de Kautz graph, which is a subgraph of a de Bruijn graph");
  1433. showOptionDescription(" group: silly model where one representative per group can communicate with outsiders");
  1434. showOptionDescription(" random: Erdős–Rényi model");
  1435. showOptionDescription(" complete: a full graph with all the possible connections");
  1436. showOptionDescription("With the type debruijn, the number of ranks must be a power of something.");
  1437. showOptionDescription("Examples: 256 = 16*16, 512=8*8*8, 49=7*7, and so on.");
  1438. showOptionDescription("Otherwise, don't use debruijn routing but use another one");
  1439. showOptionDescription("With the type kautz, the number of ranks n must be n=(k+1)*k^(d-1) for some k and d");
  1440. cout<<endl;
  1441. showOption("-routing-graph-degree degree","Specifies the outgoing degree for the routing graph.");
  1442. showOptionDescription("See Documentation/Routing.txt");
  1443. cout<<endl;
  1444. cout<<" Hardware testing"<<endl;
  1445. cout<<endl;
  1446. showOption("-test-network-only","Tests the network and returns.");
  1447. cout<<endl;
  1448. showOption("-write-network-test-raw-data","Writes one additional file per rank detailing the network test.");
  1449. cout<<endl;
  1450. showOption("-exchanges NumberOfExchanges","Sets the number of exchanges");
  1451. cout<<endl;
  1452. showOption("-disable-network-test","Skips the network test.");
  1453. cout<<endl;
  1454. cout<<" Debugging"<<endl;
  1455. cout<<endl;
  1456. showOption("-verify-message-integrity","Checks message data reliability for any non-empty message.");
  1457. showOptionDescription("add '-D CONFIG_SSE_4_2' in the Makefile to use hardware instruction (SSE 4.2)");
  1458. cout<<endl;
  1459. showOption("-write-scheduling-data", "Writes RayPlatform scheduling information to RayOutput/Scheduling/");
  1460. cout<<endl;
  1461. showOption("-write-plugin-data", "Writes data for plugins registered with the RayPlatform API to RayOutp…

Large files are truncated, but you can click here to view the full file