PageRenderTime 86ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/dataLinker/Configuration.cpp

http://tud-scaffolding.googlecode.com/
C++ | 405 lines | 370 code | 7 blank | 28 comment | 80 complexity | bb7caaf962498f36858d3591d836f6c9 MD5 | raw file
Possible License(s): GPL-3.0
  1. /*
  2. * dataLinker : creates abstract contig links from the available information sources.
  3. * Copyright (C) 2011 Alexey Gritsenko
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see http://www.gnu.org/licenses/.
  17. *
  18. *
  19. *
  20. * Email: a.gritsenko@tudelft.nl
  21. * Mail: Delft University of Technology
  22. * Faculty of Electrical Engineering, Mathematics, and Computer Science
  23. * Department of Mediamatics
  24. * P.O. Box 5031
  25. * 2600 GA, Delft, The Netherlands
  26. */
  27. #include "Configuration.h"
  28. #include "Defines.h"
  29. #include "Helpers.h"
  30. #include <iostream>
  31. #include <cstring>
  32. #include <cstdlib>
  33. #include <sstream>
  34. using namespace std;
  35. // Construtor with default configuration parameter settings.
  36. Configuration::Configuration()
  37. {
  38. Success = false;
  39. InputFileName = "";
  40. OutputFileName = "output.opt";
  41. ReadCoverageFileName = "";
  42. MaximumLinkHits = 5;
  43. NoOverlapDeviation = 0;
  44. }
  45. // Parses command line arguments. Returns true if successful.
  46. bool Configuration::ProcessCommandLine(int argc, char *argv[])
  47. {
  48. this->Success = true;
  49. stringstream serr;
  50. double weight = 1;
  51. int mapQ = 0;
  52. int minReadLength = 0;
  53. int maxEditDistance = 10000;
  54. if (argc == 1)
  55. {
  56. serr << "[-] Not enough arguments. Consult -help." << endl;
  57. this->Success = false;
  58. }
  59. else
  60. {
  61. int i = 1;
  62. while (i < argc)
  63. {
  64. if (!strcmp("-help", argv[i]) || !strcmp("-h", argv[i]))
  65. {
  66. printHelpMessage(serr);
  67. this->Success = false;
  68. break;
  69. }
  70. else if (!strcmp("-weight", argv[i]))
  71. {
  72. if (argc - i - 1 < 1)
  73. {
  74. serr << "[-] Parsing error in -weight: must have an argument." << endl;
  75. this->Success = false;
  76. break;
  77. }
  78. i++;
  79. double newWeight = atof(argv[i]);
  80. if (newWeight < Helpers::Eps)
  81. {
  82. serr << "[-] Parsing error in -weight: weight must be a positive number." << endl;
  83. this->Success = false;
  84. break;
  85. }
  86. weight = newWeight;
  87. }
  88. else if (!strcmp("-mapq", argv[i]))
  89. {
  90. if (argc - i - 1 < 1)
  91. {
  92. serr << "[-] Parsing error in -mapq: must have an argument." << endl;
  93. this->Success = false;
  94. break;
  95. }
  96. i++;
  97. bool newMapQSuccess;
  98. int newMapQ = Helpers::ParseInt(argv[i], newMapQSuccess);
  99. if (!newMapQSuccess)
  100. {
  101. serr << "[-] Parsing error in -mapq: weight must be a non-negative number." << endl;
  102. this->Success = false;
  103. break;
  104. }
  105. mapQ = newMapQ;
  106. }
  107. else if (!strcmp("-minlength", argv[i]))
  108. {
  109. if (argc - i - 1 < 1)
  110. {
  111. serr << "[-] Parsing error in -minlength: must have an argument." << endl;
  112. this->Success = false;
  113. break;
  114. }
  115. i++;
  116. bool newMinReadLengthSuccess;
  117. int newMinReadlength = Helpers::ParseInt(argv[i], newMinReadLengthSuccess);
  118. if (!newMinReadLengthSuccess)
  119. {
  120. serr << "[-] Parsing error in -minlength: weight must be a non-negative number." << endl;
  121. this->Success = false;
  122. break;
  123. }
  124. minReadLength = newMinReadlength;
  125. }
  126. else if (!strcmp("-maxedit", argv[i]))
  127. {
  128. if (argc - i - 1 < 1)
  129. {
  130. serr << "[-] Parsing error in -maxedit: must have an argument." << endl;
  131. this->Success = false;
  132. break;
  133. }
  134. i++;
  135. bool newMaxEditDistanceSuccess;
  136. int newMaxEditDistance = Helpers::ParseInt(argv[i], newMaxEditDistanceSuccess);
  137. if (!newMaxEditDistanceSuccess)
  138. {
  139. serr << "[-] Parsing error in -maxedit: weight must be a non-negative number." << endl;
  140. this->Success = false;
  141. break;
  142. }
  143. maxEditDistance = newMaxEditDistance;
  144. }
  145. else if (!strcmp("-maxhits", argv[i]))
  146. {
  147. if (argc - i - 1 < 1)
  148. {
  149. serr << "[-] Parsing error in -maxhits: must have an argument." << endl;
  150. this->Success = false;
  151. break;
  152. }
  153. i++;
  154. bool hitsSuccess;
  155. MaximumLinkHits = Helpers::ParseInt(argv[i], hitsSuccess);
  156. if (!hitsSuccess || MaximumLinkHits <= 0)
  157. {
  158. serr << "[-] Parsing error in -maxhits: number of hits must be a positive number." << endl;
  159. this->Success = false;
  160. break;
  161. }
  162. }
  163. else if (!strcmp("-nooverlapdeviation", argv[i]))
  164. {
  165. if (argc - i - 1 < 1)
  166. {
  167. serr << "[-] Parsing error in -nooverlapdeviation: must have an argument." << endl;
  168. this->Success = false;
  169. break;
  170. }
  171. i++;
  172. double newNoOverlapDeviation = atof(argv[i]);
  173. if (newNoOverlapDeviation < Helpers::Eps)
  174. {
  175. serr << "[-] Parsing error in -nooverlapdeviation: deviation must be a positive number." << endl;
  176. this->Success = false;
  177. break;
  178. }
  179. NoOverlapDeviation = newNoOverlapDeviation;
  180. }
  181. else if (!strcmp("-454", argv[i]))
  182. {
  183. if (argc - i - 1 < 4)
  184. {
  185. serr << "[-] Parsing error in -454: must have 4 arguments." << endl;
  186. this->Success = false;
  187. break;
  188. }
  189. i++;
  190. string leftFileName = argv[i]; i++;
  191. string rightFileName = argv[i]; i++;
  192. bool muSuccess;
  193. int mu = Helpers::ParseInt(argv[i], muSuccess);
  194. if (!muSuccess || mu < 0)
  195. {
  196. serr << "[-] Parsing error in -454: mu must be a non-negative number." << endl;
  197. this->Success = false;
  198. break;
  199. }
  200. i++;
  201. bool sigmaSuccess;
  202. int sigma = Helpers::ParseInt(argv[i], sigmaSuccess);
  203. if (!sigmaSuccess || sigma < 0)
  204. {
  205. serr << "[-] Parsing error in -454: sigma must be a non-negative number." << endl;
  206. this->Success = false;
  207. break;
  208. }
  209. this->PairedReadInputs.push_back(PairedInput(leftFileName, rightFileName, mu, sigma, false, weight, mapQ, minReadLength, maxEditDistance));
  210. }
  211. else if (!strcmp("-illumina", argv[i]))
  212. {
  213. if (argc - i - 1 < 4)
  214. {
  215. serr << "[-] Parsing error in -illumina: must have 4 arguments." << endl;
  216. this->Success = false;
  217. break;
  218. }
  219. i++;
  220. string leftFileName = argv[i]; i++;
  221. string rightFileName = argv[i]; i++;
  222. bool muSuccess;
  223. int mu = Helpers::ParseInt(argv[i], muSuccess);
  224. if (!muSuccess || mu < 0)
  225. {
  226. serr << "[-] Parsing error in -illumina: mu must be a non-negative number." << endl;
  227. this->Success = false;
  228. break;
  229. }
  230. i++;
  231. bool sigmaSuccess;
  232. int sigma = Helpers::ParseInt(argv[i], sigmaSuccess);
  233. if (!sigmaSuccess || sigma < 0)
  234. {
  235. serr << "[-] Parsing error in -illumina: sigma must be a non-negative number." << endl;
  236. this->Success = false;
  237. break;
  238. }
  239. this->PairedReadInputs.push_back(PairedInput(leftFileName, rightFileName, mu, sigma, true, weight, mapQ, minReadLength, maxEditDistance));
  240. }
  241. else if (!strcmp("-seq", argv[i]))
  242. {
  243. if (argc - i - 1 < 2)
  244. {
  245. serr << "[-] Parsing error in -seq: must have 2 arguments." << endl;
  246. this->Success = false;
  247. break;
  248. }
  249. i++;
  250. string fileName = argv[i]; i++;
  251. bool sigmaSuccess;
  252. int sigma = Helpers::ParseInt(argv[i], sigmaSuccess);
  253. if (!sigmaSuccess || sigma < 0)
  254. {
  255. serr << "[-] Parsing error in -seq: sigma must be a non-negative number." << endl;
  256. this->Success = false;
  257. break;
  258. }
  259. this->SequenceInputs.push_back(SequenceInput(fileName, sigma, weight, minReadLength));
  260. }
  261. else if (!strcmp("-output", argv[i]))
  262. {
  263. if (argc - i - 1 < 1)
  264. {
  265. serr << "[-] Parsing error in -output: must have an argument." << endl;
  266. this->Success = false;
  267. break;
  268. }
  269. i++;
  270. this->OutputFileName = argv[i];
  271. }
  272. else if (!strcmp("-readcoverage", argv[i]))
  273. {
  274. if (argc - i - 1 < 1)
  275. {
  276. serr << "[-] Parsing error in -readcoverage: must have an argument." << endl;
  277. this->Success = false;
  278. break;
  279. }
  280. i++;
  281. this->ReadCoverageFileName = argv[i];
  282. }
  283. else if (!strcmp("-tmp", argv[i]))
  284. {
  285. if (argc - i - 1 < 1)
  286. {
  287. serr << "[-] Parsing error in -tmp: must have an argument." << endl;
  288. this->Success = false;
  289. break;
  290. }
  291. i++;
  292. this->BWAConfig.TmpPath = this->NovoAlignConfig.TmpPath = this->SAMToolsConfig.TmpPath = this->MummerTilerConfig.TmpPath = argv[i];
  293. }
  294. else if (!strcmp("-bwathreads", argv[i]))
  295. {
  296. if (argc - i - 1 < 1)
  297. {
  298. serr << "[-] Parsing error in -bwathreads: must have an argument." << endl;
  299. this->Success = false;
  300. break;
  301. }
  302. i++;
  303. bool threadsSuccess;
  304. BWAConfig.NumberOfThreads = Helpers::ParseInt(argv[i], threadsSuccess);
  305. if (!threadsSuccess || BWAConfig.NumberOfThreads <= 0)
  306. {
  307. serr << "[-] Parsing error in -bwathreads: number of threads must be a positive number." << endl;
  308. this->Success = false;
  309. break;
  310. }
  311. }
  312. else if (!strcmp("-bwahits", argv[i]))
  313. {
  314. if (argc - i - 1 < 1)
  315. {
  316. serr << "[-] Parsing error in -bwahits: must have an argument." << endl;
  317. this->Success = false;
  318. break;
  319. }
  320. i++;
  321. bool hitsSuccess;
  322. BWAConfig.MaximumHits = Helpers::ParseInt(argv[i], hitsSuccess);
  323. if (!hitsSuccess || BWAConfig.MaximumHits <= 0)
  324. {
  325. serr << "[-] Parsing error in -bwahits: number of hits must be a positive number." << endl;
  326. this->Success = false;
  327. break;
  328. }
  329. }
  330. else if (!strcmp("-bwaexact", argv[i]))
  331. {
  332. if (argc - i - 1 < 1)
  333. {
  334. cerr << "[-] Parsing error in -bwaexact: must have an argument." << endl;
  335. this->Success = false;
  336. break;
  337. }
  338. i++;
  339. bool sw = false;
  340. if (!strcasecmp(argv[i], "yes"))
  341. sw = true;
  342. else if (!strcasecmp(argv[i], "no"))
  343. sw = false;
  344. else
  345. {
  346. cerr << "[-] Parsing error in -bwaexact: argument must be yes/no." << endl;
  347. this->Success = false;
  348. break;
  349. }
  350. BWAConfig.ExactMatch = sw;
  351. }
  352. else if (i == argc - 1)
  353. this->InputFileName = argv[argc - 1];
  354. else
  355. {
  356. serr << "[-] Unknown argument: " << argv[i] << endl;
  357. this->Success = false;
  358. break;
  359. }
  360. i++;
  361. }
  362. if (this->InputFileName == "")
  363. {
  364. serr << "[-] No input file specified." << endl;
  365. this->Success = false;
  366. }
  367. }
  368. if (!this->Success)
  369. LastError = serr.str();
  370. return this->Success;
  371. }
  372. void Configuration::printHelpMessage(stringstream &serr)
  373. {
  374. serr << "[i] Contig linker version " << VERSION << " (" << DATE << ")" << endl;
  375. serr << "[i] By " << AUTHOR << endl;
  376. serr << "[i] Usage: dataLinker [arguments] <sequence.fasta>" << endl;
  377. serr << "[i] -help Print this message and exit." << endl;
  378. serr << "[i] -weight <weight> Set weight for information sources coming after the switch. [1]" << endl;
  379. serr << "[i] -minlength <length> Set minimum length cutoff for information sources coming after the switch. [0]" << endl;
  380. serr << endl;
  381. serr << "[i] -mapq <score> Set MapQ cutoff for information sources coming after the switch. [0]" << endl;
  382. serr << "[i] -maxedit <distance> Set maximum edit distance cutoff for information sources coming after the switch. [10000]" << endl;
  383. serr << "[i] -maxhits <num> Maximum number of allowed link hits. If a link has more hits, it is disregarded. [5]" << endl;
  384. serr << "[i] -nooverlapdeviation <num> Maximum allowed deviation from mean insert size when no overlaps are allowed. [disabled]" << endl;
  385. serr << "[i] -454 <left.fq> <right.fq> <mu> <sigma> Process 454 paired reads with insert size <mu>+/-<sigma> into linking information." << endl;
  386. serr << "[i] -illumina <left.fq> <right.fq> <mu> <sigma> Process Illumina paired reads with insert size <mu>+/-<sigma> into linking information." << endl;
  387. serr << endl;
  388. serr << "[i] -seq <reference.fa> <sigma> Process related sequences into linking information with <sigma> as standard deviation." << endl;
  389. serr << endl;
  390. serr << "[i] -readcoverage <filename> Produce contig read coverage data and output it to file <filename>. [disabled]" << endl;
  391. serr << "[i] -output <filename> Output filename for optimzation information. [output.opt]" << endl;
  392. serr << "[i] -tmp <path> Define scrap path for temporary files. [/tmp]" << endl;
  393. serr << "[i] BWA configuration options:" << endl;
  394. serr << "[i] -bwathreads <n> Number of threads used in BWA alignment. [8]" << endl;
  395. serr << "[i] -bwahits <n> Maximum number of alignment hits BWA should report. [1000]" << endl;
  396. serr << "[i] -bwaexact <yes/no> Use exact matching in BWA? [no]";
  397. serr << endl;
  398. }