/tools/sr_mapping/bwa_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 474 lines · 389 code · 44 blank · 41 comment · 0 complexity · 00a9cbfaf448329ad7eb00a6d91df57a MD5 · raw file

  1. <tool id="bwa_wrapper" name="Map with BWA for Illumina" version="1.2.2">
  2. <description></description>
  3. <parallelism method="basic"></parallelism>
  4. <command interpreter="python">
  5. bwa_wrapper.py
  6. --threads="4"
  7. #if $input1.ext == "fastqillumina":
  8. --illumina1.3
  9. #end if
  10. ## reference source: "history" passes a FASTA dataset, any other value uses a built-in index
  11. --fileSource=$genomeSource.refGenomeSource
  12. #if $genomeSource.refGenomeSource == "history":
  13. ## build index on the fly: pass the history dataset as --ref along with --dbkey
  14. --ref="${genomeSource.ownFile}"
  15. --dbkey=$dbkey
  16. #else:
  17. ## use precomputed indexes: --ref is the last field of the bwa_indexes row whose first field equals the selected value
  18. --ref="${ filter( lambda x: str( x[0] ) == str( $genomeSource.indices ), $__app__.tool_data_tables[ 'bwa_indexes' ].get_fields() )[0][-1] }"
  19. --do_not_build_index
  20. #end if
  21. ## input file(s): input2 is only passed when the library is paired
  22. --input1=$paired.input1
  23. #if $paired.sPaired == "paired":
  24. --input2=$paired.input2
  25. #end if
  26. ## output file: the dataset named output in the outputs section
  27. --output=$output
  28. ## run parameters: the detailed options below are only passed when source_select is not pre_set
  29. --genAlignType=$paired.sPaired
  30. --params=$params.source_select
  31. #if $params.source_select != "pre_set":
  32. --maxEditDist=$params.maxEditDist
  33. --fracMissingAligns=$params.fracMissingAligns
  34. --maxGapOpens=$params.maxGapOpens
  35. --maxGapExtens=$params.maxGapExtens
  36. --disallowLongDel=$params.disallowLongDel
  37. --disallowIndel=$params.disallowIndel
  38. --seed=$params.seed
  39. --maxEditDistSeed=$params.maxEditDistSeed
  40. --mismatchPenalty=$params.mismatchPenalty
  41. --gapOpenPenalty=$params.gapOpenPenalty
  42. --gapExtensPenalty=$params.gapExtensPenalty
  43. --suboptAlign=$params.suboptAlign
  44. --noIterSearch=$params.noIterSearch
  45. --outputTopN=$params.outputTopN
  46. --outputTopNDisc=$params.outputTopNDisc
  47. --maxInsertSize=$params.maxInsertSize
  48. --maxOccurPairing=$params.maxOccurPairing
  49. #if $params.readGroup.specReadGroup == "yes"
  50. --rgid="$params.readGroup.rgid"
  51. --rgcn="$params.readGroup.rgcn"
  52. --rgds="$params.readGroup.rgds"
  53. --rgdt="$params.readGroup.rgdt"
  54. --rgfo="$params.readGroup.rgfo"
  55. --rgks="$params.readGroup.rgks"
  56. --rglb="$params.readGroup.rglb"
  57. --rgpg="$params.readGroup.rgpg"
  58. --rgpi="$params.readGroup.rgpi"
  59. --rgpl="$params.readGroup.rgpl"
  60. --rgpu="$params.readGroup.rgpu"
  61. --rgsm="$params.readGroup.rgsm"
  62. #end if
  63. #end if
  64. ## suppress output SAM header: receives the suppressHeader boolean rendered as true or false
  65. --suppressHeader=$suppressHeader
  66. </command>
  67. <requirements> <!-- declares the single package requirement of this tool: bwa -->
  68. <requirement type="package">bwa</requirement>
  69. </requirements>
  70. <inputs>
  71. <conditional name="genomeSource"> <!-- where the reference comes from: a built-in index or a FASTA dataset in the history -->
  72. <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
  73. <option value="indexed">Use a built-in index</option>
  74. <option value="history">Use one from the history</option>
  75. </param>
  76. <when value="indexed">
  77. <param name="indices" type="select" label="Select a reference genome">
  78. <options from_data_table="bwa_indexes">
  79. <filter type="sort_by" column="2" />
  80. <validator type="no_options" message="No indexes are available" />
  81. </options>
  82. </param>
  83. </when>
  84. <when value="history">
  85. <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
  86. </when>
  87. </conditional>
  88. <conditional name="paired"> <!-- single-end needs one FASTQ input, paired-end needs forward and reverse inputs -->
  89. <param name="sPaired" type="select" label="Is this library mate-paired?">
  90. <option value="single">Single-end</option>
  91. <option value="paired">Paired-end</option>
  92. </param>
  93. <when value="single">
  94. <param name="input1" type="data" format="fastqsanger,fastqillumina" label="FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
  95. </when>
  96. <when value="paired">
  97. <param name="input1" type="data" format="fastqsanger,fastqillumina" label="Forward FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
  98. <param name="input2" type="data" format="fastqsanger,fastqillumina" label="Reverse FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
  99. </when>
  100. </conditional>
  101. <conditional name="params"> <!-- pre_set exposes no extra inputs; full exposes the aln/samse/sampe options below -->
  102. <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
  103. <option value="pre_set">Commonly Used</option>
  104. <option value="full">Full Parameter List</option>
  105. </param>
  106. <when value="pre_set" />
  107. <when value="full">
  108. <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
  109. <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
  110. <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
  111. <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
  112. <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
  113. <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
  114. <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
  115. <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
  116. <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
  117. <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
  118. <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
  119. <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
  120. <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
  121. <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
  122. <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
  123. <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
  124. <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
  125. <conditional name="readGroup"> <!-- read group fields are only offered when specReadGroup is yes -->
  126. <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
  127. <option value="yes">Yes</option>
  128. <option value="no" selected="True">No</option>
  129. </param>
  130. <when value="yes">
  131. <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG
  132. tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group
  133. IDs may be modified when merging SAM files in order to handle collisions." />
  134. <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
  135. <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
  136. <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
  137. <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each
  138. flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by
  139. various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
  140. <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
  141. <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
  142. <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
  143. <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
  144. <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA,
  145. SOLID, HELICOS, IONTORRENT and PACBIO" />
  146. <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
  147. <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
  148. </when>
  149. <when value="no" />
  150. </conditional>
  151. </when>
  152. </conditional>
  153. <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
  154. </inputs>
  155. <outputs>
  156. <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads"> <!-- the single SAM dataset; referenced as $output in the command -->
  157. <actions> <!-- set the dbkey metadata of the output according to where the reference came from -->
  158. <conditional name="genomeSource.refGenomeSource">
  159. <when value="indexed">
  160. <action type="metadata" name="dbkey">
  161. <option type="from_data_table" name="bwa_indexes" column="1"> <!-- dbkey is read from column 1 of the bwa_indexes table -->
  162. <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> <!-- drop rows whose column 0 starts with '#' -->
  163. <filter type="param_value" ref="genomeSource.indices" column="0"/> <!-- match column 0 against the selected genomeSource.indices value -->
  164. </option>
  165. </action>
  166. </when>
  167. <when value="history">
  168. <action type="metadata" name="dbkey">
  169. <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" /> <!-- take dbkey from the history reference dataset -->
  170. </action>
  171. </when>
  172. </conditional>
  173. </actions>
  174. </data>
  175. </outputs>
  176. <tests>
  177. <test> <!-- single-end reads, built-in phiX index, pre-set parameters, header suppressed -->
  178. <!--
  179. BWA commands:
  180. bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.sai
  181. bwa samse phiX.fasta bwa_wrapper_out1.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.u.sam
  182. phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
  183. remove the comment lines (beginning with '@') from the resulting sam file
  184. plain old sort doesn't handle underscores like python:
  185. python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out1.u.sam bwa_wrapper_out1.sam
  186. -->
  187. <param name="refGenomeSource" value="indexed" />
  188. <param name="indices" value="phiX" />
  189. <param name="sPaired" value="single" />
  190. <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
  191. <param name="source_select" value="pre_set" />
  192. <param name="suppressHeader" value="true" />
  193. <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" sort="True" />
  194. </test>
  195. <test> <!-- single-end reads, reference FASTA from history, full parameter list without a read group, header suppressed -->
  196. <!--
  197. BWA commands:
  198. cp test-data/phiX.fasta phiX.fasta
  199. bwa index -a is phiX.fasta
  200. bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.sai
  201. bwa samse -n 3 phiX.fasta bwa_wrapper_out2.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.u.sam
  202. phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
  203. remove the comment lines (beginning with '@') from the resulting sam file
  204. plain old sort doesn't handle underscores like python:
  205. python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out2.u.sam bwa_wrapper_out2.sam
  206. -->
  207. <param name="refGenomeSource" value="history" />
  208. <param name="ownFile" value="phiX.fasta" />
  209. <param name="sPaired" value="single" />
  210. <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
  211. <param name="source_select" value="full" />
  212. <param name="maxEditDist" value="0" />
  213. <param name="fracMissingAligns" value="0.04" />
  214. <param name="maxGapOpens" value="1" />
  215. <param name="maxGapExtens" value="-1" />
  216. <param name="disallowLongDel" value="16" />
  217. <param name="disallowIndel" value="5" />
  218. <param name="seed" value="-1" />
  219. <param name="maxEditDistSeed" value="2" />
  220. <param name="mismatchPenalty" value="3" />
  221. <param name="gapOpenPenalty" value="11" />
  222. <param name="gapExtensPenalty" value="4" />
  223. <param name="suboptAlign" value="true" />
  224. <param name="noIterSearch" value="true" />
  225. <param name="outputTopN" value="3" />
  226. <param name="outputTopNDisc" value="10" />
  227. <param name="maxInsertSize" value="500" />
  228. <param name="maxOccurPairing" value="100000" />
  229. <param name="specReadGroup" value="no" />
  230. <param name="suppressHeader" value="true" />
  231. <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" sort="True" />
  232. </test>
  233. <test> <!-- paired-end reads, built-in phiX index, full parameter list with a read group, header kept -->
  234. <!--
  235. BWA commands:
  236. bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out3a.sai
  237. bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3b.sai
  238. bwa sampe -a 500 -o 100000 -n 3 -N 10 -r "@RG\tID:abcdefg\tDS:descrip\tDT:2010-11-01\tLB:lib-mom-A\tPI:400\tPL:ILLUMINA\tSM:mom" phiX.fasta bwa_wrapper_out3a.sai bwa_wrapper_out3b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3.u.sam
  239. phiX.fasta is the prefix for the reference
  240. plain old sort doesn't handle underscores like python:
  241. python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out3.u.sam bwa_wrapper_out3.sam
  242. -->
  243. <param name="refGenomeSource" value="indexed" />
  244. <param name="indices" value="phiX" />
  245. <param name="sPaired" value="paired" />
  246. <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
  247. <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
  248. <param name="source_select" value="full" />
  249. <param name="maxEditDist" value="0" />
  250. <param name="fracMissingAligns" value="0.04" />
  251. <param name="maxGapOpens" value="1" />
  252. <param name="maxGapExtens" value="-1" />
  253. <param name="disallowLongDel" value="16" />
  254. <param name="disallowIndel" value="5" />
  255. <param name="seed" value="-1" />
  256. <param name="maxEditDistSeed" value="2" />
  257. <param name="mismatchPenalty" value="3" />
  258. <param name="gapOpenPenalty" value="11" />
  259. <param name="gapExtensPenalty" value="4" />
  260. <param name="suboptAlign" value="true" />
  261. <param name="noIterSearch" value="true" />
  262. <param name="outputTopN" value="3" />
  263. <param name="outputTopNDisc" value="10" />
  264. <param name="maxInsertSize" value="500" />
  265. <param name="maxOccurPairing" value="100000" />
  266. <param name="specReadGroup" value="yes" />
  267. <param name="rgid" value="abcdefg" />
  268. <param name="rgcn" value="" />
  269. <param name="rgds" value="descrip" />
  270. <param name="rgdt" value="2010-11-01" />
  271. <param name="rgfo" value="" />
  272. <param name="rgks" value="" />
  273. <param name="rglb" value="lib-mom-A" />
  274. <param name="rgpg" value="" />
  275. <param name="rgpi" value="400" />
  276. <param name="rgpl" value="ILLUMINA" />
  277. <param name="rgpu" value="" />
  278. <param name="rgsm" value="mom" />
  279. <param name="suppressHeader" value="false" />
  280. <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" sort="True" lines_diff="2" />
  281. </test>
  282. <test> <!-- paired-end reads, reference FASTA from history, pre-set parameters, header suppressed -->
  283. <!--
  284. BWA commands:
  285. cp test-data/phiX.fasta phiX.fasta
  286. bwa index -a is phiX.fasta
  287. bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out8a.sai
  288. bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8b.sai
  289. bwa sampe -a 500 -o 100000 phiX.fasta bwa_wrapper_out8a.sai bwa_wrapper_out8b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8.u.sam
  290. phiX.fasta is the prefix for the reference
  291. remove the comment lines (beginning with '@') from the resulting sam file
  292. python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out8.u.sam bwa_wrapper_out8.sam
  293. -->
  294. <param name="refGenomeSource" value="history" />
  295. <!-- reference FASTA taken from the history; the command template passes it as the reference without the do_not_build_index flag -->
  296. <param name="ownFile" value="phiX.fasta" />
  297. <param name="sPaired" value="paired" />
  298. <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
  299. <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
  300. <param name="source_select" value="pre_set" />
  301. <param name="suppressHeader" value="true" />
  302. <output name="output" file="bwa_wrapper_out8.sam" ftype="sam" sort="True" />
  303. </test>
  304. </tests>
  305. <help>
  306. **What it does**
  307. BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It was developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.
  308. ------
  309. **Know what you are doing**
  310. .. class:: warningmark
  311. There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
  312. .. __: http://bio-bwa.sourceforge.net/
  313. ------
  314. **Input formats**
  315. BWA accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*) or Illumina FASTQ format (galaxy type *fastqillumina*). Use the FASTQ Groomer to prepare your files.
  316. ------
  317. **A Note on Built-in Reference Genomes**
  318. Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which means that it contains everything that came in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in the Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variations include gender. These will come in the canonical form only, so the general Canonical variant is actually Canonical Female and the other is Canonical Male (identical to female excluding chrX).
  319. ------
  320. **Outputs**
  321. The output is in SAM format, and has the following columns::
  322. Column Description
  323. -------- --------------------------------------------------------
  324. 1 QNAME Query (pair) NAME
  325. 2 FLAG bitwise FLAG
  326. 3 RNAME Reference sequence NAME
  327. 4 POS 1-based leftmost POSition/coordinate of clipped sequence
  328. 5 MAPQ MAPping Quality (Phred-scaled)
  329. 6 CIGAR extended CIGAR string
  330. 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
  331. 8 MPOS 1-based Mate POSition
  332. 9 ISIZE Inferred insert SIZE
  333. 10 SEQ query SEQuence on the same strand as the reference
  334. 11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
  335. 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
  336. The flags are as follows::
  337. Flag Description
  338. ------ -------------------------------------
  339. 0x0001 the read is paired in sequencing
  340. 0x0002 the read is mapped in a proper pair
  341. 0x0004 the query sequence itself is unmapped
  342. 0x0008 the mate is unmapped
  343. 0x0010 strand of the query (1 for reverse)
  344. 0x0020 strand of the mate
  345. 0x0040 the read is the first read in a pair
  346. 0x0080 the read is the second read in a pair
  347. 0x0100 the alignment is not primary
  348. It looks like this (scroll sideways to see the entire example)::
  349. QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT
  350. HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
  351. HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
  352. -------
  353. **BWA settings**
  354. All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
  355. ------
  356. **BWA parameter list**
  357. This is an exhaustive list of BWA options:
  358. For **aln**::
  359. -n NUM Maximum edit distance if the value is INT, or the fraction of missing
  360. alignments given 2% uniform base error rate if FLOAT. In the latter
  361. case, the maximum edit distance is automatically chosen for different
  362. read lengths. [0.04]
  363. -o INT Maximum number of gap opens [1]
  364. -e INT Maximum number of gap extensions, -1 for k-difference mode
  365. (disallowing long gaps) [-1]
  366. -d INT Disallow a long deletion within INT bp towards the 3'-end [16]
  367. -i INT Disallow an indel within INT bp towards the ends [5]
  368. -l INT Take the first INT subsequence as seed. If INT is larger than the
  369. query sequence, seeding will be disabled. For long reads, this option
  370. is typically ranged from 25 to 35 for '-k 2'. [inf]
  371. -k INT Maximum edit distance in the seed [2]
  372. -t INT Number of threads (multi-threading mode) [1]
  373. -M INT Mismatch penalty. BWA will not search for suboptimal hits with a score
  374. lower than (bestScore-misMsc). [3]
  375. -O INT Gap open penalty [11]
  376. -E INT Gap extension penalty [4]
  377. -c Reverse query but not complement it, which is required for alignment
  378. in the color space.
  379. -R Proceed with suboptimal alignments even if the top hit is a repeat. By
  380. default, BWA only searches for suboptimal alignments if the top hit is
  381. unique. Using this option has no effect on accuracy for single-end
  382. reads. It is mainly designed for improving the alignment accuracy of
  383. paired-end reads. However, the pairing procedure will be slowed down,
  384. especially for very short reads (~32bp).
  385. -N Disable iterative search. All hits with no more than maxDiff
  386. differences will be found. This mode is much slower than the default.
  387. For **samse**::
  388. -n INT Maximum number of alignments to output in the XA tag for reads paired
  389. properly. If a read has more than INT hits, the XA tag will not be
  390. written. [3]
  391. -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
  392. For **sampe**::
  393. -a INT Maximum insert size for a read pair to be considered as being mapped
  394. properly. Since version 0.4.5, this option is only used when there
  395. are not enough good alignment to infer the distribution of insert
  396. sizes. [500]
  397. -n INT Maximum number of alignments to output in the XA tag for reads paired
  398. properly. If a read has more than INT hits, the XA tag will not be
  399. written. [3]
  400. -N INT Maximum number of alignments to output in the XA tag for disconcordant
  401. read pairs (excluding singletons). If a read has more than INT hits,
  402. the XA tag will not be written. [10]
  403. -o INT Maximum occurrences of a read for pairing. A read with more
  404. occurrences will be treated as a single-end read. Reducing this
  405. parameter helps faster pairing. [100000]
  406. -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
  407. For specifying the read group in **samse** or **sampe**, use the following::
  408. @RG Read group. Unordered multiple @RG lines are allowed.
  409. ID Read group identifier. Each @RG line must have a unique ID. The value of
  410. ID is used in the RG tags of alignment records. Must be unique among all
  411. read groups in header section. Read group IDs may be modified when
  412. merging SAM files in order to handle collisions.
  413. CN Name of sequencing center producing the read.
  414. DS Description.
  415. DT Date the run was produced (ISO8601 date or date/time).
  416. FO Flow order. The array of nucleotide bases that correspond to the
  417. nucleotides used for each flow of each read. Multi-base flows are encoded
  418. in IUPAC format, and non-nucleotide flows by various other characters.
  419. Format : /\*|[ACMGRSVTWYHKDBN]+/
  420. KS The array of nucleotide bases that correspond to the key sequence of each read.
  421. LB Library.
  422. PG Programs used for processing the read group.
  423. PI Predicted median insert size.
  424. PL Platform/technology used to produce the reads. Valid values : CAPILLARY,
  425. LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO.
  426. PU Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
  427. SOLiD). Unique identifier.
  428. SM Sample. Use pool name where a pool is being sequenced.
  429. </help>
  430. </tool>