PageRenderTime 31ms CodeModel.GetById 14ms app.highlight 10ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/sr_mapping/bwa_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 474 lines | 389 code | 44 blank | 41 comment | 0 complexity | 00a9cbfaf448329ad7eb00a6d91df57a MD5 | raw file
  1<tool id="bwa_wrapper" name="Map with BWA for Illumina" version="1.2.2">
  2  <description></description>
  3  <parallelism method="basic"></parallelism>
  4  <command interpreter="python">
  5    bwa_wrapper.py 
  6      --threads="4"
  7
  8      #if $input1.ext == "fastqillumina":
  9            --illumina1.3
 10      #end if
 11
 12      ## reference source
 13      --fileSource=$genomeSource.refGenomeSource
 14      #if $genomeSource.refGenomeSource == "history":
 15        ##build index on the fly
 16        --ref="${genomeSource.ownFile}"
 17        --dbkey=$dbkey
 18      #else:
 19        ##use precomputed indexes
 20        --ref="${ filter( lambda x: str( x[0] ) == str( $genomeSource.indices ), $__app__.tool_data_tables[ 'bwa_indexes' ].get_fields() )[0][-1] }"
 21        --do_not_build_index
 22      #end if
 23
 24      ## input file(s)
 25      --input1=$paired.input1
 26      #if $paired.sPaired == "paired":
 27        --input2=$paired.input2
 28      #end if
 29
 30      ## output file
 31      --output=$output
 32
 33      ## run parameters
 34      --genAlignType=$paired.sPaired
 35      --params=$params.source_select
 36      #if $params.source_select != "pre_set":
 37        --maxEditDist=$params.maxEditDist
 38        --fracMissingAligns=$params.fracMissingAligns
 39        --maxGapOpens=$params.maxGapOpens
 40        --maxGapExtens=$params.maxGapExtens
 41        --disallowLongDel=$params.disallowLongDel
 42        --disallowIndel=$params.disallowIndel
 43        --seed=$params.seed
 44        --maxEditDistSeed=$params.maxEditDistSeed
 45        --mismatchPenalty=$params.mismatchPenalty
 46        --gapOpenPenalty=$params.gapOpenPenalty
 47        --gapExtensPenalty=$params.gapExtensPenalty
 48        --suboptAlign=$params.suboptAlign
 49        --noIterSearch=$params.noIterSearch
 50        --outputTopN=$params.outputTopN
 51        --outputTopNDisc=$params.outputTopNDisc
 52        --maxInsertSize=$params.maxInsertSize
 53        --maxOccurPairing=$params.maxOccurPairing
 54        #if $params.readGroup.specReadGroup == "yes"
 55          --rgid="$params.readGroup.rgid"
 56          --rgcn="$params.readGroup.rgcn"
 57          --rgds="$params.readGroup.rgds"
 58          --rgdt="$params.readGroup.rgdt"
 59          --rgfo="$params.readGroup.rgfo"
 60          --rgks="$params.readGroup.rgks"
 61          --rglb="$params.readGroup.rglb"
 62          --rgpg="$params.readGroup.rgpg"
 63          --rgpi="$params.readGroup.rgpi"
 64          --rgpl="$params.readGroup.rgpl"
 65          --rgpu="$params.readGroup.rgpu"
 66          --rgsm="$params.readGroup.rgsm"
 67        #end if
 68      #end if
 69
 70      ## suppress output SAM header
 71      --suppressHeader=$suppressHeader
 72  </command>
 73  <requirements>
 74    <requirement type="package">bwa</requirement>
 75  </requirements>
 76  <inputs>
 77    <conditional name="genomeSource">
 78      <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
 79        <option value="indexed">Use a built-in index</option>
 80        <option value="history">Use one from the history</option>
 81      </param>
 82      <when value="indexed">
 83        <param name="indices" type="select" label="Select a reference genome">
 84          <options from_data_table="bwa_indexes">
 85            <filter type="sort_by" column="2" />
 86            <validator type="no_options" message="No indexes are available" />
 87          </options>
 88        </param>
 89      </when>
 90      <when value="history">
 91        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
 92      </when>
 93    </conditional>
 94    <conditional name="paired">
 95      <param name="sPaired" type="select" label="Is this library mate-paired?">
 96        <option value="single">Single-end</option>
 97        <option value="paired">Paired-end</option>
 98      </param>
 99      <when value="single">
100        <param name="input1" type="data" format="fastqsanger,fastqillumina" label="FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
101      </when>
102      <when value="paired">
103        <param name="input1" type="data" format="fastqsanger,fastqillumina" label="Forward FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
104        <param name="input2" type="data" format="fastqsanger,fastqillumina" label="Reverse FASTQ file" help="FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillumina)" />
105      </when>
106    </conditional>
107    <conditional name="params">
108      <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
109        <option value="pre_set">Commonly Used</option>
110        <option value="full">Full Parameter List</option>
111      </param>
112      <when value="pre_set" />
113      <when value="full">
114        <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
115        <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
116        <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
117        <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
118        <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
119        <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
120        <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
121        <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
122        <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
123        <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
124        <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
125        <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
126        <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
127        <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
128        <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
129        <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
130        <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
131        <conditional name="readGroup">
132          <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
133            <option value="yes">Yes</option>
134            <option value="no" selected="True">No</option>
135          </param>
136          <when value="yes">
137            <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG 
138tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group 
139IDs may be modified when merging SAM files in order to handle collisions." />
140            <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
141            <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
142            <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
143            <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each 
144flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by 
145various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
146            <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
147            <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
148            <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
149            <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
150            <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, 
151SOLID, HELICOS, IONTORRENT and PACBIO" />
152            <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
153            <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
154          </when>
155          <when value="no" />
156        </conditional>
157      </when>
158    </conditional>
159    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
160  </inputs>
161  <outputs>
162    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
163      <actions>
164        <conditional name="genomeSource.refGenomeSource">
165          <when value="indexed">
166            <action type="metadata" name="dbkey">
167              <option type="from_data_table" name="bwa_indexes" column="1">
168                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
169                <filter type="param_value" ref="genomeSource.indices" column="0"/>
170              </option>
171            </action>
172          </when>
173          <when value="history">
174            <action type="metadata" name="dbkey">
175              <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
176            </action>
177          </when>
178        </conditional>
179      </actions>
180    </data>
181  </outputs>
182  <tests>
183    <test>
184      <!--
185      BWA commands:
186      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.sai
187      bwa samse phiX.fasta bwa_wrapper_out1.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out1.sam
188      phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
189      remove the comment lines (beginning with '@') from the resulting sam file
190      plain old sort doesn't handle underscores like python:
191      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out1.u.sam bwa_wrapper_out1.sam
192      -->
193      <param name="refGenomeSource" value="indexed" />
194      <param name="indices" value="phiX" />
195      <param name="sPaired" value="single" />
196      <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
197      <param name="source_select" value="pre_set" />
198      <param name="suppressHeader" value="true" />
199      <output name="output" file="bwa_wrapper_out1.sam" ftype="sam" sort="True" />
200    </test>
201    <test>
202      <!--
203      BWA commands:
204      cp test-data/phiX.fasta phiX.fasta
205      bwa index -a is phiX.fasta
206      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.sai
207      bwa samse -n 3 phiX.fasta bwa_wrapper_out2.sai test-data/bwa_wrapper_in1.fastqsanger > bwa_wrapper_out2.u.sam
208      phiX.fasta is the prefix for the reference files (phiX.fasta.amb, phiX.fasta.ann, phiX.fasta.bwt, ...)
209      remove the comment lines (beginning with '@') from the resulting sam file
210      plain old sort doesn't handle underscores like python:
211      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out2.u.sam bwa_wrapper_out2.sam
212      -->
213      <param name="refGenomeSource" value="history" />
214      <param name="ownFile" value="phiX.fasta" />
215      <param name="sPaired" value="single" />
216      <param name="input1" value="bwa_wrapper_in1.fastqsanger" ftype="fastqsanger" />
217      <param name="source_select" value="full" />
218      <param name="maxEditDist" value="0" />  
219      <param name="fracMissingAligns" value="0.04" />
220      <param name="maxGapOpens" value="1" />
221      <param name="maxGapExtens" value="-1" />
222      <param name="disallowLongDel" value="16" />
223      <param name="disallowIndel" value="5" />
224      <param name="seed" value="-1" />
225      <param name="maxEditDistSeed" value="2" />
226      <param name="mismatchPenalty" value="3" />
227      <param name="gapOpenPenalty" value="11" />
228      <param name="gapExtensPenalty" value="4" />
229      <param name="suboptAlign" value="true" />
230      <param name="noIterSearch" value="true" />
231      <param name="outputTopN" value="3" />
232      <param name="outputTopNDisc" value="10" />
233      <param name="maxInsertSize" value="500" />
234      <param name="maxOccurPairing" value="100000" />
235      <param name="specReadGroup" value="no" />
236      <param name="suppressHeader" value="true" />
237      <output name="output" file="bwa_wrapper_out2.sam" ftype="sam" sort="True" />
238    </test>
239    <test>
240      <!--
241      BWA commands:
242      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out3a.sai
243      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3b.sai
244      bwa sampe -a 500 -o 100000 -n 3 -N 10 -r "@RG\tID:abcdefg\tDS:descrip\tDT:2010-11-01\tLB:lib-mom-A\tPI:400\tPL:ILLUMINA\tSM:mom" phiX.fasta bwa_wrapper_out3a.sai bwa_wrapper_out3b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out3.u.sam
245      phiX.fasta is the prefix for the reference
246      plain old sort doesn't handle underscores like python:
247      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out3.u.sam bwa_wrapper_out3.sam
248      -->
249      <param name="refGenomeSource" value="indexed" />
250      <param name="indices" value="phiX" />
251      <param name="sPaired" value="paired" />
252      <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
253      <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
254      <param name="source_select" value="full" />
255      <param name="maxEditDist" value="0" />
256      <param name="fracMissingAligns" value="0.04" />
257      <param name="maxGapOpens" value="1" />
258      <param name="maxGapExtens" value="-1" />
259      <param name="disallowLongDel" value="16" />
260      <param name="disallowIndel" value="5" />
261      <param name="seed" value="-1" />
262      <param name="maxEditDistSeed" value="2" />
263      <param name="mismatchPenalty" value="3" />
264      <param name="gapOpenPenalty" value="11" />
265      <param name="gapExtensPenalty" value="4" />
266      <param name="suboptAlign" value="true" />
267      <param name="noIterSearch" value="true" />
268      <param name="outputTopN" value="3" />
269      <param name="outputTopNDisc" value="10" />
270      <param name="maxInsertSize" value="500" />
271      <param name="maxOccurPairing" value="100000" />
272      <param name="specReadGroup" value="yes" />
273      <param name="rgid" value="abcdefg" />
274      <param name="rgcn" value="" />
275      <param name="rgds" value="descrip" />
276      <param name="rgdt" value="2010-11-01" />
277      <param name="rgfo" value="" />
278      <param name="rgks" value="" />
279      <param name="rglb" value="lib-mom-A" />
280      <param name="rgpg" value="" />
281      <param name="rgpi" value="400" />
282      <param name="rgpl" value="ILLUMINA" />
283      <param name="rgpu" value="" />
284      <param name="rgsm" value="mom" />
285      <param name="suppressHeader" value="false" />
286      <output name="output" file="bwa_wrapper_out3.sam" ftype="sam" sort="True" lines_diff="2" />
287    </test>
288    <test>
289      <!--
290      BWA commands:
291      cp test-data/phiX.fasta phiX.fasta
292      bwa index -a is phiX.fasta
293      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in2.fastqsanger > bwa_wrapper_out8a.sai
294      bwa aln -t 4 phiX.fasta test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8b.sai
295      bwa sampe -a 500 -o 100000 phiX.fasta bwa_wrapper_out8a.sai bwa_wrapper_out8b.sai test-data/bwa_wrapper_in2.fastqsanger test-data/bwa_wrapper_in3.fastqsanger > bwa_wrapper_out8.u.sam
296      phiX.fasta is the prefix for the reference
297      remove the comment lines (beginning with '@') from the resulting sam file
298      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out8.u.sam bwa_wrapper_out8.sam
299      -->
300      <param name="refGenomeSource" value="history" />
301      <!-- reference FASTA taken from the history (test-data/phiX.fasta); the wrapper builds the index on the fly -->
302      <param name="ownFile" value="phiX.fasta" />
303      <param name="sPaired" value="paired" />
304      <param name="input1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
305      <param name="input2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
306      <param name="source_select" value="pre_set" />
307      <param name="suppressHeader" value="true" />
308      <output name="output" file="bwa_wrapper_out8.sam" ftype="sam" sort="True" />
309    </test>
310  </tests> 
311  <help>
312
313**What it does**
314
315BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It is developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60. 
316
317------
318
319**Know what you are doing**
320
321.. class:: warningmark
322
323There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
324
325 .. __: http://bio-bwa.sourceforge.net/
326
327------
328
329**Input formats**
330
331BWA accepts files in either Sanger FASTQ format (galaxy type *fastqsanger*) or Illumina FASTQ format (galaxy type *fastqillumina*). Use the FASTQ Groomer to prepare your files.
332
333------
334
335**A Note on Built-in Reference Genomes**
336
337Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which means that it contains everything that came in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in the Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variations include gender. These will come in the canonical form only, so the general Canonical variant is actually Canonical Female and the other is Canonical Male (identical to female excluding chrX).
338
339------
340
341**Outputs**
342
343The output is in SAM format, and has the following columns::
344
345    Column  Description
346  --------  --------------------------------------------------------
347  1  QNAME  Query (pair) NAME
348  2  FLAG   bitwise FLAG
349  3  RNAME  Reference sequence NAME
350  4  POS    1-based leftmost POSition/coordinate of clipped sequence
351  5  MAPQ   MAPping Quality (Phred-scaled)
352  6  CIGAR  extended CIGAR string
353  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
354  8  MPOS   1-based Mate POSition
355  9  ISIZE  Inferred insert SIZE
356  10 SEQ    query SEQuence on the same strand as the reference
357  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
358  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
359  
360The flags are as follows::
361
362    Flag  Description
363  ------  -------------------------------------
364  0x0001  the read is paired in sequencing
365  0x0002  the read is mapped in a proper pair
366  0x0004  the query sequence itself is unmapped
367  0x0008  the mate is unmapped
368  0x0010  strand of the query (1 for reverse)
369  0x0020  strand of the mate
370  0x0040  the read is the first read in a pair
371  0x0080  the read is the second read in a pair
372  0x0100  the alignment is not primary
373
374It looks like this (scroll sideways to see the entire example)::
375
376  QNAME	FLAG	RNAME	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
377  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
378  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
379
380-------
381
382**BWA settings**
383
384All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
385
386------
387
388**BWA parameter list**
389
390This is an exhaustive list of BWA options:
391
392For **aln**::
393
394  -n NUM  Maximum edit distance if the value is INT, or the fraction of missing
395          alignments given 2% uniform base error rate if FLOAT. In the latter
396          case, the maximum edit distance is automatically chosen for different 
397          read lengths. [0.04]
398  -o INT  Maximum number of gap opens [1]
399  -e INT  Maximum number of gap extensions, -1 for k-difference mode
400          (disallowing long gaps) [-1]
401  -d INT  Disallow a long deletion within INT bp towards the 3'-end [16]
402  -i INT  Disallow an indel within INT bp towards the ends [5]
403  -l INT  Take the first INT subsequence as seed. If INT is larger than the
404          query sequence, seeding will be disabled. For long reads, this option 
405          is typically ranged from 25 to 35 for '-k 2'. [inf]
406  -k INT  Maximum edit distance in the seed [2]
407  -t INT  Number of threads (multi-threading mode) [1]
408  -M INT  Mismatch penalty. BWA will not search for suboptimal hits with a score
409          lower than (bestScore-misMsc). [3]
410  -O INT  Gap open penalty [11]
411  -E INT  Gap extension penalty [4]
412  -c      Reverse query but not complement it, which is required for alignment
413          in the color space.
414  -R      Proceed with suboptimal alignments even if the top hit is a repeat. By
415          default, BWA only searches for suboptimal alignments if the top hit is
416          unique. Using this option has no effect on accuracy for single-end
417          reads. It is mainly designed for improving the alignment accuracy of
418          paired-end reads. However, the pairing procedure will be slowed down,
419          especially for very short reads (~32bp).
420  -N      Disable iterative search. All hits with no more than maxDiff
421          differences will be found. This mode is much slower than the default.
422
423For **samse**::
424
425  -n INT  Maximum number of alignments to output in the XA tag for reads paired
426          properly. If a read has more than INT hits, the XA tag will not be
427          written. [3]
428  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
429
430For **sampe**::
431
432  -a INT  Maximum insert size for a read pair to be considered as being mapped
433          properly. Since version 0.4.5, this option is only used when there
434          are not enough good alignment to infer the distribution of insert
435          sizes. [500]
436  -n INT  Maximum number of alignments to output in the XA tag for reads paired
437          properly. If a read has more than INT hits, the XA tag will not be
438          written. [3]
439  -N INT  Maximum number of alignments to output in the XA tag for disconcordant
440          read pairs (excluding singletons). If a read has more than INT hits,
441          the XA tag will not be written. [10]
442  -o INT  Maximum occurrences of a read for pairing. A read with more
443          occurrences will be treated as a single-end read. Reducing this
444          parameter helps faster pairing. [100000]
445  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
446
447For specifying the read group in **samse** or **sampe**, use the following::
448
449  @RG   Read group. Unordered multiple @RG lines are allowed. 
450  ID    Read group identifier. Each @RG line must have a unique ID. The value of
451        ID is used in the RG tags of alignment records. Must be unique among all
452        read groups in header section. Read group IDs may be modified when
453        merging SAM files in order to handle collisions. 
454  CN    Name of sequencing center producing the read. 
455  DS    Description. 
456  DT    Date the run was produced (ISO8601 date or date/time). 
457  FO    Flow order. The array of nucleotide bases that correspond to the
458        nucleotides used for each flow of each read. Multi-base flows are encoded
459        in IUPAC format, and non-nucleotide flows by various other characters.
460        Format : /\*|[ACMGRSVTWYHKDBN]+/ 
461  KS    The array of nucleotide bases that correspond to the key sequence of each read. 
462  LB    Library. 
463  PG    Programs used for processing the read group. 
464  PI    Predicted median insert size. 
465  PL    Platform/technology used to produce the reads. Valid values : CAPILLARY,
466        LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO. 
467  PU    Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
468        SOLiD). Unique identifier. 
469  SM    Sample. Use pool name where a pool is being sequenced. 
470
471  </help>
472</tool>
473
474