PageRenderTime 39ms CodeModel.GetById 24ms app.highlight 9ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/sr_mapping/bwa_color_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 482 lines | 396 code | 45 blank | 41 comment | 0 complexity | ceee640c702a104d2fd2ed2270644256 MD5 | raw file
  1<tool id="bwa_color_wrapper" name="Map with BWA for SOLiD" version="1.0.1">
  2  <description></description>
  3  <parallelism method="basic"></parallelism>
  4  <command interpreter="python">
  5    bwa_wrapper.py 
  6      --threads="4"
  7      --color-space
  8
  9      ## reference source
 10      --fileSource=$genomeSource.refGenomeSource
 11      #if $genomeSource.refGenomeSource == "history":
 12        ##build index on the fly
 13        --ref="${genomeSource.ownFile}"
 14        --dbkey=$dbkey
 15      #else:
 16        ##use precomputed indexes
 17        --ref="${ filter( lambda x: str( x[0] ) == str( $genomeSource.indices ), $__app__.tool_data_tables[ 'bwa_indexes_color' ].get_fields() )[0][-1] }"
 18        --do_not_build_index
 19      #end if
 20
 21      ## input file(s)
 22      --input1=$paired.input1
 23      #if $paired.sPaired == "paired":
 24        --input2=$paired.input2
 25      #end if
 26
 27      ## output file
 28      --output=$output
 29
 30      ## run parameters
 31      --genAlignType=$paired.sPaired
 32      --params=$params.source_select
 33      #if $params.source_select != "pre_set":
 34        --maxEditDist=$params.maxEditDist
 35        --fracMissingAligns=$params.fracMissingAligns
 36        --maxGapOpens=$params.maxGapOpens
 37        --maxGapExtens=$params.maxGapExtens
 38        --disallowLongDel=$params.disallowLongDel
 39        --disallowIndel=$params.disallowIndel
 40        --seed=$params.seed
 41        --maxEditDistSeed=$params.maxEditDistSeed
 42        --mismatchPenalty=$params.mismatchPenalty
 43        --gapOpenPenalty=$params.gapOpenPenalty
 44        --gapExtensPenalty=$params.gapExtensPenalty
 45        --suboptAlign=$params.suboptAlign
 46        --noIterSearch=$params.noIterSearch
 47        --outputTopN=$params.outputTopN
 48        --outputTopNDisc=$params.outputTopNDisc
 49        --maxInsertSize=$params.maxInsertSize
 50        --maxOccurPairing=$params.maxOccurPairing
 51        #if $params.readGroup.specReadGroup == "yes"
 52          --rgid="$params.readGroup.rgid"
 53          --rgcn="$params.readGroup.rgcn"
 54          --rgds="$params.readGroup.rgds"
 55          --rgdt="$params.readGroup.rgdt"
 56          --rgfo="$params.readGroup.rgfo"
 57          --rgks="$params.readGroup.rgks"
 58          --rglb="$params.readGroup.rglb"
 59          --rgpg="$params.readGroup.rgpg"
 60          --rgpi="$params.readGroup.rgpi"
 61          --rgpl="$params.readGroup.rgpl"
 62          --rgpu="$params.readGroup.rgpu"
 63          --rgsm="$params.readGroup.rgsm"
 64        #end if
 65      #end if
 66
 67      ## suppress output SAM header
 68      --suppressHeader=$suppressHeader
 69  </command>
 70  <requirements>
 71    <requirement type="package">bwa</requirement>
 72  </requirements>
 73  <inputs>
 74    <conditional name="genomeSource">
 75      <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
 76        <option value="indexed">Use a built-in index</option>
 77        <option value="history">Use one from the history</option>
 78      </param>
 79      <when value="indexed">
 80        <param name="indices" type="select" label="Select a reference genome">
 81          <options from_data_table="bwa_indexes_color">
 82            <filter type="sort_by" column="2" />
 83            <validator type="no_options" message="No indexes are available for the selected input dataset" />
 84          </options>
 85        </param>
 86      </when>
 87      <when value="history">
 88        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
 89      </when>
 90    </conditional>
 91    <conditional name="paired">
 92      <param name="sPaired" type="select" label="Is this library mate-paired?">
 93        <option value="single">Single-end</option>
 94        <option value="paired">Paired-end</option>
 95      </param>
 96      <when value="single">
 97        <param name="input1" type="data" format="fastqcssanger" label="FASTQ file (Nucleotide-space recoded from color-space)">
 98          <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
 99        </param>
100      </when>
101      <when value="paired"> <!-- Paired-end inputs. Help text is given once, in the child help element, matching the single-end case above. -->
102        <param name="input1" type="data" format="fastqcssanger" label="Forward FASTQ file (Nucleotide-space recoded from color-space)">
103          <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
104        </param>
105        <param name="input2" type="data" format="fastqcssanger" label="Reverse FASTQ file (Nucleotide-space recoded from color-space)">
106          <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
107        </param>
108      </when>
109    </conditional>
110    <conditional name="params">
111      <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
112        <option value="pre_set">Commonly Used</option>
113        <option value="full">Full Parameter List</option>
114      </param>
115      <when value="pre_set" />
116      <when value="full">
117        <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
118        <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
119        <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
120        <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
121        <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
122        <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
123        <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
124        <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
125        <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
126        <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
127        <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
128        <param name="suboptAlign" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Proceed with suboptimal alignments even if the top hit is a repeat (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
129        <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
130        <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
131        <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
132        <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
133        <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
134        <conditional name="readGroup">
135          <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
136            <option value="yes">Yes</option>
137            <option value="no" selected="True">No</option>
138          </param>
139          <when value="yes"> <!-- Read group fields; the command template forwards each one to bwa_wrapper.py only when specReadGroup is "yes". -->
140            <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group IDs may be modified when merging SAM files in order to handle collisions." />
143            <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
144            <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
145            <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
146            <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
149            <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
150            <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
151            <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
152            <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
153            <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO" />
155            <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
156            <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
157          </when>
158          <when value="no" />
159        </conditional>
160      </when>
161    </conditional>
162    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
163  </inputs>
164  <outputs>
165    <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
166      <actions>
167        <conditional name="genomeSource.refGenomeSource">
168          <when value="indexed">
169            <action type="metadata" name="dbkey">
170              <option type="from_data_table" name="bwa_indexes_color" column="1">
171                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
172                <filter type="param_value" ref="genomeSource.indices" column="0" />
173              </option>
174            </action>
175          </when>
176          <when value="history">
177            <action type="metadata" name="dbkey">
178              <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
179            </action>
180          </when>
181        </conditional>
182      </actions>
183    </data>
184  </outputs>
185  <tests>
186    <test>
187      <!--
188      BWA commands:
189      cp test-data/hg19chrX_midpart.fasta hg19chrX_midpart.fasta
190      bwa index -c -a is hg19chrX_midpart.fasta
191      bwa aln -t 4 -c hg19chrX_midpart.fasta test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out4.sai
192      bwa samse hg19chrX_midpart.fasta bwa_wrapper_out4.sai test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out4.u.sam
193      hg19chrX_midpart.fasta is the prefix for the reference files (hg19chrX_midpart.fasta.amb, hg19chrX_midpart.fasta.ann, hg19chrX_midpart.fasta.bwt, ...)
194      It's just part of hg19 chrX, from the middle of the chromosome
195      plain old sort doesn't handle underscores like python:
196      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out4.u.sam bwa_wrapper_out4.sam
197      -->
198      <param name="refGenomeSource" value="history" />
199      <param name="ownFile" value="hg19chrX_midpart.fasta" />
200      <param name="sPaired" value="single" />
201      <param name="input1" value="bwa_wrapper_in4.fastqcssanger" ftype="fastqcssanger" />
202      <param name="source_select" value="pre_set" />
203      <param name="suppressHeader" value="false" />
204      <output name="output" file="bwa_wrapper_out4.sam" ftype="sam" sort="True" lines_diff="2" />
205    </test>
206    <test>
207      <!--
208      BWA commands:
209      bwa aln -t 4 -c equCab2chrM_cs.fa test-data/bwa_wrapper_in5.fastqcssanger > bwa_wrapper_out5a.sai
210      bwa aln -t 4 -c equCab2chrM_cs.fa test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out5b.sai
211      bwa sampe equCab2chrM_cs.fa bwa_wrapper_out5a.sai bwa_wrapper_out5b.sai test-data/bwa_wrapper_in5.fastqcssanger test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out5.u.sam
212      equCab2chrM_cs.fa is the prefix of the index files (equCab2chrM_cs.fa.amb, equCab2chrM_cs.fa.ann, ...)
213      remove the comment lines (beginning with '@') from the resulting sam file
214      plain old sort doesn't handle underscores like python:
215      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out5.u.sam bwa_wrapper_out5.sam
216      -->
217      <param name="refGenomeSource" value="indexed" />
218      <param name="indices" value="equCab2chrM" />
219      <param name="sPaired" value="paired" />
220      <param name="input1" value="bwa_wrapper_in5.fastqcssanger" ftype="fastqcssanger" />
221      <param name="input2" value="bwa_wrapper_in6.fastqcssanger" ftype="fastqcssanger" />
222      <param name="source_select" value="pre_set" />
223      <param name="suppressHeader" value="true" />
224      <output name="output" file="bwa_wrapper_out5.sam" ftype="sam" sort="True" />
225    </test>
226    <test>
227      <!--
228      BWA commands:
229      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c hg19chrX_midpart.fasta test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out6.sai
230      bwa samse -n 3 -r "@RG\tID:474747\tDS:description\tDT:2011-03-14\tLB:lib-child-1-A\tPI:200\tPL:SOLID\tSM:child-1" hg19chrX_midpart.fasta bwa_wrapper_out6.sai test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out6.u.sam
231      hg19chrX_midpart_cs.fa is the prefix of the index files (hg19chrX_midpart.fa.amb, hg19chrX_midpart.fa.ann, ...)
232      (It's just part of hg19 chrX, from the middle of the chromosome)
233      plain old sort doesn't handle underscores like python:
234      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out6.u.sam bwa_wrapper_out6.sam
235      -->
236      <param name="refGenomeSource" value="indexed" />
237      <param name="indices" value="hg19chrX_midpart" />
238      <param name="sPaired" value="single" />
239      <param name="input1" value="bwa_wrapper_in4.fastqcssanger" ftype="fastqcssanger" />
240      <param name="source_select" value="full" />
241      <param name="maxEditDist" value="0" />  
242      <param name="fracMissingAligns" value="0.04" />
243      <param name="maxGapOpens" value="1" />
244      <param name="maxGapExtens" value="-1" />
245      <param name="disallowLongDel" value="16" />
246      <param name="disallowIndel" value="5" />
247      <param name="seed" value="-1" />
248      <param name="maxEditDistSeed" value="2" />
249      <param name="mismatchPenalty" value="3" />
250      <param name="gapOpenPenalty" value="11" />
251      <param name="gapExtensPenalty" value="4" />
252      <param name="suboptAlign" value="true" />
253      <param name="noIterSearch" value="true" />
254      <param name="outputTopN" value="3" />
255      <param name="outputTopNDisc" value="10" />
256      <param name="maxInsertSize" value="500" />
257      <param name="maxOccurPairing" value="100000" />
258      <param name="specReadGroup" value="yes" />
259      <param name="rgid" value="474747" />
260      <param name="rgcn" value="" />
261      <param name="rgds" value="description" />
262      <param name="rgdt" value="2011-03-14" />
263      <param name="rgfo" value="" />
264      <param name="rgks" value="" />
265      <param name="rglb" value="lib-child-1-A" />
266      <param name="rgpg" value="" />
267      <param name="rgpi" value="200" />
268      <param name="rgpl" value="SOLID" />
269      <param name="rgpu" value="" />
270      <param name="rgsm" value="child-1" />
271      <param name="suppressHeader" value="false" />
272      <output name="output" file="bwa_wrapper_out6.sam" ftype="sam" sort="True" lines_diff="2" />
273    </test>
274    <test>
275      <!--
276      BWA commands:
277      cp test-data/chr_m.fasta chr_m.fasta
278      bwa index -c -a is chr_m.fasta
279      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c chr_m.fasta test-data/bwa_wrapper_in5.fastqcssanger > bwa_wrapper_out7a.sai
280      bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c chr_m.fasta test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out7b.sai
281      bwa sampe -a 100 -o 2 -n 3 -N 10 chr_m.fasta bwa_wrapper_out7a.sai bwa_wrapper_out7b.sai test-data/bwa_wrapper_in5.fastqcssanger test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out7.u.sam
282      chr_m.fasta is the prefix of the index files (chr_m.fasta.amb, chr_m.fasta.ann, ...)
283      plain old sort doesn't handle underscores like python:
284      python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out7.u.sam bwa_wrapper_out7.sam
285      -->
286      <param name="refGenomeSource" value="history" />
287      <param name="ownFile" value="chr_m.fasta" />
288      <param name="sPaired" value="paired" />
289      <param name="input1" value="bwa_wrapper_in5.fastqcssanger" ftype="fastqcssanger" />
290      <param name="input2" value="bwa_wrapper_in6.fastqcssanger" ftype="fastqcssanger" />
291      <param name="source_select" value="full" />
292      <param name="maxEditDist" value="0" />  
293      <param name="fracMissingAligns" value="0.04" />
294      <param name="maxGapOpens" value="1" />
295      <param name="maxGapExtens" value="-1" />
296      <param name="disallowLongDel" value="16" />
297      <param name="disallowIndel" value="5" />
298      <param name="seed" value="-1" />
299      <param name="maxEditDistSeed" value="2" />
300      <param name="mismatchPenalty" value="3" />
301      <param name="gapOpenPenalty" value="11" />
302      <param name="gapExtensPenalty" value="4" />
303      <param name="suboptAlign" value="true" />
304      <param name="noIterSearch" value="true" />
305      <param name="outputTopN" value="3" />
306      <param name="outputTopNDisc" value="10" />
307      <param name="maxInsertSize" value="100" />
308      <param name="maxOccurPairing" value="2" />
309      <param name="specReadGroup" value="no" />
310      <param name="suppressHeader" value="false" />
311      <output name="output" file="bwa_wrapper_out7.sam" ftype="sam" sort="True" lines_diff="2" />
312    </test>
313  </tests> 
314  <help>
315
316**What it does**
317
318BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It was developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60. 
319
320------
321
322**Know what you are doing**
323
324.. class:: warningmark
325
326There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
327
328 .. __: http://bio-bwa.sourceforge.net/
329
330------
331
332**Input formats**
333
334BWA accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files, set to either FASTQ Sanger or FASTQ Color Space Sanger as appropriate. 
335
336If you have Color Space Sanger, it must be converted to nucleotide-space first. To do this, use the Manipulate FASTQ tool under NGS: QC and manipulation, with the following settings:
337    Manipulate reads on Sequence Content, choosing Change Adapter Base, and having the text box empty.
338    Manipulate reads on Sequence Content, doing a String Translate from "01234." to "ACGTN".
339
340
341------
342
343**A Note on Built-in Reference Genomes**
344
345Some genomes have multiple variants. If only one "type" of genome is listed, it is the Full version, which means that it contains everything that came in the original genome data download (possibly with mitochondrial and plasmid DNA added if it wasn't already included). The Full version is available for every genome. Some genomes also come in the Canonical variant, which contains only the "canonical" (well-defined) chromosomes or segments, such as chr1-chr22, chrX, chrY, and chrM for human. Other variations include gender. These will come in the canonical form only, so the general Canonical variant is actually Canonical Female and the other is Canonical Male (identical to female excluding chrX).
346
347------
348
349**Outputs**
350
351The output is in SAM format, and has the following columns::
352
353    Column  Description
354  --------  --------------------------------------------------------
355  1  QNAME  Query (pair) NAME
356  2  FLAG   bitwise FLAG
357  3  RNAME  Reference sequence NAME
358  4  POS    1-based leftmost POSition/coordinate of clipped sequence
359  5  MAPQ   MAPping Quality (Phred-scaled)
360  6  CIGAR  extended CIGAR string
361  7  MRNM   Mate Reference sequence NaMe ('=' if same as RNAME)
362  8  MPOS   1-based Mate POSition
363  9  ISIZE  Inferred insert SIZE
364  10 SEQ    query SEQuence on the same strand as the reference
365  11 QUAL   query QUALity (ASCII-33 gives the Phred base quality)
366  12 OPT    variable OPTional fields in the format TAG:VTYPE:VALUE
367  
368The flags are as follows::
369
370    Flag  Description
371  ------  -------------------------------------
372  0x0001  the read is paired in sequencing
373  0x0002  the read is mapped in a proper pair
374  0x0004  the query sequence itself is unmapped
375  0x0008  the mate is unmapped
376  0x0010  strand of the query (1 for reverse)
377  0x0020  strand of the mate
378  0x0040  the read is the first read in a pair
379  0x0080  the read is the second read in a pair
380  0x0100  the alignment is not primary
381
382It looks like this (scroll sideways to see the entire example)::
383
384  QNAME	FLAG	RNAME	POS	MAPQ	CIGAR	MRNM	MPOS	ISIZE	SEQ	QUAL	OPT
385  HWI-EAS91_1_30788AAXX:1:1:1761:343	4	*	0	0	*	*	0	0	AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG	hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
386  HWI-EAS91_1_30788AAXX:1:1:1578:331	4	*	0	0	*	*	0	0	GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG	hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
387
388-------
389
390**BWA settings**
391
392All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
393
394------
395
396**BWA parameter list**
397
398This is an exhaustive list of BWA options:
399
400For **aln**::
401
402  -n NUM  Maximum edit distance if the value is INT, or the fraction of missing
403          alignments given 2% uniform base error rate if FLOAT. In the latter
404          case, the maximum edit distance is automatically chosen for different 
405          read lengths. [0.04]
406  -o INT  Maximum number of gap opens [1]
407  -e INT  Maximum number of gap extensions, -1 for k-difference mode
408          (disallowing long gaps) [-1]
409  -d INT  Disallow a long deletion within INT bp towards the 3'-end [16]
410  -i INT  Disallow an indel within INT bp towards the ends [5]
411  -l INT  Take the first INT subsequence as seed. If INT is larger than the
412          query sequence, seeding will be disabled. For long reads, this option 
413          is typically ranged from 25 to 35 for '-k 2'. [inf]
414  -k INT  Maximum edit distance in the seed [2]
415  -t INT  Number of threads (multi-threading mode) [1]
416  -M INT  Mismatch penalty. BWA will not search for suboptimal hits with a score
417          lower than (bestScore-misMsc). [3]
418  -O INT  Gap open penalty [11]
419  -E INT  Gap extension penalty [4]
420  -c      Reverse query but not complement it, which is required for alignment
421          in the color space.
422  -R      Proceed with suboptimal alignments even if the top hit is a repeat. By
423          default, BWA only searches for suboptimal alignments if the top hit is
424          unique. Using this option has no effect on accuracy for single-end
425          reads. It is mainly designed for improving the alignment accuracy of
426          paired-end reads. However, the pairing procedure will be slowed down,
427          especially for very short reads (~32bp).
428  -N      Disable iterative search. All hits with no more than maxDiff
429          differences will be found. This mode is much slower than the default.
430
431For **samse**::
432
433  -n INT  Maximum number of alignments to output in the XA tag for reads paired
434          properly. If a read has more than INT hits, the XA tag will not be
435          written. [3]
436  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
437
438For **sampe**::
439
440  -a INT  Maximum insert size for a read pair to be considered as being mapped
441          properly. Since version 0.4.5, this option is only used when there
442          are not enough good alignment to infer the distribution of insert
443          sizes. [500]
444  -n INT  Maximum number of alignments to output in the XA tag for reads paired
445          properly. If a read has more than INT hits, the XA tag will not be
446          written. [3]
447  -N INT  Maximum number of alignments to output in the XA tag for disconcordant
448          read pairs (excluding singletons). If a read has more than INT hits,
449          the XA tag will not be written. [10]
450  -o INT  Maximum occurrences of a read for pairing. A read with more
451          occurrences will be treated as a single-end read. Reducing this
452          parameter helps faster pairing. [100000]
453  -r STR  Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
454
455For specifying the read group in **samse** or **sampe**, use the following::
456
457  @RG   Read group. Unordered multiple @RG lines are allowed. 
458  ID    Read group identifier. Each @RG line must have a unique ID. The value of
459        ID is used in the RG tags of alignment records. Must be unique among all
460        read groups in header section. Read group IDs may be modified when
461        merging SAM files in order to handle collisions. 
462  CN    Name of sequencing center producing the read. 
463  DS    Description. 
464  DT    Date the run was produced (ISO8601 date or date/time). 
465  FO    Flow order. The array of nucleotide bases that correspond to the
466        nucleotides used for each flow of each read. Multi-base flows are encoded
467        in IUPAC format, and non-nucleotide flows by various other characters.
468        Format : /\*|[ACMGRSVTWYHKDBN]+/ 
469  KS    The array of nucleotide bases that correspond to the key sequence of each read. 
470  LB    Library. 
471  PG    Programs used for processing the read group. 
472  PI    Predicted median insert size. 
473  PL    Platform/technology used to produce the reads. Valid values : CAPILLARY,
474        LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO. 
475  PU    Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
476        SOLiD). Unique identifier. 
477  SM    Sample. Use pool name where a pool is being sequenced. 
478
479  </help>
480</tool>
481
482