PageRenderTime 30ms CodeModel.GetById 19ms app.highlight 4ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/sr_mapping/srma_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 218 lines | 161 code | 36 blank | 21 comment | 0 complexity | 24fbdc9dcdef609260d3b7cc3ee518d9 MD5 | raw file
  1<tool id="srma_wrapper" name="Re-align with SRMA" version="0.2.5">
  2  <description></description>
  3  <command interpreter="python">srma_wrapper.py 
  4    #if $refGenomeSource.refGenomeSource_type == "history":
  5      --ref=$refGenomeSource.ownFile
  6    #else:
  7      --ref="${refGenomeSource.ref.fields.path}"
  8      --refUID=$refGenomeSource.ref
  9      ##--refLocations=${GALAXY_DATA_INDEX_DIR}/srma_index.loc
 10    #end if
 11    --input=$input
 12    --inputIndex=${input.metadata.bam_index}
 13    --output=$output
 14    --params=$params.source_select
 15    --fileSource=$refGenomeSource.refGenomeSource_type
 16    --jarBin="${GALAXY_DATA_INDEX_DIR}/shared/jars"
 17    #if $params.source_select == "full":
 18      --offset=$params.offset
 19      --minMappingQuality=$params.minMappingQuality
 20      --minAlleleProbability=$params.minAlleleProbability
 21      --minAlleleCoverage=$params.minAlleleCoverage
 22      --range=$params.range
 23      --correctBases=$params.correctBases
 24      --useSequenceQualities=$params.useSequenceQualities
 25      --maxHeapSize=$params.maxHeapSize
 26    #end if
 27    --jarFile="srma.jar"
 28  </command>
 29  <inputs>
 30    <conditional name="refGenomeSource">
 31      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
 32        <option value="built-in">Use a built-in reference</option>
 33        <option value="history">Use one from the history</option>
 34      </param>
 35      <when value="built-in">
 36        <param name="ref" type="select" label="Select a reference genome">
 37          <options from_data_table="srma_indexes">
 38            <filter type="sort_by" column="2" />
 39            <validator type="no_options" message="No indexes are available" />
 40          </options>
 41        </param>
 42      </when>
 43      <when value="history">
 44        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
 45      </when>
 46    </conditional>
 47    <param name="input" type="data" format="bam" label="Input BAM file" help="The input BAM file to re-align"/>
 48    <conditional name="params">
 49      <param name="source_select" type="select" label="SRMA settings to use" help="For most re-alignment needs, use Commonly Used settings. If you want full control use Full Parameter List">
 50        <option value="pre_set">Commonly Used</option>
 51        <option value="full">Full Parameter List</option>
 52      </param>
 53      <when value="pre_set" />
 54      <when value="full">
 55        <param name="offset" type="integer" value="20" label="Offset" help="The alignment offset" />
 56        <param name="minMappingQuality" type="integer" value="0" label="Minimum mapping quality" help="The minimum mapping quality" />
 57        <param name="minAlleleProbability" type="float" value="0.1" label="Minimum allele probability" help="The minimum allele probability conditioned on coverage (for the binomial quantile)." />
 58        <param name="minAlleleCoverage" type="integer" value="2" label="Minimum allele coverage" help="The minimum haploid coverage for the consensus. Default value: 3. This option can be set " />
 59        <param name="range" type="text" value="null" label="Range" help="A range to examine" />
 60        <param name="correctBases" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Correct bases" help="Correct bases " />
 61        <param name="useSequenceQualities" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Use sequence qualities" help="Use sequence qualities " />
 62        <param name="maxHeapSize" type="integer" value="8192" label="Maximum heap size" help="The maximum number of nodes on the heap before re-alignment is ignored" />
 63      </when>
 64    </conditional>
 65  </inputs>
 66  <outputs>
 67    <data format="bam" name="output" label="${tool.name} on ${on_string}: re-aligned reads">
 68      <actions>
 69        <conditional name="refGenomeSource.refGenomeSource_type">
 70          <when value="built-in">
 71            <action type="metadata" name="dbkey">
 72              <option type="from_data_table" name="srma_indexes" column="1" offset="0">
 73                <filter type="param_value" column="0" value="#" compare="startswith" keep="False" />
 74                <filter type="param_value" ref="refGenomeSource.ref" column="0" />
 75              </option>
 76            </action>
 77          </when>
 78          <when value="history">
 79            <action type="metadata" name="dbkey">
 80              <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
 81            </action>
 82          </when>
 83        </conditional>
 84      </actions>
 85    </data>
 86  </outputs>
 87  <tests>
 88      <test>
 89          <!-- Commands to run to prepare test files (uses built-in index)
 90          Prepare bam index file:
 91          samtools index srma_in1.bam
 92          Run SRMA:
 93          java -jar srma.jar I=srma_in1.bam O=srma_out1.bam R=/afs/bx.psu.edu/depot/data/genome/hg18/srma_index/chr21.fa
 94          To create the bam file first, start with a sam file (srma_in1.sam) generated with a run using the chr21 fasta file and which contains the header. Run before samtools index:
 95          samtools view -bt /afs/bx.psu.edu/depot/data/genome/hg18/sam_index/chr21.fa -o srma_in1.u.bam srma_in1.sam
 96          samtools sort srma_in1.u.bam srma_in1
 97          -->
 98          <param name="refGenomeSource_type" value="built-in" />
 99          <param name="ref" value="hg18chr21" />
100          <param name="input" value="srma_in1.bam" type="bam" />
101          <param name="source_select" value="pre_set" />
102          <output name="output" file="srma_out1.bam" ftype="bam" lines_diff="2" /><!-- allows tag with version number to be different -->
103      </test>
104      <test>
105          <!-- Commands to run to prepare test files (uses custom genome):
106          Prepare custom dict/index files:
107          samtools faidx srma_in2.fa
108          java -cp srma.jar net.sf.picard.sam.CreateSequenceDictionary R=srma_in2.fa O=srma_in2.dict
109          Prepare bam index file:
110          samtools index srma_in3.bam
111          Run SRMA:
112          java -jar "srma.jar" I=srma_in3.bam O=srma_out2.bam R=srma_in2.fa OFFSET=20 MIN_MAPQ=0 MINIMUM_ALLELE_PROBABILITY=0.1 MINIMUM_ALLELE_COVERAGE=2 RANGES=null RANGE=null CORRECT_BASES=true USE_SEQUENCE_QUALITIES=true MAX_HEAP_SIZE=8192
113          To create the bam file first, the sam file needs to have been run with the same reference file (srma_in2.fa) and have the header present. For instance:
114          samtools view -bT srma_in2.fa -o srma_in3.u.bam srma_in3.sam
115          samtools sort srma_in3.u.bam srma_in3
116          -->
117          <param name="refGenomeSource_type" value="history" />
118          <param name="ownFile" value="srma_in2.fa" ftype="fasta" />
119          <param name="input" value="srma_in3.bam" ftype="bam" />
120          <param name="source_select" value="full" />
121          <param name="offset" value="20" />
122          <param name="minMappingQuality" value="0" />
123          <param name="minAlleleProbability" value="0.1" />
124          <param name="minAlleleCoverage" value="2" />
125          <param name="range" value="null" />
126          <param name="correctBases" value="true" />
127          <param name="useSequenceQualities" value="true" />
128          <param name="maxHeapSize" value="8192" />
129          <output name="output" file="srma_out2.bam" ftype="bam" lines_diff="2" /><!-- allows tag with version number to be different -->
130      </test>
131  </tests>
132  <help>
133**What it does**
134
135SRMA is a short read micro re-aligner for next-generation high throughput sequencing data.
136
137Sequence alignment algorithms examine each read independently. When indels occur towards the ends of reads, the alignment can lead to false SNPs as well as improperly placed indels. This tool aims to perform a re-alignment of each read to a graphical representation of all alignments within a local region to provide a better overall base-resolution consensus.
138
139Currently this tool works well with and has been tested on 30x diploid coverage genome sequencing data from Illumina and ABI SOLiD technology. This tool may not work well with 454 data, as indels are a significant error mode for 454 data. 
140
141------
142
143Please cite the website "http://srma.sourceforge.net" as well as:
144
145Homer N, and Nelson SF.  SRMA: short read micro re-aligner. 2010.
146
147------
148
149**Know what you are doing**
150
151.. class:: warningmark
152
153There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words = running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
154
155.. __: http://srma.sourceforge.net/
156
157------
158
159**Input formats**
160
161SRMA accepts a BAM input file. Note that this file should have been generated from a SAM file which contains the header.
162
163------
164
165**Outputs**
166
167The output is in BAM format, see http://samtools.sourceforge.net for more details.
168
169-------
170
171**SRMA settings**
172
173All of the options have a default value. You can change any of them. Most of the options in SRMA have been implemented here.
174
175------
176
177**SRMA parameter list**
178
179This is an exhaustive list of SRMA options:
180
181For **SRMA**::
182
183  INPUT=File
184  I=File                        The input SAM or BAM file. Required. 
185  
186  OUTPUT=File
187  O=File                        The output SAM or BAM file. Default value: null. 
188  
189  REFERENCE=File
190  R=File                        The reference FASTA file. Required. 
191  
192  OFFSET=Integer                The alignment offset. Default value: 20. This option can be set to 'null' to clear the 
193                                default value. 
194  
195  MIN_MAPQ=Integer              The minimum mapping quality. Default value: 0. This option can be set to 'null' to clear 
196                                the default value. 
197  
198  MINIMUM_ALLELE_PROBABILITY=Double
199                                The minimum allele probability conditioned on coverage (for the binomial quantile). 
200                                Default value: 0.1. This option can be set to 'null' to clear the default value. 
201  
202  MINIMUM_ALLELE_COVERAGE=Integer
203                                The minimum haploid coverage for the consensus. Default value: 3. This option can be set 
204                                to 'null' to clear the default value. 
205  
206  RANGE=String                  A range to examine. Default value: null. 
207  
208  CORRECT_BASES=Boolean         Correct bases. Default value: false. This option can be set to 'null' to clear the 
209                                default value. Possible values: {true, false} 
210  
211  USE_SEQUENCE_QUALITIES=BooleanUse sequence qualities Default value: true. This option can be set to 'null' to clear the 
212                                default value. Possible values: {true, false} 
213  
214  MAX_HEAP_SIZE=Integer         The maximum number of nodes on the heap before re-alignment is ignored Default value: 
215                                8192. This option can be set to 'null' to clear the default value. 
216
217  </help>
218</tool>