PageRenderTime 35ms CodeModel.GetById 15ms app.highlight 12ms RepoModel.GetById 1ms app.codeStats 1ms

/tools/ngs_rna/tophat_color_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 611 lines | 536 code | 41 blank | 34 comment | 0 complexity | c7af5aea8dea08ea1c52104cb4d329d0 MD5 | raw file
  1<tool id="tophat_color" name="Tophat for SOLiD" version="1.0.0">
  2    <description>Find splice junctions using RNA-seq data</description>
       <!-- External dependency declared for this tool: the "tophat" package. -->
  3    <requirements>
  4        <requirement type="package">tophat</requirement>
  5    </requirements>
  6    <command interpreter="python">
  7        tophat_wrapper.py
  8            ## Change this to accommodate the number of threads you have available.
  9            --num-threads="4"
 10
 11            ## This wrapper always runs in color-space mode.
 12            --color-space
 13
 14            ## Provide outputs.
 15            --junctions-output=$junctions
 16            --hits-output=$accepted_hits
 17
 18            ## Handle reference file: either a FASTA from the history or a pre-built index path.
 19            #if $refGenomeSource.genomeSource == "history":
 20                --own-file=$refGenomeSource.ownFile
 21            #else:
 22                --indexes-path="${refGenomeSource.index.fields.path}"
 23            #end if
 24
 25            ## Are reads single-end or paired?
 26            --single-paired=$singlePaired.sPaired
 27
 28            ## First input file always required.
 29            --input1=$input1
 30
 31            ## Set params based on whether reads are single-end or paired.
 32            ## The single-end (sParams) and paired-end (pParams) branches below mirror each other;
 33            ## keep them in sync when adding or removing an option.
 32            #if $singlePaired.sPaired == "single":
 33                --settings=$singlePaired.sParams.sSettingsType
 34                #if $singlePaired.sParams.sSettingsType == "full":
 35                    -a $singlePaired.sParams.anchor_length
 36                    -m $singlePaired.sParams.splice_mismatches
 37                    -i $singlePaired.sParams.min_intron_length
 38                    -I $singlePaired.sParams.max_intron_length
 39                    -F $singlePaired.sParams.junction_filter
 40                    -g $singlePaired.sParams.max_multihits
 41                    --min-segment-intron $singlePaired.sParams.min_segment_intron
 42                    --max-segment-intron $singlePaired.sParams.max_segment_intron
 43                    --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches
 44                    --seg-mismatches=$singlePaired.sParams.seg_mismatches
 45                    --seg-length=$singlePaired.sParams.seg_length
 46                    --library-type=$singlePaired.sParams.library_type
 47
 48                    ## Indel search.
 49                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":
 50                        ## --allow-indels is not passed; only the two length limits are sent.
 51                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length
 52                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length
 53                    #else:
 54                        --no-novel-indels
 55                    #end if
 56
 57                    ## Supplying junctions parameters.
 58                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":
 59                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":
 60                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model
 61                        #end if
 62                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":
 63                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs
 64                        #end if
 65                        ## The str() cast was found necessary by the original author; the reason is not recorded.
 66                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":
 67                            --no-novel-juncs
 68                        #end if
 69                    #end if
 70
 71                    #if $singlePaired.sParams.closure_search.use_search == "Yes":
 72                        --closure-search
 73                        --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon
 74                        --min-closure-intron $singlePaired.sParams.closure_search.min_closure_intron
 75                        --max-closure-intron $singlePaired.sParams.closure_search.max_closure_intron
 76                    #else:
 77                        --no-closure-search
 78                    #end if
 79                    #if $singlePaired.sParams.coverage_search.use_search == "Yes":
 80                        --coverage-search
 81                        --min-coverage-intron $singlePaired.sParams.coverage_search.min_coverage_intron
 82                        --max-coverage-intron $singlePaired.sParams.coverage_search.max_coverage_intron
 83                    #else:
 84                        --no-coverage-search
 85                    #end if
 86                    ## The str() cast was found necessary by the original author; the reason is not recorded.
 87                    #if str($singlePaired.sParams.microexon_search) == "Yes":
 88                        --microexon-search
 89                    #end if
 90                #end if
 91            #else:
 92                --input2=$singlePaired.input2
 93                -r $singlePaired.mate_inner_distance
 94                --settings=$singlePaired.pParams.pSettingsType
 95                #if $singlePaired.pParams.pSettingsType == "full":
 96                    --mate-std-dev=$singlePaired.pParams.mate_std_dev
 97                    -a $singlePaired.pParams.anchor_length
 98                    -m $singlePaired.pParams.splice_mismatches
 99                    -i $singlePaired.pParams.min_intron_length
100                    -I $singlePaired.pParams.max_intron_length
101                    -F $singlePaired.pParams.junction_filter
102                    -g $singlePaired.pParams.max_multihits
103                    --min-segment-intron $singlePaired.pParams.min_segment_intron
104                    --max-segment-intron $singlePaired.pParams.max_segment_intron
105                    --initial-read-mismatches=$singlePaired.pParams.initial_read_mismatches
106                    --seg-mismatches=$singlePaired.pParams.seg_mismatches
107                    --seg-length=$singlePaired.pParams.seg_length
108                    --library-type=$singlePaired.pParams.library_type
109
110                    ## Indel search.
111                    #if $singlePaired.pParams.indel_search.allow_indel_search == "Yes":
112                        ## --allow-indels is not passed; only the two length limits are sent.
113                        --max-insertion-length $singlePaired.pParams.indel_search.max_insertion_length
114                        --max-deletion-length $singlePaired.pParams.indel_search.max_deletion_length
115                    #else:
116                        --no-novel-indels
117                    #end if
118
119                    ## Supplying junctions parameters.
120                    #if $singlePaired.pParams.own_junctions.use_junctions == "Yes":
121                        #if $singlePaired.pParams.own_junctions.gene_model_ann.use_annotations == "Yes":
122                            -G $singlePaired.pParams.own_junctions.gene_model_ann.gene_annotation_model
123                        #end if
124                        #if $singlePaired.pParams.own_junctions.raw_juncs.use_juncs == "Yes":
125                            -j $singlePaired.pParams.own_junctions.raw_juncs.raw_juncs
126                        #end if
127                        ## The str() cast was found necessary by the original author; the reason is not recorded.
128                        #if str($singlePaired.pParams.own_junctions.no_novel_juncs) == "Yes":
129                            --no-novel-juncs
130                        #end if
131                    #end if
132
133                    #if $singlePaired.pParams.closure_search.use_search == "Yes":
134                        --closure-search
135                        --min-closure-exon $singlePaired.pParams.closure_search.min_closure_exon
136                        --min-closure-intron $singlePaired.pParams.closure_search.min_closure_intron
137                        --max-closure-intron $singlePaired.pParams.closure_search.max_closure_intron
138                    #else:
139                        --no-closure-search
140                    #end if
141                    #if $singlePaired.pParams.coverage_search.use_search == "Yes":
142                        --coverage-search
143                        --min-coverage-intron $singlePaired.pParams.coverage_search.min_coverage_intron
144                        --max-coverage-intron $singlePaired.pParams.coverage_search.max_coverage_intron
145                    #else:
146                        --no-coverage-search
147                    #end if
148                    ## The str() cast was found necessary by the original author; the reason is not recorded.
149                    #if str($singlePaired.pParams.microexon_search) == "Yes":
150                        --microexon-search
151                    #end if
152                #end if
153            #end if
154    </command>
155    <inputs>
156        <param format="fastqcssanger" name="input1" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
157        <expand macro="refGenomeSourceConditional">
158          <options from_data_table="tophat_indexes_color">
159            <filter type="sort_by" column="2"/>
160            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
161          </options>
162        </expand>
           <!-- Single-end vs paired-end selection. Each branch carries its own preSet/full settings conditional (sParams / pParams). -->
163        <conditional name="singlePaired">
164            <param name="sPaired" type="select" label="Is this library mate-paired?">
165              <option value="single">Single-end</option>
166              <option value="paired">Paired-end</option>
167            </param>
168            <when value="single">
169              <conditional name="sParams">
170                <param name="sSettingsType" type="select" label="TopHat settings to use" help="You can use the default settings or set custom values for any of Tophat's parameters.">
171                  <option value="preSet">Default settings</option>
172                  <option value="full">Full parameter list</option>
173                </param>
174                <when value="preSet" />
175                <!-- Full/advanced params. -->
176                <when value="full">
177                  <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
178                      <option value="fr-unstranded">FR Unstranded</option>
179                      <option value="fr-firststrand">FR First Strand</option>
180                      <option value="fr-secondstrand">FR Second Strand</option>
181                  </param>
                   <!-- min enforces the "at least 3" bound stated in the label. -->
182                  <param name="anchor_length" type="integer" min="3" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
183                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
184                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
185                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
186                  <expand macro="indel_searchConditional" />
                   <!-- min/max enforce the 0.0 to 1.0 range stated in the help text. -->
187                  <param name="junction_filter" type="float" min="0.0" max="1.0" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
188                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
189                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
190                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
191                  <param name="initial_read_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in the initial read mapping" />
192                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
193                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
194
195                  <!-- Options for supplying own junctions. -->
196                  <conditional name="own_junctions">
197                      <param name="use_junctions" type="select" label="Use Own Junctions">
198                        <option value="No">No</option>
199                        <option value="Yes">Yes</option>
200                      </param>
201                      <when value="Yes">
202                          <conditional name="gene_model_ann">
203                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
204                                <option value="No">No</option>
205                                <option value="Yes">Yes</option>
206                             </param>
207                             <when value="No" />
208                             <when value="Yes">
209                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
210                             </when>
211                          </conditional>
212                          <expand macro="raw_juncsConditional" />
213                          <expand macro="no_novel_juncsParam" />
214                      </when>
215                      <when value="No" />
216                  </conditional> <!-- /own_junctions -->
217
218                  <!-- Closure search. -->
219                  <conditional name="closure_search">
220                    <param name="use_search" type="select" label="Use Closure Search">
221                      <option value="No">No</option>
222                      <option value="Yes">Yes</option>
223                    </param>
224                    <when value="Yes">
225                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
226                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
227                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
228                    </when>
229                    <when value="No" />
230                  </conditional>
231                  <!-- Coverage search. -->
232                  <conditional name="coverage_search">
233                    <param name="use_search" type="select" label="Use Coverage Search">
234                        <option selected="true" value="Yes">Yes</option>
235                        <option value="No">No</option>
236                    </param>
237                    <when value="Yes">
238                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
239                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
240                    </when>
241                    <when value="No" />
242                  </conditional>
243                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
244                    <option value="No">No</option>
245                    <option value="Yes">Yes</option>
246                  </param>
247                </when>  <!-- full -->
248              </conditional>  <!-- sParams -->
249            </when>  <!--  single -->
250            <when value="paired">
251              <param format="fastqcssanger" name="input2" type="data" label="RNA-Seq FASTQ file" help="Color-space: Must have Sanger-scaled quality values with ASCII offset 33" />
252              <param name="mate_inner_distance" type="integer" value="20" label="Mean Inner Distance between Mate Pairs" />
253              <conditional name="pParams">
254                <param name="pSettingsType" type="select" label="TopHat settings to use" help="Use the Full parameter list to change default settings.">
255                  <option value="preSet">Default settings</option>
256                  <option value="full">Full parameter list</option>
257                </param>
258                <when value="preSet" />
259                <!-- Full/advanced params. -->
260                <when value="full">
261                  <param name="library_type" type="select" label="Library Type" help="TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.">
262                      <option value="fr-unstranded">FR Unstranded</option>
263                      <option value="fr-firststrand">FR First Strand</option>
264                      <option value="fr-secondstrand">FR Second Strand</option>
265                  </param>
266                  <param name="mate_std_dev" type="integer" value="20" label="Std. Dev for Distance between Mate Pairs" help="The standard deviation for the distribution on inner distances between mate pairs."/>
                   <!-- min enforces the "at least 3" bound stated in the label. -->
267                  <param name="anchor_length" type="integer" min="3" value="8" label="Anchor length (at least 3)" help="Report junctions spanned by reads with at least this many bases on each side of the junction." />
268                  <param name="splice_mismatches" type="integer" value="0" label="Maximum number of mismatches that can appear in the anchor region of spliced alignment" />
269                  <param name="min_intron_length" type="integer" value="70" label="The minimum intron length" help="TopHat will ignore donor/acceptor pairs closer than this many bases apart." />
270                  <param name="max_intron_length" type="integer" value="500000" label="The maximum intron length" help="When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read." />
271                  <expand macro="indel_searchConditional" />
                   <!-- min/max enforce the 0.0 to 1.0 range stated in the help text. -->
272                  <param name="junction_filter" type="float" min="0.0" max="1.0" value="0.15" label="Minimum isoform fraction: filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)" help="0.0 to 1.0 (0 to turn off)" />
273                  <param name="max_multihits" type="integer" value="40" label="Maximum number of alignments to be allowed" />
274                  <param name="min_segment_intron" type="integer" value="50" label="Minimum intron length that may be found during split-segment (default) search" />
275                  <param name="max_segment_intron" type="integer" value="500000" label="Maximum intron length that may be found during split-segment (default) search" />
276                  <param name="initial_read_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in the initial read mapping" />
277                  <param name="seg_mismatches" type="integer" min="0" max="3" value="2" label="Number of mismatches allowed in each segment alignment for reads mapped independently" />
278                  <param name="seg_length" type="integer" value="25" label="Minimum length of read segments" />
279                  <!-- Options for supplying own junctions. -->
280                  <conditional name="own_junctions">
281                      <param name="use_junctions" type="select" label="Use Own Junctions">
282                        <option value="No">No</option>
283                        <option value="Yes">Yes</option>
284                      </param>
285                      <when value="Yes">
286                          <conditional name="gene_model_ann">
287                             <param name="use_annotations" type="select" label="Use Gene Annotation Model">
288                                <option value="No">No</option>
289                                <option value="Yes">Yes</option>
290                             </param>
291                             <when value="No" />
292                             <when value="Yes">
293                               <param format="gtf" name="gene_annotation_model" type="data" label="Gene Model Annotations" help="TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping."/>
294                             </when>
295                          </conditional>
                           <!-- NOTE(review): the single-end branch takes the next two controls from the raw_juncsConditional and no_novel_juncsParam macros; here they are written inline. Confirm the macro bodies match before swapping these for the macros. -->
296                          <conditional name="raw_juncs">
297                             <param name="use_juncs" type="select" label="Use Raw Junctions">
298                                <option value="No">No</option>
299                                <option value="Yes">Yes</option>
300                             </param>
301                             <when value="No" />
302                             <when value="Yes">
303                               <param format="interval" name="raw_juncs" type="data" label="Raw Junctions" help="Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-] left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive."/>
304                             </when>
305                          </conditional>
306                          <param name="no_novel_juncs" type="select" label="Only look for supplied junctions">
307                            <option value="No">No</option>
308                            <option value="Yes">Yes</option>
309                          </param>
310                      </when>
311                      <when value="No" />
312                  </conditional> <!-- /own_junctions -->
313
314                  <!-- Closure search. -->
315                  <conditional name="closure_search">
316                    <param name="use_search" type="select" label="Use Closure Search">
317                      <option value="No">No</option>
318                      <option value="Yes">Yes</option>
319                    </param>
320                    <when value="Yes">
321                        <param name="min_closure_exon" type="integer" value="50" label="During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50." />
322                        <param name="min_closure_intron" type="integer" value="50" label="Minimum intron length that may be found during closure search" />
323                        <param name="max_closure_intron" type="integer" value="5000" label="Maximum intron length that may be found during closure search" />
324                    </when>
325                    <when value="No" />
326                  </conditional>
327                  <!-- Coverage search. -->
328                  <conditional name="coverage_search">
329                    <param name="use_search" type="select" label="Use Coverage Search">
330                        <option selected="true" value="Yes">Yes</option>
331                        <option value="No">No</option>
332                    </param>
333                    <when value="Yes">
334                        <param name="min_coverage_intron" type="integer" value="50" label="Minimum intron length that may be found during coverage search" />
335                        <param name="max_coverage_intron" type="integer" value="20000" label="Maximum intron length that may be found during coverage search" />
336                    </when>
337                    <when value="No" />
338                  </conditional>
339                  <param name="microexon_search" type="select" label="Use Microexon Search" help="With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.">
340                    <option value="No">No</option>
341                    <option value="Yes">Yes</option>
342                  </param>
343                </when>  <!-- full -->
344              </conditional>  <!-- pParams -->
345            </when>  <!-- paired -->
346        </conditional>
347    </inputs>
348
349    <outputs>
           <!-- The two indel BED outputs are declared only when indel search is enabled in the
                selected full-parameter branch; both share the bedFilter macro defined in the macros section of this file. -->
350        <data format="bed" name="insertions" label="${tool.name} on ${on_string}: insertions" from_work_dir="tophat_out/insertions.bed">
351            <expand macro="bedFilter" />
359            <expand macro="dbKeyActions" />
360        </data>
361        <data format="bed" name="deletions" label="${tool.name} on ${on_string}: deletions" from_work_dir="tophat_out/deletions.bed">
               <expand macro="bedFilter" />
362            <expand macro="dbKeyActions" />
363        </data>
           <!-- Junctions and accepted hits are always produced; their paths are passed on the command line. -->
364        <data format="bed" name="junctions" label="${tool.name} on ${on_string}: splice junctions">
365            <expand macro="dbKeyActions" />
366        </data>
367        <data format="bam" name="accepted_hits" label="${tool.name} on ${on_string}: accepted_hits">
368            <expand macro="dbKeyActions" />
369        </data>
370    </outputs>
371    <macros>
         <!-- Shared macros. Several macros expanded in this file (refGenomeSourceConditional, indel_searchConditional,
              raw_juncsConditional, no_novel_juncsParam) are not defined below; presumably they come from this import. -->
372      <import>tophat_macros.xml</import>
         <!-- Output filter: true when allow_indel_search is 'Yes' in either the single-end (sParams)
              or the paired-end (pParams) branch; the key checks guard against branches that are absent. -->
373      <macro name="bedFilter">
374        <filter>
375                (
376                    ( ( 'sParams' in singlePaired ) and ( 'indel_search' in singlePaired['sParams'] ) and 
377                      ( singlePaired['sParams']['indel_search']['allow_indel_search'] == 'Yes' ) ) or 
378                    ( ( 'pParams' in singlePaired ) and ( 'indel_search' in singlePaired['pParams'] ) and 
379                      ( singlePaired['pParams']['indel_search']['allow_indel_search'] == 'Yes' ) )
380                )
381        </filter>        
382      </macro>
         <!-- Sets the dbkey metadata of an output. For an indexed reference it is looked up in the
              tophat_indexes_color data table (column 1 of the row whose column 0 matches the chosen index,
              skipping rows that start with '#'); for a history reference it is copied from that dataset's dbkey. -->
383      <macro name="dbKeyActions">
384        <actions>
385          <conditional name="refGenomeSource.genomeSource">
386            <when value="indexed">
387              <action type="metadata" name="dbkey">
388                <option type="from_data_table" name="tophat_indexes_color" column="1" offset="0">
389                  <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
390                  <filter type="param_value" ref="refGenomeSource.index" column="0"/>
391                </option>
392              </action>
393            </when>
394            <when value="history">
395              <action type="metadata" name="dbkey">
396                <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
397              </action>
398            </when>
399          </conditional>
400        </actions>
401      </macro>
402    </macros>    
403    <tests>
404        <!-- Test color-space single-end reads with user-supplied reference fasta and preset parameters -->
405        <test>
406            <!-- TopHat commands:
407            cp test-data/tophat_in5.fasta tophat_in5.fa
408            bowtie-build -C -f tophat_in5.fasta tophat_in5
409            tophat -p 1 -C tophat_in5 test-data/tophat_in4.fastqcssanger
410            Rename the files in tmp_dir appropriately
411            -->
412            <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger" />
413            <param name="genomeSource" value="history" />
414            <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
415            <param name="sPaired" value="single" />
416            <param name="sSettingsType" value="preSet" />
               <!-- Only junctions and accepted_hits are checked; the BAM is compared with compare="sim_size" rather than exactly. -->
417            <output name="junctions" file="tophat_out5j.bed" />
418            <output name="accepted_hits" file="tophat_out5h.bam" compare="sim_size" />
419        </test>
420        <!-- Test color-space single-end reads with pre-built index and full parameters -->
421        <test>
422            <!-- Tophat commands:
423            tophat -p 1 -C -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +allow-indels +coverage-search +min-coverage-intron 50 +max-coverage-intron 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +microexon-search partialMm9chrX_random_cs test-data/tophat_in4.fastqcssanger
424            Replace the + with double-dash
425            Rename the files in tmp_dir appropriately
426            -->
427            <param name="input1" ftype="fastqcssanger" value="tophat_in4.fastqcssanger"/>
428            <param name="genomeSource" value="indexed"/>
429            <param name="index" value="partialMm9chrX_random_cs" />
430            <param name="sPaired" value="single"/>
431            <param name="sSettingsType" value="full"/>
               <!-- NOTE(review): this is the option's display label; the option value declared in the inputs is "fr-unstranded". Confirm the test framework in use accepts labels. -->
432            <param name="library_type" value="FR Unstranded"/>
433            <param name="anchor_length" value="8"/>
434            <param name="splice_mismatches" value="0"/>
435            <param name="min_intron_length" value="70"/>
436            <param name="max_intron_length" value="500000"/>
437            <param name="junction_filter" value="0.15"/>
438            <param name="max_multihits" value="40"/>
439            <param name="min_segment_intron" value="50" />
440            <param name="max_segment_intron" value="500000" />
               <!-- initial_read_mismatches is not set by this test. -->
441            <param name="seg_mismatches" value="2"/>
442            <param name="seg_length" value="25"/>
443            <param name="allow_indel_search" value="Yes"/>
444            <param name="max_insertion_length" value="3"/>
445            <param name="max_deletion_length" value="3"/>
446            <param name="use_junctions" value="Yes" />
447            <param name="use_annotations" value="No" />
448            <param name="use_juncs" value="No" />
449            <param name="no_novel_juncs" value="No" />
               <!-- use_search appears twice: judging by the params that follow each, the first is for closure_search and the second for coverage_search. -->
450            <param name="use_search" value="Yes" />
451            <param name="min_closure_exon" value="50" />
452            <param name="min_closure_intron" value="50" />
453            <param name="max_closure_intron" value="5000" />
454            <param name="use_search" value="Yes" />
455            <param name="min_coverage_intron" value="50" />
456            <param name="max_coverage_intron" value="20000" />
457            <param name="microexon_search" value="Yes" />
               <!-- Indel search is enabled above, so the insertions and deletions outputs are checked as well. -->
458            <output name="insertions" file="tophat_out6i.bed" />
459            <output name="deletions" file="tophat_out6d.bed" />
460            <output name="junctions" file="tophat_out6j.bed" />
461            <output name="accepted_hits" file="tophat_out6h.bam" compare="sim_size" />
462        </test>
463        <!-- Test color-space paired-end reads with pre-built index and preset parameters -->
464        <test>
465            <!-- TopHat commands:
466            tophat -C -o tmp_dir -r 50 -p 1 partialMm9chrX_random_cs test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
467            Rename the files in tmp_dir appropriately
468            -->
469            <param name="input1" ftype="fastqcssanger" value="tophat_in6.fastqcssanger" />
470            <param name="genomeSource" value="indexed" />
471            <param name="index" value="partialMm9chrX_random_cs" />
472            <param name="sPaired" value="paired" />
473            <param name="input2" ftype="fastqcssanger" value="tophat_in7.fastqcssanger"/>
474            <param name="mate_inner_distance" value="50"/>
475            <param name="pSettingsType" value="preSet" />
476            <output name="junctions" file="tophat_out7j.bed" />
477            <output name="accepted_hits" file="tophat_out7h.bam" compare="sim_size" />
478        </test>
479        <!-- Test color-space paired-end reads with user-supplied reference fasta and full parameters -->
480        <test>
481            <!-- TopHat commands:
482            cp test-data/tophat_in5.fasta tophat_in5.fa
483            bowtie-build -C -f tophat_in5.fa tophat_in5
484            tophat -C -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +library-type fr-unstranded +allow-indels +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intron 5000 +coverage-search +min-coverage-intron 50 +max-coverage-intron 15000 +mate-std-dev 20 +segment-mismatches 2 +segment-length 20 +min-segment-intron 50 +max-segment-intron 500000 tophat_in5 test-data/tophat_in6.fastqcssanger test-data/tophat_in7.fastqcssanger
485            Replace the + with double-dash
486            Rename the files in tmp_dir appropriately
487            -->
488            <param name="input1" ftype="fastqcssanger" value="tophat_in6.fastqcssanger"/>
489            <param name="genomeSource" value="history"/>
490            <param name="ownFile" ftype="fasta" value="tophat_in5.fasta"/>
491            <param name="sPaired" value="paired"/>
492            <param name="input2" ftype="fastqcssanger" value="tophat_in7.fastqcssanger"/>
493            <param name="mate_inner_distance" value="20"/>
494            <param name="pSettingsType" value="full"/>
495            <param name="library_type" value="FR Unstranded"/>
496            <param name="mate_std_dev" value="20"/>
497            <param name="anchor_length" value="8"/>
498            <param name="splice_mismatches" value="0"/>
499            <param name="min_intron_length" value="70"/>
500            <param name="max_intron_length" value="500000"/>
501            <param name="junction_filter" value="0.15"/>
502            <param name="max_multihits" value="40"/>
503            <param name="min_segment_intron" value="70" />
504            <param name="max_segment_intron" value="400000" />
505            <param name="seg_mismatches" value="2"/>
506            <param name="seg_length" value="20"/>
507            <param name="allow_indel_search" value="Yes"/>
508            <param name="max_insertion_length" value="3"/>
509            <param name="max_deletion_length" value="3"/>
510            <param name="use_junctions" value="No" />
511            <param name="use_search" value="Yes" />
512            <param name="min_closure_exon" value="50" />
513            <param name="min_closure_intron" value="50" />
514            <param name="max_closure_intron" value="5000" />
515            <param name="use_search" value="Yes" />
516            <param name="min_coverage_intron" value="50" />
517            <param name="max_coverage_intron" value="20000" />
518            <param name="microexon_search" value="No" />
519            <output name="insertions" file="tophat_out8i.bed" />
520            <output name="deletions" file="tophat_out8d.bed" />
521            <output name="junctions" file="tophat_out8j.bed" />
522            <output name="accepted_hits" file="tophat_out8h.bam" compare="sim_size" />
523        </test>
524    </tests>
525
526    <help>
527**Tophat Overview**
528
529TopHat_ is a fast splice junction mapper for RNA-Seq reads. It aligns RNA-Seq reads to mammalian-sized genomes using the ultra high-throughput short read aligner Bowtie, and then analyzes the mapping results to identify splice junctions between exons. Please cite: Trapnell, C., Pachter, L. and Salzberg, S.L. TopHat: discovering splice junctions with RNA-Seq. Bioinformatics 25, 1105-1111 (2009).        
530
531.. _Tophat: http://tophat.cbcb.umd.edu/
532        
533------
534
535**Know what you are doing**
536
537.. class:: warningmark
538
539There is no such thing (yet) as an automated gearshift in splice junction identification. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
540
541.. __: http://tophat.cbcb.umd.edu/manual.html
542
543------
544
545**Input formats**
546
547This color-space (SOLiD) version of Tophat accepts files in color-space Sanger FASTQ format (fastqcssanger). Use the FASTQ Groomer to prepare your files.
548
549------
550
551**Outputs**
552
553Tophat produces two main output files:
554
555- junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
556- accepted_hits -- A list of read alignments in BAM_ format.
557
558.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
559.. _BAM: http://samtools.sourceforge.net/
560
561Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.
562
563-------
564
565**Tophat settings**
566
567All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here.
568
569------
570
571**Tophat parameter list**
572
573This is a list of implemented Tophat options::
576
577  -r                                This is the expected (mean) inner distance between mate pairs. For example, for paired-end runs with fragments
578                                    selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter 
579                                    is required for paired end runs.
580  --mate-std-dev INT                The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
581  -a/--min-anchor-length INT        The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction. Note that individual spliced     
582                                    alignments may span a junction with fewer than this many bases on one side. However, every junction involved in spliced alignments is supported by at least one 
583                                    read with this many bases on each side. This must be at least 3 and the default is 8.
584  -m/--splice-mismatches INT        The maximum number of mismatches that may appear in the "anchor" region of a spliced alignment. The default is 0.
585  -i/--min-intron-length INT        The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart. The default is 70.
586  -I/--max-intron-length INT        The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read. The default is 500000.
587  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, A and B, is supported by S reads. Let the average depth of coverage of
588                                    exon A be D, and assume that it is higher than that of exon B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the
589                                    filter. The default is 0.15.
590  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many 
591                                    alignments. The default is 40.
592  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.
593  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-]; left and right are zero-based coordinates, and specify the last character of the left sequence to be spliced to the first character of the right sequence, inclusive.
594  --no-novel-juncs                  Only look for junctions indicated in the supplied GTF file. (ignored without -G)
595  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.
596  --closure-search                  Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)
597  --no-coverage-search              Disables the coverage based search for junctions.
598  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.
599  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.
600  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.
601  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.
602  --segment-length                  Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.
603  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.
604  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.
605  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.
606  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.
607  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.
608  --min-segment-intron              The minimum intron length that may be found during split-segment search. The default is 50.
609  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.
610    </help>
611</tool>