PageRenderTime 22ms CodeModel.GetById 15ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 1ms

/tools/metag_tools/shrimp_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 279 lines | 184 code | 38 blank | 57 comment | 0 complexity | 32a5b3c6112411b2534b8d8e8178df9d MD5 | raw file
  1<tool id="shrimp_wrapper" name="SHRiMP for Letter-space" version="1.0.0">
  2  <description>reads mapping against reference sequence </description>
  3  <command interpreter="python">
       ## Builds the shrimp_wrapper.py call: reference, both outputs, then the reads
       ## (one dataset for single-end, or end1,end2,insertion_size for paired-end).
       ## The 15 tuning values are appended only when the full parameter list is chosen.
       ## Inputs declared inside a conditional are addressed through that conditional
       ## (type_of_reads / param) so every branch resolves names the same way.
  4    #if     ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input_query
  5    #elif   ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size
  6    #elif   ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input_query $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold
  7    #elif   ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold
  8    #end if#
  9  </command>
 10    <inputs>
 11        <page>
 12        <conditional name="type_of_reads">
               <!-- Selector: its value decides which of the two "when" groups below is shown. -->
 13            <param type="select" name="single_or_paired" label="Single- or Paired-ends">
 14                <option value="single">Single-end</option>
 15                <option value="paired">Paired-end</option>
 16            </param>
               <!-- Single-end: one read dataset. -->
 17            <when value="single">
 18                <param type="data" format="fastqsolexa" name="input_query" label="Align sequencing reads" help="No dataset? Read tip below" />
 19            </when>
               <!-- Paired-end: insertion length plus one read dataset per end. -->
 20            <when value="paired">
 21                <param type="integer" name="insertion_size" value="600" size="5" label="Insertion length between two ends" help="bp" />
 22                <param type="data" format="fastqsolexa" name="input1" label="Align sequencing reads, one end" />
 23                <param type="data" format="fastqsolexa" name="input2" label="and the other end" />
 24            </when>
 25        </conditional>
 26        <param type="data" format="fasta" name="input_target" label="against reference" /> <!-- FASTA dataset passed to the wrapper script as its first argument -->
 27        <conditional name="param">
               <!-- "skip" exposes no extra inputs; "full" exposes every tuning value listed below. -->
 28            <param type="select" name="skip_or_full" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
 29                <option value="skip">Commonly used</option>
 30                <option value="full">Full Parameter List</option>
 31            </param>
 32            <when value="skip" />
 33            <when value="full">
                   <!-- Seed settings -->
 34                <param type="text" name="spaced_seed" value="111111011111" size="30" label="Spaced Seed" />
 35                <param type="integer" name="seed_matches_per_window" value="2" size="5" label="Seed Matches per Window" />
 36                <param type="integer" name="seed_hit_taboo_length" value="4" size="5" label="Seed Hit Taboo Length" />
 37                <param type="integer" name="seed_generation_taboo_length" value="0" size="5" label="Seed Generation Taboo Length" />
 38                <param type="float" name="seed_window_length" value="115.0" size="10" label="Seed Window Length" help="in percentage" />
                   <!-- Read limits -->
 39                <param type="integer" name="max_hits_per_read" value="100" size="10" label="Maximum Hits per Read" />
 40                <param type="integer" name="max_read_length" value="1000" size="10" label="Maximum Read Length" />
 41                <param type="integer" name="kmer" value="-1" size="10" label="Kmer Std. Deviation Limit" help="-1 as None" />
                   <!-- Smith-Waterman scoring -->
 42                <param type="integer" name="sw_match_value" value="100" size="10" label="S-W Match Value" />
 43                <param type="integer" name="sw_mismatch_value" value="-150" size="10" label="S-W Mismatch Value" />
 44                <param type="integer" name="sw_gap_open_ref" value="-400" size="10" label="S-W Gap Open Penalty (Reference)" />
 45                <param type="integer" name="sw_gap_open_query" value="-400" size="10" label="S-W Gap Open Penalty (Query)" />
 46                <param type="integer" name="sw_gap_ext_ref" value="-70" size="10" label="S-W Gap Extend Penalty (Reference)" />
 47                <param type="integer" name="sw_gap_ext_query" value="-70" size="10" label="S-W Gap Extend Penalty (Query)" />
 48                <param type="float" name="sw_hit_threshold" value="68.0" size="10" label="S-W Hit Threshold" help="in percentage" />
 49            </when>
 50        </conditional>
 51        </page>
 52    </inputs>
 53    <outputs>
           <!-- Two tabular datasets; the command template hands them to the wrapper script as $output1 and $output2. -->
 54        <data format="tabular" name="output1" />
 55        <data format="tabular" name="output2" />
 56    </outputs>
 57    <requirements>
           <!-- External executable this tool declares a dependency on. -->
 58        <requirement type="binary">rmapper-ls</requirement>
 59    </requirements>
 60    <tests>
 61        <test>
               <!-- Single-end reads with the commonly used settings; only output1 is compared. -->
 62            <param name="single_or_paired" value="single" />
 63            <param name="skip_or_full" value="skip" />
 64            <param name="input_target" ftype="fasta" value="shrimp_phix_anc.fa" />
 65            <param name="input_query" ftype="fastqsolexa" value="shrimp_wrapper_test1.fastq" />
 66            <output name="output1" file="shrimp_wrapper_test1.out1" />
 67        </test>
 68        <!--  
 69        <test>
 70            <param name="single_or_paired" value="paired" />
 71            <param name="skip_or_full" value="skip" />
 72            <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
 73            <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa" />
 74            <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa" />
 75            <param name="insertion_size" value="600" />
 76            <output name="output1" file="shrimp_wrapper_test2.out1" />
 77        </test>
 78        <test>
 79            <param name="single_or_paired" value="single" />
 80            <param name="skip_or_full" value="full" />
 81            <param name="input_target" value="shrimp_phix_anc.fa" ftype="fasta" />
 82            <param name="input_query" value="shrimp_wrapper_test1.fastq" ftype="fastqsolexa"/>
 83            <param name="spaced_seed" value="111111011111" />
 84            <param name="seed_matches_per_window" value="2" />
 85            <param name="seed_hit_taboo_length" value="4" />
 86            <param name="seed_generation_taboo_length" value="0" />
 87            <param name="seed_window_length" value="115.0" />
 88            <param name="max_hits_per_read" value="100" />
 89            <param name="max_read_length" value="1000" />
 90            <param name="kmer" value="-1" />
 91            <param name="sw_match_value" value="100" />
 92            <param name="sw_mismatch_value" value="-150" />
 93            <param name="sw_gap_open_ref" value="-400" />
 94            <param name="sw_gap_open_query" value="-400" />
 95            <param name="sw_gap_ext_ref" value="-70" />
 96            <param name="sw_gap_ext_query" value="-70" />
 97            <param name="sw_hit_threshold" value="68.0" />
 98            <output name="output1" file="shrimp_wrapper_test1.out1" />
 99        </test> 
100        <test>
101            <param name="single_or_paired" value="paired" />
102            <param name="skip_or_full" value="full" />
103            <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
104            <param name="spaced_seed" value="111111011111" />
105            <param name="seed_matches_per_window" value="2" />
106            <param name="seed_hit_taboo_length" value="4" />
107            <param name="seed_generation_taboo_length" value="0" />
108            <param name="seed_window_length" value="115.0" />
109            <param name="max_hits_per_read" value="100" />
110            <param name="max_read_length" value="1000" />
111            <param name="kmer" value="-1" />
112            <param name="sw_match_value" value="100" />
113            <param name="sw_mismatch_value" value="-150" />
114            <param name="sw_gap_open_ref" value="-400" />
115            <param name="sw_gap_open_query" value="-400" />
116            <param name="sw_gap_ext_ref" value="-70" />
117            <param name="sw_gap_ext_query" value="-70" />
118            <param name="sw_hit_threshold" value="68.0" />
119            <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa"/>
120            <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa"/>
121            <param name="insertion_size" value="600" />
122            <output name="output1" file="shrimp_wrapper_test2.out1" />
123        </test>
124        -->
125    </tests>
126<help>
127
128.. class:: warningmark
129
130IMPORTANT: This tool currently supports only data whose quality scores are integers or ASCII-encoded quality scores with base 64. Click the pencil icon next to your dataset to set its datatype to *fastqsolexa*.
131
132
133-----
134    
135**What it does**
136 
137SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome. 
138
139This wrapper post-processes the default SHRiMP/rmapper-ls output and generates a table with all information from reads and reference for the mapping. The tool takes single- or paired-end reads. For single-end reads, only uniquely mapped alignments are considered. For paired-end reads, only pairs that meet the following criteria are used to generate the table: 1) the ends fall within the insertion size; 2) the ends are mapped in opposite directions. If there are still multiple mappings after applying these criteria, the paired-end read is discarded. 
140  
141
142-----
143
144**Input formats**
145
146A multiple-fastq file, for example::
147
148    @seq1
149    TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
150    +seq1
151    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
152
153
154-----
155
156**Outputs**
157
158The tool gives two outputs.
159
160**Table output**
161
162Table output contains 8 columns::
163
164     1     2        3       4     5     6     7     8 
165  ----------------------------------------------------
166  chrM   14711     seq1     0     T     A    40     1 
167  chrM   14712     seq1     1     T     T    40     1 
168
169where::
170    
171  1. (chrM)   - Reference sequence id
172  2. (14711)  - Position of the mapping in the reference
173  3. (seq1)   - Read id
174  4. (0)      - Position of the mapping in the read
175  5. (T)      - Nucleotide in the reference
176  6. (A)      - Nucleotide in the read
177  7. (40)     - Quality score for the nucleotide in the position of the read
178  8. (1)      - The number of times this position is covered by reads
179
180     
181**SHRiMP output**
182
183This is the default output from SHRiMP/rmapper-ls::
184 
185     1     2     3       4      5      6     7     8      9      10
186  -------------------------------------------------------------------
187   seq1  chrM    +     3644    3679    1    36     36    3600    36  
188
189where::
190
191  1. (seq1)   - Read id 
192  2. (chrM)   - Reference sequence id
193  3. (+)      - Strand of the read
194  4. (3644)   - Start position of the alignment in the reference
195  5. (3679)   - End position of the alignment in the reference
196  6. (1)      - Start position of the alignment in the read
197  7. (36)     - End position of the alignment in the read
198  8. (36)     - Length of the read
199  9. (3600)   - Score 
200 10. (36)     - Edit string
201
202 
203-----
204
205**SHRiMP parameter list**
206
207The commonly used parameters with default value setting::
208
209    -s    Spaced Seed                             (default: 111111011111)
210          The spaced seed is a single contiguous string of 0's and 1's. 
211          0's represent wildcards, or positions which will always be 
212          considered as matching, whereas 1's dictate positions that 
213          must match. A string of all 1's will result in a simple kmer scan.
214    -n    Seed Matches per Window                 (default: 2)
215          The number of seed matches per window dictates how many seeds 
216          must match within some window length of the genome before that 
217          region is considered for Smith-Waterman alignment. A lower 
218          value will increase sensitivity while drastically increasing 
219          running time. Higher values will have the opposite effect.
220    -t    Seed Hit Taboo Length                   (default: 4)
221          The seed taboo length specifies how many target genome bases 
222          or colors must exist prior to a previous seed match in order 
223          to count another seed match as a hit.
224    -9    Seed Generation Taboo Length            (default: 0)
225          
226    -w    Seed Window Length                      (default: 115.00%)
227          This parameter specifies the genomic span in bases (or colours) 
228          in which *seed_matches_per_window* must exist before the read 
229          is given consideration by the Smith-Waterman alignment machinery.
230    -o    Maximum Hits per Read                   (default: 100)
231          This parameter specifies how many hits to remember for each read. 
232          If more hits are encountered, ones with lower scores are dropped 
233          to make room.
234    -r    Maximum Read Length                     (default: 1000)
235          This parameter specifies the maximum length of reads that will 
236          be encountered in the dataset. If larger reads than the default 
237          are used, an appropriate value must be passed to *rmapper*.
238    -d    Kmer Std. Deviation Limit               (default: -1 [None])
239          This option permits pruning read kmers, which occur with 
240          frequencies greater than *kmer_std_dev_limit* standard 
241          deviations above the average. This can shorten running 
242          time at the cost of some sensitivity. 
243          *Note*: A negative value disables this option.            
244    -m    S-W Match Value                         (default: 100)
245          The value applied to matches during the Smith-Waterman score calculation.
246    -i    S-W Mismatch Value                      (default: -150)
247          The value applied to mismatches during the Smith-Waterman 
248          score calculation.
249    -g    S-W Gap Open Penalty (Reference)        (default: -400)
250          The value applied to gap opens along the reference sequence 
251          during the Smith-Waterman score calculation.
252          *Note*: For backward compatibility, if -g is set 
253          and -q is not set, the gap open penalty for the query will 
254          be set to the same value as specified for the reference.
255    -q    S-W Gap Open Penalty (Query)            (default: -400)
256          The value applied to gap opens along the query sequence during 
257          the Smith-Waterman score calculation.        
258    -e    S-W Gap Extend Penalty (Reference)      (default: -70)
259          The value applied to gap extends during the Smith-Waterman score calculation.
260          *Note*: For backward compatibility, if -e is set 
261          and -f is not set, the gap extend penalty for the query will 
262          be set to the same value as specified for the reference. 
263    -f    S-W Gap Extend Penalty (Query)          (default: -70)
264          The value applied to gap extends during the Smith-Waterman score calculation.
265    -h    S-W Hit Threshold                       (default: 68.00%)
266          In letter-space, this parameter determines the threshold 
267          score for both vectored and full Smith-Waterman alignments. 
268          Any values less than this quantity will be thrown away.
269          *Note*: This option differs slightly in meaning between letter-space and color-space.
270
271
272-----
273
274**Reference**
275 
276 **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu. 
277
278</help>
279</tool>