PageRenderTime 74ms CodeModel.GetById 65ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/metag_tools/shrimp_color_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 181 lines | 151 code | 30 blank | 0 comment | 0 complexity | fb666bb1391403f115604e03941525a8 MD5 | raw file
  1<tool id="shrimp_color_wrapper" name="SHRiMP for Color-space" version="1.0.0">
  2  <description>reads mapping against reference sequence </description>
  <!-- Command line template, run with the python interpreter. When the "param"
       conditional is set to "skip", only the reference, reads and output
       datasets are passed to shrimp_color_wrapper.py; otherwise the seventeen
       parameters of the "full" branch are appended positionally, in the order
       written below. -->
  3  <command interpreter="python">
  4    #if $param.skip_or_full=="skip" #shrimp_color_wrapper.py $input_target $input_query $output1 
  5    #else                           #shrimp_color_wrapper.py $input_target $input_query $output1 $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_crossover_penalty $param.sw_full_hit_threshold $param.sw_vector_hit_threshold  
  6    #end if#
  7  </command>
  8    <inputs>
  9        <page>
           <!-- The two datasets every run needs: the reads (csfasta) and the
                reference they are aligned against (fasta). -->
 10        <param name="input_query" type="data" format="csfasta" label="Align sequencing reads" help="No dataset? Read tip below"/>
 11        <param name="input_target" type="data" format="fasta" label="against reference" />
           <!-- "skip" exposes no further parameters; "full" exposes every
                parameter listed in its <when> branch. The command template
                reads these as $param.NAME. -->
 12        <conditional name="param">
 13            <param name="skip_or_full" type="select" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
 14                <option value="skip">Commonly used</option>
 15                <option value="full">Full Parameter List</option>
 16            </param>
 17            <when value="skip" />
 18            <when value="full">
                   <!-- Listed in the same order in which the command template
                        passes them to the wrapper script. -->
 19                <param name="spaced_seed"                   type="text"     size="30"   value="1111001111"    label="Spaced Seed" />
 20                <param name="seed_matches_per_window"       type="integer"  size="5"    value="2"               label="Seed Matches per Window" />
 21                <param name="seed_hit_taboo_length"         type="integer"  size="5"    value="4"               label="Seed Hit Taboo Length" />
 22                <param name="seed_generation_taboo_length"  type="integer"  size="5"    value="0"               label="Seed Generation Taboo Length" />
 23                <param name="seed_window_length"            type="float"    size="10"   value="115.0"           label="Seed Window Length"          help="in percentage"/>
 24                <param name="max_hits_per_read"             type="integer"  size="10"   value="100"             label="Maximum Hits per Read" />
 25                <param name="max_read_length"               type="integer"  size="10"   value="1000"            label="Maximum Read Length" />
 26                <param name="kmer"                          type="integer"  size="10"   value="-1"              label="Kmer Std. Deviation Limit"   help="-1 as None"/>
 27                <param name="sw_match_value"                type="integer"  size="10"   value="100"             label="S-W Match Value" />
 28                <param name="sw_mismatch_value"             type="integer"  size="10"   value="-150"            label="S-W Mismatch Value" />
 29                <param name="sw_gap_open_ref"               type="integer"  size="10"   value="-400"            label="S-W Gap Open Penalty (Reference)" />
 30                <param name="sw_gap_open_query"             type="integer"  size="10"   value="-400"            label="S-W Gap Open Penalty (Query)" />
 31                <param name="sw_gap_ext_ref"                type="integer"  size="10"   value="-70"             label="S-W Gap Extend Penalty (Reference)" />
 32                <param name="sw_gap_ext_query"              type="integer"  size="10"   value="-70"             label="S-W Gap Extend Penalty (Query)" />
 33                <param name="sw_crossover_penalty"          type="integer"  size="10"   value="-140"            label="S-W Crossover Penalty" />               
 34                <param name="sw_full_hit_threshold"         type="float"    size="10"   value="68.0"            label="S-W Full Hit Threshold"      help="in percentage"/>
 35                <param name="sw_vector_hit_threshold"       type="float"    size="10"   value="60.0"            label="S-W Vector Hit Threshold"    help="in percentage"/>
 36            </when>
 37        </conditional>
 38        </page>
 39    </inputs>
 40    <outputs>
           <!-- Single tabular result dataset; the command template passes it
                to the wrapper script as $output1. -->
 41        <data name="output1" format="tabular"/>
 42    </outputs>
 43    <requirements>
         <!-- Declares the rmapper-cs binary as a requirement of this tool
              (presumably SHRiMP's color-space mapper; confirm against the
              wrapper script). -->
 44      <requirement type="binary">rmapper-cs</requirement>
 45    </requirements>
 46    <tests>
           <!-- One functional test using the "skip" (commonly used) settings:
                maps shrimp_cs_test1.csfasta against Ssuis.fasta and compares
                output1 with shrimp_cs_test1.out. The "full" branch has no
                test here. -->
 47        <test>
 48            <param name="skip_or_full" value="skip" />
 49            <param name="input_target" value="Ssuis.fasta" ftype="fasta" />
 50            <param name="input_query" value="shrimp_cs_test1.csfasta" ftype="csfasta"/>
 51            <output name="output1" file="shrimp_cs_test1.out" />
 52        </test>
 53    </tests>
 54<help>
 55    
 56.. class:: warningmark 
 57
 58To use this tool your dataset needs to be in the *csfasta* format (ABI SOLiD color-space sequences). Click the pencil icon next to your dataset to set the datatype to *csfasta*.   
 59
 60
 61-----
 62    
 63**What it does**
 64 
 65SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome.  
 66  
 67
 68-----
 69
 70**Input formats**
 71
 72A color-space (csfasta) file containing one or more reads, for example::
 73
 74    >2_263_779_F3
 75    T132032030200202202003211302222202230022110222
 76
 77
 78-----
 79
 80**Outputs**
 81
 82The tool returns the default SHRiMP output::
 83
 84 
 85     1                      2               3         4        5        6       7      8      9      10
 86  --------------------------------------------------------------------------------------------------------------------
 87    >2_263_779_F3   Streptococcus_suis      +       814344  814388      1      45      45    3660    8x19x3x2x6x4x3  
 88
 89where::
 90
 91  1. (>2_263_779_F3)        - Read id 
 92  2. (Streptococcus_suis)   - Reference sequence id
 93  3. (+)                    - Strand of the read
 94  4. (814344)               - Start position of the alignment in the reference
 95  5. (814388)               - End position of the alignment in the reference
 96  6. (1)                    - Start position of the alignment in the read
 97  7. (45)                   - End position of the alignment in the read
 98  8. (45)                   - Length of the read
 99  9. (3660)                 - Score 
100 10. (8x19x3x2x6x4x3)       - Edit string
101
102 
103-----
104
105**SHRiMP parameter list**
106
107The commonly used parameters with default value setting::
108
109    -s    Spaced Seed                             (default: 1111001111)
110          The spaced seed is a single contiguous string of 0's and 1's. 
111          0's represent wildcards, or positions which will always be 
112          considered as matching, whereas 1's dictate positions that 
113          must match. A string of all 1's will result in a simple kmer scan.
114    -n    Seed Matches per Window                 (default: 2)
115          The number of seed matches per window dictates how many seeds 
116          must match within some window length of the genome before that 
117          region is considered for Smith-Waterman alignment. A lower 
118          value will increase sensitivity while drastically increasing 
119          running time. Higher values will have the opposite effect.
120    -t    Seed Hit Taboo Length                   (default: 4)
121          The seed taboo length specifies how many target genome bases 
122          or colours must exist prior to a previous seed match in order 
123          to count another seed match as a hit.
124    -9    Seed Generation Taboo Length            (default: 0)
125          
126    -w    Seed Window Length                      (default: 115.00%)
127          This parameter specifies the genomic span in bases (or colours) 
128          in which *seed_matches_per_window* must exist before the read 
129          is given consideration by the Smith-Waterman alignment machinery.
130    -o    Maximum Hits per Read                   (default: 100)
131          This parameter specifies how many hits to remember for each read. 
132          If more hits are encountered, ones with lower scores are dropped 
133          to make room.
134    -r    Maximum Read Length                     (default: 1000)
135          This parameter specifies the maximum length of reads that will 
136          be encountered in the dataset. If larger reads than the default 
137          are used, an appropriate value must be passed to *rmapper*.
138    -d    Kmer Std. Deviation Limit               (default: -1 [None])
139          This option permits pruning read kmers, which occur with 
140          frequencies greater than *kmer_std_dev_limit* standard 
141          deviations above the average. This can shorten running 
142          time at the cost of some sensitivity. 
143          *Note*: A negative value disables this option.            
144    -m    S-W Match Value                         (default: 100)
145          The value applied to matches during the Smith-Waterman score calculation.
146    -i    S-W Mismatch Value                      (default: -150)
147          The value applied to mismatches during the Smith-Waterman 
148          score calculation.
149    -g    S-W Gap Open Penalty (Reference)        (default: -400)
150          The value applied to gap opens along the reference sequence 
151          during the Smith-Waterman score calculation.
152          *Note*: For backward compatibility, if -g is set 
153          and -q is not set, the gap open penalty for the query will 
154          be set to the same value as specified for the reference.
155    -q    S-W Gap Open Penalty (Query)            (default: -400)
156          The value applied to gap opens along the query sequence during 
157          the Smith-Waterman score calculation.        
158    -e    S-W Gap Extend Penalty (Reference)      (default: -70)
159          The value applied to gap extends during the Smith-Waterman score calculation.
160          *Note*: For backward compatibility, if -e is set 
161          and -f is not set, the gap extend penalty for the query will 
162          be set to the same value as specified for the reference. 
163    -f    S-W Gap Extend Penalty (Query)          (default: -70)
164          The value applied to gap extends during the Smith-Waterman score calculation.
165    -x    S-W Crossover Penalty                   (default: -140)
166    -h    S-W Full Hit Threshold                  (default: 68.00%)
167          In letter-space, this parameter determines the threshold 
168          score for both vectored and full Smith-Waterman alignments. 
169          Any values less than this quantity will be thrown away.
170          *Note*: This option differs slightly in meaning between letter-space and color-space.
171    -v    S-W Vector Hit Threshold                (default: 60.00%)
172    
173
174-----
175
176**Reference**
177 
178 **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu. 
179
180</help>
181</tool>