/tools/metag_tools/shrimp_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 279 lines · 184 code · 38 blank · 57 comment · 0 complexity · 32a5b3c6112411b2534b8d8e8178df9d MD5 · raw file

  1. <tool id="shrimp_wrapper" name="SHRiMP for Letter-space" version="1.0.0">
  2. <description>reads mapping against reference sequence </description>
  3. <!-- Builds the shrimp_wrapper.py call: reference, both outputs, the read input(s) and, in "full" mode, the 15 SHRiMP settings in the order they are declared under the "param" conditional. Parameters nested in a conditional are addressed through it (e.g. $type_of_reads.input_query). --> <command interpreter="python">
  4. #if ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input_query
  5. #elif ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="skip") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size
  6. #elif ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input_query $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold
  7. #elif ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="full") #shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_hit_threshold
  8. #end if#
  9. </command>
  10. <inputs> <!-- User-facing parameters: read layout, reference sequence, and SHRiMP settings. -->
  11. <page>
  12. <conditional name="type_of_reads"> <!-- Single-end exposes one read dataset; paired-end exposes two read datasets plus an insertion length. -->
  13. <param name="single_or_paired" type="select" label="Single- or Paired-ends">
  14. <option value="single">Single-end</option>
  15. <option value="paired">Paired-end</option>
  16. </param>
  17. <when value="single">
  18. <param name="input_query" type="data" format="fastqsolexa" label="Align sequencing reads" help="No dataset? Read tip below"/>
  19. </when>
  20. <when value="paired">
  21. <param name="insertion_size" type="integer" size="5" value="600" label="Insertion length between two ends" help="bp" />
  22. <param name="input1" type="data" format="fastqsolexa" label="Align sequencing reads, one end" />
  23. <param name="input2" type="data" format="fastqsolexa" label="and the other end" />
  24. </when>
  25. </conditional>
  26. <param name="input_target" type="data" format="fasta" label="against reference" />
  27. <conditional name="param"> <!-- "skip" adds no parameters; "full" exposes the 15 settings that the command template passes positionally, in this same order. -->
  28. <param name="skip_or_full" type="select" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
  29. <option value="skip">Commonly used</option>
  30. <option value="full">Full Parameter List</option>
  31. </param>
  32. <when value="skip" /> <!-- Intentionally empty: no extra arguments are passed in this mode. -->
  33. <when value="full">
  34. <param name="spaced_seed" type="text" size="30" value="111111011111" label="Spaced Seed" />
  35. <param name="seed_matches_per_window" type="integer" size="5" value="2" label="Seed Matches per Window" />
  36. <param name="seed_hit_taboo_length" type="integer" size="5" value="4" label="Seed Hit Taboo Length" />
  37. <param name="seed_generation_taboo_length" type="integer" size="5" value="0" label="Seed Generation Taboo Length" />
  38. <param name="seed_window_length" type="float" size="10" value="115.0" label="Seed Window Length" help="in percentage"/>
  39. <param name="max_hits_per_read" type="integer" size="10" value="100" label="Maximum Hits per Read" />
  40. <param name="max_read_length" type="integer" size="10" value="1000" label="Maximum Read Length" />
  41. <param name="kmer" type="integer" size="10" value="-1" label="Kmer Std. Deviation Limit" help="-1 as None"/>
  42. <param name="sw_match_value" type="integer" size="10" value="100" label="S-W Match Value" />
  43. <param name="sw_mismatch_value" type="integer" size="10" value="-150" label="S-W Mismatch Value" />
  44. <param name="sw_gap_open_ref" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Reference)" />
  45. <param name="sw_gap_open_query" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Query)" />
  46. <param name="sw_gap_ext_ref" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Reference)" />
  47. <param name="sw_gap_ext_query" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Query)" />
  48. <param name="sw_hit_threshold" type="float" size="10" value="68.0" label="S-W Hit Threshold" help="in percentage"/>
  49. </when>
  50. </conditional>
  51. </page>
  52. </inputs>
  53. <outputs> <!-- Two tabular datasets, handed to shrimp_wrapper.py as its 2nd and 3rd arguments ($output1, $output2). NOTE(review): which one is the table and which the raw SHRiMP output is decided by the wrapper script; confirm there. -->
  54. <data name="output1" format="tabular"/>
  55. <data name="output2" format="tabular"/>
  56. </outputs>
  57. <requirements> <!-- Declares the external rmapper-ls binary (the SHRiMP mapper named in the help text) as a dependency. -->
  58. <requirement type="binary">rmapper-ls</requirement>
  59. </requirements>
  60. <tests>
  61. <test> <!-- Single-end run with the commonly used settings; only output1 is compared against an expected file. -->
  62. <param name="single_or_paired" value="single" />
  63. <param name="skip_or_full" value="skip" />
  64. <param name="input_target" value="shrimp_phix_anc.fa" ftype="fasta" />
  65. <param name="input_query" value="shrimp_wrapper_test1.fastq" ftype="fastqsolexa"/>
  66. <output name="output1" file="shrimp_wrapper_test1.out1" />
  67. </test>
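  68. <!-- NOTE(review): the three tests below (paired/skip, single/full, paired/full) are disabled; the reason is not recorded here. Confirm whether they can be re-enabled.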
  69. <test>
  70. <param name="single_or_paired" value="paired" />
  71. <param name="skip_or_full" value="skip" />
  72. <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
  73. <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa" />
  74. <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa" />
  75. <param name="insertion_size" value="600" />
  76. <output name="output1" file="shrimp_wrapper_test2.out1" />
  77. </test>
  78. <test>
  79. <param name="single_or_paired" value="single" />
  80. <param name="skip_or_full" value="full" />
  81. <param name="input_target" value="shrimp_phix_anc.fa" ftype="fasta" />
  82. <param name="input_query" value="shrimp_wrapper_test1.fastq" ftype="fastqsolexa"/>
  83. <param name="spaced_seed" value="111111011111" />
  84. <param name="seed_matches_per_window" value="2" />
  85. <param name="seed_hit_taboo_length" value="4" />
  86. <param name="seed_generation_taboo_length" value="0" />
  87. <param name="seed_window_length" value="115.0" />
  88. <param name="max_hits_per_read" value="100" />
  89. <param name="max_read_length" value="1000" />
  90. <param name="kmer" value="-1" />
  91. <param name="sw_match_value" value="100" />
  92. <param name="sw_mismatch_value" value="-150" />
  93. <param name="sw_gap_open_ref" value="-400" />
  94. <param name="sw_gap_open_query" value="-400" />
  95. <param name="sw_gap_ext_ref" value="-70" />
  96. <param name="sw_gap_ext_query" value="-70" />
  97. <param name="sw_hit_threshold" value="68.0" />
  98. <output name="output1" file="shrimp_wrapper_test1.out1" />
  99. </test>
  100. <test>
  101. <param name="single_or_paired" value="paired" />
  102. <param name="skip_or_full" value="full" />
  103. <param name="input_target" value="shrimp_eca_chrMT.fa" ftype="fasta" />
  104. <param name="spaced_seed" value="111111011111" />
  105. <param name="seed_matches_per_window" value="2" />
  106. <param name="seed_hit_taboo_length" value="4" />
  107. <param name="seed_generation_taboo_length" value="0" />
  108. <param name="seed_window_length" value="115.0" />
  109. <param name="max_hits_per_read" value="100" />
  110. <param name="max_read_length" value="1000" />
  111. <param name="kmer" value="-1" />
  112. <param name="sw_match_value" value="100" />
  113. <param name="sw_mismatch_value" value="-150" />
  114. <param name="sw_gap_open_ref" value="-400" />
  115. <param name="sw_gap_open_query" value="-400" />
  116. <param name="sw_gap_ext_ref" value="-70" />
  117. <param name="sw_gap_ext_query" value="-70" />
  118. <param name="sw_hit_threshold" value="68.0" />
  119. <param name="input1" value="shrimp_wrapper_test2_end1.fastq" ftype="fastqsolexa"/>
  120. <param name="input2" value="shrimp_wrapper_test2_end2.fastq" ftype="fastqsolexa"/>
  121. <param name="insertion_size" value="600" />
  122. <output name="output1" file="shrimp_wrapper_test2.out1" />
  123. </test>
  124. -->
  125. </tests>
  126. <help>
  127. .. class:: warningmark
  128. IMPORTANT: This tool currently only supports data where the quality scores are integers or ASCII-encoded quality scores with an offset of 64. Click the pencil icon next to your dataset to set the datatype to *fastqsolexa*.
  129. -----
  130. **What it does**
  131. SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome.
  132. This wrapper post-processes the default SHRiMP/rmapper-ls output and generates a table with all information from reads and reference for the mapping. The tool takes single- or paired-end reads. For single-end reads, only uniquely mapped alignments are considered. For paired-end reads, only pairs that meet the following criteria are used to generate the table: 1) the ends fall within the insertion size; 2) the ends are mapped in opposite directions. If a paired-end read still has multiple mappings after these criteria are applied, it is discarded.
  133. -----
  134. **Input formats**
  135. A multiple-fastq file, for example::
  136. @seq1
  137. TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
  138. +seq1
  139. hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
  140. -----
  141. **Outputs**
  142. The tool gives two outputs.
  143. **Table output**
  144. Table output contains 8 columns::
  145. 1 2 3 4 5 6 7 8
  146. ----------------------------------------------------
  147. chrM 14711 seq1 0 T A 40 1
  148. chrM 14712 seq1 1 T T 40 1
  149. where::
  150. 1. (chrM) - Reference sequence id
  151. 2. (14711) - Position of the mapping in the reference
  152. 3. (seq1) - Read id
  153. 4. (0) - Position of the mapping in the read
  154. 5. (T) - Nucleotide in the reference
  155. 6. (A) - Nucleotide in the read
  156. 7. (40) - Quality score for the nucleotide in the position of the read
  157. 8. (1) - The number of times this position is covered by reads
  158. **SHRiMP output**
  159. This is the default output from SHRiMP/rmapper-ls::
  160. 1 2 3 4 5 6 7 8 9 10
  161. -------------------------------------------------------------------
  162. seq1 chrM + 3644 3679 1 36 36 3600 36
  163. where::
  164. 1. (seq1) - Read id
  165. 2. (chrM) - Reference sequence id
  166. 3. (+) - Strand of the read
  167. 4. (3644) - Start position of the alignment in the reference
  168. 5. (3679) - End position of the alignment in the reference
  169. 6. (1) - Start position of the alignment in the read
  170. 7. (36) - End position of the alignment in the read
  171. 8. (36) - Length of the read
  172. 9. (3600) - Score
  173. 10. (36) - Edit string
  174. -----
  175. **SHRiMP parameter list**
  176. The commonly used parameters with default value setting::
  177. -s Spaced Seed (default: 111111011111)
  178. The spaced seed is a single contiguous string of 0's and 1's.
  179. 0's represent wildcards, or positions which will always be
  180. considered as matching, whereas 1's dictate positions that
  181. must match. A string of all 1's will result in a simple kmer scan.
  182. -n Seed Matches per Window (default: 2)
  183. The number of seed matches per window dictates how many seeds
  184. must match within some window length of the genome before that
  185. region is considered for Smith-Waterman alignment. A lower
  186. value will increase sensitivity while drastically increasing
  187. running time. Higher values will have the opposite effect.
  188. -t Seed Hit Taboo Length (default: 4)
  189. The seed taboo length specifies how many target genome bases
  190. or colors must exist prior to a previous seed match in order
  191. to count another seed match as a hit.
  192. -9 Seed Generation Taboo Length (default: 0)
  193. -w Seed Window Length (default: 115.00%)
  194. This parameter specifies the genomic span in bases (or colours)
  195. in which *seed_matches_per_window* must exist before the read
  196. is given consideration by the Smith-Waterman alignment machinery.
  197. -o Maximum Hits per Read (default: 100)
  198. This parameter specifies how many hits to remember for each read.
  199. If more hits are encountered, ones with lower scores are dropped
  200. to make room.
  201. -r Maximum Read Length (default: 1000)
  202. This parameter specifies the maximum length of reads that will
  203. be encountered in the dataset. If larger reads than the default
  204. are used, an appropriate value must be passed to *rmapper*.
  205. -d Kmer Std. Deviation Limit (default: -1 [None])
  206. This option permits pruning read kmers, which occur with
  207. frequencies greater than *kmer_std_dev_limit* standard
  208. deviations above the average. This can shorten running
  209. time at the cost of some sensitivity.
  210. *Note*: A negative value disables this option.
  211. -m S-W Match Value (default: 100)
  212. The value applied to matches during the Smith-Waterman score calculation.
  213. -i S-W Mismatch Value (default: -150)
  214. The value applied to mismatches during the Smith-Waterman
  215. score calculation.
  216. -g S-W Gap Open Penalty (Reference) (default: -400)
  217. The value applied to gap opens along the reference sequence
  218. during the Smith-Waterman score calculation.
  219. *Note*: For backward compatibility, if -g is set
  220. and -q is not set, the gap open penalty for the query will
  221. be set to the same value as specified for the reference.
  222. -q S-W Gap Open Penalty (Query) (default: -400)
  223. The value applied to gap opens along the query sequence during
  224. the Smith-Waterman score calculation.
  225. -e S-W Gap Extend Penalty (Reference) (default: -70)
  226. The value applied to gap extends during the Smith-Waterman score calculation.
  227. *Note*: For backward compatibility, if -e is set
  228. and -f is not set, the gap extend penalty for the query will
  229. be set to the same value as specified for the reference.
  230. -f S-W Gap Extend Penalty (Query) (default: -70)
  231. The value applied to gap extends during the Smith-Waterman score calculation.
  232. -h S-W Hit Threshold (default: 68.00%)
  233. In letter-space, this parameter determines the threshold
  234. score for both vectored and full Smith-Waterman alignments.
  235. Any values less than this quantity will be thrown away.
  236. *Note*: This option differs slightly in meaning between letter-space and color-space.
  237. -----
  238. **Reference**
  239. **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu.
  240. </help>
  241. </tool>