/tools/metag_tools/shrimp_color_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 181 lines · 151 code · 30 blank · 0 comment · 0 complexity · fb666bb1391403f115604e03941525a8 MD5 · raw file

  1. <tool id="shrimp_color_wrapper" name="SHRiMP for Color-space" version="1.0.0">
  2. <description>reads mapping against reference sequence </description>
  3. <command interpreter="python">
  4. #if $param.skip_or_full=="skip" #shrimp_color_wrapper.py $input_target $input_query $output1
  5. #else #shrimp_color_wrapper.py $input_target $input_query $output1 $param.spaced_seed $param.seed_matches_per_window $param.seed_hit_taboo_length $param.seed_generation_taboo_length $param.seed_window_length $param.max_hits_per_read $param.max_read_length $param.kmer $param.sw_match_value $param.sw_mismatch_value $param.sw_gap_open_ref $param.sw_gap_open_query $param.sw_gap_ext_ref $param.sw_gap_ext_query $param.sw_crossover_penalty $param.sw_full_hit_threshold $param.sw_vector_hit_threshold
  6. #end if#
  7. </command>
  8. <inputs>
  9. <page>
  10. <param name="input_query" type="data" format="csfasta" label="Align sequencing reads" help="No dataset? Read tip below"/>
  11. <param name="input_target" type="data" format="fasta" label="against reference" />
  12. <conditional name="param">
  13. <param name="skip_or_full" type="select" label="SHRiMP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full List">
  14. <option value="skip">Commonly used</option>
  15. <option value="full">Full Parameter List</option>
  16. </param>
  17. <when value="skip" />
  18. <when value="full">
  19. <param name="spaced_seed" type="text" size="30" value="1111001111" label="Spaced Seed" />
  20. <param name="seed_matches_per_window" type="integer" size="5" value="2" label="Seed Matches per Window" />
  21. <param name="seed_hit_taboo_length" type="integer" size="5" value="4" label="Seed Hit Taboo Length" />
  22. <param name="seed_generation_taboo_length" type="integer" size="5" value="0" label="Seed Generation Taboo Length" />
  23. <param name="seed_window_length" type="float" size="10" value="115.0" label="Seed Window Length" help="in percentage"/>
  24. <param name="max_hits_per_read" type="integer" size="10" value="100" label="Maximum Hits per Read" />
  25. <param name="max_read_length" type="integer" size="10" value="1000" label="Maximum Read Length" />
  26. <param name="kmer" type="integer" size="10" value="-1" label="Kmer Std. Deviation Limit" help="-1 as None"/>
  27. <param name="sw_match_value" type="integer" size="10" value="100" label="S-W Match Value" />
  28. <param name="sw_mismatch_value" type="integer" size="10" value="-150" label="S-W Mismatch Value" />
  29. <param name="sw_gap_open_ref" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Reference)" />
  30. <param name="sw_gap_open_query" type="integer" size="10" value="-400" label="S-W Gap Open Penalty (Query)" />
  31. <param name="sw_gap_ext_ref" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Reference)" />
  32. <param name="sw_gap_ext_query" type="integer" size="10" value="-70" label="S-W Gap Extend Penalty (Query)" />
  33. <param name="sw_crossover_penalty" type="integer" size="10" value="-140" label="S-W Crossover Penalty" />
  34. <param name="sw_full_hit_threshold" type="float" size="10" value="68.0" label="S-W Full Hit Threshold" help="in percentage"/>
  35. <param name="sw_vector_hit_threshold" type="float" size="10" value="60.0" label="S-W Vector Hit Threshold" help="in percentage"/>
  36. </when>
  37. </conditional>
  38. </page>
  39. </inputs>
  40. <outputs>
  41. <data name="output1" format="tabular"/>
  42. </outputs>
  43. <requirements>
  44. <requirement type="binary">rmapper-cs</requirement>
  45. </requirements>
  46. <tests>
  47. <test>
  48. <param name="skip_or_full" value="skip" />
  49. <param name="input_target" value="Ssuis.fasta" ftype="fasta" />
  50. <param name="input_query" value="shrimp_cs_test1.csfasta" ftype="csfasta"/>
  51. <output name="output1" file="shrimp_cs_test1.out" />
  52. </test>
  53. </tests>
  54. <help>
  55. .. class:: warningmark
  56. To use this tool your dataset needs to be in the *csfasta* (ABI SOLiD color-space sequences) format. Click the pencil icon next to your dataset to set the datatype to *csfasta*.
  57. -----
  58. **What it does**
  59. SHRiMP (SHort Read Mapping Package) is a software package for aligning genomic reads against a target genome.
  60. -----
  61. **Input formats**
  62. A multiple color-space file, for example::
  63. >2_263_779_F3
  64. T132032030200202202003211302222202230022110222
  65. -----
  66. **Outputs**
  67. The tool returns the default SHRiMP output::
  68. 1 2 3 4 5 6 7 8 9 10
  69. --------------------------------------------------------------------------------------------------------------------
  70. >2_263_779_F3 Streptococcus_suis + 814344 814388 1 45 45 3660 8x19x3x2x6x4x3
  71. where::
  72. 1. (>2_263_779_F3) - Read id
  73. 2. (Streptococcus_suis) - Reference sequence id
  74. 3. (+) - Strand of the read
  75. 4. (814344) - Start position of the alignment in the reference
  76. 5. (814388) - End position of the alignment in the reference
  77. 6. (1) - Start position of the alignment in the read
  78. 7. (45) - End position of the alignment in the read
  79. 8. (45) - Length of the read
  80. 9. (3660) - Score
  81. 10. (8x19x3x2x6x4x3) - Edit string
  82. -----
  83. **SHRiMP parameter list**
  84. The commonly used parameters with default value setting::
  85. -s Spaced Seed (default: 1111001111)
  86. The spaced seed is a single contiguous string of 0's and 1's.
  87. 0's represent wildcards, or positions which will always be
  88. considered as matching, whereas 1's dictate positions that
  89. must match. A string of all 1's will result in a simple kmer scan.
  90. -n Seed Matches per Window (default: 2)
  91. The number of seed matches per window dictates how many seeds
  92. must match within some window length of the genome before that
  93. region is considered for Smith-Waterman alignment. A lower
  94. value will increase sensitivity while drastically increasing
  95. running time. Higher values will have the opposite effect.
  96. -t Seed Hit Taboo Length (default: 4)
  97. The seed taboo length specifies how many target genome bases
  98. or colours must exist prior to a previous seed match in order
  99. to count another seed match as a hit.
  100. -9 Seed Generation Taboo Length (default: 0)
  101. -w Seed Window Length (default: 115.00%)
  102. This parameter specifies the genomic span in bases (or colours)
  103. in which *seed_matches_per_window* must exist before the read
  104. is given consideration by the Smith-Waterman alignment machinery.
  105. -o Maximum Hits per Read (default: 100)
  106. This parameter specifies how many hits to remember for each read.
  107. If more hits are encountered, ones with lower scores are dropped
  108. to make room.
  109. -r Maximum Read Length (default: 1000)
  110. This parameter specifies the maximum length of reads that will
  111. be encountered in the dataset. If larger reads than the default
  112. are used, an appropriate value must be passed to *rmapper*.
  113. -d Kmer Std. Deviation Limit (default: -1 [None])
  114. This option permits pruning read kmers, which occur with
  115. frequencies greater than *kmer_std_dev_limit* standard
  116. deviations above the average. This can shorten running
  117. time at the cost of some sensitivity.
  118. *Note*: A negative value disables this option.
  119. -m S-W Match Value (default: 100)
  120. The value applied to matches during the Smith-Waterman score calculation.
  121. -i S-W Mismatch Value (default: -150)
  122. The value applied to mismatches during the Smith-Waterman
  123. score calculation.
  124. -g S-W Gap Open Penalty (Reference) (default: -400)
  125. The value applied to gap opens along the reference sequence
  126. during the Smith-Waterman score calculation.
  127. *Note*: For backward compatibility, if -g is set
  128. and -q is not set, the gap open penalty for the query will
  129. be set to the same value as specified for the reference.
  130. -q S-W Gap Open Penalty (Query) (default: -400)
  131. The value applied to gap opens along the query sequence during
  132. the Smith-Waterman score calculation.
  133. -e S-W Gap Extend Penalty (Reference) (default: -70)
  134. The value applied to gap extends along the reference sequence during the Smith-Waterman score calculation.
  135. *Note*: For backward compatibility, if -e is set
  136. and -f is not set, the gap extend penalty for the query will
  137. be set to the same value as specified for the reference.
  138. -f S-W Gap Extend Penalty (Query) (default: -70)
  139. The value applied to gap extends along the query sequence during the Smith-Waterman score calculation.
  140. -x S-W Crossover Penalty (default: -140)
  141. -h S-W Full Hit Threshold (default: 68.00%)
  142. In letter-space, this parameter determines the threshold
  143. score for both vectored and full Smith-Waterman alignments.
  144. Any values less than this quantity will be thrown away.
  145. *Note*: This option differs slightly in meaning between letter-space and color-space.
  146. -v S-W Vector Hit Threshold (default: 60.00%)
  147. -----
  148. **Reference**
  149. **SHRiMP**: Stephen M. Rumble, Michael Brudno, Phil Lacroute, Vladimir Yanovsky, Marc Fiume, Adrian Dalca. shrimp at cs dot toronto dot edu.
  150. </help>
  151. </tool>