/tools/fastq/fastq_trimmer.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 120 lines · 102 code · 15 blank · 3 comment · 0 complexity · 9c7ca9155c769e08e2948bbe8cfcf190 MD5 · raw file

  <tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0">
    <description>by column</description>

    <!--
      Positional arguments handed to fastq_trimmer.py, in order:
        1. input FASTQ path
        2. output FASTQ path
        3. left (5') offset
        4. right (3') offset
        5. offset type (offsets_absolute or offsets_percent)
        6. the input datatype extension with its leading "fastq" removed
           (e.g. "sanger" for fastqsanger, "cssanger" for fastqcssanger)
        7. keep_zero_length or exclude_zero_length
    -->
    <command interpreter="python">fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'</command>

    <inputs>
      <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>

      <!-- The offset parameters change type (integer vs. float) depending on the selected offset mode. -->
      <conditional name="offset_type">
        <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
          <option value="offsets_absolute" selected="true">Absolute Values</option>
          <option value="offsets_percent">Percentage of Read Length</option>
        </param>

        <!-- Absolute mode: whole-number base counts, zero or greater. -->
        <when value="offsets_absolute">
          <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
            <!-- Zero is a valid (and the default) offset, so the message says non-negative rather than positive. -->
            <validator type="in_range" message="Base Offsets must be non-negative" min="0" max="inf"/>
            <!-- Rejects any value that has a fractional part. -->
            <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
          </param>
          <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
            <validator type="in_range" message="Base Offsets must be non-negative" min="0" max="inf"/>
            <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
          </param>
        </when>

        <!-- Percent mode: fractional percentages of the read length, limited to the range 0 to 100. -->
        <when value="offsets_percent">
          <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
            <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
          </param>
          <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
            <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
          </param>
        </when>
      </conditional>

      <!--
        Boolean parameters take their default from "checked" (not "selected").
        Unchecked by default, so the command receives exclude_zero_length unless the user opts in.
      -->
      <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false"/>
    </inputs>

    <outputs>
      <!-- The output takes the same datatype as the input FASTQ file. -->
      <data name="output_file" format_source="input_file" />
    </outputs>

    <tests>
      <!-- Do nothing trim: zero offsets must reproduce the input file. -->
      <test>
        <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
        <param name="base_offset_type" value="offsets_absolute"/>
        <param name="left_column_offset" value="0"/>
        <param name="right_column_offset" value="0"/>
        <param name="keep_zero_length" value="keep_zero_length" />
        <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
      </test>

      <!-- Trim to empty file, absolute offsets: zero-length reads are excluded, leaving no output records. -->
      <test>
        <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
        <param name="base_offset_type" value="offsets_absolute"/>
        <param name="left_column_offset" value="30"/>
        <param name="right_column_offset" value="64"/>
        <param name="keep_zero_length" value="exclude_zero_length" />
        <output name="output_file" file="empty_file.dat" />
      </test>

      <!-- Trim to empty file, percent offsets: 50% from each end removes every base. -->
      <test>
        <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
        <param name="base_offset_type" value="offsets_percent"/>
        <param name="left_column_offset" value="50"/>
        <param name="right_column_offset" value="50"/>
        <param name="keep_zero_length" value="exclude_zero_length" />
        <output name="output_file" file="empty_file.dat" />
      </test>

      <!-- Trim to 4 inner-most bases, absolute offsets. -->
      <test>
        <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
        <param name="base_offset_type" value="offsets_absolute"/>
        <param name="left_column_offset" value="45"/>
        <param name="right_column_offset" value="45"/>
        <param name="keep_zero_length" value="exclude_zero_length" />
        <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
      </test>

      <!-- Same expected output as the absolute test above, reached through percent offsets. -->
      <test>
        <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
        <param name="base_offset_type" value="offsets_percent"/>
        <param name="left_column_offset" value="47.87"/>
        <param name="right_column_offset" value="47.87"/>
        <param name="keep_zero_length" value="exclude_zero_length" />
        <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
      </test>
    </tests>

    <!-- The help body is reStructuredText: blank lines and the indentation of literal blocks are significant. -->
    <help>
This tool allows you to trim the ends of reads.

You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer.

For example, if you have a read of length 36::

  @Some FASTQ Sanger Read
  CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
  +
  =@@.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%&lt;;;%&lt;B@

And you set absolute offsets of 2 and 9::

  @Some FASTQ Sanger Read
  ATATGTNCTCACTGATAAGTGGATA
  +
  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-4

Or you set percent offsets of 6% and 20% (corresponds to absolute offsets of 2,7 for a read length of 36)::

  @Some FASTQ Sanger Read
  ATATGTNCTCACTGATAAGTGGATATN
  +
  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%

-----

.. class:: warningmark

Trimming a color space read will cause any adapter base to be lost.

------

**Citation**

If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_

    </help>
  </tool>