<!--
  Code-viewer page header captured with this file (not part of the tool definition):

  PageRenderTime 23ms CodeModel.GetById 14ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

  /tools/fastq/fastq_trimmer.xml

  https://bitbucket.org/cistrome/cistrome-harvard/
  XML | 120 lines | 102 code | 15 blank | 3 comment | 0 complexity | 9c7ca9155c769e08e2948bbe8cfcf190 MD5 | raw file
-->
  1<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0">
  2  <description>by column</description>
  3  <command interpreter="python">fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'</command>
  4  <inputs>
  5    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
  6    <conditional name="offset_type">
  7      <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
  8        <option value="offsets_absolute" selected="true">Absolute Values</option>
  9        <option value="offsets_percent">Percentage of Read Length</option>
 10      </param>
 11      <when value="offsets_absolute">
 12        <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
 13          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
 14          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
 15        </param>
 16        <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
 17          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
 18          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
 19        </param>
 20      </when>
 21      <when value="offsets_percent">
 22        <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
 23          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
 24        </param>
 25        <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
 26          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
 27        </param>
 28      </when>
 29    </conditional>
 30  <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
 31  </inputs>
 32  <outputs>
 33    <data name="output_file" format="input" />
 34  </outputs>
 35  <tests>
 36    <test>
 37      <!-- Do nothing trim -->
 38      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
 39      <param name="base_offset_type" value="offsets_absolute"/>
 40      <param name="left_column_offset" value="0"/>
 41      <param name="right_column_offset" value="0"/>
 42      <param name="keep_zero_length" value="keep_zero_length" />
 43      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
 44    </test>
 45    <!-- Trim to empty File -->
 46    <test>
 47      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
 48      <param name="base_offset_type" value="offsets_absolute"/>
 49      <param name="left_column_offset" value="30"/>
 50      <param name="right_column_offset" value="64"/>
 51      <param name="keep_zero_length" value="exclude_zero_length" />
 52      <output name="output_file" file="empty_file.dat" />
 53    </test>
 54    <test>
 55      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
 56      <param name="base_offset_type" value="offsets_percent"/>
 57      <param name="left_column_offset" value="50"/>
 58      <param name="right_column_offset" value="50"/>
 59      <param name="keep_zero_length" value="exclude_zero_length" />
 60      <output name="output_file" file="empty_file.dat" />
 61    </test>
 62    <!-- Trim to 4 inner-most bases -->
 63    <test>
 64      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
 65      <param name="base_offset_type" value="offsets_absolute"/>
 66      <param name="left_column_offset" value="45"/>
 67      <param name="right_column_offset" value="45"/>
 68      <param name="keep_zero_length" value="exclude_zero_length" />
 69      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
 70    </test>
 71    <test>
 72      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
 73      <param name="base_offset_type" value="offsets_percent"/>
 74      <param name="left_column_offset" value="47.87"/>
 75      <param name="right_column_offset" value="47.87"/>
 76      <param name="keep_zero_length" value="exclude_zero_length" />
 77      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
 78    </test>
 79  </tests>
 80  <help>
 81This tool allows you to trim the ends of reads.
 82
 83You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer. 
 84
 85For example, if you have a read of length 36::
 86  
 87  @Some FASTQ Sanger Read
 88  CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
 89  +
 90  =@@.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%&lt;;;%&lt;B@
 91  
 92And you set absolute offsets of 2 and 9::
 93  
 94  @Some FASTQ Sanger Read
 95  ATATGTNCTCACTGATAAGTGGATA
 96  +
 97  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-4
 98  
 99Or you set percent offsets of 6% and 20% (corresponds to absolute offsets of 2,7 for a read length of 36)::
100  
101  @Some FASTQ Sanger Read
102  ATATGTNCTCACTGATAAGTGGATATN
103  +
104  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%
105  
106-----
107
108.. class:: warningmark
109
110Trimming a color space read will cause any adapter base to be lost.
111
112------
113
114**Citation**
115
116If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
117
118
119  </help>
120</tool>