PageRenderTime 62ms CodeModel.GetById 34ms app.highlight 16ms RepoModel.GetById 4ms app.codeStats 0ms

/tools/filters/trimmer.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 142 lines | 109 code | 33 blank | 0 comment | 0 complexity | bfb13988c6e5139ba020c55d3606a3ab MD5 | raw file
  1<tool id="trimmer" name="Trim" version="0.0.1">
  2    <description>leading or trailing characters</description>
  3    <command interpreter="python">
  4    trimmer.py -a -f '$input1' -c $col -s $start -e $end -i $ignore $fastq > '$out_file1'
  5    </command> <!-- Dataset paths are single-quoted so the shell passes each one as a single word; $fastq is left bare because it expands to either an empty string or -q. -->
  6    <inputs> <!-- Each param below is referenced by name in the command template. -->
  7        <param name="input1" type="data" format="tabular,txt" label="this dataset"/>
  8        <param name="col" type="integer" value="0" label="Trim this column only" help="0 = process entire line"/>
  9        <param name="start" type="integer" value="1" size="10" label="Trim from the beginning up to this position" help="Only positive positions allowed. 1 = do not trim the beginning"/>
 10        <param name="end" type="integer" value="0" size="10" label="Remove everything from this position to the end" help="Use negative position to indicate position starting from the end. 0 = do not trim the end"/>
 11        <param name="fastq" type="select" label="Is input dataset in fastq format?" help="If set to YES, the tool will not trim evenly numbered lines (0, 2, 4, etc...). This allows for trimming the seq and qual lines, only if they are not spread over multiple lines (see warning below).">
 12            <option value="" selected="true">No</option>
 13            <option value="-q">Yes</option>
 14        </param>
 15        <param name="ignore" type="select" multiple="True" display="checkboxes" label="Ignore lines beginning with these characters" help="lines beginning with these are not trimmed"> <!-- Each option value is the decimal ASCII code of the character it displays. -->
 16            <option value="62">&gt;</option>
 17            <option value="64">@</option>
 18            <option value="43">+</option>
 19            <option value="60">&lt;</option>
 20            <option value="42">*</option>
 21            <option value="45">-</option>
 22            <option value="61">=</option>
 23            <option value="124">|</option>
 24            <option value="63">?</option>
 25            <option value="36">$</option>
 26            <option value="46">.</option>
 27            <option value="58">:</option>
 28            <option value="38">&amp;</option>
 29            <option value="37">%</option>
 30            <option value="94">^</option>
 31            <option value="35">#</option>
 32        </param>
 33    </inputs>
 34    <outputs> <!-- The result takes its datatype and metadata from input1; format_source names that input explicitly in place of the legacy format="input" value. -->
 35        <data name="out_file1" format_source="input1" metadata_source="input1"/>
 36    </outputs>
 37    <tests>
 38        <test> <!-- Entire line (col 0): beginning untouched (start 1), end position 13. -->
 39            <param name="input1" value="trimmer_tab_delimited.dat"/>
 40            <param name="col" value="0"/>
 41            <param name="start" value="1"/>
 42            <param name="end" value="13"/>
 43            <param name="fastq" value="No"/>
 44            <param name="ignore" value="62"/>
 45            <output name="out_file1" file="trimmer_a_f_c0_s1_e13_i62.dat"/>
 46        </test>
 47        <test> <!-- Column 2 only: beginning untouched (start 1), end position 2. -->
 48            <param name="input1" value="trimmer_tab_delimited.dat"/>
 49            <param name="col" value="2"/>
 50            <param name="start" value="1"/>
 51            <param name="end" value="2"/>
 52            <param name="fastq" value="No"/>
 53            <param name="ignore" value="62"/>
 54            <output name="out_file1" file="trimmer_a_f_c2_s1_e2_i62.dat"/>
 55        </test>
 56        <test> <!-- Column 2 only: start position 2, negative end position (counted from the end of the field). -->
 57            <param name="input1" value="trimmer_tab_delimited.dat"/>
 58            <param name="col" value="2"/>
 59            <param name="start" value="2"/>
 60            <param name="end" value="-2"/>
 61            <param name="fastq" value="No"/>
 62            <param name="ignore" value="62"/>
 63            <output name="out_file1" file="trimmer_a_f_c2_s2_e-2_i62.dat"/>
 64        </test>
 65    </tests>
 66
 67    <help>
 68
 69
 70**What it does**
 71
 72Trims a specified number of characters from a dataset or one of its fields (if the dataset is tab-delimited).
 73
 74-----
 75
 76**Example 1**
 77
 78Trimming this dataset::
 79
 80  1234567890
 81  abcdefghijk
 82
 83by setting **Trim from the beginning up to this position** to *2* and **Remove everything from this position to the end** to *6* will produce::
 84
 85  23456
 86  bcdef
 87
 88-----
 89
 90**Example 2**
 91
 92Trimming column 2 of this dataset::
 93
 94  abcde 12345 fghij 67890
 95  fghij 67890 abcde 12345
 96
 97by setting **Trim this column only** to *2*, **Trim from the beginning up to this position** to *2*, and **Remove everything from this position to the end** to *4* will produce::
 98
 99  abcde  234 fghij 67890
100  fghij  789 abcde 12345
101
102-----
103
104**Example 3**
105
106Trimming column 2 of this dataset::
107
108  abcde 12345 fghij 67890
109  fghij 67890 abcde 12345
110
111by setting **Trim this column only** to *2*, **Trim from the beginning up to this position** to *2*, and **Remove everything from this position to the end** to *-2* will produce::
112
113  abcde  23 fghij 67890
114  fghij  78 abcde 12345
115
116----
117
118**Trimming FASTQ datasets**
119
120This tool can be used to trim sequences and quality strings in fastq datasets. This is done by selecting *Yes* from the **Is input dataset in fastq format?** dropdown. If set to *Yes*, the tool will skip all even numbered lines (see warning below). For example, trimming the last 5 bases of this dataset::
121
122  @081017-and-081020:1:1:1715:1759
123  GGACTCAGATAGTAATCCACGCTCCTTTAAAATATC
124  +
125  II#IIIIIII$5+.(9IIIIIII$%*$G$A31I&amp;&amp;B
126  
127can be done by setting **Remove everything from this position to the end** to 31::
128
129  @081017-and-081020:1:1:1715:1759
130  GGACTCAGATAGTAATCCACGCTCCTTTAAA
131  +
132  II#IIIIIII$5+.(9IIIIIII$%*$G$A3 
133  
134**Note** that headers are skipped.
135
136.. class:: warningmark
137
138**WARNING:** This tool will only work on properly formatted fastq datasets where (1) each read and quality string occupy one line and (2) '@' (read header) and "+" (quality header) lines are evenly numbered like in the above example.
139
140
141    </help>
142</tool>