PageRenderTime 31ms CodeModel.GetById 23ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/metag_tools/short_reads_trim_seq.xml

https://bitbucket.org/ialbert/galaxy-genetrack
XML | 93 lines | 69 code | 24 blank | 0 comment | 0 complexity | 1d67bb2d47265220dc29cc04ab2115d2 MD5 | raw file
 1<tool id="trim_reads" name="Select high quality segments" version="1.0.0">
 2<description>from short reads</description>
 3
 4<command interpreter="python">
 5 short_reads_trim_seq.py $trim $length $output1 $input1 $input2 $sequencing_method_choice.input3
 6</command>
 7<inputs>
 8<page>
 9    <param name="input1" type="data" format="fasta,txtseq.zip" label="Reads" />
10    <param name="input2" type="data" format="qualsolexa,qual454,txtseq.zip" label="Quality scores" />
11	<param name="trim" type="integer" size="5" value="20" label="Minimal quality score" help="bases scoring below this value will trigger splitting"/>
12    <param name="length" type="integer" size="5" value="100" label="Minimal length of contiguous segment" help="report all high quality segments above this length. Setting this option to '0' will cause the program to return a single longest run of high quality bases per read" />
13    <conditional name="sequencing_method_choice">
14        <param name="sequencer" type="select" label="Select technology">
15            <option value="454">Roche (454) or ABI SOLiD</option>
16            <option value="Solexa">Illumina (Solexa)</option>
17        </param>
18        <when value="454">
19            <param name="input3" type="select" label="Low quality bases in homopolymers" help="if set to 'DO NOT trigger splitting' the program will not count low quality bases that are within or adjacent to homonucleotide runs.  This will significantly reduce fragmentation of 454 data">
20                <option value="yes">DO NOT trigger splitting </option>
21                <option value="no">trigger splitting</option>
22            </param>
23        </when>
24        <when value="Solexa">
25            <param name="input3" type="integer" size="5" value="0" label="Restrict length of each read to" help="('0' = do not trim) The quality of Solexa reads drops towards the end. This option allows selecting the specified number of nucleotides from the beginning and then running the tool." />
26        </when> 
27    </conditional>
28</page>
29</inputs>
30
31<outputs>
32    <data name="output1" format="fasta" />
33</outputs>
34
35<tests>
36	<test>
37		<param name="sequencer" value="454" />
38		<param name="input1" value="454.fasta" ftype="fasta" />
39		<param name="input2" value="454.qual" ftype="qual454" />
40		<param name="input3" value="no" />
41		<param name="trim" value="20" />
42		<param name="length" value="0" />
43		<output name="output1" file="short_reads_trim_seq_out1.fasta" />
44	</test>
45	<test>
46		<param name="sequencer" value="Solexa" />
47		<param name="input1" value="solexa.fasta" ftype="fasta" />
48		<param name="input2" value="solexa.qual" ftype="qualsolexa" />
49		<param name="input3" value="0" />
50		<param name="trim" value="20" />
51		<param name="length" value="0" />
52		<output name="output1" file="short_reads_trim_seq_out2.fasta" />
53	</test>
54</tests>
55
56<help>
57  
58.. class:: warningmark
59
60 To use this tool your quality score dataset needs to be in *Quality Score* format. Click pencil icon next to your dataset to set datatype to *Quality Score*.
61 
62-----
63
64**What it does**
65
66This tool finds high quality segments within sequencing reads generated by by Roche (454), Illumina (Solexa), or ABI SOLiD machines.
67
68-----
69
70**Example**
71
72
73Suppose this is your sequencing read::
74  
75   5'---------*-------------*------**----3'
76   
77where **dashes** (-) are HIGH quality bases (above 20) and **asterisks** (*) are LOW quality bases (below 20). If the **Minimal length of contiguous segment** is set to **5** (of course, only for the purposes of this example), the tool will return::
78
79   5'---------
80               -------------
81                             -------
82
83you can see that the tool simply splits the read on low quality bases and then returns all segments longer than 5.  **Note**, that the output of this tool will likely contain higher number of shorter sequences compared to the original input.   If we set the **Minimal length of contiguous segment** to **0**, the tool will only return the single longest segment::
84
85               -------------
86               
87
88               
89
90
91
92</help>
93</tool>