PageRenderTime 22ms CodeModel.GetById 16ms app.highlight 3ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/samtools/pileup_interval.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 189 lines | 159 code | 30 blank | 0 comment | 0 complexity | 2887b1dc5218945cf7235d217e0c1d33 MD5 | raw file
  1<tool id="pileup_interval" name="Pileup-to-Interval" version="1.0.0">
  2  <description>condenses pileup format into ranges of bases</description>
  3  <requirements>
       <!-- The tool declares a single package requirement: samtools. -->
  4    <requirement type="package">samtools</requirement>
  5  </requirements>
     <!--
       Command template: runs pileup_interval.py with interpreter="python".
       The input, output, coverage and format options are always passed.
       The base, seq_column, loc_column, base_column and cvrg_column options
       are also always passed; the selected format decides which of them carry
       form values and which carry the literal string "None":
         * format "ten":    only the base option is taken from the form (which_base).
         * format "manual": the four column options are taken from the form.
         * anything else:   all five are "None" (this is the branch used by "six").
     -->
  6  <command interpreter="python">
  7    pileup_interval.py 
  8      --input=$input 
  9      --output=$output 
 10      --coverage=$coverage
 11      --format=$format_type.format
 12      #if $format_type.format == "ten":
 13       --base=$format_type.which_base
 14       --seq_column="None"
 15       --loc_column="None"
 16       --base_column="None"
 17       --cvrg_column="None"
 18      #elif $format_type.format == "manual":
 19       --base="None"
 20       --seq_column=$format_type.seq_column
 21       --loc_column=$format_type.loc_column
 22       --base_column=$format_type.base_column
 23       --cvrg_column=$format_type.cvrg_column
 24      #else:
 25       --base="None"
 26       --seq_column="None"
 27       --loc_column="None"
 28       --base_column="None"
 29       --cvrg_column="None"
 30      #end if
 31  </command>
 32  <inputs>
       <!--
         Form parameters, referenced by name in the command template:
           * input:       the tabular pileup dataset to condense.
           * format_type: conditional keyed on the "format" select; "six" is the
                          default choice and adds no further parameters.
           * coverage:    integer threshold, default 3.
       -->
 33    <param name="input" type="data" format="tabular" label="Choose a pileup file to condense:" />
 34    <conditional name="format_type">
 35      <param name="format" type="select" label="which contains:" help="See &quot;Types of pileup datasets&quot; below for examples">
 36        <option value="six" selected="true">Pileup with six columns (simple)</option>
 37        <option value="ten">Pileup with ten columns (with consensus)</option>
 38        <option value="manual">Set columns manually</option>
 39      </param>
 40      <when value="six" />
         <!-- Ten-column layout: the user only chooses which base column to use. -->
 41      <when value="ten">
 42        <param name="which_base" type="select" label="Which base do you want to concatenate">
 43          <option value="first" selected="true">Reference base (first)</option>
 44          <option value="second">Consensus base (second)</option>
 45        </param>
 46      </when>
         <!-- Manual layout: every column is picked from the dataset chosen in "input"
              (data_ref="input"); only the coverage column is declared numerical. -->
 47      <when value="manual">
 48        <param name="seq_column" label="Select column with sequence name" type="data_column" numerical="false" data_ref="input" />
 49        <param name="loc_column" label="Select column with base location" type="data_column" numerical="false" data_ref="input" />
 50        <param name="base_column" label="Select column with base to concatenate" type="data_column" numerical="false" data_ref="input" />
 51        <param name="cvrg_column" label="Select column with coverage" type="data_column" numerical="true" data_ref="input" />
 52      </when>
 53    </conditional>
 54    <param name="coverage" type="integer" value="3" label="Do not report bases with coverage less than:" />
 55  </inputs>
 56  <outputs>
       <!-- Single tabular output dataset; the command template refers to it as $output. -->
 57    <data format="tabular" name="output" />
 58  </outputs>
 59  <tests>
       <!--
         Three functional tests, one per "format" choice, all with coverage 3.
         The "ten" and "manual" tests share the same input file and the same
         expected output file; the manual test selects columns 1, 2, 3 and 8,
         which the help text lists as chromosome, position, reference base and
         coverage in the ten-column layout.
       -->
 60    <test>
 61      <param name="input" value="pileup_interval_in1.tabular" />
 62      <param name="format" value="six" />
 63      <param name="coverage" value="3" />
 64      <output name="output" file="pileup_interval_out1.tabular" />
 65    </test>
 66    <test>
 67      <param name="input" value="pileup_interval_in2.tabular" />
 68      <param name="format" value="ten" />
 69      <param name="which_base" value="first" />
 70      <param name="coverage" value="3" />
 71      <output name="output" file="pileup_interval_out2.tabular" />
 72    </test>
 73    <test>
 74      <param name="input" value="pileup_interval_in2.tabular" />
 75      <param name="format" value="manual" />
 76      <param name="seq_column" value="1" />
 77      <param name="loc_column" value="2" />
 78      <param name="base_column" value="3" />
 79      <param name="cvrg_column" value="8" />
 80      <param name="coverage" value="3" />
 81      <output name="output" file="pileup_interval_out2.tabular" />
 82    </test>
 83  </tests> 
 84  <help>
 85    	
 86**What it does**
 87
 88Reduces the size of a result set by taking a pileup file and producing a condensed version showing consecutive sequences of bases meeting coverage criteria. The tool works on six- and ten-column pileup formats produced with the *samtools pileup* command. You can also specify columns for the input file manually. The tool assumes that the pileup dataset was produced by the *samtools pileup* command (although you can override this by setting column assignments manually).
 89
 90--------
 91
 92**Types of pileup datasets**
 93
 94The description of the pileup format below is largely based on information that can be found on the SAMTools_ documentation page. The 6- and 10-column variants are described below.
 95
 96.. _SAMTools: http://samtools.sourceforge.net/pileup.shtml
 97
 98**Six column pileup**::
 99
100    1    2  3  4        5        6
101 ---------------------------------   
102 chrM  412  A  2       .,       II
103 chrM  413  G  4     ..t,     IIIH
104 chrM  414  C  4     ...a     III2
105 chrM  415  C  4     TTTt     III7
106   
107where::
108
109 Column Definition
110 ------ ----------------------------
111      1 Chromosome
112      2 Position (1-based)
113      3 Reference base at that position
114      4 Coverage (# reads aligning over that position)
115      5 Bases within reads (see Galaxy wiki for more info)
116      6 Quality values (phred33 scale, see Galaxy wiki for more)
117       
118**Ten column pileup**
119
120The `ten-column`__ pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::
121
122
123    1    2  3  4   5   6   7   8       9       10
124 ------------------------------------------------
125 chrM  412  A  A  75   0  25  2       .,       II
126 chrM  413  G  G  72   0  25  4     ..t,     IIIH
127 chrM  414  C  C  75   0  25  4     ...a     III2
128 chrM  415  C  T  75  75  25  4     TTTt     III7
129
130where::
131
132  Column Definition
133 ------- ----------------------------
134       1 Chromosome
135       2 Position (1-based)
136       3 Reference base at that position
137       4 Consensus bases
138       5 Consensus quality
139       6 SNP quality
140       7 Maximum mapping quality
141       8 Coverage (# reads aligning over that position)
142       9 Bases within reads (see Galaxy wiki for more info)
143      10 Quality values (phred33 scale, see Galaxy wiki for more)
144
145
146.. __: http://samtools.sourceforge.net/cns0.shtml
147
148------
149
150**The output format**
151
152The output file condenses the information in the pileup file so that consecutive bases are listed together as sequences. The starting and ending points of the sequence range are listed, with the starting value converted to a 0-based value. 
153
154Given the following input with minimum coverage set to 3::
155
156    1    2  3  4        5        6
157 ---------------------------------   
158 chr1  112  G  3     ..Ta     III6
159 chr1  113  T  2     aT..     III5
160 chr1  114  A  5     ,,..     IIH2
161 chr1  115  C  4      ,.,      III
162 chrM  412  A  2       .,       II
163 chrM  413  G  4     ..t,     IIIH
164 chrM  414  C  4     ...a     III2
165 chrM  415  C  4     TTTt     III7
166 chrM  490  T  3        a        I
167 
168the following would be the output::
169 
170    1    2    3  4
171 -------------------
172 chr1  111  112  G
173 chr1  113  115  AC
174 chrM  412  415  GCC
175 chrM  489  490  T
176
177where::
178
179  Column Definition
180 ------- ----------------------------
181       1 Chromosome
182       2 Starting position (0-based)
183       3 Ending position (1-based)
184       4 Sequence of bases
185 	
186  </help>
187</tool>
188
189