PageRenderTime 37ms CodeModel.GetById 24ms app.highlight 9ms RepoModel.GetById 1ms app.codeStats 1ms

/tools/peakcalling/ma2c.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 381 lines | 354 code | 26 blank | 1 comment | 0 complexity | 77ed750e8f11286909c3735dc63b3067 MD5 | raw file
  1<tool name="MA2C" id="peakcalling_ma2c">
  2  <description>Peak Calling for ChIP-chip (Nimblegen)</description>
  3  <command>/bin/bash $shscript</command> <!-- runs the generated "shscript" configfile with bash; no interpreter attribute is needed because bash is given by absolute path -->
  4  <inputs> <!-- Form fields; their values are substituted into the two configfile templates of this tool. -->
  5    <repeat name="replicates" title="Replicate"> <!-- Repeatable group: one treatment/control file pair per entry. -->
  6      <param name="tfile" type="data" label="Treatment file"/>
  7      <param name="cfile" type="data" label="Control file"/>
  8    </repeat>
  9    <param name="ndf" type="data" label="NDF file"/>
 10    <param name="pos" type="data" label="POS file" optional="true"/> <!-- Optional: both templates compare it against "None" before using it. -->
 11    <param name="bandwidth" type="text" label="Bandwidth" value="300"> <!-- Written as BANDWIDTH in the [peak detection] section. -->
 12      <validator type="in_range" max="1000" min="100" message="Bandwidth is out of range, Bandwidth has to be between 100 to 1000" />
 13    </param>
 14    <param name="max_gap" type="text" label="Max Gap" value="250"> <!-- Written as MAX_GAP in the [peak detection] section. -->
 15      <validator type="in_range" max="1000" min="100" message="Max_Gap is out of range, Max_Gap has to be between 100 to 1000" />
 16    </param>
 17    <param name="min_probes" type="text" label="Min Probes" value="5"> <!-- Written as MIN_PROBES in the [peak detection] section. -->
 18      <validator type="in_range" max="10" min="2" message="Min_Probes is out of range, Min_Probes has to be between 2 to 10" />
 19    </param>
 20    <conditional name="threshold"> <!-- The chosen method and its cutoff become METHOD and THRESHOLD in [peak detection]. -->
 21      <param name="method1" type="select" label="Threshold method">
 22        <option value="Pvalue">Pvalue</option>
 23        <option value="FDR">FDR</option>
 24      </param>
 25      <when value="Pvalue">
 26        <param name="value" type="text" label="Value (e.g. P=0.00001)" value="0.00001">
 27          <validator type="in_range" max="1" min="0" message="Pvalue is out of range, Pvalue has to be between 0 to 1" />
 28        </param>
 29      </when>
 30      <when value="FDR">
 31        <param name="value" type="text" label="Value (e.g. FDR=5 for 5%)" value="5">
 32          <validator type="in_range" max="100" min="0" message="FDR is out of range, FDR has to be between 0 to 100" />
 33        </param>
 34      </when>
 35    </conditional>
 36    <conditional name="normalization"> <!-- The chosen method becomes METHOD in the [normalization] section. -->
 37      <param name="method2" type="select" label="Normalization Method">
 38        <option value="Robust">Robust</option>
 39        <option value="Simple">Simple</option>
 40      </param>
 41      <when value="Robust">
 42        <!-- NOTE: the C value is offered here, and written to the config file as C, only when Robust is selected. -->
 43        <param name="cvalue" type="text" label="C value" value="2">
 44          <validator type="in_range" max="5" min="2" message="The C value for the robust normalization is out of range, the parameter has to be between 2 to 5" />
 45        </param>
 46      </when>
 47      <when value="Simple"></when> <!-- Simple normalization takes no extra parameter. -->
 48    </conditional>
 49  </inputs>
 50  <configfiles>
 51    <configfile name="tag_file">
 52[sample]
 53#set $tmp = ""
 54#set $tmp2 = ""
 55#set $tmp3 = ""
 56#for $rep in $replicates
 57#set $tmp = $tmp + str($rep.tfile) + " "
 58#set $tmp2 = $tmp2 + str($rep.cfile) + " "
 59#set $tmp3 = $tmp3 + "chip_id_foo "
 60#end for
 61IP_FILE = $tmp
 62INPUT_FILE = $tmp2
 63NDF_FILE = $ndf
 64##The POS file is optional: the POS_FILE entry is written only when one was selected
 65#if $pos.name != "None": 
 66POS_FILE = $pos
 67#end if
 68DESIGN_ID = design_id_foo
 69CHIP_ID = $tmp3
 70
 71[peak detection]
 72METHOD = $threshold.method1
 73BANDWIDTH = $bandwidth
 74MAX_GAP = $max_gap
 75MIN_PROBES = $min_probes
 76THRESHOLD = $threshold.value
 77
 78[normalization]
 79METHOD = $normalization.method2
 80##The C entry is written only for Robust normalization (cvalue exists only in that branch)
 81#if $normalization.method2 == "Robust": 
 82C = $normalization.cvalue
 83#end if
 84    </configfile>
 85    <configfile name="shscript">
 86#!/bin/bash
 87## chr(62), chr(38) and chr(36) are the greater-than sign, the ampersand and the dollar sign, kept in variables so they can be emitted literally into the script
 88#set $gt = chr(62)
 89#set $ad = chr(38)
 90#set $dollar = chr (36)
 91
 92#for $rep in $replicates
 93#set $tmp1 = str($rep.tfile)
 94#set $tmp2 = str($rep.cfile)
 95
 96    tfilesize=`du -b $tmp1 | awk '{print ${dollar}1}'`
 97    cfilesize=`du -b $tmp2 | awk '{print ${dollar}1}'`
 98    if [[ ${dollar}tfilesize -gt 1048576000 ]];then
 99        echo "treatment file is too big! 1G is the maximum!" ${gt}${ad}2
100        exit 1
101    fi
102
103    if [[ ${dollar}cfilesize -gt 1048576000 ]];then
104        echo "control file is too big! 1G is the maximum!" ${gt}${ad}2
105        exit 1
106    fi
107#end for
108ndfsize=`du -b $ndf | awk '{print ${dollar}1}'`
109
110if [[ ${dollar}ndfsize -gt 1048576000 ]];then
111    echo "ndf file is too big! 1G is the maximum!" ${gt}${ad}2
112    exit 1
113fi
114
115if [ "$pos" != "None" ];then
116    possize=`du -b $pos | awk '{print ${dollar}1}'`
117    
118    if [[ ${dollar}possize -gt 1048576000 ]];then
119        echo "pos file is too big! 1G is the maximum!" ${gt}${ad}2
120        exit 1
121    fi
122fi
123
124mv $tag_file ${tag_file}.tag
125
126ma2c `basename ${tag_file}`.tag ${ad}${gt} $log
127mv MA2C_Output/`basename ${tag_file}`_peaks.bed $bedoutput
128gunzip MA2C_Output/`basename ${tag_file}`_MA2Cscore.wig.gz
129mv MA2C_Output/`basename ${tag_file}`_MA2Cscore.wig $wigoutput
130
131    </configfile>
132  </configfiles>
133  <outputs> <!-- All three datasets are filled in by the shscript configfile. -->
134    <data format="bed" name="bedoutput" /> <!-- the _peaks.bed file moved out of MA2C_Output -->
135    <data format="wig" name="wigoutput" /> <!-- the gunzipped _MA2Cscore.wig file moved out of MA2C_Output -->
136    <data format="txt" name="log" label="MA2C job log" /> <!-- combined stdout and stderr of the ma2c run -->
137  </outputs>
138  <tests> <!-- Each test checks the three declared outputs (bedoutput, wigoutput, log) against the expected files under ma2c_N/. -->
139    <test maxseconds="3600" name="TreatmentFile">
140      <param name="tfile" value="ma2c_treatment_ce4.pair" />
141      <param name="cfile" value="ma2c_control_ce4.pair" />
142      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
143      <param name="pos" value="ma2c_pos_ce4.pos" />
144      <param name="bandwidth" value="300" />
145      <param name="max_gap" value="250" />
146      <param name="min_probes" value="5" />
147      <param name="method1" value="Pvalue" />
148      <param name="value" value="0.001" />
149      <param name="method2" value="Robust" />
150      <param name="cvalue" value="2" />
151      <output name="bedoutput" file="ma2c_1/sample_peaks.bed" />
152      <output name="wigoutput" file="ma2c_1/sample_MA2Cscore.wig" lines_diff="2" />
153      <output name="log" file="ma2c_1/sample_log.bed" lines_diff="100" />
154    </test>
155    <test maxseconds="3600" name="ControlFile">
156      <param name="tfile" value="ma2c_treatment_ce4.pair" />
157      <param name="cfile" value="ma2c_control_ce4.pair" />
158      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
159      <param name="pos" value="ma2c_pos_ce4.pos" />
160      <param name="bandwidth" value="300" />
161      <param name="max_gap" value="250" />
162      <param name="min_probes" value="5" />
163      <param name="method1" value="Pvalue" />
164      <param name="value" value="0.001" />
165      <param name="method2" value="Robust" />
166      <param name="cvalue" value="2" />
167      <output name="bedoutput" file="ma2c_2/sample_peaks.bed" />
168      <output name="wigoutput" file="ma2c_2/sample_MA2Cscore.wig" lines_diff="2" />
169      <output name="log" file="ma2c_2/sample_log.bed" lines_diff="100" />
170    </test>
171    <test maxseconds="3600" name="NDFFile">
172      <param name="tfile" value="ma2c_treatment_ce4.pair" />
173      <param name="cfile" value="ma2c_control_ce4.pair" />
174      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
175      <param name="pos" value="ma2c_pos_ce4.pos" />
176      <param name="bandwidth" value="300" />
177      <param name="max_gap" value="250" />
178      <param name="min_probes" value="5" />
179      <param name="method1" value="Pvalue" />
180      <param name="value" value="0.001" />
181      <param name="method2" value="Robust" />
182      <param name="cvalue" value="2" />
183      <output name="bedoutput" file="ma2c_3/sample_peaks.bed" />
184      <output name="wigoutput" file="ma2c_3/sample_MA2Cscore.wig" lines_diff="2" />
185      <output name="log" file="ma2c_3/sample_log.bed" lines_diff="100" />
186    </test>
187    <test maxseconds="3600" name="POSFile">
188      <param name="tfile" value="ma2c_treatment_ce4.pair" />
189      <param name="cfile" value="ma2c_control_ce4.pair" />
190      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
191      <param name="pos" value="ma2c_pos_ce4.pos" />
192      <param name="bandwidth" value="300" />
193      <param name="max_gap" value="250" />
194      <param name="min_probes" value="5" />
195      <param name="method1" value="Pvalue" />
196      <param name="value" value="0.001" />
197      <param name="method2" value="Robust" />
198      <param name="cvalue" value="2" />
199      <output name="bedoutput" file="ma2c_4/sample_peaks.bed" />
200      <output name="wigoutput" file="ma2c_4/sample_MA2Cscore.wig" lines_diff="2" />
201      <output name="log" file="ma2c_4/sample_log.bed" lines_diff="100" />
202    </test>
203    <test maxseconds="3600" name="BandWidth">
204      <param name="tfile" value="ma2c_treatment_ce4.pair" />
205      <param name="cfile" value="ma2c_control_ce4.pair" />
206      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
207      <param name="pos" value="ma2c_pos_ce4.pos" />
208      <param name="bandwidth" value="300" />
209      <param name="max_gap" value="250" />
210      <param name="min_probes" value="5" />
211      <param name="method1" value="Pvalue" />
212      <param name="value" value="0.001" />
213      <param name="method2" value="Robust" />
214      <param name="cvalue" value="2" />
215      <output name="bedoutput" file="ma2c_5/sample_peaks.bed" />
216      <output name="wigoutput" file="ma2c_5/sample_MA2Cscore.wig" lines_diff="2" />
217      <output name="log" file="ma2c_5/sample_log.bed" lines_diff="100" />
218    </test>
219    <test maxseconds="3600" name="MaxGap">
220      <param name="tfile" value="ma2c_treatment_ce4.pair" />
221      <param name="cfile" value="ma2c_control_ce4.pair" />
222      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
223      <param name="pos" value="ma2c_pos_ce4.pos" />
224      <param name="bandwidth" value="300" />
225      <param name="max_gap" value="250" />
226      <param name="min_probes" value="5" />
227      <param name="method1" value="Pvalue" />
228      <param name="value" value="0.001" />
229      <param name="method2" value="Robust" />
230      <param name="cvalue" value="2" />
231      <output name="bedoutput" file="ma2c_6/sample_peaks.bed" />
232      <output name="wigoutput" file="ma2c_6/sample_MA2Cscore.wig" lines_diff="2" />
233      <output name="log" file="ma2c_6/sample_log.bed" lines_diff="100" />
234    </test>
235    <test maxseconds="3600" name="MinProbes">
236      <param name="tfile" value="ma2c_treatment_ce4.pair" />
237      <param name="cfile" value="ma2c_control_ce4.pair" />
238      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
239      <param name="pos" value="ma2c_pos_ce4.pos" />
240      <param name="bandwidth" value="300" />
241      <param name="max_gap" value="250" />
242      <param name="min_probes" value="5" />
243      <param name="method1" value="Pvalue" />
244      <param name="value" value="0.001" />
245      <param name="method2" value="Robust" />
246      <param name="cvalue" value="2" />
247      <output name="bedoutput" file="ma2c_7/sample_peaks.bed" />
248      <output name="wigoutput" file="ma2c_7/sample_MA2Cscore.wig" lines_diff="2" />
249      <output name="log" file="ma2c_7/sample_log.bed" lines_diff="100" />
250    </test>
251    <test maxseconds="3600" name="Pvalue">
252      <param name="tfile" value="ma2c_treatment_ce4.pair" />
253      <param name="cfile" value="ma2c_control_ce4.pair" />
254      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
255      <param name="pos" value="ma2c_pos_ce4.pos" />
256      <param name="bandwidth" value="300" />
257      <param name="max_gap" value="250" />
258      <param name="min_probes" value="5" />
259      <param name="method1" value="Pvalue" />
260      <param name="value" value="0.001" />
261      <param name="method2" value="Robust" />
262      <param name="cvalue" value="2" />
263      <output name="bedoutput" file="ma2c_8/sample_peaks.bed" />
264      <output name="wigoutput" file="ma2c_8/sample_MA2Cscore.wig" lines_diff="2" />
265      <output name="log" file="ma2c_8/sample_log.bed" lines_diff="100" />
266    </test>
267    <test maxseconds="3600" name="FDR">
268      <param name="tfile" value="ma2c_treatment_ce4.pair" />
269      <param name="cfile" value="ma2c_control_ce4.pair" />
270      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
271      <param name="pos" value="ma2c_pos_ce4.pos" />
272      <param name="bandwidth" value="300" />
273      <param name="max_gap" value="250" />
274      <param name="min_probes" value="5" />
275      <param name="method1" value="FDR" />
276      <param name="value" value="5" />
277      <param name="method2" value="Robust" />
278      <param name="cvalue" value="2" />
279      <output name="bedoutput" file="ma2c_9/sample_peaks.bed" />
280      <output name="wigoutput" file="ma2c_9/sample_MA2Cscore.wig" lines_diff="2" />
281      <output name="log" file="ma2c_9/sample_log.bed" lines_diff="100" />
282    </test>
283    <test maxseconds="3600" name="Robust">
284      <param name="tfile" value="ma2c_treatment_ce4.pair" />
285      <param name="cfile" value="ma2c_control_ce4.pair" />
286      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
287      <param name="pos" value="ma2c_pos_ce4.pos" />
288      <param name="bandwidth" value="300" />
289      <param name="max_gap" value="250" />
290      <param name="min_probes" value="5" />
291      <param name="method1" value="Pvalue" />
292      <param name="value" value="0.001" />
293      <param name="method2" value="Robust" />
294      <param name="cvalue" value="2" />
295      <output name="bedoutput" file="ma2c_10/sample_peaks.bed" />
296      <output name="wigoutput" file="ma2c_10/sample_MA2Cscore.wig" lines_diff="2" />
297      <output name="log" file="ma2c_10/sample_log.bed" lines_diff="100" />
298    </test>
299    <test maxseconds="3600" name="Simple_1">
300      <param name="tfile" value="ma2c_treatment_ce4.pair" />
301      <param name="cfile" value="ma2c_control_ce4.pair" />
302      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
303      <param name="pos" value="ma2c_pos_ce4.pos" />
304      <param name="bandwidth" value="300" />
305      <param name="max_gap" value="250" />
306      <param name="min_probes" value="5" />
307      <param name="method1" value="Pvalue" />
308      <param name="value" value="0.001" />
309      <param name="method2" value="Simple" />
310      <output name="bedoutput" file="ma2c_11/sample_peaks.bed" />
311      <output name="wigoutput" file="ma2c_11/sample_MA2Cscore.wig" lines_diff="2" />
312      <output name="log" file="ma2c_11/sample_log.bed" lines_diff="100" />
313    </test>
314    <test maxseconds="3600" name="Simple_2">
315      <param name="tfile" value="ma2c_treatment_ce4.pair" />
316      <param name="cfile" value="ma2c_control_ce4.pair" />
317      <param name="ndf" value="ma2c_ndf_ce4.ndf" />
318      <param name="pos" value="ma2c_pos_ce4.pos" />
319      <param name="bandwidth" value="300" />
320      <param name="max_gap" value="250" />
321      <param name="min_probes" value="5" />
322      <param name="method1" value="Pvalue" />
323      <param name="value" value="0.001" />
324      <param name="method2" value="Simple" />
325      <param name="cvalue" value="231" />
326      <output name="bedoutput" file="ma2c_12/sample_peaks.bed" />
327      <output name="wigoutput" file="ma2c_12/sample_MA2Cscore.wig" lines_diff="2" />
328      <output name="log" file="ma2c_12/sample_log.bed" lines_diff="100" />
329    </test>
330  </tests>
331  <help>
332This tool performs peak calling for ChIP-chip (Nimblegen) data. MA2C
333was developed in Xiaole Shirley Liu's lab by Jun Song, and rewritten
334in Python by Tao Liu. The original Java version was published in Genome
335Biology (pubmed: 17727723). The version deployed here is pMA2C 1.1.3.
336
337.. class:: infomark
338
339**TIP:** Please first upload your treatment and control files using the **Upload File from your computer** tool.
340
341.. class:: warningmark
342
343**NEED IMPROVEMENT**
344
345-----
346
347**Parameters**
348
349- **Replicates** click *Add new Replicate* button to choose NimbleGen
350  pair data files from history.
351- **Treatment file** The input file for ChIP/treatment channel chosen from the
352  history.
353- **Control file** The input file for input/control channel chosen
354  from the history. 
355- **NDF file** is the NimbleGen design file.
356- **POS file** is the optional NimbleGen position file.
357- **Bandwidth** is the bandwidth to detect peaks.
358- **Max Gap** is the maximum gap allowed for joining two significant
359  probes to call peak.
360- **Min Probes** is the minimum number of probes required in the sliding window
361  centered at each probe; a probe having fewer probes than this
362  required number in its window will be ignored in the analysis.
363- **Threshold method** is the criterion used for detecting
364  ChIP-enriched regions. This form offers Pvalue or FDR (the MA2C score option is not exposed here).
365- **Value** is the cutoff used in the threshold method.
366- **Normalization Method** is the normalization method. Choices are
367  Robust or Simple normalization.
368- **C value** is the parameter only for Robust normalization method.
369
370-----
371
372**Outputs**
373
374- **BED file** for peak locations in BED format
375- **WIGGLE file** for MA2C scores in WIGGLE format
376- **MA2C job log** with the messages printed by MA2C during the run
377
378  </help>
379
380</tool>
381