/tools/peakcalling/ma2c.xml
https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 381 lines · 354 code · 26 blank · 1 comment · 0 complexity · 77ed750e8f11286909c3735dc63b3067 MD5 · raw file
- <tool name="MA2C" id="peakcalling_ma2c">
- <!-- Short description of the tool, followed by the command line that runs the generated "shscript" configfile with /bin/bash. -->
- <description>Peak Calling for ChIP-chip (Nimblegen)</description>
- <!-- NOTE(review): interpreter="command" is unusual; presumably it resolves to the shell builtin of that name. Confirm against the Galaxy version in use. -->
- <command interpreter="command">/bin/bash $shscript</command>
- <inputs>
- <!-- Each replicate pairs one treatment file with one control file. -->
- <repeat title="Replicate" name="replicates">
- <param name="tfile" label="Treatment file" type="data" />
- <param name="cfile" label="Control file" type="data" />
- </repeat>
- <!-- Array design files; the POS file may be left unset. -->
- <param name="ndf" label="NDF file" type="data" />
- <param name="pos" label="POS file" type="data" optional="true" />
- <!-- Peak detection settings, each constrained by an in_range validator. -->
- <param name="bandwidth" label="Bandwidth" type="text" value="300">
- <validator type="in_range" min="100" max="1000" message="Bandwidth is out of range, Bandwidth has to be between 100 to 1000"/>
- </param>
- <param name="max_gap" label="Max Gap" type="text" value="250">
- <validator type="in_range" min="100" max="1000" message="Max_Gap is out of range, Max_Gap has to be between 100 to 1000"/>
- </param>
- <param name="min_probes" label="Min Probes" type="text" value="5">
- <validator type="in_range" min="2" max="10" message="Min_Probes is out of range, Min_Probes has to be between 2 to 10"/>
- </param>
- <!-- The cutoff parameter is named "value" in both branches; only its label, default and allowed range differ. -->
- <conditional name="threshold">
- <param name="method1" label="Threshold method" type="select">
- <option value="Pvalue">Pvalue</option>
- <option value="FDR">FDR</option>
- </param>
- <when value="Pvalue">
- <param name="value" label="Value (e.g. P=0.00001)" type="text" value="0.00001">
- <validator type="in_range" min="0" max="1" message="Pvalue is out of range, Pvalue has to be between 0 to 1"/>
- </param>
- </when>
- <when value="FDR">
- <param name="value" label="Value (e.g. FDR=5 for 5%)" type="text" value="5">
- <validator type="in_range" min="0" max="100" message="FDR is out of range, FDR has to be between 0 to 100"/>
- </param>
- </when>
- </conditional>
- <conditional name="normalization">
- <param name="method2" label="Normalization Method" type="select">
- <option value="Robust">Robust</option>
- <option value="Simple">Simple</option>
- </param>
- <when value="Robust">
- <!-- The C value is only offered for Robust normalization. -->
- <param name="cvalue" label="C value" type="text" value="2">
- <validator type="in_range" min="2" max="5" message="The C value for the robust normalization is out of range, the parameter has to be between 2 to 5"/>
- </param>
- </when>
- <!-- Simple normalization takes no extra parameters. -->
- <when value="Simple"/>
- </conditional>
- </inputs>
- <configfiles>
- <configfile name="tag_file">
- ## Tag file handed to ma2c: sample files, peak detection settings and normalization settings.
- [sample]
- ## Build space-separated lists with one entry per replicate.
- #set $ip_files = ""
- #set $input_files = ""
- #set $chip_ids = ""
- #for $rep in $replicates
- #set $ip_files = $ip_files + str($rep.tfile) + " "
- #set $input_files = $input_files + str($rep.cfile) + " "
- #set $chip_ids = $chip_ids + "chip_id_foo "
- #end for
- IP_FILE = $ip_files
- INPUT_FILE = $input_files
- NDF_FILE = $ndf
- ##POS files are optional
- ## Test the rendered value, as the shscript configfile does, instead of the dataset's display name,
- ## so that a dataset which merely happens to be named "None" is still written out.
- #if str($pos) != "None":
- POS_FILE = $pos
- #end if
- DESIGN_ID = design_id_foo
- CHIP_ID = $chip_ids
- [peak detection]
- METHOD = $threshold.method1
- BANDWIDTH = $bandwidth
- MAX_GAP = $max_gap
- MIN_PROBES = $min_probes
- THRESHOLD = $threshold.value
- [normalization]
- METHOD = $normalization.method2
- ##C values are only relevant w/ Robust
- #if $normalization.method2 == "Robust":
- C = $normalization.cvalue
- #end if
- </configfile>
- <configfile name="shscript">
- #!/bin/bash
- ## Shell metacharacters are produced with chr() so they appear literally in the rendered script:
- ## chr(62) is the greater-than sign, chr(38) the ampersand and chr(36) the dollar sign.
- #set $gt = chr(62)
- #set $ad = chr(38)
- #set $dollar = chr(36)
- ## Refuse any input larger than 1048576000 bytes. Each failure writes to stderr and exits
- ## with status 1, so the job is also reported as failed when only the exit code is inspected.
- #for $rep in $replicates
- #set $treatment_path = str($rep.tfile)
- #set $control_path = str($rep.cfile)
- tfilesize=`du -b $treatment_path | awk '{print ${dollar}1}'`
- cfilesize=`du -b $control_path | awk '{print ${dollar}1}'`
- if [[ ${dollar}tfilesize -gt 1048576000 ]];then
- echo "treatment file is too big! 1G is the maximum!" ${gt}${ad}2
- exit 1
- fi
- if [[ ${dollar}cfilesize -gt 1048576000 ]];then
- echo "control file is too big! 1G is the maximum!" ${gt}${ad}2
- exit 1
- fi
- #end for
- ndfsize=`du -b $ndf | awk '{print ${dollar}1}'`
- if [[ ${dollar}ndfsize -gt 1048576000 ]];then
- echo "ndf file is too big! 1G is the maximum!" ${gt}${ad}2
- exit 1
- fi
- ## The POS file is optional; an unset dataset renders as the string None.
- if [ $pos != "None" ];then
- possize=`du -b $pos | awk '{print ${dollar}1}'`
- if [[ ${dollar}possize -gt 1048576000 ]];then
- echo "pos file is too big! 1G is the maximum!" ${gt}${ad}2
- exit 1
- fi
- fi
- ## Rename the tag file with a .tag suffix, run ma2c with stdout and stderr captured in the log
- ## output, then move the peaks BED and the decompressed score WIG to the output datasets.
- ## NOTE(review): ma2c receives only the basename of the tag file, so this relies on the
- ## working directory being the directory that holds the tag file. Confirm for the Galaxy version in use.
- mv $tag_file ${tag_file}.tag
- ma2c `basename ${tag_file}`.tag ${ad}${gt} $log
- mv MA2C_Output/`basename ${tag_file}`_peaks.bed $bedoutput
- gunzip MA2C_Output/`basename ${tag_file}`_MA2Cscore.wig.gz
- mv MA2C_Output/`basename ${tag_file}`_MA2Cscore.wig $wigoutput
- </configfile>
- </configfiles>
- <outputs>
- <!-- Peak calls in BED, the MA2C score track in WIG, and the captured output of the ma2c run. -->
- <data name="bedoutput" format="bed"/>
- <data name="wigoutput" format="wig"/>
- <data name="log" format="txt" label="MA2C job log"/>
- </outputs>
- <tests>
- <test maxseconds="3600" name="TreatmentFile">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_1/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_1/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_1/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="ControlFile">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_2/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_2/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_2/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="NDFFile">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_3/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_3/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_3/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="POSFile">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_4/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_4/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_4/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="BandWidth">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_5/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_5/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_5/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="MaxGap">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_6/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_6/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_6/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="MinProbes">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_7/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_7/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_7/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="Pvalue">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_8/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_8/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_8/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="FDR">
- <!-- FDR threshold (value 5) with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="FDR" />
- <param name="value" value="5" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_9/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_9/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_9/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="Robust">
- <!-- Pvalue threshold with Robust normalization. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Robust" />
- <param name="cvalue" value="2" />
- <output name="bedoutput" file="ma2c_10/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_10/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_10/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="Simple_1">
- <!-- Pvalue threshold with Simple normalization and no C value. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Simple" />
- <output name="bedoutput" file="ma2c_11/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_11/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_11/sample_log.bed" lines_diff="100" />
- </test>
- <test maxseconds="3600" name="Simple_2">
- <!-- Simple normalization with a cvalue supplied anyway, although the Simple branch declares no C value parameter. Each expected file is bound to the declared output it checks: bedoutput, wigoutput, log. -->
- <param name="tfile" value="ma2c_treatment_ce4.pair" />
- <param name="cfile" value="ma2c_control_ce4.pair" />
- <param name="ndf" value="ma2c_ndf_ce4.ndf" />
- <param name="pos" value="ma2c_pos_ce4.pos" />
- <param name="bandwidth" value="300" />
- <param name="max_gap" value="250" />
- <param name="min_probes" value="5" />
- <param name="method1" value="Pvalue" />
- <param name="value" value="0.001" />
- <param name="method2" value="Simple" />
- <param name="cvalue" value="231" />
- <output name="bedoutput" file="ma2c_12/sample_peaks.bed" />
- <output name="wigoutput" file="ma2c_12/sample_MA2Cscore.wig" lines_diff="2" />
- <output name="log" file="ma2c_12/sample_log.bed" lines_diff="100" />
- </test>
- </tests>
- <help>
- This tool performs peak calling for ChIP-chip (Nimblegen) data. MA2C
- was developed in Xiaole Shirley Liu's lab by Jun Song, and rewritten
- in Python by Tao Liu. The original Java version was published in Genome
- Biology (PubMed: 17727723). The version deployed here is pMA2C 1.1.3.
- .. class:: infomark
- **TIP:** Please first upload your treatment and control files using the **Upload File from your computer tool**.
- .. class:: warningmark
- **NEED IMPROVEMENT**
- -----
- **Parameters**
- - **Replicates** click *Add new Replicate* button to choose NimbleGen
- pair data files from history.
- - **Treatment file** The input file for ChIP/treatment channel chosen from the
- history.
- - **Control file** The input file for input/control channel chosen
- from the history.
- - **NDF file** is the NimbleGen design file.
- - **POS file** is the optional NimbleGen design file.
- - **Bandwidth** is the bandwidth to detect peaks.
- - **Max Gap** is the maximum gap allowed for joining two significant
- probes to call peak.
- - **Min Probes** is the minimum number of probes required in the sliding window
- centered at each probe; a probe having fewer probes than this
- required number in its window will be ignored in the analysis.
- - **Threshold method** is the criterion used for detecting
- ChIP-enriched regions. Can be Pvalue or FDR or MA2C score; this form offers Pvalue and FDR.
- - **Value** is the cutoff used in the threshold method.
- - **Normalization Method** is the normalization method. Choices are
- Robust or Simple normalization.
- - **C value** is the parameter only for Robust normalization method.
- -----
- **Outputs**
- - **BED file** for peak locations in BED format
- - **WIGGLE file** for MA2C scores in WIGGLE format
- </help>
- </tool>