/tools/ceas/sitepro.xml
https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 297 lines · 259 code · 38 blank · 0 comment · 0 complexity · 807a1a56f365513dc3284840e6aec882 MD5 · raw file
- <tool name="SitePro: Aggregation plot tool for signal profiling" id="ceas_sitepro">
- <!-- One-line summary of what the tool does. -->
- <description>Draw the score profile near a given interval</description>
- <!-- Runs the "shscript" config file (declared under configfiles) with /bin/bash. -->
- <command interpreter="command">/bin/bash $shscript</command>
- <inputs>
- <!-- The chosen mode decides which combination of wiggle/bigwig and BED inputs is requested. -->
- <conditional name="mode">
- <param name="mode_select" type="select" label="Sitepro behaviour mode" force_select="true">
- <option value="single"> 1 wiggle / bigwig vs 1 BED file</option>
- <option value="multiwig"> multiple wiggle / bigwig vs 1 BED</option>
- <option value="multibed"> multiple BED vs 1 wiggle / bigwig</option>
- </param>
- <when value="single">
- <param ftype="wig" format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
- <param ftype="bed" format="bed" name="bfile" type="data" label="BED file(100,000 lines max)"/>
- </when>
- <when value="multiwig">
- <!-- First wiggle/bigwig file and its label; additional file/label pairs go into the "more" repeat. -->
- <param format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
- <param name="label" type="text" label="Wiggle label" optional="false" />
- <repeat name="more" title="wiggle / bigwig file">
- <param format="wig,bigwig" name="wig" type="data" label="Select another wiggle / bigwig file"/>
- <param name="label" type="text" label="Wiggle label" optional="false" />
- </repeat>
- <!-- The generated script applies the 100,000 line limit to this BED file in every mode, so the label says so here as well. -->
- <param format="bed" name="bfile" type="data" label="BED file(100,000 lines max)"/>
- </when>
- <when value="multibed">
- <!-- First BED file and its label; additional file/label pairs go into the "more" repeat. -->
- <param format="bed" name="bfile" type="data" label="BED file(100,000 lines max)"/>
- <param name="label" type="text" label="BED label" optional="false" />
- <repeat name="more" title="BED file">
- <param format="bed" name="bfile" type="data" label="Select another BED file(100,000 lines max)"/>
- <param name="label" type="text" label="BED label" optional="false" />
- </repeat>
- <param format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
- </when>
- </conditional>
- <!-- Forwarded to sitepro as the span option; must lie in 100..1000000. -->
- <param name="span" type="integer" label="Span" value="1000">
- <validator type="in_range" max="1000000" min="100" message="Span is out of range, Span has to be between 100 to 1000000" />
- </param>
- <!-- Forwarded to sitepro as the pf-res option; must lie in 10..1000. -->
- <param name="pfres" type="integer" label="Profiling Resolution" value="50">
- <validator type="in_range" max="1000" min="10" message="Profiling Resolution is out of range, Profiling Resolution has to be between 10 to 1000" />
- </param>
- <!-- Expands to the dir flag when checked and to a single space otherwise. -->
- <param name="dir" type="boolean" label="consider the direction (+/-) while profiling" checked="no" truevalue="--dir" falsevalue=" " />
- </inputs>
- <outputs>
- <!-- PDF plot: the script moves sitepro_out.pdf to this dataset. -->
- <data name="output" format="pdf" />
- <!-- Receives the redirected stdout/stderr of the sitepro command. -->
- <data name="log" format="txt" label="sitepro job log" />
- <!-- Concatenation of the *_dump.txt files found in the job's working directory. -->
- <data name="dump" format="txt" label="txt file with profiles" />
- </outputs>
- <configfiles>
- <configfile name="shscript">
- #!/bin/bash
- ## Cheetah template that is rendered into the bash script run by the tool command.
- ## Steps: validate the first label (multi modes) and the BED file(s), pick the
- ## sitepro executable from the wiggle datatype, run it, run R on the generated
- ## sitepro_out.R script and move/concatenate the results into the Galaxy outputs.
- ## Every validation failure writes a message to stderr and exits with status 1.
- #import os
- ## Shell/XML special characters are produced via chr() instead of being written literally.
- #set $dollar = chr(36)
- #set $gt = chr(62)
- #set $lt = chr(60)
- #set $ad = chr(38)
- #set $path = $os.path.abspath($__app__.config.tool_path)
- ##REMOVING WIG VALIDATORS
- ##note: validator for wig file size
- ##if [ $mode.wfile != "None" ];then
- ## wfilesize=`du -b $mode.wfile | awk '{print ${dollar}1}'`
- ##
- ## if [[ ${dollar}wfilesize -gt 2097152000 ]];then
- ## echo "wfile file is too big! 2GB is the maximum!" ${gt}${ad}2
- ## exit;
- ## fi
- ##fi
- ##note: validator for wig label
- ## The label is quoted so that spaces, parentheses or glob characters in it
- ## cannot break or alter the shell command.
- #if $mode.mode_select == "multiwig"
- wiglabel=`echo "$mode.label" |awk '{print length(${dollar}0)}'`
- if [[ ${dollar}wiglabel -gt 255 ]];then
- echo "Wig Label exceed the limit of 255 characters!" ${gt}${ad}2;
- exit 1;
- fi
- if [[ ${dollar}wiglabel -eq 0 ]];then
- echo "Wig Label is required!" ${gt}${ad}2;
- exit 1;
- fi
- #end if
- ##note: validator for bed label
- #if $mode.mode_select == "multibed"
- bedlabel=`echo "$mode.label" |awk '{print length(${dollar}0)}'`
- if [[ ${dollar}bedlabel -gt 255 ]];then
- echo "Bed Label exceed the limit of 255 characters!" ${gt}${ad}2;
- exit 1;
- fi
- if [[ ${dollar}bedlabel -eq 0 ]];then
- echo "Bed Label is required!" ${gt}${ad}2;
- exit 1;
- fi
- #end if
- ##note: validator for the primary BED file (line count and format check), applied in every mode
- lines=`wc -l $mode.bfile | tail -1 | awk '{print ${dollar}1}'`
- format=`$path/validation/fcfunc.py $mode.bfile`
- if [[ ${dollar}lines -gt 100000 ]];then
- echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
- exit 1;
- fi
- if [[ ${dollar}format != "passed" ]]; then
- echo "Bed file 1: " ${dollar}format ${gt}${ad}2
- exit 1;
- fi
- ##note: choose the executable from the datatype of the primary wiggle/bigwig file
- ## NOTE(review): there is no fallback branch; for any other extension $sitepro stays undefined.
- #if $mode.wfile.extension == "wig"
- #set $sitepro = "sitepro"
- #elif $mode.wfile.extension == "bigwig"
- #set $sitepro = "siteproBW"
- #end if
- #if $mode.mode_select == "single"
- ##NOTE: ceas, gca, and sitepro require python2.5 and above
- $sitepro -w $mode.wfile -b $mode.bfile --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
- R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
- mv sitepro_out.pdf $output
- cat *_dump.txt > $dump
- #elif $mode.mode_select == "multiwig"
- ## Collect the additional "-w FILE -l LABEL" pairs; each label is wrapped in double quotes.
- #set $tmp = ""
- #for $m in $mode.more
- #set $tmp = $tmp + "-w " + str($m.wig) + ' -l "' + str($m.label) + '" '
- #end for
- ##NOTE: ceas, gca, and sitepro require python2.5 and above
- $sitepro -w $mode.wfile -l "$mode.label" $tmp -b $mode.bfile --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
- R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
- mv sitepro_out.pdf $output
- cat *_dump.txt > $dump
- #elif $mode.mode_select == "multibed"
- ## Each additional BED file gets the same line-count/format validation as the first one;
- ## $bedcount numbers the files in the error message (the primary BED file is number 1).
- #set $bedcount = 1
- #set $tmp = ""
- #for $m in $mode.more
- #set $bedcount = $bedcount + 1
- lines=`wc -l $m.bfile | tail -1 | awk '{print ${dollar}1}'`
- format=`$path/validation/fcfunc.py $m.bfile`
- if [[ ${dollar}lines -gt 100000 ]];then
- echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
- exit 1;
- fi
- if [[ ${dollar}format != "passed" ]]; then
- echo "Bed file ${bedcount}: " ${dollar}format ${gt}${ad}2
- exit 1;
- fi
- #set $tmp = $tmp + "-b " + str($m.bfile) + ' -l "' + str($m.label) + '" '
- #end for
- ##NOTE: ceas, gca, and sitepro require python2.5 and above
- $sitepro -w $mode.wfile -b $mode.bfile -l "$mode.label" $tmp --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
- R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
- mv sitepro_out.pdf $output
- cat *_dump.txt > $dump
- #end if
- </configfile>
- </configfiles>
- <tests>
- <!-- Each test runs the "single" mode and compares all three tool outputs:
-      "output" (PDF), "log" (job log, up to 200 differing lines tolerated) and
-      "dump" (profile text). The log and dump reference files were previously
-      attached to the "output" dataset by mistake. -->
- <!-- NOTE(review): Sitepro_1/Sitepro_2 and Sitepro_3/Sitepro_4 use identical
-      parameters; confirm whether one of each pair was meant to differ. -->
- <test maxseconds="3600" name="Sitepro_1">
- <param name="mode_select" value="single" />
- <param name="wfile" value="wiggle.wig" />
- <param name="bfile" value="bedfile.bed" />
- <param name="span" value="1000" />
- <param name="pfres" value="50" />
- <output name="output" file="sitepro_1/sitepro_1.pdf" />
- <output name="log" file="sitepro_1/sitepro_1.log" lines_diff="200" />
- <output name="dump" file="sitepro_1/sitepro_1_dump.txt" />
- </test>
- <test maxseconds="3600" name="Sitepro_2">
- <param name="mode_select" value="single" />
- <param name="wfile" value="wiggle.wig" />
- <param name="bfile" value="bedfile.bed" />
- <param name="span" value="1000" />
- <param name="pfres" value="50" />
- <output name="output" file="sitepro_2/sitepro_2.pdf" />
- <output name="log" file="sitepro_2/sitepro_2.log" lines_diff="200" />
- <output name="dump" file="sitepro_2/sitepro_2_dump.txt" />
- </test>
- <test maxseconds="3600" name="Sitepro_3">
- <param name="mode_select" value="single" />
- <param name="wfile" value="wiggle.wig" />
- <param name="bfile" value="bedfile.bed" />
- <param name="span" value="100" />
- <param name="pfres" value="10" />
- <output name="output" file="sitepro_3/sitepro_3.pdf" />
- <output name="log" file="sitepro_3/sitepro_3.log" lines_diff="200" />
- <output name="dump" file="sitepro_3/sitepro_3_dump.txt" />
- </test>
- <test maxseconds="3600" name="Sitepro_4">
- <param name="mode_select" value="single" />
- <param name="wfile" value="wiggle.wig" />
- <param name="bfile" value="bedfile.bed" />
- <param name="span" value="100" />
- <param name="pfres" value="10" />
- <output name="output" file="sitepro_4/sitepro_4.pdf" />
- <output name="log" file="sitepro_4/sitepro_4.log" lines_diff="200" />
- <output name="dump" file="sitepro_4/sitepro_4_dump.txt" />
- </test>
- <test maxseconds="3600" name="Sitepro_5">
- <param name="mode_select" value="single" />
- <param name="wfile" value="wiggle.wig" />
- <param name="bfile" value="bedfile.bed" />
- <param name="span" value="5000" />
- <param name="pfres" value="500" />
- <output name="output" file="sitepro_5/sitepro_5.pdf" />
- <output name="log" file="sitepro_5/sitepro_5.log" lines_diff="200" />
- <output name="dump" file="sitepro_5/sitepro_5_dump.txt" />
- </test>
- </tests>
- <help>
- This tool draws the average score profile around given genomic
- sites. It is a module of the CEAS package, written by Hyunjin Gene
- Shin and published in Bioinformatics (PubMed ID: 19689956).
- .. class:: infomark
- **TIP #1:** If your query does not appear in the pulldown menu for BED files, please convert your interval files to BED format.
- .. class:: infomark
- **TIP #2:** You can't use multiple BED files *AND* multiple Wiggle files as input.
- .. class:: infomark
- **TIP #3:** The tool can be used to check the signals of your ChIP
- sample around certain regions such as Transcription Start Sites, or
- Transcription Factor Binding Sites.
- .. class:: warningmark
- **NEED IMPROVEMENT**
- -----
- **Parameters**
- - **Sitepro behaviour mode** can only be '1 wiggle file against 1 BED
- file', or 'multiple wiggle files against 1 BED file', or '1 wiggle
- file against multiple BED files'.
- - **Wiggle label** When 'multi wiggle' mode is selected, you need to assign a label to every wiggle file; the labels will be shown in the final figure.
- - **BED label** When 'multi BED' mode is selected, you need to assign a label to every BED file; the labels will be shown in the final image.
- - **Span** is the distance from the center of each BED region in both directions(+/-) (eg, [c - span, c + span], where c is the center of a region).
- - **Profiling resolution** is the resolution to bin the scores in the final image.
- -----
- **script parameter list of Sitepro**
- Options:
- --version show program's version number and exit
- -h, --help Show this help message and exit.
- -w WIG, --wig=WIG input WIG file. WARNING: both fixedStep and
- variableStep WIG formats are accepted. Multiple WIG
- files can be given via -w (--wig) individually (eg -w
- WIG1.wig, -w WIG2.wig). WARNING! multiple wig and bed
- files are not allowed.
- -b BED, --bed=BED BED file of regions of interest. (eg, binding sites or
- motif locations) Multiple BED files can be given via
- -b (--bed) individually (eg -b BED1.bed -b BED2.bed).
- WARNING! multiple wig and bed files are not allowed.
- --span=SPAN Span from the center of each BED region in both
- directions(+/-) (eg, [c - span, c + span], where c is
- the center of a region), default:1000 bp
- --pf-res=PF_RES Profiling resolution, default: 50 bp
- --dir If set, the direction (+/-) is considered in
- profiling. If no strand info given in the BED, this
- option is ignored.
- --dump If set, profiles are dumped as a TXT file
- --name=NAME Name of this run. If not given, the body of the bed
- file name will be used,
- -l LABEL, --label=LABEL
- Labels of the wig files. If given, they are used as
- the legends of the plot and in naming the TXT files of
- profile dumps; otherwise, the WIG file names will be
- used as the labels. Multiple labels can be given via
- -l (--label) individually (eg, -l LABEL1 -l LABEL2).
- WARNING! The number and order of the labels must be
- the same as the WIG files.
- -----
- **Output**
- - **PDF** format file.
- - Dumped signals within given intervals in **plain text**.
- </help>
- </tool>