/tools/ceas/conservation.xml
https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 170 lines · 144 code · 26 blank · 0 comment · 0 complexity · 045e3c32f2c0a6384ffcfe3b2cf33a62 MD5 · raw file
- <tool name="Conservation Plot" id="ceas_conservation">
- <description>Calculates the PhastCons scores in several interval sets</description>
- <!-- Runs the "shscript" config file (the template declared under configfiles) with /bin/bash. -->
- <command interpreter="command">/bin/bash $shscript</command>
- <inputs>
- <!-- Repeatable group: one interval/BED dataset plus the label used for it on the figure. -->
- <repeat title="interval file" name="more">
- <param type="data" name="bfile" ftype="interval" format="bed,interval" label="Select another interval file(100,000 lines max)"/>
- <param type="text" name="blabel" optional="false" label="BED file label" help="label on the figure"/>
- </repeat>
- <!-- Window size passed to conservation_plot.py with -w; the validator accepts 100 to 10000. -->
- <param type="integer" name="size" value="3000" label="window size around the center">
- <validator type="in_range" min="100" max="10000" message="window size is out of range, window size has to be between 100 to 10000"/>
- </param>
- <!-- Each option value is joined onto the conservation data directory path by the shscript template. -->
- <param type="select" name="gv" label="UCSC genome/assembly version">
- <option value="hg38">hg38</option>
- <option value="hg19/placentalMammals">hg19 (placental mammals)</option>
- <option value="hg19/vertebrate">hg19 (vertebrate)</option>
- <option value="hg18/placentalMammals">hg18 (placental mammals)</option>
- <option value="hg18/vertebrate">hg18 (vertebrate)</option>
- <option value="mm10/placental">mm10 (placental)</option>
- <option value="mm9/placental">mm9 (placental)</option>
- <option value="mm9/vertebrate">mm9 (vertebrate)</option>
- <option value="mm8/vertebrate">mm8 (vertebrate)</option>
- <option value="dm3">dm3</option>
- <option value="danRer7">danRer7 (vertebrate)</option>
- <option value="ce10">ce10</option>
- <option value="ce6">ce6</option>
- <option value="ce4">ce4</option>
- </param>
- <!-- Image format of the plot; the shscript template converts the PDF to PNG when PNG is chosen. -->
- <param name="imagetype" type="select" display="radio" label="Image Type">
- <option value="PDF">PDF format</option>
- <option value="PNG">PNG format</option>
- </param>
- </inputs>
- <outputs>
- <!-- Plot image: declared as png, switched to pdf when the "imagetype" input is PDF. -->
- <data name="output" format="png">
- <change_format>
- <when input="imagetype" value="PDF" format="pdf"/>
- </change_format>
- </data>
- <!-- R script that the shscript template moves here from tmp.R. -->
- <data name="outputr" format="txt" label="conservation R script"/>
- <!-- Receives the redirected output of conservation_plot.py. -->
- <data name="log" format="txt" label="conservation job log"/>
- </outputs>
- <configfiles>
- <!-- Cheetah template rendered to a bash script. It validates every interval file and label, runs conservation_plot.py, and moves the results to the output datasets. Every validation failure writes a message to stderr and exits with status 1. -->
- <configfile name="shscript">
- #!/bin/bash
- #import os
- ## Shell metacharacters are produced with chr() so the template text needs no XML entity escaping.
- #set $dollar = chr(36)
- #set $gt = chr(62)
- #set $lt = chr(60)
- #set $ad = chr(38)
- ## tmp accumulates the "file -l 'label'" arguments for conservation_plot.py.
- #set $tmp = ""
- #set $bedcount = 0
- #set $path = $os.path.abspath($__app__.config.tool_path)
- #for $m in $more
- #set $bedcount = $bedcount + 1
- lines=`wc -l $m.bfile | tail -1 | awk '{print ${dollar}1}'`
- format=`$path/validation/fcfunc.py $m.bfile`
- ## Reject files above the 100,000 line limit advertised in the input label.
- if [[ ${dollar}lines -gt 100000 ]];then
- echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
- exit 1
- fi
- ## fcfunc.py is expected to print "passed"; anything else is reported as the error text.
- if [[ ${dollar}format != "passed" ]]; then
- echo "Bed file ${bedcount}: "${dollar}format ${gt}${ad}2
- exit 1
- fi
-
- ## The label length is measured in the shell; it must be between 1 and 255 characters.
- bedlabel=`echo $m.blabel |awk '{print length(${dollar}0)}'`
- if [[ ${dollar}bedlabel -gt 255 ]];then
- echo "Bed Label exceed the limit of 255 characters!" ${gt}${ad}2;
- exit 1
- fi
- if [[ ${dollar}bedlabel -eq 0 ]];then
- echo "Bed Label is required!" ${gt}${ad}2;
- exit 1
- fi
- #set $tmp = $tmp + str($m.bfile) + " -l '" + str($m.blabel) + "' "
- #end for
- ## bedcount is substituted by Cheetah, so this compares a literal number.
- if [[ $bedcount -eq 0 ]];then
- echo "Need at least one bed file" ${gt}${ad}2
- exit 1
- fi
- #set $datapath = os.path.join( os.path.abspath($__app__.config.cistrome_static_library_path), "conservation", $gv.value )
- ## Output of the plotting script is redirected into the log dataset.
- conservation_plot.py -d $datapath -w $size $tmp ${ad}${gt} $log
- ## The plot is read from tmp.pdf and converted only when PNG was requested.
- if [ $imagetype.value == "PNG" ]; then
- convert tmp.pdf tmp.png
- mv tmp.png $output
- else
- mv tmp.pdf $output
- fi
- mv tmp.R $outputr
- </configfile>
- </configfiles>
- <tests>
- <!-- Single functional test: one BED file against ce6 with a 1000 bp window. The image is compared through the "output" dataset and the job log through the "log" dataset, with lines_diff set to 200 for the log. -->
- <test maxseconds="3600" name="Conservation_1">
- <param name="bfile" value="bedfile.bed" />
- <param name="blabel" value="conservation_1" />
- <param name="size" value="1000" />
- <param name="gv" value="ce6" />
- <output name="output" file="conservation_1/conservation_1.bmp" />
- <output name="log" file="conservation_1/conservation_1.log" lines_diff="200" />
- </test>
- </tests>
- <help>
- This tool plots the PhastCons score profiles for several BED
- files. It is based on the conservation_plot.py script in Tao Liu's
- library. The original code was written by Ying Lei and later modified by
- Jaqueline Wentz.
- .. class:: infomark
- **Tip:** If you see red Xs, check the BED input file first. Perhaps, the BED file contains some abnormal chromosome names.
- .. class:: infomark
- **Tip:** For best performance, please make sure the regions in the BED file are centered at peak summits.
- -----
- **Parameters**
- - **Title** Conservation Plot Tool
- - **Interval file** is a BED file normally centered at peak summit.
- - **BED file label** is the label marked in the legend of the final figure.
- - **more** You can add more BED files and labels.
- - **Window size** is the width of the region around each peak center from which PhastCons scores are extracted.
- - **UCSC genome version** must be selected according to your BED files.
- -----
- **script parameter list for conservation_plot.py**
- conservation_plot.py
- Draw conservation plot for many bed files.
- Options:
- --version show program's version number and exit
- -H HEIGHT, --height=HEIGHT
- height of plot
- -W WIDTH, --width=WIDTH
- width of plot
- -w W window width centered at middle of bed
- regions,default: 1000
- -t TITLE, --title=TITLE
- title of the figure. Default: 'Average Phastcons
- around the Center of Sites'
- -d PHASDB, --phasdb=PHASDB
- The directory to store phastcons scores in the server
- -l BEDLABEL, --bed-label=BEDLABEL
- the BED file labels in the figure. No space is
- allowed. This option should be used same times as -w
- option, and please input them in the same order as BED
- files. default: will use the BED file filename as
- labels.
- -h, --help Show this help message and exit.
- </help>
- </tool>