/tools/correlation/wiggle_correlation_union.xml
https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 166 lines · 147 code · 19 blank · 0 comment · 0 complexity · 56c4cc1875c769a11c5b4b03a729ebea MD5 · raw file
- <tool name="Two wiggle file correlation in union regions" id="correlation_intervals">
- <description>Calculate the correlation coefficient of two wiggle / bigwig files in the union regions from two bed files</description>
- <command interpreter="command">/bin/bash $shscript </command>
- <inputs>
- <!-- Two signal tracks and two BED region files; the script checks each BED file against a 100000 line limit before any computation. -->
- <!-- NOTE(review): both signal params declare format="wig" only, while the labels and the script's extension check also mention bigwig. Confirm whether bigwig datasets are meant to be selectable. -->
- <param format="wig" name="wfile1" type="data" label="WIGGLE / bigwig file 1"/>
- <param format="bed" name="bfile1" type="data" label="BED file 1(100,000 lines max)"/>
- <param format="wig" name="wfile2" type="data" label="WIGGLE / bigwig file 2"/>
- <param format="bed" name="bfile2" type="data" label="BED file 2(100,000 lines max)"/>
- <!-- Passed as the -s argument on the wig command line only; the bigwig command line does not receive it. -->
- <param name="step" type="integer" label="Step" value="5" help="step in points. This option is only used for wig file.">
- <validator type="in_range" max="100" min="1" message="Step is out of range, Step has to be between 1 to 100" />
- </param>
- <!-- Hidden parameter; the script does not read it and passes a fixed "-m mean" on the wig command line. -->
- <!-- NOTE(review): an option child on a hidden param with no value attribute looks like a leftover from a select param. Confirm before relying on it. -->
- <param name="method" type="hidden" label="method:" help="method to process the paired two sets of data in the sampling step." >
- <option value="mean">mean</option>
- </param>
- </inputs>
- <outputs>
- <!-- Correlation plot: the script moves qc_chIP-output.txt.pdf to this dataset. -->
- <data format="pdf" name="output" />
- <!-- Receives stdout and stderr of the correlation script. -->
- <data format="txt" name="log" label="job log" />
- <!-- The generated qc_chIP-output.txt, which the script feeds to R and then moves to this dataset. -->
- <data format="txt" name="rscript" label="job rscript" />
- </outputs>
- <configfiles>
- <configfile name="shscript">
- #!/bin/bash
- ## Cheetah template that Galaxy renders into the bash script run by this tool.
- ## It validates both BED inputs, runs the correlation script that matches the
- ## datatype of wiggle file 1, feeds the generated qc_chIP-output.txt to R and
- ## moves the results to the Galaxy outputs. Every validation failure writes its
- ## message to stderr and exits with a non-zero status.
- ## Backticks are used for command substitution on purpose: a dollar sign
- ## directly followed by an opening parenthesis is Cheetah placeholder syntax.
- #import os
- ## chr() yields the literal dollar, greater-than, less-than and ampersand
- ## characters used in the shell code below.
- #set $dollar = chr(36)
- #set $gt = chr(62)
- #set $lt = chr(60)
- #set $ad = chr(38)
- #set $path = os.path.abspath($__app__.config.tool_path)
- ## check line count and format accuracy of all the bed files
- lines1=`wc -l "$bfile1" | tail -1 | awk '{print ${dollar}1}'`
- lines2=`wc -l "$bfile2" | tail -1 | awk '{print ${dollar}1}'`
- format1=`"$path/validation/fcfunc.py" "$bfile1"`
- format2=`"$path/validation/fcfunc.py" "$bfile2"`
- ##REMOVING WIG VALIDATORS
- ##tfilesize1=`du -b $wfile1 | awk '{print ${dollar}1}'`
- ##tfilesize2=`du -b $wfile2 | awk '{print ${dollar}1}'`
- ## A bare "exit" would return the status of the preceding echo (0), so each
- ## failure branch exits with 1 explicitly.
- if [[ ${dollar}lines1 -gt 100000 ]];then
- echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
- exit 1;
- elif [[ ${dollar}lines2 -gt 100000 ]];then
- echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
- exit 1;
- elif [[ ${dollar}format1 != "passed" ]];then
- echo "Bed file 1: ${dollar}format1" ${gt}${ad}2;
- exit 1;
- elif [[ ${dollar}format2 != "passed" ]];then
- echo "Bed file 2: ${dollar}format2" ${gt}${ad}2;
- exit 1;
- ##REMOVING WIG VALIDATORS
- ##elif [[ ${dollar}tfilesize1 -gt 2147483648 ]];then
- ## echo "Wiggle file 1 is too big! 2G is the maximum!" ${gt}${ad}2
- ## exit;
- ##elif [[ ${dollar}tfilesize2 -gt 2147483648 ]];then
- ## echo "Wiggle file 2 is too big! 2G is the maximum!" ${gt}${ad}2
- ## exit;
- else
- ## The datatype of wiggle file 1 alone selects the correlation script; stdout
- ## and stderr of that script go to the job log.
- #if $wfile1.extension == "wig"
- qc_chIP_peak.py -x "$wfile1" -y "$wfile2" -p "$bfile1" -q "$bfile2" -s $step -m mean -f bed -r qc_chIP-output.txt ${gt}${ad} "$log"
- #elif $wfile1.extension == "bigwig"
- qc_chIP_peakBW.py -x "$wfile1" -y "$wfile2" -p "$bfile1" -q "$bfile2" -r qc_chIP-output.txt ${gt}${ad} "$log"
- #else
- ## Without this branch no R script is generated and the job fails later with
- ## an unrelated "no such file" message.
- echo "Unsupported datatype for wiggle file 1: $wfile1.extension" ${gt}${ad}2;
- exit 1;
- #end if
- R --vanilla ${lt} qc_chIP-output.txt ${gt}${ad}/dev/null
- ##convert qc_chIP-output.txt.pdf qc_chIP-output.txt.png
- mv qc_chIP-output.txt.pdf "$output"
- mv qc_chIP-output.txt "$rscript"
- fi
- </configfile>
- </configfiles>
- <tests>
- <!-- Functional tests: each run compares the plot (output) and the job log (log) with expected files, allowing up to 40 and 200 differing lines respectively. -->
- <!-- The four tests differ only in the value given for the hidden method parameter; the wig command line in the script always passes "-m mean". -->
- <test maxseconds="3600" name="TwoScores_1">
- <param name="wfile1" value="wiggle1.wig" />
- <param name="bfile1" value="bedfile1.bed" />
- <param name="wfile2" value="wiggle2.wig" />
- <param name="bfile2" value="bedfile2.bed" />
- <param name="step" value="5" />
- <param name="method" value="sample" />
- <output name="output" file="twoscores_1/twoscores_1.R.pdf" lines_diff = "40" />
- <output name="log" file="twoscores_1/twoscores_1.log" lines_diff = "200" />
- </test>
- <test maxseconds="3600" name="TwoScores_2">
- <param name="wfile1" value="wiggle1.wig" />
- <param name="bfile1" value="bedfile1.bed" />
- <param name="wfile2" value="wiggle2.wig" />
- <param name="bfile2" value="bedfile2.bed" />
- <param name="step" value="5" />
- <param name="method" value="median" />
- <output name="output" file="twoscores_2/twoscores_2.R.pdf" lines_diff = "40" />
- <output name="log" file="twoscores_2/twoscores_2.log" lines_diff = "200" />
- </test>
- <test maxseconds="3600" name="TwoScores_3">
- <param name="wfile1" value="wiggle1.wig" />
- <param name="bfile1" value="bedfile1.bed" />
- <param name="wfile2" value="wiggle2.wig" />
- <param name="bfile2" value="bedfile2.bed" />
- <param name="step" value="5" />
- <param name="method" value="mean" />
- <output name="output" file="twoscores_3/twoscores_3.R.pdf" lines_diff = "40" />
- <output name="log" file="twoscores_3/twoscores_3.log" lines_diff = "200" />
- </test>
- <test maxseconds="3600" name="TwoScores_4">
- <param name="wfile1" value="wiggle1.wig" />
- <param name="bfile1" value="bedfile1.bed" />
- <param name="wfile2" value="wiggle2.wig" />
- <param name="bfile2" value="bedfile2.bed" />
- <param name="step" value="5" />
- <param name="method" value="sum" />
- <output name="output" file="twoscores_4/twoscores_4.R.pdf" lines_diff = "40" />
- <output name="log" file="twoscores_4/twoscores_4.log" lines_diff = "200" />
- </test>
- </tests>
- <help>
- This tool calculates the correlation coefficient on two sets where the
- two sets intersect. The tool is written by Tao Liu. It calls R for
- plotting.
- .. class:: infomark
- **TIP:** This can be used to evaluate the correlation between
- two biological replicates.
- .. class:: warningmark
- **NEED IMPROVEMENT**
- -----
- **Parameters**
- - **WIGGLE file 1 and 2** are the two wiggle files to be
- included. These two are required.
- - **BED file 1 and 2** are the two BED files to be used to
- extract scores from wiggle files.
- - **wiggle files** click *Add new wiggle file* to add more wiggle
- files and labels.
- - **Genome/Assembly** Genome assembly to be used. The tool will
- download the chromosome information from UCSC database.
- - **Method** When scores are extracted for a region in BED file, a
- method will be applied to calculate a value to represent this
- region. Options are *median* to use the median value or *mean* to
- use the average value.
- - **Step** Step in data points. The step is a window used to extract
- scores from the wiggle files along the whole genome, so that every *step*
- data points are represented by one value computed with the chosen **Method**.
- - **Method** When scores are extracted for a step long window, a
- method will be applied to calculate a value to represent this
- window. Options are *median* to use the median value or *mean* to
- use the average value, or *sample* to sample 1 point to represent
- the region, or *sum* to use the sum of values in the region.
- -----
- **Outputs**
- - **PDF file** is the correlation plot
- </help>
- </tool>