wiggle_correlation_union.xml

/tools/correlation/wiggle_correlation_union.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 166 lines · 147 code · 19 blank · 0 comment · 0 complexity · 56c4cc1875c769a11c5b4b03a729ebea MD5 · raw file

<tool name="Two wiggle file correlation in union regions" id="correlation_intervals">
  <description>Calculate the correlation coefficient of two wiggle / bigwig files in the union regions from two bed files</description>
  <!-- Runs the configfile named "shscript" (declared in the configfiles section of this tool) with /bin/bash. -->
  <command interpreter="command">/bin/bash $shscript </command>
  <inputs>
    <!-- Signal tracks. Both wig and bigwig are accepted: the script selects
         qc_chIP_peak.py or qc_chIP_peakBW.py from the extension of file 1. -->
    <param format="wig,bigwig" name="wfile1" type="data" label="WIGGLE / bigwig file 1"/>
    <!-- Region files. The script rejects BED files with more than 100000 lines. -->
    <param format="bed" name="bfile1" type="data" label="BED file 1(100,000 lines max)"/>
    <param format="wig,bigwig" name="wfile2" type="data" label="WIGGLE / bigwig file 2"/>
    <param format="bed" name="bfile2" type="data" label="BED file 2(100,000 lines max)"/>
    <!-- Passed to qc_chIP_peak.py as -s; not passed to the bigwig script. -->
    <param name="step" type="integer" label="Step" value="5" help="step in points. This option is only used for wig file.">
      <validator type="in_range" max="100" min="1" message="Step is out of range, Step has to be between 1 to 100" />
    </param>
    <!-- Hidden and fixed to "mean": the script hard-codes "-m mean" and never reads
         this parameter. A hidden parameter takes its value from the value attribute. -->
    <param name="method" type="hidden" value="mean" label="method:" help="method to process the paired two sets of data in the sampling step." />
  </inputs>
  <outputs>
    <!-- Correlation plot: the script moves qc_chIP-output.txt.pdf here after the R step. -->
    <data format="pdf" name="output" />
    <!-- Receives the combined stdout and stderr of the qc_chIP_peak.py / qc_chIP_peakBW.py run. -->
    <data format="txt" name="log" label="job log" />
    <!-- The qc_chIP-output.txt file named by the -r argument, which the script feeds to R and then moves here. -->
    <data format="txt" name="rscript" label="job rscript" />
  </outputs>
  <configfiles>
    <configfile name="shscript">
#!/bin/bash
## Cheetah template for the bash script that the tool command runs.
## It validates both BED inputs, runs the correlation script that matches the
## datatype of wiggle file 1, feeds the file that script writes to R, and moves
## the results onto the declared outputs.
#import os

## Shell metacharacters are produced with chr() so that they appear neither as
## Cheetah placeholders (dollar sign) nor as XML markup (angle brackets, ampersand).
#set $dollar = chr(36)
#set $gt = chr(62)
#set $lt = chr(60)
#set $ad = chr(38)

## Absolute path of the configured tool directory; the BED validator lives below it.
#set $path = os.path.abspath($__app__.config.tool_path)

## check line count and format accuracy of all the bed files
lines1=`wc -l $bfile1 | tail -1 | awk '{print ${dollar}1}'`
lines2=`wc -l $bfile2 | tail -1 | awk '{print ${dollar}1}'`
## Any validator output other than "passed" is treated as an error message below.
format1=`$path/validation/fcfunc.py $bfile1`
format2=`$path/validation/fcfunc.py $bfile2`

##REMOVING WIG VALIDATORS
##tfilesize1=`du -b $wfile1 | awk '{print ${dollar}1}'`
##tfilesize2=`du -b $wfile2 | awk '{print ${dollar}1}'`

## Every failure branch writes its message to stderr and exits with status 1.
## A bare "exit" would return the status of the preceding echo, which is 0,
## so the failed job would look successful to anything checking the exit code.
if [[ ${dollar}lines1 -gt 100000 ]];then
    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}lines2 -gt 100000 ]];then
    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format1 != "passed" ]];then
    echo "Bed file 1: ${dollar}format1" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format2 != "passed" ]];then
    echo "Bed file 2: ${dollar}format2" ${gt}${ad}2;
    exit 1;
##REMOVING WIG VALIDATORS
##elif [[ ${dollar}tfilesize1 -gt 2147483648 ]];then
##    echo "Wiggle file 1 is too big! 2G is the maximum!" ${gt}${ad}2
##    exit;
##elif [[ ${dollar}tfilesize2 -gt 2147483648 ]];then
##    echo "Wiggle file 2 is too big! 2G is the maximum!" ${gt}${ad}2
##    exit;
else
## The correlation script is chosen at template time from the extension of wiggle
## file 1 only. Its stdout and stderr are both redirected into the job log.
#if $wfile1.extension == "wig"
    qc_chIP_peak.py -x $wfile1 -y $wfile2 -p $bfile1 -q $bfile2 -s $step -m mean -f bed -r qc_chIP-output.txt ${gt}${ad} $log
#elif $wfile1.extension == "bigwig"
    qc_chIP_peakBW.py -x $wfile1 -y $wfile2 -p $bfile1 -q $bfile2 -r qc_chIP-output.txt ${gt}${ad} $log
#end if
## Feed the file named by -r to R on stdin, discarding all R output.
    R --vanilla ${lt} qc_chIP-output.txt ${gt}${ad}/dev/null
    ##convert qc_chIP-output.txt.pdf qc_chIP-output.txt.png
    mv qc_chIP-output.txt.pdf $output
    mv qc_chIP-output.txt $rscript
fi
    </configfile>
  </configfiles>
<tests>
  <!-- Each test compares the PDF plot against the "output" dataset and the
       expected log against the "log" dataset. Both comparisons previously
       targeted "output", so the log file was checked against the PDF.
       NOTE(review): the script always passes "-m mean", so the differing
       "method" values below do not change the command line; confirm whether
       four separate tests are still wanted. -->
  <test maxseconds="3600" name="TwoScores_1">
    <param name="wfile1" value="wiggle1.wig" />
    <param name="bfile1" value="bedfile1.bed" />
    <param name="wfile2" value="wiggle2.wig" />
    <param name="bfile2" value="bedfile2.bed" />
    <param name="step" value="5" />
    <param name="method" value="sample" />
    <output name="output" file="twoscores_1/twoscores_1.R.pdf" lines_diff = "40" />
    <output name="log" file="twoscores_1/twoscores_1.log" lines_diff = "200" />
  </test>
  <test maxseconds="3600" name="TwoScores_2">
    <param name="wfile1" value="wiggle1.wig" />
    <param name="bfile1" value="bedfile1.bed" />
    <param name="wfile2" value="wiggle2.wig" />
    <param name="bfile2" value="bedfile2.bed" />
    <param name="step" value="5" />
    <param name="method" value="median" />
    <output name="output" file="twoscores_2/twoscores_2.R.pdf" lines_diff = "40" />
    <output name="log" file="twoscores_2/twoscores_2.log" lines_diff = "200" />
  </test>
  <test maxseconds="3600" name="TwoScores_3">
    <param name="wfile1" value="wiggle1.wig" />
    <param name="bfile1" value="bedfile1.bed" />
    <param name="wfile2" value="wiggle2.wig" />
    <param name="bfile2" value="bedfile2.bed" />
    <param name="step" value="5" />
    <param name="method" value="mean" />
    <output name="output" file="twoscores_3/twoscores_3.R.pdf" lines_diff = "40" />
    <output name="log" file="twoscores_3/twoscores_3.log" lines_diff = "200" />
  </test>
  <test maxseconds="3600" name="TwoScores_4">
    <param name="wfile1" value="wiggle1.wig" />
    <param name="bfile1" value="bedfile1.bed" />
    <param name="wfile2" value="wiggle2.wig" />
    <param name="bfile2" value="bedfile2.bed" />
    <param name="step" value="5" />
    <param name="method" value="sum" />
    <output name="output" file="twoscores_4/twoscores_4.R.pdf" lines_diff = "40" />
    <output name="log" file="twoscores_4/twoscores_4.log" lines_diff = "200" />
  </test>
</tests>


  <help>
This tool calculates the correlation coefficient of two wiggle / bigwig
files in the union of the regions from two BED files. The tool is
written by Tao Liu. It calls R for plotting.

.. class:: infomark

**TIP:** This can be used to evaluate the correlation between
two biological replicates.

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **WIGGLE file 1 and 2** are the two wiggle files to be
  included. These two are required.
- **BED file 1 and 2** are the two BED files to be used to
  extract scores from wiggle files.
- **wiggle files** click *Add new wiggle file* to add more wiggle
  files and labels. 
- **Genome/Assembly** Genome assembly to be used. The tool will
  download the chromosome information from UCSC database.
- **Method** When scores are extracted for a region in BED file, a
  method will be applied to calculate a value to represent this
  region. Options are *median* to use the median value or *mean* to
  use the average value.
- **Step** Step in data points. The step is a window to extract the
  scores from wiggle files along the whole genome. So that every step
  number of points will have a value to represent it by using a certain **Method**.
- **Method** When scores are extracted for a step long window, a
  method will be applied to calculate a value to represent this
  window. Options are *median* to use the median value or *mean* to
  use the average value, or *sample* to sample 1 point to represent
  the region, or *sum* to use the sum of values in the region.

-----

**Outputs**

- **PDF file** is the correlation plot

  </help>

</tool>