PageRenderTime 18ms CodeModel.GetById 9ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/correlation/wiggle_correlation_union.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 166 lines | 147 code | 19 blank | 0 comment | 0 complexity | 56c4cc1875c769a11c5b4b03a729ebea MD5 | raw file
  1<tool name="Two wiggle file correlation in union regions" id="correlation_intervals">
  2  <description>Calculate the correlation coefficient of two wiggle / bigwig files in the union regions from two bed files</description>
  3  <command interpreter="command">/bin/bash $shscript </command>
  4  <inputs>
  5    <param format="wig" name="wfile1" type="data" label="WIGGLE / bigwig file 1"/>
  6    <param format="bed" name="bfile1" type="data" label="BED file 1(100,000 lines max)"/>
  7    <param format="wig" name="wfile2" type="data" label="WIGGLE / bigwig file 2"/>
  8    <param format="bed" name="bfile2" type="data" label="BED file 2(100,000 lines max)"/>
  9    <param name="step" type="integer" label="Step" value="5" help="step in points. This option is only used for wig file.">
 10      <validator type="in_range" max="100" min="1" message="Step is out of range, Step has to be between 1 to 100" />
 11    </param>    
 12    <param name="method" type="hidden" label="method:" help="method to process the paired two sets of data in the sampling step." >
 13      <option value="mean">mean</option>
 14    </param>
 15  </inputs>
 16  <outputs>
 17    <data format="pdf" name="output" />
 18    <data format="txt" name="log" label="job log" />
 19    <data format="txt" name="rscript" label="job rscript" />
 20  </outputs>
 21  <configfiles>
 22    <configfile name="shscript">
 23#!/bin/bash
 24#import os
 25
 26#set $dollar = chr(36)
 27#set $gt = chr(62)
 28#set $lt = chr(60)
 29#set $ad = chr(38)
 30
 31#set $path = os.path.abspath($__app__.config.tool_path)
 32
 33## check line count and format accuracy of all the bed files
 34lines1=`wc -l $bfile1 | tail -1 | awk '{print ${dollar}1}'`
 35lines2=`wc -l $bfile2 | tail -1 | awk '{print ${dollar}1}'`
 36format1=`$path/validation/fcfunc.py $bfile1`
 37format2=`$path/validation/fcfunc.py $bfile2`
 38
 39##REMOVING WIG VALIDATORS
 40##tfilesize1=`du -b $wfile1 | awk '{print ${dollar}1}'`
 41##tfilesize2=`du -b $wfile2 | awk '{print ${dollar}1}'`
 42
 43if [[ ${dollar}lines1 -gt 100000 ]];then
 44    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
 45    exit;
 46elif [[ ${dollar}lines2 -gt 100000 ]];then
 47    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
 48    exit;
 49elif [[ ${dollar}format1 != "passed" ]];then
 50    echo "Bed file 1: ${dollar}format1" ${gt}${ad}2;
 51    exit;
 52elif [[ ${dollar}format2 != "passed" ]];then
 53    echo "Bed file 2: ${dollar}format2" ${gt}${ad}2;
 54    exit;
 55##REMOVING WIG VALIDATORS
 56##elif [[ ${dollar}tfilesize1 -gt 2147483648 ]];then
 57##    echo "Wiggle file 1 is too big! 2G is the maximum!" ${gt}${ad}2
 58##    exit;
 59##elif [[ ${dollar}tfilesize2 -gt 2147483648 ]];then
 60##    echo "Wiggle file 2 is too big! 2G is the maximum!" ${gt}${ad}2
 61##    exit;
 62else
 63#if $wfile1.extension == "wig"
 64    qc_chIP_peak.py -x $wfile1 -y $wfile2 -p $bfile1 -q $bfile2 -s $step -m mean -f bed -r qc_chIP-output.txt ${gt}${ad} $log
 65#elif $wfile1.extension == "bigwig"
 66    qc_chIP_peakBW.py -x $wfile1 -y $wfile2 -p $bfile1 -q $bfile2 -r qc_chIP-output.txt ${gt}${ad} $log
 67#end if
 68    R --vanilla ${lt} qc_chIP-output.txt ${gt}${ad}/dev/null
 69    ##convert qc_chIP-output.txt.pdf qc_chIP-output.txt.png
 70    mv qc_chIP-output.txt.pdf $output
 71    mv qc_chIP-output.txt $rscript
 72fi
 73    </configfile>
 74  </configfiles>
 75<tests>
 76  <test maxseconds="3600" name="TwoScores_1">
 77    <param name="wfile1" value="wiggle1.wig" />
 78    <param name="bfile1" value="bedfile1.bed" />
 79    <param name="wfile2" value="wiggle2.wig" />
 80    <param name="bfile2" value="bedfile2.bed" />
 81    <param name="step" value="5" />
 82    <param name="method" value="sample" />
 83    <output name="output" file="twoscores_1/twoscores_1.R.pdf" lines_diff = "40" />
 84    <output name="output" file="twoscores_1/twoscores_1.log" lines_diff = "200" />
 85  </test>
 86  <test maxseconds="3600" name="TwoScores_2">
 87    <param name="wfile1" value="wiggle1.wig" />
 88    <param name="bfile1" value="bedfile1.bed" />
 89    <param name="wfile2" value="wiggle2.wig" />
 90    <param name="bfile2" value="bedfile2.bed" />
 91    <param name="step" value="5" />
 92    <param name="method" value="median" />
 93    <output name="output" file="twoscores_2/twoscores_2.R.pdf" lines_diff = "40" />
 94    <output name="output" file="twoscores_2/twoscores_2.log" lines_diff = "200" />
 95  </test>
 96  <test maxseconds="3600" name="TwoScores_3">
 97    <param name="wfile1" value="wiggle1.wig" />
 98    <param name="bfile1" value="bedfile1.bed" />
 99    <param name="wfile2" value="wiggle2.wig" />
100    <param name="bfile2" value="bedfile2.bed" />
101    <param name="step" value="5" />
102    <param name="method" value="mean" />
103    <output name="output" file="twoscores_3/twoscores_3.R.pdf" lines_diff = "40" />
104    <output name="output" file="twoscores_3/twoscores_3.log" lines_diff = "200" />
105  </test>
106  <test maxseconds="3600" name="TwoScores_4">
107    <param name="wfile1" value="wiggle1.wig" />
108    <param name="bfile1" value="bedfile1.bed" />
109    <param name="wfile2" value="wiggle2.wig" />
110    <param name="bfile2" value="bedfile2.bed" />
111    <param name="step" value="5" />
112    <param name="method" value="sum" />
113    <output name="output" file="twoscores_4/twoscores_4.R.pdf" lines_diff = "40" />
114    <output name="output" file="twoscores_4/twoscores_4.log" lines_diff = "200" />
115  </test>
116</tests>
117
118
119  <help>
120This tool calculates the correlation coefficient on two sets where the
121two sets intersect The tool is written by Tao Liu. It calls R for
122plotting.
123
124.. class:: infomark
125
126**TIP:** This can be used to evaluate the correlation between
127two biological replicates.
128
129.. class:: warningmark
130
131**NEED IMPROVEMENT**
132
133-----
134
135**Parameters**
136
137- **WIGGLE file 1 and 2** are the two wiggle files to be
138  included. These two are required.
139- **BED file 1 and 2** are the two BED files to be used to
140  extract scores from wiggle files.
141- **wiggle files** click *Add new wiggle file* to add more wiggle
142  files and labels. 
143- **Genome/Assembly** Genome assembly to be used. The tool will
144  download the chromosome information from UCSC database.
145- **Method** When scores are extracted for a region in BED file, a
146  method will be applied to calculate a value to represent this
147  region. Options are *median* to use the median value or *mean* to
148  use the average value.
149- **Step** Step in data points. The step is a window to extract the
150  scores from wiggle files along the whole genome. So that every step
151  number of points will have a value to represent it by using certain **Method** 
152- **Method** When scores are extracted for a step long window, a
153  method will be applied to calculate a value to represent this
154  window. Options are *median* to use the median value or *mean* to
155  use the average value, or *sample* to sample 1 point to represent
156  the region, or *sum* to use the sum of values in the region.
157
158-----
159
160**Outputs**
161
162- **PNG file** is the correlation plot
163
164  </help>
165
166</tool>