/tools/correlation/wiggle_correlation_union.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 166 lines · 147 code · 19 blank · 0 comment · 0 complexity · 56c4cc1875c769a11c5b4b03a729ebea MD5 · raw file

  1. <tool name="Two wiggle file correlation in union regions" id="correlation_intervals">
  2. <description>Calculate the correlation coefficient of two wiggle / bigwig files in the union regions from two bed files</description>
  3. <command interpreter="command">/bin/bash $shscript </command> <!-- runs the script generated from the "shscript" configfile below with bash -->
  4. <inputs> <!-- two signal files (wig or bigwig) plus one BED file of regions for each -->
  5. <param format="wig,bigwig" name="wfile1" type="data" label="WIGGLE / bigwig file 1"/>
  6. <param format="bed" name="bfile1" type="data" label="BED file 1(100,000 lines max)"/>
  7. <param format="wig,bigwig" name="wfile2" type="data" label="WIGGLE / bigwig file 2"/>
  8. <param format="bed" name="bfile2" type="data" label="BED file 2(100,000 lines max)"/>
  9. <param name="step" type="integer" label="Step" value="5" help="step in points. This option is only used for wig file.">
  10. <validator type="in_range" max="100" min="1" message="Step is out of range, Step has to be between 1 to 100" />
  11. </param>
  12. <param name="method" type="hidden" label="method:" help="method to process the paired two sets of data in the sampling step." > <!-- hidden: the generated script always passes "-m mean" for wig input -->
  13. <option value="mean">mean</option>
  14. </param>
  15. </inputs>
  16. <outputs> <!-- files the generated script writes: the plot, the job log and the R script -->
  17. <data name="output" format="pdf" /> <!-- receives qc_chIP-output.txt.pdf -->
  18. <data name="log" format="txt" label="job log" /> <!-- stdout/stderr of the qc_chIP_peak*.py run -->
  19. <data name="rscript" format="txt" label="job rscript" /> <!-- receives qc_chIP-output.txt, the file fed to R -->
  20. </outputs>
  21. <configfiles>
  22. <configfile name="shscript">
  23. #!/bin/bash
  24. #import os
  25. #set $dollar = chr(36)
  26. #set $gt = chr(62)
  27. #set $lt = chr(60)
  28. #set $ad = chr(38)
  29. #set $path = os.path.abspath($__app__.config.tool_path)
  30. ## Validate both BED files (line count and format) before running; every failure below reports on stderr and exits non-zero
  31. lines1=`wc -l $bfile1 | tail -1 | awk '{print ${dollar}1}'`
  32. lines2=`wc -l $bfile2 | tail -1 | awk '{print ${dollar}1}'`
  33. format1=`$path/validation/fcfunc.py $bfile1`
  34. format2=`$path/validation/fcfunc.py $bfile2`
  35. ## Wiggle file size validators are disabled; the original checks are kept commented out
  36. ##tfilesize1=`du -b $wfile1 | awk '{print ${dollar}1}'`
  37. ##tfilesize2=`du -b $wfile2 | awk '{print ${dollar}1}'`
  38. if [[ ${dollar}lines1 -gt 100000 ]];then
  39. echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
  40. exit 1;
  41. elif [[ ${dollar}lines2 -gt 100000 ]];then
  42. echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
  43. exit 1;
  44. elif [[ ${dollar}format1 != "passed" ]];then
  45. echo "Bed file 1: ${dollar}format1" ${gt}${ad}2;
  46. exit 1;
  47. elif [[ ${dollar}format2 != "passed" ]];then
  48. echo "Bed file 2: ${dollar}format2" ${gt}${ad}2;
  49. exit 1;
  50. ## Disabled wiggle size checks (2G limit), kept for reference
  51. ##elif [[ ${dollar}tfilesize1 -gt 2147483648 ]];then
  52. ## echo "Wiggle file 1 is too big! 2G is the maximum!" ${gt}${ad}2
  53. ## exit;
  54. ##elif [[ ${dollar}tfilesize2 -gt 2147483648 ]];then
  55. ## echo "Wiggle file 2 is too big! 2G is the maximum!" ${gt}${ad}2
  56. ## exit;
  57. else
  58. #if $wfile1.extension == "wig"
  59. qc_chIP_peak.py -x $wfile1 -y $wfile2 -p $bfile1 -q $bfile2 -s $step -m mean -f bed -r qc_chIP-output.txt ${gt}${ad} $log
  60. #elif $wfile1.extension == "bigwig"
  61. qc_chIP_peakBW.py -x $wfile1 -y $wfile2 -p $bfile1 -q $bfile2 -r qc_chIP-output.txt ${gt}${ad} $log
  62. #end if
  63. R --vanilla ${lt} qc_chIP-output.txt ${gt}${ad}/dev/null
  64. ##convert qc_chIP-output.txt.pdf qc_chIP-output.txt.png
  65. mv qc_chIP-output.txt.pdf $output
  66. mv qc_chIP-output.txt $rscript
  67. fi
  68. </configfile>
  69. </configfiles>
  70. <tests>
  71. <test maxseconds="3600" name="TwoScores_1">
  72. <param name="wfile1" value="wiggle1.wig" />
  73. <param name="bfile1" value="bedfile1.bed" />
  74. <param name="wfile2" value="wiggle2.wig" />
  75. <param name="bfile2" value="bedfile2.bed" />
  76. <param name="step" value="5" />
  77. <param name="method" value="sample" />
  78. <output name="output" file="twoscores_1/twoscores_1.R.pdf" lines_diff = "40" />
  79. <output name="log" file="twoscores_1/twoscores_1.log" lines_diff = "200" /> <!-- the .log fixture is compared against the "log" output, not the PDF -->
  80. </test>
  81. <test maxseconds="3600" name="TwoScores_2">
  82. <param name="wfile1" value="wiggle1.wig" />
  83. <param name="bfile1" value="bedfile1.bed" />
  84. <param name="wfile2" value="wiggle2.wig" />
  85. <param name="bfile2" value="bedfile2.bed" />
  86. <param name="step" value="5" />
  87. <param name="method" value="median" />
  88. <output name="output" file="twoscores_2/twoscores_2.R.pdf" lines_diff = "40" />
  89. <output name="log" file="twoscores_2/twoscores_2.log" lines_diff = "200" /> <!-- the .log fixture is compared against the "log" output, not the PDF -->
  90. </test>
  91. <test maxseconds="3600" name="TwoScores_3">
  92. <param name="wfile1" value="wiggle1.wig" />
  93. <param name="bfile1" value="bedfile1.bed" />
  94. <param name="wfile2" value="wiggle2.wig" />
  95. <param name="bfile2" value="bedfile2.bed" />
  96. <param name="step" value="5" />
  97. <param name="method" value="mean" />
  98. <output name="output" file="twoscores_3/twoscores_3.R.pdf" lines_diff = "40" />
  99. <output name="log" file="twoscores_3/twoscores_3.log" lines_diff = "200" /> <!-- the .log fixture is compared against the "log" output, not the PDF -->
  100. </test>
  101. <test maxseconds="3600" name="TwoScores_4">
  102. <param name="wfile1" value="wiggle1.wig" />
  103. <param name="bfile1" value="bedfile1.bed" />
  104. <param name="wfile2" value="wiggle2.wig" />
  105. <param name="bfile2" value="bedfile2.bed" />
  106. <param name="step" value="5" />
  107. <param name="method" value="sum" />
  108. <output name="output" file="twoscores_4/twoscores_4.R.pdf" lines_diff = "40" />
  109. <output name="log" file="twoscores_4/twoscores_4.log" lines_diff = "200" /> <!-- the .log fixture is compared against the "log" output, not the PDF -->
  110. </test>
  111. </tests>
  112. <help>
  113. This tool calculates the correlation coefficient of two wiggle / bigwig
  114. files over the union of the regions from two BED files. The tool is
  115. written by Tao Liu. It calls R for plotting.
  116. .. class:: infomark
  117. **TIP:** This can be used to evaluate the correlation between
  118. two biological replicates.
  119. .. class:: warningmark
  120. **NEED IMPROVEMENT**
  121. -----
  122. **Parameters**
  123. - **WIGGLE file 1 and 2** are the two wiggle files to be
  124. included. These two are required.
  125. - **BED file 1 and 2** are the two BED files to be used to
  126. extract scores from wiggle files.
  127. - **wiggle files** click *Add new wiggle file* to add more wiggle
  128. files and labels.
  129. - **Genome/Assembly** Genome assembly to be used. The tool will
  130. download the chromosome information from UCSC database.
  131. - **Method** When scores are extracted for a region in BED file, a
  132. method will be applied to calculate a value to represent this
  133. region. Options are *median* to use the median value or *mean* to
  134. use the average value.
  135. - **Step** Step in data points. The step is a window to extract the
  136. scores from wiggle files along the whole genome. So that every step
  137. number of points will have a value to represent it by using a certain **Method**.
  138. - **Method** When scores are extracted for a step long window, a
  139. method will be applied to calculate a value to represent this
  140. window. Options are *median* to use the median value or *mean* to
  141. use the average value, or *sample* to sample 1 point to represent
  142. the region, or *sum* to use the sum of values in the region.
  143. -----
  144. **Outputs**
  145. - **PDF file** is the correlation plot
  146. </help>
  147. </tool>