/tools/ceas/sitepro.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 297 lines · 259 code · 38 blank · 0 comment · 0 complexity · 807a1a56f365513dc3284840e6aec882 MD5 · raw file

  1. <tool name="SitePro: Aggregation plot tool for signal profiling" id="ceas_sitepro">
  2. <description>Draw the score profile near a given interval</description>
  3. <command interpreter="command">/bin/bash $shscript</command>
  4. <inputs>
  5. <conditional name="mode"> <!-- the selected mode decides which of the three "when" input sets below is used -->
  6. <param name="mode_select" type="select" label="Sitepro behaviour mode" force_select="true">
  7. <option value="single"> 1 wiggle / bigwig vs 1 BED file</option>
  8. <option value="multiwig"> multiple wiggle / bigwig vs 1 BED</option>
  9. <option value="multibed"> multiple BED vs 1 wiggle / bigwig</option>
  10. </param>
  11. <when value="single">
  12. <param format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
  13. <param format="bed" name="bfile" type="data" label="BED file(100,000 lines max)"/>
  14. </when>
  15. <when value="multiwig">
  16. <param format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
  17. <param name="label" type="text" label="Wiggle label" optional="false" />
  18. <repeat name="more" title="wiggle / bigwig file"> <!-- each entry adds one more wiggle / bigwig file and its label -->
  19. <param format="wig,bigwig" name="wig" type="data" label="Select another wiggle / bigwig file"/>
  20. <param name="label" type="text" label="Wiggle label" optional="false" />
  21. </repeat>
  22. <param format="bed" name="bfile" type="data"
  23. label="BED file(100,000 lines max)"/>
  24. </when>
  25. <when value="multibed">
  26. <param format="bed" name="bfile" type="data" label="BED file(100,000 lines max)"/>
  27. <param name="label" type="text" label="BED label" optional="false" />
  28. <repeat name="more" title="BED file"> <!-- each entry adds one more BED file and its label -->
  29. <param format="bed" name="bfile" type="data" label="Select another BED file(100,000 lines max)"/>
  30. <param name="label" type="text" label="BED label" optional="false" />
  31. </repeat>
  32. <param format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
  33. </when>
  34. </conditional>
  35. <param name="span" type="integer" label="Span" value="1000">
  36. <validator type="in_range" max="1000000" min="100" message="Span is out of range, Span has to be between 100 to 1000000" />
  37. </param>
  38. <param name="pfres" type="integer" label="Profiling Resolution" value="50">
  39. <validator type="in_range" max="1000" min="10" message="Profiling Resolution is out of range, Profiling Resolution has to be between 10 to 1000" />
  40. </param>
  41. <param name="dir" type="boolean" label="consider the direction (+/-) while profiling" checked="no" truevalue="--dir" falsevalue=" " />
  42. </inputs>
  43. <outputs>
  44. <data format="pdf" name="output" /> <!-- plot PDF: the script moves sitepro_out.pdf to this dataset -->
  45. <data format="txt" name="log" label="sitepro job log" /> <!-- receives stdout and stderr of the sitepro command -->
  46. <data format="txt" name="dump" label="txt file with profiles" /> <!-- concatenation of every *_dump.txt file left by the run -->
  47. </outputs>
  48. <configfiles>
  49. <configfile name="shscript">
  50. #!/bin/bash
  51. #import os
  52. #set $dollar = chr(36)
  53. #set $gt = chr(62)
  54. #set $lt = chr(60)
  55. #set $ad = chr(38)
  56. #set $path = $os.path.abspath($__app__.config.tool_path)
  57. ##REMOVING WIG VALIDATORS
  58. ##note: validator for wig file size
  59. ##if [ $mode.wfile != "None" ];then
  60. ## wfilesize=`du -b $mode.wfile | awk '{print ${dollar}1}'`
  61. ##
  62. ## if [[ ${dollar}wfilesize -gt 2097152000 ]];then
  63. ## echo "wfile file is too big! 2GB is the maximum!" ${gt}${ad}2
  64. ## exit;
  65. ## fi
  66. ##fi
  67. ##note: validator for wig label (checks only the first label: 1 to 255 characters; label is quoted so spaces survive)
  68. #if $mode.mode_select == "multiwig"
  69. wiglabel=`echo "$mode.label" |awk '{print length(${dollar}0)}'`
  70. if [[ ${dollar}wiglabel -gt 255 ]];then
  71. echo "Wig Label exceed the limit of 255 characters!" ${gt}${ad}2;
  72. exit 1;
  73. fi
  74. if [[ ${dollar}wiglabel -eq 0 ]];then
  75. echo "Wig Label is required!" ${gt}${ad}2;
  76. exit 1;
  77. fi
  78. #end if
  79. ##note: validator for bed label (checks only the first label: 1 to 255 characters; label is quoted so spaces survive)
  80. #if $mode.mode_select == "multibed"
  81. bedlabel=`echo "$mode.label" |awk '{print length(${dollar}0)}'`
  82. if [[ ${dollar}bedlabel -gt 255 ]];then
  83. echo "Bed Label exceed the limit of 255 characters!" ${gt}${ad}2;
  84. exit 1;
  85. fi
  86. if [[ ${dollar}bedlabel -eq 0 ]];then
  87. echo "Bed Label is required!" ${gt}${ad}2;
  88. exit 1;
  89. fi
  90. #end if
  91. lines=`wc -l $mode.bfile | tail -1 | awk '{print ${dollar}1}'`
  92. format=`$path/validation/fcfunc.py $mode.bfile`
  93. if [[ ${dollar}lines -gt 100000 ]];then
  94. echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
  95. exit 1;
  96. fi
  97. if [[ ${dollar}format != "passed" ]]; then
  98. echo "Bed file 1: " ${dollar}format ${gt}${ad}2
  99. exit 1;
  100. fi
  101. #if $mode.wfile.extension == "wig"
  102. #set $sitepro = "sitepro"
  103. #elif $mode.wfile.extension == "bigwig"
  104. #set $sitepro = "siteproBW"
  105. #end if
  106. #if $mode.mode_select == "single"
  107. ##NOTE: ceas, gca, and sitepro require python2.5 and above
  108. $sitepro -w $mode.wfile -b $mode.bfile --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
  109. R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
  110. mv sitepro_out.pdf $output
  111. cat *_dump.txt > $dump
  112. #elif $mode.mode_select == "multiwig"
  113. #set $tmp = ""
  114. #for $m in $mode.more
  115. #set $tmp = $tmp + "-w " +str($m.wig) + ' -l "' + str($m.label)+ '" '
  116. #end for
  117. ##NOTE: ceas, gca, and sitepro require python2.5 and above; every label is passed as one quoted argument
  118. $sitepro -w $mode.wfile -l "$mode.label" $tmp -b $mode.bfile --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
  119. R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
  120. mv sitepro_out.pdf $output
  121. cat *_dump.txt > $dump
  122. #elif $mode.mode_select == "multibed"
  123. #set $bedcount = 1
  124. #set $tmp = ""
  125. #for $m in $mode.more
  126. #set $bedcount = $bedcount + 1
  127. lines=`wc -l $m.bfile | tail -1 | awk '{print ${dollar}1}'`
  128. format=`$path/validation/fcfunc.py $m.bfile`
  129. if [[ ${dollar}lines -gt 100000 ]];then
  130. echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
  131. exit 1;
  132. fi
  133. if [[ ${dollar}format != "passed" ]]; then
  134. echo "Bed file ${bedcount}: " ${dollar}format ${gt}${ad}2
  135. exit 1;
  136. fi
  137. #set $tmp = $tmp + "-b " +str($m.bfile) + ' -l "' + str($m.label)+ '" '
  138. #end for
  139. ##NOTE: ceas, gca, and sitepro require python2.5 and above; every label is passed as one quoted argument
  140. $sitepro -w $mode.wfile -b $mode.bfile -l "$mode.label" $tmp --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
  141. R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
  142. mv sitepro_out.pdf $output
  143. cat *_dump.txt > $dump
  144. #end if
  145. </configfile>
  146. </configfiles>
  147. <tests> <!-- each test compares all three declared outputs: output (PDF), log and dump -->
  148. <test maxseconds="3600" name="Sitepro_1">
  149. <param name="mode_select" value="single" />
  150. <param name="wfile" value="wiggle.wig" />
  151. <param name="bfile" value="bedfile.bed" />
  152. <param name="span" value="1000" />
  153. <param name="pfres" value="50" />
  154. <output name="output" file="sitepro_1/sitepro_1.pdf" />
  155. <output name="log" file="sitepro_1/sitepro_1.log" lines_diff = "200" />
  156. <output name="dump" file="sitepro_1/sitepro_1_dump.txt" />
  157. </test>
  158. <test maxseconds="3600" name="Sitepro_2">
  159. <param name="mode_select" value="single" />
  160. <param name="wfile" value="wiggle.wig" />
  161. <param name="bfile" value="bedfile.bed" />
  162. <param name="span" value="1000" />
  163. <param name="pfres" value="50" />
  164. <output name="output" file="sitepro_2/sitepro_2.pdf" />
  165. <output name="log" file="sitepro_2/sitepro_2.log" lines_diff = "200" />
  166. <output name="dump" file="sitepro_2/sitepro_2_dump.txt" />
  167. </test>
  168. <test maxseconds="3600" name="Sitepro_3">
  169. <param name="mode_select" value="single" />
  170. <param name="wfile" value="wiggle.wig" />
  171. <param name="bfile" value="bedfile.bed" />
  172. <param name="span" value="100" />
  173. <param name="pfres" value="10" />
  174. <output name="output" file="sitepro_3/sitepro_3.pdf" />
  175. <output name="log" file="sitepro_3/sitepro_3.log" lines_diff = "200" />
  176. <output name="dump" file="sitepro_3/sitepro_3_dump.txt" />
  177. </test>
  178. <test maxseconds="3600" name="Sitepro_4">
  179. <param name="mode_select" value="single" />
  180. <param name="wfile" value="wiggle.wig" />
  181. <param name="bfile" value="bedfile.bed" />
  182. <param name="span" value="100" />
  183. <param name="pfres" value="10" />
  184. <output name="output" file="sitepro_4/sitepro_4.pdf" />
  185. <output name="log" file="sitepro_4/sitepro_4.log" lines_diff = "200" />
  186. <output name="dump" file="sitepro_4/sitepro_4_dump.txt" />
  187. </test>
  188. <test maxseconds="3600" name="Sitepro_5">
  189. <param name="mode_select" value="single" />
  190. <param name="wfile" value="wiggle.wig" />
  191. <param name="bfile" value="bedfile.bed" />
  192. <param name="span" value="5000" />
  193. <param name="pfres" value="500" />
  194. <output name="output" file="sitepro_5/sitepro_5.pdf" />
  195. <output name="log" file="sitepro_5/sitepro_5.log" lines_diff = "200" />
  196. <output name="dump" file="sitepro_5/sitepro_5_dump.txt" />
  197. </test>
  198. </tests>
  199. <help>
  200. This tool draws the average score profile around given genomic
  201. sites. It is a module of the CEAS package, written by Hyunjin Gene
  202. Shin and published in Bioinformatics (PubMed ID: 19689956).
  203. .. class:: infomark
  204. **TIP #1:** If your query does not appear in the pulldown menu for BED Files, please convert your interval files to BED format.
  205. .. class:: infomark
  206. **TIP #2:** You can't use multiple BED files *AND* multiple Wiggle files as input.
  207. .. class:: infomark
  208. **TIP #3:** The tool can be used to check the signals of your ChIP
  209. sample around certain regions such as Transcription Start Sites, or
  210. Transcription Factor Binding Sites.
  211. .. class:: warningmark
  212. **NEED IMPROVEMENT**
  213. -----
  214. **Parameters**
  215. - **Sitepro behaviour mode** can only be '1 wiggle file against 1 BED
  216. file', or 'multiple wiggle files against 1 BED file', or '1 wiggle
  217. file against multiple BED files'.
  218. - **Wiggle label** When 'multi wiggle' mode is selected, you need to assign a label to every wiggle file; the labels will be shown in the final figure.
  219. - **BED label** When 'multi BED' mode is selected, you need to assign a label to every BED file; the labels will be shown in the final image.
  220. - **Span** is the distance from the center of each BED region in both directions(+/-) (eg, [c - span, c + span], where c is the center of a region).
  221. - **Profiling resolution** is the resolution to bin the scores in the final image.
  222. -----
  223. **script parameter list of Sitepro**
  224. Options:
  225. --version show program's version number and exit
  226. -h, --help Show this help message and exit.
  227. -w WIG, --wig=WIG input WIG file. WARNING: both fixedStep and
  228. variableStep WIG formats are accepted. Multiple WIG
  229. files can be given via -w (--wig) individually (eg -w
  230. WIG1.wig, -w WIG2.wig). WARNING! multiple wig and bed
  231. files are not allowed.
  232. -b BED, --bed=BED BED file of regions of interest. (eg, binding sites or
  233. motif locations) Multiple BED files can be given via
  234. -b (--bed) individually (eg -b BED1.bed -b BED2.bed).
  235. WARNING! multiple wig and bed files are not allowed.
  236. --span=SPAN Span from the center of each BED region in both
  237. directions(+/-) (eg, [c - span, c + span], where c is
  238. the center of a region), default:1000 bp
  239. --pf-res=PF_RES Profiling resolution, default: 50 bp
  240. --dir If set, the direction (+/-) is considered in
  241. profiling. If no strand info given in the BED, this
  242. option is ignored.
  243. --dump If set, profiles are dumped as a TXT file
  244. --name=NAME Name of this run. If not given, the body of the bed
  245. file name will be used,
  246. -l LABEL, --label=LABEL
  247. Labels of the wig files. If given, they are used as
  248. the legends of the plot and in naming the TXT files of
  249. profile dumps; otherwise, the WIG file names will be
  250. used as the labels. Multiple labels can be given via
  251. -l (--label) individually (eg, -l LABEL1 -l LABEL2).
  252. WARNING! The number and order of the labels must be
  253. the same as the WIG files.
  254. -----
  255. **Output**
  256. - **PDF** format file.
  257. - Dumped signals within given intervals in **plain text**.
  258. </help>
  259. </tool>