PageRenderTime 32ms CodeModel.GetById 26ms app.highlight 3ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/ceas/sitepro.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 297 lines | 259 code | 38 blank | 0 comment | 0 complexity | 807a1a56f365513dc3284840e6aec882 MD5 | raw file
  1<tool name="SitePro: Aggregation plot tool for signal profiling" id="ceas_sitepro">
  2  <description>Draw the score profile near a given interval</description>
  3  <command interpreter="command">/bin/bash $shscript</command> <!-- runs, with /bin/bash, the script rendered from the configfile named "shscript" -->
  4  <inputs>
  5    <conditional name="mode"> <!-- mode_select decides which of the three "when" layouts below is used -->
  6      <param name="mode_select" type="select" label="Sitepro behaviour mode" force_select="true">
  7        <option value="single"> 1 wiggle / bigwig vs 1 BED file</option>
  8        <option value="multiwig"> multiple wiggle / bigwig vs 1 BED</option>
  9        <option value="multibed"> multiple BED vs 1 wiggle / bigwig</option>
 10      </param>
 11
 12      <when value="single"> <!-- one signal file (wfile) against one BED file (bfile) -->
 13        <param ftype="wig" format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
 14        <param ftype="bed" format="bed" name="bfile" type="data" label="BED file(100,000 lines max)"/>
 15      </when>
 16
 17      <when value="multiwig"> <!-- first signal file plus label, then any number of extra signal/label pairs in "more", against one BED file -->
 18        <param format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
 19        <param name="label" type="text" label="Wiggle label" optional="false" />
 20        <repeat name="more" title="wiggle / bigwig file">
 21          <param format="wig,bigwig" name="wig" type="data" label="Select another wiggle / bigwig file"/>
 22          <param name="label" type="text" label="Wiggle label"  optional="false" />
 23        </repeat>
 24        <param format="bed" name="bfile" type="data"
 25               label="BED file(100,000 lines max)"/> <!-- the shell script applies the 100,000 line limit to this BED file in every mode, so the label states it here too -->
 26      </when>
 27
 28      <when value="multibed"> <!-- first BED file plus label, then any number of extra BED/label pairs in "more", against one signal file -->
 29        <param format="bed" name="bfile" type="data" label="BED file(100,000 lines max)"/>
 30        <param name="label" type="text" label="BED label" optional="false" />
 31        <repeat name="more" title="BED file">
 32          <param format="bed" name="bfile" type="data" label="Select another BED file(100,000 lines max)"/>
 33          <param name="label" type="text" label="BED label"  optional="false" />
 34        </repeat>
 35        <param format="wig,bigwig" name="wfile" type="data" label="Wiggle / bigwig file"/>
 36      </when>
 37
 38    </conditional>
 39
 40    <param name="span" type="integer" label="Span" value="1000"> <!-- passed to sitepro as the span option; validated to 100..1000000 -->
 41      <validator type="in_range" max="1000000" min="100" message="Span is out of range, Span has to be between 100 to 1000000" />
 42    </param>
 43    <param name="pfres" type="integer" label="Profiling Resolution" value="50"> <!-- passed to sitepro as the pf-res option; validated to 10..1000 -->
 44      <validator type="in_range" max="1000" min="10" message="Profiling Resolution is out of range, Profiling Resolution has to be between 10 to 1000" />
 45    </param>
 46    <param name="dir" type="boolean" label="consider the direction (+/-) while profiling" checked="no" truevalue="--dir" falsevalue=" " /> <!-- the chosen truevalue or falsevalue is inserted verbatim into the sitepro command line -->
 47  </inputs>
 48  <outputs> <!-- the three datasets filled in by the generated shell script -->
 49    <data name="output" format="pdf"/> <!-- sitepro_out.pdf is moved onto this dataset -->
 50    <data name="log" format="txt" label="sitepro job log"/> <!-- receives stdout and stderr of the sitepro run -->
 51    <data name="dump" format="txt" label="txt file with profiles"/> <!-- concatenation of every *_dump.txt file the run produced -->
 52  </outputs>
 53
 54  <configfiles>
 55    <configfile name="shscript">
 56#!/bin/bash
 57#import os
 58## chr(36), chr(62), chr(60) and chr(38) are the dollar, greater-than, less-than and ampersand characters used by the shell code below
 59#set $dollar = chr(36)
 60#set $gt = chr(62)
 61#set $lt = chr(60)
 62#set $ad = chr(38)
 63
 64#set $path = $os.path.abspath($__app__.config.tool_path)
 65
 66##REMOVING WIG VALIDATORS
 67##note: validator for wig file size
 68##if [ $mode.wfile != "None" ];then
 69##    wfilesize=`du -b $mode.wfile | awk '{print ${dollar}1}'`
 70##    
 71##    if [[ ${dollar}wfilesize -gt 2097152000 ]];then
 72##        echo "wfile file is too big! 2GB is the maximum!" ${gt}${ad}2
 73##        exit;
 74##    fi
 75##fi
 76
 77##note: validator for wig label; only the first label is checked (1 to 255 characters). Labels are double-quoted wherever they reach the shell so that spaces or parentheses cannot split or break the command. Every failed check exits with status 1.
 78#if $mode.mode_select == "multiwig"
 79    wiglabel=`echo "$mode.label" |awk '{print length(${dollar}0)}'`
 80    if [[ ${dollar}wiglabel -gt 255 ]];then
 81        echo "Wig Label exceed the limit of 255 characters!" ${gt}${ad}2;
 82        exit 1;
 83    fi
 84    if [[ ${dollar}wiglabel -eq 0 ]];then
 85        echo "Wig Label is required!" ${gt}${ad}2;
 86        exit 1;
 87    fi
 88#end if
 89
 90##note: validator for bed label; same rule as the wig label, applied to the first BED label only
 91#if $mode.mode_select == "multibed"
 92    bedlabel=`echo "$mode.label" |awk '{print length(${dollar}0)}'`
 93    if [[ ${dollar}bedlabel -gt 255 ]];then
 94        echo "Bed Label exceed the limit of 255 characters!" ${gt}${ad}2;
 95        exit 1;
 96    fi
 97    if [[ ${dollar}bedlabel -eq 0 ]];then
 98        echo "Bed Label is required!" ${gt}${ad}2;
 99        exit 1;
100    fi
101#end if
102##note: validator for the primary BED file in every mode: at most 100000 lines, and fcfunc.py must print "passed"
103lines=`wc -l $mode.bfile | tail -1 | awk '{print ${dollar}1}'`
104format=`$path/validation/fcfunc.py $mode.bfile`
105if [[ ${dollar}lines -gt 100000 ]];then
106   echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
107   exit 1;
108fi
109if [[ ${dollar}format != "passed" ]]; then
110   echo "Bed file 1: " ${dollar}format ${gt}${ad}2
111   exit 1;
112fi
113##note: the executable is chosen from the datatype of the signal input: sitepro for wig, siteproBW for bigwig
114#if $mode.wfile.extension == "wig"
115#set $sitepro = "sitepro"
116#elif $mode.wfile.extension == "bigwig"
117#set $sitepro = "siteproBW"
118#end if
119
120#if $mode.mode_select == "single"
121##NOTE: cease, gca, and sitepro require python2.5 and above
122$sitepro -w $mode.wfile -b $mode.bfile --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
123R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
124mv sitepro_out.pdf $output
125cat *_dump.txt > $dump
126
127#elif $mode.mode_select == "multiwig"
128#set $tmp = ""
129#for $m in $mode.more
130#set $tmp = $tmp + "-w " +str($m.wig) + ' -l "' + str($m.label)+ '" '
131#end for
132##NOTE: cease, gca, and sitepro require python2.5 and above
133
134$sitepro -w $mode.wfile -l "$mode.label" $tmp -b $mode.bfile --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
135R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
136mv sitepro_out.pdf $output
137cat *_dump.txt > $dump
138
139#elif $mode.mode_select == "multibed"
140#set $bedcount = 1
141#set $tmp = ""
142#for $m in $mode.more
143#set $bedcount = $bedcount + 1
144   lines=`wc -l $m.bfile | tail -1 | awk '{print ${dollar}1}'`
145   format=`$path/validation/fcfunc.py $m.bfile`
146   if [[ ${dollar}lines -gt 100000 ]];then
147      echo "BED file is too big! 100K lines are the maximum!" ${gt}${ad}2
148      exit 1;
149   fi
150   if [[ ${dollar}format != "passed" ]]; then
151      echo "Bed file ${bedcount}: " ${dollar}format ${gt}${ad}2
152      exit 1;
153   fi
154#set $tmp = $tmp + "-b " +str($m.bfile) + ' -l "' + str($m.label)+ '" '
155#end for
156##NOTE: cease, gca, and sitepro require python2.5 and above
157 
158
159$sitepro -w $mode.wfile -b $mode.bfile -l "$mode.label" $tmp --span=$span --pf-res=$pfres $dir --name=sitepro_out --dump ${ad}${gt} $log
160R --vanilla $lt sitepro_out.R ${ad}${gt}/dev/null
161mv sitepro_out.pdf $output
162cat *_dump.txt > $dump
163#end if
164    </configfile>
165  </configfiles>
166<tests> <!-- each test compares all three declared outputs: the PDF (output), the job log (log) and the profile dump (dump) -->
167  <test maxseconds="3600" name="Sitepro_1">
168    <param name="mode_select" value="single" />
169    <param name="wfile" value="wiggle.wig" />
170    <param name="bfile" value="bedfile.bed" />
171    <param name="span" value="1000" />
172    <param name="pfres" value="50" />
173    <output name="output" file="sitepro_1/sitepro_1.pdf" />
174    <output name="log" file="sitepro_1/sitepro_1.log" lines_diff="200" />
175    <output name="dump" file="sitepro_1/sitepro_1_dump.txt" />
176  </test>
177  <test maxseconds="3600" name="Sitepro_2"> <!-- NOTE(review): same parameters as Sitepro_1 but different expected files; confirm whether a parameter is missing here -->
178    <param name="mode_select" value="single" />
179    <param name="wfile" value="wiggle.wig" />
180    <param name="bfile" value="bedfile.bed" />
181    <param name="span" value="1000" />
182    <param name="pfres" value="50" />
183    <output name="output" file="sitepro_2/sitepro_2.pdf" />
184    <output name="log" file="sitepro_2/sitepro_2.log" lines_diff="200" />
185    <output name="dump" file="sitepro_2/sitepro_2_dump.txt" />
186  </test>
187  <test maxseconds="3600" name="Sitepro_3">
188    <param name="mode_select" value="single" />
189    <param name="wfile" value="wiggle.wig" />
190    <param name="bfile" value="bedfile.bed" />
191    <param name="span" value="100" />
192    <param name="pfres" value="10" />
193    <output name="output" file="sitepro_3/sitepro_3.pdf" />
194    <output name="log" file="sitepro_3/sitepro_3.log" lines_diff="200" />
195    <output name="dump" file="sitepro_3/sitepro_3_dump.txt" />
196  </test>
197  <test maxseconds="3600" name="Sitepro_4"> <!-- NOTE(review): same parameters as Sitepro_3 but different expected files; confirm whether a parameter is missing here -->
198    <param name="mode_select" value="single" />
199    <param name="wfile" value="wiggle.wig" />
200    <param name="bfile" value="bedfile.bed" />
201    <param name="span" value="100" />
202    <param name="pfres" value="10" />
203    <output name="output" file="sitepro_4/sitepro_4.pdf" />
204    <output name="log" file="sitepro_4/sitepro_4.log" lines_diff="200" />
205    <output name="dump" file="sitepro_4/sitepro_4_dump.txt" />
206  </test>
207  <test maxseconds="3600" name="Sitepro_5">
208    <param name="mode_select" value="single" />
209    <param name="wfile" value="wiggle.wig" />
210    <param name="bfile" value="bedfile.bed" />
211    <param name="span" value="5000" />
212    <param name="pfres" value="500" />
213    <output name="output" file="sitepro_5/sitepro_5.pdf" />
214    <output name="log" file="sitepro_5/sitepro_5.log" lines_diff="200" />
215    <output name="dump" file="sitepro_5/sitepro_5_dump.txt" />
216  </test>
217</tests>
218  <help>
219This tool draws the average score profile around given genomic
220sites. It's a module in CEAS package which is written by Hyunjin Gene
221Shin, published in Bioinformatics (pubmed id:19689956).
222
223.. class:: infomark
224
225**TIP #1:** If your query does not appear in the pulldown menu for BED Files, please convert your interval files to BED format.
226
227.. class:: infomark
228
229**TIP #2:** You can't use multiple BED files *AND* multiple Wiggle files as input. 
230
231.. class:: infomark
232
233**TIP #3:** The tool can be used to check the signals of your ChIP
234sample around certain regions such as Transcription Start Sites, or
235Transcription Factor Binding Sites.
236
237.. class:: warningmark
238
239**NEED IMPROVEMENT**
240
241-----
242
243**Parameters**
244
245- **Sitepro behaviour mode** can only be '1 wiggle file against 1 BED
246  file', or 'multiple wiggle files against 1 BED file', or '1 wiggle
247  file against multiple BED files'.
248- **Wiggle label** When 'multi wiggle' mode is selected, you need to assign a label to every wiggle file; the labels are shown in the final figure.
249- **BED label** When 'multi BED' mode is selected, you need to assign a label to every BED file; the labels are shown in the final image.
250- **Span** is the distance from the center of each BED region in both directions(+/-) (eg, [c - span, c + span], where c is the center of a region).
251- **Profiling resolution** is the resolution to bin the scores in the final image.
252
253-----
254
255**script parameter list of Sitepro**
256
257Options:
258  --version             show program's version number and exit
259  -h, --help            Show this help message and exit.
260  -w WIG, --wig=WIG     input WIG file. WARNING: both fixedStep and
261                        variableStep WIG formats are accepted. Multiple WIG
262                        files can be given via -w (--wig) individually (eg -w
263                        WIG1.wig, -w WIG2.wig). WARNING! multiple wig and bed
264                        files are not allowed.
265  -b BED, --bed=BED     BED file of regions of interest. (eg, binding sites or
266                        motif locations) Multiple BED files can be given via
267                        -b (--bed) individually (eg -b BED1.bed -b BED2.bed).
268                        WARNING! multiple wig and bed files are not allowed.
269  --span=SPAN           Span from the center of each BED region in both
270                        directions(+/-) (eg, [c - span, c + span], where c is
271                        the center of a region), default:1000 bp
272  --pf-res=PF_RES       Profiling resolution, default: 50 bp
273  --dir                 If set, the direction (+/-) is considered in
274                        profiling. If no strand info given in the BED, this
275                        option is ignored.
276  --dump                If set, profiles are dumped as a TXT file
277  --name=NAME           Name of this run. If not given, the body of the bed
278                        file name will be used,
279  -l LABEL, --label=LABEL
280                        Labels of the wig files. If given, they are used as
281                        the legends of the plot and in naming the TXT files of
282                        profile dumps; otherwise, the WIG file names will be
283                        used as the labels. Multiple labels can be given via
284                        -l (--label) individually (eg, -l LABEL1 -l LABEL2).
285                        WARNING! The number and order of the labels must be
286                        the same as the WIG files.
287
288-----
289
290**Output**
291
292- **PDF** format file.
293- Dumped signals within given intervals in **plain text**.
294
295  </help>
296
297</tool>