PageRenderTime 23ms CodeModel.GetById 20ms app.highlight 1ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/ceas/gca.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 134 lines | 116 code | 18 blank | 0 comment | 0 complexity | 981e788ed5c8365e0d7357e96cf673df MD5 | raw file
  1<tool name="GCA: Gene centered annotation" id="ceas_gca">
  2  <description>Find the nearest interval in the given interval set for every annotated coding gene</description>
  3  <command interpreter="command">/bin/bash $shscript</command> <!-- runs the generated "shscript" configfile; "/bin/bash/" with a trailing slash is not an executable path -->
  4  <inputs>
  5    <param name="bfile" type="data" format="bed" label="BED file(100,000 lines max)">
  6      <validator type="unspecified_build" /> <!-- the dataset's dbkey is used to locate the gene table, so a build must be set -->
  7    </param>
  8    <param name="span" type="integer" label="Span" value="3000"> <!-- integer: the value is range-checked and passed to gca as the span option -->
  9      <validator type="in_range" max="1000000" min="100" message="Span is out of range, Span has to be between 100 to 1000000" />
 10    </param>
 11  </inputs>
 12  <outputs>
 13    <data name="output" format="xls"/> <!-- receives a copy of gca_out.xls written by gca -->
 14    <data name="log" format="txt" label="job log"/> <!-- receives the redirected stdout and stderr of gca -->
 15  </outputs>
 16
 17  <configfiles>
 18    <configfile name="shscript">
 19#!/bin/bash
 20#import os
 21## chr() aliases below stand for dollar, greater-than, less-than and ampersand; presumably used to avoid XML escaping and Cheetah placeholder parsing
 22#set $dollar = chr(36)
 23#set $gt = chr(62)
 24#set $lt = chr(60)
 25#set $ad = chr(38)
 26## absolute tool directory; the BED validation script is resolved relative to it
 27#set $path = $os.path.abspath($__app__.config.tool_path)
 28
 29## reject BED files that exceed 100000 lines or fail the fcfunc.py format check
 30lines=`wc -l $bfile | tail -1 | awk '{print ${dollar}1}'`
 31format=`$path/validation/fcfunc.py $bfile`
 32
 33if [[ ${dollar}lines -gt 100000 ]];then
 34    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
 35    exit 1;
 36elif [[ ${dollar}format != "passed" ]];then
 37    echo ${dollar}format ${gt}${ad}2
 38    exit 1;
 39else
 40#set $gtpath = os.path.join( os.path.abspath($__app__.config.cistrome_static_library_path), "ceaslib", "GeneTable", $bfile.metadata.dbkey )
 41    gca -b $bfile --span=$span -g $gtpath --name=gca_out ${gt}${ad} $log
 42    cp gca_out.xls $output
 43fi
 44    </configfile>
 45  </configfiles>
 46<tests>
 47  <test maxseconds="3600" name="GCA_1">
 48    <param name="bfile" value="bedfile.bed" ftype="bed" dbkey="hg18" />
 49    <param name="span" value="3000" />
 50    <!-- hg18 is passed as the dbkey of bfile; the tool declares no "genome" input -->
 51    <output name="output" file="gca_1/gca_1.xls" />
 52    <output name="log" file="gca_1/gca_1.log" lines_diff="200" />
 53  </test>
 54  <test maxseconds="3600" name="GCA_2">
 55    <param name="bfile" value="bedfile.bed" ftype="bed" dbkey="hg18" />
 56    <param name="span" value="100" />
 57    <!-- hg18 is passed as the dbkey of bfile; the tool declares no "genome" input -->
 58    <output name="output" file="gca_2/gca_2.xls" />
 59    <output name="log" file="gca_2/gca_2.log" lines_diff="200" />
 60  </test>
 61  <test maxseconds="3600" name="GCA_3">
 62    <param name="bfile" value="bedfile.bed" ftype="bed" dbkey="hg18" />
 63    <param name="span" value="500" />
 64    <!-- hg18 is passed as the dbkey of bfile; the tool declares no "genome" input -->
 65    <output name="output" file="gca_3/gca_3.xls" />
 66    <output name="log" file="gca_3/gca_3.log" lines_diff="200" />
 67  </test>
 68  <test maxseconds="3600" name="GCA_4">
 69    <param name="bfile" value="bedfile.bed" ftype="bed" dbkey="hg18" />
 70    <param name="span" value="1000" />
 71    <!-- hg18 is passed as the dbkey of bfile; the tool declares no "genome" input -->
 72    <output name="output" file="gca_4/gca_4.xls" />
 73    <output name="log" file="gca_4/gca_4.log" lines_diff="200" />
 74  </test>
 75  <test maxseconds="3600" name="GCA_5">
 76    <param name="bfile" value="bedfile.bed" ftype="bed" dbkey="hg18" />
 77    <param name="span" value="10000" />
 78    <!-- hg18 is passed as the dbkey of bfile; the tool declares no "genome" input -->
 79    <output name="output" file="gca_5/gca_5.xls" />
 80    <output name="log" file="gca_5/gca_5.log" lines_diff="200" />
 81  </test>
 82</tests>
 83  <help>
 84This tool finds the nearest binding site in the given BED file for
 85every annotated coding gene. It is a module of the CEAS package,
 86written by Hyunjin Gene Shin and published in Bioinformatics (PubMed
 87ID: 19689956).
 88
 89.. class:: warningmark
 90
 91**NEED IMPROVEMENT**
 92
 93-----
 94
 95**Parameters**
 96
 97- **BED file** contains the transcription factor binding sites,
 98  generally the BED files for peaks from peak calling tools.
 99- **Span** is the distance in bp searched from the TSS and TTS of each gene for ChIP regions (100 to 1000000, default 3000).
100- **Genome Annotation Version** is not a separate input: the gene annotation
101  table is chosen from the genome build (dbkey) of the BED dataset. The annotations are downloaded from UCSC genome site.
102
103-----
104
105**Output**
106
107- **XLS file** is the tab-delimited file.
108
109-----
110
111**script parameter list of GCA**
112
113Options:
114  --version            show program's version number and exit
115  -h, --help           Show this help message and exit.
116  -b BED, --bed=BED    BED file of ChIP regions.
117  -g GDB, --gt=GDB     Gene annotation table. This can be a sqlite3 local db
118                       file, BED file or genome version of UCSC. The BED file
119                       must have an extension of '.bed'
120  --span=SPAN          Span in search of ChIP regions from TSS and TTS,
121                       DEFAULT=3000bp
122  --name=NAME          Experiment name. This will be used to name the output
123                       file. If an experiment name is not given, input BED
124                       file name will be used instead.
125  --gn-group=GN_GROUP  A particular group of genes of interest. If a txt file
126                       with one column of gene names (eg RefSeq IDs in case of
127                       using a refGene table) is given, gca returns the gene-
128                       centered annotation of this particular gene group.
129  --gname2=NAME2       The gene names of --gn-group will be regarded as
130                       'name2.' See the schema of the gene annotation table.
131
132  </help>
133
134</tool>