gca.xml | searchcode

/tools/ceas/gca.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 134 lines · 116 code · 18 blank · 0 comment · 0 complexity · 981e788ed5c8365e0d7357e96cf673df MD5 · raw file

<tool name="GCA: Gene centered annotation" id="ceas_gca">
  <description>Find the nearest interval in the given interval set for every annotated coding gene</description>
  <!-- Run the rendered "shscript" configfile with bash. The interpreter path
       must not end in a slash: "/bin/bash/" is resolved as a directory and
       cannot be executed. -->
  <command interpreter="command">/bin/bash $shscript </command>
  <inputs>
    <!-- BED dataset to annotate; the validator rejects datasets that have no
         genome build assigned. -->
    <param name="bfile" type="data" format="bed" ftype="bed" label="BED file(100,000 lines max)">
      <validator type="unspecified_build" />
    </param>
    <!-- Search span passed to gca as its span option; limited to 100..1000000. -->
    <param name="span" type="text" value="3000" label="Span">
      <validator type="in_range" min="100" max="1000000" message="Span is out of range, Span has to be between 100 to 1000000" />
    </param>
  </inputs>
  <outputs>
    <!-- Annotation table; the wrapper script copies gca_out.xls here. -->
    <data name="output" format="xls" />
    <!-- Receives the redirected stdout and stderr of the gca run. -->
    <data name="log" format="txt" label="job log"/>
  </outputs>

  <configfiles>
    <configfile name="shscript">
#!/bin/bash
## Wrapper script template: validates the BED input, then runs gca on it.
#import os

## Characters that must not appear literally in this template are built with
## chr(): the dollar sign would start a template placeholder, and the angle
## brackets and the ampersand are XML markup characters.
#set $dollar = chr(36)
#set $gt = chr(62)
#set $lt = chr(60)
#set $ad = chr(38)

#set $path = $os.path.abspath($__app__.config.tool_path)
## Gene table location, selected by the genome build (dbkey) of the BED
## dataset. This is resolved when the template is rendered, independently of
## which shell branch runs below.
#set $gtpath = os.path.join( os.path.abspath($__app__.config.cistrome_static_library_path), "ceaslib", "GeneTable", $bfile.metadata.dbkey )

##check line count and file format accuracy of bed file
lines=`wc -l "$bfile" | tail -1 | awk '{print ${dollar}1}'`
format=`"$path/validation/fcfunc.py" "$bfile"`

if [[ ${dollar}lines -gt 100000 ]];then
    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
## Exit non-zero: a bare exit would return the status of the echo above (0).
    exit 1;
elif [[ ${dollar}format != "passed" ]];then
    echo "${dollar}format" ${gt}${ad}2
## Same as above: report the rejected input as a failure.
    exit 1;
else
## Both stdout and stderr of gca are captured in the job log dataset.
    gca -b "$bfile" --span="$span" -g "$gtpath" --name=gca_out ${gt}${ad} "$log"
    cp gca_out.xls "$output"
fi
    </configfile>
  </configfiles>
<!-- Functional tests: the same BED file is run with five different spans.
     Each test checks the annotation table ("output") and the job log ("log");
     the log comparison tolerates up to 200 differing lines.
     NOTE(review): this tool declares no input named "genome"; the genome
     build is read from the BED dataset itself. Confirm how the test
     framework assigns hg18 to bedfile.bed. -->
<tests>
  <test maxseconds="3600" name="GCA_1">
    <param name="bfile" value="bedfile.bed" />
    <param name="span" value="3000" />
    <param name="genome" value="hg18" />
    <output name="output" file="gca_1/gca_1.xls" />
    <output name="log" file="gca_1/gca_1.log" lines_diff="200" />
  </test>
  <test maxseconds="3600" name="GCA_2">
    <param name="bfile" value="bedfile.bed" />
    <param name="span" value="100" />
    <param name="genome" value="hg18" />
    <output name="output" file="gca_2/gca_2.xls" />
    <output name="log" file="gca_2/gca_2.log" lines_diff="200" />
  </test>
  <test maxseconds="3600" name="GCA_3">
    <param name="bfile" value="bedfile.bed" />
    <param name="span" value="500" />
    <param name="genome" value="hg18" />
    <output name="output" file="gca_3/gca_3.xls" />
    <output name="log" file="gca_3/gca_3.log" lines_diff="200" />
  </test>
  <test maxseconds="3600" name="GCA_4">
    <param name="bfile" value="bedfile.bed" />
    <param name="span" value="1000" />
    <param name="genome" value="hg18" />
    <output name="output" file="gca_4/gca_4.xls" />
    <output name="log" file="gca_4/gca_4.log" lines_diff="200" />
  </test>
  <test maxseconds="3600" name="GCA_5">
    <param name="bfile" value="bedfile.bed" />
    <param name="span" value="10000" />
    <param name="genome" value="hg18" />
    <output name="output" file="gca_5/gca_5.xls" />
    <output name="log" file="gca_5/gca_5.log" lines_diff="200" />
  </test>
</tests>
  <help>
This tool finds the nearest binding sites in the given BED file for
every annotated coding gene. It is a module of the CEAS package, which
was written by Hyunjin Gene Shin and published in Bioinformatics
(PubMed ID: 19689956).

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **BED file** contains the transcription factor binding sites,
  generally the BED files for peaks from peak calling tools.
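- **Span** is the distance (in bp) searched for ChIP regions from the
  TSS and TTS of each gene. It must be between 100 and 1000000; the
  default is 3000.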
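- **Genome Annotation Version** is not chosen separately: the gene
  annotation is selected by the genome build (database key) assigned to
  the BED file, so the build must be set on that dataset. The
  annotations are downloaded from the UCSC genome site.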

-----

**Output**

- **XLS file** is a tab-delimited file containing the gene-centered annotation.

-----

**script parameter list of GCA**

Options:
  --version            show program's version number and exit
  -h, --help           Show this help message and exit.
  -b BED, --bed=BED    BED file of ChIP regions.
  -g GDB, --gt=GDB     Gene annotation table. This can be a sqlite3 local db
                       file, BED file or genome version of UCSC. The BED file
                       must have an extension of '.bed'
  --span=SPAN          Span in search of ChIP regions from TSS and TTS,
                       DEFAULT=3000bp
  --name=NAME          Experiment name. This will be used to name the output
                       file. If an experiment name is not given, input BED
                       file name will be used instead.
  --gn-group=GN_GROUP  A particular group of genes of interest. If a txt file
                       with one column of gene names (eg RefSeq IDs in case of
                       using a refGene table) is given, gca returns the gene-
                       centered annotation of this particular gene group.
  --gname2=NAME2       The gene names of --gn-group will be regarded as
                       'name2.' See the schema of the gene annotation table.

  </help>

</tool>