<!--
PageRenderTime 30ms CodeModel.GetById 18ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 1ms

/tools/stats/aggregate_binned_scores_in_intervals.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 113 lines | 92 code | 21 blank | 0 comment | 0 complexity | 9b7941c058fc94a76c81d8bdc351c60c MD5 | raw file
-->
  1<tool id="aggregate_scores_in_intervals2" description="such as phastCons, GERP, binCons, and others for a set of genomic intervals" name="Aggregate datapoints" version="1.1.3">
  2  <description>Appends the average, min, max of datapoints per interval</description>
  3  <command interpreter="python">
  4    #if $score_source_type.score_source == "user" #aggregate_scores_in_intervals.py $score_source_type.input2 $input1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $out_file1 --chrom_buffer=3
  5    #else                                         #aggregate_scores_in_intervals.py $score_source_type.datasets $input1 ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $out_file1 -b
  6    #end if#
  7  </command>
  8  <inputs>
  9    <param format="interval" name="input1" type="data" label="Interval file"/>
 10    <conditional name="score_source_type">
 11      <param name="score_source" type="select" label="Score Source">
 12        <option value="cached" selected="true">Locally Cached Scores</option>
 13        <option value="user">Scores in Your History</option>
 14      </param>
 15      <when value="cached">
 16        <param name="datasets" type="select" label="Available datasets" display="radio">
 17          <options from_file="binned_scores.loc">
 18            <column name="name" index="1"/>
 19            <column name="value" index="2"/>
 20            <column name="dbkey" index="0"/>
 21            <filter type="data_meta" ref="input1" key="dbkey" column="0" />
 22          </options>
 23        </param>
 24      </when>
 25      <when value="user">
 26        <param format="wig" name="input2" type="data" label="Score file">
 27          <options>
 28            <filter type="data_meta" ref="input1" key="dbkey" />
 29          </options>
 30        </param>
 31      </when>
 32    </conditional>
 33  </inputs>
 34  <outputs>
 35    <data format="interval" name="out_file1" metadata_source="input1"/>
 36  </outputs>
 37  <tests>
 38    <test>
 39      <param name="input1" value="6.bed" dbkey="hg17" ftype="bed"/>
 40      <param name="score_source" value="cached"/>
 41      <param name="datasets" value="/galaxy/data/binned_scores/hg17/phastcons_encode_sep2005_tba" />
 42      <output name="out_file1" file="aggregate_binned_scores_in_intervals.out" />
 43    </test>
 44    <test>
 45      <param name="input1" value="9_hg18.bed" dbkey="hg18" ftype="bed"/>
 46      <param name="score_source" value="cached"/>
 47      <param name="datasets" value="/galaxy/data/binned_scores/hg18/phastCons17way/ba" />
 48      <output name="out_file1" file="aggregate_binned_scores_in_intervals2.interval" />
 49    </test>
 50    <test>
 51      <param name="input1" value="6.bed" dbkey="hg17" ftype="bed"/>
 52      <param name="score_source" value="user"/>
 53      <param name="input2" value="aggregate_binned_scores_3.wig" dbkey="hg17" ftype="wig"/>
 54      <output name="out_file1" file="aggregate_binned_scores_in_intervals3.out"/>
 55    </test>
 56  </tests>
 57  <help>
 58
 59.. class:: warningmark
 60
 61This tool currently only has cached data for genome builds hg16, hg17 and hg18. However, you may use your own data point (wiggle) data, such as those available from UCSC. If you are trying to use your own data point file and it is not appearing as an option, make sure that the builds for your history items are the same.
 62
 63.. class:: warningmark
 64
 65This tool assumes that the input dataset is in interval format and contains at least a chrom column, a start column and an end column.  These 3 columns can be dispersed throughout any number of other data columns. 
 66
 67-----
 68
 69.. class:: infomark
 70
 71**TIP:** Computing summary information may throw exceptions if the data type (e.g., string, integer) in every line of the columns is not appropriate for the computation (e.g., attempting numerical calculations on strings).  If an exception is thrown when computing summary information for a line, that line is skipped as invalid for the computation.  The number of invalid skipped lines is documented in the resulting history item as a "Data issue".
 72
 73-----
 74
 75**Syntax**
 76
 77This tool appends columns of summary information for each interval matched against a selected dataset.  For each interval, the average, minimum and maximum for the data falling within the interval is computed.
 78
 79- Several quantitative scores are provided for the ENCODE regions.
 80
 81  - Various Scores
 82      - Regulatory Potential
 83      - Neutral rate (Ancestral Repeats)
 84      - GC fraction
 85  - Conservation Scores
 86      - PhastCons
 87      - binCons
 88      - GERP
 89
 90-----
 91
 92**Example**
 93
 94If your original data has the following format:
 95
 96+------+-----+-----+---+------+
 97|other1|chrom|start|end|other2|
 98+------+-----+-----+---+------+
 99
100and you choose to aggregate phastCons scores, your output will look like this:
101
102+------+-----+-----+---+------+---+---+---+
103|other1|chrom|start|end|other2|avg|min|max|
104+------+-----+-----+---+------+---+---+---+
105
106where:
107
108* **avg** - average phastCons score for each region
109* **min** - minimum phastCons score for each region
110* **max** - maximum phastCons score for each region
111
112  </help>
113</tool>