PageRenderTime 28ms CodeModel.GetById 17ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 1ms

/tools/new_operations/flanking_features.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 127 lines | 102 code | 24 blank | 1 comment | 0 complexity | 467774429f5c08b2edcdc77b7c7bd960 MD5 | raw file
  1<tool id="flanking_features_1" name="Fetch closest non-overlapping feature" version="4.0.1">
  2  <description>  for every interval</description>
  3  <command interpreter="python">
      ## Runs flanking_features.py with four positional arguments: the two input datasets, the output dataset, and the selected direction.
  4      flanking_features.py $input1 $input2 $out_file1 $direction
  5      
      ## Column specification for input1 (-1): if its datatype is an instance of the class registered for the 'gff' extension, pass the fixed column list 1,4,5,7 plus --gff1; otherwise pass the chrom, start, end and strand column numbers from the dataset's metadata.
  6      #if isinstance( $input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
  7          -1 1,4,5,7 --gff1
  8      #else:
  9          -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol}
 10      #end if
 11          
      ## Same selection for input2 (-2 / --gff2).
 12      #if isinstance( $input2.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
 13          -2 1,4,5,7 --gff2
 14      #else:
 15          -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol}
 16      #end if
 17  </command>
 18  <inputs>
        <!-- input1: dataset (interval or gff) supplying the intervals for which closest features are fetched. -->
 19    <param format="interval,gff" name="input1" type="data" label="For every interval in"/>
        <!-- input2: dataset (interval or gff) from which the closest features are taken. -->
 20    <param format="interval,gff" name="input2" type="data" label="Fetch closest feature(s) from"/>
        <!-- direction: the selected option value (Either, Both, Upstream or Downstream) is substituted for $direction in the command. -->
 21    <param name="direction" type="select" label="Located">
 22      <option value="Either">Either Upstream or Downstream</option>
 23      <option value="Both">Both Upstream and Downstream</option>
 24      <option value="Upstream">Upstream</option>
 25      <option value="Downstream">Downstream</option>
 26    </param>
 27  </inputs>
 28  <outputs>
        <!-- out_file1 takes both its datatype and its metadata from input1. format_source names that input explicitly; the deprecated format="input" is ambiguous because this tool has two data inputs. -->
 29    <data format_source="input1" name="out_file1" metadata_source="input1"/>
 30  </outputs>
 31  <tests>
        <!-- Each test supplies two input files and a direction, and names the expected content of out_file1. The first five tests use BED inputs only. -->
 32    <test>
 33      <param name="input1" value="4_windows.bed"/>
 34      <param name="input2" value="4_windows_2.bed"/>
 35      <param name="direction" value="Either"/>
 36      <output name="out_file1" file="closest_features_either.interval"/>
 37    </test>
 38    <test>
 39      <param name="input1" value="4_windows.bed"/>
 40      <param name="input2" value="4_windows_2.bed"/>
 41      <param name="direction" value="Both"/>
 42      <output name="out_file1" file="closest_features.interval"/>
 43    </test>
 44    <test>
 45      <param name="input1" value="4_windows.bed"/>
 46      <param name="input2" value="4_windows_2.bed"/>
 47      <param name="direction" value="Upstream"/>
 48      <output name="out_file1" file="closest_features_up.interval"/>
 49    </test>
 50    <test>
 51      <param name="input1" value="4_windows.bed"/>
 52      <param name="input2" value="4_windows_2.bed"/>
 53      <param name="direction" value="Downstream"/>
 54      <output name="out_file1" file="closest_features_down.interval"/>
 55    </test>
 56    <test>
 57      <param name="input1" value="4_windows.bed"/>
 58      <param name="input2" value="4_windows_3.bed"/>
 59      <param name="direction" value="Both"/>
 60      <output name="out_file1" file="closest_features_both.interval"/>
 61    </test>
 62    <!-- Tests for GFF functionality: the first pairs a BED input1 with a GFF input2; the second uses GFF files for both inputs. -->
        <!-- NOTE(review): both GFF tests select direction "Either" but expect files named closest_features_both*.gff; confirm the direction and the expected-file names agree. -->
 63
 64    <test>
 65      <param name="input1" value="4_windows.bed"/>
 66      <param name="input2" value="4_windows_2.gff"/>
 67      <param name="direction" value="Either"/>
 68      <output name="out_file1" file="closest_features_both.gff"/>
 69    </test>
 70    <test>
 71      <param name="input1" value="4_windows.gff"/>
 72      <param name="input2" value="4_windows_2.gff"/>
 73      <param name="direction" value="Either"/>
 74      <output name="out_file1" file="closest_features_both2.gff"/>
 75    </test>
 76    
 77  </tests>
 78 <help> 
 79
 80.. class:: infomark
 81
 82**What it does**
 83
 84For every interval in the first (**intervals**) dataset, this tool fetches the **closest non-overlapping** upstream and/or downstream features from the second (**features**) dataset.
 85
 86-----
 87
 88.. class:: warningmark
 89
 90**Note:** 
 91
 92Every line should contain at least 3 columns: chromosome name, start coordinate and stop coordinate. If any of these columns is missing, or if the start or stop coordinate is not numeric, the line is treated as invalid and skipped. The number of skipped lines is reported in the resulting history item as a "data issue".
 93
 94If the strand column is missing from your input interval dataset, the intervals will be considered to be on the positive strand. You can add a strand column to your input dataset by using the *Text Manipulation->Add column* tool.
 95
 96For GFF files, features are added as a GTF-style attribute at the end of the line.
 97
 98-----
 99
100**Example**
101
102If the **intervals** are::
103
104   chr1 10   100  Query1.1
105   chr1 500  1000 Query1.2
106   chr1 1100 1250 Query1.3
107
108and the **features** are::
109
110   chr1 120  180  Query2.1
111   chr1 140  200  Query2.2
112   chr1 580  1050 Query2.3
113   chr1 2000 2204 Query2.4
114   chr1 2500 3000 Query2.5
115
116Running this tool for **Both Upstream and Downstream** will return::
117
118   chr1 10   100  Query1.1 chr1 120  180  Query2.1
119   chr1 500  1000 Query1.2 chr1 140  200  Query2.2
120   chr1 500  1000 Query1.2 chr1 2000 2204 Query2.4
121   chr1 1100 1250 Query1.3 chr1 580  1050 Query2.3
122   chr1 1100 1250 Query1.3 chr1 2000 2204 Query2.4
123
124</help>  
125
126
127</tool>