PageRenderTime 11ms CodeModel.GetById 1ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/stats/grouping.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 142 lines | 105 code | 16 blank | 21 comment | 0 complexity | ae39d0d234e26043f4dd2d8e2995d94e MD5 | raw file
  1<tool id="Grouping1" name="Group" version="2.1.0">
  2  <description>data by a column and perform aggregate operation on other columns.</description>
  3  <command interpreter="python">
  4    grouping.py 
  5      $out_file1
  6      $input1
  7      $groupcol
  8      $ignorecase
  9      $ignorelines
 10      #for $op in $operations
 11       '${op.optype}
 12        ${op.opcol}
 13        ${op.opround}'
 14      #end for
 15  </command>
 16  <inputs>
 17    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
 18    <param name="groupcol" label="Group by column" type="data_column" data_ref="input1" />
 19    <param name="ignorecase" type="boolean" truevalue="1" falsevalue="0">
 20      <label>Ignore case while grouping?</label>
 21    </param>
 22    <param name="ignorelines" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are not grouped">
 23            <option value="62">&gt;</option>
 24            <option value="64">@</option>
 25            <option value="43">+</option>
 26            <option value="60">&lt;</option>
 27            <option value="42">*</option>
 28            <option value="45">-</option>
 29            <option value="61">=</option>
 30            <option value="124">|</option>
 31            <option value="63">?</option>
 32            <option value="36">$</option>
 33            <option value="46">.</option>
 34            <option value="58">:</option>
 35            <option value="38">&amp;</option>
 36            <option value="37">%</option>
 37            <option value="94">^</option>
 38            <option value="35">&#35;</option>
 39    </param>
 40    <repeat name="operations" title="Operation">
 41      <param name="optype" type="select" label="Type">
 42        <option value="mean">Mean</option>
 43        <option value="median">Median</option>
 44        <option value="mode">Mode</option>
 45        <option value="max">Maximum</option>
 46        <option value="min">Minimum</option>
 47        <option value="sum">Sum</option>
 48        <option value="length">Count</option>
 49        <option value="unique">Count Distinct</option>
 50        <option value="cat">Concatenate</option>
 51        <option value="cat_uniq">Concatenate Distinct</option>
 52        <option value="random">Randomly pick</option>
 53        <option value="std">Standard deviation</option>
 54      </param>
 55      <param name="opcol" label="On column" type="data_column" data_ref="input1" />
 56      <param name="opround" type="select" label="Round result to nearest integer?">
 57         <option value="no">NO</option>
 58         <option value="yes">YES</option>
 59       </param>
 60    </repeat>
 61  </inputs>
 62  <outputs>
 63    <data format="tabular" name="out_file1" />
 64  </outputs>
 65  <requirements>
 66    <requirement type="python-module">numpy</requirement>
 67  </requirements>
 68  <tests>
 69    <!-- Test valid data -->
 70    <test>
 71      <param name="input1" value="1.bed"/>
 72      <param name="groupcol" value="1"/>
 73      <param name="ignorecase" value="true"/>
 74      <param name="optype" value="mean"/>
 75      <param name="opcol" value="2"/>
 76      <param name="opround" value="no"/>
 77      <output name="out_file1" file="groupby_out1.dat"/>
 78    </test>
 79    <!-- Long case with multiple operations; disabled because the test framework doesn't support it yet
 80    <test>
 81      <param name="input1" value="1.bed"/>
 82      <param name="groupcol" value="1"/>
 83      <param name="ignorecase" value="false"/>
 84      <param name="operations" value='[{"opcol": "2", "__index__": 0, "optype": "mean", "opround": "no"}, {"opcol": "2", "__index__": 1, "optype": "median", "opround": "no"}, {"opcol": "6", "__index__": 2, "optype": "mode", "opround": "no"}, {"opcol": "2", "__index__": 3, "optype": "max", "opround": "no"}, {"opcol": "2", "__index__": 4, "optype": "min", "opround": "no"}, {"opcol": "2", "__index__": 5, "optype": "sum", "opround": "no"}, {"opcol": "1", "__index__": 6, "optype": "length", "opround": "no"}, {"opcol": "1", "__index__": 7, "optype": "unique", "opround": "no"}, {"opcol": "1", "__index__": 8, "optype": "cat", "opround": "no"}, {"opcol": "6", "__index__": 9, "optype": "cat_uniq", "opround": "no"}, {"opcol": "2", "__index__": 10, "optype": "random", "opround": "no"}, {"opcol": "2", "__index__": 11, "optype": "std", "opround": "no"}]'/>
 85      <output name="out_file1" file="groupby_out3.tabular"/>
 86    </test>
 87    -->
 88    <!-- Test data with an invalid value in a column. Disabled because the test framework doesn't allow testing of errors
 89    <test>
 90      <param name="input1" value="1.tabular"/>
 91      <param name="groupcol" value="1"/>
 92      <param name="ignorecase" value="true"/>
 93      <param name="optype" value="mean"/>
 94      <param name="opcol" value="2"/>
 95      <param name="opround" value="no"/>
 96      <output name="out_file1" file="groupby_out2.dat"/>
 97    </test>
 98     -->
 99  </tests>
100  <help>
101
102.. class:: infomark
103
104**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
105
106-----
107
108**Syntax**
109
110This tool allows you to group the input dataset by a particular column and perform aggregate functions (Mean, Median, Mode, Maximum, Minimum, Sum, Count, Count Distinct, Concatenate, Concatenate Distinct, Randomly pick, and Standard deviation) on any column(s).
111
112The Concatenate function will take, for each group, each item in the specified column and build a comma-delimited list. Concatenate Distinct will do the same but will build a list of unique items with no repetition.
113
114Count and Count Distinct work like Concatenate and Concatenate Distinct, but instead of listing the items they only count them and return an integer.
115
116- If multiple modes are present, all are reported.
117
118-----
119
120**Example**
121
122- For the following input::
123
124   chr22  1000  1003  TTT
125   chr22  2000  2003  aaa
126   chr10  2200  2203  TTT
127   chr10  1200  1203  ttt
128   chr22  1600  1603  AAA
129
130- **Grouping on column 4** while ignoring case, and performing operation **Count on column 1** will return::
131
132   AAA    2
133   TTT    3
134   
135- **Grouping on column 4** while not ignoring case, and performing operation **Count on column 1** will return::
136
137   aaa    1
138   AAA    1
139   ttt    1
140   TTT    2
141  </help>
142</tool>