<!--
PageRenderTime 21ms CodeModel.GetById 8ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/peak_calling/sicer_wrapper.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 178 lines | 167 code | 11 blank | 0 comment | 0 complexity | 19d8b0aaab9b93f96de33e4d235bc5a5 MD5 | raw file
-->
  1<tool id="peakcalling_sicer" name="SICER" version="0.0.1">
  2  <description>Statistical approach for the Identification of ChIP-Enriched Regions</description>
  3  <command interpreter="python">sicer_wrapper.py 
  4  --bed_file '${input_bed_file}' 
  5  #if str( $input_control_file ) != 'None':
  6      --control_file '${input_control_file}'
  7      --significant_islands_output_file "${significant_islands_output_file}"
  8      --islands_summary_output_file "${islands_summary_output_file}"
  9      --significant_islands_summary_output_file "${significant_islands_summary_output_file}"
 10  #end if
 11  ${fix_off_by_one_errors}
 12  --dbkey '${input_bed_file.dbkey}'
 13  --redundancy_threshold '${redundancy_threshold}'
 14  --window_size '${window_size}'
 15  --fragment_size '${fragment_size}'
 16  --effective_genome_fraction '${effective_genome_fraction}'
 17  --gap_size '${gap_size}'
 18  --error_cut_off '${error_cut_off}'
 19  ##output files
 20  --stdout "${output_log_file}"
 21  --redundancy_removed_test_bed_output_file "${redundancy_removed_test_bed_output_file}"
 22  --redundancy_removed_control_bed_output_file "${redundancy_removed_control_bed_output_file}"
 23  --score_island_output_file "${score_island_output_file}"
 24  --summary_graph_output_file "${summary_graph_output_file}"
 25  --test_normalized_wig_output_file "${test_normalized_wig_output_file}"
 26  --island_filtered_output_file "${island_filtered_output_file}"
 27  --island_filtered_normalized_wig_output_file "${island_filtered_normalized_wig_output_file}"
 28  </command>
 29  <requirements>
 30    <requirement type="package" version="1.1">SICER</requirement>
 31  </requirements>
 32  <inputs>
 33    <param name="input_bed_file" type="data" format="bed" label="ChIP-Seq Tag File" >
 34      <validator type="expression" message="SICER is not available for the genome.">value is not None and value.dbkey in [ 'mm8', 'mm9', 'hg18', 'hg19', 'dm2', 'dm3', 'sacCer1', 'pombe', 'rn4', 'tair8' ]</validator>
 35    </param>
 36    <param name="input_control_file" type="data" format="bed" label="ChIP-Seq Control File" optional="True"> <!-- fix me, add filter to match dbkeys -->
 37      <options>
 38        <filter type="data_meta" ref="input_bed_file" key="dbkey" />
 39      </options>
 40    </param>
 41    <param name="fix_off_by_one_errors" type="boolean" truevalue="--fix_off_by_one_errors" falsevalue="" checked="True" label="Fix off-by-one errors in output files" help="SICER creates non-standard output files, this option will fix these coordinates"/> 
 42    <param name="redundancy_threshold" type="integer" label="Redundancy Threshold" value="1" help="The number of copies of identical reads allowed in a library" />
 43    <param name="window_size" type="integer" label="Window size" value="200" help="Resolution of SICER algorithm. For histone modifications, one can use 200 bp" />
 44    <param name="fragment_size" type="integer" label="Fragment size" value="150" help="for determination of the amount of shift from the beginning of a read to the center of the DNA fragment represented by the read. FRAGMENT_SIZE=150 means the shift is 75." />
 45    <param name="effective_genome_fraction" type="float" label="Effective genome fraction" value="0.74" help="Effective Genome as fraction of the genome size. It depends on read length." />
 46    <param name="gap_size" type="integer" label="Gap size" value="600" help="Needs to be multiples of window size. Namely if the window size is 200, the gap size should be 0, 200, 400, 600, ..." />
 47    <param name="error_cut_off" type="float" label="Statistic threshold value" value="0.01" help="FDR (with control) or E-value (without control)" />
 48  </inputs>
 49  <outputs>
 50    <data name="redundancy_removed_test_bed_output_file" format="bed" label="${tool.name} on ${on_string} (test-${redundancy_threshold}-removed.bed)"/>
 51    <data name="redundancy_removed_control_bed_output_file" format="bed" label="${tool.name} on ${on_string} (control-${redundancy_threshold}-removed.bed)">
 52      <filter>input_control_file is not None</filter>
 53    </data>
 54    <data name="summary_graph_output_file" format="bedgraph" label="${tool.name} on ${on_string} (test-W${window_size}.graph)"/>
 55    <data name="test_normalized_wig_output_file" format="wig" label="${tool.name} on ${on_string} (test-W${window_size}-normalized.wig)"/>
 56    <data name="significant_islands_output_file" format="interval" label="${tool.name} on ${on_string} (test-W${window_size}-G${gap_size}-FDR${error_cut_off}-island.bed)">
 57      <filter>input_control_file is not None</filter>
 58    </data>
 59    <data name="island_filtered_output_file" format="bed" label="${tool.name} on ${on_string} (#if str( $input_control_file ) != 'None' then ''.join( map( str, [ 'test-W', $window_size, '-G',$gap_size, '-FDR', $error_cut_off, '-islandfiltered.bed' ] ) ) else ''.join( map( str, [ 'test-W', $window_size, '-G', $gap_size, '-E', $error_cut_off, '-islandfiltered.bed' ] ) ) #)"/>
 60    <data name="island_filtered_normalized_wig_output_file" format="wig" label="${tool.name} on ${on_string} (#if str( $input_control_file ) != 'None' then ''.join( map( str, [ 'test-W', $window_size, '-G',$gap_size, '-FDR', $error_cut_off, '-islandfiltered-normalized.wig' ] ) ) else ''.join( map( str, [ 'test-W', $window_size, '-G', $gap_size, '-E', $error_cut_off, '-islandfiltered-normalized.wig' ] ) ) #)"/>
 61    <data name="score_island_output_file" format="interval" label="${tool.name} on ${on_string} (#if str( $input_control_file ) != 'None' then ''.join( map( str, [ 'test-W', $window_size, '-G',$gap_size, '.scoreisland' ] ) ) else ''.join( map( str, [ 'test-W', $window_size, '-G', $gap_size, '-E', $error_cut_off, '.scoreisland' ] ) ) #)"/>
 62    <data name="islands_summary_output_file" format="interval" label="${tool.name} on ${on_string} (test-W${window_size}-G${gap_size}-islands-summary)">
 63      <filter>input_control_file is not None</filter>
 64    </data>
 65    <data name="significant_islands_summary_output_file" format="interval" label="${tool.name} on ${on_string} (test-W${window_size}-G${gap_size}-islands-summary-FDR${error_cut_off})">
 66      <filter>input_control_file is not None</filter>
 67    </data>
 68    <data name="output_log_file" format="txt" label="${tool.name} on ${on_string} (log)"/>
 69  </outputs>
 70  <tests>
 71    <test>
 72      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
 73      <param name="input_control_file" />
 74      <param name="fix_off_by_one_errors" />
 75      <param name="redundancy_threshold" value="1" />
 76      <param name="window_size" value="200" />
 77      <param name="fragment_size" value="150" />
 78      <param name="effective_genome_fraction" value="0.74" />
 79      <param name="gap_size" value="600" />
 80      <param name="error_cut_off" value="0.01" />
 81      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_1/test-1-removed.bed" />
 82      <output name="summary_graph_output_file" file="peakcalling_sicer/test_1/test-W200.graph" />
 83      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-normalized.wig" />
 84      <output name="island_filtered_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered.bed" />
 85      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered-normalized.wig" />
 86      <output name="score_island_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01.scoreisland" />
 87      <output name="output_log_file" file="peakcalling_sicer/test_1/output_log_file.contains" compare="contains"/>
 88    </test>
 89    <test>
 90      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
 91      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="mm8" />
 92      <param name="fix_off_by_one_errors" />
 93      <param name="redundancy_threshold" value="1" />
 94      <param name="window_size" value="200" />
 95      <param name="fragment_size" value="150" />
 96      <param name="effective_genome_fraction" value="0.74" />
 97      <param name="gap_size" value="600" />
 98      <param name="error_cut_off" value="0.01" />
 99      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_2/test-1-removed.bed" />
100      <output name="redundancy_removed_control_bed_output_file" file="peakcalling_sicer/test_2/control-1-removed.bed" />
101      <output name="summary_graph_output_file" file="peakcalling_sicer/test_2/test-W200.graph" />
102      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-normalized.wig" />
103      <output name="significant_islands_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-island.bed" />
104      <output name="island_filtered_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered.bed" />
105      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered-normalized.wig" />
106      <output name="score_island_output_file" file="peakcalling_sicer/test_2/test-W200-G600.scoreisland" />
107      <output name="islands_summary_output_file" file="peakcalling_sicer/test_2/test-W200-G600-islands-summary" />
108      <output name="significant_islands_summary_output_file" file="peakcalling_sicer/test_2/test-W200-G600-islands-summary-FDR0.01" />
109      <output name="output_log_file" file="peakcalling_sicer/test_2/output_log_file.contains" compare="contains"/>
110    </test>
111    <test>
112      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
113      <param name="input_control_file" value="chipseq_input.bed.gz" ftype="bed" dbkey="mm8" />
114      <param name="fix_off_by_one_errors" value="True" />
115      <param name="redundancy_threshold" value="1" />
116      <param name="window_size" value="200" />
117      <param name="fragment_size" value="150" />
118      <param name="effective_genome_fraction" value="0.74" />
119      <param name="gap_size" value="600" />
120      <param name="error_cut_off" value="0.01" />
121      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_2/test-1-removed.bed" />
122      <output name="redundancy_removed_control_bed_output_file" file="peakcalling_sicer/test_2/control-1-removed.bed" />
123      <output name="summary_graph_output_file" file="peakcalling_sicer/test_3/test-W200.graph" />
124      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-normalized.wig" />
125      <output name="significant_islands_output_file" file="peakcalling_sicer/test_3/test-W200-G600-FDR0.01-island.bed" />
126      <output name="island_filtered_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered.bed" />
127      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_2/test-W200-G600-FDR0.01-islandfiltered-normalized.wig" />
128      <output name="score_island_output_file" file="peakcalling_sicer/test_3/test-W200-G600.scoreisland" />
129      <output name="islands_summary_output_file" file="peakcalling_sicer/test_3/test-W200-G600-islands-summary" />
130      <output name="significant_islands_summary_output_file" file="peakcalling_sicer/test_3/test-W200-G600-islands-summary-FDR0.01" />
131      <output name="output_log_file" file="peakcalling_sicer/test_2/output_log_file.contains" compare="contains"/>
132    </test>
133    <test>
134      <param name="input_bed_file" value="chipseq_enriched.bed.gz" ftype="bed" dbkey="mm8" />
135      <param name="input_control_file" />
136      <param name="fix_off_by_one_errors" value="True" />
137      <param name="redundancy_threshold" value="1" />
138      <param name="window_size" value="200" />
139      <param name="fragment_size" value="150" />
140      <param name="effective_genome_fraction" value="0.74" />
141      <param name="gap_size" value="600" />
142      <param name="error_cut_off" value="0.01" />
143      <output name="redundancy_removed_test_bed_output_file" file="peakcalling_sicer/test_1/test-1-removed.bed" />
144      <output name="summary_graph_output_file" file="peakcalling_sicer/test_4/test-W200.graph" />
145      <output name="test_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-normalized.wig" />
146      <output name="island_filtered_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered.bed" />
147      <output name="island_filtered_normalized_wig_output_file" file="peakcalling_sicer/test_1/test-W200-G600-E0.01-islandfiltered-normalized.wig" />
148      <output name="score_island_output_file" file="peakcalling_sicer/test_4/test-W200-G600-E0.01.scoreisland" />
149      <output name="output_log_file" file="peakcalling_sicer/test_1/output_log_file.contains" compare="contains"/>
150    </test>
151  </tests>
152  <help>
153**What it does**
154
155SICER first and foremost is a filtering tool. Its main functions are::
156  
157  1. Delineation of the significantly ChIP-enriched regions, which can be used to associate with other genomic landmarks. 
158  2. Identification of reads on the ChIP-enriched regions, which can be used for profiling and other quantitative analysis.
159
160View the original SICER documentation: http://home.gwu.edu/~wpeng/Software.htm.
161
162------
163
164.. class:: warningmark
165
166  By default, SICER creates files that do not conform to standards (e.g. BED files are closed, not half-open). This could have implications for downstream analysis.
167  To force the output of SICER to be formatted properly to standard file formats, check the **"Fix off-by-one errors in output files"** option.
168
169------
170
171**Citation**
172
173For the underlying tool, please cite `Zang C, Schones DE, Zeng C, Cui K, Zhao K, Peng W. A clustering approach for identification of enriched domains from histone modification ChIP-Seq data. Bioinformatics. 2009 Aug 1;25(15):1952-8. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505939&gt;`_
174
175If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
176
177  </help>
178</tool>