PageRenderTime 16ms CodeModel.GetById 8ms app.highlight 3ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/gatk/analyze_covariates.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 101 lines | 84 code | 16 blank | 1 comment | 0 complexity | 008a34667c7b27c1087b3e9cd23a32cd MD5 | raw file
  1<tool id="gatk_analyze_covariates" name="Analyze Covariates" version="0.0.5">
  2  <description>- draw plots</description>
  3  <requirements>
  4      <requirement type="package" version="1.4">gatk</requirement>
  5  </requirements>
  6  <macros>
  7    <import>gatk_macros.xml</import>
  8  </macros>
  <!-- Command template. Runs gatk_wrapper.py under the python interpreter and
       hands it: the log dataset path (stdout option), the HTML report dataset
       together with its files_path directory (html_report_from_directory
       option), and, through -p, a single-quoted java command line that
       launches AnalyzeCovariates.jar on the selected recal file with that same
       files_path directory as its output directory. Lines starting with ##
       inside the template are template comments holding options that are
       currently left disabled. The options between #if and #end if are
       emitted only when the selector is "advanced". -->
  9  <command interpreter="python">gatk_wrapper.py
 10   --max_jvm_heap_fraction "1"
 11   --stdout "${output_log}"
 12   --html_report_from_directory "${output_html}" "${output_html.files_path}"
 13   -p 'java 
 14    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/AnalyzeCovariates.jar"
 15    -recalFile "${input_recal}"
 16    -outputDir "${output_html.files_path}"
 17    ##--num_threads 4 ##hard coded, for now
 18    ##-log "${output_log}"
 19    ##-Rscript,--path_to_Rscript path_to_Rscript; on path is good enough         
 20    #if $analysis_param_type.analysis_param_type_selector == "advanced":
 21        --ignoreQ "${analysis_param_type.ignore_q}"
 22        --numRG "${analysis_param_type.num_read_groups}"
 23        --max_quality_score "${analysis_param_type.max_quality_score}"
 24        --max_histogram_value "${analysis_param_type.max_histogram_value}"
 25         ${analysis_param_type.do_indel_quality}
 26    #end if
 27   '
 28  </command>
 29  <inputs>
       <!-- One dataset input (the recal CSV) plus a basic/advanced conditional.
            "basic" adds no parameters; "advanced" exposes four integer options
            and one boolean flag, which the command template forwards to
            AnalyzeCovariates only when "advanced" is selected. -->
 30    <param name="input_recal" type="data" format="csv" label="Covariates table recalibration file" help="-recalFile,--recal_file &amp;lt;recal_file&amp;gt;" />
 31    <conditional name="analysis_param_type">
 32      <param name="analysis_param_type_selector" type="select" label="Basic or Advanced options">
 33        <option value="basic" selected="True">Basic</option>
 34        <option value="advanced">Advanced</option>
 35      </param>
 36      <when value="basic">
 37        <!-- Do nothing here -->
 38      </when>
 39      <when value="advanced">
           <!-- num_read_groups defaults to -1, which the help text of this tool
                describes as "process all read groups". do_indel_quality
                expands to its truevalue or to an empty string, which is why
                the command template inserts it bare, without an option name
                or quotes. -->
 40        <param name="ignore_q" type="integer" value="5" label="Ignore bases with reported quality less than this number." help="-ignoreQ,--ignoreQ &amp;lt;ignoreQ&amp;gt; "/>
 41        <param name="num_read_groups" type="integer" value="-1" label="Only process N read groups." help="-numRG,--numRG &amp;lt;numRG&amp;gt;"/>
 42        <param name="max_quality_score" type="integer" value="50" label="Max quality score" help="-maxQ,--max_quality_score &amp;lt;max_quality_score&amp;gt;"/>
 43        <param name="max_histogram_value" type="integer" value="0" label="Max histogram value" help="-maxHist,--max_histogram_value &amp;lt;max_histogram_value&amp;gt;"/>
 44        <param name="do_indel_quality" type="boolean" truevalue="--do_indel_quality" falsevalue="" label="Do indel quality" help="--do_indel_quality"/>
 45      </when>
 46    </conditional>
 47  </inputs>
 48  <outputs>
       <!-- Two datasets. output_html is the HTML report; its files_path
            directory is also what the command template passes to
            AnalyzeCovariates as the output directory. output_log is the text
            dataset whose path is given to the wrapper's stdout option. -->
 49    <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)" />
 50    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
 51  </outputs>
 52  <tests>
         <!-- Single test, run with the "basic" option set: the input is a CSV
              fixture stored under the gatk_count_covariates test-data folder.
              The HTML output is compared against a reference file, while the
              log output is checked with compare="contains" against a
              ".log.contains" fixture. -->
 53      <test>
 54          <param name="input_recal" value="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" ftype="csv" /> 
 55          <param name="analysis_param_type_selector" value="basic" />
 56          <output name="output_html" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.html" />
 57          <output name="output_log" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.log.contains" compare="contains" />
 58      </test>
 59  </tests>
 60  <help>
 61**What it does**
 62
 63Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates.
 64
 65For more information on base quality score recalibration using the GATK, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration&gt;`_.
 66
 67To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.
 68
 69If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.
 70
 71------
 72
 73**Inputs**
 74
 75GenomeAnalysisTK: AnalyzeCovariates accepts a recal CSV file.
 76
 77
 78**Outputs**
 79
 80The output is in CSV and HTML files with links to PDF graphs and data files.
 81
 82
 83Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.
 84
 85-------
 86
 87**Settings**::
 88
 89 recal_file             The input recal csv file to analyze
 90 output_dir             The directory in which to output all the plots and intermediate data files
 91 path_to_Rscript        The path to your implementation of Rscript. For Broad users this may be /broad/tools/apps/R-2.6.0/bin/Rscript
 92 path_to_resources      Path to resources folder holding the Sting R scripts.
 93 ignoreQ                Ignore bases with reported quality less than this number.
 94 numRG                  Only process N read groups. Default value: -1 (process all read groups)
 95 max_quality_score      The integer value at which to cap the quality scores, default is 50
 96 max_histogram_value    If supplied, this value will be the max value of the histogram plots
 97 do_indel_quality       If supplied, AnalyzeCovariates is run with the --do_indel_quality flag (a switch that takes no value)
 98
 99@CITATION_SECTION@
100  </help>
101</tool>