/tools/gatk/variant_apply_recalibration.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 139 lines · 115 code · 22 blank · 2 comment · 0 complexity · 2e1de3a1d164f465a9a9db2367c795b4 MD5 · raw file

  1. <tool id="gatk_variant_apply_recalibration" name="Apply Variant Recalibration" version="0.0.4">
  2. <description></description>
  3. <requirements>
  4. <requirement type="package" version="1.4">gatk</requirement>
  5. </requirements>
  6. <macros>
  7. <import>gatk_macros.xml</import>
  8. </macros>
  9. <!-- Cheetah command template. Calls gatk_wrapper.py with: one -d group per entry of the "variants" repeat; a first -p string holding the java / GenomeAnalysisTK.jar ApplyRecalibration call, where -R is only emitted when the reference source is not "history"; the included $standard_gatk_options (defined outside this block, presumably in gatk_macros.xml - confirm); and a second -p string with the tool-specific mode, ignore_filter and ts_filter_level options. For an ignore filter whose selector is "custom" the free-text filter_name is used instead of the selector value. --> <command interpreter="python">gatk_wrapper.py
  10. --max_jvm_heap_fraction "1"
  11. --stdout "${output_log}"
  12. #for $var_count, $variant in enumerate( $reference_source.variants ):
  13. -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}"
  14. #end for
  15. -p 'java
  16. -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
  17. -T "ApplyRecalibration"
  18. ##--num_threads 4 ##hard coded, for now
  19. -et "NO_ET" ##ET no phone home
  20. #if $reference_source.reference_source_selector != "history":
  21. -R "${reference_source.ref_file.fields.path}"
  22. #end if
  23. --recal_file "${reference_source.input_recal}"
  24. --tranches_file "${reference_source.input_tranches}"
  25. --out "${output_variants}"
  26. '
  27. #include source=$standard_gatk_options#
  28. ##start analysis specific options
  29. -p '
  30. --mode "${mode}"
  31. #for $ignore_filter in $ignore_filters:
  32. #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector )
  33. #if $ignore_filter_name == "custom":
  34. #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name )
  35. #end if
  36. --ignore_filter "${ignore_filter_name}"
  37. #end for
  38. --ts_filter_level "${ts_filter_level}"
  39. '
  40. </command>
  41. <inputs>
  42. <conditional name="reference_source"> <!-- both branches declare the same variants / input_recal / input_tranches params; they differ only in how ref_file is supplied -->
  43. <expand macro="reference_source_selector_param" />
  44. <when value="cached"> <!-- ref_file is a select fed by the gatk_picard_indexes data table -->
  45. <repeat name="variants" title="Variant" min="1" help="-input,--input &amp;lt;input&amp;gt;">
  46. <param name="input_variants" type="data" format="vcf" label="Variant file to annotate"/>
  47. </repeat>
  48. <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &amp;lt;recal_file&amp;gt;" />
  49. <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &amp;lt;tranches_file&amp;gt;" />
  50. <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
  51. <options from_data_table="gatk_picard_indexes">
  52. <!-- <filter type="data_meta" key="dbkey" ref="variants[0].input_variants" column="dbkey"/> --> <!-- NOTE(review): the dbkey filter on this line is commented out, so the select is presumably not restricted to the build of the input file - confirm -->
  53. </options>
  54. <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
  55. </param>
  56. </when>
  57. <when value="history"> <!-- FIX ME!!!! (original author's marker; what needs fixing is not stated). ref_file is a fasta data param in this branch -->
  58. <repeat name="variants" title="Variant" min="1" help="-input,--input &amp;lt;input&amp;gt;">
  59. <param name="input_variants" type="data" format="vcf" label="Variant file to annotate" />
  60. </repeat>
  61. <param name="input_recal" type="data" format="gatk_recal" label="Variant Recalibration file" help="-recalFile,--recal_file &amp;lt;recal_file&amp;gt;" />
  62. <param name="input_tranches" type="data" format="gatk_tranche" label="Variant Tranches file" help="-tranchesFile,--tranches_file &amp;lt;tranches_file&amp;gt;" />
  63. <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
  64. </when>
  65. </conditional>
  66. <expand macro="gatk_param_type_conditional" /> <!-- macro body is not defined in this block; presumably provided by the imported gatk_macros.xml - confirm -->
  67. <param name="mode" type="select" label="Recalibration mode" help="-mode,--mode &amp;lt;mode&amp;gt;"> <!-- the selected value is substituted into the mode option of the command template; SNP is preselected -->
  68. <option value="SNP" selected="True">SNP</option>
  69. <option value="INDEL">INDEL</option>
  70. <option value="BOTH">BOTH</option>
  71. </param>
  72. <repeat name="ignore_filters" title="Ignore Filter" help="-ignoreFilter,--ignore_filter &amp;lt;ignore_filter&amp;gt;"> <!-- the command template emits one ignore_filter option per entry of this repeat -->
  73. <conditional name="ignore_filter_type">
  74. <param name="ignore_filter_type_selector" type="select" label="Filter Type">
  75. <option value="HARD_TO_VALIDATE">HARD_TO_VALIDATE</option>
  76. <option value="LowQual" >LowQual</option>
  77. <option value="custom" selected="True">Other</option> <!-- preselected choice; the filter name then comes from the filter_name text param -->
  78. </param>
  79. <when value="custom">
  80. <param name="filter_name" type="text" value="" label="Filter name"/> <!-- NOTE(review): empty by default with no validator here, so an unfilled entry would put an empty filter name on the command line - confirm whether that is intended -->
  81. </when>
  82. <when value="HARD_TO_VALIDATE" />
  83. <when value="LowQual" />
  84. </conditional>
  85. </repeat>
  86. <param name="ts_filter_level" type="float" label="Truth sensitivity level at which to start filtering" value="99.0" help="-ts_filter_level,--ts_filter_level &amp;lt;ts_filter_level&amp;gt;"/> <!-- float, default 99.0; substituted into the ts_filter_level option of the command template. The label no longer mentions plots: this tool's outputs are only a VCF and a log -->
  87. </inputs>
  88. <outputs> <!-- datasets created by the tool -->
  89. <data name="output_variants" format="vcf" label="${tool.name} on ${on_string} (Variants File)"/> <!-- VCF dataset; its path is given to the out option in the command template -->
  90. <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)"/> <!-- text dataset; its path is given to the wrapper's stdout option in the command template -->
  91. </outputs>
  92. <tests>
  93. <!-- TODO: ADD TESTS (no test cases are defined for this tool yet) -->
  94. </tests>
  95. <help>
  96. **What it does**
  97. Applies cuts to the input VCF file (by adding filter lines) to achieve the desired novel FDR levels which were specified during VariantRecalibration.
  98. For more information on using the ApplyRecalibration module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration&gt;`_.
  99. To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.
  100. If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.
  101. ------
  102. **Inputs**
  103. GenomeAnalysisTK: ApplyRecalibration accepts a variant input file, a recalibration file and a tranches file.
  104. **Outputs**
  105. The output is in VCF format.
  106. Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.
  107. -------
  108. **Settings**::
  109. recal_file The input recal file used by ApplyRecalibration
  110. tranches_file The input tranches file describing where to cut the data
  111. out The output filtered, recalibrated VCF file
  112. ts_filter_level The truth sensitivity level at which to start filtering
  113. ignore_filter If specified the optimizer will use variants even if the specified filter name is marked in the input VCF file
  114. mode Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously. (SNP|INDEL|BOTH)
  115. @CITATION_SECTION@
  116. </help>
  117. </tool>