PageRenderTime 27ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/gatk/variant_filtration.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 182 lines | 150 code | 30 blank | 2 comment | 0 complexity | b482f0d0fa968dc9ca01c2ecc52a24df MD5 | raw file
  <tool id="gatk_variant_filtration" name="Variant Filtration" version="0.0.5">
    <!--
      Galaxy tool definition for the GATK "VariantFiltration" analysis.
      The command template below invokes gatk_wrapper.py, writes the filtered
      VCF to output_vcf and captures the program's stdout in output_log.
      The macros reference_source_selector_param and gatk_param_type_conditional,
      the standard_gatk_options include and the CITATION_SECTION token are not
      defined in this file; they are expected to come from gatk_macros.xml.
    -->
    <description>on VCF files</description>
    <requirements>
      <requirement type="package" version="1.4">gatk</requirement>
    </requirements>
    <macros>
      <import>gatk_macros.xml</import>
    </macros>
    <command interpreter="python">gatk_wrapper.py
      ## NOTE(review): the meaning of the -d / -p / -o wrapper options is defined by gatk_wrapper.py, which is not part of this file.
      #from binascii import hexlify
      --max_jvm_heap_fraction "1"
      --stdout "${output_log}"
      -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant"
      -p 'java
        -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
        -T "VariantFiltration"
        ##--num_threads 4 ##hard coded, for now
        -et "NO_ET" ##ET no phone home
        -o "${output_vcf}"
        ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
        ## Only a built-in (cached) reference is passed here by path.
        ## NOTE(review): a history reference is presumably handled by the standard_gatk_options include - confirm in gatk_macros.xml.
        #if str( $reference_source.reference_source_selector ) != "history":
          -R "${reference_source.ref_file.fields.path}"
        #end if
      '
      ## One -o option per "Variant Filters" repeat entry. The boolean is_genotype_filter
      ## yields either "genotypeFilter" or "filter", which selects the option name prefix.
      ## The assembled option string is hex-encoded before being handed to the wrapper.
      #for $variant_filter in $variant_filters:
        #set $filter_option = "--%sExpression '%s' --%sName '%s'" % ( str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_expression ), str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_name ) )
        -o '${ hexlify( $filter_option ) }'
      #end for
      ## Optional mask ROD binding plus its extension and FILTER name.
      #if str( $mask_rod_bind_type.mask_rod_bind_type_selector ) == 'set_mask':
        -d "--mask:${mask_rod_bind_type.mask_rod_name},%(file_type)s" "${mask_rod_bind_type.input_mask_rod}" "${mask_rod_bind_type.input_mask_rod.ext}" "input_mask_${mask_rod_bind_type.mask_rod_name}"
        -p '
          --maskExtension "${mask_rod_bind_type.mask_extension}"
          --maskName "${mask_rod_bind_type.mask_rod_name}"
        '
      #end if
      #include source=$standard_gatk_options#
      ##start analysis specific options
      #if str( $cluster_snp_type.cluster_snp_type_selector ) == "cluster_snp":
        -p '
          --clusterSize "${cluster_snp_type.cluster_size}"
          --clusterWindowSize "${cluster_snp_type.cluster_window_size}"
        '
      #end if
      ## Expands to the GATK flag when the checkbox is set, otherwise to an empty string.
      -p '${missing_values_in_expressions_should_evaluate_as_failing}'
    </command>
    <inputs>
      <!-- Reference genome: either a built-in index or a FASTA dataset from the history. -->
      <conditional name="reference_source">
        <expand macro="reference_source_selector_param" />
        <when value="cached">
          <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
          <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
            <options from_data_table="gatk_picard_indexes">
              <!-- Offer only the indexes whose dbkey matches the selected variant file. -->
              <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/>
            </options>
            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
          </param>
        </when>
        <when value="history"> <!-- FIX ME!!!! (original author note; the outstanding issue is not described in this file) -->
          <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
          <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
        </when>
      </conditional>

      <!-- Zero or more filters; each entry pairs one JEXL expression with one filter name. -->
      <repeat name="variant_filters" title="Variant Filters">
        <param name="filter_expression" value="AB &lt; 0.2 || MQ0 &gt; 50" type="text" label="Filter expression" help="JEXL formatted expressions (-filter,--filterExpression &amp;lt;filterExpression&amp;gt;)">
          <!-- All printable characters are accepted except the single quote, which the
               command template uses to delimit the expression; no character mapping is applied. -->
          <sanitizer>
            <valid initial="string.printable">
              <remove value="&apos;"/>
            </valid>
            <mapping initial="none"/>
          </sanitizer>
        </param>
        <param name="filter_name" value="custom_filter" type="text" label="Filter name" help="-filterName,--filterName &amp;lt;filterName&amp;gt;"/>
        <!-- truevalue / falsevalue are spliced into the option names built in the command template. -->
        <param name="is_genotype_filter" type="boolean" truevalue="genotypeFilter" falsevalue="filter" label="Use filter at the individual sample level" help="Use -G_filter,--genotypeFilterExpression &amp;lt;genotypeFilterExpression&amp;gt; and -G_filterName,--genotypeFilterName &amp;lt;genotypeFilterName&amp;gt; for filter type" />
      </repeat>

      <!-- Optional mask ROD file; "set_mask" is the default choice. -->
      <conditional name="mask_rod_bind_type">
        <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file">
          <option value="set_mask" selected="True">Set mask</option>
          <option value="exclude_mask">Don't set mask</option>
        </param>
        <when value="exclude_mask">
          <!-- Do nothing here -->
        </when>
        <when value="set_mask">
          <param name="input_mask_rod" type="data" format="bed,gatk_dbsnp,vcf" label="Mask ROD file" help="--mask &amp;lt;mask&amp;gt;" />
          <param name="mask_rod_name" type="text" value="Mask" label="Mask Name" help="-maskName,--maskName &amp;lt;maskName&amp;gt;"/>
          <param name="mask_extension" type="integer" value="0" label="Mask Extension" help="-maskExtend,--maskExtension &amp;lt;maskExtension&amp;gt;"/>
        </when>
      </conditional>

      <expand macro="gatk_param_type_conditional" />

      <!-- Optional clustered-SNP filter; disabled by default. -->
      <conditional name="cluster_snp_type">
        <param name="cluster_snp_type_selector" type="select" label="Cluster SNPs">
          <option value="cluster_snp">Cluster SNPs</option>
          <option value="do_not_cluster_snp" selected="True">Do not cluster SNPs</option>
        </param>
        <when value="do_not_cluster_snp">
          <!-- Do nothing here -->
        </when>
        <when value="cluster_snp">
          <param name="cluster_size" type="integer" value="3" label="The number of SNPs which make up a cluster" help="-cluster,--clusterSize &amp;lt;clusterSize&amp;gt;"/>
          <param name="cluster_window_size" type="integer" value="0" label="The window size (in bases) in which to evaluate clustered SNPs" help="-window,--clusterWindowSize &amp;lt;clusterWindowSize&amp;gt;"/>
        </when>
      </conditional>

      <param name="missing_values_in_expressions_should_evaluate_as_failing" type="boolean" truevalue="--missingValuesInExpressionsShouldEvaluateAsFailing" falsevalue="" label="Should missing values be considered failing the expression" help="--missingValuesInExpressionsShouldEvaluateAsFailing" />
    </inputs>
    <outputs>
      <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" />
      <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
    </outputs>
    <tests>
      <test>
        <param name="reference_source_selector" value="history" />
        <param name="ref_file" value="phiX.fasta" ftype="fasta" />
        <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
        <param name="filter_expression" value="MQ &lt; 37.74 || MQ0 &gt; 50" />
        <param name="filter_name" value="Galaxy_filter" />
        <param name="is_genotype_filter" />
        <param name="mask_rod_bind_type_selector" value="set_mask" />
        <param name="input_mask_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
        <param name="mask_rod_name" value="." />
        <param name="mask_extension" value="0" />
        <param name="gatk_param_type_selector" value="basic" />
        <param name="cluster_snp_type_selector" value="do_not_cluster_snp" />
        <param name="missing_values_in_expressions_should_evaluate_as_failing" />
        <!-- NOTE(review): the expected VCF is the same file that is used as input_variant
             (compared with up to 4 differing lines) - confirm this is intended. -->
        <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" />
        <output name="output_log" file="gatk/gatk_variant_filtration/gatk_variant_filtration_out_1.log.contains" compare="contains" />
      </test>
    </tests>
    <help>
**What it does**

Filters variant calls using a number of user-selectable, parameterizable criteria.

For more information on using the VariantFiltration module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/VariantFiltrationWalker&gt;`_.

To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.

If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.

------

**Inputs**

GenomeAnalysisTK: VariantFiltration accepts a VCF input file.

**Outputs**

The output is in VCF format.

Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.

-------

**Settings**::

 filterExpression                                   One or more expression used with INFO fields to filter (see wiki docs for more info)
 filterName                                         Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
 genotypeFilterExpression                           One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)
 genotypeFilterName                                 Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
 clusterSize                                        The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]
 clusterWindowSize                                  The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]
 maskName                                           The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']
 missingValuesInExpressionsShouldEvaluateAsFailing  When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?

@CITATION_SECTION@
    </help>
  </tool>