PageRenderTime 21ms CodeModel.GetById 16ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/gatk/variant_filtration.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 182 lines | 150 code | 30 blank | 2 comment | 0 complexity | b482f0d0fa968dc9ca01c2ecc52a24df MD5 | raw file
  1<tool id="gatk_variant_filtration" name="Variant Filtration" version="0.0.5">
  2  <description>on VCF files</description>
  3  <requirements>
  4      <requirement type="package" version="1.4">gatk</requirement>
  5  </requirements>
  6  <macros>
  7    <import>gatk_macros.xml</import>
  8  </macros>
  9  <command interpreter="python">gatk_wrapper.py
 10   #from binascii import hexlify
 11   --max_jvm_heap_fraction "1"
 12   --stdout "${output_log}" ##stdout is captured into the log output
 13   -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant"
 14   -p 'java 
 15    -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
 16    -T "VariantFiltration"
 17    ##--num_threads 4 ##hard coded, for now
 18    -et "NO_ET" ##ET no phone home
 19    -o "${output_vcf}"
 20    ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
 21    #if str( $reference_source.reference_source_selector ) != "history":
 22        -R "${reference_source.ref_file.fields.path}"
 23    #end if
 24    '
 25    #for $variant_filter in $variant_filters:
 26        #set $filter_option = "--%sExpression '%s' --%sName '%s'" % ( str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_expression ), str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_name )  )
 27        -o '${ hexlify( $filter_option ) }'
 28    #end for
 29    
 30    #if str( $mask_rod_bind_type.mask_rod_bind_type_selector ) == 'set_mask':
 31        -d "--mask:${mask_rod_bind_type.mask_rod_name},%(file_type)s" "${mask_rod_bind_type.input_mask_rod}" "${mask_rod_bind_type.input_mask_rod.ext}" "input_mask_${mask_rod_bind_type.mask_rod_name}"
 32        -p '
 33        --maskExtension "${mask_rod_bind_type.mask_extension}"
 34        --maskName "${mask_rod_bind_type.mask_rod_name}"
 35        '
 36    #end if
 37    
 38    #include source=$standard_gatk_options#
 39    
 40    ##start analysis specific options
 41    #if str( $cluster_snp_type.cluster_snp_type_selector ) == "cluster_snp":
 42        -p '
 43        --clusterSize "${cluster_snp_type.cluster_size}"
 44        --clusterWindowSize "${cluster_snp_type.cluster_window_size}"
 45        '
 46    #end if
 47    -p '${missing_values_in_expressions_should_evaluate_as_failing}' ##expands to the flag, or to an empty string when unchecked
 48  </command>
 49  <inputs>
 50    <conditional name="reference_source"> <!-- reference is either a built-in index ("cached") or a FASTA dataset ("history") -->
 51      <expand macro="reference_source_selector_param" />
 52      <when value="cached">
 53        <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
 54        <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;">
 55          <options from_data_table="gatk_picard_indexes">
 56            <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/>
 57          </options>
 58          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
 59        </param>
 60      </when>
 61      <when value="history"> <!-- FIX ME!!!! -->
 62        <param name="input_variant" type="data" format="vcf" label="Variant file to filter" help="-V,--variant &amp;lt;variant&amp;gt;" />
 63        <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
 64      </when>
 65    </conditional>
 66    
 67    
 68    <repeat name="variant_filters" title="Variant Filters"> <!-- each entry yields one expression/name option pair in the command template -->
 69      <param name="filter_expression" type="text" value="AB &lt; 0.2 || MQ0 &gt; 50" label="Filter expression" help="JEXL formatted expressions (-filter,--filterExpression &amp;lt;filterExpression&amp;gt;)">
 70        <sanitizer> <!-- single quotes are removed: the command template wraps the expression in single quotes -->
 71          <valid initial="string.printable">
 72            <remove value="&apos;"/>
 73          </valid>
 74          <mapping initial="none"/>
 75        </sanitizer>
 76      </param>
 77      <param name="filter_name" type="text" value="custom_filter" label="Filter name" help="-filterName,--filterName &amp;lt;filterName&amp;gt;"/>
 78      <param name="is_genotype_filter" type="boolean" label="Use filter at the individual sample level" help="Use -G_filter,--genotypeFilterExpression &amp;lt;genotypeFilterExpression&amp;gt; and -G_filterName,--genotypeFilterName &amp;lt;genotypeFilterName&amp;gt; for filter type" truevalue="genotypeFilter" falsevalue="filter" />
 79    </repeat>
 80    
 81
 82    
 83    <conditional name="mask_rod_bind_type">
 84      <param name="mask_rod_bind_type_selector" type="select" label="Provide a Mask reference-ordered data file">
 85        <option value="set_mask" selected="True">Set mask</option>
 86        <option value="exclude_mask">Don't set mask</option>
 87      </param>
 88      <when value="exclude_mask">
 89        <!-- No parameters: the command template adds no mask arguments for this choice -->
 90      </when>
 91      <when value="set_mask">
 92        <param name="input_mask_rod" type="data" format="bed,gatk_dbsnp,vcf" label="Mask ROD file" help="--mask &amp;lt;mask&amp;gt;" />
 93        <param name="mask_rod_name" type="text" value="Mask" label="Mask Name" help="-maskName,--maskName &amp;lt;maskName&amp;gt;"/>
 94        <param name="mask_extension" type="integer" value="0" label="Mask Extension" help="-maskExtend,--maskExtension &amp;lt;maskExtension&amp;gt;"/>
 95      </when>
 96    </conditional>
 97    
 98    
 99    <expand macro="gatk_param_type_conditional" />
100
101    <conditional name="cluster_snp_type">
102      <param name="cluster_snp_type_selector" label="Cluster SNPs" type="select">
103        <option value="cluster_snp">Cluster SNPs</option>
104        <option selected="True" value="do_not_cluster_snp">Do not cluster SNPs</option>
105      </param>
106      <when value="do_not_cluster_snp">
107        <!-- No parameters: the command template adds no cluster arguments for this choice -->
108      </when>
109      <when value="cluster_snp">
110        <param name="cluster_size" type="integer" label="The number of SNPs which make up a cluster" help="-cluster,--clusterSize &amp;lt;clusterSize&amp;gt;" value="3" />
111        <param name="cluster_window_size" type="integer" label="The window size (in bases) in which to evaluate clustered SNPs" help="-window,--clusterWindowSize &amp;lt;clusterWindowSize&amp;gt;" value="0" />
112      </when>
113    </conditional>
114
115    <param name="missing_values_in_expressions_should_evaluate_as_failing" type="boolean" label="Should missing values be considered failing the expression" help="--missingValuesInExpressionsShouldEvaluateAsFailing" truevalue="--missingValuesInExpressionsShouldEvaluateAsFailing" falsevalue="" />
116    
117  </inputs>
118  <outputs> <!-- the VCF written via -o, plus the captured stdout log -->
119    <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (Variant File)" />
120    <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)" />
121  </outputs>
122  <tests>
123    <test> <!-- history reference (phiX.fasta), BED mask, one filter expression, no SNP clustering -->
124      <param name="reference_source_selector" value="history" />
125      <param name="ref_file" value="phiX.fasta" ftype="fasta" />
126      <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" />
127      <param name="filter_expression" value="MQ &lt; 37.74 || MQ0 &gt; 50" />
128      <param name="filter_name" value="Galaxy_filter" />
129      <param name="is_genotype_filter" />
130      <param name="mask_rod_bind_type_selector" value="set_mask" />
131      <param name="input_mask_rod" value="gatk/fake_phiX_variant_locations.bed" ftype="bed" />
132      <param name="mask_rod_name" value="." />
133      <param name="mask_extension" value="0" />
134      <param name="gatk_param_type_selector" value="basic" />
135      <param name="cluster_snp_type_selector" value="do_not_cluster_snp" />
136      <param name="missing_values_in_expressions_should_evaluate_as_failing" />
137      <output name="output_vcf" file="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" lines_diff="4" />
138      <output name="output_log" file="gatk/gatk_variant_filtration/gatk_variant_filtration_out_1.log.contains" compare="contains" />
139    </test>
140  </tests>
141  <help>
142**What it does**
143
144Filters variant calls using a number of user-selectable, parameterizable criteria.
145
146For more information on using the VariantFiltration module, see this `tool specific page &lt;http://www.broadinstitute.org/gsa/wiki/index.php/VariantFiltrationWalker&gt;`_.
147
148To learn about best practices for variant detection using GATK, see this `overview &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3&gt;`_.
149
150If you encounter errors, please view the `GATK FAQ &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions&gt;`_.
151
152------
153
154**Inputs**
155
156GenomeAnalysisTK: VariantFiltration accepts a VCF input file.
157
158
159**Outputs**
160
161The output is in VCF format.
162
163
164Go `here &lt;http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK&gt;`_ for details on GATK file formats.
165
166-------
167
168**Settings**::
169
170
171 filterExpression                                     One or more expressions used with INFO fields to filter (see wiki docs for more info)
172 filterName                                           Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
173 genotypeFilterExpression                             One or more expressions used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)
174 genotypeFilterName                                   Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered
175 clusterSize                                          The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]
176 clusterWindowSize                                    The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]
177 maskName                                             The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']
178 missingValuesInExpressionsShouldEvaluateAsFailing    When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?
179
180@CITATION_SECTION@
181  </help>
182</tool>