PageRenderTime 26ms CodeModel.GetById 17ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/human_genome_variation/sift.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 174 lines | 145 code | 29 blank | 0 comment | 0 complexity | 070bfb2df8f01cd5fc2f5a791ad21d99 MD5 | raw file
  1<tool id="hgv_sift" name="SIFT" version="1.0.0">
  2  <description>predictions of functional sites</description>
  3
  <!-- Command template: runs sift_variants_wrapper.sh through bash.
       Positional arguments, in order: the input dataset, the output dataset,
       the input's dbkey metadata, the path to sift_db.loc under
       GALAXY_DATA_INDEX_DIR, the chromosome column, the position column,
       the coordinate base, the allele column, the strand value from the
       strand_source conditional, the comment value from the comment_source
       conditional, and the selected additional output fields. -->
  4  <command interpreter="bash">
  5    sift_variants_wrapper.sh "$input" "$output" "${input.metadata.dbkey}" "${GALAXY_DATA_INDEX_DIR}/sift_db.loc" "$chrom_col" "$pos_col" "$base" "$allele_col" "$strand_source.strand_col" "$comment_source.comment_col" "$output_opts"
  6  </command>
  7
  8  <inputs>
    <!-- Tabular dataset to annotate. The first validator requires a genome
         build to be set on the dataset; the second checks the dataset's dbkey
         against column 0 of sift_db.loc and shows the message below when the
         build is not listed there. -->
  9    <param name="input" type="data" format="tabular" label="Dataset">
 10      <validator type="unspecified_build"/>
 11      <validator type="dataset_metadata_in_file" filename="sift_db.loc" metadata_name="dbkey" metadata_column="0" message="Data is currently not available for the specified build."/>
 12    </param>
    <!-- Column selectors; data_ref="input" ties each one to the dataset
         parameter above. The position column is marked numerical. -->
 13    <param name="chrom_col"  type="data_column" data_ref="input" label="Column with chromosome"/>
 14    <param name="pos_col"    type="data_column" data_ref="input" numerical="true" label="Column with position"/>
    <!-- Coordinate convention of the position column: value 1 (one-based,
         the default) or 0 (zero-based). -->
 15    <param name="base" type="select" label="Position coordinates are">
 16      <option value="1" selected="true">one-based</option>
 17      <option value="0">zero-based</option>
 18    </param>
 19    <param name="allele_col" type="data_column" data_ref="input" label="Column with allele"/>
    <!-- Strand is either read from a dataset column or fixed for all rows.
         Every branch defines strand_col, so the command line can always
         reference strand_source.strand_col; the two fixed branches supply
         the literal "+" or "-" as hidden values. -->
 20    <conditional name="strand_source">
 21      <param name="strand_choice" type="select" label="Strand info">
 22        <option value="data_column" selected="true">a column in the dataset</option>
 23        <option value="all_pos">all on sense/forward/+ strand</option>
 24        <option value="all_neg">all on antisense/reverse/- strand</option>
 25      </param>
 26      <when value="data_column">
 27        <param name="strand_col" type="data_column" data_ref="input" label="Column with strand"/>
 28      </when>
 29      <when value="all_pos">
 30        <param name="strand_col" type="hidden" value="+"/>
 31      </when>
 32      <when value="all_neg">
 33        <param name="strand_col" type="hidden" value="-"/>
 34      </when>
 35    </conditional>
    <!-- Optional comment column (default: not included). Both branches define
         comment_col; when no comment column is chosen the hidden value "-"
         is passed to the wrapper instead of a column number. -->
 36    <conditional name="comment_source">
 37      <param name="comment_choice" type="select" label="Include comment column">
 38        <option value="no" selected="true">no</option>
 39        <option value="yes">yes</option>
 40      </param>
 41      <when value="no">
 42        <param name="comment_col" type="hidden" value="-"/>
 43      </when>
 44      <when value="yes">
 45        <param name="comment_col" type="data_column" data_ref="input" label="Column with comment"/>
 46      </when>
 47    </conditional>
    <!-- Additional output fields, shown as checkboxes with multiple selection
         allowed. Each option's value is a single letter, A through L. -->
 48    <param name="output_opts" type="select" multiple="true" display="checkboxes" label="Include the following additional fields in the output">
 49      <option value="A">Ensembl Gene ID</option>
 50      <option value="B">Gene Name</option>
 51      <option value="C">Gene Description</option>
 52      <option value="D">Ensembl Protein Family ID</option>
 53      <option value="E">Ensembl Protein Family Description</option>
 54      <option value="F">Ensembl Transcript Status (Known / Novel)</option>
 55      <option value="G">Protein Family Size</option>
 56      <option value="H">Ka/Ks (Human-mouse)</option>
 57      <option value="I">Ka/Ks (Human-macaque)</option>
 58      <option value="J">OMIM Disease</option>
 59      <option value="K">Allele Frequencies (All Hapmap Populations - weighted average)</option>
 60      <option value="L">Allele Frequencies (CEU Hapmap population)</option>
 61    </param>
 62  </inputs>
 63
 64  <outputs>
    <!-- Single tabular result dataset; the command line refers to it as $output. -->
 65    <data format="tabular" name="output" />
 66  </outputs>
 67
 68  <requirements>
    <!-- External binaries this tool declares as required. Presumably they are
         used by sift_variants_wrapper.sh, which is not shown in this file. -->
 69    <requirement type="binary">awk</requirement>
 70    <requirement type="binary">rm</requirement>
 71    <requirement type="binary">sed</requirement>
 72  </requirements>
 73
 74  <tests>
    <!-- One functional test: runs the tool on sift_variants.tab (tabular,
         hg18) with chromosome in column 1, position in column 3 (one-based),
         allele in column 5 and strand read from column 4, requests extra
         field A (Ensembl Gene ID), and expects sift_variants_result.tab.
         comment_choice is not set here, so its default ("no") applies. -->
 75    <test>
 76      <param name="input" value="sift_variants.tab" ftype="tabular" dbkey="hg18"/>
 77      <param name="chrom_col" value="1"/>
 78      <param name="pos_col" value="3"/>
 79      <param name="base" value="1"/>
 80      <param name="allele_col" value="5"/>
 81      <param name="strand_choice" value="data_column"/>
 82      <param name="strand_col" value="4"/>
 83      <param name="output_opts" value="A"/>
 84      <output name="output" file="sift_variants_result.tab"/>
 85    </test>
 86  </tests>
 87
 88  <help>
 89.. class:: warningmark
 90
 91This tool currently works only for datasets assigned to genome build hg18 or hg19.
 92
 93-----
 94
 95**Dataset formats**
 96
 97The input and output datasets are tabular_. 
 98(`Dataset missing?`_)
 99
100.. _tabular: ./static/formatHelp.html#tab
101.. _Dataset missing?: ./static/formatHelp.html
102
103-----
104
105**What it does**
106
107SIFT predicts whether an amino-acid substitution affects protein function,
108based on sequence homology and the physical properties of amino acids.
109SIFT can be applied to naturally occurring non-synonymous polymorphisms
110and laboratory-induced missense mutations.  This tool uses SQLite databases
111containing pre-computed SIFT scores and annotations for all possible nucleotide
112substitutions at each position in the human exome.  Allele frequency data
113are from the HapMap frequency database, and additional transcript and 
114gene-level data are from Ensembl BioMart.
115
116The input dataset must contain columns for the chromosome, position, and
117alleles.  The alleles must be two nucleotides separated by '/',
118usually the reference allele and the allele of interest.
119The strand must either be given in its own column or be the same for every row.
120The output contains a standard set of columns plus any additional fields
121that were selected from the list above.
122
123Website: http://sift.jcvi.org/
124
125-----
126
127**Example**
128
129- input file::
130
131    chr3   81780820   +  T/C
132    chr2   230341630  +  G/A
133    chr2   43881517   +  A/T
134    chr2   43857514   +  T/C
135    chr6   88375602   +  G/A
136    chr22  29307353   -  T/A
137    chr10  115912482  -  G/T
138    chr10  115900918  -  C/T
139    chr16  69875502   +  G/T
140    etc.
141
142- output file::
143
144    #Chrom  Position   Strand  Allele  Codons   Transcript ID    Protein ID       Substitution  Region    dbSNP ID      SNP Type       Prediction  Score  Median Info  Num seqs at position  User Comment
145    chr3    81780820   +       T/C     AGA-gGA  ENST00000264326  ENSP00000264326  R190G         EXON CDS  rs2229519:C   Nonsynonymous  DAMAGING    0.04   3.06         149
146    chr2    230341630  +       G/T     -        ENST00000389045  ENSP00000373697  NA            EXON CDS  rs1803846:A   Unknown        Not scored  NA     NA           NA
147    chr2    43881517   +       A/T     ATA-tTA  ENST00000260605  ENSP00000260605  I230L         EXON CDS  rs11556157:T  Nonsynonymous  TOLERATED   0.47   3.19         7
148    chr2    43857514   +       T/C     TTT-TcT  ENST00000260605  ENSP00000260605  F33S          EXON CDS  rs2288709:C   Nonsynonymous  TOLERATED   0.61   3.33         6
149    chr6    88375602   +       G/A     GTT-aTT  ENST00000257789  ENSP00000257789  V217I         EXON CDS  rs2307389:A   Nonsynonymous  TOLERATED   0.75   3.17         13
150    chr22   29307353   +       T/A     ACC-tCC  ENST00000335214  ENSP00000334612  T264S         EXON CDS  rs42942:A     Nonsynonymous  TOLERATED   0.4    3.14         23
151    chr10   115912482  +       C/A     CGA-CtA  ENST00000369285  ENSP00000358291  R179L         EXON CDS  rs12782946:T  Nonsynonymous  TOLERATED   0.06   4.32         2
152    chr10   115900918  +       G/A     CAA-tAA  ENST00000369287  ENSP00000358293  Q271*         EXON CDS  rs7095762:T   Nonsynonymous  N/A         N/A    N/A          N/A
153    chr16   69875502   +       G/T     ACA-AaA  ENST00000338099  ENSP00000337512  T608K         EXON CDS  rs3096381:T   Nonsynonymous  TOLERATED   0.12   3.41         3
154    etc.
155
156-----
157
158**References**
159
160Ng PC, Henikoff S. (2001) Predicting deleterious amino acid substitutions.
161Genome Res. 11(5):863-74.
162
163Ng PC, Henikoff S. (2002) Accounting for human polymorphisms predicted to affect protein function.
164Genome Res. 12(3):436-46.
165
166Ng PC, Henikoff S. (2003) SIFT: Predicting amino acid changes that affect protein function.
167Nucleic Acids Res. 31(13):3812-4.
168
169Kumar P, Henikoff S, Ng PC. (2009) Predicting the effects of coding non-synonymous variants
170on protein function using the SIFT algorithm.
171Nat Protoc. 4(7):1073-81. Epub 2009 Jun 25.
172
173  </help>
174</tool>