<!--
PageRenderTime 27ms CodeModel.GetById 15ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/rgenetics/rgQC.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 110 lines | 85 code | 25 blank | 0 comment | 0 complexity | 5b49caafe1bbeb5b94821859d6312ffb MD5 | raw file
-->
  1<tool id="rgQC1" name="QC reports:">
  2
  3    <description>Marker and Subject measures</description>
  4
  5    <command interpreter="python">
  6        rgQC.py -i '$input_file.extra_files_path/$input_file.metadata.base_name' -o "$title"
  7        -s '$html_file' -p '$html_file.files_path'
  8    </command>
  9
 10    <inputs>
 11          <param name="input_file" type="data" label="RGenetics genotype file in compressed Plink format"
 12          size="80" format="pbed" />
 13       <param name="title" size="80" type="text" value="RgQC report" label="Descriptive report title"/>
 14   </inputs>
 15
 16   <outputs>
 17       <data format="html" name="html_file" metadata_source="input_file" label="${title}.html"/>
 18   </outputs>
 19
 20<tests>
 21 <test>
 22    <param name='input_file' value='tinywga' ftype='pbed' >
 23    <metadata name='base_name' value='tinywga' />
 24    <composite_data value='tinywga.bim' />
 25    <composite_data value='tinywga.bed' />
 26    <composite_data value='tinywga.fam' />
 27    <edit_attributes type='name' value='tinywga' /> 
 28    </param>
 29    <param name='title' value='rgQCtest1' />
 30    <output name='html_file' file='rgtestouts/rgQC/rgQCtest1.html' ftype='html' lines_diff='300'>
 31    <param name="dbkey" value="hg18" />
 32    <extra_files type="file" name='tinywga_All_Paged.pdf' value="rgtestouts/rgQC/tinywga_All_Paged.pdf" compare="sim_size" delta = "100000"/>
 33    <extra_files type="file" name='tinywga.log' value="rgtestouts/rgQC/tinywga.log" compare="diff" lines_diff="15"/>
 34    <extra_files type="file" name='tinywga.frq' value="rgtestouts/rgQC/tinywga.frq" compare="diff" />
 35    <extra_files type="file" name='tinywga.het' value="rgtestouts/rgQC/tinywga.het" compare="diff" lines_diff="90"/>
 36    <extra_files type="file" name='tinywga.hwe' value="rgtestouts/rgQC/tinywga.hwe" compare="diff" lines_diff="90"/>
 37    <extra_files type="file" name='tinywga.imendel' value="rgtestouts/rgQC/tinywga.imendel" compare="diff"/>
 38    <extra_files type="file" name='tinywga.imiss' value="rgtestouts/rgQC/tinywga.imiss" compare="diff" />
 39    <extra_files type="file" name='tinywga.lmendel' value="rgtestouts/rgQC/tinywga.lmendel" compare="diff" />
 40    <extra_files type="file" name='tinywga.lmiss' value="rgtestouts/rgQC/tinywga.lmiss" compare="diff" />
 41    <extra_files type="file" name='tinywga_All_3x3.pdf' value="rgtestouts/rgQC/tinywga_All_3x3.pdf" compare="sim_size" delta="100000"/>
 42    <extra_files type="file" name='ldp_tinywga.bed' value="rgtestouts/rgQC/ldp_tinywga.bed" compare="diff" lines_diff="10" />
 43    <extra_files type="file" name='ldp_tinywga.bim' value="rgtestouts/rgQC/ldp_tinywga.bim" compare="sim_size" delta="1000" />
 44    <extra_files type="file" name='ldp_tinywga.fam' value="rgtestouts/rgQC/ldp_tinywga.fam" compare="diff" />
 45    <extra_files type="file" name='ldp_tinywga.log' value="rgtestouts/rgQC/ldp_tinywga.log" compare="diff" lines_diff="20"/>
 46    <extra_files type="file" name='Ranked_Marker_HWE.xls' value="rgtestouts/rgQC/Ranked_Marker_HWE.xls" compare="diff" />
 47    <extra_files type="file" name='Ranked_Marker_MAF.xls' value="rgtestouts/rgQC/Ranked_Marker_MAF.xls" compare="diff" />
 48    <extra_files type="file" name='Ranked_Marker_Missing_Genotype.xls' value="rgtestouts/rgQC/Ranked_Marker_Missing_Genotype.xls" compare="diff" lines_diff="5"/>
 49    <extra_files type="file" name='Ranked_Subject_Missing_Genotype.xls' value="rgtestouts/rgQC/Ranked_Subject_Missing_Genotype.xls" compare="diff" lines_diff="40"/>
 50    <extra_files type="file" name='tinywga_fracmiss_cum.jpg' value="rgtestouts/rgQC/tinywga_fracmiss_cum.jpg" compare="sim_size" delta = "20000"/>     
 51    <extra_files type="file" name='tinywga_fracmiss_cum.pdf' value="rgtestouts/rgQC/tinywga_fracmiss_cum.pdf" compare="sim_size" delta = "100000"/>     
 52 </output>
 53 </test>
 54</tests>
 55 <help>
 56
 57.. class:: infomark
 58
 59**Summary**
 60
 61This tool prepares an extensive and comprehensive series of reports for quality control checking of SNP genotypes from any arbitrary
 62genotyping experiment. Designed for family based data, so includes optional reports on Mendelian errors by
 63subject and by marker.
 64
 65The outputs include histograms and boxplots for missingness, maf, mendel counts and hwe by marker, and the ones that make sense by
 66subject. The report is built as a single web page containing links to the summary marker and subject files.
 67
 68The F (inbreeding) statistic is calculated using a somewhat LD independent group of genotypes
 69The Plink used is --indep-pairwise 40 20 0.5 until we make it configurable.
 70High heterozygosity might mean contaminated sample - more than one DNA. Low heterozygosity might mean inbreeding as in strains
 71of mice.
 72
 73If the data file you want is missing from the option list above,
 74you will first need to "import" it so it will be available here. Files available in the system library
 75can be imported by selecting and completing the "Import ped/map" choice from the Get Data tool group at the top of the Galaxy
 76menu. Your system administrator will be responsible for adding files to the system library.
 77
 78-----
 79
 80.. class:: infomark
 81
 82**Syntax**
 83
 84- **Genotype file** is the input pedfile -
 85- **Prefix** is a string used to name all of the outputs
 86
 87-----
 88
 89**Attribution**
 90
 91This Galaxy tool was written by Ross Lazarus for the Rgenetics project
 92The current version uses Plink for most calculations and R for plotting - for full Plink attribution, source code and documentation,
 93please see http://pngu.mgh.harvard.edu/~purcell/plink/ while R attribution and source code can be found at http://r-project.org
 94
 95Shaun Purcell provides the documentation you need specific to those settings, at
 96http://pngu.mgh.harvard.edu/~purcell/plink/anal.shtml#glm
 97
 98Tool and Galaxy datatypes originally designed and written for the Rgenetics
 99series of whole genome scale statistical genetics tools by ross lazarus (ross.lazarus@gmail.com)
100Shaun Purcell created and maintains Plink, while a cast of many maintain R.
101
102Please acknowledge your use of this tool, Galaxy, R and Plink in your publications and let
103us know so we can keep track. These tools all rely on highly competitive grant funding
104so your letting us know about publications is important to our ongoing support.
105
106</help>
107
108
109
110</tool>