<!--
PageRenderTime 22ms CodeModel.GetById 11ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/rgenetics/rgfakePed.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 112 lines | 97 code | 15 blank | 0 comment | 0 complexity | f02345da6e8b79c877830c3289cf0653 MD5 | raw file
-->
  1<tool id="rgfakePed1" name="Null genotypes" version="0.02">
  2  <description>for testing</description>
  3  <command interpreter="python">rgfakePed.py --title '$title'
  4  -o '$out_file1' -p '$out_file1.files_path' -c '$ncases' -n '$ntotal'
  5  -s '$nsnp'  -w '$lowmaf' -v '$missingValue' -l '$outFormat'
  6  -d '$mafdist' -m '$missingRate' -M '$mendelRate' </command>
  7   <inputs>
  8
  9    <param name="title"
 10         type="text" value="Fake_test_geno_data"
 11         help="Name for outputs from this job"
 12         label="Descriptive short name"/>
 13    <param name="ntotal"
 14         type="integer" value = "200"
 15         help="N total: total number of subjects"
 16         label="Create this total N subjects"/>
 17    <param name="ncases" type="integer"
 18         value="100"
 19         help = "N cases: Independent subjects with status set to 2. Set 0 for family data (NSubj/3 trios)"
 20         label="Total N Cases (0=generate family data - trios)"/>
 21    <param name="nsnp"
 22         type="integer" value="1000"
 23         help="nsnp: total number of markers"
 24         label="Total N SNP"/>
 25    <param name="lowmaf" type="float"
 26         value="0.01"
 27         help = "Lower limit for MAF distribution"
 28         label="Lower MAF limit (default=1%)"/>
 29    <param name="mafdist"
 30         type="select"
 31         help="Choose a MAF distribution"
 32         label="SNP Minor Allele Frequency distribution">
 33           <option value="U" selected="true">Uniform</option>
 34           <option value="T">Triangular (more low frequency SNPs)</option>
 35    </param>
 36    <param name="outFormat"
 37         type="select"
 38         help="Choose an output format"
 39         label="Output format file type - linkage ped or fbat ped">
 40           <option value="L" selected="true">Linkage format - separate .map file</option>
 41           <option value="F">fbat style - marker names in a header row</option>
 42    </param>
 43    <param name="missingRate" type="float"
 44         value="0.05"
 45         help = "Fraction of genotypes to be randomly set missing"
 46         label="Missing genotype call fraction"/>
 47    <param name="mendelRate"
 48         type="float" value = "0.05"
 49         help="(family data) Fraction of apparently non-Mendelian transmission patterns"
 50         label="Mendel error transmission rate"/>
 51
 52    <param name="missingValue" type="text" size="1"
 53         value='0'
 54         help = "Missing allele value"
 55         label="Missing value for an allele for the output ped file"/>
 56
 57</inputs>
 58
 59 <outputs>
 60    <data format="lped" name="out_file1" label="${title}.lped"/>
 61  </outputs>
 62<tests>
 63 <test>
 64    <param name='title' value='rgfakePedtest1' />
 65    <param name="ntotal" value="40" />
 66    <param name="ncases" value="20" />
 67    <param name="nsnp" value="10" />
 68    <param name="lowmaf" value="0" />
 69    <param name="mafdist" value="T" />
 70    <param name="outFormat" value="L" />
 71    <param name="missingRate" value="0" />
 72    <param name="mendelRate" value="0" />
 73    <param name="missingValue" value="0" />
 74    <output name='out_file1' file='rgtestouts/rgfakePed/rgfakePedtest1.lped' ftype='lped' compare="diff" lines_diff='5'>
 75    <extra_files type="file" name='RgeneticsData.ped' value="rgtestouts/rgfakePed/rgfakePedtest1.ped" compare="diff" lines_diff='80'/>
 76    <extra_files type="file" name='RgeneticsData.map' value="rgtestouts/rgfakePed/rgfakePedtest1.map" compare="diff" />
 77    </output>
 78 </test>
 79</tests>
 80<help>
 81.. class:: infomark
 82
 83This tool allows you to generate an arbitrary (sort of)
 84synthetic genotype file (no attempt at LD - the markers are independent)
 85with optional missingness, Mendel errors, minor allele frequency settings, family structure
 86These might be used for testing under
 87the null hypothesis of no association and are certainly useful for
 88scale testing.
 89
 90Note that although it runs reasonably fast given it's a script, generating a large data set takes
 91a while. An hour or so should get you a reasonable (3GB) sized simulated null data set..
 92
 93A better simulator can easily be swapped in with this tool interface.
 94
 95-----
 96
 97.. class:: warningmark
 98
 99This tool is very experimental
100
101.. class:: infomark
102
103**Attribution and Licensing**
104
105Designed and written for the Rgenetics Galaxy tools
106copyright Ross Lazarus 2007 (ross.lazarus@gmail.com)
107Licensed under the terms of the _LGPL
108 
109 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
110
111</help>
112</tool>