/tools/rgenetics/rgfakePed.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 112 lines · 97 code · 15 blank · 0 comment · 0 complexity · f02345da6e8b79c877830c3289cf0653 MD5 · raw file

  <tool id="rgfakePed1" name="Null genotypes" version="0.02">
    <!-- Galaxy tool wrapper around rgfakePed.py. Every form field declared in
         the inputs section is handed to the script as a single-quoted
         command-line option; see the help section for what the tool produces. -->
    <description>for testing</description>
    <command interpreter="python">rgfakePed.py --title '$title'
      -o '$out_file1' -p '$out_file1.files_path' -c '$ncases' -n '$ntotal'
      -s '$nsnp' -w '$lowmaf' -v '$missingValue' -l '$outFormat'
      -d '$mafdist' -m '$missingRate' -M '$mendelRate' </command>
    <inputs>
      <!-- Also used to build the label of the output dataset. -->
      <param name="title" type="text" value="Fake_test_geno_data"
             help="Name for outputs from this job"
             label="Descriptive short name"/>
      <param name="ntotal" type="integer" value="200"
             help="N total: total number of subjects"
             label="Create this total N subjects"/>
      <!-- Per the help text, 0 switches the generator to family (trio) data. -->
      <param name="ncases" type="integer" value="100"
             help="N cases: Independent subjects with status set to 2. Set 0 for family data (NSubj/3 trios)"
             label="Total N Cases (0=generate family data - trios)"/>
      <param name="nsnp" type="integer" value="1000"
             help="nsnp: total number of markers"
             label="Total N SNP"/>
      <param name="lowmaf" type="float" value="0.01"
             help="Lower limit for MAF distribution"
             label="Lower MAF limit (default=1%)"/>
      <param name="mafdist" type="select"
             help="Choose a MAF distribution"
             label="SNP Minor Allele Frequency distribution">
        <option value="U" selected="true">Uniform</option>
        <option value="T">Triangular (more low frequency SNPs)</option>
      </param>
      <param name="outFormat" type="select"
             help="Choose an output format"
             label="Output format file type - linkage ped or fbat ped">
        <option value="L" selected="true">Linkage format - separate .map file</option>
        <option value="F">fbat style - marker names in a header row</option>
      </param>
      <param name="missingRate" type="float" value="0.05"
             help="Fraction of genotypes to be randomly set missing"
             label="Missing genotype call fraction"/>
      <!-- The help text marks this rate as relevant to family data. -->
      <param name="mendelRate" type="float" value="0.05"
             help="(family data) Fraction of apparently non-Mendelian transmission patterns"
             label="Mendel error transmission rate"/>
      <param name="missingValue" type="text" size="1" value="0"
             help="Missing allele value"
             label="Missing value for an allele for the output ped file"/>
    </inputs>
    <outputs>
      <!-- The dataset path goes to the script via -o and its files_path via -p.
           NOTE(review): presumably the script writes the .ped/.map files into
           files_path, since the test below checks them as extra files of this
           output; confirm against rgfakePed.py. -->
      <data format="lped" name="out_file1" label="${title}.lped"/>
    </outputs>
    <tests>
      <!-- Case/control run: 20 cases out of 40 subjects, 10 SNPs, triangular
           MAF distribution, linkage output, no missing calls or Mendel errors.
           Outputs are compared with diff; lines_diff allows some lines to differ
           (presumably because the genotypes are randomly generated). -->
      <test>
        <param name="title" value="rgfakePedtest1"/>
        <param name="ntotal" value="40"/>
        <param name="ncases" value="20"/>
        <param name="nsnp" value="10"/>
        <param name="lowmaf" value="0"/>
        <param name="mafdist" value="T"/>
        <param name="outFormat" value="L"/>
        <param name="missingRate" value="0"/>
        <param name="mendelRate" value="0"/>
        <param name="missingValue" value="0"/>
        <output name="out_file1" file="rgtestouts/rgfakePed/rgfakePedtest1.lped"
                ftype="lped" compare="diff" lines_diff="5">
          <extra_files type="file" name="RgeneticsData.ped"
                       value="rgtestouts/rgfakePed/rgfakePedtest1.ped"
                       compare="diff" lines_diff="80"/>
          <extra_files type="file" name="RgeneticsData.map"
                       value="rgtestouts/rgfakePed/rgfakePedtest1.map"
                       compare="diff"/>
        </output>
      </test>
    </tests>
    <help>

.. class:: infomark

This tool allows you to generate an arbitrary (sort of)
synthetic genotype file (no attempt at LD - the markers are independent)
with optional missingness, Mendel errors, minor allele frequency settings, family structure.
These might be used for testing under
the null hypothesis of no association and are certainly useful for
scale testing.

Note that although it runs reasonably fast given it's a script, generating a large data set takes
a while. An hour or so should get you a reasonable (3GB) sized simulated null data set.

A better simulator can easily be swapped in with this tool interface.

-----

.. class:: warningmark

This tool is very experimental

.. class:: infomark

**Attribution and Licensing**

Designed and written for the Rgenetics Galaxy tools
copyright Ross Lazarus 2007 (ross.lazarus@gmail.com)
Licensed under the terms of the LGPL_

.. _LGPL: http://www.gnu.org/copyleft/lesser.html

    </help>
  </tool>