/lib/galaxy/datatypes/converters/lped_to_fped_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 110 lines · 87 code · 12 blank · 11 comment · 14 complexity · f00fe3393499259e186fe32fd3db09a0 MD5 · raw file

# For rgenetics: convert a linkage-format ped/map (lped) dataset to fbat ped.
# Alphabetic alleles are recoded to the numeric fbat version.
# Recoding is much slower, so it is best to always
# use numeric alleles internally.
  5. import sys,os,time
  6. prog = os.path.split(sys.argv[0])[-1]
  7. myversion = 'Oct 10 2009'
  8. galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
  9. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  10. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  11. <head>
  12. <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  13. <meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org" />
  14. <title></title>
  15. <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
  16. </head>
  17. <body>
  18. <div class="document">
  19. """
  20. def timenow():
  21. """return current time as a string
  22. """
  23. return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
  24. def rgConv(inpedfilepath,outhtmlname,outfilepath):
  25. """convert linkage ped/map to fbat"""
  26. recode={'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2','3':'3','4':'4'}
  27. basename = os.path.split(inpedfilepath)[-1] # get basename
  28. inmap = '%s.map' % inpedfilepath
  29. inped = '%s.ped' % inpedfilepath
  30. outf = '%s.ped' % basename # note the fbat exe insists that this is the extension for the ped data
  31. outfpath = os.path.join(outfilepath,outf) # where to write the fbat format file to
  32. try:
  33. mf = file(inmap,'r')
  34. except:
  35. sys.stderr.write('%s cannot open inmap file %s - do you have permission?\n' % (prog,inmap))
  36. sys.exit(1)
  37. try:
  38. rsl = [x.split()[1] for x in mf]
  39. except:
  40. sys.stderr.write('## cannot parse %s' % inmap)
  41. sys.exit(1)
  42. try:
  43. os.makedirs(outfilepath)
  44. except:
  45. pass # already exists
  46. head = ' '.join(rsl) # list of rs numbers
  47. # TODO add anno to rs but fbat will prolly barf?
  48. pedf = file(inped,'r')
  49. o = file(outfpath,'w',2**20)
  50. o.write(head)
  51. o.write('\n')
  52. for i,row in enumerate(pedf):
  53. if i == 0:
  54. lrow = row.split()
  55. try:
  56. x = [int(x) for x in lrow[10:50]] # look for non numeric codes
  57. except:
  58. dorecode = 1
  59. if dorecode:
  60. lrow = row.strip().split()
  61. p = lrow[:6]
  62. g = lrow[6:]
  63. gc = [recode.get(x,'0') for x in g]
  64. lrow = p+gc
  65. row = '%s\n' % ' '.join(lrow)
  66. o.write(row)
  67. o.close()
  68. def main():
  69. """call fbater
  70. need to work with rgenetics composite datatypes
  71. so in and out are html files with data in extrafiles path
  72. <command interpreter="python">rg_convert_lped_fped.py '$input1/$input1.metadata.base_name'
  73. '$output1' '$output1.extra_files_path'
  74. </command>
  75. """
  76. nparm = 3
  77. if len(sys.argv) < nparm:
  78. sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm))
  79. sys.exit(1)
  80. inpedfilepath = sys.argv[1]
  81. outhtmlname = sys.argv[2]
  82. outfilepath = sys.argv[3]
  83. try:
  84. os.makedirs(outfilepath)
  85. except:
  86. pass
  87. rgConv(inpedfilepath,outhtmlname,outfilepath)
  88. f = file(outhtmlname,'w')
  89. f.write(galhtmlprefix % prog)
  90. flist = os.listdir(outfilepath)
  91. print '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info
  92. f.write('<div>## Rgenetics: http://rgenetics.org Galaxy Tools %s %s\n<ol>' % (prog,timenow()))
  93. for i, data in enumerate( flist ):
  94. f.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
  95. f.write("</div></body></html>")
  96. f.close()
  97. if __name__ == "__main__":
  98. main()