PageRenderTime 61ms CodeModel.GetById 35ms RepoModel.GetById 0ms app.codeStats 0ms

/apps/autodock_mgl.bak/autodock_mgl_1.125_i686-pc-linux-gnu/test/MGLToolsPckgs/AutoDockTools/XMLParser.py

https://github.com/jackygrahamez/DrugDiscovery-Home
Python | 343 lines | 256 code | 29 blank | 58 comment | 59 complexity | 02ae22d0c45b82565172d51d546a9259 MD5 | raw file
  1. #############################################################################
  2. #
  3. # Author: Ruth HUEY, William Lindstrom
  4. #
  5. # Copyright: M. Sanner TSRI 2005
  6. #
  7. #############################################################################
  8. # $Header: /opt/cvs/python/packages/share1.5/AutoDockTools/XMLParser.py,v 1.14 2008/09/02 22:30:59 gillet Exp $
  9. #
  10. # $Id: XMLParser.py,v 1.14 2008/09/02 22:30:59 gillet Exp $
  11. #
  12. #
  13. #
  14. #
  15. #
  16. #
  17. #
  18. """
  19. This Object parses the xml result of an AutoDock operation. It builds a dictionary.
  20. """
  21. import os
  22. from string import find, join, replace, split, rfind
  23. import re
  24. from AutoDockTools.ResultParser import ResultParser
  25. class XMLParser(ResultParser):
  26. """ reads log from a AutoDock docking and return structured data"""
  # Result keys added by the XML parser on top of ResultParser.keywords.
  # XML tags left out of this list, with the key names noted for them in
  # the original source (presumably already provided by the base list --
  # confirm against ResultParser):
  #   seed                -> rseed1, rseed2
  #   free_NRG_binding    -> binding_energy
  #   internal_ligand_NRG -> internal_enrgy
  #   tran0               -> trn_x, trn_y, trn_z
  #   quat0               -> qtn_nx, qtn_ny, qtn_nz, qtn_ang_deg
  #   ndihe               -> num_torsions
  #   dihe0               -> torsion_values
  keywords = ResultParser.keywords + [
      'dpf',
      'Ki',
      'Temp',
      'final_intermol_NRG',
      'torsional_free_NRG',
      'move',
      'about',
  ]
  def __init__(self, dlgFile=None, dpfFile=None):
      """Set up the parser and parse dlgFile right away when one is given.

      dlgFile -- path of the result file handed to parse(); when it is
                 empty or None nothing is parsed and self.filename simply
                 holds the value that was passed in.
      dpfFile -- when given, stored as self.dpf (after any parsing).
      """
      ResultParser.__init__(self)
      self.version = 1.0
      if not dlgFile:
          self.filename = dlgFile
      else:
          # parse() stores the path it receives in self.filename again.
          self.filename = os.path.basename(dlgFile)
          self.parse(dlgFile)
      if dpfFile:
          self.dpf = dpfFile
  def parse(self, filename):
      """Read filename and hand all of its lines to match().

      filename -- path of the result file to read.

      Side effects: stores the path in self.filename, the list of lines in
      self.allLines, and resets self.clusterRecord to None before calling
      self.match(), which dispatches the lines to the individual handlers.

      Raises whatever open() raises when the file cannot be read.
      """
      self.filename = filename
      dlgptr = open(filename, 'r')
      try:
          allLines = self.allLines = dlgptr.readlines()
      finally:
          # Always release the handle, even if reading fails.
          dlgptr.close()
      self.clusterRecord = None
      self.match(allLines)
  def getReDict(self):
      """Build the pattern table used by match(), or reset it if it exists.

      First call: creates self.reKeys (pattern strings), self.reFuncs (the
      handler for each pattern, in the same order) and self.reDict, which
      maps each pattern string to a dict with the keys 're' (compiled
      pattern), 'lines' (matched lines, initially empty) and 'func'
      (handler).

      Later calls: only replace every entry's 'lines' with a fresh list.
      """
      if hasattr(self, 'reDict'):
          for entry in self.reDict.values():
              entry['lines'] = []
          return
      self.reDict = {}
      self.reKeys = [
          '\t<version>',
          '\t<autogrid_version>',
          '\t<output_xml_version>',
          '\t<run_requested>',
          '\t<runs>',
      ]
      self.reFuncs = [
          self.set_AD_version,
          self.set_AG_version,
          self.set_XML_version,
          self.set_runs_requested,
          self.get_runs,
      ]
      for pattern, handler in zip(self.reKeys, self.reFuncs):
          self.reDict[pattern] = {
              're': re.compile(pattern),
              'lines': [],
              'func': handler,
          }
  def match(self, allLines, verbose=False):
      """Sort allLines by pattern and pass each group to its handler.

      allLines -- the lines of the result file.
      verbose  -- accepted for interface compatibility; not used here.

      getReDict() is called first so every pattern starts with an empty
      'lines' list.  Each input line is stored under the first pattern in
      self.reKeys whose compiled regex matches at the start of the line;
      lines matching no pattern are ignored.  Afterwards every handler is
      called once, in self.reKeys order, with the lines collected for its
      pattern (possibly an empty list).  Also sets self.tested to 1.
      """
      self.getReDict()
      self.tested = 1
      for item in allLines:
          for key in self.reKeys:
              entry = self.reDict[key]
              if entry['re'].match(item):
                  # A line belongs to at most one pattern.
                  entry['lines'].append(item)
                  break
      for key in self.reKeys:
          entry = self.reDict[key]
          # Direct call replaces the deprecated apply() builtin.
          entry['func'](entry['lines'])
  def set_AD_version(self, lines):
      """Store the AutoDock version found in lines as self.version.

      lines -- the lines collected for the '<version>' pattern.

      Only the first line containing '<version>' is examined; the text
      between the opening tag and the next '<' is converted with float().
      self.version falls back to 4.03, with a printed message, when lines
      is empty or when that text is not a valid number.  If lines is not
      empty but no line contains the tag, self.version is left untouched.
      """
      if not lines:
          print("no autodock version found!")
          self.version = 4.03
          return
      for line in lines:
          if '<version>' not in line:
              continue
          try:
              # The tag itself guarantees a '>' in the line, so index 1 exists.
              self.version = float(line.split('>')[1].split('<')[0])
          except ValueError:
              # Malformed value: use the documented default instead of
              # aborting the whole parse.
              print("problem autodock version found!")
              self.version = 4.03
          break
  def set_AG_version(self, lines):
      """Store the AutoGrid version found in lines as self.autogrid_version.

      lines -- the lines collected for the '<autogrid_version>' pattern.

      Only the first line containing '<autogrid_version>' is examined; the
      text between the opening tag and the next '<' is converted with
      float().  self.autogrid_version falls back to 4.03, with a printed
      message, when lines is empty or when that text is not a valid number.
      If lines is not empty but no line contains the tag, the attribute is
      left untouched.
      """
      if not lines:
          print("no autogrid version found!")
          self.autogrid_version = 4.03
          return
      for line in lines:
          if '<autogrid_version>' not in line:
              continue
          try:
              # The tag itself guarantees a '>' in the line, so index 1 exists.
              self.autogrid_version = float(line.split('>')[1].split('<')[0])
          except ValueError:
              # Malformed value: use the default instead of aborting the parse.
              print("problem autogrid version found!")
              self.autogrid_version = 4.03
          break
  def set_XML_version(self, lines):
      """Store the output XML version found in lines as self.xml_version.

      lines -- the lines collected for the '<output_xml_version>' pattern.

      Only the first line containing '<output_xml_version>' is examined;
      the text between the opening tag and the next '<' is converted with
      float().  self.xml_version falls back to 0.10, with a printed
      message, when lines is empty or when that text is not a valid number.
      If lines is not empty but no line contains the tag, the attribute is
      left untouched.
      """
      if not lines:
          print("no xml version found!")
          self.xml_version = 0.10
          return
      for line in lines:
          if '<output_xml_version>' not in line:
              continue
          try:
              # The tag itself guarantees a '>' in the line, so index 1 exists.
              self.xml_version = float(line.split('>')[1].split('<')[0])
          except ValueError:
              # Malformed value: use the default instead of aborting the parse.
              print("problem xml version version found!")
              self.xml_version = 0.10
          break
  def set_runs_requested(self, lines):
      """Store the requested number of runs as self.run_requested.

      lines -- the lines collected for the '<run_requested>' pattern.

      Only the first line containing '<run_requested>' is examined; the
      text between the opening tag and the next '<' is converted with
      int().  When that text is not a valid integer a message is printed
      and self.run_requested is set to 1.  Nothing is set when lines is
      empty or when no line contains the tag.
      """
      for line in lines:
          if '<run_requested>' not in line:
              continue
          try:
              # The tag itself guarantees a '>' in the line, so index 1 exists.
              self.run_requested = int(line.split('>')[1].split('<')[0])
          except ValueError:
              # Malformed value: use the default instead of aborting the parse.
              print("problem with run requested found!")
              self.run_requested = 1
          break
  def get_runs(self, lines):
      """Split the runs section of self.allLines into runs and process each.

      lines -- the lines collected for the '<runs>' pattern; only the first
               one is used, and nothing happens unless it contains '<runs>'.

      Starting at the first occurrence of that line in self.allLines, lines
      are accumulated until one containing '</run>' is met; the accumulated
      lines (the closing line excluded) are then passed to
      self.process_run() and accumulation starts over.  Scanning stops at
      the first line containing '</runs>'.  Note that the '<runs>' line
      itself is part of the lines passed for the first run.
      """
      if not lines or '<runs>' not in lines[0]:
          return
      start = self.allLines.index(lines[0])
      pending = []
      for text in self.allLines[start:]:
          if '</runs>' in text:
              return
          if '</run>' in text:
              self.process_run(pending)
              pending = []
              continue
          pending.append(text)
  def get_floats(self, line):
      """Return the numbers enclosed by the first tag pair of line as floats.

      line -- text of the form '<tag>v1 v2 ...</tag>'; the values are the
              whitespace-separated tokens between the first '>' and the
              next '<'.

      Always returns a real list, because callers index the result.

      Raises IndexError when line contains no '>' and ValueError when a
      token is not a valid float.
      """
      payload = line.split('>')[1].split('<')[0]
      return [float(token) for token in payload.split()]
  def get_ints(self, line):
      """Return the numbers enclosed by the first tag pair of line as ints.

      line -- text of the form '<tag>n1 n2 ...</tag>'; the values are the
              whitespace-separated tokens between the first '>' and the
              next '<'.

      Always returns a real list, because callers index and unpack the
      result.

      Raises IndexError when line contains no '>' and ValueError when a
      token is not a valid integer.
      """
      payload = line.split('>')[1].split('<')[0]
      return [int(token) for token in payload.split()]
  def process_run(self, run_lines):
      """Turn the lines of one run into a result dict appended to self.clist.

      run_lines -- the text lines collected for a single run.

      Every recognised tag is parsed with self.get_ints()/self.get_floats()
      and stored in a new dictionary, mostly under two keys: the XML tag
      name and an alternative name (for example 'free_NRG_binding' and
      'binding_energy').  Further side effects:

      * self.ligand is set to the text of the <move> tag;
      * self.dpf is set to the run's dpf value when the parser has no dpf
        attribute yet; when it has one, that value replaces the one read
        from the file.

      Ki and Temp are optional: when absent a message is printed and 0.0 is
      stored.  The torsion tags are optional too (the torsion list stays
      empty).  All other tags are required; a missing one surfaces as
      UnboundLocalError (a NameError subclass) when the dictionary is
      filled.
      """
      dihe_list = []  # stays empty for rigid ligands (no ndihe tag)
      Ki = None       # optional tag, defaulted further down
      Temp = None     # optional tag, defaulted further down
      for line in run_lines:
          if '<run id="' in line:
              run_id = int(line.split('"')[1])
          elif '<seed>' in line:
              seed1, seed2 = self.get_ints(line)
          elif '<dpf>' in line:
              # Fixed-width slice: drops 7 leading and 7 trailing characters
              # (presumably two tabs + '<dpf>' and '</dpf>' + newline).
              dpf = line[7:-7]
              if hasattr(self, 'dpf'):
                  # A dpf already known to the parser wins over the file's.
                  dpf = self.dpf
          elif '<free_NRG_binding>' in line:
              free_NRG_binding = self.get_floats(line)[0]
          elif '<Ki>' in line:
              Ki = self.get_floats(line)[0]
          elif 'Temp' in line:
              Temp = self.get_floats(line)[0]
          elif 'final_intermol_NRG' in line:
              final_intermol_NRG = self.get_floats(line)[0]
          elif 'internal_ligand_NRG' in line:
              internal_ligand_NRG = self.get_floats(line)[0]
          elif 'torsonial_free_NRG' in line or 'torsional_free_NRG' in line:
              # 'torsonial' [sic] is the spelling this parser has always
              # looked for (presumably what the writer emits -- confirm);
              # the correct spelling is accepted as well.
              torsional_free_NRG = self.get_floats(line)[0]
          elif 'move' in line:
              self.ligand = line.split('>')[1].split('<')[0]
          elif 'about' in line:
              about = self.get_floats(line)
          elif 'tran0' in line:
              trans = self.get_floats(line)
          elif 'quat0' in line:
              axisangle = self.get_floats(line)
          elif 'quaternion0' in line:
              quaternion0 = self.get_floats(line)
          elif 'ndihe' in line:
              # The torsion values follow the <ndihe> line and may span
              # several lines; collect them up to and including the line
              # that carries the closing </dihe0> tag.
              first = run_lines.index(line) + 1
              dihe_list = []
              for dihe_line in run_lines[first:]:
                  closed = '</dihe0>' in dihe_line
                  values = dihe_line.replace('</dihe0>', ' ')
                  values = values.replace('<dihe0>', ' ')
                  dihe_list.extend([float(tok) for tok in values.split()])
                  if closed:
                      break
      d = {}
      d['id'] = run_id
      d['rseed1'] = seed1
      d['rseed2'] = seed2
      d['org_x'] = about[0]
      d['org_y'] = about[1]
      d['org_z'] = about[2]
      d['trn_x'] = trans[0]
      d['trn_y'] = trans[1]
      d['trn_z'] = trans[2]
      d['qtn_nx'] = axisangle[0]
      d['qtn_ny'] = axisangle[1]
      d['qtn_nz'] = axisangle[2]
      d['qtn_ang_deg'] = axisangle[3]
      # The torsion count is taken from the parsed values, not from <ndihe>.
      d['num_torsions'] = d['ndihe'] = len(dihe_list)
      d['torsion_values'] = d['dihe0'] = dihe_list
      # XML specific values
      if Ki is None:
          print('except on Ki, for run id= %s' % run_id)
          d['Ki'] = 0.0
      else:
          d['Ki'] = Ki
      if Temp is None:
          print('except on Temp, for run id= %s' % run_id)
          d['Temp'] = 0.0
      else:
          d['Temp'] = Temp
      # intermol + internal + torsional
      d['binding_energy'] = d['free_NRG_binding'] = free_NRG_binding
      d['intermol_energy'] = d['final_intermol_NRG'] = final_intermol_NRG
      d['internal_energy'] = d['internal_ligand_NRG'] = internal_ligand_NRG
      d['torsional_energy'] = d['torsional_free_NRG'] = torsional_free_NRG
      d['quat0'] = axisangle
      d['quaternion0'] = quaternion0
      d['tran0'] = trans
      d['about'] = about
      d['dpf'] = dpf
      d['xml'] = self.filename
      d['dlg'] = self.filename.replace('.xml', '.dlg')
      if hasattr(self, 'dpf'):
          if dpf != self.dpf:
              print("dpf mismatch")
              print('%s  vs  %s' % (dpf, self.dpf))
      else:
          self.dpf = dpf
      self.clist.append(d)
  308. #print "len(clist)=", len(self.clist)