PageRenderTime 47ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/python/examples/pizza/pdbfile.py

https://bitbucket.org/siddharthpaliwal/lammps
Python | 299 lines | 286 code | 4 blank | 9 comment | 2 complexity | 4571bad50e8a04d580819b1606591266 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, BSD-3-Clause
  1. # Pizza.py toolkit, www.cs.sandia.gov/~sjplimp/pizza.html
  2. # Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
  3. #
  4. # Copyright (2005) Sandia Corporation. Under the terms of Contract
  5. # DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
  6. # certain rights in this software. This software is distributed under
  7. # the GNU General Public License.
  8. # for python3 compatibility
  9. from __future__ import print_function
  10. # pdb tool
  11. oneline = "Read, write PDB files in combo with LAMMPS snapshots"
  12. docstr = """
  13. p = pdbfile("3CRO") create pdb object from PDB file or WWW
  14. p = pdbfile("pep1 pep2") read in multiple PDB files
  15. p = pdbfile("pep*") can use wildcards
  16. p = pdbfile(d) read in snapshot data with no PDB file
  17. p = pdbfile("3CRO",d) read in single PDB file with snapshot data
  18. string arg contains one or more PDB files
  19. don't need .pdb suffix except wildcard must expand to file.pdb
  20. if only one 4-char file specified and it is not found,
  21. it will be downloaded from http://www.rcsb.org as 3CRO.pdb
  22. d arg is object with atom coordinates (dump, data)
  23. p.one() write all output as one big PDB file to tmp.pdb
  24. p.one("mine") write to mine.pdb
  25. p.many() write one PDB file per snapshot: tmp0000.pdb, ...
  26. p.many("mine") write as mine0000.pdb, mine0001.pdb, ...
  27. p.single(N) write timestamp N as tmp.pdb
  28. p.single(N,"new") write as new.pdb
  29. how new PDB files are created depends on constructor inputs:
  30. if no d: one new PDB file for each file in string arg (just a copy)
  31. if only d specified: one new PDB file per snapshot in generic format
  32. if one file in str arg and d: one new PDB file per snapshot
  33. using input PDB file as template
  34. multiple input PDB files with a d is not allowed
  35. index,time,flag = p.iterator(0)
  36. index,time,flag = p.iterator(1)
  37. iterator = loop over number of PDB files
  38. call first time with arg = 0, thereafter with arg = 1
  39. N = length = # of snapshots or # of input PDB files
  40. index = index of snapshot or input PDB file (0 to N-1)
  41. time = timestep value (time stamp for snapshot, index for multiple PDB)
  42. flag = -1 when iteration is done, 1 otherwise
  43. typically call p.single(time) in iterated loop to write out one PDB file
  44. """
  45. # History
  46. # 8/05, Steve Plimpton (SNL): original version
  47. # 3/17, Richard Berger (Temple U): improve Python 3 compatibility
  48. # ToDo list
  49. # for generic PDB file (no template) from a LJ unit system,
  50. # the atoms in PDB file are too close together
  51. # Variables
  52. # files = list of input PDB files
  53. # data = data object (ccell,data,dump) to read snapshots from
  54. # atomlines = dict of ATOM lines in original PDB file
  55. # key = atom id, value = tuple of (beginning,end) of line
  56. # Imports and external programs
  57. import sys, types, glob, urllib
  58. PY3 = sys.version_info[0] == 3
  59. if PY3:
  60. string_types = str,
  61. else:
  62. string_types = basestring
  63. # Class definition
  64. class pdbfile:
  65. # --------------------------------------------------------------------
  66. def __init__(self,*args):
  67. if len(args) == 1:
  68. if type(args[0]) is string_types:
  69. filestr = args[0]
  70. self.data = None
  71. else:
  72. filestr = None
  73. self.data = args[0]
  74. elif len(args) == 2:
  75. filestr = args[0]
  76. self.data = args[1]
  77. else: raise StandardError("invalid args for pdb()")
  78. # flist = full list of all PDB input file names
  79. # append .pdb if needed
  80. if filestr:
  81. list = filestr.split()
  82. flist = []
  83. for file in list:
  84. if '*' in file: flist += glob.glob(file)
  85. else: flist.append(file)
  86. for i in xrange(len(flist)):
  87. if flist[i][-4:] != ".pdb": flist[i] += ".pdb"
  88. if len(flist) == 0:
  89. raise StandardError("no PDB file specified")
  90. self.files = flist
  91. else: self.files = []
  92. if len(self.files) > 1 and self.data:
  93. raise StandardError("cannot use multiple PDB files with data object")
  94. if len(self.files) == 0 and not self.data:
  95. raise StandardError("no input PDB file(s)")
  96. # grab PDB file from http://rcsb.org if not a local file
  97. if len(self.files) == 1 and len(self.files[0]) == 8:
  98. try:
  99. open(self.files[0],'r').close()
  100. except:
  101. print("downloading %s from http://rcsb.org" % self.files[0])
  102. fetchstr = "http://www.rcsb.org/pdb/cgi/export.cgi/%s?format=PDB&pdbId=2cpk&compression=None" % self.files[0]
  103. urllib.urlretrieve(fetchstr,self.files[0])
  104. if self.data and len(self.files): self.read_template(self.files[0])
  105. # --------------------------------------------------------------------
  106. # write a single large PDB file for concatenating all input data or files
  107. # if data exists:
  108. # only selected atoms returned by extract
  109. # atoms written in order they appear in snapshot
  110. # atom only written if its tag is in PDB template file
  111. # if no data:
  112. # concatenate all input files to one output file
  113. def one(self,*args):
  114. if len(args) == 0: file = "tmp.pdb"
  115. elif args[0][-4:] == ".pdb": file = args[0]
  116. else: file = args[0] + ".pdb"
  117. f = open(file,'w')
  118. # use template PDB file with each snapshot
  119. if self.data:
  120. n = flag = 0
  121. while 1:
  122. which,time,flag = self.data.iterator(flag)
  123. if flag == -1: break
  124. self.convert(f,which)
  125. print("END",file=f)
  126. print(time,end='')
  127. sys.stdout.flush()
  128. n += 1
  129. else:
  130. for file in self.files:
  131. f.write(open(file,'r').read())
  132. print("END",file=f)
  133. print(file,end='')
  134. sys.stdout.flush()
  135. f.close()
  136. print("\nwrote %d datasets to %s in PDB format" % (n,file))
  137. # --------------------------------------------------------------------
  138. # write series of numbered PDB files
  139. # if data exists:
  140. # only selected atoms returned by extract
  141. # atoms written in order they appear in snapshot
  142. # atom only written if its tag is in PDB template file
  143. # if no data:
  144. # just copy all input files to output files
  145. def many(self,*args):
  146. if len(args) == 0: root = "tmp"
  147. else: root = args[0]
  148. if self.data:
  149. n = flag = 0
  150. while 1:
  151. which,time,flag = self.data.iterator(flag)
  152. if flag == -1: break
  153. if n < 10:
  154. file = root + "000" + str(n)
  155. elif n < 100:
  156. file = root + "00" + str(n)
  157. elif n < 1000:
  158. file = root + "0" + str(n)
  159. else:
  160. file = root + str(n)
  161. file += ".pdb"
  162. f = open(file,'w')
  163. self.convert(f,which)
  164. f.close()
  165. print(time,end='')
  166. sys.stdout.flush()
  167. n += 1
  168. else:
  169. n = 0
  170. for infile in self.files:
  171. if n < 10:
  172. file = root + "000" + str(n)
  173. elif n < 100:
  174. file = root + "00" + str(n)
  175. elif n < 1000:
  176. file = root + "0" + str(n)
  177. else:
  178. file = root + str(n)
  179. file += ".pdb"
  180. f = open(file,'w')
  181. f.write(open(infile,'r').read())
  182. f.close()
  183. print(file,end='')
  184. sys.stdout.flush()
  185. n += 1
  186. print("\nwrote %d datasets to %s*.pdb in PDB format" % (n,root))
  187. # --------------------------------------------------------------------
  188. # write a single PDB file
  189. # if data exists:
  190. # time is timestamp in snapshot
  191. # only selected atoms returned by extract
  192. # atoms written in order they appear in snapshot
  193. # atom only written if its tag is in PDB template file
  194. # if no data:
  195. # time is index into list of input PDB files
  196. # just copy one input file to output file
  197. def single(self,time,*args):
  198. if len(args) == 0: file = "tmp.pdb"
  199. elif args[0][-4:] == ".pdb": file = args[0]
  200. else: file = args[0] + ".pdb"
  201. f = open(file,'w')
  202. if self.data:
  203. which = self.data.findtime(time)
  204. self.convert(f,which)
  205. else:
  206. f.write(open(self.files[time],'r').read())
  207. f.close()
  208. # --------------------------------------------------------------------
  209. # iterate over list of input files or selected snapshots
  210. # latter is done via data objects iterator
  211. def iterator(self,flag):
  212. if not self.data:
  213. if not flag: self.iterate = 0
  214. else:
  215. self.iterate += 1
  216. if self.iterate > len(self.files): return 0,0,-1
  217. return self.iterate,self.iterate,1
  218. return self.data.iterator(flag)
  219. # --------------------------------------------------------------------
  220. # read a PDB file and store ATOM lines
  221. def read_template(self,file):
  222. lines = open(file,'r').readlines()
  223. self.atomlines = {}
  224. for line in lines:
  225. if line.find("ATOM") == 0:
  226. tag = int(line[4:11])
  227. begin = line[:30]
  228. end = line[54:]
  229. self.atomlines[tag] = (begin,end)
  230. # --------------------------------------------------------------------
  231. # convert one set of atoms to PDB format and write to f
  232. def convert(self,f,which):
  233. time,box,atoms,bonds,tris,lines = self.data.viz(which)
  234. if len(self.files):
  235. for atom in atoms:
  236. id = atom[0]
  237. if self.atomlines.has_key(id):
  238. (begin,end) = self.atomlines[id]
  239. line = "%s%8.3f%8.3f%8.3f%s" % (begin,atom[2],atom[3],atom[4],end)
  240. print(line,file=f,end='')
  241. else:
  242. for atom in atoms:
  243. begin = "ATOM %6d %2d R00 1 " % (atom[0],atom[1])
  244. middle = "%8.3f%8.3f%8.3f" % (atom[2],atom[3],atom[4])
  245. end = " 1.00 0.00 NONE"
  246. print(begin+middle+end,file=f)