/python/examples/pizza/pdbfile.py
Python | 299 lines | 286 code | 4 blank | 9 comment | 2 complexity | 4571bad50e8a04d580819b1606591266 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, BSD-3-Clause
- # Pizza.py toolkit, www.cs.sandia.gov/~sjplimp/pizza.html
- # Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
- #
- # Copyright (2005) Sandia Corporation. Under the terms of Contract
- # DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
- # certain rights in this software. This software is distributed under
- # the GNU General Public License.
- # for python3 compatibility
- from __future__ import print_function
- # pdb tool
- oneline = "Read, write PDB files in combo with LAMMPS snapshots"
- docstr = """
- p = pdbfile("3CRO") create pdb object from PDB file or WWW
- p = pdbfile("pep1 pep2") read in multiple PDB files
- p = pdbfile("pep*") can use wildcards
- p = pdbfile(d) read in snapshot data with no PDB file
- p = pdbfile("3CRO",d) read in single PDB file with snapshot data
- string arg contains one or more PDB files
- don't need .pdb suffix except wildcard must expand to file.pdb
- if only one 4-char file specified and it is not found,
- it will be downloaded from http://www.rcsb.org as 3CRO.pdb
- d arg is object with atom coordinates (dump, data)
-
- p.one() write all output as one big PDB file to tmp.pdb
- p.one("mine") write to mine.pdb
- p.many() write one PDB file per snapshot: tmp0000.pdb, ...
- p.many("mine") write as mine0000.pdb, mine0001.pdb, ...
- p.single(N) write timestamp N as tmp.pdb
- p.single(N,"new") write as new.pdb
- how new PDB files are created depends on constructor inputs:
- if no d: one new PDB file for each file in string arg (just a copy)
- if only d specified: one new PDB file per snapshot in generic format
- if one file in str arg and d: one new PDB file per snapshot
- using input PDB file as template
- multiple input PDB files with a d is not allowed
-
- index,time,flag = p.iterator(0)
- index,time,flag = p.iterator(1)
- iterator = loop over number of PDB files
- call first time with arg = 0, thereafter with arg = 1
- N = length = # of snapshots or # of input PDB files
- index = index of snapshot or input PDB file (0 to N-1)
- time = timestep value (time stamp for snapshot, index for multiple PDB)
- flag = -1 when iteration is done, 1 otherwise
- typically call p.single(time) in iterated loop to write out one PDB file
- """
- # History
- # 8/05, Steve Plimpton (SNL): original version
- # 3/17, Richard Berger (Temple U): improve Python 3 compatibility
- # ToDo list
- # for generic PDB file (no template) from a LJ unit system,
- # the atoms in PDB file are too close together
- # Variables
- # files = list of input PDB files
- # data = data object (ccell,data,dump) to read snapshots from
- # atomlines = dict of ATOM lines in original PDB file
- # key = atom id, value = tuple of (beginning,end) of line
- # Imports and external programs
- import sys, types, glob, urllib
- PY3 = sys.version_info[0] == 3
- if PY3:
- string_types = str,
- else:
- string_types = basestring
- # Class definition
- class pdbfile:
- # --------------------------------------------------------------------
- def __init__(self,*args):
- if len(args) == 1:
- if type(args[0]) is string_types:
- filestr = args[0]
- self.data = None
- else:
- filestr = None
- self.data = args[0]
- elif len(args) == 2:
- filestr = args[0]
- self.data = args[1]
- else: raise StandardError("invalid args for pdb()")
- # flist = full list of all PDB input file names
- # append .pdb if needed
-
- if filestr:
- list = filestr.split()
- flist = []
- for file in list:
- if '*' in file: flist += glob.glob(file)
- else: flist.append(file)
- for i in xrange(len(flist)):
- if flist[i][-4:] != ".pdb": flist[i] += ".pdb"
- if len(flist) == 0:
- raise StandardError("no PDB file specified")
- self.files = flist
- else: self.files = []
- if len(self.files) > 1 and self.data:
- raise StandardError("cannot use multiple PDB files with data object")
- if len(self.files) == 0 and not self.data:
- raise StandardError("no input PDB file(s)")
- # grab PDB file from http://rcsb.org if not a local file
-
- if len(self.files) == 1 and len(self.files[0]) == 8:
- try:
- open(self.files[0],'r').close()
- except:
- print("downloading %s from http://rcsb.org" % self.files[0])
- fetchstr = "http://www.rcsb.org/pdb/cgi/export.cgi/%s?format=PDB&pdbId=2cpk&compression=None" % self.files[0]
- urllib.urlretrieve(fetchstr,self.files[0])
- if self.data and len(self.files): self.read_template(self.files[0])
-
- # --------------------------------------------------------------------
- # write a single large PDB file for concatenating all input data or files
- # if data exists:
- # only selected atoms returned by extract
- # atoms written in order they appear in snapshot
- # atom only written if its tag is in PDB template file
- # if no data:
- # concatenate all input files to one output file
- def one(self,*args):
- if len(args) == 0: file = "tmp.pdb"
- elif args[0][-4:] == ".pdb": file = args[0]
- else: file = args[0] + ".pdb"
- f = open(file,'w')
- # use template PDB file with each snapshot
-
- if self.data:
- n = flag = 0
- while 1:
- which,time,flag = self.data.iterator(flag)
- if flag == -1: break
- self.convert(f,which)
- print("END",file=f)
- print(time,end='')
- sys.stdout.flush()
- n += 1
- else:
- for file in self.files:
- f.write(open(file,'r').read())
- print("END",file=f)
- print(file,end='')
- sys.stdout.flush()
-
- f.close()
- print("\nwrote %d datasets to %s in PDB format" % (n,file))
- # --------------------------------------------------------------------
- # write series of numbered PDB files
- # if data exists:
- # only selected atoms returned by extract
- # atoms written in order they appear in snapshot
- # atom only written if its tag is in PDB template file
- # if no data:
- # just copy all input files to output files
- def many(self,*args):
- if len(args) == 0: root = "tmp"
- else: root = args[0]
- if self.data:
- n = flag = 0
- while 1:
- which,time,flag = self.data.iterator(flag)
- if flag == -1: break
- if n < 10:
- file = root + "000" + str(n)
- elif n < 100:
- file = root + "00" + str(n)
- elif n < 1000:
- file = root + "0" + str(n)
- else:
- file = root + str(n)
- file += ".pdb"
- f = open(file,'w')
- self.convert(f,which)
- f.close()
-
- print(time,end='')
- sys.stdout.flush()
- n += 1
- else:
- n = 0
- for infile in self.files:
- if n < 10:
- file = root + "000" + str(n)
- elif n < 100:
- file = root + "00" + str(n)
- elif n < 1000:
- file = root + "0" + str(n)
- else:
- file = root + str(n)
- file += ".pdb"
-
- f = open(file,'w')
- f.write(open(infile,'r').read())
- f.close()
- print(file,end='')
- sys.stdout.flush()
-
- n += 1
- print("\nwrote %d datasets to %s*.pdb in PDB format" % (n,root))
- # --------------------------------------------------------------------
- # write a single PDB file
- # if data exists:
- # time is timestamp in snapshot
- # only selected atoms returned by extract
- # atoms written in order they appear in snapshot
- # atom only written if its tag is in PDB template file
- # if no data:
- # time is index into list of input PDB files
- # just copy one input file to output file
- def single(self,time,*args):
- if len(args) == 0: file = "tmp.pdb"
- elif args[0][-4:] == ".pdb": file = args[0]
- else: file = args[0] + ".pdb"
- f = open(file,'w')
- if self.data:
- which = self.data.findtime(time)
- self.convert(f,which)
- else:
- f.write(open(self.files[time],'r').read())
-
- f.close()
- # --------------------------------------------------------------------
- # iterate over list of input files or selected snapshots
- # latter is done via data objects iterator
- def iterator(self,flag):
- if not self.data:
- if not flag: self.iterate = 0
- else:
- self.iterate += 1
- if self.iterate > len(self.files): return 0,0,-1
- return self.iterate,self.iterate,1
- return self.data.iterator(flag)
- # --------------------------------------------------------------------
- # read a PDB file and store ATOM lines
-
- def read_template(self,file):
- lines = open(file,'r').readlines()
- self.atomlines = {}
- for line in lines:
- if line.find("ATOM") == 0:
- tag = int(line[4:11])
- begin = line[:30]
- end = line[54:]
- self.atomlines[tag] = (begin,end)
- # --------------------------------------------------------------------
- # convert one set of atoms to PDB format and write to f
- def convert(self,f,which):
- time,box,atoms,bonds,tris,lines = self.data.viz(which)
- if len(self.files):
- for atom in atoms:
- id = atom[0]
- if self.atomlines.has_key(id):
- (begin,end) = self.atomlines[id]
- line = "%s%8.3f%8.3f%8.3f%s" % (begin,atom[2],atom[3],atom[4],end)
- print(line,file=f,end='')
- else:
- for atom in atoms:
- begin = "ATOM %6d %2d R00 1 " % (atom[0],atom[1])
- middle = "%8.3f%8.3f%8.3f" % (atom[2],atom[3],atom[4])
- end = " 1.00 0.00 NONE"
- print(begin+middle+end,file=f)