PageRenderTime 51ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/ase/io/lammpsrun.py

https://gitlab.com/mbarbry/ase
Python | 387 lines | 232 code | 55 blank | 100 comment | 58 complexity | 462805ab0d9f04942940d280cf975ae3 MD5 | raw file
  1. import gzip
  2. import struct
  3. from os.path import splitext
  4. from collections import deque
  5. import numpy as np
  6. from ase.atoms import Atoms
  7. from ase.quaternions import Quaternions
  8. from ase.calculators.singlepoint import SinglePointCalculator
  9. from ase.parallel import paropen
  10. from ase.calculators.lammps import convert
  11. def read_lammps_dump(infileobj, **kwargs):
  12. """Method which reads a LAMMPS dump file.
  13. LAMMPS chooses output method depending on the given suffix:
  14. - .bin : binary file
  15. - .gz : output piped through gzip
  16. - .mpiio: using mpiio (should be like cleartext,
  17. with different ordering)
  18. - else : normal clear-text format
  19. :param infileobj: string to file, opened file or file-like stream
  20. """
  21. # !TODO: add support for lammps-regex naming schemes (output per
  22. # processor and timestep wildcards)
  23. opened = False
  24. if isinstance(infileobj, str):
  25. opened = True
  26. suffix = splitext(infileobj)[-1]
  27. if suffix == ".bin":
  28. fileobj = paropen(infileobj, "rb")
  29. elif suffix == ".gz":
  30. # !TODO: save for parallel execution?
  31. fileobj = gzip.open(infileobj, "rb")
  32. else:
  33. fileobj = paropen(infileobj)
  34. else:
  35. suffix = splitext(infileobj.name)[-1]
  36. fileobj = infileobj
  37. if suffix == ".bin":
  38. out = read_lammps_dump_binary(fileobj, **kwargs)
  39. if opened:
  40. fileobj.close()
  41. return out
  42. out = read_lammps_dump_text(fileobj, **kwargs)
  43. if opened:
  44. fileobj.close()
  45. return out
  46. def lammps_data_to_ase_atoms(
  47. data,
  48. colnames,
  49. cell,
  50. celldisp,
  51. pbc=False,
  52. atomsobj=Atoms,
  53. order=True,
  54. specorder=None,
  55. prismobj=None,
  56. units="metal",
  57. ):
  58. """Extract positions and other per-atom parameters and create Atoms
  59. :param data: per atom data
  60. :param colnames: index for data
  61. :param cell: cell dimensions
  62. :param celldisp: origin shift
  63. :param pbc: periodic boundaries
  64. :param atomsobj: function to create ase-Atoms object
  65. :param order: sort atoms by id. Might be faster to turn off
  66. :param specorder: list of species to map lammps types to ase-species
  67. (usually .dump files to not contain type to species mapping)
  68. :param prismobj: Coordinate transformation between lammps and ase
  69. :type prismobj: Prism
  70. :param units: lammps units for unit transformation between lammps and ase
  71. :returns: Atoms object
  72. :rtype: Atoms
  73. """
  74. # data array of doubles
  75. ids = data[:, colnames.index("id")].astype(int)
  76. types = data[:, colnames.index("type")].astype(int)
  77. if order:
  78. sort_order = np.argsort(ids)
  79. ids = ids[sort_order]
  80. data = data[sort_order, :]
  81. types = types[sort_order]
  82. # reconstruct types from given specorder
  83. if specorder:
  84. types = [specorder[t - 1] for t in types]
  85. def get_quantity(labels, quantity=None):
  86. try:
  87. cols = [colnames.index(label) for label in labels]
  88. if quantity:
  89. return convert(data[:, cols], quantity, units, "ASE")
  90. return data[:, cols]
  91. except ValueError:
  92. return None
  93. # slice data block into columns
  94. # + perform necessary conversions to ASE units
  95. positions = get_quantity(["x", "y", "z"], "distance")
  96. scaled_positions = get_quantity(["xs", "ys", "zs"])
  97. velocities = get_quantity(["vx", "vy", "vz"], "velocity")
  98. charges = get_quantity(["q"], "charge")
  99. forces = get_quantity(["fx", "fy", "fz"], "force")
  100. # !TODO: how need quaternions be converted?
  101. quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])
  102. # convert cell
  103. cell = convert(cell, "distance", units, "ASE")
  104. celldisp = convert(celldisp, "distance", units, "ASE")
  105. if prismobj:
  106. celldisp = prismobj.vector_to_ase(celldisp)
  107. cell = prismobj.update_cell(cell)
  108. if quaternions:
  109. out_atoms = Quaternions(
  110. symbols=types,
  111. positions=positions,
  112. cell=cell,
  113. celldisp=celldisp,
  114. pbc=pbc,
  115. quaternions=quaternions,
  116. )
  117. elif positions is not None:
  118. # reverse coordinations transform to lammps system
  119. # (for all vectors = pos, vel, force)
  120. if prismobj:
  121. positions = prismobj.vector_to_ase(positions, wrap=True)
  122. out_atoms = atomsobj(
  123. symbols=types,
  124. positions=positions,
  125. pbc=pbc,
  126. celldisp=celldisp,
  127. cell=cell
  128. )
  129. elif scaled_positions is not None:
  130. out_atoms = atomsobj(
  131. symbols=types,
  132. scaled_positions=scaled_positions,
  133. pbc=pbc,
  134. celldisp=celldisp,
  135. cell=cell,
  136. )
  137. if velocities is not None:
  138. if prismobj:
  139. velocities = prismobj.vector_to_ase(velocities)
  140. out_atoms.set_velocities(velocities)
  141. if charges is not None:
  142. out_atoms.set_initial_charges(charges)
  143. if forces is not None:
  144. if prismobj:
  145. forces = prismobj.vector_to_ase(forces)
  146. # !TODO: use another calculator if available (or move forces
  147. # to atoms.property) (other problem: synchronizing
  148. # parallel runs)
  149. calculator = SinglePointCalculator(out_atoms, energy=0.0, forces=forces)
  150. out_atoms.calc = calculator
  151. # process the extra columns of fixes, variables and computes
  152. # that can be dumped, add as additional arrays to atoms object
  153. for colname in colnames:
  154. # determine if it is a compute or fix (but not the quaternian)
  155. if (colname.startswith('f_') or colname.startswith('v_') or
  156. (colname.startswith('c_') and not colname.startswith('c_q['))):
  157. out_atoms.new_array(colname, get_quantity([colname]), dtype='float')
  158. return out_atoms
  159. def construct_cell(diagdisp, offdiag):
  160. """Help function to create an ASE-cell with displacement vector from
  161. the lammps coordination system parameters.
  162. :param diagdisp: cell dimension convoluted with the displacement vector
  163. :param offdiag: off-diagonal cell elements
  164. :returns: cell and cell displacement vector
  165. :rtype: tuple
  166. """
  167. xlo, xhi, ylo, yhi, zlo, zhi = diagdisp
  168. xy, xz, yz = offdiag
  169. # create ase-cell from lammps-box
  170. xhilo = (xhi - xlo) - abs(xy) - abs(xz)
  171. yhilo = (yhi - ylo) - abs(yz)
  172. zhilo = zhi - zlo
  173. celldispx = xlo - min(0, xy) - min(0, xz)
  174. celldispy = ylo - min(0, yz)
  175. celldispz = zlo
  176. cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])
  177. celldisp = np.array([celldispx, celldispy, celldispz])
  178. return cell, celldisp
  179. def get_max_index(index):
  180. if np.isscalar(index):
  181. return index
  182. elif isinstance(index, slice):
  183. return index.stop if (index.stop is not None) else float("inf")
  184. def read_lammps_dump_text(fileobj, index=-1, **kwargs):
  185. """Process cleartext lammps dumpfiles
  186. :param fileobj: filestream providing the trajectory data
  187. :param index: integer or slice object (default: get the last timestep)
  188. :returns: list of Atoms objects
  189. :rtype: list
  190. """
  191. # Load all dumped timesteps into memory simultaneously
  192. lines = deque(fileobj.readlines())
  193. index_end = get_max_index(index)
  194. n_atoms = 0
  195. images = []
  196. while len(lines) > n_atoms:
  197. line = lines.popleft()
  198. if "ITEM: TIMESTEP" in line:
  199. n_atoms = 0
  200. line = lines.popleft()
  201. # !TODO: pyflakes complains about this line -> do something
  202. # ntimestep = int(line.split()[0]) # NOQA
  203. if "ITEM: NUMBER OF ATOMS" in line:
  204. line = lines.popleft()
  205. n_atoms = int(line.split()[0])
  206. if "ITEM: BOX BOUNDS" in line:
  207. # save labels behind "ITEM: BOX BOUNDS" in triclinic case
  208. # (>=lammps-7Jul09)
  209. # !TODO: handle periodic boundary conditions in tilt_items
  210. tilt_items = line.split()[3:]
  211. celldatarows = [lines.popleft() for _ in range(3)]
  212. celldata = np.loadtxt(celldatarows)
  213. diagdisp = celldata[:, :2].reshape(6, 1).flatten()
  214. # determine cell tilt (triclinic case!)
  215. if len(celldata[0]) > 2:
  216. # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"
  217. # to assign tilt (vector) elements ...
  218. offdiag = celldata[:, 2]
  219. # ... otherwise assume default order in 3rd column
  220. # (if the latter was present)
  221. if len(tilt_items) >= 3:
  222. sort_index = [tilt_items.index(i)
  223. for i in ["xy", "xz", "yz"]]
  224. offdiag = offdiag[sort_index]
  225. else:
  226. offdiag = (0.0,) * 3
  227. cell, celldisp = construct_cell(diagdisp, offdiag)
  228. # Handle pbc conditions
  229. if len(tilt_items) > 3:
  230. pbc = ["p" in d.lower() for d in tilt_items[3:]]
  231. else:
  232. pbc = (False,) * 3
  233. if "ITEM: ATOMS" in line:
  234. colnames = line.split()[2:]
  235. datarows = [lines.popleft() for _ in range(n_atoms)]
  236. data = np.loadtxt(datarows)
  237. out_atoms = lammps_data_to_ase_atoms(
  238. data=data,
  239. colnames=colnames,
  240. cell=cell,
  241. celldisp=celldisp,
  242. atomsobj=Atoms,
  243. pbc=pbc,
  244. **kwargs
  245. )
  246. images.append(out_atoms)
  247. if len(images) > index_end >= 0:
  248. break
  249. return images[index]
  250. def read_lammps_dump_binary(
  251. fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs
  252. ):
  253. """Read binary dump-files (after binary2txt.cpp from lammps/tools)
  254. :param fileobj: file-stream containing the binary lammps data
  255. :param index: integer or slice object (default: get the last timestep)
  256. :param colnames: data is columns and identified by a header
  257. :param intformat: lammps support different integer size. Parameter set \
  258. at compile-time and can unfortunately not derived from data file
  259. :returns: list of Atoms-objects
  260. :rtype: list
  261. """
  262. # depending on the chosen compilation flag lammps uses either normal
  263. # integers or long long for its id or timestep numbering
  264. # !TODO: tags are cast to double -> missing/double ids (add check?)
  265. tagformat, bigformat = dict(
  266. SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")
  267. )[intformat]
  268. index_end = get_max_index(index)
  269. # Standard columns layout from lammpsrun
  270. if not colnames:
  271. colnames = ["id", "type", "x", "y", "z",
  272. "vx", "vy", "vz", "fx", "fy", "fz"]
  273. images = []
  274. # wrap struct.unpack to raise EOFError
  275. def read_variables(string):
  276. obj_len = struct.calcsize(string)
  277. data_obj = fileobj.read(obj_len)
  278. if obj_len != len(data_obj):
  279. raise EOFError
  280. return struct.unpack(string, data_obj)
  281. while True:
  282. try:
  283. # read header
  284. ntimestep, = read_variables("=" + bigformat)
  285. n_atoms, triclinic = read_variables("=" + bigformat + "i")
  286. boundary = read_variables("=6i")
  287. diagdisp = read_variables("=6d")
  288. if triclinic != 0:
  289. offdiag = read_variables("=3d")
  290. else:
  291. offdiag = (0.0,) * 3
  292. size_one, nchunk = read_variables("=2i")
  293. if len(colnames) != size_one:
  294. raise ValueError("Provided columns do not match binary file")
  295. # lammps cells/boxes can have different boundary conditions on each
  296. # sides (makes mainly sense for different non-periodic conditions
  297. # (e.g. [f]ixed and [s]hrink for a irradiation simulation))
  298. # periodic case: b 0 = 'p'
  299. # non-peridic cases 1: 'f', 2 : 's', 3: 'm'
  300. pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0
  301. cell, celldisp = construct_cell(diagdisp, offdiag)
  302. data = []
  303. for _ in range(nchunk):
  304. # number-of-data-entries
  305. n_data, = read_variables("=i")
  306. # retrieve per atom data
  307. data += read_variables("=" + str(n_data) + "d")
  308. data = np.array(data).reshape((-1, size_one))
  309. # map data-chunk to ase atoms
  310. out_atoms = lammps_data_to_ase_atoms(
  311. data=data,
  312. colnames=colnames,
  313. cell=cell,
  314. celldisp=celldisp,
  315. pbc=pbc,
  316. **kwargs
  317. )
  318. images.append(out_atoms)
  319. # stop if requested index has been found
  320. if len(images) > index_end >= 0:
  321. break
  322. except EOFError:
  323. break
  324. return images[index]