PageRenderTime 33ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/ase/io/qbox.py

https://gitlab.com/mbarbry/ase
Python | 206 lines | 157 code | 16 blank | 33 comment | 14 complexity | cc310705e7d1d3a57ce99114b9fd20c9 MD5 | raw file
  1. """This module contains functions to read from QBox output files"""
  2. from ase import Atom, Atoms
  3. from ase.calculators.singlepoint import SinglePointCalculator
  4. import re
  5. import xml.etree.ElementTree as ET
  6. # Compile regexs for fixing XML
  7. re_find_bad_xml = re.compile(r'<(/?)([A-z]+) expectation ([a-z]+)')
  8. def read_qbox(f, index=-1):
  9. """Read data from QBox output file
  10. Inputs:
  11. f - str or fileobj, path to file or file object to read from
  12. index - int or slice, which frames to return
  13. Returns:
  14. list of Atoms or atoms, requested frame(s)
  15. """
  16. if isinstance(f, str):
  17. f = open(f, 'r')
  18. # Check whether this is a QB@all output
  19. version = None
  20. for line in f:
  21. if '<release>' in line:
  22. version = ET.fromstring(line)
  23. break
  24. if version is None:
  25. raise Exception('Parse Error: Version not found')
  26. is_qball = 'qb@LL' in version.text or 'qball' in version.text
  27. # Load in atomic species
  28. species = dict()
  29. if is_qball:
  30. # Read all of the lines between release and the first call to `run`
  31. species_data = []
  32. for line in f:
  33. if '<run' in line:
  34. break
  35. species_data.append(line)
  36. species_data = '\n'.join(species_data)
  37. # Read out the species information with regular expressions
  38. symbols = re.findall('symbol_ = ([A-Z][a-z]?)', species_data)
  39. masses = re.findall('mass_ = ([0-9.]+)', species_data)
  40. names = re.findall('name_ = ([a-z]+)', species_data)
  41. numbers = re.findall('atomic_number_ = ([0-9]+)', species_data)
  42. # Compile them into a dictionary
  43. for name, symbol, mass, number in zip(names, symbols, masses, numbers):
  44. spec_data = dict(
  45. symbol=symbol,
  46. mass=float(mass),
  47. number=float(number)
  48. )
  49. species[name] = spec_data
  50. else:
  51. # Find all species
  52. species_blocks = _find_blocks(f, 'species', '<cmd>run')
  53. for spec in species_blocks:
  54. name = spec.get('name')
  55. spec_data = dict(
  56. symbol=spec.find('symbol').text,
  57. mass=float(spec.find('mass').text),
  58. number=int(spec.find('atomic_number').text))
  59. species[name] = spec_data
  60. # Find all of the frames
  61. frames = _find_blocks(f, 'iteration', None)
  62. # If index is an int, return one frame
  63. if isinstance(index, int):
  64. return _parse_frame(frames[index], species)
  65. else:
  66. return [_parse_frame(frame, species) for frame in frames[index]]
  67. def _find_blocks(fp, tag, stopwords='[qbox]'):
  68. """Find and parse a certain block of the file.
  69. Reads a file sequentially and stops when it either encounters the
  70. end of the file, or until the it encounters a line that contains a
  71. user-defined string *after it has already found at least one
  72. desired block*. Use the stopwords ``[qbox]`` to read until the next
  73. command is issued.
  74. Groups the text between the first line that contains <tag> and the
  75. next line that contains </tag>, inclusively. The function then
  76. parses the XML and returns the Element object.
  77. Inputs:
  78. fp - file-like object, file to be read from
  79. tag - str, tag to search for (e.g., 'iteration').
  80. `None` if you want to read until the end of the file
  81. stopwords - str, halt parsing if a line containing this string
  82. is encountered
  83. Returns:
  84. list of xml.ElementTree, parsed XML blocks found by this class
  85. """
  86. start_tag = '<%s' % tag
  87. end_tag = '</%s>' % tag
  88. blocks = [] # Stores all blocks
  89. cur_block = [] # Block being filled
  90. in_block = False # Whether we are currently parsing
  91. for line in fp:
  92. # Check if the block has started
  93. if start_tag in line:
  94. if in_block:
  95. raise Exception('Parsing failed: Encountered nested block')
  96. else:
  97. in_block = True
  98. # Add data to block
  99. if in_block:
  100. cur_block.append(line)
  101. # Check for stopping conditions
  102. if stopwords is not None:
  103. if stopwords in line and len(blocks) > 0:
  104. break
  105. if end_tag in line:
  106. if in_block:
  107. blocks.append(cur_block)
  108. cur_block = []
  109. in_block = False
  110. else:
  111. raise Exception('Parsing failed: End tag found before start '
  112. 'tag')
  113. # Join strings in a block into a single string
  114. blocks = [''.join(b) for b in blocks]
  115. # Ensure XML compatibility. There are two specific tags in QBall that are
  116. # not valid XML, so we need to run a
  117. blocks = [re_find_bad_xml.sub(r'<\1\2_expectation_\3', b) for b in blocks]
  118. # Parse the blocks
  119. return [ET.fromstring(b) for b in blocks]
  120. def _parse_frame(tree, species):
  121. """Parse a certain frame from QBOX output
  122. Inputs:
  123. tree - ElementTree, <iteration> block from output file
  124. species - dict, data about species. Key is name of atom type,
  125. value is data about that type
  126. Return:
  127. Atoms object describing this iteration"""
  128. # Load in data about the system
  129. energy = float(tree.find("etotal").text)
  130. # Load in data about the cell
  131. unitcell = tree.find('atomset').find('unit_cell')
  132. cell = []
  133. for d in ['a', 'b', 'c']:
  134. cell.append([float(x) for x in unitcell.get(d).split()])
  135. stress_tree = tree.find('stress_tensor')
  136. if stress_tree is None:
  137. stresses = None
  138. else:
  139. stresses = [float(stress_tree.find('sigma_%s' % x).text)
  140. for x in ['xx', 'yy', 'zz', 'yz', 'xz', 'xy']]
  141. # Create the Atoms object
  142. atoms = Atoms(pbc=True, cell=cell)
  143. # Load in the atom information
  144. forces = []
  145. for atom in tree.find('atomset').findall('atom'):
  146. # Load data about the atom type
  147. spec = atom.get('species')
  148. symbol = species[spec]['symbol']
  149. mass = species[spec]['mass']
  150. # Get data about position / velocity / force
  151. pos = [float(x) for x in atom.find('position').text.split()]
  152. force = [float(x) for x in atom.find('force').text.split()]
  153. momentum = [float(x) * mass
  154. for x in atom.find('velocity').text.split()]
  155. # Create the objects
  156. atom = Atom(symbol=symbol, mass=mass, position=pos, momentum=momentum)
  157. atoms += atom
  158. forces.append(force)
  159. # Create the calculator object that holds energy/forces
  160. calc = SinglePointCalculator(atoms,
  161. energy=energy, forces=forces, stress=stresses)
  162. atoms.calc = calc
  163. return atoms