PageRenderTime 41ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/caaba/mecca/check_eqns.py

https://gitlab.com/RolfSander/caaba-mecca
Python | 272 lines | 247 code | 10 blank | 15 comment | 8 complexity | 648d6196e9c94a41a16d04ce4bf0cfcf MD5 | raw file
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*- Time-stamp: <2020-09-18 13:23:59 sander>
  3. # The python script check_eqns.py analyses the reactions present
  4. # in a *.eqn file, assessing the conservation of the chemical elements
  5. # as given in a *.spc file. Reactions found to be non-conserving are
  6. # written to standard output along with a report of the elemental
  7. # imbalance in each case.
  8. # authors:
  9. # based on original perl code from Tim Butler (2008)
  10. # rewritten from scratch in python: Rolf Sander, Mainz (2018-2020)
  11. # Usage:
  12. # - check_eqns.py is normally executed via xmecca
  13. import sys, os
  14. assert sys.version_info >= (3, 6)
  15. import re
  16. CAABADIR = os.path.realpath(os.path.dirname(__file__)+'/..')
  17. sys.path.insert(1, os.path.realpath(CAABADIR+'/pycaaba'))
  18. HLINE = '-' * 78
  19. DEBUG = False #True
  20. ##############################################################################
  21. def num_var(chunk):
  22. # split a chunk into number and variable, e.g.: 0.7H2O2 -> 0.7, H2O2
  23. search_result = re.search('([0-9.]*)(.*)', chunk)
  24. number = search_result.group(1)
  25. if (number):
  26. number = float(number)
  27. else:
  28. number = float(1)
  29. variable = search_result.group(2).strip()
  30. return number, variable
  31. ##############################################################################
  32. def get_elements_spc(spcfilename):
  33. elements = {} # create a dictionary
  34. if DEBUG: print(HLINE)
  35. SPCFILE = open(spcfilename, 'r', encoding='utf-8')
  36. for line in iter(SPCFILE): # loop over *.spc file
  37. line = line.strip() # remove leading and trailing whitespace
  38. search_result = re.search('^ *([A-z0-9_#]+) *=([A-z0-9+ #]+);', line)
  39. if (not search_result): # skip lines that do not define a species
  40. if DEBUG: print('NO: |%s|' % (line))
  41. continue # proceed with next line in *.spc file
  42. if DEBUG: print(HLINE)
  43. species = search_result.group(1)
  44. elements[species] = {} # create a sub-dictionary for this species
  45. spc_composition = search_result.group(2).replace(' ','') # rm whitespace
  46. if DEBUG: print('|%s|' % (line))
  47. if DEBUG: print('|%s|' % (search_result.group()))
  48. if DEBUG: print('%20s = ' % (species), end='')
  49. if DEBUG: print('|%s|' % (spc_composition))
  50. for chunk in spc_composition.split('+'): # split into atoms
  51. count, element = num_var(chunk) # separate stoichiometric factor
  52. if DEBUG: print('%g * %s + ' % (count, element))
  53. elements[species][element] = count # add element count to dictionary
  54. SPCFILE.close()
  55. if DEBUG: print(HLINE)
  56. if DEBUG: print(elements)
  57. return elements
  58. ##############################################################################
  59. def analyze_eqn(elements, eqnfilename, neweqnfilename):
  60. NEWEQNFILE = open(neweqnfilename,'w', encoding='utf-8')
  61. exitstatus = 0
  62. eqntag_list = []
  63. EQNFILE = open(eqnfilename, 'r', encoding='utf-8')
  64. regexp = re.compile('^<(.*)>(.*)=(.*):(.*);(.*)$')
  65. for line0 in iter(EQNFILE): # loop over *.eqn file
  66. # find a label in current line:
  67. labels = re.findall('{%([^}]*)}', line0)
  68. # create a dictionary for mass balance:
  69. massbal = {}
  70. # check for tab characters:
  71. if ('\t' in line0):
  72. exitstatus = exitstatus | 0b0001000 # set 4th bit for TAB error
  73. print('ERROR: Illegal TAB character detected:')
  74. showtab = line0.replace('\t',r'\t')
  75. print(f' {showtab}')
  76. # remove all {...} comments and final \n:
  77. line = re.sub('{[^}]*}','',line0).strip()
  78. reaction = regexp.search(line) # check if line contains a reaction
  79. if reaction:
  80. labels = labels[0] # convert singleton list to scalar
  81. element_set = set()
  82. eqntag = reaction.group(1).strip() # e.g.: <G2111>
  83. reactants_str = reaction.group(2).strip() # e.g.: H2O + O1D
  84. products_str = reaction.group(3).strip() # e.g.: 2 OH
  85. display_reaction = re.sub(
  86. ' +', ' ', f'<{eqntag}> {reactants_str} -> {products_str}')
  87. rate = reaction.group(4).strip() # e.g.: 1.63E-10*EXP(60./temp)
  88. extra = reaction.group(5).strip() # e.g.: // products assumed
  89. reactants = reactants_str.split('+')
  90. products = products_str.split('+')
  91. all_species = reactants + products
  92. # ----------------------------------------------------------------
  93. if DEBUG: print(' eqntag: <%s>' % (eqntag))
  94. nC = 0
  95. # analyze products first:
  96. if DEBUG: print(' products: %s' % (products_str))
  97. for chunk in products:
  98. chunk = chunk.replace(' ','') # rm whitespace
  99. stoic, product = num_var(chunk) # separate stoichiometric factor
  100. if DEBUG: print('|%s| |%s| |%s|' % (stoic, product, elements[product]))
  101. nC_product = elements[product].get('C')
  102. if nC_product: nC = max(nC, int(nC_product))
  103. # add all atoms from current product to mass balance:
  104. for key, value in elements[product].items():
  105. if (key=='Min'): # subtract negative charge from Pls
  106. key = 'Pls'
  107. value = -value
  108. if key in massbal:
  109. massbal[key] += stoic * value
  110. else:
  111. massbal[key] = stoic * value
  112. # based on the elements in the reaction, define the section where it should be:
  113. elem_sect = '0'
  114. if ('O' in massbal): elem_sect = '1'
  115. if ('H' in massbal): elem_sect = '2'
  116. if ('N' in massbal): elem_sect = '3'
  117. if ('C' in massbal): elem_sect = '4'
  118. if ('F' in massbal): elem_sect = '5'
  119. if ('Cl' in massbal): elem_sect = '6'
  120. if ('Br' in massbal): elem_sect = '7'
  121. if ('I' in massbal): elem_sect = '8'
  122. if ('S' in massbal): elem_sect = '9'
  123. if ('Hg' in massbal): elem_sect = '10'
  124. if ('Fe' in massbal): elem_sect = '11'
  125. if (eqntag.startswith('J0')): elem_sect = '0' # e* reactions
  126. # analyze reactants next:
  127. if DEBUG: print(' reactants: %s' % (reactants_str))
  128. for chunk in reactants:
  129. chunk = chunk.replace(' ','') # rm whitespace
  130. if (chunk=='hv'): continue # ignore pseudo-reactant hv
  131. stoic, reactant = num_var(chunk) # separate stoichiometric factor
  132. if DEBUG: print('|%s| |%s| |%s|' % (stoic, reactant, elements[reactant]))
  133. nC_reactant = elements[reactant].get('C')
  134. if nC_reactant: nC = max(nC, int(nC_reactant))
  135. # subtract all atoms from current reactant from mass balance:
  136. for key, value in list(elements[reactant].items()):
  137. if (key=='Min'): # subtract negative charge from Pls
  138. key = 'Pls'
  139. value = -value
  140. if key in massbal:
  141. massbal[key] -= stoic * value
  142. else:
  143. massbal[key] = -stoic * value
  144. # write errorstring if mass balance is not correct:
  145. errorstring = ''
  146. for element, balance in sorted(massbal.items()):
  147. # activate the following lines to ignore specific elements:
  148. if (element.upper()=='IGNORE'): continue
  149. if (element=='H'): continue
  150. if (element=='O'): continue
  151. if (abs(balance)>1E-14):
  152. errorstring = '%s %+g %s,' % (errorstring, balance, element)
  153. if (errorstring):
  154. exitstatus = exitstatus | 0b0000100 # set 3rd bit for mass balance error
  155. print(f'ERROR: Incorrect mass balance: {errorstring.rstrip(",")}')
  156. print(f' {display_reaction}\n')
  157. if DEBUG: print(' rate: %s' % (rate))
  158. if DEBUG: print(' extra: %s' % (extra))
  159. # ----------------------------------------------------------------
  160. # check if reactions are in correct section:
  161. section = re.search('^[A-z]*([0-9]+)', eqntag).group(1) # number in eqntag
  162. if not section.startswith(elem_sect):
  163. exitstatus = exitstatus | 0b1000000 # set 7th bit for section error
  164. print(f'ERROR: Incorrect section: Reaction <{eqntag}> should ' +
  165. f'be in section {elem_sect}:\n {display_reaction}\n')
  166. # ----------------------------------------------------------------
  167. # check carboncount for reactions A4*, G4*, J4* and PH4*:
  168. if((eqntag[0:2]=='A4') or (eqntag[0:2]=='G4') or
  169. (eqntag[0:2]=='J4') or (eqntag[0:3]=='PH4')):
  170. if (eqntag[0:3]=='PH4'):
  171. nC2 = int(eqntag[3:4]) # PH4* is one character longer than others
  172. else:
  173. nC2 = int(eqntag[2:3])
  174. # 2nd digit of eqntag is 0 for >= 10 C atoms
  175. if ((nC2!=nC) and (nC2!=0 or nC<10)):
  176. exitstatus = exitstatus | 0b0000010 # set 2nd bit for carboncount error
  177. print('ERROR: Incorrect carbon count: The largest species in ' +
  178. f'<{eqntag}> should have {nC2} C atoms, not {nC}:')
  179. print(f' {display_reaction}\n')
  180. # ----------------------------------------------------------------
  181. # add_element_labels:
  182. for species in all_species:
  183. # remove leading and trailing whitespace:
  184. species = species.strip()
  185. # remove stoichiometric factor:
  186. species = re.sub('^[0-9.]* *','',species)
  187. # remove hv:
  188. if (species=='hv'):
  189. continue
  190. # find all elements in this species:
  191. for elem, value in elements[species].items():
  192. if (elem=='C' and value==1):
  193. continue
  194. if elem in ['H', 'Pls', 'Min', 'IGNORE', 'O']:
  195. continue
  196. element_set.add(elem)
  197. elem_labels = ''
  198. labellist = re.findall('[A-Z][a-z0-9]*', labels)
  199. for elem in sorted(element_set):
  200. # check if element label was already in list of labels:
  201. if (elem in labellist):
  202. exitstatus = exitstatus | 0b0000001 # set 1st bit for element label error
  203. print('ERROR: Element labels are generated automatically, ' +
  204. 'do not insert them manually:')
  205. print(f' Remove {elem} in {labels} in the reaction')
  206. print(f' {display_reaction}\n')
  207. # concatenate all element labels from current reaction:
  208. elem_labels += elem
  209. newline = line0.replace('{%'+labels+'}','{%'+labels+elem_labels+'}')
  210. print(newline.rstrip(('\n')), file=NEWEQNFILE)
  211. # ----------------------------------------------------------------
  212. # check length of eqntag:
  213. if (len(eqntag)>27):
  214. exitstatus = exitstatus | 0b0010000 # set 5th bit for eqntag length error
  215. print('ERROR: Equation tag must not be longer than (about) 27 characters:')
  216. print(f' <{eqntag}>\n')
  217. # ----------------------------------------------------------------
  218. # check for duplicate eqntags:
  219. if eqntag in eqntag_list:
  220. exitstatus = exitstatus | 0b0100000 # set 6th bit for duplicate eqntag error
  221. print('ERROR: Duplicate equation tag:')
  222. print(f' <{eqntag}>\n')
  223. else:
  224. eqntag_list.append(eqntag)
  225. # ----------------------------------------------------------------
  226. else:
  227. # current line is not a reaction:
  228. print(line0.rstrip('\n'), file=NEWEQNFILE)
  229. if DEBUG: print('NO: %s' % (line))
  230. EQNFILE.close()
  231. NEWEQNFILE.close()
  232. return exitstatus
  233. ##############################################################################
  234. def check_eqns(spcfilename, eqnfilename, neweqnfilename):
  235. # get elemental composition from *.spc file:
  236. elements = get_elements_spc(spcfilename)
  237. # analyze *.eqn file:
  238. exitstatus = analyze_eqn(elements, eqnfilename, neweqnfilename)
  239. return exitstatus
  240. ##############################################################################
  241. if __name__ == '__main__':
  242. if len(sys.argv) > 3:
  243. spcfilename = sys.argv[1]
  244. eqnfilename = sys.argv[2]
  245. neweqnfilename = sys.argv[3]
  246. else:
  247. sys.exit('ERROR: provide spcfilename, eqnfilename and neweqnfilename')
  248. if DEBUG: print(f'spcfile = {spcfilename}')
  249. if DEBUG: print(f'eqnfile = {eqnfilename}')
  250. exitstatus = check_eqns(spcfilename, eqnfilename, neweqnfilename)
  251. if DEBUG: print(f'exitstatus = {exitstatus}')
  252. sys.exit(exitstatus)
  253. ##############################################################################