PageRenderTime 47ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/TemaLib/tema/eini/einiparser.py

https://github.com/jaaskel9/tema-tg
Python | 254 lines | 230 code | 4 blank | 20 comment | 9 complexity | 6836c69779d85da5749e969893af9915 MD5 | raw file
  1. # Copyright (c) 2006-2010 Tampere University of Technology
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining
  4. # a copy of this software and associated documentation files (the
  5. # "Software"), to deal in the Software without restriction, including
  6. # without limitation the rights to use, copy, modify, merge, publish,
  7. # distribute, sublicense, and/or sell copies of the Software, and to
  8. # permit persons to whom the Software is furnished to do so, subject to
  9. # the following conditions:
  10. #
  11. # The above copyright notice and this permission notice shall be
  12. # included in all copies or substantial portions of the Software.
  13. #
  14. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  18. # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  19. # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20. # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. test="""
  22. # result['action']['1']['name'] == 'first action'
  23. # result['property']['name']['value'] == 'test_of_new_file_format'
  24. [pr/*
  25. */operty:value]
  26. name: test_of_new_file_format
  27. initial_state: 1
  28. """
  29. import re
  30. # ??? this DIFFERS from EINI-BNF:
  31. # NAME is allowed to start with a number and _, because otherwise
  32. # [action:action_name]
  33. # 1: spam
  34. # would produce an error ("1" is illegal name)
  35. NAME='[a-zA-Z_0-9]\w*'
  36. PURE_ESEP=':' # entity separator
  37. PURE_FSEP=',' # field separator
  38. OPENBR = '\s*\[\s*'
  39. CLOSEBR = '\s*\]\s*'
  40. ESEP='\s*'+PURE_ESEP+'\s*'
  41. FSEP='\s*'+PURE_FSEP+'\s*'
  42. FSEPre = re.compile(FSEP)
  43. # the following elements are directly from EINI-BNF
  44. # no_field_header: [ entityname ]
  45. NO_FIELD_HEADER = re.compile(OPENBR+'('+NAME+')'+CLOSEBR+'$')
  46. # list field header: [entityname:fieldname[]]
  47. LIST_FIELD_HEADER = re.compile(OPENBR+'('+NAME+')'+ESEP+'('+NAME+')'+OPENBR+CLOSEBR+CLOSEBR+'$')
  48. # str field header: [entityname: field1, field2, field3]
  49. STR_FIELD_HEADER = re.compile(OPENBR+'('+NAME+')'+'('+ESEP+'('+NAME+')('+FSEP+'('+NAME+'))*)?'+CLOSEBR+'$')
  50. INSTANCE_LIST = re.compile('\s*('+NAME+')\s*('+FSEP+NAME+')*\s*$')
  51. # <STR_FIELD_DEFINITION> ::= <INSTANCE_NAME> ':' <STR_VALUE> ( ',' <STR_VALUE> )* <ENDL>
  52. # <LIST_FIELD_DEFINITION>::= <INSTANCE_NAME> ':' [ <STR_VALUE> ( ',' <STR_VALUE> )* ] <ENDL>
  53. X_FIELD_DEFINITION = re.compile('\s*('+NAME+')'+ESEP+'(.*)$')
  54. class EiniParserException(Exception):
  55. pass
  56. EPE_typemismatch="Type mismatch: field '%s' had type '%s', cannot make it '%s'."
  57. EPE_sfderror="STR_FIELD_DEFINITION expected."
  58. EPE_field_numbers="Data row should have %s fields. Found %s."
  59. EPE_syntax_error="Syntax error."
  60. comment_hash = re.compile("#[^\n]*")
  61. comment_multiline = re.compile("/\*.*?\*/",re.DOTALL) # TODO! test '/' in /* */
  62. def error(line,errmsg):
  63. raise EiniParserException("Eini parser error\n\ton line: '%s'\n\t%s"
  64. % (line,errmsg))
  65. def remove_comments(s):
  66. i=0
  67. len_s=len(s)
  68. results=[]
  69. while i < len_s:
  70. if s[i]=='#':
  71. i+=1
  72. while (i<len_s) and (s[i]!='\n'): i+=1
  73. elif s[i:i+2]=='/*':
  74. i+=2
  75. while (i<len_s) and (s[i:i+2]!='*/'): i+=1
  76. if s[i:i+2]=='*/': i+=2
  77. else:
  78. results.append(s[i])
  79. i+=1
  80. return "".join(results)
  81. def escape(s):
  82. s=s.replace('\\#',chr(2)+'EINIESCAPED_hash'+chr(3))
  83. s=s.replace('\\/*',chr(2)+'EINIESCAPED_region'+chr(3))
  84. s=s.replace('\\,',chr(2)+'EINIESCAPED_comma'+chr(3))
  85. s=s.replace('\\\\',chr(2)+'EINIESCAPED_backslash'+chr(3))
  86. s=s.replace('\\ ',chr(2)+'EINIESCAPED_space'+chr(3))
  87. s=s.replace('\\0',chr(2)+'EINIESCAPED_emptystring'+chr(3))
  88. s=s.replace('\\N/A',chr(2)+'EINIESCAPED_N/A'+chr(3))
  89. return s
  90. def unescape(s):
  91. if s==chr(2)+'EINIESCAPED_N/A'+chr(3):
  92. return None
  93. elif chr(2)+'EINIESCAPED_N/A'+chr(3) in s:
  94. raise error('\\N/A should be alone.',
  95. EPE_syntax_error)
  96. else:
  97. s=s.replace(chr(2)+'EINIESCAPED_hash'+chr(3),'#')
  98. s=s.replace(chr(2)+'EINIESCAPED_region'+chr(3),'/*')
  99. s=s.replace(chr(2)+'EINIESCAPED_comma'+chr(3),',')
  100. s=s.replace(chr(2)+'EINIESCAPED_backslash'+chr(3),'\\')
  101. s=s.replace(chr(2)+'EINIESCAPED_space'+chr(3),' ')
  102. s=s.replace(chr(2)+'EINIESCAPED_emptystring'+chr(3),'')
  103. return s
  104. def cleanstr(s):
  105. return unescape(s.strip())
  106. class Entity(dict):
  107. def __init__(self,*a,**kw):
  108. dict.__init__(self,*a,**kw)
  109. self._fieldname_fieldtype_dict={}
  110. self._fields=self._fieldname_fieldtype_dict # FIX TODO: SHORT NAME FOR DEBUGGING
  111. def fields(self):
  112. return self._fieldname_fieldtype_dict
  113. class Parser(object):
  114. def _complete(self,contents):
  115. # adds missing fields with data value None to the elements
  116. # which do not have fields
  117. c=contents
  118. for ent in c:
  119. ent_fields=c[ent].fields()
  120. for dkey in c[ent]:
  121. for f in ent_fields:
  122. if c[ent][dkey]==None: c[ent][dkey]={}
  123. if not f in c[ent][dkey]: c[ent][dkey][f]=None
  124. def parse(self,fileobj):
  125. def add_field(entityname,fieldname,fieldtype,d):
  126. """adds field to dictionary d of an entity"""
  127. if not entityname in d:
  128. d[entityname]=Entity()
  129. if fieldname==None: return
  130. if not fieldname in d[entityname]._fields:
  131. d[entityname]._fields[fieldname]=fieldtype
  132. else: # type already defined for the field, it must stay same
  133. if d[entityname]._fields[fieldname]!=fieldtype:
  134. error('',EPE_typemismatch
  135. % (fieldname,d[entityname]._fields[fieldname],fieldtype))
  136. # end of add_field
  137. result={}
  138. uncommented = remove_comments(escape(fileobj.read()))
  139. current_entity=None
  140. current_field_type=None
  141. current_fields=[]
  142. for line in uncommented.split('\n'):
  143. line=line.strip()
  144. if line=="": continue
  145. m=NO_FIELD_HEADER.match(line)
  146. if m:
  147. current_entity=m.group(1)
  148. current_field_type=None
  149. current_fields=[]
  150. add_field(current_entity,None,None,result)
  151. continue
  152. m=LIST_FIELD_HEADER.match(line)
  153. if m:
  154. current_entity=m.group(1)
  155. current_field_type=list
  156. current_fields=[m.group(2)]
  157. add_field(current_entity,m.group(2),current_field_type,result)
  158. continue
  159. m=STR_FIELD_HEADER.match(line)
  160. if m:
  161. current_entity=m.group(1)
  162. current_field_type=str
  163. current_fields=[]
  164. if m.group(2)==None: # no fields: [only_entity]
  165. current_fields=[]
  166. add_field(current_entity,None,None,result)
  167. else: # at least one field: [entity:field1,field2]
  168. for fieldspec in m.group(2)[1:].split(PURE_FSEP):
  169. this_field=fieldspec.strip()
  170. current_fields.append(fieldspec.strip())
  171. add_field(current_entity,this_field,str,result)
  172. del this_field
  173. if not current_entity in result: result[current_entity]={}
  174. continue
  175. m=INSTANCE_LIST.match(line)
  176. if m:
  177. if current_field_type!=None:
  178. error(line,EPE_syntax_error)
  179. for fieldspec in m.group(0).split(PURE_FSEP):
  180. this_field=fieldspec.strip()
  181. if not this_field in result[current_entity]:
  182. result[current_entity][this_field]={}
  183. continue
  184. m=X_FIELD_DEFINITION.match(line)
  185. if m:
  186. data_key=m.group(1).strip()
  187. if not data_key in result[current_entity]:
  188. result[current_entity][data_key]={}
  189. if current_field_type==str:
  190. # try to read STR_FIELD_DEFINITION
  191. if m.group(2).strip()=="":
  192. error(line,EPE_sfderror)
  193. field_data=FSEPre.split(m.group(2))
  194. if len(field_data)!=len(current_fields):
  195. error(line,EPE_field_numbers % (len(current_fields),
  196. len(field_data)))
  197. for i,value in enumerate(field_data):
  198. result[current_entity][data_key][current_fields[i]]=cleanstr(value)
  199. continue
  200. if current_field_type==list:
  201. # try to read LIST_FIELD_DEFINITION
  202. if not current_fields[0] in result[current_entity][data_key]:
  203. result[current_entity][data_key][current_fields[0]]=[]
  204. field_data=FSEPre.split(m.group(2))
  205. for value in field_data:
  206. if value.strip()=='': continue
  207. result[current_entity][data_key][current_fields[0]].append(
  208. cleanstr(value))
  209. continue
  210. assert("code should have continued before this line"==0)
  211. error(line,EPE_syntax_error)
  212. self._complete(result)
  213. return result