/visutils/data/prettifiers.py

https://github.com/vis-netlausnir/visutils · Python · 308 lines · 245 code · 24 blank · 39 comment · 142 complexity · 87221c042c67257317f6c706a9a7b041 MD5 · raw file

  1. # encoding=utf-8
  2. import re
  3. import decimal
  4. import datetime
  5. _SEQ_TYPES = [type(t()) for t in [dict, list]]
  6. class DataAttributeMissing(AttributeError):
  7. pass
  8. def strip_empty_xmlns(tree):
  9. '''
  10. Strips out empty xmlns tags
  11. '''
  12. if isinstance(tree, dict):
  13. marked = []
  14. for key in tree.keys():
  15. if isinstance(tree[key], dict) or isinstance(tree[key], list):
  16. tree[key] = strip_empty_xmlns(tree[key])
  17. elif tree[key] is None:
  18. tree.pop(key)
  19. elif key == 'xmlns' and len(tree[key]) == 0:
  20. tree.pop(key)
  21. for key in marked:
  22. del tree[key]
  23. if isinstance(tree, list):
  24. for i in range(len(tree)):
  25. if isinstance(tree[i], dict) or isinstance(tree[i], list):
  26. tree[i] = strip_empty_xmlns(tree[i])
  27. return tree
  28. def strip_xmlns(tree):
  29. '''
  30. Strips out empty xmlns tags
  31. '''
  32. if isinstance(tree, dict):
  33. marked = []
  34. for key in tree.keys():
  35. if isinstance(tree[key], dict) or isinstance(tree[key], list):
  36. tree[key] = strip_xmlns(tree[key])
  37. elif tree[key] is None:
  38. tree.pop(key)
  39. elif key == 'xmlns':
  40. tree.pop(key)
  41. for key in marked:
  42. del tree[key]
  43. if isinstance(tree, list):
  44. for i in range(len(tree)):
  45. if isinstance(tree[i], dict) or isinstance(tree[i], list):
  46. tree[i] = strip_xmlns(tree[i])
  47. return tree
  48. def collapse_singleton_dict_strings(tree):
  49. '''
  50. Checks for lists in the tree that contain single items and collapses
  51. them if the value of the pair is a string or unicode
  52. '''
  53. if isinstance(tree, dict):
  54. for key in tree.keys():
  55. if isinstance(tree[key], dict) and len(tree[key]) == 1:
  56. for _key in tree[key].keys():
  57. if isinstance(tree[key][_key], str) or isinstance(tree[key][_key], unicode):
  58. tree[key] = tree[key][_key]
  59. if isinstance(tree[key], dict) or isinstance(tree[key], list):
  60. tree[key] = collapse_singleton_dict_strings(tree[key])
  61. elif isinstance(tree, list) and len(tree) > 0:
  62. for i in range(len(tree)):
  63. if isinstance(tree[i], dict) and len(tree[i]) == 1:
  64. for _key in tree[i].keys():
  65. if isinstance(tree[i][_key], str) or isinstance(tree[i][_key], unicode):
  66. tree[i] = unicode(tree[i][_key])
  67. if isinstance(tree[i], dict) or isinstance(tree[i], list):
  68. tree[i] = collapse_singleton_dict_strings(tree[i])
  69. return tree
  70. def collapse_singleton_list_strings(tree):
  71. '''
  72. Checks for lists in the tree that contain single items and collapses
  73. them into a regular scalar value
  74. '''
  75. if isinstance(tree, dict):
  76. for key in tree.keys():
  77. if isinstance(tree[key], list) and len(tree[key]) == 1:
  78. tree[key] = tree[key][0]
  79. elif isinstance(tree[key], dict) or isinstance(tree[key], list):
  80. tree[key] = collapse_singleton_list_strings(tree[key])
  81. if isinstance(tree, list):
  82. for i in range(len(tree)):
  83. if isinstance(tree[i], list) and len(tree[i]) == 1:
  84. tree[i] = tree[i][0]
  85. elif isinstance(tree[i], dict) or isinstance(tree[i], list):
  86. tree[i] = collapse_singleton_list_strings(tree[i])
  87. return tree
  88. def convert_empty_dict_to_string(tree):
  89. '''
  90. Checks for dicts in the dataset that are empty and converts them
  91. to empty string values in their parent dict under the same key
  92. '''
  93. if isinstance(tree, dict):
  94. for key in tree.keys():
  95. if isinstance(tree[key], dict) and len(tree[key]) == 0:
  96. tree[key] = ''
  97. elif isinstance(tree[key], dict) or isinstance(tree[key], list):
  98. tree[key] = convert_empty_dict_to_string(tree[key])
  99. if isinstance(tree, list):
  100. for i in range(len(tree)):
  101. if isinstance(tree[i], dict) and len(tree[i]) == 0:
  102. tree[i] = ''
  103. elif isinstance(tree[i], dict) or isinstance(tree[i], list):
  104. tree[i] = convert_empty_dict_to_string(tree[i])
  105. return tree
  106. def _parse_native_type(value, types=list(), function=None):
  107. if function is not None:
  108. return function(value)
  109. for t in types:
  110. try:
  111. if t.__name__ == 'datetime':
  112. try:
  113. return datetime.datetime.strptime(str(value), "%d.%m.%Y %H:%M")
  114. except:
  115. try:
  116. return datetime.datetime.strptime(str(value), "%d.%m.%Y")
  117. except:
  118. continue
  119. return t(value)
  120. except:
  121. continue
  122. return value
  123. SKIP_KEYS = ['ssid','@ssid','Persidno','id','policyNumber','ownerSSN']
  124. def parse_native_types(tree, types=list(), functions=dict()):
  125. '''
  126. Checks if string values can be converted to native types and does so.
  127. If types is empty, the method defaults to
  128. [str, long, int, decimal.Decimal, datetime.datetime] (in that order)
  129. Decimal is used in preference to float as it is a precise value as
  130. opposed to binary floating points.
  131. If values test as strings, the datetime parsing is attempted.
  132. Forced parsing of a specific type can be attempted by passing only
  133. that type. Note however that this might not be safe and probably
  134. better attempted with the native parser of that type.
  135. '''
  136. if not len(types):
  137. types = [long, int, decimal.Decimal, datetime.datetime]
  138. if isinstance(tree, dict):
  139. for key in tree.keys():
  140. if key in SKIP_KEYS: continue
  141. if type(tree[key]) in _SEQ_TYPES:
  142. parse_native_types(tree[key], types=types, functions=functions)
  143. elif key in functions.keys():
  144. tree[key] = _parse_native_type(tree[key], function=functions[key])
  145. else:
  146. tree[key] = _parse_native_type(tree[key], types=types)
  147. elif isinstance(tree, list):
  148. for i in range(len(tree)):
  149. if type(tree[i]) in _SEQ_TYPES:
  150. parse_native_types(tree[i], types=types, functions=functions)
  151. else:
  152. tree[i] = _parse_native_type(tree[i], types=types)
  153. return tree
  154. def embed_hash_tags(tree):
  155. '''
  156. Checks if dict keys start with '#' and changes it to lstrip('#')
  157. If there is another key with the same name (without the '#') no
  158. action is taken.
  159. '''
  160. if isinstance(tree, dict):
  161. marked = []
  162. for key in tree.keys():
  163. _key = unicode(key).lstrip('#')
  164. if unicode(key).startswith('#') and _key not in tree.keys():
  165. tree[_key] = tree[key]
  166. marked.append(key)
  167. if isinstance(tree[_key], dict) or isinstance(tree[_key], list):
  168. tree[_key] = embed_hash_tags(tree[_key])
  169. for key in marked:
  170. del tree[key]
  171. elif isinstance(tree, list):
  172. for i in range(len(tree)):
  173. if isinstance(tree[i], dict) or isinstance(tree[i], list):
  174. tree[i] = embed_hash_tags(tree[i])
  175. return tree
  176. def embed_at_tags(tree):
  177. '''
  178. Checks if dict keys start with '@' and changes it to lstrip('@')
  179. If there is another key with the same name (without the '@') no
  180. action is taken.
  181. '''
  182. if isinstance(tree, dict):
  183. marked = []
  184. for key in tree.keys():
  185. _key = unicode(key).lstrip('@')
  186. if unicode(key).startswith('@') and _key not in tree.keys():
  187. tree[_key] = tree[key]
  188. marked.append(key)
  189. if isinstance(tree[_key], dict) or isinstance(tree[_key], list):
  190. embed_at_tags(tree[_key])
  191. for key in marked:
  192. del tree[key]
  193. elif isinstance(tree, list):
  194. for i in range(len(tree)):
  195. if isinstance(tree[i], dict) or isinstance(tree[i], list):
  196. tree[i] = embed_at_tags(tree[i])
  197. return tree
  198. _non_id_char = re.compile('[^_0-9a-zA-Z]')
  199. class _SafeObject(object):
  200. def __init__(self):
  201. pass
  202. def __unicode__(self):
  203. return None
  204. def __str__(self):
  205. return None
  206. def __getattr__(self, item):
  207. try:
  208. return object.__getattribute__(self, item)
  209. except:
  210. return _SafeObject()
  211. class BaseObject(object):
  212. def __getattribute__(self, item):
  213. return object.__getattribute__(self, item)
  214. def __getattr__(self, item):
  215. if item in self.__dict__.keys():
  216. return object.__getattribute__(self, item)
  217. else:
  218. concat_keys = lambda sep, keys: sep.join(key for key in keys)
  219. raise DataAttributeMissing(
  220. "Attribute {key} does not exist. Possible choices are: {keys}".format(key=item,
  221. keys=concat_keys(', ',
  222. self.__dict__.keys()))
  223. )
  224. def __setattr__(self, item, value):
  225. item = _non_id_char.sub('', item)
  226. object.__setattr__(self, item, value)
  227. def __unicode__(self):
  228. if hasattr(self, 'text') and type(getattr(self, 'text')) in [type(t()) for t in [str, unicode]]:
  229. return getattr(self, 'text')
  230. elif hasattr(self, 'Text') and type(getattr(self, 'Text')) in [type(t()) for t in [str, unicode]]:
  231. return getattr(self, 'Text')
  232. return self.__class__.__name__
  233. def __str__(self):
  234. if hasattr(self, 'text') and type(getattr(self, 'text')) in [type(t()) for t in [str, unicode]]:
  235. return getattr(self, 'text')
  236. elif hasattr(self, 'Text') and type(getattr(self, 'Text')) in [type(t()) for t in [str, unicode]]:
  237. return getattr(self, 'Text')
  238. return self.__class__.__name__
  239. def _name_mangle(self, name):
  240. return _non_id_char.sub('', name)
  241. def __repr__(self):
  242. fields_string = ", ".join(sorted(self.__dict__.keys()))
  243. return u"<BaseObject: {fields}>".format(fields=fields_string)
  244. def objectify_tree(tree, collations=dict(), parent=''):
  245. '''
  246. Takes a dict and makes an object from it. Note that if this is called
  247. as a prettifier, it will not return a dict.
  248. If parent is set keys in the collations dict can be of the form 'parent.child'
  249. and the collations matcher will include the term in searches.
  250. '''
  251. ret = BaseObject()
  252. if isinstance(tree, dict):
  253. for key in tree.keys():
  254. if key in collations.keys() or parent+'.'+key in collations.keys():
  255. colKey = key
  256. if parent+'.'+key in collations.keys():
  257. colKey = parent+'.'+key
  258. if not hasattr(ret, collations[colKey]):
  259. setattr(ret, collations[colKey], list())
  260. if isinstance(tree[key], dict):
  261. getattr(ret, collations[colKey]).append(objectify_tree(tree[key], collations=collations, parent=key))
  262. elif isinstance(tree[key], list):
  263. for item in tree[key]:
  264. if isinstance(item, list) or isinstance(item, dict):
  265. getattr(ret, collations[colKey]).append(
  266. objectify_tree(item, collations=collations, parent=key)
  267. )
  268. else:
  269. getattr(ret, collations[colKey]).append(item)
  270. else:
  271. setattr(ret, collations[colKey], [tree[key]])
  272. else:
  273. if isinstance(tree[key], dict):
  274. setattr(ret, key, objectify_tree(tree[key], collations=collations, parent=key))
  275. elif isinstance(tree[key], list):
  276. setattr(ret, key, list())
  277. for item in tree[key]:
  278. if isinstance(item, list) or isinstance(item, dict):
  279. getattr(ret, key).append(objectify_tree(item, collations=collations, parent=key))
  280. else:
  281. getattr(ret, key).append(item)
  282. else:
  283. setattr(ret, key, tree[key])
  284. return ret