PageRenderTime 52ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/serialize_tools.py

https://bitbucket.org/mariusmagureanu/vescu
Python | 304 lines | 277 code | 17 blank | 10 comment | 16 complexity | 86e917b4c50622692fe9d3307cdd5d34 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. import codecs
  3. try:
  4. codecs.lookup_error('surrogateescape')
  5. default_errors = 'surrogateescape'
  6. except LookupError:
  7. default_errors = 'strict'
  8. try:
  9. from StringIO import StringIO as BytesIO
  10. except ImportError:
  11. from io import BytesIO as BytesIO
  12. try:
  13. unicode
  14. except NameError:
  15. # Python 3
  16. unicode = str
  17. basestring = (bytes, str)
  18. try:
  19. long
  20. except NameError:
  21. # Python 3
  22. long = int
  23. try:
  24. xrange
  25. except NameError:
  26. xrange = range
# Names exported by a star-import of this module.
__all__ = ('phpobject', 'convert_member_dict', 'dict_to_list', 'dict_to_tuple',
           'load', 'loads', 'dump', 'dumps', 'serialize', 'unserialize')
  29. def _translate_member_name(name):
  30. if name[:1] == ' ':
  31. name = name.split(None, 2)[-1]
  32. return name
  33. class phpobject(object):
  34. """Simple representation for PHP objects. This is used """
  35. __slots__ = ('__name__', '__php_vars__')
  36. def __init__(self, name, d=None):
  37. if d is None:
  38. d = {}
  39. object.__setattr__(self, '__name__', name)
  40. object.__setattr__(self, '__php_vars__', d)
  41. def _asdict(self):
  42. """Returns a new dictionary from the data with Python identifiers."""
  43. return convert_member_dict(self.__php_vars__)
  44. def _lookup_php_var(self, name):
  45. for key, value in self.__php_vars__.items():
  46. if _translate_member_name(key) == name:
  47. return key, value
  48. def __getattr__(self, name):
  49. rv = self._lookup_php_var(name)
  50. if rv is not None:
  51. return rv[1]
  52. raise AttributeError(name)
  53. def __setattr__(self, name, value):
  54. rv = self._lookup_php_var(name)
  55. if rv is not None:
  56. name = rv[0]
  57. self.__php_vars__[name] = value
  58. def __repr__(self):
  59. return '<phpobject %r>' % (self.__name__,)
  60. def convert_member_dict(d):
  61. """Converts the names of a member dict to Python syntax. PHP class data
  62. member names are not the plain identifiers but might be prefixed by the
  63. class name if private or a star if protected. This function converts them
  64. into standard Python identifiers:
  65. >>> convert_member_dict({"username": "user1", " User password":
  66. ... "default", " * is_active": True})
  67. {'username': 'user1', 'password': 'default', 'is_active': True}
  68. """
  69. return dict((_translate_member_name(k), v) for k, v in d.items())
  70. def dumps(data, charset='utf-8', errors=default_errors, object_hook=None):
  71. """Return the PHP-serialized representation of the object as a string,
  72. instead of writing it to a file like `dump` does. On Python 3
  73. this returns bytes objects, on Python 3 this returns bytestrings.
  74. """
  75. def _serialize(obj, keypos):
  76. if keypos:
  77. if isinstance(obj, (int, long, float, bool)):
  78. return ('i:%i;' % obj).encode('latin1')
  79. if isinstance(obj, basestring):
  80. encoded_obj = obj
  81. if isinstance(obj, unicode):
  82. encoded_obj = obj.encode(charset, errors)
  83. s = BytesIO()
  84. s.write(b's:')
  85. s.write(str(len(encoded_obj)).encode('latin1'))
  86. s.write(b':"')
  87. s.write(encoded_obj)
  88. s.write(b'";')
  89. return s.getvalue()
  90. if obj is None:
  91. return b's:0:"";'
  92. raise TypeError('can\'t serialize %r as key' % type(obj))
  93. else:
  94. if obj is None:
  95. return b'N;'
  96. if isinstance(obj, bool):
  97. return ('b:%i;' % obj).encode('latin1')
  98. if isinstance(obj, (int, long)):
  99. return ('i:%s;' % obj).encode('latin1')
  100. if isinstance(obj, float):
  101. return ('d:%s;' % obj).encode('latin1')
  102. if isinstance(obj, basestring):
  103. encoded_obj = obj
  104. if isinstance(obj, unicode):
  105. encoded_obj = obj.encode(charset, errors)
  106. s = BytesIO()
  107. s.write(b's:')
  108. s.write(str(len(encoded_obj)).encode('latin1'))
  109. s.write(b':"')
  110. s.write(encoded_obj)
  111. s.write(b'";')
  112. return s.getvalue()
  113. if isinstance(obj, (list, tuple, dict)):
  114. out = []
  115. if isinstance(obj, dict):
  116. iterable = obj.items()
  117. else:
  118. iterable = enumerate(obj)
  119. for key, value in iterable:
  120. out.append(_serialize(key, True))
  121. out.append(_serialize(value, False))
  122. return b''.join([
  123. b'a:',
  124. str(len(obj)).encode('latin1'),
  125. b':{',
  126. b''.join(out),
  127. b'}'
  128. ])
  129. if isinstance(obj, phpobject):
  130. return b'O' + _serialize(obj.__name__, True)[1:-1] + \
  131. _serialize(obj.__php_vars__, False)[1:]
  132. if object_hook is not None:
  133. return _serialize(object_hook(obj), False)
  134. raise TypeError('can\'t serialize %r' % type(obj))
  135. return _serialize(data, False)
  136. def load(fp, charset='utf-8', errors=default_errors, decode_strings=False,
  137. object_hook=None, array_hook=None):
  138. """Read a string from the open file object `fp` and interpret it as a
  139. data stream of PHP-serialized objects, reconstructing and returning
  140. the original object hierarchy.
  141. `fp` must provide a `read()` method that takes an integer argument. Both
  142. method should return strings. Thus `fp` can be a file object opened for
  143. reading, a `StringIO` object (`BytesIO` on Python 3), or any other custom
  144. object that meets this interface.
  145. `load` will read exactly one object from the stream. See the docstring of
  146. the module for this chained behavior.
  147. If an object hook is given object-opcodes are supported in the serilization
  148. format. The function is called with the class name and a dict of the
  149. class data members. The data member names are in PHP format which is
  150. usually not what you want. The `simple_object_hook` function can convert
  151. them to Python identifier names.
  152. If an `array_hook` is given that function is called with a list of pairs
  153. for all array items. This can for example be set to
  154. `collections.OrderedDict` for an ordered, hashed dictionary.
  155. """
  156. if array_hook is None:
  157. array_hook = dict
  158. def _expect(e):
  159. v = fp.read(len(e))
  160. if v != e:
  161. raise ValueError('failed expectation, expected %r got %r' % (e, v))
  162. def _read_until(delim):
  163. buf = []
  164. while True:
  165. char = fp.read(1)
  166. if char == delim:
  167. break
  168. elif not char:
  169. raise ValueError('unexpected end of stream')
  170. buf.append(char)
  171. return b''.join(buf)
  172. def _load_array():
  173. items = int(_read_until(b':')) * 2
  174. _expect(b'{')
  175. result = []
  176. last_item = Ellipsis
  177. for idx in xrange(items):
  178. item = _unserialize()
  179. if last_item is Ellipsis:
  180. last_item = item
  181. else:
  182. result.append((last_item, item))
  183. last_item = Ellipsis
  184. _expect(b'}')
  185. return result
  186. def _unserialize():
  187. type_ = fp.read(1).lower()
  188. if type_ == b'n':
  189. _expect(b';')
  190. return None
  191. if type_ in b'idb':
  192. _expect(b':')
  193. data = _read_until(b';')
  194. if type_ == b'i':
  195. return int(data)
  196. if type_ == b'd':
  197. return float(data)
  198. return int(data) != 0
  199. if type_ == b's':
  200. _expect(b':')
  201. length = int(_read_until(b':'))
  202. _expect(b'"')
  203. data = fp.read(length)
  204. _expect(b'"')
  205. if decode_strings:
  206. data = data.decode(charset, errors)
  207. _expect(b';')
  208. return data
  209. if type_ == b'a':
  210. _expect(b':')
  211. return array_hook(_load_array())
  212. if type_ == b'o':
  213. if object_hook is None:
  214. raise ValueError('object in serialization dump but '
  215. 'object_hook not given.')
  216. _expect(b':')
  217. name_length = int(_read_until(b':'))
  218. _expect(b'"')
  219. name = fp.read(name_length)
  220. _expect(b'":')
  221. if decode_strings:
  222. name = name.decode(charset, errors)
  223. return object_hook(name, dict(_load_array()))
  224. raise ValueError('unexpected opcode')
  225. return _unserialize()
  226. def loads(data, charset='utf-8', errors=default_errors, decode_strings=False,
  227. object_hook=None, array_hook=None):
  228. """Read a PHP-serialized object hierarchy from a string. Characters in the
  229. string past the object's representation are ignored. On Python 3 the
  230. string must be a bytestring.
  231. """
  232. return load(BytesIO(data), charset, errors, decode_strings,
  233. object_hook, array_hook)
  234. def dump(data, fp, charset='utf-8', errors=default_errors, object_hook=None):
  235. """Write a PHP-serialized representation of obj to the open file object
  236. `fp`. Unicode strings are encoded to `charset` with the error handling
  237. of `errors`.
  238. `fp` must have a `write()` method that accepts a single string argument.
  239. It can thus be a file object opened for writing, a `StringIO` object
  240. (or a `BytesIO` object on Python 3), or any other custom object that meets
  241. this interface.
  242. The `object_hook` is called for each unknown object and has to either
  243. raise an exception if it's unable to convert the object or return a
  244. value that is serializable (such as a `phpobject`).
  245. """
  246. fp.write(dumps(data, charset, errors, object_hook))
  247. def dict_to_list(d):
  248. """Converts an ordered dict into a list."""
  249. # make sure it's a dict, that way dict_to_list can be used as an
  250. # array_hook.
  251. d = dict(d)
  252. try:
  253. return [d[x] for x in xrange(len(d))]
  254. except KeyError:
  255. raise ValueError('dict is not a sequence')
  256. def dict_to_tuple(d):
  257. """Converts an ordered dict into a tuple."""
  258. return tuple(dict_to_list(d))
# Aliases for `dumps` and `loads` under the PHP function names.
serialize = dumps
unserialize = loads