PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/module/marshal/interp_marshal.py

https://bitbucket.org/pypy/pypy/
Python | 509 lines | 501 code | 8 blank | 0 comment | 7 complexity | 9992c50ebd6dd294a001ae899948ac2a MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from pypy.interpreter.error import OperationError, oefmt
  2. from pypy.interpreter.gateway import WrappedDefault, unwrap_spec
  3. from rpython.rlib.rarithmetic import intmask
  4. from rpython.rlib import rstackovf
  5. from pypy.module._file.interp_file import W_File
  6. from pypy.objspace.std.marshal_impl import marshal, get_unmarshallers
  7. Py_MARSHAL_VERSION = 2
  8. @unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
  9. def dump(space, w_data, w_f, w_version):
  10. """Write the 'data' object into the open file 'f'."""
  11. # special case real files for performance
  12. if isinstance(w_f, W_File):
  13. writer = DirectStreamWriter(space, w_f)
  14. else:
  15. writer = FileWriter(space, w_f)
  16. try:
  17. # note: bound methods are currently not supported,
  18. # so we have to pass the instance in, instead.
  19. ##m = Marshaller(space, writer.write, space.int_w(w_version))
  20. m = Marshaller(space, writer, space.int_w(w_version))
  21. m.dump_w_obj(w_data)
  22. finally:
  23. writer.finished()
  24. @unwrap_spec(w_version=WrappedDefault(Py_MARSHAL_VERSION))
  25. def dumps(space, w_data, w_version):
  26. """Return the string that would have been written to a file
  27. by dump(data, file)."""
  28. m = StringMarshaller(space, space.int_w(w_version))
  29. m.dump_w_obj(w_data)
  30. return space.wrap(m.get_value())
  31. def load(space, w_f):
  32. """Read one value from the file 'f' and return it."""
  33. # special case real files for performance
  34. if isinstance(w_f, W_File):
  35. reader = DirectStreamReader(space, w_f)
  36. else:
  37. reader = FileReader(space, w_f)
  38. try:
  39. u = Unmarshaller(space, reader)
  40. return u.load_w_obj()
  41. finally:
  42. reader.finished()
  43. def loads(space, w_str):
  44. """Convert a string back to a value. Extra characters in the string are
  45. ignored."""
  46. u = StringUnmarshaller(space, w_str)
  47. obj = u.load_w_obj()
  48. return obj
  49. class AbstractReaderWriter(object):
  50. def __init__(self, space):
  51. self.space = space
  52. def raise_eof(self):
  53. space = self.space
  54. raise oefmt(space.w_EOFError, "EOF read where object expected")
  55. def finished(self):
  56. pass
  57. def read(self, n):
  58. raise NotImplementedError("Purely abstract method")
  59. def write(self, data):
  60. raise NotImplementedError("Purely abstract method")
  61. class FileWriter(AbstractReaderWriter):
  62. def __init__(self, space, w_f):
  63. AbstractReaderWriter.__init__(self, space)
  64. try:
  65. self.func = space.getattr(w_f, space.wrap('write'))
  66. # XXX how to check if it is callable?
  67. except OperationError as e:
  68. if not e.match(space, space.w_AttributeError):
  69. raise
  70. raise oefmt(space.w_TypeError,
  71. "marshal.dump() 2nd arg must be file-like object")
  72. def write(self, data):
  73. space = self.space
  74. space.call_function(self.func, space.wrap(data))
  75. class FileReader(AbstractReaderWriter):
  76. def __init__(self, space, w_f):
  77. AbstractReaderWriter.__init__(self, space)
  78. try:
  79. self.func = space.getattr(w_f, space.wrap('read'))
  80. # XXX how to check if it is callable?
  81. except OperationError as e:
  82. if not e.match(space, space.w_AttributeError):
  83. raise
  84. raise oefmt(space.w_TypeError,
  85. "marshal.load() arg must be file-like object")
  86. def read(self, n):
  87. space = self.space
  88. w_ret = space.call_function(self.func, space.wrap(n))
  89. ret = space.str_w(w_ret)
  90. if len(ret) != n:
  91. self.raise_eof()
  92. return ret
  93. class StreamReaderWriter(AbstractReaderWriter):
  94. def __init__(self, space, file):
  95. AbstractReaderWriter.__init__(self, space)
  96. self.file = file
  97. file.lock()
  98. def finished(self):
  99. self.file.unlock()
  100. class DirectStreamWriter(StreamReaderWriter):
  101. def write(self, data):
  102. self.file.do_direct_write(data)
  103. class DirectStreamReader(StreamReaderWriter):
  104. def read(self, n):
  105. data = self.file.direct_read(n)
  106. if len(data) < n:
  107. self.raise_eof()
  108. return data
  109. class _Base(object):
  110. def raise_exc(self, msg):
  111. space = self.space
  112. raise OperationError(space.w_ValueError, space.wrap(msg))
  113. class Marshaller(_Base):
  114. """
  115. atomic types including typecode:
  116. atom(tc) puts single typecode
  117. atom_int(tc, int) puts code and int
  118. atom_int64(tc, int64) puts code and int64
  119. atom_str(tc, str) puts code, len and string
  120. building blocks for compound types:
  121. start(typecode) sets the type character
  122. put(s) puts a string with fixed length
  123. put_short(int) puts a short integer
  124. put_int(int) puts an integer
  125. put_pascal(s) puts a short string
  126. put_w_obj(w_obj) puts a wrapped object
  127. put_tuple_w(TYPE, tuple_w) puts tuple_w, an unwrapped list of wrapped objects
  128. """
  129. def __init__(self, space, writer, version):
  130. self.space = space
  131. ## self.put = putfunc
  132. self.writer = writer
  133. self.version = version
  134. self.stringtable = {}
  135. ## currently we cannot use a put that is a bound method
  136. ## from outside. Same holds for get.
  137. def put(self, s):
  138. self.writer.write(s)
  139. def put1(self, c):
  140. self.writer.write(c)
  141. def atom(self, typecode):
  142. #assert type(typecode) is str and len(typecode) == 1
  143. # type(char) not supported
  144. self.put1(typecode)
  145. def atom_int(self, typecode, x):
  146. a = chr(x & 0xff)
  147. x >>= 8
  148. b = chr(x & 0xff)
  149. x >>= 8
  150. c = chr(x & 0xff)
  151. x >>= 8
  152. d = chr(x & 0xff)
  153. self.put(typecode + a + b + c + d)
  154. def atom_int64(self, typecode, x):
  155. self.atom_int(typecode, x)
  156. self.put_int(x>>32)
  157. def atom_str(self, typecode, x):
  158. self.atom_int(typecode, len(x))
  159. self.put(x)
  160. def start(self, typecode):
  161. # type(char) not supported
  162. self.put(typecode)
  163. def put_short(self, x):
  164. a = chr(x & 0xff)
  165. x >>= 8
  166. b = chr(x & 0xff)
  167. self.put(a + b)
  168. def put_int(self, x):
  169. a = chr(x & 0xff)
  170. x >>= 8
  171. b = chr(x & 0xff)
  172. x >>= 8
  173. c = chr(x & 0xff)
  174. x >>= 8
  175. d = chr(x & 0xff)
  176. self.put(a + b + c + d)
  177. def put_pascal(self, x):
  178. lng = len(x)
  179. if lng > 255:
  180. self.raise_exc('not a pascal string')
  181. self.put(chr(lng))
  182. self.put(x)
  183. def put_w_obj(self, w_obj):
  184. marshal(self.space, w_obj, self)
  185. def dump_w_obj(self, w_obj):
  186. space = self.space
  187. if space.type(w_obj).is_heaptype():
  188. try:
  189. buf = space.readbuf_w(w_obj)
  190. except OperationError as e:
  191. if not e.match(space, space.w_TypeError):
  192. raise
  193. self.raise_exc("unmarshallable object")
  194. else:
  195. w_obj = space.newbuffer(buf)
  196. try:
  197. self.put_w_obj(w_obj)
  198. except rstackovf.StackOverflow:
  199. rstackovf.check_stack_overflow()
  200. self._overflow()
  201. def put_tuple_w(self, typecode, lst_w):
  202. self.start(typecode)
  203. lng = len(lst_w)
  204. self.put_int(lng)
  205. idx = 0
  206. while idx < lng:
  207. w_obj = lst_w[idx]
  208. marshal(self.space, w_obj, self)
  209. idx += 1
  210. def _overflow(self):
  211. self.raise_exc('object too deeply nested to marshal')
  212. class StringMarshaller(Marshaller):
  213. def __init__(self, space, version):
  214. Marshaller.__init__(self, space, None, version)
  215. self.buflis = [chr(0)] * 128
  216. self.bufpos = 0
  217. def put(self, s):
  218. pos = self.bufpos
  219. lng = len(s)
  220. newpos = pos + lng
  221. while len(self.buflis) < newpos:
  222. self.buflis *= 2
  223. idx = 0
  224. while idx < lng:
  225. self.buflis[pos + idx] = s[idx]
  226. idx += 1
  227. self.bufpos = newpos
  228. def put1(self, c):
  229. pos = self.bufpos
  230. newpos = pos + 1
  231. if len(self.buflis) < newpos:
  232. self.buflis *= 2
  233. self.buflis[pos] = c
  234. self.bufpos = newpos
  235. def atom_int(self, typecode, x):
  236. a = chr(x & 0xff)
  237. x >>= 8
  238. b = chr(x & 0xff)
  239. x >>= 8
  240. c = chr(x & 0xff)
  241. x >>= 8
  242. d = chr(x & 0xff)
  243. pos = self.bufpos
  244. newpos = pos + 5
  245. if len(self.buflis) < newpos:
  246. self.buflis *= 2
  247. self.buflis[pos] = typecode
  248. self.buflis[pos+1] = a
  249. self.buflis[pos+2] = b
  250. self.buflis[pos+3] = c
  251. self.buflis[pos+4] = d
  252. self.bufpos = newpos
  253. def put_short(self, x):
  254. a = chr(x & 0xff)
  255. x >>= 8
  256. b = chr(x & 0xff)
  257. pos = self.bufpos
  258. newpos = pos + 2
  259. if len(self.buflis) < newpos:
  260. self.buflis *= 2
  261. self.buflis[pos] = a
  262. self.buflis[pos+1] = b
  263. self.bufpos = newpos
  264. def put_int(self, x):
  265. a = chr(x & 0xff)
  266. x >>= 8
  267. b = chr(x & 0xff)
  268. x >>= 8
  269. c = chr(x & 0xff)
  270. x >>= 8
  271. d = chr(x & 0xff)
  272. pos = self.bufpos
  273. newpos = pos + 4
  274. if len(self.buflis) < newpos:
  275. self.buflis *= 2
  276. self.buflis[pos] = a
  277. self.buflis[pos+1] = b
  278. self.buflis[pos+2] = c
  279. self.buflis[pos+3] = d
  280. self.bufpos = newpos
  281. def get_value(self):
  282. return ''.join(self.buflis[:self.bufpos])
  283. def invalid_typecode(space, u, tc):
  284. u.raise_exc("bad marshal data (unknown type code)")
  285. class Unmarshaller(_Base):
  286. _dispatch = [invalid_typecode] * 256
  287. for tc, func in get_unmarshallers():
  288. _dispatch[ord(tc)] = func
  289. def __init__(self, space, reader):
  290. self.space = space
  291. self.reader = reader
  292. self.stringtable_w = []
  293. def get(self, n):
  294. assert n >= 0
  295. return self.reader.read(n)
  296. def get1(self):
  297. # the [0] is used to convince the annotator to return a char
  298. return self.get(1)[0]
  299. def atom_str(self, typecode):
  300. self.start(typecode)
  301. lng = self.get_lng()
  302. return self.get(lng)
  303. def atom_lng(self, typecode):
  304. self.start(typecode)
  305. return self.get_lng()
  306. def start(self, typecode):
  307. tc = self.get1()
  308. if tc != typecode:
  309. self.raise_exc('invalid marshal data')
  310. def get_short(self):
  311. s = self.get(2)
  312. a = ord(s[0])
  313. b = ord(s[1])
  314. x = a | (b << 8)
  315. if x & 0x8000:
  316. x = x - 0x10000
  317. return x
  318. def get_int(self):
  319. s = self.get(4)
  320. a = ord(s[0])
  321. b = ord(s[1])
  322. c = ord(s[2])
  323. d = ord(s[3])
  324. if d & 0x80:
  325. d -= 0x100
  326. x = a | (b<<8) | (c<<16) | (d<<24)
  327. return intmask(x)
  328. def get_lng(self):
  329. s = self.get(4)
  330. a = ord(s[0])
  331. b = ord(s[1])
  332. c = ord(s[2])
  333. d = ord(s[3])
  334. x = a | (b<<8) | (c<<16) | (d<<24)
  335. if x >= 0:
  336. return x
  337. else:
  338. self.raise_exc('bad marshal data')
  339. def get_pascal(self):
  340. lng = ord(self.get1())
  341. return self.get(lng)
  342. def get_str(self):
  343. lng = self.get_lng()
  344. return self.get(lng)
  345. def get_w_obj(self, allow_null=False):
  346. space = self.space
  347. tc = self.get1()
  348. w_ret = self._dispatch[ord(tc)](space, self, tc)
  349. if w_ret is None and not allow_null:
  350. raise oefmt(space.w_TypeError, "NULL object in marshal data")
  351. return w_ret
  352. def load_w_obj(self):
  353. try:
  354. return self.get_w_obj()
  355. except rstackovf.StackOverflow:
  356. rstackovf.check_stack_overflow()
  357. self._overflow()
  358. # inlined version to save a recursion level
  359. def get_tuple_w(self):
  360. lng = self.get_lng()
  361. res_w = [None] * lng
  362. idx = 0
  363. space = self.space
  364. w_ret = space.w_None # something not
  365. while idx < lng:
  366. tc = self.get1()
  367. w_ret = self._dispatch[ord(tc)](space, self, tc)
  368. if w_ret is None:
  369. break
  370. res_w[idx] = w_ret
  371. idx += 1
  372. if w_ret is None:
  373. raise oefmt(space.w_TypeError, "NULL object in marshal data")
  374. return res_w
  375. def get_list_w(self):
  376. return self.get_tuple_w()[:]
  377. def _overflow(self):
  378. self.raise_exc('object too deeply nested to unmarshal')
  379. class StringUnmarshaller(Unmarshaller):
  380. # Unmarshaller with inlined buffer string
  381. def __init__(self, space, w_str):
  382. Unmarshaller.__init__(self, space, None)
  383. self.bufstr = space.getarg_w('s#', w_str)
  384. self.bufpos = 0
  385. self.limit = len(self.bufstr)
  386. def raise_eof(self):
  387. space = self.space
  388. raise oefmt(space.w_EOFError, "EOF read where object expected")
  389. def get(self, n):
  390. pos = self.bufpos
  391. newpos = pos + n
  392. if newpos > self.limit:
  393. self.raise_eof()
  394. self.bufpos = newpos
  395. return self.bufstr[pos : newpos]
  396. def get1(self):
  397. pos = self.bufpos
  398. if pos >= self.limit:
  399. self.raise_eof()
  400. self.bufpos = pos + 1
  401. return self.bufstr[pos]
  402. def get_int(self):
  403. pos = self.bufpos
  404. newpos = pos + 4
  405. if newpos > self.limit:
  406. self.raise_eof()
  407. self.bufpos = newpos
  408. a = ord(self.bufstr[pos])
  409. b = ord(self.bufstr[pos+1])
  410. c = ord(self.bufstr[pos+2])
  411. d = ord(self.bufstr[pos+3])
  412. if d & 0x80:
  413. d -= 0x100
  414. x = a | (b<<8) | (c<<16) | (d<<24)
  415. return intmask(x)
  416. def get_lng(self):
  417. pos = self.bufpos
  418. newpos = pos + 4
  419. if newpos > self.limit:
  420. self.raise_eof()
  421. self.bufpos = newpos
  422. a = ord(self.bufstr[pos])
  423. b = ord(self.bufstr[pos+1])
  424. c = ord(self.bufstr[pos+2])
  425. d = ord(self.bufstr[pos+3])
  426. x = a | (b<<8) | (c<<16) | (d<<24)
  427. if x >= 0:
  428. return x
  429. else:
  430. self.raise_exc('bad marshal data')