PageRenderTime 62ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 1ms

/pypy/module/marshal/interp_marshal.py

https://bitbucket.org/rokujyouhitoma/pypy/
Python | 535 lines | 532 code | 3 blank | 0 comment | 1 complexity | d7d878834031cb66a7e3f0a58e584d2d MD5 | raw file
  1. from pypy.interpreter.error import OperationError
  2. from pypy.rlib.rarithmetic import intmask
  3. from pypy.rlib import rstackovf
  4. from pypy.module._file.interp_file import W_File
  5. Py_MARSHAL_VERSION = 2
  6. def dump(space, w_data, w_f, w_version=Py_MARSHAL_VERSION):
  7. """Write the 'data' object into the open file 'f'."""
  8. # special case real files for performance
  9. file = space.interpclass_w(w_f)
  10. if isinstance(file, W_File):
  11. writer = DirectStreamWriter(space, file)
  12. else:
  13. writer = FileWriter(space, w_f)
  14. try:
  15. # note: bound methods are currently not supported,
  16. # so we have to pass the instance in, instead.
  17. ##m = Marshaller(space, writer.write, space.int_w(w_version))
  18. m = Marshaller(space, writer, space.int_w(w_version))
  19. m.dump_w_obj(w_data)
  20. finally:
  21. writer.finished()
  22. def dumps(space, w_data, w_version=Py_MARSHAL_VERSION):
  23. """Return the string that would have been written to a file
  24. by dump(data, file)."""
  25. m = StringMarshaller(space, space.int_w(w_version))
  26. m.dump_w_obj(w_data)
  27. return space.wrap(m.get_value())
  28. def load(space, w_f):
  29. """Read one value from the file 'f' and return it."""
  30. # special case real files for performance
  31. file = space.interpclass_w(w_f)
  32. if isinstance(file, W_File):
  33. reader = DirectStreamReader(space, file)
  34. else:
  35. reader = FileReader(space, w_f)
  36. try:
  37. u = Unmarshaller(space, reader)
  38. return u.load_w_obj()
  39. finally:
  40. reader.finished()
  41. def loads(space, w_str):
  42. """Convert a string back to a value. Extra characters in the string are
  43. ignored."""
  44. space.timer.start("marshal loads")
  45. u = StringUnmarshaller(space, w_str)
  46. obj = u.load_w_obj()
  47. space.timer.stop("marshal loads")
  48. return obj
  49. class AbstractReaderWriter(object):
  50. def __init__(self, space):
  51. self.space = space
  52. def raise_eof(self):
  53. space = self.space
  54. raise OperationError(space.w_EOFError, space.wrap(
  55. 'EOF read where object expected'))
  56. def finished(self):
  57. pass
  58. def read(self, n):
  59. raise NotImplementedError("Purely abstract method")
  60. def write(self, data):
  61. raise NotImplementedError("Purely abstract method")
  62. class FileWriter(AbstractReaderWriter):
  63. def __init__(self, space, w_f):
  64. AbstractReaderWriter.__init__(self, space)
  65. try:
  66. self.func = space.getattr(w_f, space.wrap('write'))
  67. # XXX how to check if it is callable?
  68. except OperationError, e:
  69. if not e.match(space, space.w_AttributeError):
  70. raise
  71. raise OperationError(space.w_TypeError, space.wrap(
  72. 'marshal.dump() 2nd arg must be file-like object'))
  73. def write(self, data):
  74. space = self.space
  75. space.call_function(self.func, space.wrap(data))
  76. class FileReader(AbstractReaderWriter):
  77. def __init__(self, space, w_f):
  78. AbstractReaderWriter.__init__(self, space)
  79. try:
  80. self.func = space.getattr(w_f, space.wrap('read'))
  81. # XXX how to check if it is callable?
  82. except OperationError, e:
  83. if not e.match(space, space.w_AttributeError):
  84. raise
  85. raise OperationError(space.w_TypeError, space.wrap(
  86. 'marshal.load() arg must be file-like object'))
  87. def read(self, n):
  88. space = self.space
  89. w_ret = space.call_function(self.func, space.wrap(n))
  90. ret = space.str_w(w_ret)
  91. if len(ret) != n:
  92. self.raise_eof()
  93. return ret
  94. class StreamReaderWriter(AbstractReaderWriter):
  95. def __init__(self, space, file):
  96. AbstractReaderWriter.__init__(self, space)
  97. self.file = file
  98. file.lock()
  99. def finished(self):
  100. self.file.unlock()
  101. class DirectStreamWriter(StreamReaderWriter):
  102. def write(self, data):
  103. self.file.do_direct_write(data)
  104. class DirectStreamReader(StreamReaderWriter):
  105. def read(self, n):
  106. data = self.file.direct_read(n)
  107. if len(data) < n:
  108. self.raise_eof()
  109. return data
  110. class _Base(object):
  111. def raise_exc(self, msg):
  112. space = self.space
  113. raise OperationError(space.w_ValueError, space.wrap(msg))
  114. class Marshaller(_Base):
  115. # _annspecialcase_ = "specialize:ctr_location" # polymorphic
  116. # does not work with subclassing
  117. def __init__(self, space, writer, version):
  118. self.space = space
  119. ## self.put = putfunc
  120. self.writer = writer
  121. self.version = version
  122. self.stringtable = {}
  123. ## currently we cannot use a put that is a bound method
  124. ## from outside. Same holds for get.
  125. def put(self, s):
  126. self.writer.write(s)
  127. def put1(self, c):
  128. self.writer.write(c)
  129. def atom(self, typecode):
  130. #assert type(typecode) is str and len(typecode) == 1
  131. # type(char) not supported
  132. self.put1(typecode)
  133. def atom_int(self, typecode, x):
  134. a = chr(x & 0xff)
  135. x >>= 8
  136. b = chr(x & 0xff)
  137. x >>= 8
  138. c = chr(x & 0xff)
  139. x >>= 8
  140. d = chr(x & 0xff)
  141. self.put(typecode + a + b + c + d)
  142. def atom_int64(self, typecode, x):
  143. self.atom_int(typecode, x)
  144. self.put_int(x>>32)
  145. def atom_str(self, typecode, x):
  146. self.atom_int(typecode, len(x))
  147. self.put(x)
  148. def atom_strlist(self, typecode, tc2, x):
  149. self.atom_int(typecode, len(x))
  150. atom_str = self.atom_str
  151. for item in x:
  152. # type(str) seems to be forbidden
  153. #if type(item) is not str:
  154. # self.raise_exc('object with wrong type in strlist')
  155. atom_str(tc2, item)
  156. def start(self, typecode):
  157. # type(char) not supported
  158. self.put(typecode)
  159. def put_short(self, x):
  160. a = chr(x & 0xff)
  161. x >>= 8
  162. b = chr(x & 0xff)
  163. self.put(a + b)
  164. def put_int(self, x):
  165. a = chr(x & 0xff)
  166. x >>= 8
  167. b = chr(x & 0xff)
  168. x >>= 8
  169. c = chr(x & 0xff)
  170. x >>= 8
  171. d = chr(x & 0xff)
  172. self.put(a + b + c + d)
  173. def put_pascal(self, x):
  174. lng = len(x)
  175. if lng > 255:
  176. self.raise_exc('not a pascal string')
  177. self.put(chr(lng))
  178. self.put(x)
  179. def put_w_obj(self, w_obj):
  180. self.space.marshal_w(w_obj, self)
  181. def dump_w_obj(self, w_obj):
  182. space = self.space
  183. if (space.type(w_obj).is_heaptype() and
  184. space.lookup(w_obj, "__buffer__") is None):
  185. w_err = space.wrap("only builtins can be marshaled")
  186. raise OperationError(space.w_ValueError, w_err)
  187. try:
  188. self.put_w_obj(w_obj)
  189. except rstackovf.StackOverflow:
  190. rstackovf.check_stack_overflow()
  191. self._overflow()
  192. def put_tuple_w(self, typecode, lst_w):
  193. self.start(typecode)
  194. lng = len(lst_w)
  195. self.put_int(lng)
  196. idx = 0
  197. space = self.space
  198. while idx < lng:
  199. w_obj = lst_w[idx]
  200. self.space.marshal_w(w_obj, self)
  201. idx += 1
  202. def _overflow(self):
  203. self.raise_exc('object too deeply nested to marshal')
  204. class StringMarshaller(Marshaller):
  205. def __init__(self, space, version):
  206. Marshaller.__init__(self, space, None, version)
  207. self.buflis = [chr(0)] * 128
  208. self.bufpos = 0
  209. def put(self, s):
  210. pos = self.bufpos
  211. lng = len(s)
  212. newpos = pos + lng
  213. while len(self.buflis) < newpos:
  214. self.buflis *= 2
  215. idx = 0
  216. while idx < lng:
  217. self.buflis[pos + idx] = s[idx]
  218. idx += 1
  219. self.bufpos = newpos
  220. def put1(self, c):
  221. pos = self.bufpos
  222. newpos = pos + 1
  223. if len(self.buflis) < newpos:
  224. self.buflis *= 2
  225. self.buflis[pos] = c
  226. self.bufpos = newpos
  227. def atom_int(self, typecode, x):
  228. a = chr(x & 0xff)
  229. x >>= 8
  230. b = chr(x & 0xff)
  231. x >>= 8
  232. c = chr(x & 0xff)
  233. x >>= 8
  234. d = chr(x & 0xff)
  235. pos = self.bufpos
  236. newpos = pos + 5
  237. if len(self.buflis) < newpos:
  238. self.buflis *= 2
  239. self.buflis[pos] = typecode
  240. self.buflis[pos+1] = a
  241. self.buflis[pos+2] = b
  242. self.buflis[pos+3] = c
  243. self.buflis[pos+4] = d
  244. self.bufpos = newpos
  245. def put_short(self, x):
  246. a = chr(x & 0xff)
  247. x >>= 8
  248. b = chr(x & 0xff)
  249. pos = self.bufpos
  250. newpos = pos + 2
  251. if len(self.buflis) < newpos:
  252. self.buflis *= 2
  253. self.buflis[pos] = a
  254. self.buflis[pos+1] = b
  255. self.bufpos = newpos
  256. def put_int(self, x):
  257. a = chr(x & 0xff)
  258. x >>= 8
  259. b = chr(x & 0xff)
  260. x >>= 8
  261. c = chr(x & 0xff)
  262. x >>= 8
  263. d = chr(x & 0xff)
  264. pos = self.bufpos
  265. newpos = pos + 4
  266. if len(self.buflis) < newpos:
  267. self.buflis *= 2
  268. self.buflis[pos] = a
  269. self.buflis[pos+1] = b
  270. self.buflis[pos+2] = c
  271. self.buflis[pos+3] = d
  272. self.bufpos = newpos
  273. def get_value(self):
  274. return ''.join(self.buflis[:self.bufpos])
  275. def invalid_typecode(space, u, tc):
  276. # %r not supported in rpython
  277. #u.raise_exc('invalid typecode in unmarshal: %r' % tc)
  278. c = ord(tc)
  279. if c < 16:
  280. s = '\\x0%x' % c
  281. elif c < 32 or c > 126:
  282. s = '\\x%x' % c
  283. elif tc == '\\':
  284. s = r'\\'
  285. else:
  286. s = tc
  287. q = "'"
  288. if s[0] == "'":
  289. q = '"'
  290. u.raise_exc('invalid typecode in unmarshal: ' + q + s + q)
  291. def register(codes, func):
  292. """NOT_RPYTHON"""
  293. for code in codes:
  294. Unmarshaller._dispatch[ord(code)] = func
  295. class Unmarshaller(_Base):
  296. _dispatch = [invalid_typecode] * 256
  297. def __init__(self, space, reader):
  298. self.space = space
  299. self.reader = reader
  300. self.stringtable_w = []
  301. def get(self, n):
  302. assert n >= 0
  303. return self.reader.read(n)
  304. def get1(self):
  305. # the [0] is used to convince the annotator to return a char
  306. return self.get(1)[0]
  307. def atom_str(self, typecode):
  308. self.start(typecode)
  309. lng = self.get_lng()
  310. return self.get(lng)
  311. def atom_lng(self, typecode):
  312. self.start(typecode)
  313. return self.get_lng()
  314. def atom_strlist(self, typecode, tc2):
  315. self.start(typecode)
  316. lng = self.get_lng()
  317. res = [None] * lng
  318. idx = 0
  319. while idx < lng:
  320. res[idx] = self.atom_str(tc2)
  321. idx += 1
  322. return res
  323. def start(self, typecode):
  324. tc = self.get1()
  325. if tc != typecode:
  326. self.raise_exc('invalid marshal data')
  327. def get_short(self):
  328. s = self.get(2)
  329. a = ord(s[0])
  330. b = ord(s[1])
  331. x = a | (b << 8)
  332. if x & 0x8000:
  333. x = x - 0x10000
  334. return x
  335. def get_int(self):
  336. s = self.get(4)
  337. a = ord(s[0])
  338. b = ord(s[1])
  339. c = ord(s[2])
  340. d = ord(s[3])
  341. if d & 0x80:
  342. d -= 0x100
  343. x = a | (b<<8) | (c<<16) | (d<<24)
  344. return intmask(x)
  345. def get_lng(self):
  346. s = self.get(4)
  347. a = ord(s[0])
  348. b = ord(s[1])
  349. c = ord(s[2])
  350. d = ord(s[3])
  351. x = a | (b<<8) | (c<<16) | (d<<24)
  352. if x >= 0:
  353. return x
  354. else:
  355. self.raise_exc('bad marshal data')
  356. def get_pascal(self):
  357. lng = ord(self.get1())
  358. return self.get(lng)
  359. def get_str(self):
  360. lng = self.get_lng()
  361. return self.get(lng)
  362. def get_w_obj(self, allow_null=False):
  363. space = self.space
  364. w_ret = space.w_None # something not None
  365. tc = self.get1()
  366. w_ret = self._dispatch[ord(tc)](space, self, tc)
  367. if w_ret is None and not allow_null:
  368. raise OperationError(space.w_TypeError, space.wrap(
  369. 'NULL object in marshal data'))
  370. return w_ret
  371. def load_w_obj(self):
  372. try:
  373. return self.get_w_obj()
  374. except rstackovf.StackOverflow:
  375. rstackovf.check_stack_overflow()
  376. self._overflow()
  377. # inlined version to save a recursion level
  378. def get_tuple_w(self):
  379. lng = self.get_lng()
  380. res_w = [None] * lng
  381. idx = 0
  382. space = self.space
  383. w_ret = space.w_None # something not
  384. while idx < lng:
  385. tc = self.get1()
  386. w_ret = self._dispatch[ord(tc)](space, self, tc)
  387. if w_ret is None:
  388. break
  389. res_w[idx] = w_ret
  390. idx += 1
  391. if w_ret is None:
  392. raise OperationError(space.w_TypeError, space.wrap(
  393. 'NULL object in marshal data'))
  394. return res_w
  395. def get_list_w(self):
  396. return self.get_tuple_w()[:]
  397. def _overflow(self):
  398. self.raise_exc('object too deeply nested to unmarshal')
  399. class StringUnmarshaller(Unmarshaller):
  400. # Unmarshaller with inlined buffer string
  401. def __init__(self, space, w_str):
  402. Unmarshaller.__init__(self, space, None)
  403. try:
  404. self.bufstr = space.bufferstr_w(w_str)
  405. except OperationError, e:
  406. if not e.match(space, space.w_TypeError):
  407. raise
  408. raise OperationError(space.w_TypeError, space.wrap(
  409. 'marshal.loads() arg must be string or buffer'))
  410. self.bufpos = 0
  411. self.limit = len(self.bufstr)
  412. def raise_eof(self):
  413. space = self.space
  414. raise OperationError(space.w_EOFError, space.wrap(
  415. 'EOF read where object expected'))
  416. def get(self, n):
  417. pos = self.bufpos
  418. newpos = pos + n
  419. if newpos > self.limit:
  420. self.raise_eof()
  421. self.bufpos = newpos
  422. return self.bufstr[pos : newpos]
  423. def get1(self):
  424. pos = self.bufpos
  425. if pos >= self.limit:
  426. self.raise_eof()
  427. self.bufpos = pos + 1
  428. return self.bufstr[pos]
  429. def get_int(self):
  430. pos = self.bufpos
  431. newpos = pos + 4
  432. if newpos > self.limit:
  433. self.raise_eof()
  434. self.bufpos = newpos
  435. a = ord(self.bufstr[pos])
  436. b = ord(self.bufstr[pos+1])
  437. c = ord(self.bufstr[pos+2])
  438. d = ord(self.bufstr[pos+3])
  439. if d & 0x80:
  440. d -= 0x100
  441. x = a | (b<<8) | (c<<16) | (d<<24)
  442. return intmask(x)
  443. def get_lng(self):
  444. pos = self.bufpos
  445. newpos = pos + 4
  446. if newpos > self.limit:
  447. self.raise_eof()
  448. self.bufpos = newpos
  449. a = ord(self.bufstr[pos])
  450. b = ord(self.bufstr[pos+1])
  451. c = ord(self.bufstr[pos+2])
  452. d = ord(self.bufstr[pos+3])
  453. x = a | (b<<8) | (c<<16) | (d<<24)
  454. if x >= 0:
  455. return x
  456. else:
  457. self.raise_exc('bad marshal data')