PageRenderTime 45ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 1ms

/pypy/module/bz2/interp_bz2.py

https://bitbucket.org/pypy/pypy/
Python | 793 lines | 756 code | 24 blank | 13 comment | 37 complexity | 2dff392d5315c114da99032abfcb7455 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. from __future__ import with_statement
  2. from rpython.rtyper.tool import rffi_platform as platform
  3. from rpython.rtyper.lltypesystem import rffi
  4. from rpython.rtyper.lltypesystem import lltype
  5. from pypy.interpreter.error import OperationError, oefmt
  6. from pypy.interpreter.baseobjspace import W_Root
  7. from pypy.interpreter.typedef import TypeDef, interp_attrproperty
  8. from pypy.interpreter.gateway import interp2app, unwrap_spec
  9. from rpython.rlib.streamio import Stream
  10. from rpython.translator.tool.cbuild import ExternalCompilationInfo
  11. from rpython.translator.platform import platform as compiler
  12. from rpython.rlib.rarithmetic import intmask, r_longlong
  13. import sys
  # Choose the name of the library to link against: the MSVC toolchain
  # uses 'libbz2', every other compiler uses plain 'bz2'.
  libname = 'libbz2' if compiler.name == "msvc" else 'bz2'

  # Headers and library needed to build against bzip2.
  eci = ExternalCompilationInfo(
      includes=['stdio.h', 'sys/types.h', 'bzlib.h'],
      libraries=[libname],
  )
  # Hand the description to the platform helper.  A false result is treated
  # as "library not available" and aborts the import of this module.
  eci = platform.configure_external_library('bz2', eci,
                                            [{'prefix': 'bzip2-'}])
  if not eci:
      raise ImportError("Could not find bzip2 library")
  class CConfig:
      # Everything that has to be probed from the C toolchain.  The class is
      # handed to platform.configure() further down in this module, and the
      # answers are copied onto the lowercase 'cConfig' namespace.
      _compilation_info_ = eci
      calling_conv = 'c'

      # Compile-time check that the BZ2_bzCompress symbol can be referenced.
      CHECK_LIBRARY = platform.Has('dump("x", (long)&BZ2_bzCompress)')

      # Basic C types and stdio constants.
      off_t = platform.SimpleType("off_t", rffi.LONGLONG)
      size_t = platform.SimpleType("size_t", rffi.ULONG)
      BUFSIZ = platform.ConstantInteger("BUFSIZ")
      SEEK_SET = platform.ConstantInteger("SEEK_SET")

      # Signatures of the allocator hooks stored in bz_stream; they are only
      # used to type the 'bzalloc' / 'bzfree' fields below.
      _alloc_type = lltype.FuncType([rffi.VOIDP, rffi.INT, rffi.INT],
                                    rffi.VOIDP)
      _free_type = lltype.FuncType([rffi.VOIDP, rffi.VOIDP], lltype.Void)

      # Fields of the C 'bz_stream' struct that this module accesses.
      bz_stream = platform.Struct('bz_stream', [
          ('next_in', rffi.CCHARP),
          ('avail_in', rffi.UINT),
          ('total_in_lo32', rffi.UINT),
          ('total_in_hi32', rffi.UINT),
          ('next_out', rffi.CCHARP),
          ('avail_out', rffi.UINT),
          ('total_out_lo32', rffi.UINT),
          ('total_out_hi32', rffi.UINT),
          ('state', rffi.VOIDP),
          ('bzalloc', lltype.Ptr(_alloc_type)),
          ('bzfree', lltype.Ptr(_free_type)),
          ('opaque', rffi.VOIDP),
      ])
  # Opaque C pointer types named after stdio's FILE and bzlib's BZFILE.
  FILE = rffi.COpaquePtr('FILE')
  BZFILE = rffi.COpaquePtr('BZFILE')

  # Names of the bzlib integer macros this module wants to know about.
  constant_names = [
      'BZ_RUN', 'BZ_FLUSH', 'BZ_FINISH', 'BZ_OK',
      'BZ_RUN_OK', 'BZ_FLUSH_OK', 'BZ_FINISH_OK', 'BZ_STREAM_END',
      'BZ_SEQUENCE_ERROR', 'BZ_PARAM_ERROR', 'BZ_MEM_ERROR', 'BZ_DATA_ERROR',
      'BZ_DATA_ERROR_MAGIC', 'BZ_IO_ERROR', 'BZ_UNEXPECTED_EOF',
      'BZ_OUTBUFF_FULL', 'BZ_CONFIG_ERROR',
  ]
  # Register one probe per macro on CConfig before running the configuration.
  for name in constant_names:
      setattr(CConfig, name, platform.DefinedConstantInteger(name))


  class cConfig(object):
      # Plain namespace that receives the results of platform.configure().
      pass


  for k, v in platform.configure(CConfig).items():
      setattr(cConfig, k, v)

  if not cConfig.CHECK_LIBRARY:
      raise ImportError("Invalid bz2 library")

  # Keep only the macros that came back with a value (None is skipped) and
  # publish them as module-level names; BZ_RUN, BZ_FINISH, BZ_RUN_OK and
  # BZ_FINISH_OK used later in this file come from here.
  constants = {}
  for name in constant_names:
      value = getattr(cConfig, name)
      if value is None:
          continue
      constants[name] = value
  locals().update(constants)
  # Module-level shortcuts for the configured types and stdio constants.
  off_t = cConfig.off_t
  bz_stream = lltype.Ptr(cConfig.bz_stream)   # pointer to the C struct
  BUFSIZ, SEEK_SET = cConfig.BUFSIZ, cConfig.SEEK_SET

  # Success codes.
  BZ_OK = cConfig.BZ_OK
  BZ_STREAM_END = cConfig.BZ_STREAM_END

  # Error codes; these are the ones _catch_bz2_error() knows how to
  # translate into exceptions.
  BZ_CONFIG_ERROR = cConfig.BZ_CONFIG_ERROR
  BZ_PARAM_ERROR = cConfig.BZ_PARAM_ERROR
  BZ_DATA_ERROR = cConfig.BZ_DATA_ERROR
  BZ_DATA_ERROR_MAGIC = cConfig.BZ_DATA_ERROR_MAGIC
  BZ_IO_ERROR = cConfig.BZ_IO_ERROR
  BZ_MEM_ERROR = cConfig.BZ_MEM_ERROR
  BZ_UNEXPECTED_EOF = cConfig.BZ_UNEXPECTED_EOF
  BZ_SEQUENCE_ERROR = cConfig.BZ_SEQUENCE_ERROR
  # Size of the first output buffer handed to bzlib: the C library's BUFSIZ,
  # but never less than 8192 bytes.
  SMALLCHUNK = max(BUFSIZ, 8192)

  # Upper bound after which _new_buffer_size() stops doubling the buffer.
  # The small limit is meant for platforms whose C 'int' is narrower than
  # 4 bytes; the previous test ('> 4') had the comparison the wrong way
  # round.  On platforms where sizeof(int) == 4 both forms give the same
  # value.
  # NOTE(review): believed to match the SIZEOF_INT < 4 test in CPython's
  # bz2module.c -- confirm against that file.
  if rffi.sizeof(rffi.INT) < 4:
      BIGCHUNK = 512 * 32
  else:
      BIGCHUNK = 512 * 1024
  # _bzs_total_out(bzs) returns the number of bytes produced so far, read
  # from the split 32-bit counters of the bz_stream.  Which variant gets
  # defined depends on whether BZ_CONFIG_ERROR is available and on the size
  # of a C long.
  if BZ_CONFIG_ERROR:
      if rffi.sizeof(rffi.LONG) >= 8:
          def _bzs_total_out(bzs):
              # A C long is wide enough: glue the two halves together.
              high = rffi.getintfield(bzs, 'c_total_out_hi32')
              low = rffi.getintfield(bzs, 'c_total_out_lo32')
              return (high << 32) + low
      else:
          # we can't return a long long value from here, because most
          # callers wouldn't be able to handle it anyway
          def _bzs_total_out(bzs):
              high = rffi.getintfield(bzs, 'c_total_out_hi32')
              low = rffi.getintfield(bzs, 'c_total_out_lo32')
              # Anything that does not fit in a machine int is reported as
              # MemoryError.
              if high != 0 or low > sys.maxint:
                  raise MemoryError
              return low
  else:
      XXX    # this case needs fixing (old bz2 library?)

      def _bzs_total_out(bzs):
          return bzs.total_out
  def external(name, args, result, **kwds):
      """Declare the C function `name` through rffi.llexternal.

      The compilation info configured for this module is always passed
      along; any extra keyword arguments are forwarded unchanged.
      """
      info = CConfig._compilation_info_
      return rffi.llexternal(name, args, result,
                             compilation_info=info, **kwds)
  # ---- entry points working on FILE / BZFILE handles ----

  # The last-but-one parameter of BZ2_bzReadOpen should be rffi.VOIDP, but
  # it is not used, so it is declared as rffi.INTP to keep the compiler from
  # complaining about a constant pointer being passed to a void* argument.
  BZ2_bzReadOpen = external(
      'BZ2_bzReadOpen',
      [rffi.INTP, FILE, rffi.INT, rffi.INT, rffi.INTP, rffi.INT],
      BZFILE)
  BZ2_bzWriteOpen = external(
      'BZ2_bzWriteOpen',
      [rffi.INTP, FILE, rffi.INT, rffi.INT, rffi.INT],
      BZFILE)
  BZ2_bzReadClose = external(
      'BZ2_bzReadClose',
      [rffi.INTP, BZFILE],
      lltype.Void)
  BZ2_bzWriteClose = external(
      'BZ2_bzWriteClose',
      [rffi.INTP, BZFILE, rffi.INT, rffi.UINTP, rffi.UINTP],
      lltype.Void)
  BZ2_bzRead = external(
      'BZ2_bzRead',
      [rffi.INTP, BZFILE, rffi.CCHARP, rffi.INT],
      rffi.INT)
  BZ2_bzWrite = external(
      'BZ2_bzWrite',
      [rffi.INTP, BZFILE, rffi.CCHARP, rffi.INT],
      lltype.Void)

  # ---- entry points working on a bz_stream: compression ----
  BZ2_bzCompressInit = external(
      'BZ2_bzCompressInit',
      [bz_stream, rffi.INT, rffi.INT, rffi.INT],
      rffi.INT)
  # The two *End functions are declared with releasegil=False; they are the
  # ones called from the _finalize_() methods below.
  BZ2_bzCompressEnd = external(
      'BZ2_bzCompressEnd',
      [bz_stream],
      rffi.INT,
      releasegil=False)
  BZ2_bzCompress = external(
      'BZ2_bzCompress',
      [bz_stream, rffi.INT],
      rffi.INT)

  # ---- entry points working on a bz_stream: decompression ----
  BZ2_bzDecompressInit = external(
      'BZ2_bzDecompressInit',
      [bz_stream, rffi.INT, rffi.INT],
      rffi.INT)
  BZ2_bzDecompressEnd = external(
      'BZ2_bzDecompressEnd',
      [bz_stream],
      rffi.INT,
      releasegil=False)
  BZ2_bzDecompress = external(
      'BZ2_bzDecompress',
      [bz_stream],
      rffi.INT)
  def _catch_bz2_error(space, bzerror):
      """Raise the app-level exception that corresponds to `bzerror`.

      Mapping: BZ_CONFIG_ERROR and BZ_PARAM_ERROR -> SystemError,
      BZ_MEM_ERROR -> MemoryError, BZ_DATA_ERROR / BZ_DATA_ERROR_MAGIC and
      BZ_IO_ERROR -> IOError, BZ_UNEXPECTED_EOF -> EOFError,
      BZ_SEQUENCE_ERROR -> RuntimeError.

      For any other value the function returns normally without raising,
      so callers cannot rely on it never returning.
      """
      # BZ_CONFIG_ERROR is only compared when the constant has a true value.
      if BZ_CONFIG_ERROR and bzerror == BZ_CONFIG_ERROR:
          raise oefmt(space.w_SystemError,
                      "the bz2 library was not compiled correctly")
      if bzerror == BZ_PARAM_ERROR:
          raise oefmt(space.w_SystemError,
                      "the bz2 library has received wrong parameters")
      if bzerror == BZ_MEM_ERROR:
          raise OperationError(space.w_MemoryError, space.w_None)
      if bzerror == BZ_DATA_ERROR or bzerror == BZ_DATA_ERROR_MAGIC:
          raise oefmt(space.w_IOError, "invalid data stream")
      if bzerror == BZ_IO_ERROR:
          raise oefmt(space.w_IOError, "unknown IO error")
      if bzerror == BZ_UNEXPECTED_EOF:
          raise oefmt(space.w_EOFError,
                      "compressed file ended before the logical "
                      "end-of-stream was detected")
      if bzerror == BZ_SEQUENCE_ERROR:
          raise oefmt(space.w_RuntimeError,
                      "wrong sequence of bz2 library commands used")
  def _new_buffer_size(current_size):
      """Return the size to use for the next output chunk.

      The size doubles as long as `current_size` is below BIGCHUNK; from
      then on it is returned unchanged.
      """
      if current_size >= BIGCHUNK:
          return current_size
      return current_size * 2
  164. # ____________________________________________________________
  class OutBuffer(object):
      """Handler for the output buffer.  A bit custom code trying to
      encapsulate the logic of setting up the fields of 'bzs' and
      allocating raw memory as needed.
      """
      # Usage in this file: "with OutBuffer(bzs) as out:", call
      # out.prepare_next_chunk() whenever bzs reports avail_out == 0, and
      # finish with out.make_result_string().

      def __init__(self, bzs, initial_size=SMALLCHUNK):
          # Allocate a first buffer of 'initial_size' bytes and point the
          # output fields of 'bzs' at it.  'temp' collects the chunks that
          # were already completely filled.
          self.temp = []
          self.bzs = bzs
          self._allocate_chunk(initial_size)

      def _allocate_chunk(self, size):
          # Get a fresh buffer and make it the current output target.
          self.raw_buf, self.gc_buf, self.case_num = rffi.alloc_buffer(size)
          self.current_size = size
          self.bzs.c_next_out = self.raw_buf
          rffi.setintfield(self.bzs, 'c_avail_out', size)

      def _get_chunk(self, chunksize):
          # Turn the first 'chunksize' bytes of the current buffer into a
          # string and release the buffer.  current_size is reset to 0 so
          # that free() knows there is nothing left to release.
          assert 0 <= chunksize <= self.current_size
          raw, gc, case = self.raw_buf, self.gc_buf, self.case_num
          chunk = rffi.str_from_buffer(raw, gc, case,
                                       self.current_size, chunksize)
          rffi.keep_buffer_alive_until_here(raw, gc, case)
          self.current_size = 0
          return chunk

      def prepare_next_chunk(self):
          # The current buffer is treated as full: stash all of it and
          # continue with a new buffer sized by _new_buffer_size().
          filled = self.current_size
          self.temp.append(self._get_chunk(filled))
          self._allocate_chunk(_new_buffer_size(filled))

      def make_result_string(self):
          # Only the part of the last buffer that bzs actually wrote to is
          # kept (current_size minus what is still available).
          unused = rffi.getintfield(self.bzs, 'c_avail_out')
          tail = self._get_chunk(self.current_size - unused)
          if not self.temp:
              return tail
          self.temp.append(tail)
          return ''.join(self.temp)

      def free(self):
          # Release the current buffer unless _get_chunk() already did.
          if self.current_size <= 0:
              return
          rffi.keep_buffer_alive_until_here(self.raw_buf, self.gc_buf,
                                            self.case_num)

      def __enter__(self):
          return self

      def __exit__(self, *args):
          self.free()
  211. # ____________________________________________________________
  212. #
  213. # Make the BZ2File type by internally inheriting from W_File.
  214. # XXX this depends on internal details of W_File to work properly.
  215. from pypy.module._file.interp_file import W_File
  class W_BZ2File(W_File):
      # The BZ2File type: a W_File whose stream is produced by
      # open_bz2file_as_stream() instead of a plain file open.

      def check_mode_ok(self, mode):
          # Accept any non-empty mode starting with 'r', 'w', 'a' or 'U';
          # everything else is an app-level ValueError.
          if mode and mode[0] in ['r', 'w', 'a', 'U']:
              return
          space = self.space
          raise oefmt(space.w_ValueError, "invalid mode: '%s'", mode)

      @unwrap_spec(mode=str, buffering=int, compresslevel=int)
      def direct_bz2__init__(self, w_name, mode='r', buffering=-1,
                             compresslevel=9):
          # Close whatever was open before, then (re)open on 'w_name'.
          self.direct_close()
          self.w_name = w_name
          # the stream should always be opened in binary mode
          if "b" not in mode:
              mode += "b"
          self.check_mode_ok(mode)
          stream = open_bz2file_as_stream(self.space, w_name, mode,
                                          buffering, compresslevel)
          fd = stream.try_to_find_file_descriptor()
          self.fdopenstream(stream, fd, mode, w_name)

      # Reuse W_File's declaration helper on this class body; it is given
      # locals(), the name "bz2__init__" and the docstring.  The typedef
      # below refers to the result as W_BZ2File.file_bz2__init__.
      _exposed_method_names = []
      W_File._decl.im_func(locals(), "bz2__init__",
                           """Opens a BZ2-compressed file.""")
      # XXX ^^^ hacking hacking... can't just use the name "__init__" again
      # because the RTyper is confused about the two direct__init__() with
      # a different signature, confusion caused by the fact that
      # W_File.file__init__() would appear to contain an indirect call to
      # one of the two versions of direct__init__().

      def file_bz2__repr__(self):
          # Build "<open|closed> bz2.BZ2File <name>, mode '<mode>'" and let
          # getrepr() wrap it.
          head = "closed" if self.stream is None else "open"
          # NOTE(review): w_name is computed here but not used afterwards;
          # the displayed name comes from getdisplayname().
          w_name = self.w_name
          if w_name is None:
              w_name = self.space.wrap('?')
          info = "%s bz2.BZ2File %s, mode '%s'" % (head,
                                                   self.getdisplayname(),
                                                   self.mode)
          return self.getrepr(self.space, info)
  def descr_bz2file__new__(space, w_subtype, __args__):
      # Allocate an instance of (a subtype of) W_BZ2File and run the
      # interp-level constructor on it.  __args__ is accepted but not
      # looked at here.
      w_file = space.allocate_instance(W_BZ2File, w_subtype)
      W_BZ2File.__init__(w_file, space)
      return space.wrap(w_file)
  # Attributes that BZ2File takes over unchanged from the 'file' type:
  # every method W_File exposes except __init__, plus a fixed list of
  # further names.
  same_attributes_as_in_file = list(W_File._exposed_method_names)
  same_attributes_as_in_file.remove('__init__')
  same_attributes_as_in_file += [
      'name', 'mode', 'encoding', 'closed', 'newlines', 'softspace',
      'writelines', '__exit__', '__weakref__',
  ]

  W_BZ2File.typedef = TypeDef(
      "BZ2File",
      __doc__ = (
          "BZ2File(name [, mode='r', buffering=-1, compresslevel=9])"
          " -> file object\n"
          "Open a bz2 file. The mode can be 'r' or 'w', for reading"
          " (default) or\n"
          "writing. When opened for writing, the file will be created if"
          " it doesn't\n"
          "exist, and truncated otherwise. If the buffering argument is"
          " given, 0 means\n"
          "unbuffered, and larger numbers specify the buffer size. If"
          " compresslevel\n"
          "is given, must be a number between 1 and 9.\n"
          "Add a 'U' to mode to open the file for input with universal"
          " newline\n"
          "support. Any line ending in the input file will be seen as a"
          " '\\n' in\n"
          "Python. Also, a file so opened gains the attribute 'newlines';"
          " the value\n"
          "for this attribute is one of None (no newline read yet), '\\r',"
          " '\\n',\n"
          "'\\r\\n' or a tuple containing all the newline types seen."
          " Universal\n"
          "newlines are available only when reading."),
      __new__ = interp2app(descr_bz2file__new__),
      __init__ = interp2app(W_BZ2File.file_bz2__init__),
      __repr__ = interp2app(W_BZ2File.file_bz2__repr__),
      # The shared attributes are copied straight from W_File's typedef.
      **dict((name, W_File.typedef.rawdict[name])
             for name in same_attributes_as_in_file))
  282. # ____________________________________________________________
  def open_bz2file_as_stream(space, w_path, mode="r", buffering=-1,
                             compresslevel=9):
      """Open the file at `w_path` and return a stream that (de)compresses.

      Raises an app-level ValueError for read-write modes and for append
      mode.  When reading, the raw stream is wrapped in a ReadBZ2Filter;
      otherwise in a WriteBZ2Filter using `compresslevel`.
      """
      from rpython.rlib.streamio import (decode_mode, open_path_helper,
                                         construct_stream_tower)

      (os_flags, universal, reading, writing,
       basemode, binary) = decode_mode(mode)
      if reading and writing:
          raise oefmt(space.w_ValueError, "cannot open in read-write mode")
      if basemode == "a":
          raise oefmt(space.w_ValueError, "cannot append to bz2 file")

      raw_stream = open_path_helper(space.str0_w(w_path), os_flags, False)
      if reading:
          bz2stream = ReadBZ2Filter(space, raw_stream, buffering)
          buffering = 0     # by construction, the ReadBZ2Filter acts like
                            # a read buffer too - no need for another one
      else:
          assert writing
          bz2stream = WriteBZ2Filter(space, raw_stream, compresslevel)
      return construct_stream_tower(bz2stream, buffering, universal,
                                    reading, writing, binary)
  class ReadBZ2Filter(Stream):
      """Standard I/O stream filter that decompresses the stream with bz2."""
      # State:
      #   buffer / pos -- decompressed data not yet handed out; everything
      #                   before 'pos' was already returned
      #   readlength   -- number of decompressed bytes returned so far; this
      #                   is what tell() reports
      #   finished     -- set once the underlying stream is exhausted or the
      #                   decompressor reported the end of the bz2 stream

      def __init__(self, space, stream, buffering):
          self.space = space
          self.stream = stream
          self.decompressor = W_BZ2Decompressor(space)
          self.readlength = r_longlong(0)
          self.buffer = ""
          self.pos = 0
          self.finished = False
          if buffering < 1024:
              buffering = 1024   # minimum amount of compressed data read
                                 # at once
          self.buffering = buffering

      def close1(self, closefileno):
          self.stream.close1(closefileno)

      def tell(self):
          # Position in the decompressed data.
          return self.readlength

      def seek(self, offset, whence):
          # Seeking is emulated: going backwards restarts decompression
          # from the beginning of the file, going forwards reads and
          # discards data.
          READMAX = 2**18   # 256KB

          # Make offset relative to the start of the file
          if whence == 2:
              # Read everything to arrive at the end
              while len(self.read(READMAX)) > 0:
                  pass
              offset += self.readlength
          elif whence == 1:
              offset += self.readlength
          elif whence == 0:
              pass
          else:
              raise oefmt(self.space.w_ValueError,
                          "Invalid value for whence: %d", whence)

          # Make offset relative to the current pos
          # Rewind iff necessary
          if offset < self.readlength:
              self.stream.seek(0, 0)
              self.decompressor = W_BZ2Decompressor(self.space)
              self.readlength = r_longlong(0)
              self.pos = 0
              self.buffer = ""
              self.finished = False
          else:
              offset -= self.readlength

          # Seek: skip 'offset' bytes, at most READMAX at a time, stopping
          # early if the data runs out.
          read = r_longlong(0)
          while read < offset:
              count = offset - read
              if count < READMAX:
                  count = intmask(count)
              else:
                  count = READMAX
              length = len(self.read(count))
              if not length:
                  break
              read += length

      def readall(self):
          # Return everything that is left: the unread part of the buffer
          # followed by the decompressed rest of the underlying stream.
          # Raises an app-level EOFError if the compressed data stops before
          # the decompressor has seen the end of the bz2 stream.
          # NOTE(review): unlike read(), an EOFError raised by decompress()
          # itself (end of stream already found) is not caught here.
          raw = self.stream.readall()
          if raw:
              w_result = self.decompressor.decompress(raw)
              if self.decompressor.running:
                  raise oefmt(self.space.w_EOFError,
                              "compressed file ended before the logical "
                              "end-of-the-stream was detected")
              result = self.space.str_w(w_result)
          else:
              result = ""
          if len(self.buffer) != self.pos:
              pos = self.pos
              assert pos >= 0
              result = self.buffer[pos:] + result
              self.buffer = ''
              self.pos = 0
          # Count everything handed out, including the buffered remainder:
          # read() only counts bytes when it returns them, so the part of
          # 'buffer' after 'pos' has not been added to readlength yet.
          # Without this, tell() is too small after read(n) + readall().
          self.readlength += len(result)
          return result

      def read(self, n):
          # Return up to n decompressed bytes; "" means end of data.  A
          # shorter result is possible when the buffer holds fewer than n
          # bytes.
          # XXX not nice
          if n <= 0:
              return ''
          # Refill the buffer until it contains something to return.
          while self.pos == len(self.buffer):
              if self.finished:
                  return ""
              moredata = self.stream.read(max(self.buffering, n))
              if not moredata:
                  self.finished = True
                  return ""
              try:
                  w_read = self.decompressor.decompress(moredata)
              except OperationError as e:
                  # EOFError from the decompressor is turned into a plain
                  # end of data; every other error propagates.
                  if e.match(self.space, self.space.w_EOFError):
                      self.finished = True
                      return ""
                  raise
              self.buffer = self.space.str_w(w_read)
              self.pos = 0
          if len(self.buffer) - self.pos >= n:
              pos = self.pos
              assert pos >= 0
              result = self.buffer[pos:pos + n]
              self.pos += n
          else:
              # Fewer than n bytes buffered: hand out what is there.
              pos = self.pos
              assert pos >= 0
              result = self.buffer[pos:]
              self.pos = 0
              self.buffer = ""
          self.readlength += len(result)
          return result

      def peek(self):
          return (self.pos, self.buffer)

      def try_to_find_file_descriptor(self):
          return self.stream.try_to_find_file_descriptor()

      def write(self, s):
          raise oefmt(self.space.w_IOError, "file is not ready for writing")
  class WriteBZ2Filter(Stream):
      """Standard I/O stream filter that compresses the stream with bz2."""
      # 'writtenlength' counts the uncompressed bytes passed to write();
      # it is the value reported by tell().

      def __init__(self, space, stream, compresslevel):
          self.space = space
          self.stream = stream
          self.compressor = W_BZ2Compressor(space, compresslevel)
          self.writtenlength = 0

      def close1(self, closefileno):
          # Flush the compressor and write what it returns before closing
          # the underlying stream.
          w_tail = self.compressor.flush()
          self.stream.write(self.space.str_w(w_tail))
          self.stream.close1(closefileno)

      def write(self, data):
          w_compressed = self.compressor.compress(data)
          self.stream.write(self.space.str_w(w_compressed))
          self.writtenlength += len(data)

      def tell(self):
          return self.writtenlength

      def seek(self, offset, whence):
          raise oefmt(self.space.w_IOError, "seek works only while reading")

      def read(self, n):
          raise oefmt(self.space.w_IOError, "file is not ready for reading")

      def readall(self):
          raise oefmt(self.space.w_IOError, "file is not ready for reading")

      def try_to_find_file_descriptor(self):
          return self.stream.try_to_find_file_descriptor()
  @unwrap_spec(compresslevel=int)
  def descr_compressor__new__(space, w_subtype, compresslevel=9):
      # Allocate an instance of (a subtype of) W_BZ2Compressor, run the
      # interp-level constructor on it and return it wrapped.
      w_obj = space.allocate_instance(W_BZ2Compressor, w_subtype)
      compressor = space.interp_w(W_BZ2Compressor, w_obj)
      W_BZ2Compressor.__init__(compressor, space, compresslevel)
      return space.wrap(compressor)
  class W_BZ2Compressor(W_Root):
      """BZ2Compressor([compresslevel=9]) -> compressor object
      Create a new compressor object. This object may be used to compress
      data sequentially. If you want to compress data in one shot, use the
      compress() function instead. The compresslevel parameter, if given,
      must be a number between 1 and 9."""
      # 'bzs' is a raw-malloc'ed bz_stream owned by this object; 'running'
      # is True from a successful init until flush() is called.

      def __init__(self, space, compresslevel):
          self.space = space
          self.running = False
          self.bzs = lltype.malloc(bz_stream.TO, flavor='raw', zero=True)
          try:
              self._init_bz2comp(compresslevel)
          except:
              # Initialisation failed: give the raw struct back right away
              # and leave a null pointer behind, then re-raise.
              lltype.free(self.bzs, flavor='raw')
              self.bzs = lltype.nullptr(bz_stream.TO)
              raise
          self.register_finalizer(space)

      def _init_bz2comp(self, compresslevel):
          # Validate the level and initialise the bz_stream for compression.
          if not (1 <= compresslevel <= 9):
              raise oefmt(self.space.w_ValueError,
                          "compresslevel must be between 1 and 9")
          status = intmask(BZ2_bzCompressInit(self.bzs, compresslevel, 0, 0))
          if status != BZ_OK:
              _catch_bz2_error(self.space, status)
          self.running = True

      def _finalize_(self):
          # End the compression stream and free the struct, once; the
          # attribute is nulled first.
          bzs = self.bzs
          if not bzs:
              return
          self.bzs = lltype.nullptr(bz_stream.TO)
          BZ2_bzCompressEnd(bzs)
          lltype.free(bzs, flavor='raw')

      @unwrap_spec(data='bufferstr')
      def compress(self, data):
          """compress(data) -> string
          Provide more data to the compressor object. It will return chunks of
          compressed data whenever possible. When you've finished providing data
          to compress, call the flush() method to finish the compression process,
          and return what is left in the internal buffers."""
          space = self.space
          datasize = len(data)
          # Empty input is answered before the 'already flushed' check.
          if datasize == 0:
              return space.newbytes("")
          if not self.running:
              raise oefmt(space.w_ValueError,
                          "this object was already flushed")

          bzs = self.bzs
          with OutBuffer(bzs) as out:
              with rffi.scoped_nonmovingbuffer(data) as in_buf:
                  bzs.c_next_in = in_buf
                  rffi.setintfield(bzs, 'c_avail_in', datasize)
                  # Keep calling the library until all input is consumed,
                  # switching to a new output chunk when the current one
                  # is full.
                  while True:
                      status = BZ2_bzCompress(bzs, BZ_RUN)
                      if status != BZ_RUN_OK:
                          _catch_bz2_error(space, status)
                      if rffi.getintfield(bzs, 'c_avail_in') == 0:
                          break
                      elif rffi.getintfield(bzs, 'c_avail_out') == 0:
                          out.prepare_next_chunk()
              res = out.make_result_string()
              return space.newbytes(res)

      def flush(self):
          # Finish the compression and return the remaining output as a
          # bytes object.  The object is marked as no longer running before
          # any work is done, so it cannot be used again afterwards.
          space = self.space
          if not self.running:
              raise oefmt(space.w_ValueError,
                          "this object was already flushed")
          self.running = False

          bzs = self.bzs
          with OutBuffer(bzs) as out:
              while True:
                  status = BZ2_bzCompress(bzs, BZ_FINISH)
                  if status == BZ_STREAM_END:
                      break
                  elif status != BZ_FINISH_OK:
                      _catch_bz2_error(space, status)
                  if rffi.getintfield(bzs, 'c_avail_out') == 0:
                      out.prepare_next_chunk()
              res = out.make_result_string()
              return space.newbytes(res)
  # App-level type 'BZ2Compressor': constructor plus the two methods; the
  # documentation is the class docstring.
  W_BZ2Compressor.typedef = TypeDef(
      "BZ2Compressor",
      __doc__ = W_BZ2Compressor.__doc__,
      __new__ = interp2app(descr_compressor__new__),
      compress = interp2app(W_BZ2Compressor.compress),
      flush = interp2app(W_BZ2Compressor.flush),
  )
  def descr_decompressor__new__(space, w_subtype):
      # Allocate an instance of (a subtype of) W_BZ2Decompressor, run the
      # interp-level constructor on it and return it wrapped.
      w_obj = space.allocate_instance(W_BZ2Decompressor, w_subtype)
      decompressor = space.interp_w(W_BZ2Decompressor, w_obj)
      W_BZ2Decompressor.__init__(decompressor, space)
      return space.wrap(decompressor)
  class W_BZ2Decompressor(W_Root):
      """BZ2Decompressor() -> decompressor object
      Create a new decompressor object. This object may be used to decompress
      data sequentially. If you want to decompress data in one shot, use the
      decompress() function instead."""
      # 'bzs' is a raw-malloc'ed bz_stream owned by this object; 'running'
      # is True from a successful init until the end of the bz2 stream is
      # seen; 'unused_data' holds the input bytes found after that end.

      def __init__(self, space):
          self.space = space
          self.running = False
          self.unused_data = ""
          self.bzs = lltype.malloc(bz_stream.TO, flavor='raw', zero=True)
          try:
              self._init_bz2decomp()
          except:
              # Initialisation failed: give the raw struct back right away
              # and leave a null pointer behind, then re-raise.
              lltype.free(self.bzs, flavor='raw')
              self.bzs = lltype.nullptr(bz_stream.TO)
              raise
          self.register_finalizer(space)

      def _init_bz2decomp(self):
          # Initialise the bz_stream for decompression.
          status = BZ2_bzDecompressInit(self.bzs, 0, 0)
          if status != BZ_OK:
              _catch_bz2_error(self.space, status)
          self.running = True

      def _finalize_(self):
          # End the decompression stream and free the struct, once; the
          # attribute is nulled first.
          bzs = self.bzs
          if not bzs:
              return
          self.bzs = lltype.nullptr(bz_stream.TO)
          BZ2_bzDecompressEnd(bzs)
          lltype.free(bzs, flavor='raw')

      @unwrap_spec(data='bufferstr')
      def decompress(self, data):
          """decompress(data) -> string
          Provide more data to the decompressor object. It will return chunks
          of decompressed data whenever possible. If you try to decompress data
          after the end of stream is found, EOFError will be raised. If any data
          was found after the end of stream, it'll be ignored and saved in
          unused_data attribute."""
          space = self.space
          # The 'already finished' check comes before the empty-input one.
          if not self.running:
              raise oefmt(space.w_EOFError,
                          "end of stream was already found")
          if data == '':
              return space.newbytes('')

          bzs = self.bzs
          in_bufsize = len(data)
          with rffi.scoped_nonmovingbuffer(data) as in_buf:
              bzs.c_next_in = in_buf
              rffi.setintfield(bzs, 'c_avail_in', in_bufsize)
              with OutBuffer(bzs) as out:
                  while True:
                      status = BZ2_bzDecompress(bzs)
                      if status == BZ_STREAM_END:
                          # End of the bz2 stream: whatever input is left
                          # over is copied into unused_data.
                          leftover = rffi.getintfield(bzs, 'c_avail_in')
                          if leftover != 0:
                              chars = [bzs.c_next_in[i]
                                       for i in range(leftover)]
                              self.unused_data = "".join(chars)
                          self.running = False
                          break
                      if status != BZ_OK:
                          _catch_bz2_error(space, status)
                      if rffi.getintfield(bzs, 'c_avail_in') == 0:
                          break
                      elif rffi.getintfield(bzs, 'c_avail_out') == 0:
                          out.prepare_next_chunk()
                  res = out.make_result_string()
                  return space.newbytes(res)
  # App-level type 'BZ2Decompressor': constructor, the 'unused_data'
  # attribute and the decompress() method; the documentation is the class
  # docstring.
  W_BZ2Decompressor.typedef = TypeDef(
      "BZ2Decompressor",
      __doc__ = W_BZ2Decompressor.__doc__,
      __new__ = interp2app(descr_decompressor__new__),
      unused_data = interp_attrproperty("unused_data", W_BZ2Decompressor),
      decompress = interp2app(W_BZ2Decompressor.decompress),
  )
  @unwrap_spec(data='bufferstr', compresslevel=int)
  def compress(space, data, compresslevel=9):
      """compress(data [, compresslevel=9]) -> string
      Compress data in one shot. If you want to compress data sequentially,
      use an instance of BZ2Compressor instead. The compresslevel parameter, if
      given, must be a number between 1 and 9."""
      # Raises an app-level ValueError for a compresslevel outside 1..9;
      # library errors are translated by _catch_bz2_error().
      if compresslevel < 1 or compresslevel > 9:
          raise oefmt(space.w_ValueError,
                      "compresslevel must be between 1 and 9")

      with lltype.scoped_alloc(bz_stream.TO, zero=True) as bzs:
          in_bufsize = len(data)

          with rffi.scoped_nonmovingbuffer(data) as in_buf:
              bzs.c_next_in = in_buf
              rffi.setintfield(bzs, 'c_avail_in', in_bufsize)

              # conforming to bz2 manual, this is large enough to fit
              # compressed data in one shot. We will check it later anyway.
              with OutBuffer(bzs,
                             in_bufsize + (in_bufsize / 100 + 1) + 600) as out:

                  bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0)
                  if bzerror != BZ_OK:
                      _catch_bz2_error(space, bzerror)

                  # From here on the stream is initialised, so it must be
                  # ended exactly once on every way out -- including
                  # exceptions raised while growing the output buffer or
                  # building the result string.
                  try:
                      while True:
                          bzerror = BZ2_bzCompress(bzs, BZ_FINISH)
                          if bzerror == BZ_STREAM_END:
                              break
                          elif bzerror != BZ_FINISH_OK:
                              _catch_bz2_error(space, bzerror)
                              # _catch_bz2_error() returns for codes it does
                              # not know; never keep looping on those.
                              raise oefmt(space.w_SystemError,
                                          "unexpected error from the bz2 "
                                          "library")

                          if rffi.getintfield(bzs, 'c_avail_out') == 0:
                              out.prepare_next_chunk()

                      res = out.make_result_string()
                  finally:
                      BZ2_bzCompressEnd(bzs)
                  return space.wrap(res)
  @unwrap_spec(data='bufferstr')
  def decompress(space, data):
      """decompress(data) -> decompressed data
      Decompress data in one shot. If you want to decompress data sequentially,
      use an instance of BZ2Decompressor instead."""
      # Empty input gives an empty result.  Raises an app-level ValueError
      # if the input is used up before the end of the bz2 stream; library
      # errors are translated by _catch_bz2_error().
      in_bufsize = len(data)
      if in_bufsize == 0:
          return space.wrap("")

      with lltype.scoped_alloc(bz_stream.TO, zero=True) as bzs:
          with rffi.scoped_nonmovingbuffer(data) as in_buf:
              bzs.c_next_in = in_buf
              rffi.setintfield(bzs, 'c_avail_in', in_bufsize)

              with OutBuffer(bzs) as out:
                  bzerror = BZ2_bzDecompressInit(bzs, 0, 0)
                  if bzerror != BZ_OK:
                      _catch_bz2_error(space, bzerror)

                  # From here on the stream is initialised, so it must be
                  # ended exactly once on every way out -- including
                  # exceptions raised while growing the output buffer or
                  # building the result string.
                  try:
                      while True:
                          bzerror = BZ2_bzDecompress(bzs)
                          if bzerror == BZ_STREAM_END:
                              break
                          if bzerror != BZ_OK:
                              _catch_bz2_error(space, bzerror)
                              # _catch_bz2_error() returns for codes it does
                              # not know; never keep looping on those.
                              raise oefmt(space.w_SystemError,
                                          "unexpected error from the bz2 "
                                          "library")

                          if rffi.getintfield(bzs, 'c_avail_in') == 0:
                              raise oefmt(space.w_ValueError,
                                          "couldn't find end of stream")
                          elif rffi.getintfield(bzs, 'c_avail_out') == 0:
                              out.prepare_next_chunk()

                      res = out.make_result_string()
                  finally:
                      BZ2_bzDecompressEnd(bzs)
                  return space.wrap(res)