# Source: /misc/MakePE/examples/packer/aplib.py
# http://corkami.googlecode.com/
# this is a standalone single-file merge of aplib compression and decompression
# taken from my own library Kabopan http://code.google.com/p/kabopan/
# (no other clean-up or improvement)
# Ange Albertini, BSD Licence, 2007-2011
# from kbp\comp\_lz77.py ##################################################
  6. def find_longest_match(s, sub):
  7. """returns the number of byte to look backward and the length of byte to copy)"""
  8. if sub == "":
  9. return 0, 0
  10. limit = len(s)
  11. dic = s[:]
  12. l = 0
  13. offset = 0
  14. length = 0
  15. first = 0
  16. word = ""
  17. word += sub[l]
  18. pos = dic.rfind(word, 0, limit + 1)
  19. if pos == -1:
  20. return offset, length
  21. offset = limit - pos
  22. length = len(word)
  23. dic += sub[l]
  24. while l < len(sub) - 1:
  25. l += 1
  26. word += sub[l]
  27. pos = dic.rfind(word, 0, limit + 1)
  28. if pos == -1:
  29. return offset, length
  30. offset = limit - pos
  31. length = len(word)
  32. dic += sub[l]
  33. return offset, length
# from _misc.py ###############################
  35. def int2lebin(value, size):
  36. """ouputs value in binary, as little-endian"""
  37. result = ""
  38. for i in xrange(size):
  39. result = result + chr((value >> (8 * i)) & 0xFF )
  40. return result
  41. def modifystring(s, sub, offset):
  42. """overwrites 'sub' at 'offset' of 's'"""
  43. return s[:offset] + sub + s[offset + len(sub):]
  44. def getbinlen(value):
  45. """return the bit length of an integer"""
  46. result = 0
  47. if value == 0:
  48. return 1
  49. while value != 0:
  50. value >>= 1
  51. result += 1
  52. return result
# from kbp\_bits.py #################################
  54. class _bits_compress():
  55. """bit machine for variable-sized auto-reloading tag compression"""
  56. def __init__(self, tagsize):
  57. """tagsize is the number of bytes that takes the tag"""
  58. self.out = ""
  59. self.__tagsize = tagsize
  60. self.__tag = 0
  61. self.__tagoffset = -1
  62. self.__maxbit = (self.__tagsize * 8) - 1
  63. self.__curbit = 0
  64. self.__isfirsttag = True
  65. def getdata(self):
  66. """builds an output string of what's currently compressed:
  67. currently output bit + current tag content"""
  68. tagstr = int2lebin(self.__tag, self.__tagsize)
  69. return modifystring(self.out, tagstr, self.__tagoffset)
  70. def write_bit(self, value):
  71. """writes a bit, make space for the tag if necessary"""
  72. if self.__curbit != 0:
  73. self.__curbit -= 1
  74. else:
  75. if self.__isfirsttag:
  76. self.__isfirsttag = False
  77. else:
  78. self.out = self.getdata()
  79. self.__tagoffset = len(self.out)
  80. self.out += "".join(["\x00"] * self.__tagsize)
  81. self.__curbit = self.__maxbit
  82. self.__tag = 0
  83. if value:
  84. self.__tag |= (1 << self.__curbit)
  85. return
  86. def write_bitstring(self, s):
  87. """write a string of bits"""
  88. for c in s:
  89. self.write_bit(0 if c == "0" else 1)
  90. return
  91. def write_byte(self, b):
  92. """writes a char or a number"""
  93. assert len(b) == 1 if isinstance(b, str) else 0 <= b <= 255
  94. self.out += b[0:1] if isinstance(b, str) else chr(b)
  95. return
  96. def write_fixednumber(self, value, nbbit):
  97. """write a value on a fixed range of bits"""
  98. for i in xrange(nbbit - 1, -1, -1):
  99. self.write_bit( (value >> i) & 1)
  100. return
  101. def write_variablenumber(self, value):
  102. assert value >= 2
  103. length = getbinlen(value) - 2 # the highest bit is 1
  104. self.write_bit(value & (1 << length))
  105. for i in xrange(length - 1, -1, -1):
  106. self.write_bit(1)
  107. self.write_bit(value & (1 << i))
  108. self.write_bit(0)
  109. return
  110. class _bits_decompress():
  111. """bit machine for variable-sized auto-reloading tag decompression"""
  112. def __init__(self, data, tagsize):
  113. self.__curbit = 0
  114. self.__offset = 0
  115. self.__tag = None
  116. self.__tagsize = tagsize
  117. self.__in = data
  118. self.out = ""
  119. def getoffset(self):
  120. """return the current byte offset"""
  121. return self.__offset
  122. # def getdata(self):
  123. # return self.__lzdata
  124. def read_bit(self):
  125. """read next bit from the stream, reloads the tag if necessary"""
  126. if self.__curbit != 0:
  127. self.__curbit -= 1
  128. else:
  129. self.__curbit = (self.__tagsize * 8) - 1
  130. self.__tag = ord(self.read_byte())
  131. for i in xrange(self.__tagsize - 1):
  132. self.__tag += ord(self.read_byte()) << (8 * (i + 1))
  133. bit = (self.__tag >> ((self.__tagsize * 8) - 1)) & 0x01
  134. self.__tag <<= 1
  135. return bit
  136. def is_end(self):
  137. return self.__offset == len(self.__in) and self.__curbit == 1
  138. def read_byte(self):
  139. """read next byte from the stream"""
  140. if type(self.__in) == str:
  141. result = self.__in[self.__offset]
  142. elif type(self.__in) == file:
  143. result = self.__in.read(1)
  144. self.__offset += 1
  145. return result
  146. def read_fixednumber(self, nbbit, init=0):
  147. """reads a fixed bit-length number"""
  148. result = init
  149. for i in xrange(nbbit):
  150. result = (result << 1) + self.read_bit()
  151. return result
  152. def read_variablenumber(self):
  153. """return a variable bit-length number x, x >= 2
  154. reads a bit until the next bit in the pair is not set"""
  155. result = 1
  156. result = (result << 1) + self.read_bit()
  157. while self.read_bit():
  158. result = (result << 1) + self.read_bit()
  159. return result
  160. def read_setbits(self, max_, set_=1):
  161. """read bits as long as their set or a maximum is reached"""
  162. result = 0
  163. while result < max_ and self.read_bit() == set_:
  164. result += 1
  165. return result
  166. def back_copy(self, offset, length=1):
  167. for i in xrange(length):
  168. self.out += self.out[-offset]
  169. return
  170. def read_literal(self, value=None):
  171. if value is None:
  172. self.out += self.read_byte()
  173. else:
  174. self.out += value
  175. return False
# from kbp\comp\aplib.py ###################################################
"""
aPLib, LZSS based lossless compression algorithm
Jorgen Ibsen U{http://www.ibsensoftware.com}
"""
  181. def lengthdelta(offset):
  182. if offset < 0x80 or 0x7D00 <= offset:
  183. return 2
  184. elif 0x500 <= offset:
  185. return 1
  186. return 0
  187. class compress(_bits_compress):
  188. """
  189. aplib compression is based on lz77
  190. """
  191. def __init__(self, data, length=None):
  192. _bits_compress.__init__(self, 1)
  193. self.__in = data
  194. self.__length = length if length is not None else len(data)
  195. self.__offset = 0
  196. self.__lastoffset = 0
  197. self.__pair = True
  198. return
  199. def __literal(self, marker=True):
  200. if marker:
  201. self.write_bit(0)
  202. self.write_byte(self.__in[self.__offset])
  203. self.__offset += 1
  204. self.__pair = True
  205. return
  206. def __block(self, offset, length):
  207. assert offset >= 2
  208. self.write_bitstring("10")
  209. # if the last operations were literal or single byte
  210. # and the offset is unchanged since the last block copy
  211. # we can just store a 'null' offset and the length
  212. if self.__pair and self.__lastoffset == offset:
  213. self.write_variablenumber(2) # 2-
  214. self.write_variablenumber(length)
  215. else:
  216. high = (offset >> 8) + 2
  217. if self.__pair:
  218. high += 1
  219. self.write_variablenumber(high)
  220. low = offset & 0xFF
  221. self.write_byte(low)
  222. self.write_variablenumber(length - lengthdelta(offset))
  223. self.__offset += length
  224. self.__lastoffset = offset
  225. self.__pair = False
  226. return
  227. def __shortblock(self, offset, length):
  228. assert 2 <= length <= 3
  229. assert 0 < offset <= 127
  230. self.write_bitstring("110")
  231. b = (offset << 1 ) + (length - 2)
  232. self.write_byte(b)
  233. self.__offset += length
  234. self.__lastoffset = offset
  235. self.__pair = False
  236. return
  237. def __singlebyte(self, offset):
  238. assert 0 <= offset < 16
  239. self.write_bitstring("111")
  240. self.write_fixednumber(offset, 4)
  241. self.__offset += 1
  242. self.__pair = True
  243. return
  244. def __end(self):
  245. self.write_bitstring("110")
  246. self.write_byte(chr(0))
  247. return
  248. def do(self):
  249. self.__literal(False)
  250. while self.__offset < self.__length:
  251. offset, length = find_longest_match(self.__in[:self.__offset],
  252. self.__in[self.__offset:])
  253. if length == 0:
  254. c = self.__in[self.__offset]
  255. if c == "\x00":
  256. self.__singlebyte(0)
  257. else:
  258. self.__literal()
  259. elif length == 1 and 0 <= offset < 16:
  260. self.__singlebyte(offset)
  261. elif 2 <= length <= 3 and 0 < offset <= 127:
  262. self.__shortblock(offset, length)
  263. elif 3 <= length and 2 <= offset:
  264. self.__block(offset, length)
  265. else:
  266. self.__literal()
  267. #raise ValueError("no parsing found", offset, length)
  268. self.__end()
  269. return self.getdata()
  270. class decompress(_bits_decompress):
  271. def __init__(self, data):
  272. _bits_decompress.__init__(self, data, tagsize=1)
  273. self.__pair = True # paired sequence
  274. self.__lastoffset = 0
  275. self.__functions = [
  276. self.__literal,
  277. self.__block,
  278. self.__shortblock,
  279. self.__singlebyte]
  280. return
  281. def __literal(self):
  282. self.read_literal()
  283. self.__pair = True
  284. return False
  285. def __block(self):
  286. b = self.read_variablenumber() # 2-
  287. if b == 2 and self.__pair : # reuse the same offset
  288. offset = self.__lastoffset
  289. length = self.read_variablenumber() # 2-
  290. else:
  291. high = b - 2 # 0-
  292. if self.__pair:
  293. high -= 1
  294. offset = (high << 8) + ord(self.read_byte())
  295. length = self.read_variablenumber() # 2-
  296. length += lengthdelta(offset)
  297. self.__lastoffset = offset
  298. self.back_copy(offset, length)
  299. self.__pair = False
  300. return False
  301. def __shortblock(self):
  302. b = ord(self.read_byte())
  303. if b <= 1: # likely 0
  304. return True
  305. length = 2 + (b & 0x01) # 2-3
  306. offset = b >> 1 # 1-127
  307. self.back_copy(offset, length)
  308. self.__lastoffset = offset
  309. self.__pair = False
  310. return False
  311. def __singlebyte(self):
  312. offset = self.read_fixednumber(4) # 0-15
  313. if offset:
  314. self.back_copy(offset)
  315. else:
  316. self.read_literal('\x00')
  317. self.__pair = True
  318. return False
  319. def do(self):
  320. """returns decompressed buffer and consumed bytes counter"""
  321. self.read_literal()
  322. while True:
  323. if self.__functions[self.read_setbits(3)]():
  324. break
  325. return self.out, self.getoffset()
  326. if __name__ == "__main__":
  327. # from kbp\test\aplib_test.py ######################################################################
  328. assert decompress(compress("a").do()).do() == ("a", 3)
  329. assert decompress(compress("ababababababab").do()).do() == ('ababababababab', 9)
  330. assert decompress(compress("aaaaaaaaaaaaaacaaaaaa").do()).do() == ('aaaaaaaaaaaaaacaaaaaa', 11)