PageRenderTime 131ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/External.LCA_RESTRICTED/Languages/IronPython/27/Lib/zipfile.py

http://github.com/IronLanguages/main
Python | 1541 lines | 1466 code | 35 blank | 40 comment | 34 complexity | a233db9d0d32893a1740ba85de9fc201 MD5 | raw file
Possible License(s): CPL-1.0, BSD-3-Clause, ISC, GPL-2.0, MPL-2.0-no-copyleft-exception
  1. """
  2. Read and write ZIP files.
  3. """
  4. import struct, os, time, sys, shutil
  5. import binascii, cStringIO, stat
  6. import io
  7. import re
  8. import string
  9. try:
  10. import zlib # We may need its compression method
  11. crc32 = zlib.crc32
  12. except ImportError:
  13. zlib = None
  14. crc32 = binascii.crc32
  15. __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
  16. "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  17. class BadZipfile(Exception):
  18. pass
  19. class LargeZipFile(Exception):
  20. """
  21. Raised when writing a zipfile, the zipfile requires ZIP64 extensions
  22. and those extensions are disabled.
  23. """
# Alias for BadZipfile: the exception raised by this module.
error = BadZipfile

# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() switch to (or demand)
# the ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
# Longest archive comment kept by the ZipFile.comment setter; longer
# comments are truncated with a warning.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
# Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Every format string starts with "<", i.e. little-endian with standard
# sizes and no padding.  The index constants that follow each format are
# positions in the tuple returned by struct.unpack() for that format.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout: 4-byte signature, four 16-bit fields, two 32-bit fields, one
# 16-bit field.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
  115. def _check_zipfile(fp):
  116. try:
  117. if _EndRecData(fp):
  118. return True # file has correct magic number
  119. except IOError:
  120. pass
  121. return False
  122. def is_zipfile(filename):
  123. """Quickly see if a file is a ZIP file by checking the magic number.
  124. The filename argument may be a file or file-like object too.
  125. """
  126. result = False
  127. try:
  128. if hasattr(filename, "read"):
  129. result = _check_zipfile(fp=filename)
  130. else:
  131. with open(filename, "rb") as fp:
  132. result = _check_zipfile(fp)
  133. except IOError:
  134. pass
  135. return result
  136. def _EndRecData64(fpin, offset, endrec):
  137. """
  138. Read the ZIP64 end-of-archive records and use that to update endrec
  139. """
  140. try:
  141. fpin.seek(offset - sizeEndCentDir64Locator, 2)
  142. except IOError:
  143. # If the seek fails, the file is not large enough to contain a ZIP64
  144. # end-of-archive record, so just return the end record we were given.
  145. return endrec
  146. data = fpin.read(sizeEndCentDir64Locator)
  147. if len(data) != sizeEndCentDir64Locator:
  148. return endrec
  149. sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
  150. if sig != stringEndArchive64Locator:
  151. return endrec
  152. if diskno != 0 or disks != 1:
  153. raise BadZipfile("zipfiles that span multiple disks are not supported")
  154. # Assume no 'zip64 extensible data'
  155. fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
  156. data = fpin.read(sizeEndCentDir64)
  157. if len(data) != sizeEndCentDir64:
  158. return endrec
  159. sig, sz, create_version, read_version, disk_num, disk_dir, \
  160. dircount, dircount2, dirsize, diroffset = \
  161. struct.unpack(structEndArchive64, data)
  162. if sig != stringEndArchive64:
  163. return endrec
  164. # Update the original endrec using data from the ZIP64 record
  165. endrec[_ECD_SIGNATURE] = sig
  166. endrec[_ECD_DISK_NUMBER] = disk_num
  167. endrec[_ECD_DISK_START] = disk_dir
  168. endrec[_ECD_ENTRIES_THIS_DISK] = dircount
  169. endrec[_ECD_ENTRIES_TOTAL] = dircount2
  170. endrec[_ECD_SIZE] = dirsize
  171. endrec[_ECD_OFFSET] = diroffset
  172. return endrec
  173. def _EndRecData(fpin):
  174. """Return data from the "End of Central Directory" record, or None.
  175. The data is a list of the nine items in the ZIP "End of central dir"
  176. record followed by a tenth item, the file seek offset of this record."""
  177. # Determine file size
  178. fpin.seek(0, 2)
  179. filesize = fpin.tell()
  180. # Check to see if this is ZIP file with no archive comment (the
  181. # "end of central directory" structure should be the last item in the
  182. # file if this is the case).
  183. try:
  184. fpin.seek(-sizeEndCentDir, 2)
  185. except IOError:
  186. return None
  187. data = fpin.read()
  188. if (len(data) == sizeEndCentDir and
  189. data[0:4] == stringEndArchive and
  190. data[-2:] == b"\000\000"):
  191. # the signature is correct and there's no comment, unpack structure
  192. endrec = struct.unpack(structEndArchive, data)
  193. endrec=list(endrec)
  194. # Append a blank comment and record start offset
  195. endrec.append("")
  196. endrec.append(filesize - sizeEndCentDir)
  197. # Try to read the "Zip64 end of central directory" structure
  198. return _EndRecData64(fpin, -sizeEndCentDir, endrec)
  199. # Either this is not a ZIP file, or it is a ZIP file with an archive
  200. # comment. Search the end of the file for the "end of central directory"
  201. # record signature. The comment is the last item in the ZIP file and may be
  202. # up to 64K long. It is assumed that the "end of central directory" magic
  203. # number does not appear in the comment.
  204. maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
  205. fpin.seek(maxCommentStart, 0)
  206. data = fpin.read()
  207. start = data.rfind(stringEndArchive)
  208. if start >= 0:
  209. # found the magic number; attempt to unpack and interpret
  210. recData = data[start:start+sizeEndCentDir]
  211. if len(recData) != sizeEndCentDir:
  212. # Zip file is corrupted.
  213. return None
  214. endrec = list(struct.unpack(structEndArchive, recData))
  215. commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
  216. comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
  217. endrec.append(comment)
  218. endrec.append(maxCommentStart + start)
  219. # Try to read the "Zip64 end of central directory" structure
  220. return _EndRecData64(fpin, maxCommentStart + start - filesize,
  221. endrec)
  222. # Unable to find a valid end of central directory structure
  223. return None
  224. class ZipInfo (object):
  225. """Class with attributes describing each file in the ZIP archive."""
  226. __slots__ = (
  227. 'orig_filename',
  228. 'filename',
  229. 'date_time',
  230. 'compress_type',
  231. 'comment',
  232. 'extra',
  233. 'create_system',
  234. 'create_version',
  235. 'extract_version',
  236. 'reserved',
  237. 'flag_bits',
  238. 'volume',
  239. 'internal_attr',
  240. 'external_attr',
  241. 'header_offset',
  242. 'CRC',
  243. 'compress_size',
  244. 'file_size',
  245. '_raw_time',
  246. )
  247. def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
  248. self.orig_filename = filename # Original file name in archive
  249. # Terminate the file name at the first null byte. Null bytes in file
  250. # names are used as tricks by viruses in archives.
  251. null_byte = filename.find(chr(0))
  252. if null_byte >= 0:
  253. filename = filename[0:null_byte]
  254. # This is used to ensure paths in generated ZIP files always use
  255. # forward slashes as the directory separator, as required by the
  256. # ZIP format specification.
  257. if os.sep != "/" and os.sep in filename:
  258. filename = filename.replace(os.sep, "/")
  259. self.filename = filename # Normalized file name
  260. self.date_time = date_time # year, month, day, hour, min, sec
  261. if date_time[0] < 1980:
  262. raise ValueError('ZIP does not support timestamps before 1980')
  263. # Standard values:
  264. self.compress_type = ZIP_STORED # Type of compression for the file
  265. self.comment = "" # Comment for each file
  266. self.extra = "" # ZIP extra data
  267. if sys.platform == 'win32':
  268. self.create_system = 0 # System which created ZIP archive
  269. else:
  270. # Assume everything else is unix-y
  271. self.create_system = 3 # System which created ZIP archive
  272. self.create_version = 20 # Version which created ZIP archive
  273. self.extract_version = 20 # Version needed to extract archive
  274. self.reserved = 0 # Must be zero
  275. self.flag_bits = 0 # ZIP flag bits
  276. self.volume = 0 # Volume number of file header
  277. self.internal_attr = 0 # Internal attributes
  278. self.external_attr = 0 # External file attributes
  279. # Other attributes are set by class ZipFile:
  280. # header_offset Byte offset to the file header
  281. # CRC CRC-32 of the uncompressed file
  282. # compress_size Size of the compressed file
  283. # file_size Size of the uncompressed file
  284. def FileHeader(self, zip64=None):
  285. """Return the per-file header as a string."""
  286. dt = self.date_time
  287. dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
  288. dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
  289. if self.flag_bits & 0x08:
  290. # Set these to zero because we write them after the file data
  291. CRC = compress_size = file_size = 0
  292. else:
  293. CRC = self.CRC
  294. compress_size = self.compress_size
  295. file_size = self.file_size
  296. extra = self.extra
  297. if zip64 is None:
  298. zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
  299. if zip64:
  300. fmt = '<HHQQ'
  301. extra = extra + struct.pack(fmt,
  302. 1, struct.calcsize(fmt)-4, file_size, compress_size)
  303. if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
  304. if not zip64:
  305. raise LargeZipFile("Filesize would require ZIP64 extensions")
  306. # File is larger than what fits into a 4 byte integer,
  307. # fall back to the ZIP64 extension
  308. file_size = 0xffffffff
  309. compress_size = 0xffffffff
  310. self.extract_version = max(45, self.extract_version)
  311. self.create_version = max(45, self.extract_version)
  312. filename, flag_bits = self._encodeFilenameFlags()
  313. header = struct.pack(structFileHeader, stringFileHeader,
  314. self.extract_version, self.reserved, flag_bits,
  315. self.compress_type, dostime, dosdate, CRC,
  316. compress_size, file_size,
  317. len(filename), len(extra))
  318. return header + filename + extra
  319. def _encodeFilenameFlags(self):
  320. if isinstance(self.filename, unicode):
  321. try:
  322. return self.filename.encode('ascii'), self.flag_bits
  323. except UnicodeEncodeError:
  324. return self.filename.encode('utf-8'), self.flag_bits | 0x800
  325. else:
  326. return self.filename, self.flag_bits
  327. def _decodeFilename(self):
  328. if self.flag_bits & 0x800:
  329. return self.filename.decode('utf-8')
  330. else:
  331. return self.filename
  332. def _decodeExtra(self):
  333. # Try to decode the extra field.
  334. extra = self.extra
  335. unpack = struct.unpack
  336. while len(extra) >= 4:
  337. tp, ln = unpack('<HH', extra[:4])
  338. if tp == 1:
  339. if ln >= 24:
  340. counts = unpack('<QQQ', extra[4:28])
  341. elif ln == 16:
  342. counts = unpack('<QQ', extra[4:20])
  343. elif ln == 8:
  344. counts = unpack('<Q', extra[4:12])
  345. elif ln == 0:
  346. counts = ()
  347. else:
  348. raise RuntimeError, "Corrupt extra field %s"%(ln,)
  349. idx = 0
  350. # ZIP64 extension (large files and/or large archives)
  351. if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
  352. self.file_size = counts[idx]
  353. idx += 1
  354. if self.compress_size == 0xFFFFFFFFL:
  355. self.compress_size = counts[idx]
  356. idx += 1
  357. if self.header_offset == 0xffffffffL:
  358. old = self.header_offset
  359. self.header_offset = counts[idx]
  360. idx+=1
  361. extra = extra[ln+4:]
  362. class _ZipDecrypter:
  363. """Class to handle decryption of files stored within a ZIP archive.
  364. ZIP supports a password-based form of encryption. Even though known
  365. plaintext attacks have been found against it, it is still useful
  366. to be able to get data out of such a file.
  367. Usage:
  368. zd = _ZipDecrypter(mypwd)
  369. plain_char = zd(cypher_char)
  370. plain_text = map(zd, cypher_text)
  371. """
  372. def _GenerateCRCTable():
  373. """Generate a CRC-32 table.
  374. ZIP encryption uses the CRC32 one-byte primitive for scrambling some
  375. internal keys. We noticed that a direct implementation is faster than
  376. relying on binascii.crc32().
  377. """
  378. poly = 0xedb88320
  379. table = [0] * 256
  380. for i in range(256):
  381. crc = i
  382. for j in range(8):
  383. if crc & 1:
  384. crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
  385. else:
  386. crc = ((crc >> 1) & 0x7FFFFFFF)
  387. table[i] = crc
  388. return table
  389. crctable = _GenerateCRCTable()
  390. def _crc32(self, ch, crc):
  391. """Compute the CRC32 primitive on one byte."""
  392. return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
  393. def __init__(self, pwd):
  394. self.key0 = 305419896
  395. self.key1 = 591751049
  396. self.key2 = 878082192
  397. for p in pwd:
  398. self._UpdateKeys(p)
  399. def _UpdateKeys(self, c):
  400. self.key0 = self._crc32(c, self.key0)
  401. self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
  402. self.key1 = (self.key1 * 134775813 + 1) & 4294967295
  403. self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
  404. def __call__(self, c):
  405. """Decrypt a single character."""
  406. c = ord(c)
  407. k = self.key2 | 2
  408. c = c ^ (((k * (k^1)) >> 8) & 255)
  409. c = chr(c)
  410. self._UpdateKeys(c)
  411. return c
# Names of ZIP compression methods, keyed by the numeric method id stored in
# the headers.  ZipExtFile.__init__ looks names up here when it reports an
# unsupported compression type.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
  431. class ZipExtFile(io.BufferedIOBase):
  432. """File-like object for reading an archive member.
  433. Is returned by ZipFile.open().
  434. """
  435. # Max size supported by decompressor.
  436. MAX_N = 1 << 31 - 1
  437. # Read from compressed files in 4k blocks.
  438. MIN_READ_SIZE = 4096
  439. # Search for universal newlines or line chunks.
  440. PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
  441. def __init__(self, fileobj, mode, zipinfo, decrypter=None,
  442. close_fileobj=False):
  443. self._fileobj = fileobj
  444. self._decrypter = decrypter
  445. self._close_fileobj = close_fileobj
  446. self._compress_type = zipinfo.compress_type
  447. self._compress_size = zipinfo.compress_size
  448. self._compress_left = zipinfo.compress_size
  449. if self._compress_type == ZIP_DEFLATED:
  450. self._decompressor = zlib.decompressobj(-15)
  451. elif self._compress_type != ZIP_STORED:
  452. descr = compressor_names.get(self._compress_type)
  453. if descr:
  454. raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
  455. else:
  456. raise NotImplementedError("compression type %d" % (self._compress_type,))
  457. self._unconsumed = ''
  458. self._readbuffer = ''
  459. self._offset = 0
  460. self._universal = 'U' in mode
  461. self.newlines = None
  462. # Adjust read size for encrypted files since the first 12 bytes
  463. # are for the encryption/password information.
  464. if self._decrypter is not None:
  465. self._compress_left -= 12
  466. self.mode = mode
  467. self.name = zipinfo.filename
  468. if hasattr(zipinfo, 'CRC'):
  469. self._expected_crc = zipinfo.CRC
  470. self._running_crc = crc32(b'') & 0xffffffff
  471. else:
  472. self._expected_crc = None
  473. def readline(self, limit=-1):
  474. """Read and return a line from the stream.
  475. If limit is specified, at most limit bytes will be read.
  476. """
  477. if not self._universal and limit < 0:
  478. # Shortcut common case - newline found in buffer.
  479. i = self._readbuffer.find('\n', self._offset) + 1
  480. if i > 0:
  481. line = self._readbuffer[self._offset: i]
  482. self._offset = i
  483. return line
  484. if not self._universal:
  485. return io.BufferedIOBase.readline(self, limit)
  486. line = ''
  487. while limit < 0 or len(line) < limit:
  488. readahead = self.peek(2)
  489. if readahead == '':
  490. return line
  491. #
  492. # Search for universal newlines or line chunks.
  493. #
  494. # The pattern returns either a line chunk or a newline, but not
  495. # both. Combined with peek(2), we are assured that the sequence
  496. # '\r\n' is always retrieved completely and never split into
  497. # separate newlines - '\r', '\n' due to coincidental readaheads.
  498. #
  499. match = self.PATTERN.search(readahead)
  500. newline = match.group('newline')
  501. if newline is not None:
  502. if self.newlines is None:
  503. self.newlines = []
  504. if newline not in self.newlines:
  505. self.newlines.append(newline)
  506. self._offset += len(newline)
  507. return line + '\n'
  508. chunk = match.group('chunk')
  509. if limit >= 0:
  510. chunk = chunk[: limit - len(line)]
  511. self._offset += len(chunk)
  512. line += chunk
  513. return line
  514. def peek(self, n=1):
  515. """Returns buffered bytes without advancing the position."""
  516. if n > len(self._readbuffer) - self._offset:
  517. chunk = self.read(n)
  518. if len(chunk) > self._offset:
  519. self._readbuffer = chunk + self._readbuffer[self._offset:]
  520. self._offset = 0
  521. else:
  522. self._offset -= len(chunk)
  523. # Return up to 512 bytes to reduce allocation overhead for tight loops.
  524. return self._readbuffer[self._offset: self._offset + 512]
  525. def readable(self):
  526. return True
  527. def read(self, n=-1):
  528. """Read and return up to n bytes.
  529. If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
  530. """
  531. buf = ''
  532. if n is None:
  533. n = -1
  534. while True:
  535. if n < 0:
  536. data = self.read1(n)
  537. elif n > len(buf):
  538. data = self.read1(n - len(buf))
  539. else:
  540. return buf
  541. if len(data) == 0:
  542. return buf
  543. buf += data
  544. def _update_crc(self, newdata, eof):
  545. # Update the CRC using the given data.
  546. if self._expected_crc is None:
  547. # No need to compute the CRC if we don't have a reference value
  548. return
  549. self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
  550. # Check the CRC if we're at the end of the file
  551. if eof and self._running_crc != self._expected_crc:
  552. raise BadZipfile("Bad CRC-32 for file %r" % self.name)
  553. def read1(self, n):
  554. """Read up to n bytes with at most one read() system call."""
  555. # Simplify algorithm (branching) by transforming negative n to large n.
  556. if n < 0 or n is None:
  557. n = self.MAX_N
  558. # Bytes available in read buffer.
  559. len_readbuffer = len(self._readbuffer) - self._offset
  560. # Read from file.
  561. if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
  562. nbytes = n - len_readbuffer - len(self._unconsumed)
  563. nbytes = max(nbytes, self.MIN_READ_SIZE)
  564. nbytes = min(nbytes, self._compress_left)
  565. data = self._fileobj.read(nbytes)
  566. self._compress_left -= len(data)
  567. if data and self._decrypter is not None:
  568. data = ''.join(map(self._decrypter, data))
  569. if self._compress_type == ZIP_STORED:
  570. self._update_crc(data, eof=(self._compress_left==0))
  571. self._readbuffer = self._readbuffer[self._offset:] + data
  572. self._offset = 0
  573. else:
  574. # Prepare deflated bytes for decompression.
  575. self._unconsumed += data
  576. # Handle unconsumed data.
  577. if (len(self._unconsumed) > 0 and n > len_readbuffer and
  578. self._compress_type == ZIP_DEFLATED):
  579. data = self._decompressor.decompress(
  580. self._unconsumed,
  581. max(n - len_readbuffer, self.MIN_READ_SIZE)
  582. )
  583. self._unconsumed = self._decompressor.unconsumed_tail
  584. eof = len(self._unconsumed) == 0 and self._compress_left == 0
  585. if eof:
  586. data += self._decompressor.flush()
  587. self._update_crc(data, eof=eof)
  588. self._readbuffer = self._readbuffer[self._offset:] + data
  589. self._offset = 0
  590. # Read from buffer.
  591. data = self._readbuffer[self._offset: self._offset + n]
  592. self._offset += len(data)
  593. return data
  594. def close(self):
  595. try :
  596. if self._close_fileobj:
  597. self._fileobj.close()
  598. finally:
  599. super(ZipExtFile, self).close()
  600. class ZipFile(object):
  601. """ Class with methods to open, read, write, close, list zip files.
  602. z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
  603. file: Either the path to the file, or a file-like object.
  604. If it is a path, the file will be opened and closed by ZipFile.
  605. mode: The mode can be either read "r", write "w" or append "a".
  606. compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
  607. allowZip64: if True ZipFile will create files with ZIP64 extensions when
  608. needed, otherwise it will raise an exception when this would
  609. be necessary.
  610. """
  611. fp = None # Set here since __del__ checks it
  612. def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
  613. """Open the ZIP file with mode read "r", write "w" or append "a"."""
  614. if mode not in ("r", "w", "a"):
  615. raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
  616. if compression == ZIP_STORED:
  617. pass
  618. elif compression == ZIP_DEFLATED:
  619. if not zlib:
  620. raise RuntimeError,\
  621. "Compression requires the (missing) zlib module"
  622. else:
  623. raise RuntimeError, "That compression method is not supported"
  624. self._allowZip64 = allowZip64
  625. self._didModify = False
  626. self.debug = 0 # Level of printing: 0 through 3
  627. self.NameToInfo = {} # Find file info given name
  628. self.filelist = [] # List of ZipInfo instances for archive
  629. self.compression = compression # Method of compression
  630. self.mode = key = mode.replace('b', '')[0]
  631. self.pwd = None
  632. self._comment = ''
  633. # Check if we were passed a file-like object
  634. if isinstance(file, basestring):
  635. self._filePassed = 0
  636. self.filename = file
  637. modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
  638. try:
  639. self.fp = open(file, modeDict[mode])
  640. except IOError:
  641. if mode == 'a':
  642. mode = key = 'w'
  643. self.fp = open(file, modeDict[mode])
  644. else:
  645. raise
  646. else:
  647. self._filePassed = 1
  648. self.fp = file
  649. self.filename = getattr(file, 'name', None)
  650. try:
  651. if key == 'r':
  652. self._RealGetContents()
  653. elif key == 'w':
  654. # set the modified flag so central directory gets written
  655. # even if no files are added to the archive
  656. self._didModify = True
  657. elif key == 'a':
  658. try:
  659. # See if file is a zip file
  660. self._RealGetContents()
  661. # seek to start of directory and overwrite
  662. self.fp.seek(self.start_dir, 0)
  663. except BadZipfile:
  664. # file is not a zip file, just append
  665. self.fp.seek(0, 2)
  666. # set the modified flag so central directory gets written
  667. # even if no files are added to the archive
  668. self._didModify = True
  669. else:
  670. raise RuntimeError('Mode must be "r", "w" or "a"')
  671. except:
  672. fp = self.fp
  673. self.fp = None
  674. if not self._filePassed:
  675. fp.close()
  676. raise
  677. def __enter__(self):
  678. return self
  679. def __exit__(self, type, value, traceback):
  680. self.close()
  681. def _RealGetContents(self):
  682. """Read in the table of contents for the ZIP file."""
  683. fp = self.fp
  684. try:
  685. endrec = _EndRecData(fp)
  686. except IOError:
  687. raise BadZipfile("File is not a zip file")
  688. if not endrec:
  689. raise BadZipfile, "File is not a zip file"
  690. if self.debug > 1:
  691. print endrec
  692. size_cd = endrec[_ECD_SIZE] # bytes in central directory
  693. offset_cd = endrec[_ECD_OFFSET] # offset of central directory
  694. self._comment = endrec[_ECD_COMMENT] # archive comment
  695. # "concat" is zero, unless zip was concatenated to another file
  696. concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
  697. if endrec[_ECD_SIGNATURE] == stringEndArchive64:
  698. # If Zip64 extension structures are present, account for them
  699. concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
  700. if self.debug > 2:
  701. inferred = concat + offset_cd
  702. print "given, inferred, offset", offset_cd, inferred, concat
  703. # self.start_dir: Position of start of central directory
  704. self.start_dir = offset_cd + concat
  705. fp.seek(self.start_dir, 0)
  706. data = fp.read(size_cd)
  707. fp = cStringIO.StringIO(data)
  708. total = 0
  709. while total < size_cd:
  710. centdir = fp.read(sizeCentralDir)
  711. if len(centdir) != sizeCentralDir:
  712. raise BadZipfile("Truncated central directory")
  713. centdir = struct.unpack(structCentralDir, centdir)
  714. if centdir[_CD_SIGNATURE] != stringCentralDir:
  715. raise BadZipfile("Bad magic number for central directory")
  716. if self.debug > 2:
  717. print centdir
  718. filename = fp.read(centdir[_CD_FILENAME_LENGTH])
  719. # Create ZipInfo instance to store file information
  720. x = ZipInfo(filename)
  721. x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
  722. x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
  723. x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
  724. (x.create_version, x.create_system, x.extract_version, x.reserved,
  725. x.flag_bits, x.compress_type, t, d,
  726. x.CRC, x.compress_size, x.file_size) = centdir[1:12]
  727. x.volume, x.internal_attr, x.external_attr = centdir[15:18]
  728. # Convert date/time code to (year, month, day, hour, min, sec)
  729. x._raw_time = t
  730. x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
  731. t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
  732. x._decodeExtra()
  733. x.header_offset = x.header_offset + concat
  734. x.filename = x._decodeFilename()
  735. self.filelist.append(x)
  736. self.NameToInfo[x.filename] = x
  737. # update total bytes read from central directory
  738. total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
  739. + centdir[_CD_EXTRA_FIELD_LENGTH]
  740. + centdir[_CD_COMMENT_LENGTH])
  741. if self.debug > 2:
  742. print "total", total
  743. def namelist(self):
  744. """Return a list of file names in the archive."""
  745. l = []
  746. for data in self.filelist:
  747. l.append(data.filename)
  748. return l
  749. def infolist(self):
  750. """Return a list of class ZipInfo instances for files in the
  751. archive."""
  752. return self.filelist
  753. def printdir(self):
  754. """Print a table of contents for the zip file."""
  755. print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
  756. for zinfo in self.filelist:
  757. date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
  758. print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
  759. def testzip(self):
  760. """Read all the files and check the CRC."""
  761. chunk_size = 2 ** 20
  762. for zinfo in self.filelist:
  763. try:
  764. # Read by chunks, to avoid an OverflowError or a
  765. # MemoryError with very large embedded files.
  766. with self.open(zinfo.filename, "r") as f:
  767. while f.read(chunk_size): # Check CRC-32
  768. pass
  769. except BadZipfile:
  770. return zinfo.filename
  771. def getinfo(self, name):
  772. """Return the instance of ZipInfo given 'name'."""
  773. info = self.NameToInfo.get(name)
  774. if info is None:
  775. raise KeyError(
  776. 'There is no item named %r in the archive' % name)
  777. return info
  778. def setpassword(self, pwd):
  779. """Set default password for encrypted files."""
  780. self.pwd = pwd
  781. @property
  782. def comment(self):
  783. """The comment text associated with the ZIP file."""
  784. return self._comment
  785. @comment.setter
  786. def comment(self, comment):
  787. # check for valid comment length
  788. if len(comment) > ZIP_MAX_COMMENT:
  789. import warnings
  790. warnings.warn('Archive comment is too long; truncating to %d bytes'
  791. % ZIP_MAX_COMMENT, stacklevel=2)
  792. comment = comment[:ZIP_MAX_COMMENT]
  793. self._comment = comment
  794. self._didModify = True
  795. def read(self, name, pwd=None):
  796. """Return file bytes (as a string) for name."""
  797. return self.open(name, "r", pwd).read()
  798. def open(self, name, mode="r", pwd=None):
  799. """Return file-like object for 'name'."""
  800. if mode not in ("r", "U", "rU"):
  801. raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
  802. if not self.fp:
  803. raise RuntimeError, \
  804. "Attempt to read ZIP archive that was already closed"
  805. # Only open a new file for instances where we were not
  806. # given a file object in the constructor
  807. if self._filePassed:
  808. zef_file = self.fp
  809. should_close = False
  810. else:
  811. zef_file = open(self.filename, 'rb')
  812. should_close = True
  813. try:
  814. # Make sure we have an info object
  815. if isinstance(name, ZipInfo):
  816. # 'name' is already an info object
  817. zinfo = name
  818. else:
  819. # Get info object for name
  820. zinfo = self.getinfo(name)
  821. zef_file.seek(zinfo.header_offset, 0)
  822. # Skip the file header:
  823. fheader = zef_file.read(sizeFileHeader)
  824. if len(fheader) != sizeFileHeader:
  825. raise BadZipfile("Truncated file header")
  826. fheader = struct.unpack(structFileHeader, fheader)
  827. if fheader[_FH_SIGNATURE] != stringFileHeader:
  828. raise BadZipfile("Bad magic number for file header")
  829. fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
  830. if fheader[_FH_EXTRA_FIELD_LENGTH]:
  831. zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
  832. if fname != zinfo.orig_filename:
  833. raise BadZipfile, \
  834. 'File name in directory "%s" and header "%s" differ.' % (
  835. zinfo.orig_filename, fname)
  836. # check for encrypted flag & handle password
  837. is_encrypted = zinfo.flag_bits & 0x1
  838. zd = None
  839. if is_encrypted:
  840. if not pwd:
  841. pwd = self.pwd
  842. if not pwd:
  843. raise RuntimeError, "File %s is encrypted, " \
  844. "password required for extraction" % name
  845. zd = _ZipDecrypter(pwd)
  846. # The first 12 bytes in the cypher stream is an encryption header
  847. # used to strengthen the algorithm. The first 11 bytes are
  848. # completely random, while the 12th contains the MSB of the CRC,
  849. # or the MSB of the file time depending on the header type
  850. # and is used to check the correctness of the password.
  851. bytes = zef_file.read(12)
  852. h = map(zd, bytes[0:12])
  853. if zinfo.flag_bits & 0x8:
  854. # compare against the file type from extended local headers
  855. check_byte = (zinfo._raw_time >> 8) & 0xff
  856. else:
  857. # compare against the CRC otherwise
  858. check_byte = (zinfo.CRC >> 24) & 0xff
  859. if ord(h[11]) != check_byte:
  860. raise RuntimeError("Bad password for file", name)
  861. return ZipExtFile(zef_file, mode, zinfo, zd,
  862. close_fileobj=should_close)
  863. except:
  864. if should_close:
  865. zef_file.close()
  866. raise
  867. def extract(self, member, path=None, pwd=None):
  868. """Extract a member from the archive to the current working directory,
  869. using its full name. Its file information is extracted as accurately
  870. as possible. `member' may be a filename or a ZipInfo object. You can
  871. specify a different directory using `path'.
  872. """
  873. if not isinstance(member, ZipInfo):
  874. member = self.getinfo(member)
  875. if path is None:
  876. path = os.getcwd()
  877. return self._extract_member(member, path, pwd)
  878. def extractall(self, path=None, members=None, pwd=None):
  879. """Extract all members from the archive to the current working
  880. directory. `path' specifies a different directory to extract to.
  881. `members' is optional and must be a subset of the list returned
  882. by namelist().
  883. """
  884. if members is None:
  885. members = self.namelist()
  886. for zipinfo in members:
  887. self.extract(zipinfo, path, pwd)
  888. def _extract_member(self, member, targetpath, pwd):
  889. """Extract the ZipInfo object 'member' to a physical
  890. file on the path targetpath.
  891. """
  892. # build the destination pathname, replacing
  893. # forward slashes to platform specific separators.
  894. arcname = member.filename.replace('/', os.path.sep)
  895. if os.path.altsep:
  896. arcname = arcname.replace(os.path.altsep, os.path.sep)
  897. # interpret absolute pathname as relative, remove drive letter or
  898. # UNC path, redundant separators, "." and ".." components.
  899. arcname = os.path.splitdrive(arcname)[1]
  900. arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
  901. if x not in ('', os.path.curdir, os.path.pardir))
  902. if os.path.sep == '\\':
  903. # filter illegal characters on Windows
  904. illegal = ':<>|"?*'
  905. if isinstance(arcname, unicode):
  906. table = {ord(c): ord('_') for c in illegal}
  907. else:
  908. table = string.maketrans(illegal, '_' * len(illegal))
  909. arcname = arcname.translate(table)
  910. # remove trailing dots
  911. arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
  912. arcname = os.path.sep.join(x for x in arcname if x)
  913. targetpath = os.path.join(targetpath, arcname)
  914. targetpath = os.path.normpath(targetpath)
  915. # Create all upper directories if necessary.
  916. upperdirs = os.path.dirname(targetpath)
  917. if upperdirs and not os.path.exists(upperdirs):
  918. os.makedirs(upperdirs)
  919. if member.filename[-1] == '/':
  920. if not os.path.isdir(targetpath):
  921. os.mkdir(targetpath)
  922. return targetpath
  923. with self.open(member, pwd=pwd) as source, \
  924. file(targetpath, "wb") as target:
  925. shutil.copyfileobj(source, target)
  926. return targetpath
  927. def _writecheck(self, zinfo):
  928. """Check for errors before writing a file to the archive."""
  929. if zinfo.filename in self.NameToInfo:
  930. import warnings
  931. warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
  932. if self.mode not in ("w", "a"):
  933. raise RuntimeError, 'write() requires mode "w" or "a"'
  934. if not self.fp:
  935. raise RuntimeError, \
  936. "Attempt to write ZIP archive that was already closed"
  937. if zinfo.compress_type == ZIP_DEFLATED and not zlib:
  938. raise RuntimeError, \
  939. "Compression requires the (missing) zlib module"
  940. if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
  941. raise RuntimeError, \
  942. "That compression method is not supported"
  943. if not self._allowZip64:
  944. requires_zip64 = None
  945. if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
  946. requires_zip64 = "Files count"
  947. elif zinfo.file_size > ZIP64_LIMIT:
  948. requires_zip64 = "Filesize"
  949. elif zinfo.header_offset > ZIP64_LIMIT:
  950. requires_zip64 = "Zipfile size"
  951. if requires_zip64:
  952. raise LargeZipFile(requires_zip64 +
  953. " would require ZIP64 extensions")
  954. def write(self, filename, arcname=None, compress_type=None):
  955. """Put the bytes from filename into the archive under the name
  956. arcname."""
  957. if not self.fp:
  958. raise RuntimeError(
  959. "Attempt to write to ZIP archive that was already closed")
  960. st = os.stat(filename)
  961. isdir = stat.S_ISDIR(st.st_mode)
  962. mtime = time.localtime(st.st_mtime)
  963. date_time = mtime[0:6]
  964. # Create ZipInfo instance to store file information
  965. if arcname is None:
  966. arcname = filename
  967. arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
  968. while arcname[0] in (os.sep, os.altsep):
  969. arcname = arcname[1:]
  970. if isdir:
  971. arcname += '/'
  972. zinfo = ZipInfo(arcname, date_time)
  973. zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
  974. if isdir:
  975. zinfo.compress_type = ZIP_STORED
  976. elif compress_type is None:
  977. zinfo.compress_type = self.compression
  978. else:
  979. zinfo.compress_type = compress_type
  980. zinfo.file_size = st.st_size
  981. zinfo.flag_bits = 0x00
  982. zinfo.header_offset = self.fp.tell() # Start of header bytes
  983. self._writecheck(zinfo)
  984. self._didModify = True
  985. if isdir:
  986. zinfo.file_size = 0
  987. zinfo.compress_size = 0
  988. zinfo.CRC = 0
  989. zinfo.external_attr |= 0x10 # MS-DOS directory flag
  990. self.filelist.append(zinfo)
  991. self.NameToInfo[zinfo.filename] = zinfo
  992. self.fp.write(zinfo.FileHeader(False))
  993. return
  994. with open(filename, "rb") as fp:
  995. # Must overwrite CRC and sizes with correct data later
  996. zinfo.CRC = CRC = 0
  997. zinfo.compress_size = compress_size = 0
  998. # Compressed size can be larger than uncompressed size
  999. zip64 = self._allowZip64 and \
  1000. zinfo.file_size * 1.05 > ZIP64_LIMIT
  1001. self.fp.write(zinfo.FileHeader(zip64))
  1002. if zinfo.compress_type == ZIP_DEFLATED:
  1003. cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
  1004. zlib.DEFLATED, -15)
  1005. else:
  1006. cmpr = None
  1007. file_size = 0
  1008. while 1:
  1009. buf = fp.read(1024 * 8)
  1010. if not buf:
  1011. break
  1012. file_size = file_size + len(buf)
  1013. CRC = crc32(buf, CRC) & 0xffffffff
  1014. if cmpr:
  1015. buf = cmpr.compress(buf)
  1016. compress_size = compress_size + len(buf)
  1017. self.fp.write(buf)
  1018. if cmpr:
  1019. buf = cmpr.flush()
  1020. compress_size = compress_size + len(buf)
  1021. self.fp.write(buf)
  1022. zinfo.compress_size = compress_size
  1023. else:
  1024. zinfo.compress_size = file_size
  1025. zinfo.CRC = CRC
  1026. zinfo.file_size = file_size
  1027. if not zip64 and self._allowZip64:
  1028. if file_size > ZIP64_LIMIT:
  1029. raise RuntimeError('File size has increased during compressing')
  1030. if compress_size > ZIP64_LIMIT:
  1031. raise RuntimeError('Compressed size larger than uncompressed size')
  1032. # Seek backwards and write file header (which will now include
  1033. # correct CRC and file sizes)
  1034. position = self.fp.tell() # Preserve current position in file
  1035. self.fp.seek(zinfo.header_offset, 0)
  1036. self.fp.write(zinfo.FileHeader(zip64))
  1037. self.fp.seek(position, 0)
  1038. self.filelist.append(zinfo)
  1039. self.NameToInfo[zinfo.filename] = zinfo
  1040. def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
  1041. """Write a file into the archive. The contents is the string
  1042. 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
  1043. the name of the file in the archive."""
  1044. if not isinstance(zinfo_or_arcname, ZipInfo):
  1045. zinfo = ZipInfo(filename=zinfo_or_arcname,
  1046. date_time=time.localtime(time.time())[:6])
  1047. zinfo.compress_type = self.compression
  1048. if zinfo.filename[-1] == '/':
  1049. zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
  1050. zinfo.external_attr |= 0x10 # MS-DOS directory flag
  1051. else:
  1052. zinfo.external_attr = 0o600 << 16 # ?rw-------
  1053. else:
  1054. zinfo = zinfo_or_arcname
  1055. if not self.fp:
  1056. raise RuntimeError(
  1057. "Attempt to write to ZIP archive that was already closed")
  1058. if compress_type is not None:
  1059. zinfo.compress_type = compress_type
  1060. zinfo.file_size = len(bytes) # Uncompressed size
  1061. zinfo.header_offset = self.fp.tell() # Start of header bytes
  1062. self._writecheck(zinfo)
  1063. self._didModify = True
  1064. zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
  1065. if zinfo.compress_type == ZIP_DEFLATED:
  1066. co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
  1067. zlib.DEFLATED, -15)
  1068. bytes = co.compress(bytes) + co.flush()
  1069. zinfo.compress_size = len(bytes) # Compressed size
  1070. else:
  1071. zinfo.compress_size = zinfo.file_size
  1072. zip64 = zinfo.file_size > ZIP64_LIMIT or \
  1073. zinfo.compress_size > ZIP64_LIMIT
  1074. if zip64 and not self._allowZip64:
  1075. raise LargeZipFile("Filesize would require ZIP64 extensions")
  1076. self.fp.write(zinfo.FileHeader(zip64))
  1077. self.fp.write(bytes)
  1078. if zinfo.flag_bits & 0x08:
  1079. # Write CRC and file sizes after the file data
  1080. fmt = '<LQQ' if zip64 else '<LLL'
  1081. self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
  1082. zinfo.file_size))
  1083. self.fp.flush()
  1084. self.filelist.append(zinfo)
  1085. self.NameToInfo[zinfo.filename] = zinfo
  1086. def __del__(self):
  1087. """Call the "close()" method in case the user forgot."""
  1088. self.close()
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an instance whose file object is already None
        does nothing.  The ending records (the central directory followed
        by the end-of-archive record) are written only when the archive
        was modified.  Raises LargeZipFile when the end record would need
        ZIP64 extensions that are not allowed.  The instance's file
        object is always reset to None afterwards, and it is closed
        unless it was supplied by the caller.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                # The central directory starts at the current position.
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    dt = zinfo.date_time
                    # Pack the date as (year - 1980, month, day) and the
                    # time as (hour, minute, second // 2) into bit fields.
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # 'extra' collects the values that exceed ZIP64_LIMIT;
                    # their fixed-width fields get 0xffffffff instead.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffffL
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        # (id 1, payload length, then one 64-bit value per
                        # collected item, placed before the existing data)
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        # A ZIP64 field raises both versions to at least 45.
                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Dump the values that were being packed to stderr,
                        # then let the warning propagate.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    # Each entry is the fixed record followed by the
                    # file name, the extra data and the member comment.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                # The central directory ends at the current position.
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                # Name of the first quantity that exceeds its limit, if any.
                requires_zip64 = None
                if centDirCount > ZIP_FILECOUNT_LIMIT:
                    requires_zip64 = "Files count"
                elif centDirOffset > ZIP64_LIMIT:
                    requires_zip64 = "Central directory offset"
                elif centDirSize > ZIP64_LIMIT:
                    requires_zip64 = "Central directory size"
                if requires_zip64:
                    # Need to write the ZIP64 end-of-archive records
                    if not self._allowZip64:
                        raise LargeZipFile(requires_zip64 +
                                           " would require ZIP64 extensions")
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # The locator stores pos2, where the ZIP64 end record
                    # was just written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Cap the values at the largest number the fields of
                    # the regular end record can carry.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                # The archive comment follows the end record.
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Reset self.fp first so that a later close() returns early,
            # even when an exception is on its way out.
            fp = self.fp
            self.fp = None
            # A file object passed in by the caller is left open.
            if not self._filePassed:
                fp.close()
  1191. class PyZipFile(ZipFile):
  1192. """Class to create ZIP archives with Python library files and packages."""
  1193. def writepy(self, pathname, basename = ""):
  1194. """Add all files from "pathname" to the ZIP archive.
  1195. If pathname is a package directory, search the directory and
  1196. all package subdirectories recursively for all *.py and enter
  1197. the modules into the archive. If pathname is a plain
  1198. directory, listdir *.py and enter all modules. Else, pathname
  1199. must be a Python *.py file and the module will be put into the
  1200. archive. Added modules are always module.pyo or module.pyc.
  1201. This method will compile the module.py into module.pyc if
  1202. necessary.
  1203. """
  1204. dir, name = os.path.split(pathname)
  1205. if os.path.isdir(pathname):
  1206. initname = os.path.join(pathname, "__init__.py")
  1207. if os.path.isfile(initname):
  1208. # This is a package directory, add it
  1209. if basename:
  1210. basename = "%s/%s" % (basename, name)
  1211. else:
  1212. basename = name
  1213. if self.debug:
  1214. print "Adding package in", pathname, "as", basename
  1215. fname, arcname = self._get_codename(initname[0:-3], basename)
  1216. if self.debug:
  1217. print "Adding", arcname
  1218. self.write(fname, arcname)
  1219. dirlist = os.listdir(pathname)
  1220. dirlist.remove("__init__.py")
  1221. # Add all *.py files and package subdirectories
  1222. for filename in dirlist:
  1223. path = os.path.join(pathname, filename)
  1224. root, ext = os.path.splitext(filename)
  1225. if os.path.isdir(path):
  1226. if os.path.isfile(os.path.join(path, "__init__.py")):
  1227. # This is a package directory, add it
  1228. self.writepy(path, basename) # Recursive call
  1229. elif ext == ".py":
  1230. fname, arcname = self._get_codename(path[0:-3],
  1231. basename)
  1232. if self.debug:
  1233. print "Adding", arcname
  1234. self.write(fname, arcname)
  1235. else:
  1236. # This is NOT a package directory, add its files at top level
  1237. if self.debug:
  1238. print "Adding files from directory", pathname
  1239. for filename in os.listdir(pathname):
  1240. path = os.path.join(pathname, filename)
  1241. root, ext = os.path.splitext(filename)
  1242. if ext == ".py":
  1243. fname, arcname = self._get_codename(path[0:-3],
  1244. basename)
  1245. if self.debug:
  1246. print "Adding", arcname
  1247. self.write(fname, arcname)
  1248. else:
  1249. if pathname[-3:] != ".py":
  1250. raise RuntimeError, \
  1251. 'Files added with writepy() must end with ".py"'
  1252. fname, arcname = self._get_codename(pathname[0:-3], basename)
  1253. if self.debug:
  1254. print "Adding file", arcname
  1255. self.write(fname, arcname)
  1256. def _get_codename(self, pathname, basename):
  1257. """Return (filename, archivename) for the path.
  1258. Given a module name path, return the correct file path and
  1259. archive name, compiling if necessary. For example, given
  1260. /python/lib/string, return (/python/lib/string.pyc, string).
  1261. """
  1262. file_py = pathname + ".py"
  1263. file_pyc = pathname + ".pyc"
  1264. file_pyo = pathname + ".pyo"
  1265. if os.path.isfile(file_pyo) and \
  1266. os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
  1267. fname = file_pyo # Use .pyo file
  1268. elif not os.path.isfile(file_pyc) or \
  1269. os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
  1270. import py_compile
  1271. if self.debug:
  1272. print "Compiling", file_py
  1273. try:
  1274. py_compile.compile(file_py, file_pyc, None, True)
  1275. except py_compile.PyCompileError,err:
  1276. print err.msg
  1277. fname = file_pyc
  1278. else:
  1279. fname = file_pyc
  1280. archivename = os.path.split(fname)[1]
  1281. if basename:
  1282. archivename = "%s/%s" % (basename, archivename)
  1283. return (fname, archivename)
  1284. def main(args = None):
  1285. import textwrap
  1286. USAGE=textwrap.dedent("""\
  1287. Usage:
  1288. zipfile.py -l zipfile.zip # Show listing of a zipfile
  1289. zipfile.py -t zipfile.zip # Test if a zipfile is valid
  1290. zipfile.py -e zipfile.zip target # Extract zipfile into target dir
  1291. zipfile.py -c zipfile.zip src ... # Create zipfile from sources
  1292. """)
  1293. if args is None:
  1294. args = sys.argv[1:]
  1295. if not args or args[0] not in ('-l', '-c', '-e', '-t'):
  1296. print USAGE
  1297. sys.exit(1)
  1298. if args[0] == '-l':
  1299. if len(args) != 2:
  1300. print USAGE
  1301. sys.exit(1)
  1302. with ZipFile(args[1], 'r') as zf:
  1303. zf.printdir()
  1304. elif args[0] == '-t':
  1305. if len(args) != 2:
  1306. print USAGE
  1307. sys.exit(1)
  1308. with ZipFile(args[1], 'r') as zf:
  1309. badfile = zf.testzip()
  1310. if badfile:
  1311. print("The following enclosed file is corrupted: {!r}".format(badfile))
  1312. print "Done testing"
  1313. elif args[0] == '-e':
  1314. if len(args) != 3:
  1315. print USAGE
  1316. sys.exit(1)
  1317. with ZipFile(args[1], 'r') as zf:
  1318. zf.extractall(args[2])
  1319. elif args[0] == '-c':
  1320. if len(args) < 3:
  1321. print USAGE
  1322. sys.exit(1)
  1323. def addToZip(zf, path, zippath):
  1324. if os.path.isfile(path):
  1325. zf.write(path, zippath, ZIP_DEFLATED)
  1326. elif os.path.isdir(path):
  1327. if zippath:
  1328. zf.write(path, zippath)
  1329. for nm in os.listdir(path):
  1330. addToZip(zf,
  1331. os.path.join(path, nm), os.path.join(zippath, nm))
  1332. # else: ignore
  1333. with ZipFile(args[1], 'w', allowZip64=True) as zf:
  1334. for path in args[2:]:
  1335. zippath = os.path.basename(path)
  1336. if not zippath:
  1337. zippath = os.path.basename(os.path.dirname(path))
  1338. if zippath in ('', os.curdir, os.pardir):
  1339. zippath = ''
  1340. addToZip(zf, path, zippath)
# Run the command-line interface only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()