PageRenderTime 57ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/astropy/io/fits/hdu/compressed.py

https://gitlab.com/Rockyspade/astropy
Python | 1133 lines | 804 code | 152 blank | 177 comment | 204 complexity | 6f517256587bde95146c9baf1fc92a38 MD5 | raw file
  1. # Licensed under a 3-clause BSD style license - see PYFITS.rst
  2. import ctypes
  3. import gc
  4. import math
  5. import re
  6. import time
  7. import warnings
  8. import numpy as np
  9. from .base import DELAYED, ExtensionHDU
  10. from .image import _ImageBaseHDU, ImageHDU
  11. from .table import BinTableHDU
  12. from ..card import Card
  13. from ..column import Column, ColDefs, TDEF_RE
  14. from ..column import KEYWORD_NAMES as TABLE_KEYWORD_NAMES
  15. from ..fitsrec import FITS_rec
  16. from ..header import Header
  17. from ..util import (_is_pseudo_unsigned, _unsigned_zero, _is_int,
  18. _get_array_mmap)
  19. from ....extern.six import string_types, iteritems
  20. from ....utils import lazyproperty, deprecated
  21. from ....utils.compat import ignored
  22. from ....utils.exceptions import (AstropyPendingDeprecationWarning,
  23. AstropyUserWarning)
  24. try:
  25. from .. import compression
  26. COMPRESSION_SUPPORTED = COMPRESSION_ENABLED = True
  27. except ImportError:
  28. COMPRESSION_SUPPORTED = COMPRESSION_ENABLED = False
  29. # Quantization dithering method constants; these are right out of fitsio.h
  30. NO_DITHER = -1
  31. SUBTRACTIVE_DITHER_1 = 1
  32. SUBTRACTIVE_DITHER_2 = 2
  33. QUANTIZE_METHOD_NAMES = {
  34. NO_DITHER: 'NO_DITHER',
  35. SUBTRACTIVE_DITHER_1: 'SUBTRACTIVE_DITHER_1',
  36. SUBTRACTIVE_DITHER_2: 'SUBTRACTIVE_DITHER_2'
  37. }
  38. DITHER_SEED_CLOCK = 0
  39. DITHER_SEED_CHECKSUM = -1
  40. # Default compression parameter values
  41. DEFAULT_COMPRESSION_TYPE = 'RICE_1'
  42. DEFAULT_QUANTIZE_LEVEL = 16.
  43. DEFAULT_QUANTIZE_METHOD = NO_DITHER
  44. DEFAULT_DITHER_SEED = DITHER_SEED_CLOCK
  45. DEFAULT_HCOMP_SCALE = 0
  46. DEFAULT_HCOMP_SMOOTH = 0
  47. DEFAULT_BLOCK_SIZE = 32
  48. DEFAULT_BYTE_PIX = 4
  49. CMTYPE_ALIASES = {}
  50. # CFITSIO version-specific features
  51. if COMPRESSION_SUPPORTED:
  52. try:
  53. CFITSIO_SUPPORTS_GZIPDATA = compression.CFITSIO_VERSION >= 3.28
  54. CFITSIO_SUPPORTS_Q_FORMAT = compression.CFITSIO_VERSION >= 3.35
  55. if compression.CFITSIO_VERSION >= 3.35:
  56. CMTYPE_ALIASES['RICE_ONE'] = 'RICE_1'
  57. except AttributeError:
  58. # This generally shouldn't happen unless running setup.py in an
  59. # environment where an old build of pyfits exists
  60. CFITSIO_SUPPORTS_GZIPDATA = True
  61. CFITSIO_SUPPORTS_Q_FORMAT = True
  62. COMPRESSION_KEYWORDS = set(['ZIMAGE', 'ZCMPTYPE', 'ZBITPIX', 'ZNAXIS',
  63. 'ZMASKCMP', 'ZSIMPLE', 'ZTENSION', 'ZEXTEND'])
  64. class CompImageHeader(Header):
  65. """
  66. Header object for compressed image HDUs designed to keep the compression
  67. header and the underlying image header properly synchronized.
  68. This essentially wraps the image header, so that all values are read from
  69. and written to the image header. However, updates to the image header will
  70. also update the table header where appropriate.
  71. """
  72. # TODO: The difficulty of implementing this screams a need to rewrite this
  73. # module
  74. _keyword_remaps = {
  75. 'SIMPLE': 'ZSIMPLE', 'XTENSION': 'ZTENSION', 'BITPIX': 'ZBITPIX',
  76. 'NAXIS': 'ZNAXIS', 'EXTEND': 'ZEXTEND', 'BLOCKED': 'ZBLOCKED',
  77. 'PCOUNT': 'ZPCOUNT', 'GCOUNT': 'ZGCOUNT', 'CHECKSUM': 'ZHECKSUM',
  78. 'DATASUM': 'ZDATASUM'
  79. }
  80. _zdef_re = re.compile(r'(?P<label>^[Zz][a-zA-Z]*)(?P<num>[1-9][0-9 ]*$)?')
  81. _compression_keywords = set(_keyword_remaps.values()).union(
  82. ['ZIMAGE', 'ZCMPTYPE', 'ZMASKCMP', 'ZQUANTIZ', 'ZDITHER0'])
  83. _indexed_compression_keywords = set(['ZNAXIS', 'ZTILE', 'ZNAME', 'ZVAL'])
  84. # TODO: Once it place it should be possible to manage some of this through
  85. # the schema system, but it's not quite ready for that yet. Also it still
  86. # makes more sense to change CompImageHDU to subclass ImageHDU :/
  87. def __init__(self, table_header, image_header=None):
  88. if image_header is None:
  89. image_header = Header()
  90. self._cards = image_header._cards
  91. self._keyword_indices = image_header._keyword_indices
  92. self._rvkc_indices = image_header._rvkc_indices
  93. self._modified = image_header._modified
  94. self._table_header = table_header
  95. # We need to override and Header methods that can modify the header, and
  96. # ensure that they sync with the underlying _table_header
  97. def __setitem__(self, key, value):
  98. # This isn't pretty, but if the `key` is either an int or a tuple we
  99. # need to figure out what keyword name that maps to before doing
  100. # anything else; these checks will be repeated later in the
  101. # super().__setitem__ call but I don't see another way around it
  102. # without some major refactoring
  103. if self._set_slice(key, value, self):
  104. return
  105. if isinstance(key, int):
  106. keyword, index = self._keyword_from_index(key)
  107. elif isinstance(key, tuple):
  108. keyword, index = key
  109. else:
  110. # We don't want to specify and index otherwise, because that will
  111. # break the behavior for new keywords and for commentary keywords
  112. keyword, index = key, None
  113. if self._is_reserved_keyword(keyword):
  114. return
  115. super(CompImageHeader, self).__setitem__(key, value)
  116. if index is not None:
  117. remapped_keyword = self._remap_keyword(keyword)
  118. self._table_header[remapped_keyword, index] = value
  119. # Else this will pass through to ._update
  120. def __delitem__(self, key):
  121. if isinstance(key, slice) or self._haswildcard(key):
  122. # If given a slice pass that on to the superclass and bail out
  123. # early; we only want to make updates to _table_header when given
  124. # a key specifying a single keyword
  125. return super(CompImageHeader, self).__delitem__(key)
  126. if isinstance(key, int):
  127. keyword, index = self._keyword_from_index(key)
  128. elif isinstance(key, tuple):
  129. keyword, index = key
  130. else:
  131. keyword, index = key, None
  132. if key not in self:
  133. raise KeyError("Keyword %r not found." % key)
  134. super(CompImageHeader, self).__delitem__(key)
  135. remapped_keyword = self._remap_keyword(keyword)
  136. if remapped_keyword in self._table_header:
  137. if index is not None:
  138. del self._table_header[(remapped_keyword, index)]
  139. else:
  140. del self._table_header[remapped_keyword]
  141. def append(self, card=None, useblanks=True, bottom=False, end=False):
  142. # This logic unfortunately needs to be duplicated from the base class
  143. # in order to determine the keyword
  144. if isinstance(card, string_types):
  145. card = Card(card)
  146. elif isinstance(card, tuple):
  147. card = Card(*card)
  148. elif card is None:
  149. card = Card()
  150. elif not isinstance(card, Card):
  151. raise ValueError(
  152. 'The value appended to a Header must be either a keyword or '
  153. '(keyword, value, [comment]) tuple; got: %r' % card)
  154. if self._is_reserved_keyword(card.keyword):
  155. return
  156. super(CompImageHeader, self).append(card=card, useblanks=useblanks,
  157. bottom=bottom, end=end)
  158. remapped_keyword = self._remap_keyword(card.keyword)
  159. card = Card(remapped_keyword, card.value, card.comment)
  160. self._table_header.append(card=card, useblanks=useblanks,
  161. bottom=bottom, end=end)
  162. def insert(self, key, card, useblanks=True, after=False):
  163. if isinstance(key, int):
  164. # Determine condition to pass through to append
  165. if after:
  166. if key == -1:
  167. key = len(self._cards)
  168. else:
  169. key += 1
  170. if key >= len(self._cards):
  171. self.append(card, end=True)
  172. return
  173. if isinstance(card, string_types):
  174. card = Card(card)
  175. elif isinstance(card, tuple):
  176. card = Card(*card)
  177. elif not isinstance(card, Card):
  178. raise ValueError(
  179. 'The value inserted into a Header must be either a keyword or '
  180. '(keyword, value, [comment]) tuple; got: %r' % card)
  181. if self._is_reserved_keyword(card.keyword):
  182. return
  183. # Now the tricky part is to determine where to insert in the table
  184. # header. If given a numerical index we need to map that to the
  185. # corresponding index in the table header. Although rare, there may be
  186. # cases where there is no mapping in which case we just try the same
  187. # index
  188. # NOTE: It is crucial that remapped_index in particular is figured out
  189. # before the image header is modified
  190. remapped_index = self._remap_index(key)
  191. remapped_keyword = self._remap_keyword(card.keyword)
  192. super(CompImageHeader, self).insert(key, card, useblanks=useblanks,
  193. after=after)
  194. card = Card(remapped_keyword, card.value, card.comment)
  195. self._table_header.insert(remapped_index, card, useblanks=useblanks,
  196. after=after)
  197. def _update(self, card):
  198. keyword = card[0]
  199. if self._is_reserved_keyword(keyword):
  200. return
  201. super(CompImageHeader, self)._update(card)
  202. if keyword in Card._commentary_keywords:
  203. # Otherwise this will result in a duplicate insertion
  204. return
  205. remapped_keyword = self._remap_keyword(keyword)
  206. self._table_header._update((remapped_keyword,) + card[1:])
  207. # Last piece needed (I think) for synchronizing with the real header
  208. # This one is tricky since _relativeinsert calls insert
  209. def _relativeinsert(self, card, before=None, after=None, replace=False):
  210. keyword = card[0]
  211. if self._is_reserved_keyword(keyword):
  212. return
  213. # Now we have to figure out how to remap 'before' and 'after'
  214. if before is None:
  215. if isinstance(after, int):
  216. remapped_after = self._remap_index(after)
  217. else:
  218. remapped_after = self._remap_keyword(after)
  219. remapped_before = None
  220. else:
  221. if isinstance(before, int):
  222. remapped_before = self._remap_index(before)
  223. else:
  224. remapped_before = self._remap_keyword(before)
  225. remapped_after = None
  226. super(CompImageHeader, self)._relativeinsert(card, before=before,
  227. after=after,
  228. replace=replace)
  229. remapped_keyword = self._remap_keyword(keyword)
  230. card = Card(remapped_keyword, card[1], card[2])
  231. self._table_header._relativeinsert(card, before=remapped_before,
  232. after=remapped_after,
  233. replace=replace)
  234. @classmethod
  235. def _is_reserved_keyword(cls, keyword, warn=True):
  236. msg = ('Keyword %r is reserved for use by the FITS Tiled Image '
  237. 'Convention and will not be stored in the header for the '
  238. 'image being compressed.' % keyword)
  239. if keyword == 'TFIELDS':
  240. if warn:
  241. warnings.warn(msg)
  242. return True
  243. m = TDEF_RE.match(keyword)
  244. if m and m.group('label').upper() in TABLE_KEYWORD_NAMES:
  245. if warn:
  246. warnings.warn(msg)
  247. return True
  248. m = cls._zdef_re.match(keyword)
  249. if m:
  250. label = m.group('label').upper()
  251. num = m.group('num')
  252. if num is not None and label in cls._indexed_compression_keywords:
  253. if warn:
  254. warnings.warn(msg)
  255. return True
  256. elif label in cls._compression_keywords:
  257. if warn:
  258. warnings.warn(msg)
  259. return True
  260. return False
  261. @classmethod
  262. def _remap_keyword(cls, keyword):
  263. # Given a keyword that one might set on an image, remap that keyword to
  264. # the name used for it in the COMPRESSED HDU header
  265. # This is mostly just a lookup in _keyword_remaps, but needs handling
  266. # for NAXISn keywords
  267. is_naxisn = False
  268. if keyword[:5] == 'NAXIS':
  269. with ignored(ValueError):
  270. index = int(keyword[5:])
  271. is_naxisn = index > 0
  272. if is_naxisn:
  273. return 'ZNAXIS%d' % index
  274. # If the keyword does not need to be remapped then just return the
  275. # original keyword
  276. return cls._keyword_remaps.get(keyword, keyword)
  277. def _remap_index(self, idx):
  278. # Given an integer index into this header, map that to the index in the
  279. # table header for the same card. If the card doesn't exist in the
  280. # table header (generally should *not* be the case) this will just
  281. # return the same index
  282. # This *does* also accept a keyword or (keyword, repeat) tuple and
  283. # obtains the associated numerical index with self._cardindex
  284. if not isinstance(idx, int):
  285. idx = self._cardindex(idx)
  286. keyword, repeat = self._keyword_from_index(idx)
  287. remapped_insert_keyword = self._remap_keyword(keyword)
  288. with ignored(IndexError, KeyError):
  289. idx = self._table_header._cardindex((remapped_insert_keyword,
  290. repeat))
  291. return idx
  292. # TODO: Fix this class so that it doesn't actually inherit from BinTableHDU,
  293. # but instead has an internal BinTableHDU reference
  294. class CompImageHDU(BinTableHDU):
  295. """
  296. Compressed Image HDU class.
  297. """
  298. # Maps deprecated keyword arguments to __init__ to their new names
  299. DEPRECATED_KWARGS = {
  300. 'compressionType': 'compression_type', 'tileSize': 'tile_size',
  301. 'hcompScale': 'hcomp_scale', 'hcompSmooth': 'hcomp_smooth',
  302. 'quantizeLevel': 'quantize_level'
  303. }
  304. _manages_own_heap = True
  305. """
  306. The calls to CFITSIO lay out the heap data in memory, and we write it out
  307. the same way CFITSIO organizes it. In principle this would break if a user
  308. manually changes the underlying compressed data by hand, but there is no
  309. reason they would want to do that (and if they do that's their
  310. responsibility).
  311. """
  312. def __init__(self, data=None, header=None, name=None,
  313. compression_type=DEFAULT_COMPRESSION_TYPE,
  314. tile_size=None,
  315. hcomp_scale=DEFAULT_HCOMP_SCALE,
  316. hcomp_smooth=DEFAULT_HCOMP_SMOOTH,
  317. quantize_level=DEFAULT_QUANTIZE_LEVEL,
  318. quantize_method=DEFAULT_QUANTIZE_METHOD,
  319. dither_seed=DEFAULT_DITHER_SEED,
  320. do_not_scale_image_data=False,
  321. uint=False, scale_back=False, **kwargs):
  322. """
  323. Parameters
  324. ----------
  325. data : array, optional
  326. Uncompressed image data
  327. header : Header instance, optional
  328. Header to be associated with the image; when reading the HDU from a
  329. file (data=DELAYED), the header read from the file
  330. name : str, optional
  331. The ``EXTNAME`` value; if this value is `None`, then the name from
  332. the input image header will be used; if there is no name in the
  333. input image header then the default name ``COMPRESSED_IMAGE`` is
  334. used.
  335. compression_type : str, optional
  336. Compression algorithm: one of
  337. ``'RICE_1'``, ``'RICE_ONE'``, ``'PLIO_1'``, ``'GZIP_1'``,
  338. ``'GZIP_2'``, ``'HCOMPRESS_1'``
  339. tile_size : int, optional
  340. Compression tile sizes. Default treats each row of image as a
  341. tile.
  342. hcomp_scale : float, optional
  343. HCOMPRESS scale parameter
  344. hcomp_smooth : float, optional
  345. HCOMPRESS smooth parameter
  346. quantize_level : float, optional
  347. Floating point quantization level; see note below
  348. quantize_method : int, optional
  349. Floating point quantization dithering method; can be either
  350. ``NO_DITHER`` (-1), ``SUBTRACTIVE_DITHER_1`` (1; default), or
  351. ``SUBTRACTIVE_DITHER_2`` (2); see note below
  352. dither_seed : int, optional
  353. Random seed to use for dithering; can be either an integer in the
  354. range 1 to 1000 (inclusive), ``DITHER_SEED_CLOCK`` (0; default), or
  355. ``DITHER_SEED_CHECKSUM`` (-1); see note below
  356. Notes
  357. -----
  358. The astropy.io.fits package supports 2 methods of image compression:
  359. 1) The entire FITS file may be externally compressed with the gzip
  360. or pkzip utility programs, producing a ``*.gz`` or ``*.zip``
  361. file, respectively. When reading compressed files of this type,
  362. Astropy first uncompresses the entire file into a temporary file
  363. before performing the requested read operations. The
  364. astropy.io.fits package does not support writing to these types
  365. of compressed files. This type of compression is supported in
  366. the ``_File`` class, not in the `CompImageHDU` class. The file
  367. compression type is recognized by the ``.gz`` or ``.zip`` file
  368. name extension.
  369. 2) The `CompImageHDU` class supports the FITS tiled image
  370. compression convention in which the image is subdivided into a
  371. grid of rectangular tiles, and each tile of pixels is
  372. individually compressed. The details of this FITS compression
  373. convention are described at the `FITS Support Office web site
  374. <http://fits.gsfc.nasa.gov/registry/tilecompression.html>`_.
  375. Basically, the compressed image tiles are stored in rows of a
  376. variable length array column in a FITS binary table. The
  377. astropy.io.fits recognizes that this binary table extension
  378. contains an image and treats it as if it were an image
  379. extension. Under this tile-compression format, FITS header
  380. keywords remain uncompressed. At this time, Astropy does not
  381. support the ability to extract and uncompress sections of the
  382. image without having to uncompress the entire image.
  383. The astropy.io.fits package supports 3 general-purpose compression
  384. algorithms plus one other special-purpose compression technique that is
  385. designed for data masks with positive integer pixel values. The 3
  386. general purpose algorithms are GZIP, Rice, and HCOMPRESS, and the
  387. special-purpose technique is the IRAF pixel list compression technique
  388. (PLIO). The ``compression_type`` parameter defines the compression
  389. algorithm to be used.
  390. The FITS image can be subdivided into any desired rectangular grid of
  391. compression tiles. With the GZIP, Rice, and PLIO algorithms, the
  392. default is to take each row of the image as a tile. The HCOMPRESS
  393. algorithm is inherently 2-dimensional in nature, so the default in this
  394. case is to take 16 rows of the image per tile. In most cases, it makes
  395. little difference what tiling pattern is used, so the default tiles are
  396. usually adequate. In the case of very small images, it could be more
  397. efficient to compress the whole image as a single tile. Note that the
  398. image dimensions are not required to be an integer multiple of the tile
  399. dimensions; if not, then the tiles at the edges of the image will be
  400. smaller than the other tiles. The ``tile_size`` parameter may be
  401. provided as a list of tile sizes, one for each dimension in the image.
  402. For example a ``tile_size`` value of ``[100,100]`` would divide a 300 X
  403. 300 image into 9 100 X 100 tiles.
  404. The 4 supported image compression algorithms are all 'lossless' when
  405. applied to integer FITS images; the pixel values are preserved exactly
  406. with no loss of information during the compression and uncompression
  407. process. In addition, the HCOMPRESS algorithm supports a 'lossy'
  408. compression mode that will produce larger amount of image compression.
  409. This is achieved by specifying a non-zero value for the ``hcomp_scale``
  410. parameter. Since the amount of compression that is achieved depends
  411. directly on the RMS noise in the image, it is usually more convenient
  412. to specify the ``hcomp_scale`` factor relative to the RMS noise.
  413. Setting ``hcomp_scale = 2.5`` means use a scale factor that is 2.5
  414. times the calculated RMS noise in the image tile. In some cases it may
  415. be desirable to specify the exact scaling to be used, instead of
  416. specifying it relative to the calculated noise value. This may be done
  417. by specifying the negative of the desired scale value (typically in the
  418. range -2 to -100).
  419. Very high compression factors (of 100 or more) can be achieved by using
  420. large ``hcomp_scale`` values, however, this can produce undesirable
  421. 'blocky' artifacts in the compressed image. A variation of the
  422. HCOMPRESS algorithm (called HSCOMPRESS) can be used in this case to
  423. apply a small amount of smoothing of the image when it is uncompressed
  424. to help cover up these artifacts. This smoothing is purely cosmetic
  425. and does not cause any significant change to the image pixel values.
  426. Setting the ``hcomp_smooth`` parameter to 1 will engage the smoothing
  427. algorithm.
  428. Floating point FITS images (which have ``BITPIX`` = -32 or -64) usually
  429. contain too much 'noise' in the least significant bits of the mantissa
  430. of the pixel values to be effectively compressed with any lossless
  431. algorithm. Consequently, floating point images are first quantized
  432. into scaled integer pixel values (and thus throwing away much of the
  433. noise) before being compressed with the specified algorithm (either
  434. GZIP, RICE, or HCOMPRESS). This technique produces much higher
  435. compression factors than simply using the GZIP utility to externally
  436. compress the whole FITS file, but it also means that the original
  437. floating point value pixel values are not exactly preserved. When done
  438. properly, this integer scaling technique will only discard the
  439. insignificant noise while still preserving all the real information in
  440. the image. The amount of precision that is retained in the pixel
  441. values is controlled by the ``quantize_level`` parameter. Larger
  442. values will result in compressed images whose pixels more closely match
  443. the floating point pixel values, but at the same time the amount of
  444. compression that is achieved will be reduced. Users should experiment
  445. with different values for this parameter to determine the optimal value
  446. that preserves all the useful information in the image, without
  447. needlessly preserving all the 'noise' which will hurt the compression
  448. efficiency.
  449. The default value for the ``quantize_level`` scale factor is 16, which
  450. means that scaled integer pixel values will be quantized such that the
  451. difference between adjacent integer values will be 1/16th of the noise
  452. level in the image background. An optimized algorithm is used to
  453. accurately estimate the noise in the image. As an example, if the RMS
  454. noise in the background pixels of an image = 32.0, then the spacing
  455. between adjacent scaled integer pixel values will equal 2.0 by default.
  456. Note that the RMS noise is independently calculated for each tile of
  457. the image, so the resulting integer scaling factor may fluctuate
  458. slightly for each tile. In some cases, it may be desirable to specify
  459. the exact quantization level to be used, instead of specifying it
  460. relative to the calculated noise value. This may be done by specifying
  461. the negative of desired quantization level for the value of
  462. ``quantize_level``. In the previous example, one could specify
  463. ``quantize_level = -2.0`` so that the quantized integer levels differ
  464. by 2.0. Larger negative values for ``quantize_level`` means that the
  465. levels are more coarsely-spaced, and will produce higher compression
  466. factors.
  467. The quantization algorithm can also apply one of two random dithering
  468. methods in order to reduce bias in the measured intensity of background
  469. regions. The default method, specified with the constant
  470. ``SUBTRACTIVE_DITHER_1`` adds dithering to the zero-point of the
  471. quantization array itself rather than adding noise to the actual image.
  472. The random noise is added on a pixel-by-pixel basis, so in order
  473. restore each pixel from its integer value to its floating point value
  474. it is necessary to replay the same sequence of random numbers for each
  475. pixel (see below). The other method, ``SUBTRACTIVE_DITHER_2``, is
  476. exactly like the first except that before dithering any pixel with a
  477. floating point value of ``0.0`` is replaced with the special integer
  478. value ``-2147483647``. When the image is uncompressed, pixels with
  479. this value are restored back to ``0.0`` exactly. Finally, a value of
  480. ``NO_DITHER`` disables dithering entirely.
  481. As mentioned above, when using the subtractive dithering algorithm it
  482. is necessary to be able to generate a (pseudo-)random sequence of noise
  483. for each pixel, and replay that same sequence upon decompressing. To
  484. facilitate this, a random seed between 1 and 10000 (inclusive) is used
  485. to seed a random number generator, and that seed is stored in the
  486. ``ZDITHER0`` keyword in the header of the compressed HDU. In order to
  487. use that seed to generate the same sequence of random numbers the same
  488. random number generator must be used at compression and decompression
  489. time; for that reason the tiled image convention provides an
  490. implementation of a very simple pseudo-random number generator. The
  491. seed itself can be provided in one of three ways, controllable by the
  492. ``dither_seed`` argument: It may be specified manually, or it may be
  493. generated arbitrarily based on the system's clock
  494. (``DITHER_SEED_CLOCK``) or based on a checksum of the pixels in the
  495. image's first tile (``DITHER_SEED_CHECKSUM``). The clock-based method
  496. is the default, and is sufficient to ensure that the value is
  497. reasonably "arbitrary" and that the same seed is unlikely to be
  498. generated sequentially. The checksum method, on the other hand,
  499. ensures that the same seed is used every time for a specific image.
  500. This is particularly useful for software testing as it ensures that the
  501. same image will always use the same seed.
  502. """
  503. if not COMPRESSION_SUPPORTED:
  504. # TODO: Raise a more specific Exception type
  505. raise Exception('The astropy.io.fits.compression module is not '
  506. 'available. Creation of compressed image HDUs is '
  507. 'disabled.')
  508. compression_type = CMTYPE_ALIASES.get(compression_type, compression_type)
  509. # Handle deprecated keyword arguments
  510. compression_opts = {}
  511. for oldarg, newarg in self.DEPRECATED_KWARGS.items():
  512. if oldarg in kwargs:
  513. warnings.warn('Keyword argument %s to %s is pending '
  514. 'deprecation; use %s instead' %
  515. (oldarg, self.__class__.__name__, newarg),
  516. AstropyPendingDeprecationWarning)
  517. compression_opts[newarg] = kwargs[oldarg]
  518. del kwargs[oldarg]
  519. else:
  520. compression_opts[newarg] = locals()[newarg]
  521. # Include newer compression options that don't required backwards
  522. # compatibility with deprecated spellings
  523. compression_opts['quantize_method'] = quantize_method
  524. compression_opts['dither_seed'] = dither_seed
  525. if data is DELAYED:
  526. # Reading the HDU from a file
  527. super(CompImageHDU, self).__init__(data=data, header=header)
  528. else:
  529. # Create at least a skeleton HDU that matches the input
  530. # header and data (if any were input)
  531. super(CompImageHDU, self).__init__(data=None, header=header)
  532. # Store the input image data
  533. self.data = data
  534. # Update the table header (_header) to the compressed
  535. # image format and to match the input data (if any);
  536. # Create the image header (_image_header) from the input
  537. # image header (if any) and ensure it matches the input
  538. # data; Create the initially empty table data array to
  539. # hold the compressed data.
  540. self._update_header_data(header, name, **compression_opts)
  541. # TODO: A lot of this should be passed on to an internal image HDU o
  542. # something like that, see ticket #88
  543. self._do_not_scale_image_data = do_not_scale_image_data
  544. self._uint = uint
  545. self._scale_back = scale_back
  546. self._axes = [self._header.get('ZNAXIS' + str(axis + 1), 0)
  547. for axis in range(self._header.get('ZNAXIS', 0))]
  548. # store any scale factors from the table header
  549. if do_not_scale_image_data:
  550. self._bzero = 0
  551. self._bscale = 1
  552. else:
  553. self._bzero = self._header.get('BZERO', 0)
  554. self._bscale = self._header.get('BSCALE', 1)
  555. self._bitpix = self._header['ZBITPIX']
  556. self._orig_bzero = self._bzero
  557. self._orig_bscale = self._bscale
  558. self._orig_bitpix = self._bitpix
  559. @classmethod
  560. def match_header(cls, header):
  561. card = header.cards[0]
  562. if card.keyword != 'XTENSION':
  563. return False
  564. xtension = card.value
  565. if isinstance(xtension, string_types):
  566. xtension = xtension.rstrip()
  567. if xtension not in ('BINTABLE', 'A3DTABLE'):
  568. return False
  569. if 'ZIMAGE' not in header or header['ZIMAGE'] != True:
  570. return False
  571. if COMPRESSION_SUPPORTED and COMPRESSION_ENABLED:
  572. return True
  573. elif not COMPRESSION_SUPPORTED:
  574. warnings.warn('Failure matching header to a compressed image '
  575. 'HDU: The compression module is not available.\n'
  576. 'The HDU will be treated as a Binary Table HDU.',
  577. AstropyUserWarning)
  578. return False
  579. else:
  580. # Compression is supported but disabled; just pass silently (#92)
  581. return False
  582. def _update_header_data(self, image_header,
  583. name=None,
  584. compression_type=None,
  585. tile_size=None,
  586. hcomp_scale=None,
  587. hcomp_smooth=None,
  588. quantize_level=None,
  589. quantize_method=None,
  590. dither_seed=None):
  591. """
  592. Update the table header (`_header`) to the compressed
  593. image format and to match the input data (if any). Create
  594. the image header (`_image_header`) from the input image
  595. header (if any) and ensure it matches the input
  596. data. Create the initially-empty table data array to hold
  597. the compressed data.
  598. This method is mainly called internally, but a user may wish to
  599. call this method after assigning new data to the `CompImageHDU`
  600. object that is of a different type.
  601. Parameters
  602. ----------
  603. image_header : Header instance
  604. header to be associated with the image
  605. name : str, optional
  606. the ``EXTNAME`` value; if this value is `None`, then the name from
  607. the input image header will be used; if there is no name in the
  608. input image header then the default name 'COMPRESSED_IMAGE' is used
  609. compression_type : str, optional
  610. compression algorithm 'RICE_1', 'PLIO_1', 'GZIP_1', 'HCOMPRESS_1';
  611. if this value is `None`, use value already in the header; if no
  612. value already in the header, use 'RICE_1'
  613. tile_size : sequence of int, optional
  614. compression tile sizes as a list; if this value is `None`, use
  615. value already in the header; if no value already in the header,
  616. treat each row of image as a tile
  617. hcomp_scale : float, optional
  618. HCOMPRESS scale parameter; if this value is `None`, use the value
  619. already in the header; if no value already in the header, use 1
  620. hcomp_smooth : float, optional
  621. HCOMPRESS smooth parameter; if this value is `None`, use the value
  622. already in the header; if no value already in the header, use 0
  623. quantize_level : float, optional
  624. floating point quantization level; if this value is `None`, use the
  625. value already in the header; if no value already in header, use 16
  626. quantize_method : int, optional
  627. floating point quantization dithering method; can be either
  628. NO_DITHER (-1), SUBTRACTIVE_DITHER_1 (1; default), or
  629. SUBTRACTIVE_DITHER_2 (2)
  630. dither_seed : int, optional
  631. random seed to use for dithering; can be either an integer in the
  632. range 1 to 1000 (inclusive), DITHER_SEED_CLOCK (0; default), or
  633. DITHER_SEED_CHECKSUM (-1)
  634. """
  635. image_hdu = ImageHDU(data=self.data, header=self._header)
  636. self._image_header = CompImageHeader(self._header, image_hdu.header)
  637. self._axes = image_hdu._axes
  638. del image_hdu
  639. # Determine based on the size of the input data whether to use the Q
  640. # column format to store compressed data or the P format.
  641. # The Q format is used only if the uncompressed data is larger than
  642. # 4 GB. This is not a perfect heuristic, as one can contrive an input
  643. # array which, when compressed, the entire binary table representing
  644. # the compressed data is larger than 4GB. That said, this is the same
  645. # heuristic used by CFITSIO, so this should give consistent results.
  646. # And the cases where this heuristic is insufficient are extreme and
  647. # almost entirely contrived corner cases, so it will do for now
  648. if self._has_data:
  649. huge_hdu = self.data.nbytes > 2 ** 32
  650. if huge_hdu and not CFITSIO_SUPPORTS_Q_FORMAT:
  651. raise IOError(
  652. "Astropy cannot compress images greater than 4 GB in size "
  653. "(%s is %s bytes) without CFITSIO >= 3.35" %
  654. ((self.name, self.ver), self.data.nbytes))
  655. else:
  656. huge_hdu = False
  657. # Update the extension name in the table header
  658. if not name and not 'EXTNAME' in self._header:
  659. name = 'COMPRESSED_IMAGE'
  660. if name:
  661. self._header.set('EXTNAME', name,
  662. 'name of this binary table extension',
  663. after='TFIELDS')
  664. self.name = name
  665. else:
  666. self.name = self._header['EXTNAME']
  667. # Set the compression type in the table header.
  668. if compression_type:
  669. if compression_type not in ['RICE_1', 'GZIP_1', 'PLIO_1',
  670. 'HCOMPRESS_1']:
  671. warnings.warn('Unknown compression type provided. Default '
  672. '(%s) compression used.' %
  673. DEFAULT_COMPRESSION_TYPE, AstropyUserWarning)
  674. compression_type = DEFAULT_COMPRESSION_TYPE
  675. self._header.set('ZCMPTYPE', compression_type,
  676. 'compression algorithm', after='TFIELDS')
  677. else:
  678. compression_type = self._header.get('ZCMPTYPE',
  679. DEFAULT_COMPRESSION_TYPE)
  680. compression_type = CMTYPE_ALIASES.get(compression_type,
  681. compression_type)
  682. # If the input image header had BSCALE/BZERO cards, then insert
  683. # them in the table header.
  684. if image_header:
  685. bzero = image_header.get('BZERO', 0.0)
  686. bscale = image_header.get('BSCALE', 1.0)
  687. after_keyword = 'EXTNAME'
  688. if bscale != 1.0:
  689. self._header.set('BSCALE', bscale, after=after_keyword)
  690. after_keyword = 'BSCALE'
  691. if bzero != 0.0:
  692. self._header.set('BZERO', bzero, after=after_keyword)
  693. bitpix_comment = image_header.comments['BITPIX']
  694. naxis_comment = image_header.comments['NAXIS']
  695. else:
  696. bitpix_comment = 'data type of original image'
  697. naxis_comment = 'dimension of original image'
  698. # Set the label for the first column in the table
  699. self._header.set('TTYPE1', 'COMPRESSED_DATA', 'label for field 1',
  700. after='TFIELDS')
  701. # Set the data format for the first column. It is dependent
  702. # on the requested compression type.
  703. if compression_type == 'PLIO_1':
  704. tform1 = '1QI' if huge_hdu else '1PI'
  705. else:
  706. tform1 = '1QB' if huge_hdu else '1PB'
  707. self._header.set('TFORM1', tform1,
  708. 'data format of field: variable length array',
  709. after='TTYPE1')
  710. # Create the first column for the table. This column holds the
  711. # compressed data.
  712. col1 = Column(name=self._header['TTYPE1'], format=tform1)
  713. # Create the additional columns required for floating point
  714. # data and calculate the width of the output table.
  715. zbitpix = self._image_header['BITPIX']
  716. if zbitpix < 0 and quantize_level != 0.0:
  717. # floating point image has 'COMPRESSED_DATA',
  718. # 'UNCOMPRESSED_DATA', 'ZSCALE', and 'ZZERO' columns (unless using
  719. # lossless compression, per CFITSIO)
  720. ncols = 4
  721. # CFITSIO 3.28 and up automatically use the GZIP_COMPRESSED_DATA
  722. # column to store floating point data that couldn't be quantized, instead
  723. # of the UNCOMPRESSED_DATA column. There's no way to control
  724. # this behavior so the only way to determine which behavior will
  725. # be employed is via the CFITSIO version
  726. if CFITSIO_SUPPORTS_GZIPDATA:
  727. ttype2 = 'GZIP_COMPRESSED_DATA'
  728. # The required format for the GZIP_COMPRESSED_DATA is actually
  729. # missing from the standard docs, but CFITSIO suggests it
  730. # should be 1PB, which is logical.
  731. tform2 = '1QB' if huge_hdu else '1PB'
  732. else:
  733. # Q format is not supported for UNCOMPRESSED_DATA columns.
  734. ttype2 = 'UNCOMPRESSED_DATA'
  735. if zbitpix == 8:
  736. tform2 = '1QB' if huge_hdu else '1PB'
  737. elif zbitpix == 16:
  738. tform2 = '1QI' if huge_hdu else '1PI'
  739. elif zbitpix == 32:
  740. tform2 = '1QJ' if huge_hdu else '1PJ'
  741. elif zbitpix == -32:
  742. tform2 = '1QE' if huge_hdu else '1PE'
  743. else:
  744. tform2 = '1QD' if huge_hdu else '1PD'
  745. # Set up the second column for the table that will hold any
  746. # uncompressable data.
  747. self._header.set('TTYPE2', ttype2, 'label for field 2',
  748. after='TFORM1')
  749. self._header.set('TFORM2', tform2,
  750. 'data format of field: variable length array',
  751. after='TTYPE2')
  752. col2 = Column(name=ttype2, format=tform2)
  753. # Set up the third column for the table that will hold
  754. # the scale values for quantized data.
  755. self._header.set('TTYPE3', 'ZSCALE', 'label for field 3',
  756. after='TFORM2')
  757. self._header.set('TFORM3', '1D',
  758. 'data format of field: 8-byte DOUBLE',
  759. after='TTYPE3')
  760. col3 = Column(name=self._header['TTYPE3'],
  761. format=self._header['TFORM3'])
  762. # Set up the fourth column for the table that will hold
  763. # the zero values for the quantized data.
  764. self._header.set('TTYPE4', 'ZZERO', 'label for field 4',
  765. after='TFORM3')
  766. self._header.set('TFORM4', '1D',
  767. 'data format of field: 8-byte DOUBLE',
  768. after='TTYPE4')
  769. after = 'TFORM4'
  770. col4 = Column(name=self._header['TTYPE4'],
  771. format=self._header['TFORM4'])
  772. # Create the ColDefs object for the table
  773. cols = ColDefs([col1, col2, col3, col4])
  774. else:
  775. # default table has just one 'COMPRESSED_DATA' column
  776. ncols = 1
  777. after = 'TFORM1'
  778. # remove any header cards for the additional columns that
  779. # may be left over from the previous data
  780. to_remove = ['TTYPE2', 'TFORM2', 'TTYPE3', 'TFORM3', 'TTYPE4',
  781. 'TFORM4']
  782. for k in to_remove:
  783. try:
  784. del self._header[k]
  785. except KeyError:
  786. pass
  787. # Create the ColDefs object for the table
  788. cols = ColDefs([col1])
  789. # Update the table header with the width of the table, the
  790. # number of fields in the table, the indicator for a compressed
  791. # image HDU, the data type of the image data and the number of
  792. # dimensions in the image data array.
  793. self._header.set('NAXIS1', cols.dtype.itemsize,
  794. 'width of table in bytes')
  795. self._header.set('TFIELDS', ncols, 'number of fields in each row')
  796. self._header.set('ZIMAGE', True, 'extension contains compressed image',
  797. after=after)
  798. self._header.set('ZBITPIX', zbitpix,
  799. bitpix_comment, after='ZIMAGE')
  800. self._header.set('ZNAXIS', self._image_header['NAXIS'], naxis_comment,
  801. after='ZBITPIX')
  802. # Strip the table header of all the ZNAXISn and ZTILEn keywords
  803. # that may be left over from the previous data
  804. idx = 1
  805. while True:
  806. try:
  807. del self._header['ZNAXIS' + str(idx)]
  808. del self._header['ZTILE' + str(idx)]
  809. idx += 1
  810. except KeyError:
  811. break
  812. # Verify that any input tile size parameter is the appropriate
  813. # size to match the HDU's data.
  814. naxis = self._image_header['NAXIS']
  815. if not tile_size:
  816. tile_size = []
  817. elif len(tile_size) != naxis:
  818. warnings.warn('Provided tile size not appropriate for the data. '
  819. 'Default tile size will be used.', AstropyUserWarning)
  820. tile_size = []
  821. # Set default tile dimensions for HCOMPRESS_1
  822. if compression_type == 'HCOMPRESS_1':
  823. if (self._image_header['NAXIS1'] < 4 or
  824. self._image_header['NAXIS2'] < 4):
  825. raise ValueError('Hcompress minimum image dimension is '
  826. '4 pixels')
  827. elif tile_size:
  828. if tile_size[0] < 4 or tile_size[1] < 4:
  829. # user specified tile size is too small
  830. raise ValueError('Hcompress minimum tile dimension is '
  831. '4 pixels')
  832. major_dims = len([ts for ts in tile_size if ts > 1])
  833. if major_dims > 2:
  834. raise ValueError(
  835. 'HCOMPRESS can only support 2-dimensional tile sizes.'
  836. 'All but two of the tile_size dimensions must be set '
  837. 'to 1.')
  838. if tile_size and (tile_size[0] == 0 and tile_size[1] == 0):
  839. # compress the whole image as a single tile
  840. tile_size[0] = self._image_header['NAXIS1']
  841. tile_size[1] = self._image_header['NAXIS2']
  842. for i in range(2, naxis):
  843. # set all higher tile dimensions = 1
  844. tile_size[i] = 1
  845. elif not tile_size:
  846. # The Hcompress algorithm is inherently 2D in nature, so the
  847. # row by row tiling that is used for other compression
  848. # algorithms is not appropriate. If the image has less than 30
  849. # rows, then the entire image will be compressed as a single
  850. # tile. Otherwise the tiles will consist of 16 rows of the
  851. # image. This keeps the tiles to a reasonable size, and it
  852. # also includes enough rows to allow good compression
  853. # efficiency. If the last tile of the image happens to contain
  854. # less than 4 rows, then find another tile size with between 14
  855. # and 30 rows (preferably even), so that the last tile has at
  856. # least 4 rows.
  857. # 1st tile dimension is the row length of the image
  858. tile_size.append(self._image_header['NAXIS1'])
  859. if self._image_header['NAXIS2'] <= 30:
  860. tile_size.append(self._image_header['NAXIS1'])
  861. else:
  862. # look for another good tile dimension
  863. naxis2 = self._image_header['NAXIS2']
  864. for dim in [16, 24, 20, 30, 28, 26, 22, 18, 14]:
  865. if naxis2 % dim == 0 or naxis2 % dim > 3:
  866. tile_size.append(dim)
  867. break
  868. else:
  869. tile_size.append(17)
  870. for i in range(2, naxis):
  871. # set all higher tile dimensions = 1
  872. tile_size.append(1)
  873. # check if requested tile size causes the last tile to have
  874. # less than 4 pixels
  875. remain = self._image_header['NAXIS1'] % tile_size[0] # 1st dimen
  876. if remain > 0 and remain < 4:
  877. tile_size[0] += 1 # try increasing tile size by 1
  878. remain = self._image_header['NAXIS1'] % tile_size[0]
  879. if remain > 0 and remain < 4:
  880. raise ValueError('Last tile along 1st dimension has '
  881. 'less than 4 pixels')
  882. remain = self._image_header['NAXIS2'] % tile_size[1] # 2nd dimen
  883. if remain > 0 and remain < 4:
  884. tile_size[1] += 1 # try increasing tile size by 1
  885. remain = self._image_header['NAXIS2'] % tile_size[1]
  886. if remain > 0 and remain < 4:
  887. raise ValueError('Last tile along 2nd dimension has '
  888. 'less than 4 pixels')
  889. # Set up locations for writing the next cards in the header.
  890. last_znaxis = 'ZNAXIS'
  891. if self._image_header['NAXIS'] > 0:
  892. after1 = 'ZNAXIS1'
  893. else:
  894. after1 = 'ZNAXIS'
  895. # Calculate the number of rows in the output table and
  896. # write the ZNAXISn and ZTILEn cards to the table header.
  897. nrows = 0
  898. for idx, axis in enumerate(self._axes):
  899. naxis = 'NAXIS' + str(idx + 1)
  900. znaxis = 'ZNAXIS' + str(idx + 1)
  901. ztile = 'ZTILE' + str(idx + 1)
  902. if tile_size and len(tile_size) >= idx + 1:
  903. ts = tile_size[idx]
  904. else:
  905. if not ztile in self._header:
  906. # Default tile size
  907. if not idx:
  908. ts = self._image_header['NAXIS1']
  909. else:
  910. ts = 1
  911. else:
  912. ts = self._header[ztile]
  913. tile_size.append(ts)
  914. if not nrows:
  915. nrows = (axis - 1) // ts + 1
  916. else:
  917. nrows *= ((axis - 1) // ts + 1)
  918. if image_header and naxis in image_header:
  919. self._header.set(znaxis, axis, image_header.comments[naxis],
  920. after=last_znaxis)
  921. else:
  922. self._header.set(znaxis, axis,
  923. 'length of original image axis',
  924. after=last_znaxis)
  925. self._header.set(ztile, ts, 'size of tiles to be compressed',
  926. after=after1)
  927. last_znaxis = znaxis
  928. after1 = ztile
  929. # Set the NAXIS2 header card in the table hdu to the number of
  930. # rows in the table.
  931. self._header.set('NAXIS2', nrows, 'number of rows in table')
  932. self.columns = cols
  933. # Set the compression parameters in the table header.
  934. # First, setup the values to be used for the compression parameters
  935. # in case none were passed in. This will be either the value
  936. # already in the table header for that parameter or the default
  937. # value.
  938. idx = 1
  939. while True:
  940. zname = 'ZNAME' + str(idx)
  941. if zname not in self._header:
  942. break
  943. zval = 'ZVAL' + str(idx)
  944. if