PageRenderTime 75ms CodeModel.GetById 39ms RepoModel.GetById 1ms app.codeStats 0ms

/astropy/table/column.py

https://github.com/mdboom/astropy
Python | 1124 lines | 976 code | 48 blank | 100 comment | 38 complexity | efe0cd2c7440cbe9ebd828cb6e3dc4c2 MD5 | raw file
  1. # Licensed under a 3-clause BSD style license - see LICENSE.rst
  2. from __future__ import (absolute_import, division, print_function,
  3. unicode_literals)
  4. from ..extern import six
  5. import weakref
  6. from copy import deepcopy
  7. import numpy as np
  8. from numpy import ma
  9. from ..units import Unit, Quantity
  10. from ..utils.compat import NUMPY_LT_1_8
  11. from ..utils.console import color_print
  12. from ..utils.metadata import MetaData
  13. from ..utils.data_info import BaseColumnInfo, InfoDescriptor, dtype_info_name
  14. from . import groups
  15. from . import pprint
  16. from .np_utils import fix_column_name
  17. from ..config import ConfigAlias
  18. AUTO_COLNAME = ConfigAlias(
  19. '0.4', 'AUTO_COLNAME', 'auto_colname',
  20. 'astropy.table.column', 'astropy.table')
  21. # Create a generic TableFormatter object for use by bare columns with no
  22. # parent table.
  23. FORMATTER = pprint.TableFormatter()
  24. INTEGER_TYPES = (int, long, np.integer) if six.PY2 else (int, np.integer)
  25. def _auto_names(n_cols):
  26. from . import conf
  27. return [str(conf.auto_colname).format(i) for i in range(n_cols)]
  28. # list of one and two-dimensional comparison functions, which sometimes return
  29. # a Column class and sometimes a plain array. Used in __array_wrap__ to ensure
  30. # they only return plain (masked) arrays (see #1446 and #1685)
  31. _comparison_functions = set(
  32. [np.greater, np.greater_equal, np.less, np.less_equal,
  33. np.not_equal, np.equal,
  34. np.isfinite, np.isinf, np.isnan, np.sign, np.signbit])
  35. def col_copy(col):
  36. """
  37. This is a mixin-safe version of Column.copy() (with copy_data=True).
  38. """
  39. if isinstance(col, BaseColumn):
  40. return col.copy()
  41. # The new column should have None for the parent_table ref. If the
  42. # original parent_table weakref there at the point of copying then it
  43. # generates an infinite recursion. Instead temporarily remove the weakref
  44. # on the original column and restore after the copy in an exception-safe
  45. # manner.
  46. parent_table = col.info.parent_table
  47. col.info.parent_table = None
  48. try:
  49. newcol = col.copy() if hasattr(col, 'copy') else deepcopy(col)
  50. newcol.info = col.info
  51. finally:
  52. col.info.parent_table = parent_table
  53. return newcol
  54. class FalseArray(np.ndarray):
  55. def __new__(cls, shape):
  56. obj = np.zeros(shape, dtype=np.bool).view(cls)
  57. return obj
  58. def __setitem__(self, item, val):
  59. val = np.asarray(val)
  60. if np.any(val):
  61. raise ValueError('Cannot set any element of {0} class to True'
  62. .format(self.__class__.__name__))
  63. def __setslice__(self, start, stop, val):
  64. val = np.asarray(val)
  65. if np.any(val):
  66. raise ValueError('Cannot set any element of {0} class to True'
  67. .format(self.__class__.__name__))
class ColumnInfo(BaseColumnInfo):
    # Info container installed on BaseColumn through InfoDescriptor.
    # Every name in BaseColumnInfo.attr_names is also listed in
    # attrs_from_parent.
    # NOTE(review): presumably this makes the info object read those
    # attributes from the column itself -- confirm in BaseColumnInfo.
    attrs_from_parent = BaseColumnInfo.attr_names
  70. class _NDColumnProxyShim(np.ndarray):
  71. """
  72. This mixin class exists solely to provide an override to
  73. ndarray.__getitem__ that provides the desirable behavior for single
  74. item gets on columns with multi-dimensional data types. The default
  75. behavior from Numpy is to automatically view-cast these to the ndarray
  76. subclass (i.e. Column), but the multi-dimensional array elements of
  77. multi-dimensional columns are not, themselves, Columns.
  78. This class is shimmed into a new class used for any BaseColumn instances
  79. that contain multi-dimensional data via BaseColumn._get_nd_proxy_class
  80. (this is also done explicitly in MaskedColumn.__new__ due to the
  81. peculiarities of MaskedColumn).
  82. """
  83. def __getitem__(self, item):
  84. if isinstance(item, INTEGER_TYPES):
  85. return self.data[item] # Return as plain ndarray or ma.MaskedArray
  86. else:
  87. return super(_NDColumnProxyShim, self).__getitem__(item)
  88. class BaseColumn(np.ndarray):
  89. meta = MetaData()
  90. _nd_proxy_classes = {}
  91. """
  92. Alternate versions of BaseColumn and any subclasses that have the
  93. _NDColumnProxyShim, mapped to by the original class. The shimmed
  94. classes have the same name as the original class and are otherwise
  95. indistinguishable. This hack exists only as a performance tweak.
  96. """
  97. def __new__(cls, data=None, name=None,
  98. dtype=None, shape=(), length=0,
  99. description=None, unit=None, format=None, meta=None, copy=False):
  100. if data is None:
  101. dtype = (np.dtype(dtype).str, shape)
  102. self_data = np.zeros(length, dtype=dtype)
  103. elif isinstance(data, BaseColumn) and hasattr(data, '_name'):
  104. # When unpickling a MaskedColumn, ``data`` will be a bare
  105. # BaseColumn with none of the expected attributes. In this case
  106. # do NOT execute this block which initializes from ``data``
  107. # attributes.
  108. self_data = np.array(data.data, dtype=dtype, copy=copy)
  109. if description is None:
  110. description = data.description
  111. if unit is None:
  112. unit = unit or data.unit
  113. if format is None:
  114. format = data.format
  115. if meta is None:
  116. meta = deepcopy(data.meta)
  117. if name is None:
  118. name = data.name
  119. elif isinstance(data, Quantity):
  120. if unit is None:
  121. self_data = np.array(data, dtype=dtype, copy=copy)
  122. unit = data.unit
  123. else:
  124. self_data = np.array(data.to(unit), dtype=dtype, copy=copy)
  125. if description is None:
  126. description = data.info.description
  127. if format is None:
  128. format = data.info.format
  129. if meta is None:
  130. meta = deepcopy(data.info.meta)
  131. else:
  132. self_data = np.array(data, dtype=dtype, copy=copy)
  133. cls = cls._get_nd_proxy_class(self_data)
  134. self = self_data.view(cls)
  135. self._name = fix_column_name(name)
  136. self.unit = unit
  137. self.format = format
  138. self.description = description
  139. self.meta = meta
  140. self._parent_table = None
  141. return self
  142. @classmethod
  143. def _get_nd_proxy_class(cls, data):
  144. """
  145. Creates new classes with the _NDColumnProxyShim. See the docstring
  146. for _NDColumnProxyShim for more detail.
  147. The data argument should be the array data that will be held by the
  148. column--this can be used to determine what proxy class to use if any at
  149. all.
  150. """
  151. if data.ndim < 2:
  152. # We only this special proxy for columns whose individual elements
  153. # are themselves arrays
  154. return cls
  155. if cls not in cls._nd_proxy_classes:
  156. cls._nd_proxy_classes[cls] = type(cls.__name__,
  157. (_NDColumnProxyShim, cls), {})
  158. return cls._nd_proxy_classes[cls]
  159. @property
  160. def data(self):
  161. return self.view(np.ndarray)
  162. @property
  163. def parent_table(self):
  164. if self._parent_table is None:
  165. return None
  166. else:
  167. return self._parent_table()
  168. @parent_table.setter
  169. def parent_table(self, table):
  170. if table is None:
  171. self._parent_table = None
  172. else:
  173. self._parent_table = weakref.ref(table)
  174. info = InfoDescriptor(ColumnInfo)
  175. def copy(self, order='C', data=None, copy_data=True):
  176. """
  177. Return a copy of the current instance.
  178. If ``data`` is supplied then a view (reference) of ``data`` is used,
  179. and ``copy_data`` is ignored.
  180. Parameters
  181. ----------
  182. order : {'C', 'F', 'A', 'K'}, optional
  183. Controls the memory layout of the copy. 'C' means C-order,
  184. 'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous,
  185. 'C' otherwise. 'K' means match the layout of ``a`` as closely
  186. as possible. (Note that this function and :func:numpy.copy are very
  187. similar, but have different default values for their order=
  188. arguments.) Default is 'C'.
  189. data : array, optional
  190. If supplied then use a view of ``data`` instead of the instance
  191. data. This allows copying the instance attributes and meta.
  192. copy_data : bool, optional
  193. Make a copy of the internal numpy array instead of using a
  194. reference. Default is True.
  195. Returns
  196. -------
  197. col : Column or MaskedColumn
  198. Copy of the current column (same type as original)
  199. """
  200. if data is None:
  201. data = self.data
  202. if copy_data:
  203. data = data.copy(order)
  204. out = data.view(self.__class__)
  205. out.__array_finalize__(self)
  206. # for MaskedColumn, MaskedArray.__array_finalize__ also copies mask
  207. # from self, which is not the idea here, so undo
  208. if isinstance(self, MaskedColumn):
  209. out._mask = data._mask
  210. self._copy_groups(out)
  211. return out
  212. def __setstate__(self, state):
  213. """
  214. Restore the internal state of the Column/MaskedColumn for pickling
  215. purposes. This requires that the last element of ``state`` is a
  216. 5-tuple that has Column-specific state values.
  217. """
  218. # Get the Column attributes and meta
  219. name, unit, format, description, meta = state[-1]
  220. state = state[:-1]
  221. # Using super(type(self), self).__setstate__() gives an infinite
  222. # recursion. Manually call the right super class to actually set up
  223. # the array object.
  224. super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
  225. super_class.__setstate__(self, state)
  226. # Set the Column attributes and meta
  227. self._name = name
  228. self.unit = unit
  229. self.format = format
  230. self.description = description
  231. self.meta = meta
  232. def __reduce__(self):
  233. """
  234. Return a 3-tuple for pickling a Column. Use the super-class
  235. functionality but then add in a 5-tuple of Column-specific values
  236. that get used in __setstate__.
  237. """
  238. super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
  239. reconstruct_func, reconstruct_func_args, state = super_class.__reduce__(self)
  240. # Define Column-specific attrs and meta that gets added to state.
  241. column_state = (self.name, self.unit, self.format, self.description,
  242. self.meta)
  243. state = state + (column_state,)
  244. return reconstruct_func, reconstruct_func_args, state
  245. # avoid == and != to be done based on type of subclass
  246. # (helped solve #1446; see also __array_wrap__)
  247. def __eq__(self, other):
  248. return self.data.__eq__(other)
  249. def __ne__(self, other):
  250. return self.data.__ne__(other)
  251. def __array_finalize__(self, obj):
  252. # Obj will be none for direct call to Column() creator
  253. if obj is None:
  254. return
  255. if six.callable(super(BaseColumn, self).__array_finalize__):
  256. super(BaseColumn, self).__array_finalize__(obj)
  257. # Self was created from template (e.g. obj[slice] or (obj * 2))
  258. # or viewcast e.g. obj.view(Column). In either case we want to
  259. # init Column attributes for self from obj if possible.
  260. self.parent_table = None
  261. self._copy_attrs(obj)
  262. def __array_wrap__(self, out_arr, context=None):
  263. """
  264. __array_wrap__ is called at the end of every ufunc.
  265. Normally, we want a Column object back and do not have to do anything
  266. special. But there are two exceptions:
  267. 1) If the output shape is different (e.g. for reduction ufuncs
  268. like sum() or mean()), a Column still linking to a parent_table
  269. makes little sense, so we return the output viewed as the
  270. column content (ndarray or MaskedArray).
  271. For this case, we use "[()]" to select everything, and to ensure we
  272. convert a zero rank array to a scalar. (For some reason np.sum()
  273. returns a zero rank scalar array while np.mean() returns a scalar;
  274. So the [()] is needed for this case.
  275. 2) When the output is created by any function that returns a boolean
  276. we also want to consistently return an array rather than a column
  277. (see #1446 and #1685)
  278. """
  279. out_arr = super(BaseColumn, self).__array_wrap__(out_arr, context)
  280. if (self.shape != out_arr.shape or
  281. (isinstance(out_arr, BaseColumn) and
  282. (context is not None and context[0] in _comparison_functions))):
  283. return out_arr.data[()]
  284. else:
  285. return out_arr
  286. @property
  287. def name(self):
  288. """
  289. The name of this column.
  290. """
  291. return self._name
  292. @name.setter
  293. def name(self, val):
  294. val = fix_column_name(val)
  295. if self.parent_table is not None:
  296. table = self.parent_table
  297. table.columns._rename_column(self.name, val)
  298. self._name = val
  299. @property
  300. def descr(self):
  301. """Array-interface compliant full description of the column.
  302. This returns a 3-tuple (name, type, shape) that can always be
  303. used in a structured array dtype definition.
  304. """
  305. return (self.name, self.dtype.str, self.shape[1:])
  306. def iter_str_vals(self):
  307. """
  308. Return an iterator that yields the string-formatted values of this
  309. column.
  310. Returns
  311. -------
  312. str_vals : iterator
  313. Column values formatted as strings
  314. """
  315. # Iterate over formatted values with no max number of lines, no column
  316. # name, no unit, and ignoring the returned header info in outs.
  317. _pformat_col_iter = self._formatter._pformat_col_iter
  318. for str_val in _pformat_col_iter(self, -1, show_name=False, show_unit=False,
  319. show_dtype=False, outs={}):
  320. yield str_val
  321. def attrs_equal(self, col):
  322. """Compare the column attributes of ``col`` to this object.
  323. The comparison attributes are: ``name``, ``unit``, ``dtype``,
  324. ``format``, ``description``, and ``meta``.
  325. Parameters
  326. ----------
  327. col : Column
  328. Comparison column
  329. Returns
  330. -------
  331. equal : boolean
  332. True if all attributes are equal
  333. """
  334. if not isinstance(col, BaseColumn):
  335. raise ValueError('Comparison `col` must be a Column or '
  336. 'MaskedColumn object')
  337. attrs = ('name', 'unit', 'dtype', 'format', 'description', 'meta')
  338. equal = all(getattr(self, x) == getattr(col, x) for x in attrs)
  339. return equal
  340. @property
  341. def _formatter(self):
  342. return FORMATTER if (self.parent_table is None) else self.parent_table.formatter
  343. def pformat(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False,
  344. html=False):
  345. """Return a list of formatted string representation of column values.
  346. If no value of ``max_lines`` is supplied then the height of the
  347. screen terminal is used to set ``max_lines``. If the terminal
  348. height cannot be determined then the default will be
  349. determined using the ``astropy.conf.max_lines`` configuration
  350. item. If a negative value of ``max_lines`` is supplied then
  351. there is no line limit applied.
  352. Parameters
  353. ----------
  354. max_lines : int
  355. Maximum lines of output (header + data rows)
  356. show_name : bool
  357. Include column name (default=True)
  358. show_unit : bool
  359. Include a header row for unit (default=False)
  360. show_dtype : bool
  361. Include column dtype (default=False)
  362. html : bool
  363. Format the output as an HTML table (default=False)
  364. Returns
  365. -------
  366. lines : list
  367. List of lines with header and formatted column values
  368. """
  369. _pformat_col = self._formatter._pformat_col
  370. lines, outs = _pformat_col(self, max_lines, show_name=show_name,
  371. show_unit=show_unit, show_dtype=show_dtype,
  372. html=html)
  373. return lines
  374. def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False):
  375. """Print a formatted string representation of column values.
  376. If no value of ``max_lines`` is supplied then the height of the
  377. screen terminal is used to set ``max_lines``. If the terminal
  378. height cannot be determined then the default will be
  379. determined using the ``astropy.conf.max_lines`` configuration
  380. item. If a negative value of ``max_lines`` is supplied then
  381. there is no line limit applied.
  382. Parameters
  383. ----------
  384. max_lines : int
  385. Maximum number of values in output
  386. show_name : bool
  387. Include column name (default=True)
  388. show_unit : bool
  389. Include a header row for unit (default=False)
  390. show_dtype : bool
  391. Include column dtype (default=True)
  392. """
  393. _pformat_col = self._formatter._pformat_col
  394. lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit,
  395. show_dtype=show_dtype)
  396. n_header = outs['n_header']
  397. for i, line in enumerate(lines):
  398. if i < n_header:
  399. color_print(line, 'red')
  400. else:
  401. print(line)
  402. def more(self, max_lines=None, show_name=True, show_unit=False):
  403. """Interactively browse column with a paging interface.
  404. Supported keys::
  405. f, <space> : forward one page
  406. b : back one page
  407. r : refresh same page
  408. n : next row
  409. p : previous row
  410. < : go to beginning
  411. > : go to end
  412. q : quit browsing
  413. h : print this help
  414. Parameters
  415. ----------
  416. max_lines : int
  417. Maximum number of lines in table output
  418. show_name : bool
  419. Include a header row for column names (default=True)
  420. show_unit : bool
  421. Include a header row for unit (default=False)
  422. """
  423. _more_tabcol = self._formatter._more_tabcol
  424. _more_tabcol(self, max_lines=max_lines, show_name=show_name,
  425. show_unit=show_unit)
  426. @property
  427. def unit(self):
  428. """
  429. The unit associated with this column. May be a string or a
  430. `astropy.units.UnitBase` instance.
  431. Setting the ``unit`` property does not change the values of the
  432. data. To perform a unit conversion, use ``convert_unit_to``.
  433. """
  434. return self._unit
  435. @unit.setter
  436. def unit(self, unit):
  437. if unit is None:
  438. self._unit = None
  439. else:
  440. self._unit = Unit(unit, parse_strict='silent')
  441. @unit.deleter
  442. def unit(self):
  443. self._unit = None
  444. def convert_unit_to(self, new_unit, equivalencies=[]):
  445. """
  446. Converts the values of the column in-place from the current
  447. unit to the given unit.
  448. To change the unit associated with this column without
  449. actually changing the data values, simply set the ``unit``
  450. property.
  451. Parameters
  452. ----------
  453. new_unit : str or `astropy.units.UnitBase` instance
  454. The unit to convert to.
  455. equivalencies : list of equivalence pairs, optional
  456. A list of equivalence pairs to try if the unit are not
  457. directly convertible. See :ref:`unit_equivalencies`.
  458. Raises
  459. ------
  460. astropy.units.UnitsError
  461. If units are inconsistent
  462. """
  463. if self.unit is None:
  464. raise ValueError("No unit set on column")
  465. self.data[:] = self.unit.to(
  466. new_unit, self.data, equivalencies=equivalencies)
  467. self.unit = new_unit
  468. @property
  469. def groups(self):
  470. if not hasattr(self, '_groups'):
  471. self._groups = groups.ColumnGroups(self)
  472. return self._groups
  473. def group_by(self, keys):
  474. """
  475. Group this column by the specified ``keys``
  476. This effectively splits the column into groups which correspond to
  477. unique values of the ``keys`` grouping object. The output is a new
  478. `Column` or `MaskedColumn` which contains a copy of this column but
  479. sorted by row according to ``keys``.
  480. The ``keys`` input to ``group_by`` must be a numpy array with the
  481. same length as this column.
  482. Parameters
  483. ----------
  484. keys : numpy array
  485. Key grouping object
  486. Returns
  487. -------
  488. out : Column
  489. New column with groups attribute set accordingly
  490. """
  491. return groups.column_group_by(self, keys)
  492. def _copy_groups(self, out):
  493. """
  494. Copy current groups into a copy of self ``out``
  495. """
  496. if self.parent_table:
  497. if hasattr(self.parent_table, '_groups'):
  498. out._groups = groups.ColumnGroups(out, indices=self.parent_table._groups._indices)
  499. elif hasattr(self, '_groups'):
  500. out._groups = groups.ColumnGroups(out, indices=self._groups._indices)
  501. # Strip off the BaseColumn-ness for repr and str so that
  502. # MaskedColumn.data __repr__ does not include masked_BaseColumn(data =
  503. # [1 2], ...).
  504. def __repr__(self):
  505. return np.asarray(self).__repr__()
  506. @property
  507. def quantity(self):
  508. """
  509. A view of this table column as a `~astropy.units.Quantity` object with
  510. units given by the Column's `unit` parameter.
  511. """
  512. # the Quantity initializer is used here because it correctly fails
  513. # if the column's values are non-numeric (like strings), while .view
  514. # will happily return a quantity with gibberish for numerical values
  515. return Quantity(self, copy=False, dtype=self.dtype, order='A')
  516. def to(self, unit, equivalencies=[], **kwargs):
  517. """
  518. Converts this table column to a `~astropy.units.Quantity` object with
  519. the requested units.
  520. Parameters
  521. ----------
  522. unit : `~astropy.units.Unit` or str
  523. The unit to convert to (i.e., a valid argument to the
  524. :meth:`astropy.units.Quantity.to` method).
  525. equivalencies : list of equivalence pairs, optional
  526. Equivalencies to use for this conversion. See
  527. :meth:`astropy.units.Quantity.to` for more details.
  528. Returns
  529. -------
  530. quantity : `~astropy.units.Quantity`
  531. A quantity object with the contents of this column in the units
  532. ``unit``.
  533. """
  534. return self.quantity.to(unit, equivalencies)
  535. def _copy_attrs(self, obj):
  536. """
  537. Copy key column attributes from ``obj`` to self
  538. """
  539. for attr in ('name', 'unit', 'format', 'description'):
  540. val = getattr(obj, attr, None)
  541. setattr(self, attr, val)
  542. self.meta = deepcopy(getattr(obj, 'meta', {}))
  543. class Column(BaseColumn):
  544. """Define a data column for use in a Table object.
  545. Parameters
  546. ----------
  547. data : list, ndarray or None
  548. Column data values
  549. name : str
  550. Column name and key for reference within Table
  551. dtype : numpy.dtype compatible value
  552. Data type for column
  553. shape : tuple or ()
  554. Dimensions of a single row element in the column data
  555. length : int or 0
  556. Number of row elements in column data
  557. description : str or None
  558. Full description of column
  559. unit : str or None
  560. Physical unit
  561. format : str or None or function or callable
  562. Format string for outputting column values. This can be an
  563. "old-style" (``format % value``) or "new-style" (`str.format`)
  564. format specification string or a function or any callable object that
  565. accepts a single value and returns a string.
  566. meta : dict-like or None
  567. Meta-data associated with the column
  568. Examples
  569. --------
  570. A Column can be created in two different ways:
  571. - Provide a ``data`` value but not ``shape`` or ``length`` (which are
  572. inferred from the data).
  573. Examples::
  574. col = Column(data=[1, 2], name='name') # shape=(2,)
  575. col = Column(data=[[1, 2], [3, 4]], name='name') # shape=(2, 2)
  576. col = Column(data=[1, 2], name='name', dtype=float)
  577. col = Column(data=np.array([1, 2]), name='name')
  578. col = Column(data=['hello', 'world'], name='name')
  579. The ``dtype`` argument can be any value which is an acceptable
  580. fixed-size data-type initializer for the numpy.dtype() method. See
  581. `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
  582. Examples include:
  583. - Python non-string type (float, int, bool)
  584. - Numpy non-string type (e.g. np.float32, np.int64, np.bool)
  585. - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
  586. If no ``dtype`` value is provide then the type is inferred using
  587. ``np.array(data)``.
  588. - Provide ``length`` and optionally ``shape``, but not ``data``
  589. Examples::
  590. col = Column(name='name', length=5)
  591. col = Column(name='name', dtype=int, length=10, shape=(3,4))
  592. The default ``dtype`` is ``np.float64``. The ``shape`` argument is the
  593. array shape of a single cell in the column.
  594. """
  595. def __new__(cls, data=None, name=None,
  596. dtype=None, shape=(), length=0,
  597. description=None, unit=None, format=None, meta=None, copy=False):
  598. if isinstance(data, MaskedColumn) and np.any(data.mask):
  599. raise TypeError("Cannot convert a MaskedColumn with masked value to a Column")
  600. self = super(Column, cls).__new__(cls, data=data, name=name, dtype=dtype,
  601. shape=shape, length=length, description=description,
  602. unit=unit, format=format, meta=meta, copy=copy)
  603. return self
  604. def _base_repr_(self, html=False):
  605. # If scalar then just convert to correct numpy type and use numpy repr
  606. if self.ndim == 0:
  607. return repr(self.item())
  608. descr_vals = [self.__class__.__name__]
  609. unit = None if self.unit is None else str(self.unit)
  610. shape = None if self.ndim <= 1 else self.shape[1:]
  611. for attr, val in (('name', self.name),
  612. ('dtype', dtype_info_name(self.dtype)),
  613. ('shape', shape),
  614. ('unit', unit),
  615. ('format', self.format),
  616. ('description', self.description),
  617. ('length', len(self))):
  618. if val is not None:
  619. descr_vals.append('{0}={1}'.format(attr, repr(val)))
  620. descr = '<' + ' '.join(descr_vals) + '>\n'
  621. if html:
  622. from ..utils.xml.writer import xml_escape
  623. descr = xml_escape(descr)
  624. data_lines, outs = self._formatter._pformat_col(
  625. self, show_name=False, show_unit=False, show_length=False, html=html)
  626. out = descr + '\n'.join(data_lines)
  627. if six.PY2 and isinstance(out, six.text_type):
  628. out = out.encode('utf-8')
  629. return out
  630. def _repr_html_(self):
  631. return self._base_repr_(html=True)
  632. def __repr__(self):
  633. return self._base_repr_(html=False)
  634. def __unicode__(self):
  635. # If scalar then just convert to correct numpy type and use numpy repr
  636. if self.ndim == 0:
  637. return str(self.item())
  638. lines, outs = self._formatter._pformat_col(self)
  639. return '\n'.join(lines)
  640. if six.PY3:
  641. __str__ = __unicode__
  642. def __bytes__(self):
  643. return six.text_type(self).encode('utf-8')
  644. if six.PY2:
  645. __str__ = __bytes__
  646. # Set items using a view of the underlying data, as it gives an
  647. # order-of-magnitude speed-up. [#2994]
  648. def __setitem__(self, index, value):
  649. self.data[index] = value
  650. # # Set slices using a view of the underlying data, as it gives an
  651. # # order-of-magnitude speed-up. Only gets called in Python 2. [#3020]
  652. def __setslice__(self, start, stop, value):
  653. self.data.__setslice__(start, stop, value)
  654. def insert(self, obj, values):
  655. """
  656. Insert values before the given indices in the column and return
  657. a new `~astropy.table.Column` object.
  658. Parameters
  659. ----------
  660. obj : int, slice or sequence of ints
  661. Object that defines the index or indices before which ``values`` is
  662. inserted.
  663. values : array_like
  664. Value(s) to insert. If the type of ``values`` is different
  665. from that of quantity, ``values`` is converted to the matching type.
  666. ``values`` should be shaped so that it can be broadcast appropriately
  667. Returns
  668. -------
  669. out : `~astropy.table.Column`
  670. A copy of column with ``values`` and ``mask`` inserted. Note that the
  671. insertion does not occur in-place: a new column is returned.
  672. """
  673. if self.dtype.kind == 'O':
  674. # Even if values is array-like (e.g. [1,2,3]), insert as a single
  675. # object. Numpy.insert instead inserts each element in an array-like
  676. # input individually.
  677. data = np.insert(self, obj, None, axis=0)
  678. data[obj] = values
  679. else:
  680. # Explicitly convert to dtype of this column. Needed because numpy 1.7
  681. # enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+.
  682. values = np.asarray(values, dtype=self.dtype)
  683. data = np.insert(self, obj, values, axis=0)
  684. out = data.view(self.__class__)
  685. out.__array_finalize__(self)
  686. return out
    # We do this to make the methods show up in the API docs
    # Each name below is rebound on Column to the very same object defined
    # on BaseColumn, so behaviour is unchanged; only the class on which the
    # attribute is found differs.
    name = BaseColumn.name
    unit = BaseColumn.unit
    copy = BaseColumn.copy
    more = BaseColumn.more
    pprint = BaseColumn.pprint
    pformat = BaseColumn.pformat
    convert_unit_to = BaseColumn.convert_unit_to
    quantity = BaseColumn.quantity
    to = BaseColumn.to
  697. class MaskedColumn(Column, ma.MaskedArray):
  698. """Define a masked data column for use in a Table object.
  699. Parameters
  700. ----------
  701. data : list, ndarray or None
  702. Column data values
  703. name : str
  704. Column name and key for reference within Table
  705. mask : list, ndarray or None
  706. Boolean mask for which True indicates missing or invalid data
  707. fill_value : float, int, str or None
  708. Value used when filling masked column elements
  709. dtype : numpy.dtype compatible value
  710. Data type for column
  711. shape : tuple or ()
  712. Dimensions of a single row element in the column data
  713. length : int or 0
  714. Number of row elements in column data
  715. description : str or None
  716. Full description of column
  717. unit : str or None
  718. Physical unit
  719. format : str or None or function or callable
  720. Format string for outputting column values. This can be an
  721. "old-style" (``format % value``) or "new-style" (`str.format`)
  722. format specification string or a function or any callable object that
  723. accepts a single value and returns a string.
  724. meta : dict-like or None
  725. Meta-data associated with the column
  726. Examples
  727. --------
  728. A MaskedColumn is similar to a Column except that it includes ``mask`` and
  729. ``fill_value`` attributes. It can be created in two different ways:
  730. - Provide a ``data`` value but not ``shape`` or ``length`` (which are
  731. inferred from the data).
  732. Examples::
  733. col = MaskedColumn(data=[1, 2], name='name')
  734. col = MaskedColumn(data=[1, 2], name='name', mask=[True, False])
  735. col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99)
  736. The ``mask`` argument will be cast as a boolean array and specifies
  737. which elements are considered to be missing or invalid.
  738. The ``dtype`` argument can be any value which is an acceptable
  739. fixed-size data-type initializer for the numpy.dtype() method. See
  740. `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
  741. Examples include:
  742. - Python non-string type (float, int, bool)
  743. - Numpy non-string type (e.g. np.float32, np.int64, np.bool)
  744. - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
  745. If no ``dtype`` value is provide then the type is inferred using
  746. ``np.array(data)``. When ``data`` is provided then the ``shape``
  747. and ``length`` arguments are ignored.
  748. - Provide ``length`` and optionally ``shape``, but not ``data``
  749. Examples::
  750. col = MaskedColumn(name='name', length=5)
  751. col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4))
  752. The default ``dtype`` is ``np.float64``. The ``shape`` argument is the
  753. array shape of a single cell in the column.
  754. """
  755. def __new__(cls, data=None, name=None, mask=None, fill_value=None,
  756. dtype=None, shape=(), length=0,
  757. description=None, unit=None, format=None, meta=None, copy=False):
  758. if mask is None and hasattr(data, 'mask'):
  759. mask = data.mask
  760. else:
  761. mask = deepcopy(mask)
  762. # Create self using MaskedArray as a wrapper class, following the example of
  763. # class MSubArray in
  764. # https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py
  765. # This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and
  766. # https://github.com/astropy/astropy/commit/ff6039e8)
  767. # First just pass through all args and kwargs to BaseColumn, then wrap that object
  768. # with MaskedArray.
  769. self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name,
  770. unit=unit, format=format, description=description, meta=meta, copy=copy)
  771. cls = cls._get_nd_proxy_class(self_data)
  772. self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask)
  773. # Note: do not set fill_value in the MaskedArray constructor because this does not
  774. # go through the fill_value workarounds (see _fix_fill_value below).
  775. if fill_value is None and hasattr(data, 'fill_value'):
  776. fill_value = data.fill_value
  777. self.fill_value = fill_value
  778. self.parent_table = None
  779. return self
  780. def _fix_fill_value(self, val):
  781. """Fix a fill value (if needed) to work around a bug with setting the fill
  782. value of a string array in MaskedArray with Python 3.x. See
  783. https://github.com/numpy/numpy/pull/2733. This mimics the check in
  784. numpy.ma.core._check_fill_value() (version < 1.8) which incorrectly sets
  785. fill_value to a default if self.dtype.char is 'U' (which is the case for Python
  786. 3). Here we change the string to a byte string so that in Python 3 the
  787. isinstance(val, basestring) part fails.
  788. """
  789. if (NUMPY_LT_1_8 and isinstance(val, six.string_types) and
  790. (self.dtype.char not in 'SV')):
  791. val = val.encode()
  792. return val
  793. @property
  794. def fill_value(self):
  795. return self.get_fill_value() # defer to native ma.MaskedArray method
  796. @fill_value.setter
  797. def fill_value(self, val):
  798. """Set fill value both in the masked column view and in the parent table
  799. if it exists. Setting one or the other alone doesn't work."""
  800. val = self._fix_fill_value(val)
  801. # Yet another ma bug workaround: If the value of fill_value for a string array is
  802. # requested but not yet set then it gets created as 'N/A'. From this point onward
  803. # any new fill_values are truncated to 3 characters. Note that this does not
  804. # occur if the masked array is a structured array (as in the previous block that
  805. # deals with the parent table).
  806. #
  807. # >>> x = ma.array(['xxxx'])
  808. # >>> x.fill_value # fill_value now gets represented as an 'S3' array
  809. # 'N/A'
  810. # >>> x.fill_value='yyyy'
  811. # >>> x.fill_value
  812. # 'yyy'
  813. #
  814. # To handle this we are forced to reset a private variable first:
  815. self._fill_value = None
  816. self.set_fill_value(val) # defer to native ma.MaskedArray method
  817. @property
  818. def data(self):
  819. out = self.view(ma.MaskedArray)
  820. # The following is necessary because of a bug in Numpy, which was
  821. # fixed in numpy/numpy#2703. The fix should be included in Numpy 1.8.0.
  822. out.fill_value = self.fill_value
  823. return out
  824. def filled(self, fill_value=None):
  825. """Return a copy of self, with masked values filled with a given value.
  826. Parameters
  827. ----------
  828. fill_value : scalar; optional
  829. The value to use for invalid entries (`None` by default). If
  830. `None`, the ``fill_value`` attribute of the array is used
  831. instead.
  832. Returns
  833. -------
  834. filled_column : Column
  835. A copy of ``self`` with masked entries replaced by `fill_value`
  836. (be it the function argument or the attribute of ``self``).
  837. """
  838. if fill_value is None:
  839. fill_value = self.fill_value
  840. fill_value = self._fix_fill_value(fill_value)
  841. data = super(MaskedColumn, self).filled(fill_value)
  842. # Use parent table definition of Column if available
  843. column_cls = self.parent_table.Column if (self.parent_table is not None) else Column
  844. out = column_cls(name=self.name, data=data, unit=self.unit,
  845. format=self.format, description=self.description,
  846. meta=deepcopy(self.meta))
  847. return out
  848. def insert(self, obj, values, mask=None):
  849. """
  850. Insert values along the given axis before the given indices and return
  851. a new `~astropy.table.MaskedColumn` object.
  852. Parameters
  853. ----------
  854. obj : int, slice or sequence of ints
  855. Object that defines the index or indices before which ``values`` is
  856. inserted.
  857. values : array_like
  858. Value(s) to insert. If the type of ``values`` is different
  859. from that of quantity, ``values`` is converted to the matching type.
  860. ``values`` should be shaped so that it can be broadcast appropriately
  861. mask : boolean array_like
  862. Mask value(s) to insert. If not supplied then False is used.
  863. Returns
  864. -------
  865. out : `~astropy.table.MaskedColumn`
  866. A copy of column with ``values`` and ``mask`` inserted. Note that the
  867. insertion does not occur in-place: a new masked column is returned.
  868. """
  869. self_ma = self.data # self viewed as MaskedArray
  870. if self.dtype.kind == 'O':
  871. # Even if values is array-like (e.g. [1,2,3]), insert as a single
  872. # object. Numpy.insert instead inserts each element in an array-like
  873. # input individually.
  874. new_data = np.insert(self_ma.data, obj, None, axis=0)
  875. new_data[obj] = values
  876. else:
  877. # Explicitly convert to dtype of this column. Needed because numpy 1.7
  878. # enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+.
  879. values = np.asarray(values, dtype=self.dtype)
  880. new_data = np.insert(self_ma.data, obj, values, axis=0)
  881. if mask is None:
  882. if self.dtype.kind == 'O':
  883. mask = False
  884. else:
  885. mask = np.zeros(values.shape, dtype=np.bool)
  886. new_mask = np.insert(self_ma.mask, obj, mask, axis=0)
  887. new_ma = np.ma.array(new_data, mask=new_mask, copy=False)
  888. out = new_ma.view(self.__class__)
  889. out.parent_table = None
  890. out._copy_attrs(self)
  891. return out
  892. def __getitem__(self, item):
  893. out = super(MaskedColumn, self).__getitem__(item)
  894. # Fixes issue #3023: when calling getitem with a MaskedArray subclass
  895. # the original object attributes are not copied.
  896. if out.__class__ is self.__class__:
  897. out.parent_table = None
  898. out._copy_attrs(self)
  899. return out
  900. # Set items and slices using MaskedArray method, instead of falling through
  901. # to the (faster) Column version which uses an ndarray view. This doesn't
  902. # copy the mask properly. See test_setting_from_masked_column test.
  903. def __setitem__(self, index, value):
  904. ma.MaskedArray.__setitem__(self, index, value)
  905. def __setslice__(self, start, stop, value):
  906. ma.MaskedArray.__setslice__(self, start, stop, value)
# Re-bind these BaseColumn attributes directly on this class.  We do this to
# make the methods show up in the API docs.
name = BaseColumn.name
copy = BaseColumn.copy
more = BaseColumn.more
pprint = BaseColumn.pprint
pformat = BaseColumn.pformat
convert_unit_to = BaseColumn.convert_unit_to