PageRenderTime 79ms CodeModel.GetById 40ms RepoModel.GetById 0ms app.codeStats 0ms

/astropy/table/column.py

https://github.com/crawfordsm/astropy
Python | 1093 lines | 983 code | 38 blank | 72 comment | 41 complexity | 26eaba65d39d05bac3dc740e4f284a50 MD5 | raw file
  1. # Licensed under a 3-clause BSD style license - see LICENSE.rst
  2. from __future__ import (absolute_import, division, print_function,
  3. unicode_literals)
  4. from ..extern import six
  5. import weakref
  6. from copy import deepcopy
  7. import numpy as np
  8. from numpy import ma
  9. from ..units import Unit, Quantity
  10. from ..utils.compat import NUMPY_LT_1_8
  11. from ..utils.console import color_print
  12. from ..utils.metadata import MetaData
  13. from ..utils.data_info import BaseColumnInfo, dtype_info_name
  14. from . import groups
  15. from . import pprint
  16. from .np_utils import fix_column_name
  17. # These "shims" provide __getitem__ implementations for Column and MaskedColumn
  18. from ._column_mixins import _ColumnGetitemShim, _MaskedColumnGetitemShim
# Create a generic TableFormatter object for use by bare columns with no
# parent table.
FORMATTER = pprint.TableFormatter()

# Types treated as integers.  On Python 2 the tuple additionally contains
# ``long``; on Python 3 only ``int`` and ``np.integer`` are listed.
INTEGER_TYPES = (int, long, np.integer) if six.PY2 else (int, np.integer)
  23. def _auto_names(n_cols):
  24. from . import conf
  25. return [str(conf.auto_colname).format(i) for i in range(n_cols)]
  26. # list of one and two-dimensional comparison functions, which sometimes return
  27. # a Column class and sometimes a plain array. Used in __array_wrap__ to ensure
  28. # they only return plain (masked) arrays (see #1446 and #1685)
  29. _comparison_functions = set(
  30. [np.greater, np.greater_equal, np.less, np.less_equal,
  31. np.not_equal, np.equal,
  32. np.isfinite, np.isinf, np.isnan, np.sign, np.signbit])
  33. def col_copy(col, copy_indices=True):
  34. """
  35. This is a mixin-safe version of Column.copy() (with copy_data=True).
  36. """
  37. if isinstance(col, BaseColumn):
  38. return col.copy()
  39. # The new column should have None for the parent_table ref. If the
  40. # original parent_table weakref there at the point of copying then it
  41. # generates an infinite recursion. Instead temporarily remove the weakref
  42. # on the original column and restore after the copy in an exception-safe
  43. # manner.
  44. parent_table = col.info.parent_table
  45. indices = col.info.indices
  46. col.info.parent_table = None
  47. col.info.indices = []
  48. try:
  49. newcol = col.copy() if hasattr(col, 'copy') else deepcopy(col)
  50. newcol.info = col.info
  51. newcol.info.indices = deepcopy(indices or []) if copy_indices else []
  52. for index in newcol.info.indices:
  53. index.replace_col(col, newcol)
  54. finally:
  55. col.info.parent_table = parent_table
  56. col.info.indices = indices
  57. return newcol
  58. class FalseArray(np.ndarray):
  59. def __new__(cls, shape):
  60. obj = np.zeros(shape, dtype=np.bool).view(cls)
  61. return obj
  62. def __setitem__(self, item, val):
  63. val = np.asarray(val)
  64. if np.any(val):
  65. raise ValueError('Cannot set any element of {0} class to True'
  66. .format(self.__class__.__name__))
  67. def __setslice__(self, start, stop, val):
  68. val = np.asarray(val)
  69. if np.any(val):
  70. raise ValueError('Cannot set any element of {0} class to True'
  71. .format(self.__class__.__name__))
class ColumnInfo(BaseColumnInfo):
    """
    ``BaseColumnInfo`` specialization for the column classes of this module.

    ``BaseColumn`` instantiates it as its class-level ``info`` attribute.
    """
    # All attribute names known to BaseColumnInfo.
    # NOTE(review): presumably this tells the info object to read these
    # attributes from the owning column -- confirm in utils.data_info.
    attrs_from_parent = BaseColumnInfo.attr_names
    # NOTE(review): presumably marks columns as usable for table indexing --
    # confirm against the code that reads this flag.
    _supports_indexing = True
  75. class BaseColumn(_ColumnGetitemShim, np.ndarray):
  76. meta = MetaData()
  77. def __new__(cls, data=None, name=None,
  78. dtype=None, shape=(), length=0,
  79. description=None, unit=None, format=None, meta=None,
  80. copy=False, copy_indices=True):
  81. if data is None:
  82. dtype = (np.dtype(dtype).str, shape)
  83. self_data = np.zeros(length, dtype=dtype)
  84. elif isinstance(data, BaseColumn) and hasattr(data, '_name'):
  85. # When unpickling a MaskedColumn, ``data`` will be a bare
  86. # BaseColumn with none of the expected attributes. In this case
  87. # do NOT execute this block which initializes from ``data``
  88. # attributes.
  89. self_data = np.array(data.data, dtype=dtype, copy=copy)
  90. if description is None:
  91. description = data.description
  92. if unit is None:
  93. unit = unit or data.unit
  94. if format is None:
  95. format = data.format
  96. if meta is None:
  97. meta = deepcopy(data.meta)
  98. if name is None:
  99. name = data.name
  100. elif isinstance(data, Quantity):
  101. if unit is None:
  102. self_data = np.array(data, dtype=dtype, copy=copy)
  103. unit = data.unit
  104. else:
  105. self_data = np.array(data.to(unit), dtype=dtype, copy=copy)
  106. if description is None:
  107. description = data.info.description
  108. if format is None:
  109. format = data.info.format
  110. if meta is None:
  111. meta = deepcopy(data.info.meta)
  112. else:
  113. self_data = np.array(data, dtype=dtype, copy=copy)
  114. self = self_data.view(cls)
  115. self._name = fix_column_name(name)
  116. self.unit = unit
  117. self.format = format
  118. self.description = description
  119. self.meta = meta
  120. self._parent_table = None
  121. self.indices = deepcopy(getattr(data, 'indices', [])) if \
  122. copy_indices else []
  123. for index in self.indices:
  124. index.replace_col(data, self)
  125. return self
  126. @property
  127. def data(self):
  128. return self.view(np.ndarray)
  129. @property
  130. def parent_table(self):
  131. if self._parent_table is None:
  132. return None
  133. else:
  134. return self._parent_table()
  135. @parent_table.setter
  136. def parent_table(self, table):
  137. if table is None:
  138. self._parent_table = None
  139. else:
  140. self._parent_table = weakref.ref(table)
  141. info = ColumnInfo()
  142. def copy(self, order='C', data=None, copy_data=True):
  143. """
  144. Return a copy of the current instance.
  145. If ``data`` is supplied then a view (reference) of ``data`` is used,
  146. and ``copy_data`` is ignored.
  147. Parameters
  148. ----------
  149. order : {'C', 'F', 'A', 'K'}, optional
  150. Controls the memory layout of the copy. 'C' means C-order,
  151. 'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous,
  152. 'C' otherwise. 'K' means match the layout of ``a`` as closely
  153. as possible. (Note that this function and :func:numpy.copy are very
  154. similar, but have different default values for their order=
  155. arguments.) Default is 'C'.
  156. data : array, optional
  157. If supplied then use a view of ``data`` instead of the instance
  158. data. This allows copying the instance attributes and meta.
  159. copy_data : bool, optional
  160. Make a copy of the internal numpy array instead of using a
  161. reference. Default is True.
  162. Returns
  163. -------
  164. col : Column or MaskedColumn
  165. Copy of the current column (same type as original)
  166. """
  167. if data is None:
  168. data = self.data
  169. if copy_data:
  170. data = data.copy(order)
  171. out = data.view(self.__class__)
  172. out.__array_finalize__(self)
  173. # for MaskedColumn, MaskedArray.__array_finalize__ also copies mask
  174. # from self, which is not the idea here, so undo
  175. if isinstance(self, MaskedColumn):
  176. out._mask = data._mask
  177. self._copy_groups(out)
  178. return out
  179. def __setstate__(self, state):
  180. """
  181. Restore the internal state of the Column/MaskedColumn for pickling
  182. purposes. This requires that the last element of ``state`` is a
  183. 5-tuple that has Column-specific state values.
  184. """
  185. # Get the Column attributes and meta
  186. name, unit, format, description, meta = state[-1]
  187. state = state[:-1]
  188. # Using super(type(self), self).__setstate__() gives an infinite
  189. # recursion. Manually call the right super class to actually set up
  190. # the array object.
  191. super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
  192. super_class.__setstate__(self, state)
  193. # Set the Column attributes and meta
  194. self._name = name
  195. self.unit = unit
  196. self.format = format
  197. self.description = description
  198. self.meta = meta
  199. self._parent_table = None
  200. def __reduce__(self):
  201. """
  202. Return a 3-tuple for pickling a Column. Use the super-class
  203. functionality but then add in a 5-tuple of Column-specific values
  204. that get used in __setstate__.
  205. """
  206. super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
  207. reconstruct_func, reconstruct_func_args, state = super_class.__reduce__(self)
  208. # Define Column-specific attrs and meta that gets added to state.
  209. column_state = (self.name, self.unit, self.format, self.description,
  210. self.meta)
  211. state = state + (column_state,)
  212. return reconstruct_func, reconstruct_func_args, state
  213. # avoid == and != to be done based on type of subclass
  214. # (helped solve #1446; see also __array_wrap__)
  215. def __eq__(self, other):
  216. return self.data.__eq__(other)
  217. def __ne__(self, other):
  218. return self.data.__ne__(other)
  219. def __array_finalize__(self, obj):
  220. # Obj will be none for direct call to Column() creator
  221. if obj is None:
  222. return
  223. if six.callable(super(BaseColumn, self).__array_finalize__):
  224. super(BaseColumn, self).__array_finalize__(obj)
  225. # Self was created from template (e.g. obj[slice] or (obj * 2))
  226. # or viewcast e.g. obj.view(Column). In either case we want to
  227. # init Column attributes for self from obj if possible.
  228. self.parent_table = None
  229. if not hasattr(self, 'indices'): # may have been copied in __new__
  230. self.indices = []
  231. self._copy_attrs(obj)
  232. def __array_wrap__(self, out_arr, context=None):
  233. """
  234. __array_wrap__ is called at the end of every ufunc.
  235. Normally, we want a Column object back and do not have to do anything
  236. special. But there are two exceptions:
  237. 1) If the output shape is different (e.g. for reduction ufuncs
  238. like sum() or mean()), a Column still linking to a parent_table
  239. makes little sense, so we return the output viewed as the
  240. column content (ndarray or MaskedArray).
  241. For this case, we use "[()]" to select everything, and to ensure we
  242. convert a zero rank array to a scalar. (For some reason np.sum()
  243. returns a zero rank scalar array while np.mean() returns a scalar;
  244. So the [()] is needed for this case.
  245. 2) When the output is created by any function that returns a boolean
  246. we also want to consistently return an array rather than a column
  247. (see #1446 and #1685)
  248. """
  249. out_arr = super(BaseColumn, self).__array_wrap__(out_arr, context)
  250. if (self.shape != out_arr.shape or
  251. (isinstance(out_arr, BaseColumn) and
  252. (context is not None and context[0] in _comparison_functions))):
  253. return out_arr.data[()]
  254. else:
  255. return out_arr
  256. @property
  257. def name(self):
  258. """
  259. The name of this column.
  260. """
  261. return self._name
  262. @name.setter
  263. def name(self, val):
  264. val = fix_column_name(val)
  265. if self.parent_table is not None:
  266. table = self.parent_table
  267. table.columns._rename_column(self.name, val)
  268. self._name = val
  269. @property
  270. def descr(self):
  271. """Array-interface compliant full description of the column.
  272. This returns a 3-tuple (name, type, shape) that can always be
  273. used in a structured array dtype definition.
  274. """
  275. return (self.name, self.dtype.str, self.shape[1:])
  276. def iter_str_vals(self):
  277. """
  278. Return an iterator that yields the string-formatted values of this
  279. column.
  280. Returns
  281. -------
  282. str_vals : iterator
  283. Column values formatted as strings
  284. """
  285. # Iterate over formatted values with no max number of lines, no column
  286. # name, no unit, and ignoring the returned header info in outs.
  287. _pformat_col_iter = self._formatter._pformat_col_iter
  288. for str_val in _pformat_col_iter(self, -1, show_name=False, show_unit=False,
  289. show_dtype=False, outs={}):
  290. yield str_val
  291. def attrs_equal(self, col):
  292. """Compare the column attributes of ``col`` to this object.
  293. The comparison attributes are: ``name``, ``unit``, ``dtype``,
  294. ``format``, ``description``, and ``meta``.
  295. Parameters
  296. ----------
  297. col : Column
  298. Comparison column
  299. Returns
  300. -------
  301. equal : boolean
  302. True if all attributes are equal
  303. """
  304. if not isinstance(col, BaseColumn):
  305. raise ValueError('Comparison `col` must be a Column or '
  306. 'MaskedColumn object')
  307. attrs = ('name', 'unit', 'dtype', 'format', 'description', 'meta')
  308. equal = all(getattr(self, x) == getattr(col, x) for x in attrs)
  309. return equal
  310. @property
  311. def _formatter(self):
  312. return FORMATTER if (self.parent_table is None) else self.parent_table.formatter
  313. def pformat(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False,
  314. html=False):
  315. """Return a list of formatted string representation of column values.
  316. If no value of ``max_lines`` is supplied then the height of the
  317. screen terminal is used to set ``max_lines``. If the terminal
  318. height cannot be determined then the default will be
  319. determined using the ``astropy.conf.max_lines`` configuration
  320. item. If a negative value of ``max_lines`` is supplied then
  321. there is no line limit applied.
  322. Parameters
  323. ----------
  324. max_lines : int
  325. Maximum lines of output (header + data rows)
  326. show_name : bool
  327. Include column name (default=True)
  328. show_unit : bool
  329. Include a header row for unit (default=False)
  330. show_dtype : bool
  331. Include column dtype (default=False)
  332. html : bool
  333. Format the output as an HTML table (default=False)
  334. Returns
  335. -------
  336. lines : list
  337. List of lines with header and formatted column values
  338. """
  339. _pformat_col = self._formatter._pformat_col
  340. lines, outs = _pformat_col(self, max_lines, show_name=show_name,
  341. show_unit=show_unit, show_dtype=show_dtype,
  342. html=html)
  343. return lines
  344. def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False):
  345. """Print a formatted string representation of column values.
  346. If no value of ``max_lines`` is supplied then the height of the
  347. screen terminal is used to set ``max_lines``. If the terminal
  348. height cannot be determined then the default will be
  349. determined using the ``astropy.conf.max_lines`` configuration
  350. item. If a negative value of ``max_lines`` is supplied then
  351. there is no line limit applied.
  352. Parameters
  353. ----------
  354. max_lines : int
  355. Maximum number of values in output
  356. show_name : bool
  357. Include column name (default=True)
  358. show_unit : bool
  359. Include a header row for unit (default=False)
  360. show_dtype : bool
  361. Include column dtype (default=True)
  362. """
  363. _pformat_col = self._formatter._pformat_col
  364. lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit,
  365. show_dtype=show_dtype)
  366. n_header = outs['n_header']
  367. for i, line in enumerate(lines):
  368. if i < n_header:
  369. color_print(line, 'red')
  370. else:
  371. print(line)
  372. def more(self, max_lines=None, show_name=True, show_unit=False):
  373. """Interactively browse column with a paging interface.
  374. Supported keys::
  375. f, <space> : forward one page
  376. b : back one page
  377. r : refresh same page
  378. n : next row
  379. p : previous row
  380. < : go to beginning
  381. > : go to end
  382. q : quit browsing
  383. h : print this help
  384. Parameters
  385. ----------
  386. max_lines : int
  387. Maximum number of lines in table output
  388. show_name : bool
  389. Include a header row for column names (default=True)
  390. show_unit : bool
  391. Include a header row for unit (default=False)
  392. """
  393. _more_tabcol = self._formatter._more_tabcol
  394. _more_tabcol(self, max_lines=max_lines, show_name=show_name,
  395. show_unit=show_unit)
  396. @property
  397. def unit(self):
  398. """
  399. The unit associated with this column. May be a string or a
  400. `astropy.units.UnitBase` instance.
  401. Setting the ``unit`` property does not change the values of the
  402. data. To perform a unit conversion, use ``convert_unit_to``.
  403. """
  404. return self._unit
  405. @unit.setter
  406. def unit(self, unit):
  407. if unit is None:
  408. self._unit = None
  409. else:
  410. self._unit = Unit(unit, parse_strict='silent')
  411. @unit.deleter
  412. def unit(self):
  413. self._unit = None
  414. def convert_unit_to(self, new_unit, equivalencies=[]):
  415. """
  416. Converts the values of the column in-place from the current
  417. unit to the given unit.
  418. To change the unit associated with this column without
  419. actually changing the data values, simply set the ``unit``
  420. property.
  421. Parameters
  422. ----------
  423. new_unit : str or `astropy.units.UnitBase` instance
  424. The unit to convert to.
  425. equivalencies : list of equivalence pairs, optional
  426. A list of equivalence pairs to try if the unit are not
  427. directly convertible. See :ref:`unit_equivalencies`.
  428. Raises
  429. ------
  430. astropy.units.UnitsError
  431. If units are inconsistent
  432. """
  433. if self.unit is None:
  434. raise ValueError("No unit set on column")
  435. self.data[:] = self.unit.to(
  436. new_unit, self.data, equivalencies=equivalencies)
  437. self.unit = new_unit
  438. @property
  439. def groups(self):
  440. if not hasattr(self, '_groups'):
  441. self._groups = groups.ColumnGroups(self)
  442. return self._groups
  443. def group_by(self, keys):
  444. """
  445. Group this column by the specified ``keys``
  446. This effectively splits the column into groups which correspond to
  447. unique values of the ``keys`` grouping object. The output is a new
  448. `Column` or `MaskedColumn` which contains a copy of this column but
  449. sorted by row according to ``keys``.
  450. The ``keys`` input to ``group_by`` must be a numpy array with the
  451. same length as this column.
  452. Parameters
  453. ----------
  454. keys : numpy array
  455. Key grouping object
  456. Returns
  457. -------
  458. out : Column
  459. New column with groups attribute set accordingly
  460. """
  461. return groups.column_group_by(self, keys)
  462. def _copy_groups(self, out):
  463. """
  464. Copy current groups into a copy of self ``out``
  465. """
  466. if self.parent_table:
  467. if hasattr(self.parent_table, '_groups'):
  468. out._groups = groups.ColumnGroups(out, indices=self.parent_table._groups._indices)
  469. elif hasattr(self, '_groups'):
  470. out._groups = groups.ColumnGroups(out, indices=self._groups._indices)
  471. # Strip off the BaseColumn-ness for repr and str so that
  472. # MaskedColumn.data __repr__ does not include masked_BaseColumn(data =
  473. # [1 2], ...).
  474. def __repr__(self):
  475. return np.asarray(self).__repr__()
  476. @property
  477. def quantity(self):
  478. """
  479. A view of this table column as a `~astropy.units.Quantity` object with
  480. units given by the Column's `unit` parameter.
  481. """
  482. # the Quantity initializer is used here because it correctly fails
  483. # if the column's values are non-numeric (like strings), while .view
  484. # will happily return a quantity with gibberish for numerical values
  485. return Quantity(self, copy=False, dtype=self.dtype, order='A')
  486. def to(self, unit, equivalencies=[], **kwargs):
  487. """
  488. Converts this table column to a `~astropy.units.Quantity` object with
  489. the requested units.
  490. Parameters
  491. ----------
  492. unit : `~astropy.units.Unit` or str
  493. The unit to convert to (i.e., a valid argument to the
  494. :meth:`astropy.units.Quantity.to` method).
  495. equivalencies : list of equivalence pairs, optional
  496. Equivalencies to use for this conversion. See
  497. :meth:`astropy.units.Quantity.to` for more details.
  498. Returns
  499. -------
  500. quantity : `~astropy.units.Quantity`
  501. A quantity object with the contents of this column in the units
  502. ``unit``.
  503. """
  504. return self.quantity.to(unit, equivalencies)
  505. def _copy_attrs(self, obj):
  506. """
  507. Copy key column attributes from ``obj`` to self
  508. """
  509. for attr in ('name', 'unit', 'format', 'description'):
  510. val = getattr(obj, attr, None)
  511. setattr(self, attr, val)
  512. self.meta = deepcopy(getattr(obj, 'meta', {}))
  513. class Column(BaseColumn):
  514. """Define a data column for use in a Table object.
  515. Parameters
  516. ----------
  517. data : list, ndarray or None
  518. Column data values
  519. name : str
  520. Column name and key for reference within Table
  521. dtype : numpy.dtype compatible value
  522. Data type for column
  523. shape : tuple or ()
  524. Dimensions of a single row element in the column data
  525. length : int or 0
  526. Number of row elements in column data
  527. description : str or None
  528. Full description of column
  529. unit : str or None
  530. Physical unit
  531. format : str or None or function or callable
  532. Format string for outputting column values. This can be an
  533. "old-style" (``format % value``) or "new-style" (`str.format`)
  534. format specification string or a function or any callable object that
  535. accepts a single value and returns a string.
  536. meta : dict-like or None
  537. Meta-data associated with the column
  538. Examples
  539. --------
  540. A Column can be created in two different ways:
  541. - Provide a ``data`` value but not ``shape`` or ``length`` (which are
  542. inferred from the data).
  543. Examples::
  544. col = Column(data=[1, 2], name='name') # shape=(2,)
  545. col = Column(data=[[1, 2], [3, 4]], name='name') # shape=(2, 2)
  546. col = Column(data=[1, 2], name='name', dtype=float)
  547. col = Column(data=np.array([1, 2]), name='name')
  548. col = Column(data=['hello', 'world'], name='name')
  549. The ``dtype`` argument can be any value which is an acceptable
  550. fixed-size data-type initializer for the numpy.dtype() method. See
  551. `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
  552. Examples include:
  553. - Python non-string type (float, int, bool)
  554. - Numpy non-string type (e.g. np.float32, np.int64, np.bool)
  555. - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
  556. If no ``dtype`` value is provide then the type is inferred using
  557. ``np.array(data)``.
  558. - Provide ``length`` and optionally ``shape``, but not ``data``
  559. Examples::
  560. col = Column(name='name', length=5)
  561. col = Column(name='name', dtype=int, length=10, shape=(3,4))
  562. The default ``dtype`` is ``np.float64``. The ``shape`` argument is the
  563. array shape of a single cell in the column.
  564. """
  565. def __new__(cls, data=None, name=None,
  566. dtype=None, shape=(), length=0,
  567. description=None, unit=None, format=None, meta=None,
  568. copy=False, copy_indices=True):
  569. if isinstance(data, MaskedColumn) and np.any(data.mask):
  570. raise TypeError("Cannot convert a MaskedColumn with masked value to a Column")
  571. self = super(Column, cls).__new__(cls, data=data, name=name, dtype=dtype,
  572. shape=shape, length=length, description=description,
  573. unit=unit, format=format, meta=meta,
  574. copy=copy, copy_indices=copy_indices)
  575. return self
  576. def _base_repr_(self, html=False):
  577. # If scalar then just convert to correct numpy type and use numpy repr
  578. if self.ndim == 0:
  579. return repr(self.item())
  580. descr_vals = [self.__class__.__name__]
  581. unit = None if self.unit is None else str(self.unit)
  582. shape = None if self.ndim <= 1 else self.shape[1:]
  583. for attr, val in (('name', self.name),
  584. ('dtype', dtype_info_name(self.dtype)),
  585. ('shape', shape),
  586. ('unit', unit),
  587. ('format', self.format),
  588. ('description', self.description),
  589. ('length', len(self))):
  590. if val is not None:
  591. descr_vals.append('{0}={1}'.format(attr, repr(val)))
  592. descr = '<' + ' '.join(descr_vals) + '>\n'
  593. if html:
  594. from ..utils.xml.writer import xml_escape
  595. descr = xml_escape(descr)
  596. data_lines, outs = self._formatter._pformat_col(
  597. self, show_name=False, show_unit=False, show_length=False, html=html)
  598. out = descr + '\n'.join(data_lines)
  599. if six.PY2 and isinstance(out, six.text_type):
  600. out = out.encode('utf-8')
  601. return out
  602. def _repr_html_(self):
  603. return self._base_repr_(html=True)
  604. def __repr__(self):
  605. return self._base_repr_(html=False)
  606. def __unicode__(self):
  607. # If scalar then just convert to correct numpy type and use numpy repr
  608. if self.ndim == 0:
  609. return str(self.item())
  610. lines, outs = self._formatter._pformat_col(self)
  611. return '\n'.join(lines)
  612. if six.PY3:
  613. __str__ = __unicode__
  614. def __bytes__(self):
  615. return six.text_type(self).encode('utf-8')
  616. if six.PY2:
  617. __str__ = __bytes__
  618. # Set items using a view of the underlying data, as it gives an
  619. # order-of-magnitude speed-up. [#2994]
  620. def __setitem__(self, index, value):
  621. # update indices
  622. self.info.adjust_indices(index, value, len(self))
  623. self.data[index] = value
  624. # # Set slices using a view of the underlying data, as it gives an
  625. # # order-of-magnitude speed-up. Only gets called in Python 2. [#3020]
  626. def __setslice__(self, start, stop, value):
  627. self.info.adjust_indices(slice(start, stop), value, len(self))
  628. self.data.__setslice__(start, stop, value)
  629. def insert(self, obj, values):
  630. """
  631. Insert values before the given indices in the column and return
  632. a new `~astropy.table.Column` object.
  633. Parameters
  634. ----------
  635. obj : int, slice or sequence of ints
  636. Object that defines the index or indices before which ``values`` is
  637. inserted.
  638. values : array_like
  639. Value(s) to insert. If the type of ``values`` is different
  640. from that of quantity, ``values`` is converted to the matching type.
  641. ``values`` should be shaped so that it can be broadcast appropriately
  642. Returns
  643. -------
  644. out : `~astropy.table.Column`
  645. A copy of column with ``values`` and ``mask`` inserted. Note that the
  646. insertion does not occur in-place: a new column is returned.
  647. """
  648. if self.dtype.kind == 'O':
  649. # Even if values is array-like (e.g. [1,2,3]), insert as a single
  650. # object. Numpy.insert instead inserts each element in an array-like
  651. # input individually.
  652. data = np.insert(self, obj, None, axis=0)
  653. data[obj] = values
  654. else:
  655. # Explicitly convert to dtype of this column. Needed because numpy 1.7
  656. # enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+.
  657. values = np.asarray(values, dtype=self.dtype)
  658. data = np.insert(self, obj, values, axis=0)
  659. out = data.view(self.__class__)
  660. out.__array_finalize__(self)
  661. return out
  662. # We do this to make the methods show up in the API docs
  663. name = BaseColumn.name
  664. unit = BaseColumn.unit
  665. copy = BaseColumn.copy
  666. more = BaseColumn.more
  667. pprint = BaseColumn.pprint
  668. pformat = BaseColumn.pformat
  669. convert_unit_to = BaseColumn.convert_unit_to
  670. quantity = BaseColumn.quantity
  671. to = BaseColumn.to
  672. class MaskedColumn(Column, _MaskedColumnGetitemShim, ma.MaskedArray):
  673. """Define a masked data column for use in a Table object.
  674. Parameters
  675. ----------
  676. data : list, ndarray or None
  677. Column data values
  678. name : str
  679. Column name and key for reference within Table
  680. mask : list, ndarray or None
  681. Boolean mask for which True indicates missing or invalid data
  682. fill_value : float, int, str or None
  683. Value used when filling masked column elements
  684. dtype : numpy.dtype compatible value
  685. Data type for column
  686. shape : tuple or ()
  687. Dimensions of a single row element in the column data
  688. length : int or 0
  689. Number of row elements in column data
  690. description : str or None
  691. Full description of column
  692. unit : str or None
  693. Physical unit
  694. format : str or None or function or callable
  695. Format string for outputting column values. This can be an
  696. "old-style" (``format % value``) or "new-style" (`str.format`)
  697. format specification string or a function or any callable object that
  698. accepts a single value and returns a string.
  699. meta : dict-like or None
  700. Meta-data associated with the column
  701. Examples
  702. --------
  703. A MaskedColumn is similar to a Column except that it includes ``mask`` and
  704. ``fill_value`` attributes. It can be created in two different ways:
  705. - Provide a ``data`` value but not ``shape`` or ``length`` (which are
  706. inferred from the data).
  707. Examples::
  708. col = MaskedColumn(data=[1, 2], name='name')
  709. col = MaskedColumn(data=[1, 2], name='name', mask=[True, False])
  710. col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99)
  711. The ``mask`` argument will be cast as a boolean array and specifies
  712. which elements are considered to be missing or invalid.
  713. The ``dtype`` argument can be any value which is an acceptable
  714. fixed-size data-type initializer for the numpy.dtype() method. See
  715. `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
  716. Examples include:
  717. - Python non-string type (float, int, bool)
  718. - Numpy non-string type (e.g. np.float32, np.int64, np.bool)
  719. - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
  720. If no ``dtype`` value is provide then the type is inferred using
  721. ``np.array(data)``. When ``data`` is provided then the ``shape``
  722. and ``length`` arguments are ignored.
  723. - Provide ``length`` and optionally ``shape``, but not ``data``
  724. Examples::
  725. col = MaskedColumn(name='name', length=5)
  726. col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4))
  727. The default ``dtype`` is ``np.float64``. The ``shape`` argument is the
  728. array shape of a single cell in the column.
  729. """
    def __new__(cls, data=None, name=None, mask=None, fill_value=None,
                dtype=None, shape=(), length=0,
                description=None, unit=None, format=None, meta=None,
                copy=False, copy_indices=True):
        """
        Create the masked column: build a ``BaseColumn`` from the arguments,
        wrap it in a ``MaskedArray`` of type ``cls`` and then set the fill
        value through the ``fill_value`` property.
        """
        # Without an explicit mask, reuse the mask of ``data`` if it has one.
        # Otherwise work on a deep copy of the ``mask`` argument.
        if mask is None and hasattr(data, 'mask'):
            mask = data.mask
        else:
            mask = deepcopy(mask)

        # Create self using MaskedArray as a wrapper class, following the example of
        # class MSubArray in
        # https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py
        # This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and
        # https://github.com/astropy/astropy/commit/ff6039e8)

        # First just pass through all args and kwargs to BaseColumn, then wrap that object
        # with MaskedArray.
        self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name,
                               unit=unit, format=format, description=description,
                               meta=meta, copy=copy, copy_indices=copy_indices)
        self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask)

        # Note: do not set fill_value in the MaskedArray constructor because this does not
        # go through the fill_value workarounds (see _fix_fill_value below).
        if fill_value is None and hasattr(data, 'fill_value') and data.fill_value is not None:
            # Coerce the fill_value to the correct type since `data` may be a
            # different dtype than self.
            fill_value = self.dtype.type(data.fill_value)
        self.fill_value = fill_value

        self.parent_table = None

        # needs to be done here since self doesn't come from BaseColumn.__new__
        for index in self.indices:
            index.replace_col(self_data, self)

        return self
  761. def _fix_fill_value(self, val):
  762. """Fix a fill value (if needed) to work around a bug with setting the fill
  763. value of a string array in MaskedArray with Python 3.x. See
  764. https://github.com/numpy/numpy/pull/2733. This mimics the check in
  765. numpy.ma.core._check_fill_value() (version < 1.8) which incorrectly sets
  766. fill_value to a default if self.dtype.char is 'U' (which is the case for Python
  767. 3). Here we change the string to a byte string so that in Python 3 the
  768. isinstance(val, basestring) part fails.
  769. """
  770. if (NUMPY_LT_1_8 and isinstance(val, six.string_types) and
  771. (self.dtype.char not in 'SV')):
  772. val = val.encode()
  773. return val
  774. @property
  775. def fill_value(self):
  776. return self.get_fill_value() # defer to native ma.MaskedArray method
  777. @fill_value.setter
  778. def fill_value(self, val):
  779. """Set fill value both in the masked column view and in the parent table
  780. if it exists. Setting one or the other alone doesn't work."""
  781. val = self._fix_fill_value(val)
  782. # Yet another ma bug workaround: If the value of fill_value for a string array is
  783. # requested but not yet set then it gets created as 'N/A'. From this point onward
  784. # any new fill_values are truncated to 3 characters. Note that this does not
  785. # occur if the masked array is a structured array (as in the previous block that
  786. # deals with the parent table).
  787. #
  788. # >>> x = ma.array(['xxxx'])
  789. # >>> x.fill_value # fill_value now gets represented as an 'S3' array
  790. # 'N/A'
  791. # >>> x.fill_value='yyyy'
  792. # >>> x.fill_value
  793. # 'yyy'
  794. #
  795. # To handle this we are forced to reset a private variable first:
  796. self._fill_value = None
  797. self.set_fill_value(val) # defer to native ma.MaskedArray method
  798. @property
  799. def data(self):
  800. out = self.view(ma.MaskedArray)
  801. # The following is necessary because of a bug in Numpy, which was
  802. # fixed in numpy/numpy#2703. The fix should be included in Numpy 1.8.0.
  803. out.fill_value = self.fill_value
  804. return out
  805. def filled(self, fill_value=None):
  806. """Return a copy of self, with masked values filled with a given value.
  807. Parameters
  808. ----------
  809. fill_value : scalar; optional
  810. The value to use for invalid entries (`None` by default). If
  811. `None`, the ``fill_value`` attribute of the array is used
  812. instead.
  813. Returns
  814. -------
  815. filled_column : Column
  816. A copy of ``self`` with masked entries replaced by `fill_value`
  817. (be it the function argument or the attribute of ``self``).
  818. """
  819. if fill_value is None:
  820. fill_value = self.fill_value
  821. fill_value = self._fix_fill_value(fill_value)
  822. data = super(MaskedColumn, self).filled(fill_value)
  823. # Use parent table definition of Column if available
  824. column_cls = self.parent_table.Column if (self.parent_table is not None) else Column
  825. out = column_cls(name=self.name, data=data, unit=self.unit,
  826. format=self.format, description=self.description,
  827. meta=deepcopy(self.meta))
  828. return out
  829. def insert(self, obj, values, mask=None):
  830. """
  831. Insert values along the given axis before the given indices and return
  832. a new `~astropy.table.MaskedColumn` object.
  833. Parameters
  834. ----------
  835. obj : int, slice or sequence of ints
  836. Object that defines the index or indices before which ``values`` is
  837. inserted.
  838. values : array_like
  839. Value(s) to insert. If the type of ``values`` is different
  840. from that of quantity, ``values`` is converted to the matching type.
  841. ``values`` should be shaped so that it can be broadcast appropriately
  842. mask : boolean array_like
  843. Mask value(s) to insert. If not supplied then False is used.
  844. Returns
  845. -------
  846. out : `~astropy.table.MaskedColumn`
  847. A copy of column with ``values`` and ``mask`` inserted. Note that the
  848. insertion does not occur in-place: a new masked column is returned.
  849. """
  850. self_ma = self.data # self viewed as MaskedArray
  851. if self.dtype.kind == 'O':
  852. # Even if values is array-like (e.g. [1,2,3]), insert as a single
  853. # object. Numpy.insert instead inserts each element in an array-like
  854. # input individually.
  855. new_data = np.insert(self_ma.data, obj, None, axis=0)
  856. new_data[obj] = values
  857. else:
  858. # Explicitly convert to dtype of this column. Needed because numpy 1.7
  859. # enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+.
  860. values = np.asarray(values, dtype=self.dtype)
  861. new_data = np.insert(self_ma.data, obj, values, axis=0)
  862. if mask is None:
  863. if self.dtype.kind == 'O':
  864. mask = False
  865. else:
  866. mask = np.zeros(values.shape, dtype=np.bool)
  867. new_mask = np.insert(self_ma.mask, obj, mask, axis=0)
  868. new_ma = np.ma.array(new_data, mask=new_mask, copy=False)
  869. out = new_ma.view(self.__class__)
  870. out.parent_table = None
  871. out.indices = []
  872. out._copy_attrs(self)
  873. return out
  874. def _copy_attrs_slice(self, out):
  875. # Fixes issue #3023: when calling getitem with a MaskedArray subclass
  876. # the original object attributes are not copied.
  877. if out.__class__ is self.__class__:
  878. out.parent_table = None
  879. # we need this because __getitem__ does a shallow copy of indices
  880. if out.indices is self.indices:
  881. out.indices = []
  882. out._copy_attrs(self)
  883. return out
  884. def __setitem__(self, index, value):
  885. # update indices
  886. self.info.adjust_indices(index, value, len(self))
  887. ma.MaskedArray.__setitem__(self, index, value)
  888. def __setslice__(self, start, stop, value):
  889. # defers to __setitem__, so we don't adjust indices here
  890. ma.MaskedArray.__setslice__(self, start, stop, value)
    # Re-expose these BaseColumn attributes directly on this class.  We do
    # this to make the methods show up in the API docs.
    name = BaseColumn.name
    copy = BaseColumn.copy
    more = BaseColumn.more
    pprint = BaseColumn.pprint
    pformat = BaseColumn.pformat
    convert_unit_to = BaseColumn.convert_unit_to