PageRenderTime 4ms CodeModel.GetById 32ms app.highlight 27ms RepoModel.GetById 1ms app.codeStats 0ms

/astropy/table/column.py

https://github.com/crawfordsm/astropy
Python | 1093 lines | 983 code | 38 blank | 72 comment | 36 complexity | 26eaba65d39d05bac3dc740e4f284a50 MD5 | raw file
   1# Licensed under a 3-clause BSD style license - see LICENSE.rst
   2from __future__ import (absolute_import, division, print_function,
   3                        unicode_literals)
   4from ..extern import six
   5
   6import weakref
   7
   8from copy import deepcopy
   9
  10import numpy as np
  11from numpy import ma
  12
  13from ..units import Unit, Quantity
  14from ..utils.compat import NUMPY_LT_1_8
  15from ..utils.console import color_print
  16from ..utils.metadata import MetaData
  17from ..utils.data_info import BaseColumnInfo, dtype_info_name
  18from . import groups
  19from . import pprint
  20from .np_utils import fix_column_name
  21
  22# These "shims" provide __getitem__ implementations for Column and MaskedColumn
  23from ._column_mixins import _ColumnGetitemShim, _MaskedColumnGetitemShim
  24
  25
# Create a generic TableFormatter object for use by bare columns with no
# parent table.
FORMATTER = pprint.TableFormatter()
# Types accepted as integers.  ``long`` only exists on Python 2, so it is
# referenced solely on the ``six.PY2`` side of the conditional expression.
INTEGER_TYPES = (int, long, np.integer) if six.PY2 else (int, np.integer)
  30
  31def _auto_names(n_cols):
  32    from . import conf
  33    return [str(conf.auto_colname).format(i) for i in range(n_cols)]
  34
  35
# Set of one- and two-argument ufuncs (comparisons and similar predicates)
# which sometimes return a Column class and sometimes a plain array.  Used in
# __array_wrap__ to ensure they only return plain (masked) arrays
# (see #1446 and #1685).
_comparison_functions = set(
    [np.greater, np.greater_equal, np.less, np.less_equal,
     np.not_equal, np.equal,
     np.isfinite, np.isinf, np.isnan, np.sign, np.signbit])
  43
  44
  45def col_copy(col, copy_indices=True):
  46    """
  47    This is a mixin-safe version of Column.copy() (with copy_data=True).
  48    """
  49    if isinstance(col, BaseColumn):
  50        return col.copy()
  51
  52    # The new column should have None for the parent_table ref.  If the
  53    # original parent_table weakref there at the point of copying then it
  54    # generates an infinite recursion.  Instead temporarily remove the weakref
  55    # on the original column and restore after the copy in an exception-safe
  56    # manner.
  57
  58    parent_table = col.info.parent_table
  59    indices = col.info.indices
  60    col.info.parent_table = None
  61    col.info.indices = []
  62
  63    try:
  64        newcol = col.copy() if hasattr(col, 'copy') else deepcopy(col)
  65        newcol.info = col.info
  66        newcol.info.indices = deepcopy(indices or []) if copy_indices else []
  67        for index in newcol.info.indices:
  68            index.replace_col(col, newcol)
  69    finally:
  70        col.info.parent_table = parent_table
  71        col.info.indices = indices
  72
  73    return newcol
  74
  75
  76class FalseArray(np.ndarray):
  77    def __new__(cls, shape):
  78        obj = np.zeros(shape, dtype=np.bool).view(cls)
  79        return obj
  80
  81    def __setitem__(self, item, val):
  82        val = np.asarray(val)
  83        if np.any(val):
  84            raise ValueError('Cannot set any element of {0} class to True'
  85                             .format(self.__class__.__name__))
  86
  87    def __setslice__(self, start, stop, val):
  88        val = np.asarray(val)
  89        if np.any(val):
  90            raise ValueError('Cannot set any element of {0} class to True'
  91                             .format(self.__class__.__name__))
  92
  93
class ColumnInfo(BaseColumnInfo):
    """
    ``info`` container used by the column classes in this module.

    It is instantiated as the ``info`` class attribute of ``BaseColumn``.
    """
    # Every attribute name known to BaseColumnInfo is listed as coming from
    # the parent object.
    # NOTE(review): presumably this makes ``info`` read these values from the
    # column itself rather than storing its own copy -- confirm against
    # BaseColumnInfo.
    attrs_from_parent = BaseColumnInfo.attr_names
    # Flag consumed outside this file; presumably marks the column type as
    # usable with table indices -- confirm against the index machinery.
    _supports_indexing = True
  97
  98
  99class BaseColumn(_ColumnGetitemShim, np.ndarray):
 100
 101    meta = MetaData()
 102
 103    def __new__(cls, data=None, name=None,
 104                dtype=None, shape=(), length=0,
 105                description=None, unit=None, format=None, meta=None,
 106                copy=False, copy_indices=True):
 107        if data is None:
 108            dtype = (np.dtype(dtype).str, shape)
 109            self_data = np.zeros(length, dtype=dtype)
 110        elif isinstance(data, BaseColumn) and hasattr(data, '_name'):
 111            # When unpickling a MaskedColumn, ``data`` will be a bare
 112            # BaseColumn with none of the expected attributes.  In this case
 113            # do NOT execute this block which initializes from ``data``
 114            # attributes.
 115            self_data = np.array(data.data, dtype=dtype, copy=copy)
 116            if description is None:
 117                description = data.description
 118            if unit is None:
 119                unit = unit or data.unit
 120            if format is None:
 121                format = data.format
 122            if meta is None:
 123                meta = deepcopy(data.meta)
 124            if name is None:
 125                name = data.name
 126        elif isinstance(data, Quantity):
 127            if unit is None:
 128                self_data = np.array(data, dtype=dtype, copy=copy)
 129                unit = data.unit
 130            else:
 131                self_data = np.array(data.to(unit), dtype=dtype, copy=copy)
 132            if description is None:
 133                description = data.info.description
 134            if format is None:
 135                format = data.info.format
 136            if meta is None:
 137                meta = deepcopy(data.info.meta)
 138
 139        else:
 140            self_data = np.array(data, dtype=dtype, copy=copy)
 141
 142        self = self_data.view(cls)
 143        self._name = fix_column_name(name)
 144        self.unit = unit
 145        self.format = format
 146        self.description = description
 147        self.meta = meta
 148        self._parent_table = None
 149        self.indices = deepcopy(getattr(data, 'indices', [])) if \
 150                       copy_indices else []
 151        for index in self.indices:
 152            index.replace_col(data, self)
 153
 154        return self
 155
 156    @property
 157    def data(self):
 158        return self.view(np.ndarray)
 159
 160    @property
 161    def parent_table(self):
 162        if self._parent_table is None:
 163            return None
 164        else:
 165            return self._parent_table()
 166
 167    @parent_table.setter
 168    def parent_table(self, table):
 169        if table is None:
 170            self._parent_table = None
 171        else:
 172            self._parent_table = weakref.ref(table)
 173
 174    info = ColumnInfo()
 175
 176    def copy(self, order='C', data=None, copy_data=True):
 177        """
 178        Return a copy of the current instance.
 179
 180        If ``data`` is supplied then a view (reference) of ``data`` is used,
 181        and ``copy_data`` is ignored.
 182
 183        Parameters
 184        ----------
 185        order : {'C', 'F', 'A', 'K'}, optional
 186            Controls the memory layout of the copy. 'C' means C-order,
 187            'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous,
 188            'C' otherwise. 'K' means match the layout of ``a`` as closely
 189            as possible. (Note that this function and :func:numpy.copy are very
 190            similar, but have different default values for their order=
 191            arguments.)  Default is 'C'.
 192        data : array, optional
 193            If supplied then use a view of ``data`` instead of the instance
 194            data.  This allows copying the instance attributes and meta.
 195        copy_data : bool, optional
 196            Make a copy of the internal numpy array instead of using a
 197            reference.  Default is True.
 198
 199        Returns
 200        -------
 201        col : Column or MaskedColumn
 202            Copy of the current column (same type as original)
 203        """
 204        if data is None:
 205            data = self.data
 206            if copy_data:
 207                data = data.copy(order)
 208
 209        out = data.view(self.__class__)
 210        out.__array_finalize__(self)
 211        # for MaskedColumn, MaskedArray.__array_finalize__ also copies mask
 212        # from self, which is not the idea here, so undo
 213        if isinstance(self, MaskedColumn):
 214            out._mask = data._mask
 215
 216        self._copy_groups(out)
 217
 218        return out
 219
 220    def __setstate__(self, state):
 221        """
 222        Restore the internal state of the Column/MaskedColumn for pickling
 223        purposes.  This requires that the last element of ``state`` is a
 224        5-tuple that has Column-specific state values.
 225        """
 226        # Get the Column attributes and meta
 227        name, unit, format, description, meta = state[-1]
 228        state = state[:-1]
 229
 230        # Using super(type(self), self).__setstate__() gives an infinite
 231        # recursion.  Manually call the right super class to actually set up
 232        # the array object.
 233        super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
 234        super_class.__setstate__(self, state)
 235
 236        # Set the Column attributes and meta
 237        self._name = name
 238        self.unit = unit
 239        self.format = format
 240        self.description = description
 241        self.meta = meta
 242        self._parent_table = None
 243
 244    def __reduce__(self):
 245        """
 246        Return a 3-tuple for pickling a Column.  Use the super-class
 247        functionality but then add in a 5-tuple of Column-specific values
 248        that get used in __setstate__.
 249        """
 250        super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
 251        reconstruct_func, reconstruct_func_args, state = super_class.__reduce__(self)
 252
 253        # Define Column-specific attrs and meta that gets added to state.
 254        column_state = (self.name, self.unit, self.format, self.description,
 255                        self.meta)
 256        state = state + (column_state,)
 257
 258        return reconstruct_func, reconstruct_func_args, state
 259
 260    # avoid == and != to be done based on type of subclass
 261    # (helped solve #1446; see also __array_wrap__)
 262    def __eq__(self, other):
 263        return self.data.__eq__(other)
 264
 265    def __ne__(self, other):
 266        return self.data.__ne__(other)
 267
 268    def __array_finalize__(self, obj):
 269        # Obj will be none for direct call to Column() creator
 270        if obj is None:
 271            return
 272
 273        if six.callable(super(BaseColumn, self).__array_finalize__):
 274            super(BaseColumn, self).__array_finalize__(obj)
 275
 276        # Self was created from template (e.g. obj[slice] or (obj * 2))
 277        # or viewcast e.g. obj.view(Column).  In either case we want to
 278        # init Column attributes for self from obj if possible.
 279        self.parent_table = None
 280        if not hasattr(self, 'indices'): # may have been copied in __new__
 281            self.indices = []
 282        self._copy_attrs(obj)
 283
 284    def __array_wrap__(self, out_arr, context=None):
 285        """
 286        __array_wrap__ is called at the end of every ufunc.
 287
 288        Normally, we want a Column object back and do not have to do anything
 289        special. But there are two exceptions:
 290
 291        1) If the output shape is different (e.g. for reduction ufuncs
 292           like sum() or mean()), a Column still linking to a parent_table
 293           makes little sense, so we return the output viewed as the
 294           column content (ndarray or MaskedArray).
 295           For this case, we use "[()]" to select everything, and to ensure we
 296           convert a zero rank array to a scalar. (For some reason np.sum()
 297           returns a zero rank scalar array while np.mean() returns a scalar;
 298           So the [()] is needed for this case.
 299
 300        2) When the output is created by any function that returns a boolean
 301           we also want to consistently return an array rather than a column
 302           (see #1446 and #1685)
 303        """
 304        out_arr = super(BaseColumn, self).__array_wrap__(out_arr, context)
 305        if (self.shape != out_arr.shape or
 306            (isinstance(out_arr, BaseColumn) and
 307             (context is not None and context[0] in _comparison_functions))):
 308            return out_arr.data[()]
 309        else:
 310            return out_arr
 311
 312    @property
 313    def name(self):
 314        """
 315        The name of this column.
 316        """
 317        return self._name
 318
 319    @name.setter
 320    def name(self, val):
 321        val = fix_column_name(val)
 322
 323        if self.parent_table is not None:
 324            table = self.parent_table
 325            table.columns._rename_column(self.name, val)
 326
 327        self._name = val
 328
 329    @property
 330    def descr(self):
 331        """Array-interface compliant full description of the column.
 332
 333        This returns a 3-tuple (name, type, shape) that can always be
 334        used in a structured array dtype definition.
 335        """
 336        return (self.name, self.dtype.str, self.shape[1:])
 337
 338    def iter_str_vals(self):
 339        """
 340        Return an iterator that yields the string-formatted values of this
 341        column.
 342
 343        Returns
 344        -------
 345        str_vals : iterator
 346            Column values formatted as strings
 347        """
 348        # Iterate over formatted values with no max number of lines, no column
 349        # name, no unit, and ignoring the returned header info in outs.
 350        _pformat_col_iter = self._formatter._pformat_col_iter
 351        for str_val in _pformat_col_iter(self, -1, show_name=False, show_unit=False,
 352                                         show_dtype=False, outs={}):
 353            yield str_val
 354
 355    def attrs_equal(self, col):
 356        """Compare the column attributes of ``col`` to this object.
 357
 358        The comparison attributes are: ``name``, ``unit``, ``dtype``,
 359        ``format``, ``description``, and ``meta``.
 360
 361        Parameters
 362        ----------
 363        col : Column
 364            Comparison column
 365
 366        Returns
 367        -------
 368        equal : boolean
 369            True if all attributes are equal
 370        """
 371        if not isinstance(col, BaseColumn):
 372            raise ValueError('Comparison `col` must be a Column or '
 373                             'MaskedColumn object')
 374
 375        attrs = ('name', 'unit', 'dtype', 'format', 'description', 'meta')
 376        equal = all(getattr(self, x) == getattr(col, x) for x in attrs)
 377
 378        return equal
 379
 380    @property
 381    def _formatter(self):
 382        return FORMATTER if (self.parent_table is None) else self.parent_table.formatter
 383
 384    def pformat(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False,
 385                html=False):
 386        """Return a list of formatted string representation of column values.
 387
 388        If no value of ``max_lines`` is supplied then the height of the
 389        screen terminal is used to set ``max_lines``.  If the terminal
 390        height cannot be determined then the default will be
 391        determined using the ``astropy.conf.max_lines`` configuration
 392        item. If a negative value of ``max_lines`` is supplied then
 393        there is no line limit applied.
 394
 395        Parameters
 396        ----------
 397        max_lines : int
 398            Maximum lines of output (header + data rows)
 399
 400        show_name : bool
 401            Include column name (default=True)
 402
 403        show_unit : bool
 404            Include a header row for unit (default=False)
 405
 406        show_dtype : bool
 407            Include column dtype (default=False)
 408
 409        html : bool
 410            Format the output as an HTML table (default=False)
 411
 412        Returns
 413        -------
 414        lines : list
 415            List of lines with header and formatted column values
 416
 417        """
 418        _pformat_col = self._formatter._pformat_col
 419        lines, outs = _pformat_col(self, max_lines, show_name=show_name,
 420                                   show_unit=show_unit, show_dtype=show_dtype,
 421                                   html=html)
 422        return lines
 423
 424    def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False):
 425        """Print a formatted string representation of column values.
 426
 427        If no value of ``max_lines`` is supplied then the height of the
 428        screen terminal is used to set ``max_lines``.  If the terminal
 429        height cannot be determined then the default will be
 430        determined using the ``astropy.conf.max_lines`` configuration
 431        item. If a negative value of ``max_lines`` is supplied then
 432        there is no line limit applied.
 433
 434        Parameters
 435        ----------
 436        max_lines : int
 437            Maximum number of values in output
 438
 439        show_name : bool
 440            Include column name (default=True)
 441
 442        show_unit : bool
 443            Include a header row for unit (default=False)
 444
 445        show_dtype : bool
 446            Include column dtype (default=True)
 447        """
 448        _pformat_col = self._formatter._pformat_col
 449        lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit,
 450                                   show_dtype=show_dtype)
 451
 452        n_header = outs['n_header']
 453        for i, line in enumerate(lines):
 454            if i < n_header:
 455                color_print(line, 'red')
 456            else:
 457                print(line)
 458
 459    def more(self, max_lines=None, show_name=True, show_unit=False):
 460        """Interactively browse column with a paging interface.
 461
 462        Supported keys::
 463
 464          f, <space> : forward one page
 465          b : back one page
 466          r : refresh same page
 467          n : next row
 468          p : previous row
 469          < : go to beginning
 470          > : go to end
 471          q : quit browsing
 472          h : print this help
 473
 474        Parameters
 475        ----------
 476        max_lines : int
 477            Maximum number of lines in table output
 478
 479        show_name : bool
 480            Include a header row for column names (default=True)
 481
 482        show_unit : bool
 483            Include a header row for unit (default=False)
 484
 485        """
 486        _more_tabcol = self._formatter._more_tabcol
 487        _more_tabcol(self, max_lines=max_lines, show_name=show_name,
 488                     show_unit=show_unit)
 489
 490    @property
 491    def unit(self):
 492        """
 493        The unit associated with this column.  May be a string or a
 494        `astropy.units.UnitBase` instance.
 495
 496        Setting the ``unit`` property does not change the values of the
 497        data.  To perform a unit conversion, use ``convert_unit_to``.
 498        """
 499        return self._unit
 500
 501    @unit.setter
 502    def unit(self, unit):
 503        if unit is None:
 504            self._unit = None
 505        else:
 506            self._unit = Unit(unit, parse_strict='silent')
 507
 508    @unit.deleter
 509    def unit(self):
 510        self._unit = None
 511
 512    def convert_unit_to(self, new_unit, equivalencies=[]):
 513        """
 514        Converts the values of the column in-place from the current
 515        unit to the given unit.
 516
 517        To change the unit associated with this column without
 518        actually changing the data values, simply set the ``unit``
 519        property.
 520
 521        Parameters
 522        ----------
 523        new_unit : str or `astropy.units.UnitBase` instance
 524            The unit to convert to.
 525
 526        equivalencies : list of equivalence pairs, optional
 527           A list of equivalence pairs to try if the unit are not
 528           directly convertible.  See :ref:`unit_equivalencies`.
 529
 530        Raises
 531        ------
 532        astropy.units.UnitsError
 533            If units are inconsistent
 534        """
 535        if self.unit is None:
 536            raise ValueError("No unit set on column")
 537        self.data[:] = self.unit.to(
 538            new_unit, self.data, equivalencies=equivalencies)
 539        self.unit = new_unit
 540
 541    @property
 542    def groups(self):
 543        if not hasattr(self, '_groups'):
 544            self._groups = groups.ColumnGroups(self)
 545        return self._groups
 546
 547    def group_by(self, keys):
 548        """
 549        Group this column by the specified ``keys``
 550
 551        This effectively splits the column into groups which correspond to
 552        unique values of the ``keys`` grouping object.  The output is a new
 553        `Column` or `MaskedColumn` which contains a copy of this column but
 554        sorted by row according to ``keys``.
 555
 556        The ``keys`` input to ``group_by`` must be a numpy array with the
 557        same length as this column.
 558
 559        Parameters
 560        ----------
 561        keys : numpy array
 562            Key grouping object
 563
 564        Returns
 565        -------
 566        out : Column
 567            New column with groups attribute set accordingly
 568        """
 569        return groups.column_group_by(self, keys)
 570
 571    def _copy_groups(self, out):
 572        """
 573        Copy current groups into a copy of self ``out``
 574        """
 575        if self.parent_table:
 576            if hasattr(self.parent_table, '_groups'):
 577                out._groups = groups.ColumnGroups(out, indices=self.parent_table._groups._indices)
 578        elif hasattr(self, '_groups'):
 579            out._groups = groups.ColumnGroups(out, indices=self._groups._indices)
 580
 581    # Strip off the BaseColumn-ness for repr and str so that
 582    # MaskedColumn.data __repr__ does not include masked_BaseColumn(data =
 583    # [1 2], ...).
 584    def __repr__(self):
 585        return np.asarray(self).__repr__()
 586
 587    @property
 588    def quantity(self):
 589        """
 590        A view of this table column as a `~astropy.units.Quantity` object with
 591        units given by the Column's `unit` parameter.
 592        """
 593        # the Quantity initializer is used here because it correctly fails
 594        # if the column's values are non-numeric (like strings), while .view
 595        # will happily return a quantity with gibberish for numerical values
 596        return Quantity(self, copy=False, dtype=self.dtype, order='A')
 597
 598    def to(self, unit, equivalencies=[], **kwargs):
 599        """
 600        Converts this table column to a `~astropy.units.Quantity` object with
 601        the requested units.
 602
 603        Parameters
 604        ----------
 605        unit : `~astropy.units.Unit` or str
 606            The unit to convert to (i.e., a valid argument to the
 607            :meth:`astropy.units.Quantity.to` method).
 608        equivalencies : list of equivalence pairs, optional
 609            Equivalencies to use for this conversion.  See
 610            :meth:`astropy.units.Quantity.to` for more details.
 611
 612        Returns
 613        -------
 614        quantity : `~astropy.units.Quantity`
 615            A quantity object with the contents of this column in the units
 616            ``unit``.
 617        """
 618        return self.quantity.to(unit, equivalencies)
 619
 620    def _copy_attrs(self, obj):
 621        """
 622        Copy key column attributes from ``obj`` to self
 623        """
 624        for attr in ('name', 'unit', 'format', 'description'):
 625            val = getattr(obj, attr, None)
 626            setattr(self, attr, val)
 627        self.meta = deepcopy(getattr(obj, 'meta', {}))
 628
 629
 630class Column(BaseColumn):
 631    """Define a data column for use in a Table object.
 632
 633    Parameters
 634    ----------
 635    data : list, ndarray or None
 636        Column data values
 637    name : str
 638        Column name and key for reference within Table
 639    dtype : numpy.dtype compatible value
 640        Data type for column
 641    shape : tuple or ()
 642        Dimensions of a single row element in the column data
 643    length : int or 0
 644        Number of row elements in column data
 645    description : str or None
 646        Full description of column
 647    unit : str or None
 648        Physical unit
 649    format : str or None or function or callable
 650        Format string for outputting column values.  This can be an
 651        "old-style" (``format % value``) or "new-style" (`str.format`)
 652        format specification string or a function or any callable object that
 653        accepts a single value and returns a string.
 654    meta : dict-like or None
 655        Meta-data associated with the column
 656
 657    Examples
 658    --------
 659    A Column can be created in two different ways:
 660
 661    - Provide a ``data`` value but not ``shape`` or ``length`` (which are
 662      inferred from the data).
 663
 664      Examples::
 665
 666        col = Column(data=[1, 2], name='name')  # shape=(2,)
 667        col = Column(data=[[1, 2], [3, 4]], name='name')  # shape=(2, 2)
 668        col = Column(data=[1, 2], name='name', dtype=float)
 669        col = Column(data=np.array([1, 2]), name='name')
 670        col = Column(data=['hello', 'world'], name='name')
 671
 672      The ``dtype`` argument can be any value which is an acceptable
 673      fixed-size data-type initializer for the numpy.dtype() method.  See
 674      `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
 675      Examples include:
 676
 677      - Python non-string type (float, int, bool)
 678      - Numpy non-string type (e.g. np.float32, np.int64, np.bool)
 679      - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
 680
 681      If no ``dtype`` value is provide then the type is inferred using
 682      ``np.array(data)``.
 683
 684    - Provide ``length`` and optionally ``shape``, but not ``data``
 685
 686      Examples::
 687
 688        col = Column(name='name', length=5)
 689        col = Column(name='name', dtype=int, length=10, shape=(3,4))
 690
 691      The default ``dtype`` is ``np.float64``.  The ``shape`` argument is the
 692      array shape of a single cell in the column.
 693    """
 694
 695    def __new__(cls, data=None, name=None,
 696                dtype=None, shape=(), length=0,
 697                description=None, unit=None, format=None, meta=None,
 698                copy=False, copy_indices=True):
 699
 700        if isinstance(data, MaskedColumn) and np.any(data.mask):
 701            raise TypeError("Cannot convert a MaskedColumn with masked value to a Column")
 702
 703        self = super(Column, cls).__new__(cls, data=data, name=name, dtype=dtype,
 704                                          shape=shape, length=length, description=description,
 705                                          unit=unit, format=format, meta=meta,
 706                                          copy=copy, copy_indices=copy_indices)
 707        return self
 708
 709    def _base_repr_(self, html=False):
 710        # If scalar then just convert to correct numpy type and use numpy repr
 711        if self.ndim == 0:
 712            return repr(self.item())
 713
 714        descr_vals = [self.__class__.__name__]
 715        unit = None if self.unit is None else str(self.unit)
 716        shape = None if self.ndim <= 1 else self.shape[1:]
 717        for attr, val in (('name', self.name),
 718                          ('dtype', dtype_info_name(self.dtype)),
 719                          ('shape', shape),
 720                          ('unit', unit),
 721                          ('format', self.format),
 722                          ('description', self.description),
 723                          ('length', len(self))):
 724
 725            if val is not None:
 726                descr_vals.append('{0}={1}'.format(attr, repr(val)))
 727
 728        descr = '<' + ' '.join(descr_vals) + '>\n'
 729
 730        if html:
 731            from ..utils.xml.writer import xml_escape
 732            descr = xml_escape(descr)
 733
 734        data_lines, outs = self._formatter._pformat_col(
 735            self, show_name=False, show_unit=False, show_length=False, html=html)
 736
 737        out = descr + '\n'.join(data_lines)
 738        if six.PY2 and isinstance(out, six.text_type):
 739            out = out.encode('utf-8')
 740
 741        return out
 742
 743    def _repr_html_(self):
 744        return self._base_repr_(html=True)
 745
 746    def __repr__(self):
 747        return self._base_repr_(html=False)
 748
 749    def __unicode__(self):
 750        # If scalar then just convert to correct numpy type and use numpy repr
 751        if self.ndim == 0:
 752            return str(self.item())
 753
 754        lines, outs = self._formatter._pformat_col(self)
 755        return '\n'.join(lines)
 756    if six.PY3:
 757        __str__ = __unicode__
 758
 759    def __bytes__(self):
 760        return six.text_type(self).encode('utf-8')
 761    if six.PY2:
 762        __str__ = __bytes__
 763
 764    # Set items using a view of the underlying data, as it gives an
 765    # order-of-magnitude speed-up. [#2994]
 766    def __setitem__(self, index, value):
 767        # update indices
 768        self.info.adjust_indices(index, value, len(self))
 769        self.data[index] = value
 770
 771    # # Set slices using a view of the underlying data, as it gives an
 772    # # order-of-magnitude speed-up.  Only gets called in Python 2.  [#3020]
 773    def __setslice__(self, start, stop, value):
 774        self.info.adjust_indices(slice(start, stop), value, len(self))
 775        self.data.__setslice__(start, stop, value)
 776
 777    def insert(self, obj, values):
 778        """
 779        Insert values before the given indices in the column and return
 780        a new `~astropy.table.Column` object.
 781
 782        Parameters
 783        ----------
 784        obj : int, slice or sequence of ints
 785            Object that defines the index or indices before which ``values`` is
 786            inserted.
 787        values : array_like
 788            Value(s) to insert.  If the type of ``values`` is different
 789            from that of quantity, ``values`` is converted to the matching type.
 790            ``values`` should be shaped so that it can be broadcast appropriately
 791
 792        Returns
 793        -------
 794        out : `~astropy.table.Column`
 795            A copy of column with ``values`` and ``mask`` inserted.  Note that the
 796            insertion does not occur in-place: a new column is returned.
 797        """
 798        if self.dtype.kind == 'O':
 799            # Even if values is array-like (e.g. [1,2,3]), insert as a single
 800            # object.  Numpy.insert instead inserts each element in an array-like
 801            # input individually.
 802            data = np.insert(self, obj, None, axis=0)
 803            data[obj] = values
 804        else:
 805            # Explicitly convert to dtype of this column.  Needed because numpy 1.7
 806            # enforces safe casting by default, so .  This isn't the case for 1.6 or 1.8+.
 807            values = np.asarray(values, dtype=self.dtype)
 808            data = np.insert(self, obj, values, axis=0)
 809        out = data.view(self.__class__)
 810        out.__array_finalize__(self)
 811        return out
 812
    # Re-bind the attributes inherited from BaseColumn directly on this class.
    # We do this to make the methods show up in the API docs
    name = BaseColumn.name
    unit = BaseColumn.unit
    copy = BaseColumn.copy
    more = BaseColumn.more
    pprint = BaseColumn.pprint
    pformat = BaseColumn.pformat
    convert_unit_to = BaseColumn.convert_unit_to
    quantity = BaseColumn.quantity
    to = BaseColumn.to
 823
 824
 825class MaskedColumn(Column, _MaskedColumnGetitemShim, ma.MaskedArray):
 826    """Define a masked data column for use in a Table object.
 827
 828    Parameters
 829    ----------
 830    data : list, ndarray or None
 831        Column data values
 832    name : str
 833        Column name and key for reference within Table
 834    mask : list, ndarray or None
 835        Boolean mask for which True indicates missing or invalid data
 836    fill_value : float, int, str or None
 837        Value used when filling masked column elements
 838    dtype : numpy.dtype compatible value
 839        Data type for column
 840    shape : tuple or ()
 841        Dimensions of a single row element in the column data
 842    length : int or 0
 843        Number of row elements in column data
 844    description : str or None
 845        Full description of column
 846    unit : str or None
 847        Physical unit
 848    format : str or None or function or callable
 849        Format string for outputting column values.  This can be an
 850        "old-style" (``format % value``) or "new-style" (`str.format`)
 851        format specification string or a function or any callable object that
 852        accepts a single value and returns a string.
 853    meta : dict-like or None
 854        Meta-data associated with the column
 855
 856    Examples
 857    --------
 858    A MaskedColumn is similar to a Column except that it includes ``mask`` and
 859    ``fill_value`` attributes.  It can be created in two different ways:
 860
 861    - Provide a ``data`` value but not ``shape`` or ``length`` (which are
 862      inferred from the data).
 863
 864      Examples::
 865
 866        col = MaskedColumn(data=[1, 2], name='name')
 867        col = MaskedColumn(data=[1, 2], name='name', mask=[True, False])
 868        col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99)
 869
 870      The ``mask`` argument will be cast as a boolean array and specifies
 871      which elements are considered to be missing or invalid.
 872
 873      The ``dtype`` argument can be any value which is an acceptable
 874      fixed-size data-type initializer for the numpy.dtype() method.  See
 875      `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
 876      Examples include:
 877
 878      - Python non-string type (float, int, bool)
 879      - Numpy non-string type (e.g. np.float32, np.int64, np.bool)
 880      - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
 881
 882      If no ``dtype`` value is provide then the type is inferred using
 883      ``np.array(data)``.  When ``data`` is provided then the ``shape``
 884      and ``length`` arguments are ignored.
 885
 886    - Provide ``length`` and optionally ``shape``, but not ``data``
 887
 888      Examples::
 889
 890        col = MaskedColumn(name='name', length=5)
 891        col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4))
 892
 893      The default ``dtype`` is ``np.float64``.  The ``shape`` argument is the
 894      array shape of a single cell in the column.
 895    """
 896
 897    def __new__(cls, data=None, name=None, mask=None, fill_value=None,
 898                dtype=None, shape=(), length=0,
 899                description=None, unit=None, format=None, meta=None,
 900                copy=False, copy_indices=True):
 901
 902        if mask is None and hasattr(data, 'mask'):
 903            mask = data.mask
 904        else:
 905            mask = deepcopy(mask)
 906
 907        # Create self using MaskedArray as a wrapper class, following the example of
 908        # class MSubArray in
 909        # https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py
 910        # This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and
 911        # https://github.com/astropy/astropy/commit/ff6039e8)
 912
 913        # First just pass through all args and kwargs to BaseColumn, then wrap that object
 914        # with MaskedArray.
 915        self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name,
 916                               unit=unit, format=format, description=description,
 917                               meta=meta, copy=copy, copy_indices=copy_indices)
 918        self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask)
 919
 920        # Note: do not set fill_value in the MaskedArray constructor because this does not
 921        # go through the fill_value workarounds (see _fix_fill_value below).
 922        if fill_value is None and hasattr(data, 'fill_value') and data.fill_value is not None:
 923            # Coerce the fill_value to the correct type since `data` may be a
 924            # different dtype than self.
 925            fill_value = self.dtype.type(data.fill_value)
 926        self.fill_value = fill_value
 927
 928        self.parent_table = None
 929
 930        # needs to be done here since self doesn't come from BaseColumn.__new__
 931        for index in self.indices:
 932            index.replace_col(self_data, self)
 933
 934        return self
 935
 936    def _fix_fill_value(self, val):
 937        """Fix a fill value (if needed) to work around a bug with setting the fill
 938        value of a string array in MaskedArray with Python 3.x.  See
 939        https://github.com/numpy/numpy/pull/2733.  This mimics the check in
 940        numpy.ma.core._check_fill_value() (version < 1.8) which incorrectly sets
 941        fill_value to a default if self.dtype.char is 'U' (which is the case for Python
 942        3).  Here we change the string to a byte string so that in Python 3 the
 943        isinstance(val, basestring) part fails.
 944        """
 945
 946        if (NUMPY_LT_1_8 and isinstance(val, six.string_types) and
 947                (self.dtype.char not in 'SV')):
 948            val = val.encode()
 949        return val
 950
 951    @property
 952    def fill_value(self):
 953        return self.get_fill_value()  # defer to native ma.MaskedArray method
 954
 955    @fill_value.setter
 956    def fill_value(self, val):
 957        """Set fill value both in the masked column view and in the parent table
 958        if it exists.  Setting one or the other alone doesn't work."""
 959        val = self._fix_fill_value(val)
 960
 961        # Yet another ma bug workaround: If the value of fill_value for a string array is
 962        # requested but not yet set then it gets created as 'N/A'.  From this point onward
 963        # any new fill_values are truncated to 3 characters.  Note that this does not
 964        # occur if the masked array is a structured array (as in the previous block that
 965        # deals with the parent table).
 966        #
 967        # >>> x = ma.array(['xxxx'])
 968        # >>> x.fill_value  # fill_value now gets represented as an 'S3' array
 969        # 'N/A'
 970        # >>> x.fill_value='yyyy'
 971        # >>> x.fill_value
 972        # 'yyy'
 973        #
 974        # To handle this we are forced to reset a private variable first:
 975        self._fill_value = None
 976
 977        self.set_fill_value(val)  # defer to native ma.MaskedArray method
 978
 979    @property
 980    def data(self):
 981        out = self.view(ma.MaskedArray)
 982        # The following is necessary because of a bug in Numpy, which was
 983        # fixed in numpy/numpy#2703. The fix should be included in Numpy 1.8.0.
 984        out.fill_value = self.fill_value
 985        return out
 986
 987    def filled(self, fill_value=None):
 988        """Return a copy of self, with masked values filled with a given value.
 989
 990        Parameters
 991        ----------
 992        fill_value : scalar; optional
 993            The value to use for invalid entries (`None` by default).  If
 994            `None`, the ``fill_value`` attribute of the array is used
 995            instead.
 996
 997        Returns
 998        -------
 999        filled_column : Column
1000            A copy of ``self`` with masked entries replaced by `fill_value`
1001            (be it the function argument or the attribute of ``self``).
1002        """
1003        if fill_value is None:
1004            fill_value = self.fill_value
1005        fill_value = self._fix_fill_value(fill_value)
1006
1007        data = super(MaskedColumn, self).filled(fill_value)
1008        # Use parent table definition of Column if available
1009        column_cls = self.parent_table.Column if (self.parent_table is not None) else Column
1010        out = column_cls(name=self.name, data=data, unit=self.unit,
1011                         format=self.format, description=self.description,
1012                         meta=deepcopy(self.meta))
1013        return out
1014
1015    def insert(self, obj, values, mask=None):
1016        """
1017        Insert values along the given axis before the given indices and return
1018        a new `~astropy.table.MaskedColumn` object.
1019
1020        Parameters
1021        ----------
1022        obj : int, slice or sequence of ints
1023            Object that defines the index or indices before which ``values`` is
1024            inserted.
1025        values : array_like
1026            Value(s) to insert.  If the type of ``values`` is different
1027            from that of quantity, ``values`` is converted to the matching type.
1028            ``values`` should be shaped so that it can be broadcast appropriately
1029        mask : boolean array_like
1030            Mask value(s) to insert.  If not supplied then False is used.
1031
1032        Returns
1033        -------
1034        out : `~astropy.table.MaskedColumn`
1035            A copy of column with ``values`` and ``mask`` inserted.  Note that the
1036            insertion does not occur in-place: a new masked column is returned.
1037        """
1038        self_ma = self.data  # self viewed as MaskedArray
1039
1040        if self.dtype.kind == 'O':
1041            # Even if values is array-like (e.g. [1,2,3]), insert as a single
1042            # object.  Numpy.insert instead inserts each element in an array-like
1043            # input individually.
1044            new_data = np.insert(self_ma.data, obj, None, axis=0)
1045            new_data[obj] = values
1046        else:
1047            # Explicitly convert to dtype of this column.  Needed because numpy 1.7
1048            # enforces safe casting by default, so .  This isn't the case for 1.6 or 1.8+.
1049            values = np.asarray(values, dtype=self.dtype)
1050            new_data = np.insert(self_ma.data, obj, values, axis=0)
1051
1052        if mask is None:
1053            if self.dtype.kind == 'O':
1054                mask = False
1055            else:
1056                mask = np.zeros(values.shape, dtype=np.bool)
1057        new_mask = np.insert(self_ma.mask, obj, mask, axis=0)
1058        new_ma = np.ma.array(new_data, mask=new_mask, copy=False)
1059
1060        out = new_ma.view(self.__class__)
1061        out.parent_table = None
1062        out.indices = []
1063        out._copy_attrs(self)
1064
1065        return out
1066
1067    def _copy_attrs_slice(self, out):
1068        # Fixes issue #3023: when calling getitem with a MaskedArray subclass
1069        # the original object attributes are not copied.
1070        if out.__class__ is self.__class__:
1071            out.parent_table = None
1072            # we need this because __getitem__ does a shallow copy of indices
1073            if out.indices is self.indices:
1074                out.indices = []
1075            out._copy_attrs(self)
1076        return out
1077
1078    def __setitem__(self, index, value):
1079        # update indices
1080        self.info.adjust_indices(index, value, len(self))
1081        ma.MaskedArray.__setitem__(self, index, value)
1082
1083    def __setslice__(self, start, stop, value):
1084        # defers to __setitem__, so we don't adjust indices here
1085        ma.MaskedArray.__setslice__(self, start, stop, value)
1086
1087    # We do this to make the methods show up in the API docs
1088    name = BaseColumn.name
1089    copy = BaseColumn.copy
1090    more = BaseColumn.more
1091    pprint = BaseColumn.pprint
1092    pformat = BaseColumn.pformat
1093    convert_unit_to = BaseColumn.convert_unit_to