/astropy/table/column.py
Python | 1124 lines | 976 code | 48 blank | 100 comment | 32 complexity | efe0cd2c7440cbe9ebd828cb6e3dc4c2 MD5 | raw file
1# Licensed under a 3-clause BSD style license - see LICENSE.rst 2from __future__ import (absolute_import, division, print_function, 3 unicode_literals) 4from ..extern import six 5 6import weakref 7 8from copy import deepcopy 9 10import numpy as np 11from numpy import ma 12 13from ..units import Unit, Quantity 14from ..utils.compat import NUMPY_LT_1_8 15from ..utils.console import color_print 16from ..utils.metadata import MetaData 17from ..utils.data_info import BaseColumnInfo, InfoDescriptor, dtype_info_name 18from . import groups 19from . import pprint 20from .np_utils import fix_column_name 21 22from ..config import ConfigAlias 23 24 25AUTO_COLNAME = ConfigAlias( 26 '0.4', 'AUTO_COLNAME', 'auto_colname', 27 'astropy.table.column', 'astropy.table') 28 29# Create a generic TableFormatter object for use by bare columns with no 30# parent table. 31FORMATTER = pprint.TableFormatter() 32INTEGER_TYPES = (int, long, np.integer) if six.PY2 else (int, np.integer) 33 34def _auto_names(n_cols): 35 from . import conf 36 return [str(conf.auto_colname).format(i) for i in range(n_cols)] 37 38 39# list of one and two-dimensional comparison functions, which sometimes return 40# a Column class and sometimes a plain array. Used in __array_wrap__ to ensure 41# they only return plain (masked) arrays (see #1446 and #1685) 42_comparison_functions = set( 43 [np.greater, np.greater_equal, np.less, np.less_equal, 44 np.not_equal, np.equal, 45 np.isfinite, np.isinf, np.isnan, np.sign, np.signbit]) 46 47 48def col_copy(col): 49 """ 50 This is a mixin-safe version of Column.copy() (with copy_data=True). 51 """ 52 if isinstance(col, BaseColumn): 53 return col.copy() 54 55 # The new column should have None for the parent_table ref. If the 56 # original parent_table weakref there at the point of copying then it 57 # generates an infinite recursion. Instead temporarily remove the weakref 58 # on the original column and restore after the copy in an exception-safe 59 # manner. 
class FalseArray(np.ndarray):
    """
    Boolean array whose elements are all False and can never be set to True.

    Assignments are validated but never stored: assigning only falsy
    values is silently accepted (the array is already all False), while
    assigning anything truthy raises `ValueError`.

    Parameters
    ----------
    shape : int or tuple of int
        Shape of the array, passed straight to `numpy.zeros`.
    """

    def __new__(cls, shape):
        # Use the builtin ``bool``: the ``np.bool`` alias was deprecated and
        # later removed from NumPy, where it raises AttributeError.
        return np.zeros(shape, dtype=bool).view(cls)

    def _reject_true(self, val):
        """Raise ValueError if any element of ``val`` is truthy."""
        if np.any(np.asarray(val)):
            raise ValueError('Cannot set any element of {0} class to True'
                             .format(self.__class__.__name__))

    def __setitem__(self, item, val):
        # Nothing is written: after the check every value is known to be
        # False, which is what the array already holds.
        self._reject_true(val)

    def __setslice__(self, start, stop, val):
        # Python 2 only: simple slice assignment is routed here instead of
        # through __setitem__.
        self._reject_true(val)
def __new__(cls, data=None, name=None,
            dtype=None, shape=(), length=0,
            description=None, unit=None, format=None, meta=None, copy=False):
    """
    Create the column array and attach the column attributes to it.

    The underlying array is built in one of four ways, depending on
    ``data``:

    - ``None``: a zero-filled array of ``length`` rows whose cells have
      shape ``shape``.
    - a `BaseColumn` that carries a ``_name`` attribute: its data are
      used, and any of ``description``, ``unit``, ``format``, ``meta``
      and ``name`` left as None is inherited from it (``meta`` is
      deep-copied).
    - a `Quantity`: its values are used, converted with ``data.to(unit)``
      when ``unit`` is given, otherwise ``unit`` is taken from the
      quantity; ``description``, ``format`` and ``meta`` left as None are
      taken from ``data.info``.
    - anything else: passed to ``np.array``.

    ``dtype`` and ``copy`` are forwarded to ``np.array`` in the last three
    cases.
    """
    if data is None:
        values = np.zeros(length, dtype=(np.dtype(dtype).str, shape))

    elif isinstance(data, BaseColumn) and hasattr(data, '_name'):
        # When unpickling a MaskedColumn, ``data`` will be a bare
        # BaseColumn with none of the expected attributes.  The ``_name``
        # check keeps that case out of this branch, which initializes
        # from the attributes of ``data``.
        values = np.array(data.data, dtype=dtype, copy=copy)
        # Explicit arguments win; whatever is still None is inherited.
        description = data.description if description is None else description
        unit = data.unit if unit is None else unit
        format = data.format if format is None else format
        meta = deepcopy(data.meta) if meta is None else meta
        name = data.name if name is None else name

    elif isinstance(data, Quantity):
        # Convert only when the caller asked for a specific unit.
        source = data if unit is None else data.to(unit)
        values = np.array(source, dtype=dtype, copy=copy)
        if unit is None:
            unit = data.unit
        description = data.info.description if description is None else description
        format = data.info.format if format is None else format
        meta = deepcopy(data.info.meta) if meta is None else meta

    else:
        values = np.array(data, dtype=dtype, copy=copy)

    # Columns whose cells are themselves arrays use the shimmed class.
    col = values.view(cls._get_nd_proxy_class(values))
    col._name = fix_column_name(name)
    col.unit = unit
    col.format = format
    col.description = description
    col.meta = meta
    col._parent_table = None
    return col
def copy(self, order='C', data=None, copy_data=True):
    """
    Return a copy of the current instance.

    If ``data`` is supplied then a view (reference) of ``data`` is used,
    and ``copy_data`` is ignored.

    Parameters
    ----------
    order : {'C', 'F', 'A', 'K'}, optional
        Memory layout of the copy, passed to the ``copy`` method of the
        data array.  Only used when the instance data are copied, i.e.
        ``data`` is None and ``copy_data`` is True.  Default is 'C'.
    data : array, optional
        If supplied then use a view of ``data`` instead of the instance
        data.  This allows copying the instance attributes and meta.
    copy_data : bool, optional
        Make a copy of the internal numpy array instead of using a
        reference.  Default is True.

    Returns
    -------
    col : Column or MaskedColumn
        Copy of the current column (same type as original)
    """
    if data is None:
        data = self.data.copy(order) if copy_data else self.data

    duplicate = data.view(self.__class__)
    duplicate.__array_finalize__(self)

    # For MaskedColumn, MaskedArray.__array_finalize__ also copies the
    # mask from self, which is not the idea here, so take the mask that
    # belongs to ``data`` instead.
    if isinstance(self, MaskedColumn):
        duplicate._mask = data._mask

    self._copy_groups(duplicate)
    return duplicate
def __array_wrap__(self, out_arr, context=None):
    """
    __array_wrap__ is called at the end of every ufunc.

    Normally, we want a Column object back and do not have to do anything
    special. But there are two exceptions:

    1) If the output shape is different (e.g. for reduction ufuncs
       like sum() or mean()), a Column still linking to a parent_table
       makes little sense, so we return the output viewed as the
       column content (ndarray or MaskedArray).
       For this case, we use "[()]" to select everything, and to ensure we
       convert a zero rank array to a scalar.  (For some reason np.sum()
       returns a zero rank scalar array while np.mean() returns a scalar;
       so the [()] is needed for this case.)

    2) When the output is a column produced by one of the functions
       listed in ``_comparison_functions`` we also want to consistently
       return an array rather than a column (see #1446 and #1685).

    Parameters
    ----------
    out_arr : array
        Result of the ufunc, as handed over by numpy.
    context : tuple or None, optional
        Ufunc call context; only its first element is inspected here and
        compared against ``_comparison_functions``.

    Returns
    -------
    out : column, array or scalar
        ``out_arr`` as wrapped by the parent class, or its ``data``
        indexed with ``[()]`` in the two cases described above.
    """
    out_arr = super(BaseColumn, self).__array_wrap__(out_arr, context)
    # Either the shape changed, or a column came back from one of the
    # functions in ``_comparison_functions``: strip the column wrapper.
    if (self.shape != out_arr.shape or
        (isinstance(out_arr, BaseColumn) and
         (context is not None and context[0] in _comparison_functions))):
        return out_arr.data[()]
    else:
        return out_arr
def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False):
    """Print a formatted string representation of column values.

    If no value of ``max_lines`` is supplied then the height of the
    screen terminal is used to set ``max_lines``.  If the terminal
    height cannot be determined then the default will be
    determined using the ``astropy.conf.max_lines`` configuration
    item. If a negative value of ``max_lines`` is supplied then
    there is no line limit applied.

    Parameters
    ----------
    max_lines : int
        Maximum number of values in output

    show_name : bool
        Include column name (default=True)

    show_unit : bool
        Include a header row for unit (default=False)

    show_dtype : bool
        Include column dtype (default=False)
    """
    # Formatting is delegated to the formatter selected by ``_formatter``.
    _pformat_col = self._formatter._pformat_col
    lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit,
                               show_dtype=show_dtype)

    # The first ``n_header`` lines are header lines; they are printed in
    # red, the remaining lines are printed plainly.
    n_header = outs['n_header']
    for i, line in enumerate(lines):
        if i < n_header:
            color_print(line, 'red')
        else:
            print(line)
def convert_unit_to(self, new_unit, equivalencies=[]):
    """
    Convert the column values in-place from the current unit to
    ``new_unit`` and record ``new_unit`` as the column unit.

    To change the unit associated with this column without
    actually changing the data values, simply set the ``unit``
    property.

    Parameters
    ----------
    new_unit : str or `astropy.units.UnitBase` instance
        The unit to convert to.

    equivalencies : list of equivalence pairs, optional
        A list of equivalence pairs to try if the unit are not
        directly convertible.  See :ref:`unit_equivalencies`.

    Raises
    ------
    ValueError
        If the column has no unit set
    astropy.units.UnitsError
        If units are inconsistent
    """
    current_unit = self.unit
    if current_unit is None:
        raise ValueError("No unit set on column")

    # Write the converted numbers into the existing buffer so that the
    # conversion happens in-place, and only then switch the unit label.
    values = self.data
    values[:] = current_unit.to(new_unit, values, equivalencies=equivalencies)
    self.unit = new_unit
def _copy_groups(self, out):
    """
    Copy current groups into a copy of self ``out``.

    If this column has a parent table, the grouping indices of the table
    are used (and nothing is copied when the table has no ``_groups``).
    Only a column without a parent table falls back to its own
    ``_groups``.

    Parameters
    ----------
    out : column
        Copy of ``self`` that receives a new ``_groups`` attribute.
    """
    # ``parent_table`` dereferences a weak reference on every access, so
    # resolve it once and keep the result for all checks below.
    parent_table = self.parent_table

    # Test identity, not truthiness: a parent that evaluates as False
    # (e.g. an empty container) is still a parent.  This matches the
    # ``is not None`` checks used elsewhere in this class.
    if parent_table is not None:
        if hasattr(parent_table, '_groups'):
            out._groups = groups.ColumnGroups(
                out, indices=parent_table._groups._indices)
    elif hasattr(self, '_groups'):
        out._groups = groups.ColumnGroups(out, indices=self._groups._indices)
def _copy_attrs(self, obj):
    """
    Copy key column attributes from ``obj`` to self.

    ``name``, ``unit``, ``format`` and ``description`` are taken from
    ``obj`` (None when ``obj`` lacks the attribute); ``meta`` is
    deep-copied and defaults to an empty dict.
    """
    self.name = getattr(obj, 'name', None)
    self.unit = getattr(obj, 'unit', None)
    self.format = getattr(obj, 'format', None)
    self.description = getattr(obj, 'description', None)

    # Deep copy so that later changes to one meta do not leak into the other.
    source_meta = getattr(obj, 'meta', {})
    self.meta = deepcopy(source_meta)
def __new__(cls, data=None, name=None,
            dtype=None, shape=(), length=0,
            description=None, unit=None, format=None, meta=None, copy=False):
    """
    Create a Column, refusing masked input that actually masks something.

    All arguments are forwarded unchanged to the parent class
    constructor.

    Raises
    ------
    TypeError
        If ``data`` is a MaskedColumn with at least one masked element.
    """
    has_masked_values = isinstance(data, MaskedColumn) and np.any(data.mask)
    if has_masked_values:
        raise TypeError("Cannot convert a MaskedColumn with masked value to a Column")

    return super(Column, cls).__new__(
        cls, data=data, name=name, dtype=dtype, shape=shape, length=length,
        description=description, unit=unit, format=format, meta=meta,
        copy=copy)
def insert(self, obj, values):
    """
    Insert values before the given indices in the column and return
    a new `~astropy.table.Column` object.

    Parameters
    ----------
    obj : int, slice or sequence of ints
        Object that defines the index or indices before which ``values`` is
        inserted.
    values : array_like
        Value(s) to insert.  If the type of ``values`` is different
        from that of the column, ``values`` is converted to the matching
        type.  ``values`` should be shaped so that it can be broadcast
        appropriately.  For a column of object dtype ``values`` is
        inserted as one single object, even if it is array-like.

    Returns
    -------
    out : `~astropy.table.Column`
        A copy of column with ``values`` inserted.  Note that the
        insertion does not occur in-place: a new column is returned.
    """
    import numbers

    if self.dtype.kind == 'O':
        # Even if values is array-like (e.g. [1,2,3]), insert as a single
        # object.  Numpy.insert instead inserts each element in an array-like
        # input individually.  So insert a None placeholder first and then
        # overwrite it with the object.
        data = np.insert(self, obj, None, axis=0)

        # A negative index is counted from the end of the *original*
        # column, but the placeholder now lives in an array that is one
        # element longer.  Translate it to the absolute position, otherwise
        # an existing element would be overwritten and the placeholder
        # left behind.
        slot = obj
        if isinstance(obj, numbers.Integral) and obj < 0:
            slot = obj + len(self)
        # NOTE(review): slice / sequence values of ``obj`` are still used
        # as-is here, as in the previous implementation.
        data[slot] = values
    else:
        # Explicitly convert to the dtype of this column.  Needed because
        # numpy 1.7 enforces safe casting by default in ``insert``; this
        # isn't the case for 1.6 or 1.8+.
        values = np.asarray(values, dtype=self.dtype)
        data = np.insert(self, obj, values, axis=0)

    out = data.view(self.__class__)
    # Carry the column attributes of ``self`` over to the new column.
    out.__array_finalize__(self)
    return out
This can be an 887 "old-style" (``format % value``) or "new-style" (`str.format`) 888 format specification string or a function or any callable object that 889 accepts a single value and returns a string. 890 meta : dict-like or None 891 Meta-data associated with the column 892 893 Examples 894 -------- 895 A MaskedColumn is similar to a Column except that it includes ``mask`` and 896 ``fill_value`` attributes. It can be created in two different ways: 897 898 - Provide a ``data`` value but not ``shape`` or ``length`` (which are 899 inferred from the data). 900 901 Examples:: 902 903 col = MaskedColumn(data=[1, 2], name='name') 904 col = MaskedColumn(data=[1, 2], name='name', mask=[True, False]) 905 col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99) 906 907 The ``mask`` argument will be cast as a boolean array and specifies 908 which elements are considered to be missing or invalid. 909 910 The ``dtype`` argument can be any value which is an acceptable 911 fixed-size data-type initializer for the numpy.dtype() method. See 912 `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_. 913 Examples include: 914 915 - Python non-string type (float, int, bool) 916 - Numpy non-string type (e.g. np.float32, np.int64, np.bool) 917 - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15') 918 919 If no ``dtype`` value is provide then the type is inferred using 920 ``np.array(data)``. When ``data`` is provided then the ``shape`` 921 and ``length`` arguments are ignored. 922 923 - Provide ``length`` and optionally ``shape``, but not ``data`` 924 925 Examples:: 926 927 col = MaskedColumn(name='name', length=5) 928 col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4)) 929 930 The default ``dtype`` is ``np.float64``. The ``shape`` argument is the 931 array shape of a single cell in the column. 
932 """ 933 934 def __new__(cls, data=None, name=None, mask=None, fill_value=None, 935 dtype=None, shape=(), length=0, 936 description=None, unit=None, format=None, meta=None, copy=False): 937 938 if mask is None and hasattr(data, 'mask'): 939 mask = data.mask 940 else: 941 mask = deepcopy(mask) 942 943 # Create self using MaskedArray as a wrapper class, following the example of 944 # class MSubArray in 945 # https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py 946 # This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and 947 # https://github.com/astropy/astropy/commit/ff6039e8) 948 949 # First just pass through all args and kwargs to BaseColumn, then wrap that object 950 # with MaskedArray. 951 self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name, 952 unit=unit, format=format, description=description, meta=meta, copy=copy) 953 954 cls = cls._get_nd_proxy_class(self_data) 955 956 self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask) 957 958 # Note: do not set fill_value in the MaskedArray constructor because this does not 959 # go through the fill_value workarounds (see _fix_fill_value below). 960 if fill_value is None and hasattr(data, 'fill_value'): 961 fill_value = data.fill_value 962 self.fill_value = fill_value 963 964 self.parent_table = None 965 966 return self 967 968 def _fix_fill_value(self, val): 969 """Fix a fill value (if needed) to work around a bug with setting the fill 970 value of a string array in MaskedArray with Python 3.x. See 971 https://github.com/numpy/numpy/pull/2733. This mimics the check in 972 numpy.ma.core._check_fill_value() (version < 1.8) which incorrectly sets 973 fill_value to a default if self.dtype.char is 'U' (which is the case for Python 974 3). Here we change the string to a byte string so that in Python 3 the 975 isinstance(val, basestring) part fails. 
976 """ 977 978 if (NUMPY_LT_1_8 and isinstance(val, six.string_types) and 979 (self.dtype.char not in 'SV')): 980 val = val.encode() 981 return val 982 983 @property 984 def fill_value(self): 985 return self.get_fill_value() # defer to native ma.MaskedArray method 986 987 @fill_value.setter 988 def fill_value(self, val): 989 """Set fill value both in the masked column view and in the parent table 990 if it exists. Setting one or the other alone doesn't work.""" 991 val = self._fix_fill_value(val) 992 993 # Yet another ma bug workaround: If the value of fill_value for a string array is 994 # requested but not yet set then it gets created as 'N/A'. From this point onward 995 # any new fill_values are truncated to 3 characters. Note that this does not 996 # occur if the masked array is a structured array (as in the previous block that 997 # deals with the parent table). 998 # 999 # >>> x = ma.array(['xxxx']) 1000 # >>> x.fill_value # fill_value now gets represented as an 'S3' array 1001 # 'N/A' 1002 # >>> x.fill_value='yyyy' 1003 # >>> x.fill_value 1004 # 'yyy' 1005 # 1006 # To handle this we are forced to reset a private variable first: 1007 self._fill_value = None 1008 1009 self.set_fill_value(val) # defer to native ma.MaskedArray method 1010 1011 @property 1012 def data(self): 1013 out = self.view(ma.MaskedArray) 1014 # The following is necessary because of a bug in Numpy, which was 1015 # fixed in numpy/numpy#2703. The fix should be included in Numpy 1.8.0. 1016 out.fill_value = self.fill_value 1017 return out 1018 1019 def filled(self, fill_value=None): 1020 """Return a copy of self, with masked values filled with a given value. 1021 1022 Parameters 1023 ---------- 1024 fill_value : scalar; optional 1025 The value to use for invalid entries (`None` by default). If 1026 `None`, the ``fill_value`` attribute of the array is used 1027 instead. 
1028 1029 Returns 1030 ------- 1031 filled_column : Column 1032 A copy of ``self`` with masked entries replaced by `fill_value` 1033 (be it the function argument or the attribute of ``self``). 1034 """ 1035 if fill_value is None: 1036 fill_value = self.fill_value 1037 fill_value = self._fix_fill_value(fill_value) 1038 1039 data = super(MaskedColumn, self).filled(fill_value) 1040 # Use parent table definition of Column if available 1041 column_cls = self.parent_table.Column if (self.parent_table is not None) else Column 1042 out = column_cls(name=self.name, data=data, unit=self.unit, 1043 format=self.format, description=self.description, 1044 meta=deepcopy(self.meta)) 1045 return out 1046 1047 def insert(self, obj, values, mask=None): 1048 """ 1049 Insert values along the given axis before the given indices and return 1050 a new `~astropy.table.MaskedColumn` object. 1051 1052 Parameters 1053 ---------- 1054 obj : int, slice or sequence of ints 1055 Object that defines the index or indices before which ``values`` is 1056 inserted. 1057 values : array_like 1058 Value(s) to insert. If the type of ``values`` is different 1059 from that of quantity, ``values`` is converted to the matching type. 1060 ``values`` should be shaped so that it can be broadcast appropriately 1061 mask : boolean array_like 1062 Mask value(s) to insert. If not supplied then False is used. 1063 1064 Returns 1065 ------- 1066 out : `~astropy.table.MaskedColumn` 1067 A copy of column with ``values`` and ``mask`` inserted. Note that the 1068 insertion does not occur in-place: a new masked column is returned. 1069 """ 1070 self_ma = self.data # self viewed as MaskedArray 1071 1072 if self.dtype.kind == 'O': 1073 # Even if values is array-like (e.g. [1,2,3]), insert as a single 1074 # object. Numpy.insert instead inserts each element in an array-like 1075 # input individually. 
1076 new_data = np.insert(self_ma.data, obj, None, axis=0) 1077 new_data[obj] = values 1078 else: 1079 # Explicitly convert to dtype of this column. Needed because numpy 1.7 1080 # enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+. 1081 values = np.asarray(values, dtype=self.dtype) 1082 new_data = np.insert(self_ma.data, obj, values, axis=0) 1083 1084 if mask is None: 1085 if self.dtype.kind == 'O': 1086 mask = False 1087 else: 1088 mask = np.zeros(values.shape, dtype=np.bool) 1089 new_mask = np.insert(self_ma.mask, obj, mask, axis=0) 1090 new_ma = np.ma.array(new_data, mask=new_mask, copy=False) 1091 1092 out = new_ma.view(self.__class__) 1093 out.parent_table = None 1094 out._copy_attrs(self) 1095 1096 return out 1097 1098 def __getitem__(self, item): 1099 out = super(MaskedColumn, self).__getitem__(item) 1100 1101 # Fixes issue #3023: when calling getitem with a MaskedArray subclass 1102 # the original object attributes are not copied. 1103 if out.__class__ is self.__class__: 1104 out.parent_table = None 1105 out._copy_attrs(self) 1106 1107 return out 1108 1109 # Set items and slices using MaskedArray method, instead of falling through 1110 # to the (faster) Column version which uses an ndarray view. This doesn't 1111 # copy the mask properly. See test_setting_from_masked_column test. 1112 def __setitem__(self, index, value): 1113 ma.MaskedArray.__setitem__(self, index, value) 1114 1115 def __setslice__(self, start, stop, value): 1116 ma.MaskedArray.__setslice__(self, start, stop, value) 1117 1118 # We do this to make the methods show up in the API docs 1119 name = BaseColumn.name 1120 copy = BaseColumn.copy 1121 more = BaseColumn.more 1122 pprint = BaseColumn.pprint 1123 pformat = BaseColumn.pformat 1124 convert_unit_to = BaseColumn.convert_unit_to