/astropy/table/column.py
Python | 1093 lines | 983 code | 38 blank | 72 comment | 36 complexity | 26eaba65d39d05bac3dc740e4f284a50 MD5 | raw file
1# Licensed under a 3-clause BSD style license - see LICENSE.rst 2from __future__ import (absolute_import, division, print_function, 3 unicode_literals) 4from ..extern import six 5 6import weakref 7 8from copy import deepcopy 9 10import numpy as np 11from numpy import ma 12 13from ..units import Unit, Quantity 14from ..utils.compat import NUMPY_LT_1_8 15from ..utils.console import color_print 16from ..utils.metadata import MetaData 17from ..utils.data_info import BaseColumnInfo, dtype_info_name 18from . import groups 19from . import pprint 20from .np_utils import fix_column_name 21 22# These "shims" provide __getitem__ implementations for Column and MaskedColumn 23from ._column_mixins import _ColumnGetitemShim, _MaskedColumnGetitemShim 24 25 26# Create a generic TableFormatter object for use by bare columns with no 27# parent table. 28FORMATTER = pprint.TableFormatter() 29INTEGER_TYPES = (int, long, np.integer) if six.PY2 else (int, np.integer) 30 31def _auto_names(n_cols): 32 from . import conf 33 return [str(conf.auto_colname).format(i) for i in range(n_cols)] 34 35 36# list of one and two-dimensional comparison functions, which sometimes return 37# a Column class and sometimes a plain array. Used in __array_wrap__ to ensure 38# they only return plain (masked) arrays (see #1446 and #1685) 39_comparison_functions = set( 40 [np.greater, np.greater_equal, np.less, np.less_equal, 41 np.not_equal, np.equal, 42 np.isfinite, np.isinf, np.isnan, np.sign, np.signbit]) 43 44 45def col_copy(col, copy_indices=True): 46 """ 47 This is a mixin-safe version of Column.copy() (with copy_data=True). 48 """ 49 if isinstance(col, BaseColumn): 50 return col.copy() 51 52 # The new column should have None for the parent_table ref. If the 53 # original parent_table weakref there at the point of copying then it 54 # generates an infinite recursion. Instead temporarily remove the weakref 55 # on the original column and restore after the copy in an exception-safe 56 # manner. 57 58 parent_table = col.info.parent_table 59 indices = col.info.indices 60 col.info.parent_table = None 61 col.info.indices = [] 62 63 try: 64 newcol = col.copy() if hasattr(col, 'copy') else deepcopy(col) 65 newcol.info = col.info 66 newcol.info.indices = deepcopy(indices or []) if copy_indices else [] 67 for index in newcol.info.indices: 68 index.replace_col(col, newcol) 69 finally: 70 col.info.parent_table = parent_table 71 col.info.indices = indices 72 73 return newcol 74 75 76class FalseArray(np.ndarray): 77 def __new__(cls, shape): 78 obj = np.zeros(shape, dtype=np.bool).view(cls) 79 return obj 80 81 def __setitem__(self, item, val): 82 val = np.asarray(val) 83 if np.any(val): 84 raise ValueError('Cannot set any element of {0} class to True' 85 .format(self.__class__.__name__)) 86 87 def __setslice__(self, start, stop, val): 88 val = np.asarray(val) 89 if np.any(val): 90 raise ValueError('Cannot set any element of {0} class to True' 91 .format(self.__class__.__name__)) 92 93 94class ColumnInfo(BaseColumnInfo): 95 attrs_from_parent = BaseColumnInfo.attr_names 96 _supports_indexing = True 97 98 99class BaseColumn(_ColumnGetitemShim, np.ndarray): 100 101 meta = MetaData() 102 103 def __new__(cls, data=None, name=None, 104 dtype=None, shape=(), length=0, 105 description=None, unit=None, format=None, meta=None, 106 copy=False, copy_indices=True): 107 if data is None: 108 dtype = (np.dtype(dtype).str, shape) 109 self_data = np.zeros(length, dtype=dtype) 110 elif isinstance(data, BaseColumn) and hasattr(data, '_name'): 111 # When unpickling a MaskedColumn, ``data`` will be a bare 112 # BaseColumn with none of the expected attributes. In this case 113 # do NOT execute this block which initializes from ``data`` 114 # attributes. 115 self_data = np.array(data.data, dtype=dtype, copy=copy) 116 if description is None: 117 description = data.description 118 if unit is None: 119 unit = unit or data.unit 120 if format is None: 121 format = data.format 122 if meta is None: 123 meta = deepcopy(data.meta) 124 if name is None: 125 name = data.name 126 elif isinstance(data, Quantity): 127 if unit is None: 128 self_data = np.array(data, dtype=dtype, copy=copy) 129 unit = data.unit 130 else: 131 self_data = np.array(data.to(unit), dtype=dtype, copy=copy) 132 if description is None: 133 description = data.info.description 134 if format is None: 135 format = data.info.format 136 if meta is None: 137 meta = deepcopy(data.info.meta) 138 139 else: 140 self_data = np.array(data, dtype=dtype, copy=copy) 141 142 self = self_data.view(cls) 143 self._name = fix_column_name(name) 144 self.unit = unit 145 self.format = format 146 self.description = description 147 self.meta = meta 148 self._parent_table = None 149 self.indices = deepcopy(getattr(data, 'indices', [])) if \ 150 copy_indices else [] 151 for index in self.indices: 152 index.replace_col(data, self) 153 154 return self 155 156 @property 157 def data(self): 158 return self.view(np.ndarray) 159 160 @property 161 def parent_table(self): 162 if self._parent_table is None: 163 return None 164 else: 165 return self._parent_table() 166 167 @parent_table.setter 168 def parent_table(self, table): 169 if table is None: 170 self._parent_table = None 171 else: 172 self._parent_table = weakref.ref(table) 173 174 info = ColumnInfo() 175 176 def copy(self, order='C', data=None, copy_data=True): 177 """ 178 Return a copy of the current instance. 179 180 If ``data`` is supplied then a view (reference) of ``data`` is used, 181 and ``copy_data`` is ignored. 182 183 Parameters 184 ---------- 185 order : {'C', 'F', 'A', 'K'}, optional 186 Controls the memory layout of the copy. 'C' means C-order, 187 'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous, 188 'C' otherwise. 'K' means match the layout of ``a`` as closely 189 as possible. (Note that this function and :func:numpy.copy are very 190 similar, but have different default values for their order= 191 arguments.) Default is 'C'. 192 data : array, optional 193 If supplied then use a view of ``data`` instead of the instance 194 data. This allows copying the instance attributes and meta. 195 copy_data : bool, optional 196 Make a copy of the internal numpy array instead of using a 197 reference. Default is True. 198 199 Returns 200 ------- 201 col : Column or MaskedColumn 202 Copy of the current column (same type as original) 203 """ 204 if data is None: 205 data = self.data 206 if copy_data: 207 data = data.copy(order) 208 209 out = data.view(self.__class__) 210 out.__array_finalize__(self) 211 # for MaskedColumn, MaskedArray.__array_finalize__ also copies mask 212 # from self, which is not the idea here, so undo 213 if isinstance(self, MaskedColumn): 214 out._mask = data._mask 215 216 self._copy_groups(out) 217 218 return out 219 220 def __setstate__(self, state): 221 """ 222 Restore the internal state of the Column/MaskedColumn for pickling 223 purposes. This requires that the last element of ``state`` is a 224 5-tuple that has Column-specific state values. 225 """ 226 # Get the Column attributes and meta 227 name, unit, format, description, meta = state[-1] 228 state = state[:-1] 229 230 # Using super(type(self), self).__setstate__() gives an infinite 231 # recursion. Manually call the right super class to actually set up 232 # the array object. 233 super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray 234 super_class.__setstate__(self, state) 235 236 # Set the Column attributes and meta 237 self._name = name 238 self.unit = unit 239 self.format = format 240 self.description = description 241 self.meta = meta 242 self._parent_table = None 243 244 def __reduce__(self): 245 """ 246 Return a 3-tuple for pickling a Column. Use the super-class 247 functionality but then add in a 5-tuple of Column-specific values 248 that get used in __setstate__. 249 """ 250 super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray 251 reconstruct_func, reconstruct_func_args, state = super_class.__reduce__(self) 252 253 # Define Column-specific attrs and meta that gets added to state. 254 column_state = (self.name, self.unit, self.format, self.description, 255 self.meta) 256 state = state + (column_state,) 257 258 return reconstruct_func, reconstruct_func_args, state 259 260 # avoid == and != to be done based on type of subclass 261 # (helped solve #1446; see also __array_wrap__) 262 def __eq__(self, other): 263 return self.data.__eq__(other) 264 265 def __ne__(self, other): 266 return self.data.__ne__(other) 267 268 def __array_finalize__(self, obj): 269 # Obj will be none for direct call to Column() creator 270 if obj is None: 271 return 272 273 if six.callable(super(BaseColumn, self).__array_finalize__): 274 super(BaseColumn, self).__array_finalize__(obj) 275 276 # Self was created from template (e.g. obj[slice] or (obj * 2)) 277 # or viewcast e.g. obj.view(Column). In either case we want to 278 # init Column attributes for self from obj if possible. 279 self.parent_table = None 280 if not hasattr(self, 'indices'): # may have been copied in __new__ 281 self.indices = [] 282 self._copy_attrs(obj) 283 284 def __array_wrap__(self, out_arr, context=None): 285 """ 286 __array_wrap__ is called at the end of every ufunc. 287 288 Normally, we want a Column object back and do not have to do anything 289 special. But there are two exceptions: 290 291 1) If the output shape is different (e.g. for reduction ufuncs 292 like sum() or mean()), a Column still linking to a parent_table 293 makes little sense, so we return the output viewed as the 294 column content (ndarray or MaskedArray). 295 For this case, we use "[()]" to select everything, and to ensure we 296 convert a zero rank array to a scalar. (For some reason np.sum() 297 returns a zero rank scalar array while np.mean() returns a scalar; 298 So the [()] is needed for this case. 299 300 2) When the output is created by any function that returns a boolean 301 we also want to consistently return an array rather than a column 302 (see #1446 and #1685) 303 """ 304 out_arr = super(BaseColumn, self).__array_wrap__(out_arr, context) 305 if (self.shape != out_arr.shape or 306 (isinstance(out_arr, BaseColumn) and 307 (context is not None and context[0] in _comparison_functions))): 308 return out_arr.data[()] 309 else: 310 return out_arr 311 312 @property 313 def name(self): 314 """ 315 The name of this column. 316 """ 317 return self._name 318 319 @name.setter 320 def name(self, val): 321 val = fix_column_name(val) 322 323 if self.parent_table is not None: 324 table = self.parent_table 325 table.columns._rename_column(self.name, val) 326 327 self._name = val 328 329 @property 330 def descr(self): 331 """Array-interface compliant full description of the column. 332 333 This returns a 3-tuple (name, type, shape) that can always be 334 used in a structured array dtype definition. 335 """ 336 return (self.name, self.dtype.str, self.shape[1:]) 337 338 def iter_str_vals(self): 339 """ 340 Return an iterator that yields the string-formatted values of this 341 column. 342 343 Returns 344 ------- 345 str_vals : iterator 346 Column values formatted as strings 347 """ 348 # Iterate over formatted values with no max number of lines, no column 349 # name, no unit, and ignoring the returned header info in outs. 350 _pformat_col_iter = self._formatter._pformat_col_iter 351 for str_val in _pformat_col_iter(self, -1, show_name=False, show_unit=False, 352 show_dtype=False, outs={}): 353 yield str_val 354 355 def attrs_equal(self, col): 356 """Compare the column attributes of ``col`` to this object. 357 358 The comparison attributes are: ``name``, ``unit``, ``dtype``, 359 ``format``, ``description``, and ``meta``. 360 361 Parameters 362 ---------- 363 col : Column 364 Comparison column 365 366 Returns 367 ------- 368 equal : boolean 369 True if all attributes are equal 370 """ 371 if not isinstance(col, BaseColumn): 372 raise ValueError('Comparison `col` must be a Column or ' 373 'MaskedColumn object') 374 375 attrs = ('name', 'unit', 'dtype', 'format', 'description', 'meta') 376 equal = all(getattr(self, x) == getattr(col, x) for x in attrs) 377 378 return equal 379 380 @property 381 def _formatter(self): 382 return FORMATTER if (self.parent_table is None) else self.parent_table.formatter 383 384 def pformat(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False, 385 html=False): 386 """Return a list of formatted string representation of column values. 387 388 If no value of ``max_lines`` is supplied then the height of the 389 screen terminal is used to set ``max_lines``. If the terminal 390 height cannot be determined then the default will be 391 determined using the ``astropy.conf.max_lines`` configuration 392 item. If a negative value of ``max_lines`` is supplied then 393 there is no line limit applied. 394 395 Parameters 396 ---------- 397 max_lines : int 398 Maximum lines of output (header + data rows) 399 400 show_name : bool 401 Include column name (default=True) 402 403 show_unit : bool 404 Include a header row for unit (default=False) 405 406 show_dtype : bool 407 Include column dtype (default=False) 408 409 html : bool 410 Format the output as an HTML table (default=False) 411 412 Returns 413 ------- 414 lines : list 415 List of lines with header and formatted column values 416 417 """ 418 _pformat_col = self._formatter._pformat_col 419 lines, outs = _pformat_col(self, max_lines, show_name=show_name, 420 show_unit=show_unit, show_dtype=show_dtype, 421 html=html) 422 return lines 423 424 def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False): 425 """Print a formatted string representation of column values. 426 427 If no value of ``max_lines`` is supplied then the height of the 428 screen terminal is used to set ``max_lines``. If the terminal 429 height cannot be determined then the default will be 430 determined using the ``astropy.conf.max_lines`` configuration 431 item. If a negative value of ``max_lines`` is supplied then 432 there is no line limit applied. 433 434 Parameters 435 ---------- 436 max_lines : int 437 Maximum number of values in output 438 439 show_name : bool 440 Include column name (default=True) 441 442 show_unit : bool 443 Include a header row for unit (default=False) 444 445 show_dtype : bool 446 Include column dtype (default=True) 447 """ 448 _pformat_col = self._formatter._pformat_col 449 lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit, 450 show_dtype=show_dtype) 451 452 n_header = outs['n_header'] 453 for i, line in enumerate(lines): 454 if i < n_header: 455 color_print(line, 'red') 456 else: 457 print(line) 458 459 def more(self, max_lines=None, show_name=True, show_unit=False): 460 """Interactively browse column with a paging interface. 461 462 Supported keys:: 463 464 f, <space> : forward one page 465 b : back one page 466 r : refresh same page 467 n : next row 468 p : previous row 469 < : go to beginning 470 > : go to end 471 q : quit browsing 472 h : print this help 473 474 Parameters 475 ---------- 476 max_lines : int 477 Maximum number of lines in table output 478 479 show_name : bool 480 Include a header row for column names (default=True) 481 482 show_unit : bool 483 Include a header row for unit (default=False) 484 485 """ 486 _more_tabcol = self._formatter._more_tabcol 487 _more_tabcol(self, max_lines=max_lines, show_name=show_name, 488 show_unit=show_unit) 489 490 @property 491 def unit(self): 492 """ 493 The unit associated with this column. May be a string or a 494 `astropy.units.UnitBase` instance. 495 496 Setting the ``unit`` property does not change the values of the 497 data. To perform a unit conversion, use ``convert_unit_to``. 498 """ 499 return self._unit 500 501 @unit.setter 502 def unit(self, unit): 503 if unit is None: 504 self._unit = None 505 else: 506 self._unit = Unit(unit, parse_strict='silent') 507 508 @unit.deleter 509 def unit(self): 510 self._unit = None 511 512 def convert_unit_to(self, new_unit, equivalencies=[]): 513 """ 514 Converts the values of the column in-place from the current 515 unit to the given unit. 516 517 To change the unit associated with this column without 518 actually changing the data values, simply set the ``unit`` 519 property. 520 521 Parameters 522 ---------- 523 new_unit : str or `astropy.units.UnitBase` instance 524 The unit to convert to. 525 526 equivalencies : list of equivalence pairs, optional 527 A list of equivalence pairs to try if the unit are not 528 directly convertible. See :ref:`unit_equivalencies`. 529 530 Raises 531 ------ 532 astropy.units.UnitsError 533 If units are inconsistent 534 """ 535 if self.unit is None: 536 raise ValueError("No unit set on column") 537 self.data[:] = self.unit.to( 538 new_unit, self.data, equivalencies=equivalencies) 539 self.unit = new_unit 540 541 @property 542 def groups(self): 543 if not hasattr(self, '_groups'): 544 self._groups = groups.ColumnGroups(self) 545 return self._groups 546 547 def group_by(self, keys): 548 """ 549 Group this column by the specified ``keys`` 550 551 This effectively splits the column into groups which correspond to 552 unique values of the ``keys`` grouping object. The output is a new 553 `Column` or `MaskedColumn` which contains a copy of this column but 554 sorted by row according to ``keys``. 555 556 The ``keys`` input to ``group_by`` must be a numpy array with the 557 same length as this column. 558 559 Parameters 560 ---------- 561 keys : numpy array 562 Key grouping object 563 564 Returns 565 ------- 566 out : Column 567 New column with groups attribute set accordingly 568 """ 569 return groups.column_group_by(self, keys) 570 571 def _copy_groups(self, out): 572 """ 573 Copy current groups into a copy of self ``out`` 574 """ 575 if self.parent_table: 576 if hasattr(self.parent_table, '_groups'): 577 out._groups = groups.ColumnGroups(out, indices=self.parent_table._groups._indices) 578 elif hasattr(self, '_groups'): 579 out._groups = groups.ColumnGroups(out, indices=self._groups._indices) 580 581 # Strip off the BaseColumn-ness for repr and str so that 582 # MaskedColumn.data __repr__ does not include masked_BaseColumn(data = 583 # [1 2], ...). 584 def __repr__(self): 585 return np.asarray(self).__repr__() 586 587 @property 588 def quantity(self): 589 """ 590 A view of this table column as a `~astropy.units.Quantity` object with 591 units given by the Column's `unit` parameter. 592 """ 593 # the Quantity initializer is used here because it correctly fails 594 # if the column's values are non-numeric (like strings), while .view 595 # will happily return a quantity with gibberish for numerical values 596 return Quantity(self, copy=False, dtype=self.dtype, order='A') 597 598 def to(self, unit, equivalencies=[], **kwargs): 599 """ 600 Converts this table column to a `~astropy.units.Quantity` object with 601 the requested units. 602 603 Parameters 604 ---------- 605 unit : `~astropy.units.Unit` or str 606 The unit to convert to (i.e., a valid argument to the 607 :meth:`astropy.units.Quantity.to` method). 608 equivalencies : list of equivalence pairs, optional 609 Equivalencies to use for this conversion. See 610 :meth:`astropy.units.Quantity.to` for more details. 611 612 Returns 613 ------- 614 quantity : `~astropy.units.Quantity` 615 A quantity object with the contents of this column in the units 616 ``unit``. 617 """ 618 return self.quantity.to(unit, equivalencies) 619 620 def _copy_attrs(self, obj): 621 """ 622 Copy key column attributes from ``obj`` to self 623 """ 624 for attr in ('name', 'unit', 'format', 'description'): 625 val = getattr(obj, attr, None) 626 setattr(self, attr, val) 627 self.meta = deepcopy(getattr(obj, 'meta', {})) 628 629 630class Column(BaseColumn): 631 """Define a data column for use in a Table object. 632 633 Parameters 634 ---------- 635 data : list, ndarray or None 636 Column data values 637 name : str 638 Column name and key for reference within Table 639 dtype : numpy.dtype compatible value 640 Data type for column 641 shape : tuple or () 642 Dimensions of a single row element in the column data 643 length : int or 0 644 Number of row elements in column data 645 description : str or None 646 Full description of column 647 unit : str or None 648 Physical unit 649 format : str or None or function or callable 650 Format string for outputting column values. This can be an 651 "old-style" (``format % value``) or "new-style" (`str.format`) 652 format specification string or a function or any callable object that 653 accepts a single value and returns a string. 654 meta : dict-like or None 655 Meta-data associated with the column 656 657 Examples 658 -------- 659 A Column can be created in two different ways: 660 661 - Provide a ``data`` value but not ``shape`` or ``length`` (which are 662 inferred from the data). 663 664 Examples:: 665 666 col = Column(data=[1, 2], name='name') # shape=(2,) 667 col = Column(data=[[1, 2], [3, 4]], name='name') # shape=(2, 2) 668 col = Column(data=[1, 2], name='name', dtype=float) 669 col = Column(data=np.array([1, 2]), name='name') 670 col = Column(data=['hello', 'world'], name='name') 671 672 The ``dtype`` argument can be any value which is an acceptable 673 fixed-size data-type initializer for the numpy.dtype() method. See 674 `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_. 675 Examples include: 676 677 - Python non-string type (float, int, bool) 678 - Numpy non-string type (e.g. np.float32, np.int64, np.bool) 679 - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15') 680 681 If no ``dtype`` value is provide then the type is inferred using 682 ``np.array(data)``. 683 684 - Provide ``length`` and optionally ``shape``, but not ``data`` 685 686 Examples:: 687 688 col = Column(name='name', length=5) 689 col = Column(name='name', dtype=int, length=10, shape=(3,4)) 690 691 The default ``dtype`` is ``np.float64``. The ``shape`` argument is the 692 array shape of a single cell in the column. 693 """ 694 695 def __new__(cls, data=None, name=None, 696 dtype=None, shape=(), length=0, 697 description=None, unit=None, format=None, meta=None, 698 copy=False, copy_indices=True): 699 700 if isinstance(data, MaskedColumn) and np.any(data.mask): 701 raise TypeError("Cannot convert a MaskedColumn with masked value to a Column") 702 703 self = super(Column, cls).__new__(cls, data=data, name=name, dtype=dtype, 704 shape=shape, length=length, description=description, 705 unit=unit, format=format, meta=meta, 706 copy=copy, copy_indices=copy_indices) 707 return self 708 709 def _base_repr_(self, html=False): 710 # If scalar then just convert to correct numpy type and use numpy repr 711 if self.ndim == 0: 712 return repr(self.item()) 713 714 descr_vals = [self.__class__.__name__] 715 unit = None if self.unit is None else str(self.unit) 716 shape = None if self.ndim <= 1 else self.shape[1:] 717 for attr, val in (('name', self.name), 718 ('dtype', dtype_info_name(self.dtype)), 719 ('shape', shape), 720 ('unit', unit), 721 ('format', self.format), 722 ('description', self.description), 723 ('length', len(self))): 724 725 if val is not None: 726 descr_vals.append('{0}={1}'.format(attr, repr(val))) 727 728 descr = '<' + ' '.join(descr_vals) + '>\n' 729 730 if html: 731 from ..utils.xml.writer import xml_escape 732 descr = xml_escape(descr) 733 734 data_lines, outs = self._formatter._pformat_col( 735 self, show_name=False, show_unit=False, show_length=False, html=html) 736 737 out = descr + '\n'.join(data_lines) 738 if six.PY2 and isinstance(out, six.text_type): 739 out = out.encode('utf-8') 740 741 return out 742 743 def _repr_html_(self): 744 return self._base_repr_(html=True) 745 746 def __repr__(self): 747 return self._base_repr_(html=False) 748 749 def __unicode__(self): 750 # If scalar then just convert to correct numpy type and use numpy repr 751 if self.ndim == 0: 752 return str(self.item()) 753 754 lines, outs = self._formatter._pformat_col(self) 755 return '\n'.join(lines) 756 if six.PY3: 757 __str__ = __unicode__ 758 759 def __bytes__(self): 760 return six.text_type(self).encode('utf-8') 761 if six.PY2: 762 __str__ = __bytes__ 763 764 # Set items using a view of the underlying data, as it gives an 765 # order-of-magnitude speed-up. [#2994] 766 def __setitem__(self, index, value): 767 # update indices 768 self.info.adjust_indices(index, value, len(self)) 769 self.data[index] = value 770 771 # # Set slices using a view of the underlying data, as it gives an 772 # # order-of-magnitude speed-up. Only gets called in Python 2. [#3020] 773 def __setslice__(self, start, stop, value): 774 self.info.adjust_indices(slice(start, stop), value, len(self)) 775 self.data.__setslice__(start, stop, value) 776 777 def insert(self, obj, values): 778 """ 779 Insert values before the given indices in the column and return 780 a new `~astropy.table.Column` object. 781 782 Parameters 783 ---------- 784 obj : int, slice or sequence of ints 785 Object that defines the index or indices before which ``values`` is 786 inserted. 787 values : array_like 788 Value(s) to insert. If the type of ``values`` is different 789 from that of quantity, ``values`` is converted to the matching type. 790 ``values`` should be shaped so that it can be broadcast appropriately 791 792 Returns 793 ------- 794 out : `~astropy.table.Column` 795 A copy of column with ``values`` and ``mask`` inserted. Note that the 796 insertion does not occur in-place: a new column is returned. 797 """ 798 if self.dtype.kind == 'O': 799 # Even if values is array-like (e.g. [1,2,3]), insert as a single 800 # object. Numpy.insert instead inserts each element in an array-like 801 # input individually. 802 data = np.insert(self, obj, None, axis=0) 803 data[obj] = values 804 else: 805 # Explicitly convert to dtype of this column. Needed because numpy 1.7 806 # enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+. 807 values = np.asarray(values, dtype=self.dtype) 808 data = np.insert(self, obj, values, axis=0) 809 out = data.view(self.__class__) 810 out.__array_finalize__(self) 811 return out 812 813 # We do this to make the methods show up in the API docs 814 name = BaseColumn.name 815 unit = BaseColumn.unit 816 copy = BaseColumn.copy 817 more = BaseColumn.more 818 pprint = BaseColumn.pprint 819 pformat = BaseColumn.pformat 820 convert_unit_to = BaseColumn.convert_unit_to 821 quantity = BaseColumn.quantity 822 to = BaseColumn.to 823 824 825class MaskedColumn(Column, _MaskedColumnGetitemShim, ma.MaskedArray): 826 """Define a masked data column for use in a Table object. 827 828 Parameters 829 ---------- 830 data : list, ndarray or None 831 Column data values 832 name : str 833 Column name and key for reference within Table 834 mask : list, ndarray or None 835 Boolean mask for which True indicates missing or invalid data 836 fill_value : float, int, str or None 837 Value used when filling masked column elements 838 dtype : numpy.dtype compatible value 839 Data type for column 840 shape : tuple or () 841 Dimensions of a single row element in the column data 842 length : int or 0 843 Number of row elements in column data 844 description : str or None 845 Full description of column 846 unit : str or None 847 Physical unit 848 format : str or None or function or callable 849 Format string for outputting column values. This can be an 850 "old-style" (``format % value``) or "new-style" (`str.format`) 851 format specification string or a function or any callable object that 852 accepts a single value and returns a string. 853 meta : dict-like or None 854 Meta-data associated with the column 855 856 Examples 857 -------- 858 A MaskedColumn is similar to a Column except that it includes ``mask`` and 859 ``fill_value`` attributes. It can be created in two different ways: 860 861 - Provide a ``data`` value but not ``shape`` or ``length`` (which are 862 inferred from the data). 863 864 Examples:: 865 866 col = MaskedColumn(data=[1, 2], name='name') 867 col = MaskedColumn(data=[1, 2], name='name', mask=[True, False]) 868 col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99) 869 870 The ``mask`` argument will be cast as a boolean array and specifies 871 which elements are considered to be missing or invalid. 872 873 The ``dtype`` argument can be any value which is an acceptable 874 fixed-size data-type initializer for the numpy.dtype() method. See 875 `<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_. 876 Examples include: 877 878 - Python non-string type (float, int, bool) 879 - Numpy non-string type (e.g. np.float32, np.int64, np.bool) 880 - Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15') 881 882 If no ``dtype`` value is provide then the type is inferred using 883 ``np.array(data)``. When ``data`` is provided then the ``shape`` 884 and ``length`` arguments are ignored. 885 886 - Provide ``length`` and optionally ``shape``, but not ``data`` 887 888 Examples:: 889 890 col = MaskedColumn(name='name', length=5) 891 col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4)) 892 893 The default ``dtype`` is ``np.float64``. The ``shape`` argument is the 894 array shape of a single cell in the column. 895 """ 896 897 def __new__(cls, data=None, name=None, mask=None, fill_value=None, 898 dtype=None, shape=(), length=0, 899 description=None, unit=None, format=None, meta=None, 900 copy=False, copy_indices=True): 901 902 if mask is None and hasattr(data, 'mask'): 903 mask = data.mask 904 else: 905 mask = deepcopy(mask) 906 907 # Create self using MaskedArray as a wrapper class, following the example of 908 # class MSubArray in 909 # https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py 910 # This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and 911 # https://github.com/astropy/astropy/commit/ff6039e8) 912 913 # First just pass through all args and kwargs to BaseColumn, then wrap that object 914 # with MaskedArray. 915 self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name, 916 unit=unit, format=format, description=description, 917 meta=meta, copy=copy, copy_indices=copy_indices) 918 self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask) 919 920 # Note: do not set fill_value in the MaskedArray constructor because this does not 921 # go through the fill_value workarounds (see _fix_fill_value below). 922 if fill_value is None and hasattr(data, 'fill_value') and data.fill_value is not None: 923 # Coerce the fill_value to the correct type since `data` may be a 924 # different dtype than self. 925 fill_value = self.dtype.type(data.fill_value) 926 self.fill_value = fill_value 927 928 self.parent_table = None 929 930 # needs to be done here since self doesn't come from BaseColumn.__new__ 931 for index in self.indices: 932 index.replace_col(self_data, self) 933 934 return self 935 936 def _fix_fill_value(self, val): 937 """Fix a fill value (if needed) to work around a bug with setting the fill 938 value of a string array in MaskedArray with Python 3.x. See 939 https://github.com/numpy/numpy/pull/2733. This mimics the check in 940 numpy.ma.core._check_fill_value() (version < 1.8) which incorrectly sets 941 fill_value to a default if self.dtype.char is 'U' (which is the case for Python 942 3). Here we change the string to a byte string so that in Python 3 the 943 isinstance(val, basestring) part fails. 944 """ 945 946 if (NUMPY_LT_1_8 and isinstance(val, six.string_types) and 947 (self.dtype.char not in 'SV')): 948 val = val.encode() 949 return val 950 951 @property 952 def fill_value(self): 953 return self.get_fill_value() # defer to native ma.MaskedArray method 954 955 @fill_value.setter 956 def fill_value(self, val): 957 """Set fill value both in the masked column view and in the parent table 958 if it exists. Setting one or the other alone doesn't work.""" 959 val = self._fix_fill_value(val) 960 961 # Yet another ma bug workaround: If the value of fill_value for a string array is 962 # requested but not yet set then it gets created as 'N/A'. From this point onward 963 # any new fill_values are truncated to 3 characters. Note that this does not 964 # occur if the masked array is a structured array (as in the previous block that 965 # deals with the parent table). 966 # 967 # >>> x = ma.array(['xxxx']) 968 # >>> x.fill_value # fill_value now gets represented as an 'S3' array 969 # 'N/A' 970 # >>> x.fill_value='yyyy' 971 # >>> x.fill_value 972 # 'yyy' 973 # 974 # To handle this we are forced to reset a private variable first: 975 self._fill_value = None 976 977 self.set_fill_value(val) # defer to native ma.MaskedArray method 978 979 @property 980 def data(self): 981 out = self.view(ma.MaskedArray) 982 # The following is necessary because of a bug in Numpy, which was 983 # fixed in numpy/numpy#2703. The fix should be included in Numpy 1.8.0. 984 out.fill_value = self.fill_value 985 return out 986 987 def filled(self, fill_value=None): 988 """Return a copy of self, with masked values filled with a given value. 989 990 Parameters 991 ---------- 992 fill_value : scalar; optional 993 The value to use for invalid entries (`None` by default). If 994 `None`, the ``fill_value`` attribute of the array is used 995 instead. 996 997 Returns 998 ------- 999 filled_column : Column 1000 A copy of ``self`` with masked entries replaced by `fill_value` 1001 (be it the function argument or the attribute of ``self``). 1002 """ 1003 if fill_value is None: 1004 fill_value = self.fill_value 1005 fill_value = self._fix_fill_value(fill_value) 1006 1007 data = super(MaskedColumn, self).filled(fill_value) 1008 # Use parent table definition of Column if available 1009 column_cls = self.parent_table.Column if (self.parent_table is not None) else Column 1010 out = column_cls(name=self.name, data=data, unit=self.unit, 1011 format=self.format, description=self.description, 1012 meta=deepcopy(self.meta)) 1013 return out 1014 1015 def insert(self, obj, values, mask=None): 1016 """ 1017 Insert values along the given axis before the given indices and return 1018 a new `~astropy.table.MaskedColumn` object. 1019 1020 Parameters 1021 ---------- 1022 obj : int, slice or sequence of ints 1023 Object that defines the index or indices before which ``values`` is 1024 inserted. 1025 values : array_like 1026 Value(s) to insert. If the type of ``values`` is different 1027 from that of quantity, ``values`` is converted to the matching type. 1028 ``values`` should be shaped so that it can be broadcast appropriately 1029 mask : boolean array_like 1030 Mask value(s) to insert. If not supplied then False is used. 1031 1032 Returns 1033 ------- 1034 out : `~astropy.table.MaskedColumn` 1035 A copy of column with ``values`` and ``mask`` inserted. Note that the 1036 insertion does not occur in-place: a new masked column is returned. 1037 """ 1038 self_ma = self.data # self viewed as MaskedArray 1039 1040 if self.dtype.kind == 'O': 1041 # Even if values is array-like (e.g. [1,2,3]), insert as a single 1042 # object. Numpy.insert instead inserts each element in an array-like 1043 # input individually. 1044 new_data = np.insert(self_ma.data, obj, None, axis=0) 1045 new_data[obj] = values 1046 else: 1047 # Explicitly convert to dtype of this column. Needed because numpy 1.7 1048 # enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+. 1049 values = np.asarray(values, dtype=self.dtype) 1050 new_data = np.insert(self_ma.data, obj, values, axis=0) 1051 1052 if mask is None: 1053 if self.dtype.kind == 'O': 1054 mask = False 1055 else: 1056 mask = np.zeros(values.shape, dtype=np.bool) 1057 new_mask = np.insert(self_ma.mask, obj, mask, axis=0) 1058 new_ma = np.ma.array(new_data, mask=new_mask, copy=False) 1059 1060 out = new_ma.view(self.__class__) 1061 out.parent_table = None 1062 out.indices = [] 1063 out._copy_attrs(self) 1064 1065 return out 1066 1067 def _copy_attrs_slice(self, out): 1068 # Fixes issue #3023: when calling getitem with a MaskedArray subclass 1069 # the original object attributes are not copied. 1070 if out.__class__ is self.__class__: 1071 out.parent_table = None 1072 # we need this because __getitem__ does a shallow copy of indices 1073 if out.indices is self.indices: 1074 out.indices = [] 1075 out._copy_attrs(self) 1076 return out 1077 1078 def __setitem__(self, index, value): 1079 # update indices 1080 self.info.adjust_indices(index, value, len(self)) 1081 ma.MaskedArray.__setitem__(self, index, value) 1082 1083 def __setslice__(self, start, stop, value): 1084 # defers to __setitem__, so we don't adjust indices here 1085 ma.MaskedArray.__setslice__(self, start, stop, value) 1086 1087 # We do this to make the methods show up in the API docs 1088 name = BaseColumn.name 1089 copy = BaseColumn.copy 1090 more = BaseColumn.more 1091 pprint = BaseColumn.pprint 1092 pformat = BaseColumn.pformat 1093 convert_unit_to = BaseColumn.convert_unit_to