/pandas/tests/test_graphics.py
Python | 2578 lines | 2124 code | 300 blank | 154 comment | 145 complexity | 8fe4ffa60ffe4a5bf6ca69d9d11d5e58 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
Large files are truncated, but you can click here to view the full file
- #!/usr/bin/env python
- # coding: utf-8
- import nose
- import itertools
- import os
- import string
- from distutils.version import LooseVersion
- from datetime import datetime, date
- from pandas import Series, DataFrame, MultiIndex, PeriodIndex, date_range
- from pandas.compat import (range, lrange, StringIO, lmap, lzip, u, zip,
- iteritems, OrderedDict)
- from pandas.util.decorators import cache_readonly
- import pandas.core.common as com
- import pandas.util.testing as tm
- from pandas.util.testing import ensure_clean
- from pandas.core.config import set_option
- import numpy as np
- from numpy import random
- from numpy.random import rand, randn
- from numpy.testing import assert_array_equal, assert_allclose
- from numpy.testing.decorators import slow
- import pandas.tools.plotting as plotting
- def _skip_if_no_scipy_gaussian_kde():
- try:
- import scipy
- from scipy.stats import gaussian_kde
- except ImportError:
- raise nose.SkipTest("scipy version doesn't support gaussian_kde")
- def _ok_for_gaussian_kde(kind):
- if kind in ['kde','density']:
- try:
- import scipy
- from scipy.stats import gaussian_kde
- except ImportError:
- return False
- return True
@tm.mplskip
class TestPlotBase(tm.TestCase):
    """
    Base class for the plotting tests: common fixture plus assertion
    helpers that inspect matplotlib axes and artists.
    """

    def setUp(self):
        """Reset matplotlib rc settings and build ``self.hist_df``."""
        import matplotlib as mpl
        mpl.rcdefaults()

        n = 100
        # fixed seed so the sampled frame is the same on every run
        with tm.RNGContext(42):
            gender = tm.choice(['Male', 'Female'], size=n)
            classroom = tm.choice(['A', 'B', 'C'], size=n)

            self.hist_df = DataFrame({'gender': gender,
                                      'classroom': classroom,
                                      'height': random.normal(66, 4, size=n),
                                      'weight': random.normal(161, 32, size=n),
                                      'category': random.randint(4, size=n)})

    def tearDown(self):
        """Call ``tm.close()`` after every test."""
        tm.close()

    @cache_readonly
    def plt(self):
        """``matplotlib.pyplot``, imported lazily on first access."""
        import matplotlib.pyplot as plt
        return plt

    @cache_readonly
    def colorconverter(self):
        """``matplotlib.colors.colorConverter``, imported lazily."""
        import matplotlib.colors as colors
        return colors.colorConverter

    def _check_legend_labels(self, axes, labels=None, visible=True):
        """
        Check each axes has expected legend labels

        Parameters
        ----------
        axes : matplotlib Axes object, or its list-like
        labels : list-like
            expected legend labels
        visible : bool
            expected legend visibility. labels are checked only when
            visible is True

        Raises
        ------
        ValueError
            if visible is True and labels is None
        """
        if visible and (labels is None):
            raise ValueError('labels must be specified when visible is True')

        for ax in self._flatten_visible(axes):
            legend = ax.get_legend()
            if visible:
                self.assertTrue(legend is not None)
                self._check_text_labels(legend.get_texts(), labels)
            else:
                self.assertTrue(legend is None)

    def _check_data(self, xp, rs):
        """
        Check each axes has identical lines

        Parameters
        ----------
        xp : matplotlib Axes object
        rs : matplotlib Axes object
        """
        xp_lines = xp.get_lines()
        rs_lines = rs.get_lines()

        self.assertEqual(len(xp_lines), len(rs_lines))
        # plain loop: the comparison is run for its side effect only
        for xpl, rsl in zip(xp_lines, rs_lines):
            assert_allclose(xpl.get_xydata(), rsl.get_xydata())

        tm.close()

    def _check_visible(self, collections, visible=True):
        """
        Check each artist is visible or not

        Parameters
        ----------
        collections : list-like
            list or collection of target artist
        visible : bool
            expected visibility
        """
        for patch in collections:
            self.assertEqual(patch.get_visible(), visible)

    def _get_colors_mapped(self, series, colors):
        """
        Return one color per value of ``series``, pairing the unique
        values (in order of appearance) with ``colors``.
        """
        unique = series.unique()
        # unique and colors length can differ depending on slice value;
        # zip truncates to the shorter of the two
        mapped = dict(zip(unique, colors))
        return [mapped[v] for v in series.values]

    def _check_colors(self, collections, linecolors=None, facecolors=None,
                      mapping=None):
        """
        Check each artist has expected line colors and face colors

        Parameters
        ----------
        collections : list-like
            list or collection of target artist
        linecolors : list-like which has the same length as collections
            list of expected line colors
        facecolors : list-like which has the same length as collections
            list of expected face colors
        mapping : Series
            Series used for color grouping key
            used for andrew_curves, parallel_coordinates, radviz test
        """
        from matplotlib.lines import Line2D
        from matplotlib.collections import Collection
        conv = self.colorconverter

        if linecolors is not None:
            if mapping is not None:
                linecolors = self._get_colors_mapped(mapping, linecolors)
                linecolors = linecolors[:len(collections)]

            self.assertEqual(len(collections), len(linecolors))
            for patch, color in zip(collections, linecolors):
                if isinstance(patch, Line2D):
                    # Line2D may contain a string color expression,
                    # so normalise it to RGBA first
                    result = conv.to_rgba(patch.get_color())
                else:
                    result = patch.get_edgecolor()

                expected = conv.to_rgba(color)
                self.assertEqual(result, expected)

        if facecolors is not None:
            if mapping is not None:
                facecolors = self._get_colors_mapped(mapping, facecolors)
                facecolors = facecolors[:len(collections)]

            self.assertEqual(len(collections), len(facecolors))
            for patch, color in zip(collections, facecolors):
                if isinstance(patch, Collection):
                    # returned as list of np.array
                    result = patch.get_facecolor()[0]
                else:
                    result = patch.get_facecolor()

                if isinstance(result, np.ndarray):
                    result = tuple(result)

                expected = conv.to_rgba(color)
                self.assertEqual(result, expected)

    def _check_text_labels(self, texts, expected):
        """
        Check each text has expected labels

        Parameters
        ----------
        texts : matplotlib Text object, or its list-like
            target text, or its list
        expected : str or list-like which has the same length as texts
            expected text label, or its list
        """
        if not com.is_list_like(texts):
            self.assertEqual(texts.get_text(), expected)
        else:
            labels = [t.get_text() for t in texts]
            self.assertEqual(len(labels), len(expected))
            for label, e in zip(labels, expected):
                self.assertEqual(label, e)

    def _check_ticks_props(self, axes, xlabelsize=None, xrot=None,
                           ylabelsize=None, yrot=None):
        """
        Check each axes has expected tick properties

        Only the first tick label of each visible axes is inspected, and
        only for the properties whose expected value is not None.

        Parameters
        ----------
        axes : matplotlib Axes object, or its list-like
        xlabelsize : number
            expected xticks font size
        xrot : number
            expected xticks rotation
        ylabelsize : number
            expected yticks font size
        yrot : number
            expected yticks rotation
        """
        for ax in self._flatten_visible(axes):
            # Test against None rather than truthiness: 0 is a legitimate
            # expected rotation/size and must not disable the check.
            if xlabelsize is not None or xrot is not None:
                xtick = ax.get_xticklabels()[0]
                if xlabelsize is not None:
                    self.assertAlmostEqual(xtick.get_fontsize(), xlabelsize)
                if xrot is not None:
                    self.assertAlmostEqual(xtick.get_rotation(), xrot)

            if ylabelsize is not None or yrot is not None:
                ytick = ax.get_yticklabels()[0]
                if ylabelsize is not None:
                    self.assertAlmostEqual(ytick.get_fontsize(), ylabelsize)
                if yrot is not None:
                    self.assertAlmostEqual(ytick.get_rotation(), yrot)

    def _check_ax_scales(self, axes, xaxis='linear', yaxis='linear'):
        """
        Check each axes has expected scales

        Parameters
        ----------
        axes : matplotlib Axes object, or its list-like
        xaxis : {'linear', 'log'}
            expected xaxis scale
        yaxis : {'linear', 'log'}
            expected yaxis scale
        """
        for ax in self._flatten_visible(axes):
            self.assertEqual(ax.xaxis.get_scale(), xaxis)
            self.assertEqual(ax.yaxis.get_scale(), yaxis)

    def _check_axes_shape(self, axes, axes_num=None, layout=None,
                          figsize=(8.0, 6.0)):
        """
        Check expected number of axes is drawn in expected layout

        Parameters
        ----------
        axes : matplotlib Axes object, or its list-like
        axes_num : number
            expected number of axes. Unnecessary axes should be set to
            invisible.
        layout : tuple
            expected layout, (expected number of rows , columns)
        figsize : tuple
            expected figsize. default is matplotlib default
        """
        visible_axes = self._flatten_visible(axes)

        if axes_num is not None:
            self.assertEqual(len(visible_axes), axes_num)
            for ax in visible_axes:
                # check something drawn on visible axes
                self.assertTrue(len(ax.get_children()) > 0)

        if layout is not None:
            # layout is estimated from all axes, including invisible ones
            result = self._get_axes_layout(plotting._flatten(axes))
            self.assertEqual(result, layout)

        # the figure size is rounded before being compared with figsize
        actual_size = np.round(visible_axes[0].figure.get_size_inches())
        self.assert_numpy_array_equal(actual_size, np.array(figsize))

    def _get_axes_layout(self, axes):
        """
        Estimate the (rows, columns) layout by counting the distinct
        y and x coordinates of the first corner of each axes' position.
        """
        x_set = set()
        y_set = set()
        for ax in axes:
            # check axes coordinates to estimate layout
            points = ax.get_position().get_points()
            x_set.add(points[0][0])
            y_set.add(points[0][1])
        return (len(y_set), len(x_set))

    def _flatten_visible(self, axes):
        """
        Flatten axes, and filter only visible

        Parameters
        ----------
        axes : matplotlib Axes object, or its list-like
        """
        return [ax for ax in plotting._flatten(axes) if ax.get_visible()]

    def _check_has_errorbars(self, axes, xerr=0, yerr=0):
        """
        Check axes has expected number of errorbars

        Parameters
        ----------
        axes : matplotlib Axes object, or its list-like
        xerr : number
            expected number of x errorbar
        yerr : number
            expected number of y errorbar
        """
        for ax in self._flatten_visible(axes):
            xerr_count = 0
            yerr_count = 0
            for c in ax.containers:
                # containers without these attributes count as no errorbar
                if getattr(c, 'has_xerr', False):
                    xerr_count += 1
                if getattr(c, 'has_yerr', False):
                    yerr_count += 1
            self.assertEqual(xerr, xerr_count)
            self.assertEqual(yerr, yerr_count)

    def _check_box_return_type(self, returned, return_type,
                               expected_keys=None):
        """
        Check box returned type is correct

        Parameters
        ----------
        returned : object to be tested, returned from boxplot
        return_type : str
            return_type passed to boxplot
        expected_keys : list-like, optional
            group labels in subplot case. If not passed,
            the function checks assuming boxplot uses single ax
        """
        from matplotlib.axes import Axes
        types = {'dict': dict, 'axes': Axes, 'both': tuple}

        if expected_keys is None:
            # should be fixed when the returning default is changed
            if return_type is None:
                return_type = 'dict'

            self.assertIsInstance(returned, types[return_type])
            if return_type == 'both':
                self.assertIsInstance(returned.ax, Axes)
                self.assertIsInstance(returned.lines, dict)
        else:
            # should be fixed when the returning default is changed
            if return_type is None:
                for r in self._flatten_visible(returned):
                    self.assertIsInstance(r, Axes)
                return

            self.assertIsInstance(returned, OrderedDict)
            self.assertEqual(sorted(returned.keys()), sorted(expected_keys))
            for key, value in iteritems(returned):
                self.assertIsInstance(value, types[return_type])
                # check returned dict has correct mapping
                if return_type == 'axes':
                    self.assertEqual(value.get_title(), key)
                elif return_type == 'both':
                    self.assertEqual(value.ax.get_title(), key)
                    self.assertIsInstance(value.ax, Axes)
                    self.assertIsInstance(value.lines, dict)
                elif return_type == 'dict':
                    line = value['medians'][0]
                    self.assertEqual(line.get_axes().get_title(), key)
                else:
                    raise AssertionError(
                        'unexpected return_type: %r' % (return_type,))
@tm.mplskip
class TestSeriesPlots(TestPlotBase):
    """Tests for ``Series.plot``, ``Series.hist`` and related plot helpers."""

    def setUp(self):
        """Build the time, string-indexed and period series fixtures."""
        TestPlotBase.setUp(self)
        import matplotlib as mpl
        mpl.rcdefaults()

        # compare like with like: both sides as LooseVersion
        self.mpl_le_1_2_1 = (LooseVersion(str(mpl.__version__)) <=
                             LooseVersion('1.2.1'))
        self.ts = tm.makeTimeSeries()
        self.ts.name = 'ts'

        self.series = tm.makeStringSeries()
        self.series.name = 'series'

        self.iseries = tm.makePeriodSeries()
        self.iseries.name = 'iseries'

    @slow
    def test_plot(self):
        """Smoke-test Series.plot across options, scales and kinds."""
        _check_plot_works(self.ts.plot, label='foo')
        _check_plot_works(self.ts.plot, use_index=False)
        axes = _check_plot_works(self.ts.plot, rot=0)
        self._check_ticks_props(axes, xrot=0)

        ax = _check_plot_works(self.ts.plot, style='.', logy=True)
        self._check_ax_scales(ax, yaxis='log')

        ax = _check_plot_works(self.ts.plot, style='.', logx=True)
        self._check_ax_scales(ax, xaxis='log')

        ax = _check_plot_works(self.ts.plot, style='.', loglog=True)
        self._check_ax_scales(ax, xaxis='log', yaxis='log')

        _check_plot_works(self.ts[:10].plot, kind='bar')
        _check_plot_works(self.ts.plot, kind='area', stacked=False)
        _check_plot_works(self.iseries.plot)

        for kind in ['line', 'bar', 'barh', 'kde']:
            if not _ok_for_gaussian_kde(kind):
                continue
            _check_plot_works(self.series[:5].plot, kind=kind)

        _check_plot_works(self.series[:10].plot, kind='barh')
        ax = _check_plot_works(Series(randn(10)).plot, kind='bar',
                               color='black')
        self._check_colors([ax.patches[0]], facecolors=['black'])

        # GH 6951
        ax = _check_plot_works(self.ts.plot, subplots=True)
        self._check_axes_shape(ax, axes_num=1, layout=(1, 1))

    @slow
    def test_plot_figsize_and_title(self):
        """title and figsize keywords are applied to the plot."""
        # figsize and title
        ax = self.series.plot(title='Test', figsize=(16, 8))
        self._check_text_labels(ax.title, 'Test')
        self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16, 8))

    def test_ts_line_lim(self):
        """x-limits of a time series line plot equal the line's x extent."""
        def check_xlim(ax):
            xmin, xmax = ax.get_xlim()
            lines = ax.get_lines()
            self.assertEqual(xmin, lines[0].get_data(orig=False)[0][0])
            self.assertEqual(xmax, lines[0].get_data(orig=False)[0][-1])

        check_xlim(self.ts.plot())
        tm.close()

        check_xlim(self.ts.plot(secondary_y=True))

    def test_ts_area_lim(self):
        """x-limits of a time series area plot equal the line's x extent."""
        def check_xlim(ax):
            xmin, xmax = ax.get_xlim()
            line = ax.get_lines()[0].get_data(orig=False)[0]
            self.assertEqual(xmin, line[0])
            self.assertEqual(xmax, line[-1])

        check_xlim(self.ts.plot(kind='area', stacked=False))
        tm.close()

        # GH 7471
        check_xlim(self.ts.plot(kind='area', stacked=False, x_compat=True))
        tm.close()

        tz_ts = self.ts.copy()
        tz_ts.index = tz_ts.tz_localize('GMT').tz_convert('CET')
        check_xlim(tz_ts.plot(kind='area', stacked=False, x_compat=True))
        tm.close()

        check_xlim(tz_ts.plot(kind='area', stacked=False, secondary_y=True))

    def test_line_area_nan_series(self):
        """NaN is masked in line plots and plotted as 0 in stacked/area."""
        values = [1, 2, np.nan, 3]
        s = Series(values)
        ts = Series(values, index=tm.makeDateIndex(k=4))

        for d in [s, ts]:
            ax = _check_plot_works(d.plot)
            masked = ax.lines[0].get_ydata()
            # remove nan for comparison purpose
            self.assert_numpy_array_equal(np.delete(masked.data, 2),
                                          np.array([1, 2, 3]))
            self.assert_numpy_array_equal(
                masked.mask, np.array([False, False, True, False]))

            expected = np.array([1, 2, 0, 3])
            ax = _check_plot_works(d.plot, stacked=True)
            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
            ax = _check_plot_works(d.plot, kind='area')
            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
            ax = _check_plot_works(d.plot, kind='area', stacked=False)
            self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)

    @slow
    def test_bar_log(self):
        """Log-scaled bar plot puts y ticks at the expected decades."""
        expected = np.array([1., 10., 100., 1000.])

        # when matplotlib is newer than 1.2.1 the expected ticks gain
        # one extra decade at each end
        if not self.mpl_le_1_2_1:
            expected = np.hstack((.1, expected, 1e4))

        ax = Series([200, 500]).plot(log=True, kind='bar')
        assert_array_equal(ax.yaxis.get_ticklocs(), expected)

    @slow
    def test_bar_ignore_index(self):
        """use_index=False labels bars by position, not by index."""
        ser = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        ax = ser.plot(kind='bar', use_index=False)
        self._check_text_labels(ax.get_xticklabels(), ['0', '1', '2', '3'])

    def test_rotation(self):
        """rot keyword sets the x tick label rotation."""
        df = DataFrame(randn(5, 5))
        axes = df.plot(rot=30)
        self._check_ticks_props(axes, xrot=30)

    def test_irregular_datetime(self):
        """String x-limits on an irregular datetime plot become ordinals."""
        rng = date_range('1/1/2000', '3/1/2000')
        rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]]
        ser = Series(randn(len(rng)), rng)
        ax = ser.plot()
        xp = datetime(1999, 1, 1).toordinal()
        ax.set_xlim('1/1/1999', '1/1/2001')
        self.assertEqual(xp, ax.get_xlim()[0])

    @slow
    def test_pie_series(self):
        """Pie plot labels, colors, autopct text and invalid input."""
        # if sum of values is less than 1.0, pie handle them as rate and
        # draw semicircle.
        series = Series(np.random.randint(1, 5),
                        index=['a', 'b', 'c', 'd', 'e'], name='YLABEL')
        ax = _check_plot_works(series.plot, kind='pie')
        self._check_text_labels(ax.texts, series.index)
        self.assertEqual(ax.get_ylabel(), 'YLABEL')

        # without wedge labels
        ax = _check_plot_works(series.plot, kind='pie', labels=None)
        self._check_text_labels(ax.texts, [''] * 5)

        # with less colors than elements
        color_args = ['r', 'g', 'b']
        ax = _check_plot_works(series.plot, kind='pie', colors=color_args)

        color_expected = ['r', 'g', 'b', 'r', 'g']
        self._check_colors(ax.patches, facecolors=color_expected)

        # with labels and colors
        labels = ['A', 'B', 'C', 'D', 'E']
        color_args = ['r', 'g', 'b', 'c', 'm']
        ax = _check_plot_works(series.plot, kind='pie', labels=labels,
                               colors=color_args)
        self._check_text_labels(ax.texts, labels)
        self._check_colors(ax.patches, facecolors=color_args)

        # with autopct and fontsize
        ax = _check_plot_works(series.plot, kind='pie', colors=color_args,
                               autopct='%.2f', fontsize=7)
        pcts = ['{0:.2f}'.format(s * 100)
                for s in series.values / float(series.sum())]
        # Expected texts alternate label, percentage, label, percentage...
        # Interleave explicitly instead of relying on StopIteration
        # escaping a generator expression (a RuntimeError under PEP 479).
        expected_texts = [text
                          for pair in zip(series.index, pcts)
                          for text in pair]
        self._check_text_labels(ax.texts, expected_texts)
        for t in ax.texts:
            self.assertEqual(t.get_fontsize(), 7)

        # includes negative value
        series = Series([1, 2, 0, 4, -1], index=['a', 'b', 'c', 'd', 'e'])
        with tm.assertRaises(ValueError):
            series.plot(kind='pie')

        # includes nan
        series = Series([1, 2, np.nan, 4],
                        index=['a', 'b', 'c', 'd'], name='YLABEL')
        ax = _check_plot_works(series.plot, kind='pie')
        self._check_text_labels(ax.texts, series.index)

    @slow
    def test_hist(self):
        """Smoke-test Series.hist with grid, figsize, by, ax and figure."""
        _check_plot_works(self.ts.hist)
        _check_plot_works(self.ts.hist, grid=False)
        _check_plot_works(self.ts.hist, figsize=(8, 10))
        _check_plot_works(self.ts.hist, by=self.ts.index.month)
        _check_plot_works(self.ts.hist, by=self.ts.index.month, bins=5)

        fig, ax = self.plt.subplots(1, 1)
        _check_plot_works(self.ts.hist, ax=ax)
        _check_plot_works(self.ts.hist, ax=ax, figure=fig)
        _check_plot_works(self.ts.hist, figure=fig)
        tm.close()

        fig, (ax1, ax2) = self.plt.subplots(1, 2)
        _check_plot_works(self.ts.hist, figure=fig, ax=ax1)
        _check_plot_works(self.ts.hist, figure=fig, ax=ax2)

        with tm.assertRaises(ValueError):
            self.ts.hist(by=self.ts.index, figure=fig)

    @slow
    def test_hist_bins(self):
        """bins=2 draws exactly two patches."""
        df = DataFrame(np.random.randn(10, 2))
        ax = df.hist(bins=2)[0][0]
        self.assertEqual(len(ax.patches), 2)

    @slow
    def test_hist_layout(self):
        """layout without by raises ValueError for Series.hist."""
        df = self.hist_df
        with tm.assertRaises(ValueError):
            df.height.hist(layout=(1, 1))

        with tm.assertRaises(ValueError):
            df.height.hist(layout=[1, 1])

    @slow
    def test_hist_layout_with_by(self):
        """layout combined with by produces the requested grid."""
        df = self.hist_df

        axes = _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1))
        self._check_axes_shape(axes, axes_num=2, layout=(2, 1))

        axes = _check_plot_works(df.height.hist, by=df.category,
                                 layout=(4, 1))
        self._check_axes_shape(axes, axes_num=4, layout=(4, 1))

        axes = _check_plot_works(df.height.hist, by=df.classroom,
                                 layout=(2, 2))
        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))

        axes = _check_plot_works(df.height.hist, by=df.category,
                                 layout=(4, 2), figsize=(12, 7))
        self._check_axes_shape(axes, axes_num=4, layout=(4, 2),
                               figsize=(12, 7))

    @slow
    def test_hist_no_overlap(self):
        """hist draws into the current subplot without adding axes."""
        from matplotlib.pyplot import subplot, gcf
        x = Series(randn(2))
        y = Series(randn(2))
        subplot(121)
        x.hist()
        subplot(122)
        y.hist()
        fig = gcf()
        axes = fig.get_axes()
        self.assertEqual(len(axes), 2)

    @slow
    def test_plot_fails_with_dupe_color_and_style(self):
        """Color given both in style and color raises ValueError."""
        x = Series(randn(2))
        with tm.assertRaises(ValueError):
            x.plot(style='k--', color='k')

    @slow
    def test_hist_by_no_extra_plots(self):
        """hist with by leaves exactly one figure open."""
        df = self.hist_df
        df.height.hist(by=df.gender)
        self.assertEqual(len(self.plt.get_fignums()), 1)

    def test_plot_fails_when_ax_differs_from_figure(self):
        """ax belonging to another figure raises AssertionError."""
        from pylab import figure
        fig1 = figure()
        fig2 = figure()
        ax1 = fig1.add_subplot(111)
        with tm.assertRaises(AssertionError):
            self.ts.hist(ax=ax1, figure=fig2)

    @slow
    def test_kde(self):
        """kde/density plots work and honour logy."""
        tm._skip_if_no_scipy()
        _skip_if_no_scipy_gaussian_kde()
        _check_plot_works(self.ts.plot, kind='kde')
        _check_plot_works(self.ts.plot, kind='density')
        ax = self.ts.plot(kind='kde', logy=True)
        self._check_ax_scales(ax, yaxis='log')

    @slow
    def test_kde_kwargs(self):
        """kde/density accept bw_method and ind keywords."""
        tm._skip_if_no_scipy()
        _skip_if_no_scipy_gaussian_kde()
        _check_plot_works(self.ts.plot, kind='kde', bw_method=.5,
                          ind=np.linspace(-100, 100, 20))
        _check_plot_works(self.ts.plot, kind='density', bw_method=.5,
                          ind=np.linspace(-100, 100, 20))
        ax = self.ts.plot(kind='kde', logy=True, bw_method=.5,
                          ind=np.linspace(-100, 100, 20))
        self._check_ax_scales(ax, yaxis='log')

    @slow
    def test_kde_color(self):
        """color keyword is applied to the single kde line."""
        tm._skip_if_no_scipy()
        _skip_if_no_scipy_gaussian_kde()
        ax = self.ts.plot(kind='kde', logy=True, color='r')
        self._check_ax_scales(ax, yaxis='log')
        lines = ax.get_lines()
        self.assertEqual(len(lines), 1)
        self._check_colors(lines, ['r'])

    @slow
    def test_autocorrelation_plot(self):
        """autocorrelation_plot accepts Series/ndarray and a label."""
        from pandas.tools.plotting import autocorrelation_plot
        _check_plot_works(autocorrelation_plot, self.ts)
        _check_plot_works(autocorrelation_plot, self.ts.values)

        ax = autocorrelation_plot(self.ts, label='Test')
        self._check_legend_labels(ax, labels=['Test'])

    @slow
    def test_lag_plot(self):
        """lag_plot works with the default and an explicit lag."""
        from pandas.tools.plotting import lag_plot
        _check_plot_works(lag_plot, self.ts)
        _check_plot_works(lag_plot, self.ts, lag=5)

    @slow
    def test_bootstrap_plot(self):
        """bootstrap_plot works with a small sample size."""
        from pandas.tools.plotting import bootstrap_plot
        _check_plot_works(bootstrap_plot, self.ts, size=10)

    def test_invalid_plot_data(self):
        """All-string data raises TypeError for every common kind."""
        s = Series(list('abcd'))
        for kind in plotting._common_kinds:
            if not _ok_for_gaussian_kde(kind):
                continue
            with tm.assertRaises(TypeError):
                s.plot(kind=kind)

    @slow
    def test_valid_object_plot(self):
        """Object-dtype numeric data plots for every common kind."""
        s = Series(lrange(10), dtype=object)
        for kind in plotting._common_kinds:
            if not _ok_for_gaussian_kde(kind):
                continue
            _check_plot_works(s.plot, kind=kind)

    def test_partially_invalid_plot_data(self):
        """Mixed string/number data raises TypeError for common kinds."""
        s = Series(['a', 'b', 1.0, 2])
        for kind in plotting._common_kinds:
            if not _ok_for_gaussian_kde(kind):
                continue
            with tm.assertRaises(TypeError):
                s.plot(kind=kind)

    def test_invalid_kind(self):
        """An unknown kind raises ValueError."""
        s = Series([1, 2])
        with tm.assertRaises(ValueError):
            s.plot(kind='aasdf')

    @slow
    def test_dup_datetime_index_plot(self):
        """A datetime index with duplicate entries can be plotted."""
        dr1 = date_range('1/1/2009', periods=4)
        dr2 = date_range('1/2/2009', periods=4)
        index = dr1.append(dr2)
        values = randn(index.size)
        s = Series(values, index=index)
        _check_plot_works(s.plot)

    @slow
    def test_errorbar_plot(self):
        """xerr/yerr accept several input types; bad input raises."""
        s = Series(np.arange(10), name='x')
        s_err = np.random.randn(10)
        d_err = DataFrame(randn(10, 2), index=s.index, columns=['x', 'y'])

        # test line and bar plots
        kinds = ['line', 'bar']
        for kind in kinds:
            ax = _check_plot_works(s.plot, yerr=Series(s_err), kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, yerr=s_err, kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, yerr=s_err.tolist(), kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, yerr=d_err, kind=kind)
            self._check_has_errorbars(ax, xerr=0, yerr=1)
            ax = _check_plot_works(s.plot, xerr=0.2, yerr=0.2, kind=kind)
            self._check_has_errorbars(ax, xerr=1, yerr=1)

        ax = _check_plot_works(s.plot, xerr=s_err)
        self._check_has_errorbars(ax, xerr=1, yerr=0)

        # test time series plotting
        ix = date_range('1/1/2000', '1/1/2001', freq='M')
        ts = Series(np.arange(12), index=ix, name='x')
        ts_err = Series(np.random.randn(12), index=ix)
        td_err = DataFrame(randn(12, 2), index=ix, columns=['x', 'y'])

        ax = _check_plot_works(ts.plot, yerr=ts_err)
        self._check_has_errorbars(ax, xerr=0, yerr=1)
        ax = _check_plot_works(ts.plot, yerr=td_err)
        self._check_has_errorbars(ax, xerr=0, yerr=1)

        # check incorrect lengths and types
        with tm.assertRaises(ValueError):
            s.plot(yerr=np.arange(11))

        s_err = ['zzz'] * 10
        with tm.assertRaises(TypeError):
            s.plot(yerr=s_err)

    def test_table(self):
        """table keyword accepts True or a Series."""
        _check_plot_works(self.series.plot, table=True)
        _check_plot_works(self.series.plot, table=self.series)
- @tm.mplskip
- class TestDataFramePlots(TestPlotBase):
def setUp(self):
    """Build the time-indexed, hexbin and iris frames used by the tests."""
    TestPlotBase.setUp(self)

    import matplotlib as mpl
    mpl.rcdefaults()
    mpl_version = str(mpl.__version__)
    self.mpl_le_1_2_1 = mpl_version <= LooseVersion('1.2.1')

    self.tdf = tm.makeTimeDataFrame()

    # draw the random columns in the same A, B, C order as the dict literal
    col_a = np.random.uniform(size=20)
    col_b = np.random.uniform(size=20)
    col_c = np.arange(20) + np.random.uniform(size=20)
    self.hexbin_df = DataFrame({"A": col_a, "B": col_b, "C": col_c})

    from pandas import read_csv
    iris_csv = os.path.join(curpath(), 'data', 'iris.csv')
    self.iris = read_csv(iris_csv)
@slow
def test_plot(self):
    """Smoke-test DataFrame.plot: subplots, ticks, titles, MultiIndex."""
    df = self.tdf
    _check_plot_works(df.plot, grid=False)
    axes = _check_plot_works(df.plot, subplots=True)
    self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
    # capture the new axes; otherwise the shape check below would
    # re-inspect the axes of the previous plot
    axes = _check_plot_works(df.plot, subplots=True, use_index=False)
    self._check_axes_shape(axes, axes_num=4, layout=(4, 1))

    df = DataFrame({'x': [1, 2], 'y': [3, 4]})
    with tm.assertRaises(TypeError):
        df.plot(kind='line', blarg=True)

    df = DataFrame(np.random.rand(10, 3),
                   index=list(string.ascii_letters[:10]))
    _check_plot_works(df.plot, use_index=True)
    _check_plot_works(df.plot, sort_columns=False)
    _check_plot_works(df.plot, yticks=[1, 5, 10])
    _check_plot_works(df.plot, xticks=[1, 5, 10])
    _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100))

    axes = _check_plot_works(df.plot, subplots=True, title='blah')
    self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
    # x tick labels and x axis label are hidden on all but the last axes
    for ax in axes[:2]:
        self._check_visible(ax.get_xticklabels(), visible=False)
        self._check_visible([ax.xaxis.get_label()], visible=False)
    for ax in [axes[2]]:
        self._check_visible(ax.get_xticklabels())
        self._check_visible([ax.xaxis.get_label()])

    _check_plot_works(df.plot, title='blah')

    tuples = lzip(string.ascii_letters[:10], range(10))
    df = DataFrame(np.random.rand(10, 3),
                   index=MultiIndex.from_tuples(tuples))
    _check_plot_works(df.plot, use_index=True)

    # unicode
    index = MultiIndex.from_tuples([(u('\u03b1'), 0),
                                    (u('\u03b1'), 1),
                                    (u('\u03b2'), 2),
                                    (u('\u03b2'), 3),
                                    (u('\u03b3'), 4),
                                    (u('\u03b3'), 5),
                                    (u('\u03b4'), 6),
                                    (u('\u03b4'), 7)], names=['i0', 'i1'])
    columns = MultiIndex.from_tuples([('bar', u('\u0394')),
                                      ('bar', u('\u0395'))],
                                     names=['c0', 'c1'])
    df = DataFrame(np.random.randint(0, 10, (8, 2)),
                   columns=columns,
                   index=index)
    _check_plot_works(df.plot, title=u('\u03A3'))

    # GH 6951
    # Test with single column
    df = DataFrame({'x': np.random.rand(10)})
    axes = _check_plot_works(df.plot, kind='bar', subplots=True)
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    # When ax is supplied and required number of axes is 1,
    # passed ax should be used:
    fig, ax = self.plt.subplots()
    axes = df.plot(kind='bar', subplots=True, ax=ax)
    self.assertEqual(len(axes), 1)
    self.assertIs(ax.get_axes(), axes[0])
def test_nonnumeric_exclude(self):
    """Only the numeric column of a mixed frame yields a line."""
    frame = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]})
    plotted_lines = frame.plot().get_lines()
    # B was plotted
    self.assertEqual(len(plotted_lines), 1)
@slow
def test_implicit_label(self):
    """Without label, the x axis label is the name of the x column."""
    frame = DataFrame(randn(10, 3), columns=['a', 'b', 'c'])
    x_label = frame.plot(x='a', y='b').xaxis.get_label()
    self._check_text_labels(x_label, 'a')
@slow
def test_explicit_label(self):
    """An explicit label keyword becomes the x axis label."""
    frame = DataFrame(randn(10, 3), columns=['a', 'b', 'c'])
    x_label = frame.plot(x='a', y='b', label='LABEL').xaxis.get_label()
    self._check_text_labels(x_label, 'LABEL')
@slow
def test_plot_xy(self):
    """x/y keywords select columns by position or by label."""
    frame = self.tdf

    # columns.inferred_type == 'string'
    result = frame.plot(x=0, y=1)
    expected = frame.set_index('A')['B'].plot()
    self._check_data(result, expected)

    result = frame.plot(x=0)
    expected = frame.set_index('A').plot()
    self._check_data(result, expected)

    result = frame.plot(y=0)
    expected = frame.B.plot()
    self._check_data(result, expected)

    result = frame.plot(x='A', y='B')
    expected = frame.set_index('A').B.plot()
    self._check_data(result, expected)

    result = frame.plot(x='A')
    expected = frame.set_index('A').plot()
    self._check_data(result, expected)

    result = frame.plot(y='B')
    expected = frame.B.plot()
    self._check_data(result, expected)

    # columns.inferred_type == 'integer'
    frame.columns = lrange(1, len(frame.columns) + 1)

    result = frame.plot(x=1, y=2)
    expected = frame.set_index(1)[2].plot()
    self._check_data(result, expected)

    result = frame.plot(x=1)
    expected = frame.set_index(1).plot()
    self._check_data(result, expected)

    result = frame.plot(y=1)
    expected = frame[1].plot()
    self._check_data(result, expected)

    # figsize and title
    titled = frame.plot(x=1, y=2, title='Test', figsize=(16, 8))
    self._check_text_labels(titled.title, 'Test')
    self._check_axes_shape(titled, axes_num=1, layout=(1, 1),
                           figsize=(16., 8.))

    # columns.inferred_type == 'mixed'
    # TODO add MultiIndex test
@slow
def test_logscales(self):
    """logy, logx and loglog set the matching axis scales."""
    frame = DataFrame({'a': np.arange(100)},
                      index=np.arange(100))

    # (plot keyword, scales expected by _check_ax_scales)
    cases = [('logy', {'yaxis': 'log'}),
             ('logx', {'xaxis': 'log'}),
             ('loglog', {'xaxis': 'log', 'yaxis': 'log'})]
    for flag, scales in cases:
        ax = frame.plot(**{flag: True})
        self._check_ax_scales(ax, **scales)
@slow
def test_xcompat(self):
    """x_compat, as keyword or plot_params option, controls x data type."""
    import pandas as pd

    df = self.tdf
    ax = df.plot(x_compat=True)
    lines = ax.get_lines()
    self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex)

    tm.close()
    pd.plot_params['xaxis.compat'] = True
    try:
        ax = df.plot()
        lines = ax.get_lines()
        self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex)
    finally:
        # always undo the global override so a failing assertion
        # cannot leak the setting into other tests
        pd.plot_params['x_compat'] = False

    tm.close()
    ax = df.plot()
    lines = ax.get_lines()
    tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex)

    tm.close()
    # useful if you're plotting a bunch together
    with pd.plot_params.use('x_compat', True):
        ax = df.plot()
        lines = ax.get_lines()
        self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex)

    tm.close()
    ax = df.plot()
    lines = ax.get_lines()
    tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex)
def test_unsorted_index(self):
    """Line data of a frame with a descending index matches column y."""
    frame = DataFrame({'y': np.arange(100)},
                      index=np.arange(99, -1, -1), dtype=np.int64)
    first_line = frame.plot().get_lines()[0]
    xy = first_line.get_xydata()
    # column 0 holds x (the index), column 1 holds y (the values)
    plotted = Series(xy[:, 1], xy[:, 0], dtype=np.int64)
    tm.assert_series_equal(plotted, frame.y)
@slow
def test_subplots(self):
    """subplots=True: layout, legends and tick label visibility."""
    df = DataFrame(np.random.rand(10, 3),
                   index=list(string.ascii_letters[:10]))

    for kind in ['bar', 'barh', 'line', 'area']:
        axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True)
        self._check_axes_shape(axes, axes_num=3, layout=(3, 1))

        for ax, column in zip(axes, df.columns):
            self._check_legend_labels(ax,
                                      labels=[com.pprint_thing(column)])

        # with a shared x axis every axes except the last hides its
        # x tick labels; [:-1] covers all of them (the original [:-2]
        # skipped the middle one)
        for ax in axes[:-1]:
            self._check_visible(ax.get_xticklabels(), visible=False)
            self._check_visible(ax.get_yticklabels())

        self._check_visible(axes[-1].get_xticklabels())
        self._check_visible(axes[-1].get_yticklabels())

        axes = df.plot(kind=kind, subplots=True, sharex=False)
        for ax in axes:
            self._check_visible(ax.get_xticklabels())
            self._check_visible(ax.get_yticklabels())

        axes = df.plot(kind=kind, subplots=True, legend=False)
        for ax in axes:
            self.assertTrue(ax.get_legend() is None)
def test_negative_log(self):
    """Area plots of negative data reject logy and loglog."""
    negative = - DataFrame(rand(6, 4),
                           index=list(string.ascii_letters[:6]),
                           columns=['x', 'y', 'z', 'four'])

    for log_flag in ('logy', 'loglog'):
        with tm.assertRaises(ValueError):
            negative.plot(kind='area', **{log_flag: True})
def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
    """
    Check each stacked line equals the running sum of the normal lines

    Parameters
    ----------
    normal_lines : list-like of matplotlib lines from the unstacked plot
    stacked_lines : list-like of matplotlib lines from the stacked plot
    """
    n_points = len(normal_lines[0].get_data()[1])
    running_total = np.zeros(n_points)
    for plain, stacked in zip(normal_lines, stacked_lines):
        # index 1 of get_data() is the y coordinates
        running_total += plain.get_data()[1]
        stacked_y = stacked.get_data()[1]
        self.assert_numpy_array_equal(running_total, stacked_y)
def test_line_area_stacked(self):
    """Stacked line/area plots accumulate y; mixed signs are rejected."""
    with tm.RNGContext(42):
        positive = DataFrame(rand(6, 4),
                             columns=['w', 'x', 'y', 'z'])
        negative = - positive

        # each column has either positive or negative value
        col_w = rand(6)
        col_x = rand(6)
        col_y = - rand(6)
        col_z = - rand(6)
        separated = DataFrame({'w': col_w, 'x': col_x,
                               'y': col_y, 'z': col_z})

        # each column has positive-negative mixed value
        mixed = DataFrame(randn(6, 4),
                          index=list(string.ascii_letters[:6]),
                          columns=['w', 'x', 'y', 'z'])

        for kind in ['line', 'area']:
            plain = _check_plot_works(positive.plot, kind=kind,
                                      stacked=False)
            stacked = _check_plot_works(positive.plot, kind=kind,
                                        stacked=True)
            self._compare_stacked_y_cood(plain.lines, stacked.lines)

            plain = _check_plot_works(negative.plot, kind=kind,
                                      stacked=False)
            stacked = _check_plot_works(negative.plot, kind=kind,
                                        stacked=True)
            self._compare_stacked_y_cood(plain.lines, stacked.lines)

            plain = _check_plot_works(separated.plot, kind=kind,
                                      stacked=False)
            stacked = _check_plot_works(separated.plot, kind=kind,
                                        stacked=True)
            # the two positive and the two negative columns are
            # compared as separate groups
            self._compare_stacked_y_cood(plain.lines[:2], stacked.lines[:2])
            self._compare_stacked_y_cood(plain.lines[2:], stacked.lines[2:])

            _check_plot_works(mixed.plot, stacked=False)
            with tm.assertRaises(ValueError):
                mixed.plot(stacked=True)

            _check_plot_works(positive.plot, kind=kind, logx=True,
                              stacked=True)
def test_line_area_nan_df(self):
    # NaN handling for line and area plots, run against a frame with the
    # default index and a date-indexed frame holding the same values.
    col_a = [1, 2, np.nan, 3]
    col_b = [3, np.nan, 2, 1]
    plain = DataFrame({'a': col_a, 'b': col_b})
    dated = DataFrame({'a': col_a, 'b': col_b},
                      index=tm.makeDateIndex(k=4))

    # The input values with each NaN slot holding 0 instead.
    filled_a = np.array([1, 2, 0, 3])
    filled_b = np.array([3, 0, 2, 1])

    # (plot options, expected y of line 0, expected y of line 1)
    zero_filled_cases = [
        ({'stacked': True}, filled_a, filled_a + filled_b),
        ({'kind': 'area'}, filled_a, filled_a + filled_b),
        ({'kind': 'area', 'stacked': False}, filled_a, filled_b),
    ]

    for frame in (plain, dated):
        ax = _check_plot_works(frame.plot)
        y_a = ax.lines[0].get_ydata()
        y_b = ax.lines[1].get_ydata()

        # Drop the NaN slot so the remaining data can be compared.
        self.assert_numpy_array_equal(np.delete(y_a.data, 2),
                                      np.array([1, 2, 3]))
        self.assert_numpy_array_equal(np.delete(y_b.data, 1),
                                      np.array([3, 2, 1]))
        # The mask must flag exactly the NaN slot of each column.
        self.assert_numpy_array_equal(
            y_a.mask, np.array([False, False, True, False]))
        self.assert_numpy_array_equal(
            y_b.mask, np.array([False, True, False, False]))

        for options, expected_first, expected_second in zero_filled_cases:
            ax = _check_plot_works(frame.plot, **options)
            self.assert_numpy_array_equal(ax.lines[0].get_ydata(),
                                          expected_first)
            self.assert_numpy_array_equal(ax.lines[1].get_ydata(),
                                          expected_second)
def test_line_lim(self):
    # The x limits of a line plot must coincide with the first and last
    # x value of the first drawn line; checked for a plain plot, with
    # secondary_y, and with secondary_y combined with subplots.
    frame = DataFrame(rand(6, 3), columns=['x', 'y', 'z'])

    def check_xlim(ax):
        lower, upper = ax.get_xlim()
        x_values = ax.get_lines()[0].get_data()[0]
        self.assertEqual(lower, x_values[0])
        self.assertEqual(upper, x_values[-1])

    check_xlim(frame.plot())
    check_xlim(frame.plot(secondary_y=True))

    for sub_ax in frame.plot(secondary_y=True, subplots=True):
        check_xlim(sub_ax)
def test_area_lim(self):
    # Axis limits of area plots, stacked and unstacked: x limits match
    # the ends of the first line, the lower y limit is 0 for the random
    # frame and the upper y limit is 0 for its negation.
    positive = DataFrame(rand(6, 4),
                         columns=['x', 'y', 'z', 'four'])
    negative = -positive

    for stacked in [True, False]:
        ax = _check_plot_works(positive.plot, kind='area', stacked=stacked)
        x_low, x_high = ax.get_xlim()
        y_low, _ = ax.get_ylim()
        x_values = ax.get_lines()[0].get_data()[0]
        self.assertEqual(x_low, x_values[0])
        self.assertEqual(x_high, x_values[-1])
        self.assertEqual(y_low, 0)

        ax = _check_plot_works(negative.plot, kind='area', stacked=stacked)
        _, y_high = ax.get_ylim()
        self.assertEqual(y_high, 0)
@slow
def test_bar_colors(self):
    # Face colors of bar plots: the rcParams color cycle by default, an
    # explicit color string, a colormap given by name or by object, and a
    # single named color for a one-column frame.
    import matplotlib.pyplot as plt
    from matplotlib import cm

    cycle_colors = plt.rcParams.get('axes.color_cycle')
    frame = DataFrame(randn(5, 5))

    # Every fifth patch is inspected -- presumably the first bar of each
    # of the five columns.
    ax = frame.plot(kind='bar')
    self._check_colors(ax.patches[::5], facecolors=cycle_colors[:5])
    tm.close()

    letter_colors = 'rgcby'
    ax = frame.plot(kind='bar', color=letter_colors)
    self._check_colors(ax.patches[::5], facecolors=letter_colors)
    tm.close()

    # The colormap may be passed as a string name or as the object itself;
    # both must yield five evenly spaced samples of cm.jet.
    for colormap in ('jet', cm.jet):
        ax = frame.plot(kind='bar', colormap=colormap)
        sampled = lmap(cm.jet, np.linspace(0, 1, 5))
        self._check_colors(ax.patches[::5], facecolors=sampled)
        tm.close()

    ax = frame.ix[:, [0]].plot(kind='bar', color='DodgerBlue')
    self._check_colors([ax.patches[0]], facecolors=['DodgerBlue'])
@slow
def test_bar_linewidth(self):
    # linewidth=2 must reach every bar patch for regular, stacked and
    # subplot bar charts.
    frame = DataFrame(randn(5, 5))

    def check_linewidth(ax):
        for patch in ax.patches:
            self.assertEqual(patch.get_linewidth(), 2)

    # regular, then stacked
    for extra in ({}, {'stacked': True}):
        check_linewidth(frame.plot(kind='bar', linewidth=2, **extra))

    # subplots
    axes = frame.plot(kind='bar', linewidth=2, subplots=True)
    self._check_axes_shape(axes, axes_num=5, layout=(5, 1))
    for sub_ax in axes:
        check_linewidth(sub_ax)
@slow
def test_bar_barwidth(self):
    # The width keyword controls bar thickness: regular charts split it
    # across the columns, while stacked charts and subplots give each bar
    # the full width.  Vertical bars are measured with get_width(),
    # horizontal bars with get_height().
    frame = DataFrame(randn(5, 5))
    width = 0.9
    per_column = width / len(frame.columns)

    def check_thickness(ax, getter, expected):
        for patch in ax.patches:
            self.assertEqual(getattr(patch, getter)(), expected)

    # (kind, extra plot options, patch getter, expected thickness)
    single_axes_cases = [
        ('bar', {}, 'get_width', per_column),                # regular
        ('bar', {'stacked': True}, 'get_width', width),      # stacked
        ('barh', {}, 'get_height', per_column),              # horizontal
        ('barh', {'stacked': True}, 'get_height', width),    # horiz. stacked
    ]
    for kind, extra, getter, expected in single_axes_cases:
        ax = frame.plot(kind=kind, width=width, **extra)
        check_thickness(ax, getter, expected)

    # subplots, vertical then horizontal
    for kind, getter in (('bar', 'get_width'), ('barh', 'get_height')):
        axes = frame.plot(kind=kind, width=width, subplots=True)
        for sub_ax in axes:
            check_thickness(sub_ax, getter, width)
@slow
def test_bar_barwidth_position(self):
    # Run the shared alignment check with width=0.9 and position=0.2 for
    # regular, stacked and subplot charts in both orientations.
    frame = DataFrame(randn(5, 5))

    variants = [
        {'kind': 'bar', 'stacked': False},
        {'kind': 'bar', 'stacked': True},
        {'kind': 'barh', 'stacked': False},
        {'kind': 'barh', 'stacked': True},
        {'kind': 'bar', 'subplots': True},
        {'kind': 'barh', 'subplots': True},
    ]
    for variant in variants:
        self._check_bar_alignment(frame, width=0.9, position=0.2, **variant)
@slow
def test_bar_bottom_left(self):
    # bottom (vertical bars) and left (horizontal bars) set where each
    # bar starts; they are given here as a scalar, a list and an array.
    frame = DataFrame(rand(5, 5))

    def y_starts(patches):
        return [patch.get_y() for patch in patches]

    def x_starts(patches):
        return [patch.get_x() for patch in patches]

    ax = frame.plot(kind='bar', stacked=False, bottom=1)
    self.assertEqual(y_starts(ax.patches), [1] * 25)

    # Stacked: only the first five patches are compared with the offsets.
    ax = frame.plot(kind='bar', stacked=True, bottom=[-1, -2, -3, -4, -5])
    self.assertEqual(y_starts(ax.patches[:5]), [-1, -2, -3, -4, -5])

    ax = frame.plot(kind='barh', stacked=False,
                    left=np.array([1, 1, 1, 1, 1]))
    self.assertEqual(x_starts(ax.patches), [1] * 25)

    ax = frame.plot(kind='barh', stacked=True, left=[1, 2, 3, 4, 5])
    self.assertEqual(x_starts(ax.patches[:5]), [1, 2, 3, 4, 5])

    axes = frame.plot(kind='bar', subplots=True, bottom=-1)
    for sub_ax in axes:
        self.assertEqual(y_starts(sub_ax.patches), [-1] * 5)

    axes = frame.plot(kind='barh', subplots=True,
                      left=np.array([1, 1, 1, 1, 1]))
    for sub_ax in axes:
        self.assertEqual(x_starts(sub_ax.patches), [1] * 5)
@slow
def test_plot_scatter(self):
    # Scatter plots accept x and y as column labels or as integers,
    # and raise ValueError when only one of the two is supplied.
    row_labels = list(string.ascii_letters[:6])
    frame = DataFrame(randn(6, 4),
                      index=row_labels,
                      columns=['x', 'y', 'z', 'four'])

    _check_plot_works(frame.plot, x='x', y='y', kind='scatter')
    _check_plot_works(frame.plot, x=1, y=2, kind='scatter')

    for incomplete in ({'x': 'x'}, {'y': 'y'}):
        with tm.assertRaises(ValueError):
            frame.plot(kind='scatter', **incomplete)

    # GH 6951
    axes = frame.plot(x='x', y='y', kind='scatter', subplots=True)
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
@slow
def test_plot_bar(self):
    # Smoke-test bar plots on a small labelled frame (with several option
    # combinations), a wider frame with integer column labels, and a tiny
    # integer-valued frame.
    labelled = DataFrame(randn(6, 4),
                         index=list(string.ascii_letters[:6]),
                         columns=['one', 'two', 'three', 'four'])
    option_sets = ({}, {'legend': False}, {'subplots': True},
                   {'stacked': True})
    for options in option_sets:
        _check_plot_works(labelled.plot, kind='bar', **options)

    wide = DataFrame(randn(10, 15),
                     index=list(string.ascii_letters[:10]),
                     columns=lrange(15))
    _check_plot_works(wide.plot, kind='bar')

    tiny = DataFrame({'a': [0, 1], 'b': [1, 0]})
    _check_plot_works(tiny.plot, kind='bar')
- def _check_bar_alignment(self, df, kind='bar', stacked=False,
- subplots=False, align='center',
- width=0.5, position=0.5):
- axes = df.plot(kind=kind, stacked=stacked, subplots=subplots,
- align=align, width=width, position=position,
- grid=True)
- axes = self._flatten_visible(axes)
- for ax in axes:
- if kind == 'bar':
- axis = ax.xaxis
- ax_min, ax_max = ax.get_xlim()
- min_edge = min([p.get_x() for p in ax.patches])
- max_edge = max([p.get_x() + p.get_width() for p in ax.patches])
- elif kind == 'barh':
- axis = ax.yaxis
- …
Large files are truncated, but you can click here to view the full file