/HousePrices/PythonSolution/venv/Lib/site-packages/sklearn/model_selection/tests/test_validation.py
- """Test the validation module"""
- from __future__ import division
- import sys
- import warnings
- import tempfile
- import os
- from time import sleep
- import pytest
- import numpy as np
- from scipy.sparse import coo_matrix, csr_matrix
- from sklearn.exceptions import FitFailedWarning
- from sklearn.model_selection.tests.test_search import FailingClassifier
- from sklearn.utils.testing import assert_true
- from sklearn.utils.testing import assert_false
- from sklearn.utils.testing import assert_equal
- from sklearn.utils.testing import assert_almost_equal
- from sklearn.utils.testing import assert_raises
- from sklearn.utils.testing import assert_raise_message
- from sklearn.utils.testing import assert_warns
- from sklearn.utils.testing import assert_warns_message
- from sklearn.utils.testing import assert_no_warnings
- from sklearn.utils.testing import assert_raises_regex
- from sklearn.utils.testing import assert_greater
- from sklearn.utils.testing import assert_less
- from sklearn.utils.testing import assert_array_almost_equal
- from sklearn.utils.testing import assert_array_equal
- from sklearn.utils.mocking import CheckingClassifier, MockDataFrame
- from sklearn.model_selection import cross_val_score
- from sklearn.model_selection import cross_val_predict
- from sklearn.model_selection import cross_validate
- from sklearn.model_selection import permutation_test_score
- from sklearn.model_selection import KFold
- from sklearn.model_selection import StratifiedKFold
- from sklearn.model_selection import LeaveOneOut
- from sklearn.model_selection import LeaveOneGroupOut
- from sklearn.model_selection import LeavePGroupsOut
- from sklearn.model_selection import GroupKFold
- from sklearn.model_selection import GroupShuffleSplit
- from sklearn.model_selection import learning_curve
- from sklearn.model_selection import validation_curve
- from sklearn.model_selection._validation import _check_is_permutation
- from sklearn.model_selection._validation import _fit_and_score
- from sklearn.datasets import make_regression
- from sklearn.datasets import load_boston
- from sklearn.datasets import load_iris
- from sklearn.datasets import load_digits
- from sklearn.metrics import explained_variance_score
- from sklearn.metrics import make_scorer
- from sklearn.metrics import accuracy_score
- from sklearn.metrics import confusion_matrix
- from sklearn.metrics import precision_recall_fscore_support
- from sklearn.metrics import precision_score
- from sklearn.metrics import r2_score
- from sklearn.metrics.scorer import check_scoring
- from sklearn.linear_model import Ridge, LogisticRegression, SGDClassifier
- from sklearn.linear_model import PassiveAggressiveClassifier, RidgeClassifier
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.svm import SVC
- from sklearn.cluster import KMeans
- from sklearn.impute import SimpleImputer
- from sklearn.preprocessing import LabelEncoder
- from sklearn.pipeline import Pipeline
- from sklearn.externals.six.moves import cStringIO as StringIO
- from sklearn.base import BaseEstimator
- from sklearn.base import clone
- from sklearn.multiclass import OneVsRestClassifier
- from sklearn.utils import shuffle
- from sklearn.datasets import make_classification
- from sklearn.datasets import make_multilabel_classification
- from sklearn.model_selection.tests.common import OneTimeSplitter
- from sklearn.model_selection import GridSearchCV
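
# WindowsError is a builtin only on Windows; fall back to None elsewhere so
# the name can be referenced on every platform.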
try:
    WindowsError
except NameError:
    WindowsError = None


class MockImprovingEstimator(BaseEstimator):
    """Dummy classifier to test the learning curve"""

    def __init__(self, n_max_train_sizes):
        self.n_max_train_sizes = n_max_train_sizes
        self.train_sizes = 0
        self.X_subset = None

    def fit(self, X_subset, y_subset=None):
        self.X_subset = X_subset
        self.train_sizes = X_subset.shape[0]
        return self

    def predict(self, X):
        raise NotImplementedError

    def score(self, X=None, Y=None):
        # training score becomes worse (2 -> 1), test error better (0 -> 1)
        if self._is_training_data(X):
            return 2. - float(self.train_sizes) / self.n_max_train_sizes
        else:
            return float(self.train_sizes) / self.n_max_train_sizes

    def _is_training_data(self, X):
        return X is self.X_subset
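
# Note: MockImprovingEstimator._is_training_data relies on object identity
# ("X is self.X_subset"), so the mock reports a "training" score only when
# scored on the exact array object it was fitted on, and a "test" score
# otherwise.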
class MockIncrementalImprovingEstimator(MockImprovingEstimator):
    """Dummy classifier that provides partial_fit"""

    def __init__(self, n_max_train_sizes):
        super(MockIncrementalImprovingEstimator,
              self).__init__(n_max_train_sizes)
        self.x = None

    def _is_training_data(self, X):
        return self.x in X

    def partial_fit(self, X, y=None, **params):
        self.train_sizes += X.shape[0]
        self.x = X[0]


class MockEstimatorWithParameter(BaseEstimator):
    """Dummy classifier to test the validation curve"""

    def __init__(self, param=0.5):
        self.X_subset = None
        self.param = param

    def fit(self, X_subset, y_subset):
        self.X_subset = X_subset
        self.train_sizes = X_subset.shape[0]
        return self

    def predict(self, X):
        raise NotImplementedError

    def score(self, X=None, y=None):
        return self.param if self._is_training_data(X) else 1 - self.param

    def _is_training_data(self, X):
        return X is self.X_subset


class MockEstimatorWithSingleFitCallAllowed(MockEstimatorWithParameter):
    """Dummy classifier that disallows repeated calls of fit method"""

    def fit(self, X_subset, y_subset):
        assert_false(
            hasattr(self, 'fit_called_'),
            'fit is called the second time'
        )
        self.fit_called_ = True
        return super(type(self), self).fit(X_subset, y_subset)

    def predict(self, X):
        raise NotImplementedError


class MockClassifier(object):
    """Dummy classifier to test the cross-validation"""

    def __init__(self, a=0, allow_nd=False):
        self.a = a
        self.allow_nd = allow_nd

    def fit(self, X, Y=None, sample_weight=None, class_prior=None,
            sparse_sample_weight=None, sparse_param=None, dummy_int=None,
            dummy_str=None, dummy_obj=None, callback=None):
        """The dummy arguments are to test that this fit function can
        accept non-array arguments through cross-validation, such as:
            - int
            - str (this is actually array-like)
            - object
            - function
        """
        self.dummy_int = dummy_int
        self.dummy_str = dummy_str
        self.dummy_obj = dummy_obj
        if callback is not None:
            callback(self)

        if self.allow_nd:
            X = X.reshape(len(X), -1)
        if X.ndim >= 3 and not self.allow_nd:
            raise ValueError('X cannot be d')
        if sample_weight is not None:
            assert_true(sample_weight.shape[0] == X.shape[0],
                        'MockClassifier extra fit_param sample_weight.shape[0]'
                        ' is {0}, should be {1}'.format(sample_weight.shape[0],
                                                        X.shape[0]))
        if class_prior is not None:
            assert_true(class_prior.shape[0] == len(np.unique(y)),
                        'MockClassifier extra fit_param class_prior.shape[0]'
                        ' is {0}, should be {1}'.format(class_prior.shape[0],
                                                        len(np.unique(y))))
        if sparse_sample_weight is not None:
            fmt = ('MockClassifier extra fit_param sparse_sample_weight'
                   '.shape[0] is {0}, should be {1}')
            assert_true(sparse_sample_weight.shape[0] == X.shape[0],
                        fmt.format(sparse_sample_weight.shape[0], X.shape[0]))
        if sparse_param is not None:
            fmt = ('MockClassifier extra fit_param sparse_param.shape '
                   'is ({0}, {1}), should be ({2}, {3})')
            assert_true(sparse_param.shape == P_sparse.shape,
                        fmt.format(sparse_param.shape[0],
                                   sparse_param.shape[1],
                                   P_sparse.shape[0], P_sparse.shape[1]))
        return self

    def predict(self, T):
        if self.allow_nd:
            T = T.reshape(len(T), -1)
        return T[:, 0]

    def score(self, X=None, Y=None):
        return 1. / (1 + np.abs(self.a))

    def get_params(self, deep=False):
        return {'a': self.a, 'allow_nd': self.allow_nd}
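
# MockClassifier.score ignores the data entirely and returns 1 / (1 + |a|),
# so every CV fold reports the same value; tests below exploit this by
# comparing cross_val_score output directly against clf.score(X, y2).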
# XXX: use 2D array, since 1D X is being detected as a single sample in
# check_consistent_length
X = np.ones((10, 2))
X_sparse = coo_matrix(X)
y = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
# The number of samples per class needs to be > n_splits,
# for StratifiedKFold(n_splits=3)
y2 = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3])
P_sparse = coo_matrix(np.eye(5))
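
# These module-level fixtures are shared by many tests below; note that
# MockClassifier.fit also refers to the globals y and P_sparse when checking
# the class_prior and sparse_param fit parameters.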
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_score():
    clf = MockClassifier()

    for a in range(-10, 10):
        clf.a = a
        # Smoke test
        scores = cross_val_score(clf, X, y2)
        assert_array_equal(scores, clf.score(X, y2))

        # test with multioutput y
        multioutput_y = np.column_stack([y2, y2[::-1]])
        scores = cross_val_score(clf, X_sparse, multioutput_y)
        assert_array_equal(scores, clf.score(X_sparse, multioutput_y))

        scores = cross_val_score(clf, X_sparse, y2)
        assert_array_equal(scores, clf.score(X_sparse, y2))

        # test with multioutput y
        scores = cross_val_score(clf, X_sparse, multioutput_y)
        assert_array_equal(scores, clf.score(X_sparse, multioutput_y))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    scores = cross_val_score(clf, X.tolist(), y2.tolist())

    clf = CheckingClassifier(check_y=list_check)
    scores = cross_val_score(clf, X, y2.tolist())

    assert_raises(ValueError, cross_val_score, clf, X, y2, scoring="sklearn")

    # test with 3d X and
    X_3d = X[:, :, np.newaxis]
    clf = MockClassifier(allow_nd=True)
    scores = cross_val_score(clf, X_3d, y2)

    clf = MockClassifier(allow_nd=False)
    assert_raises(ValueError, cross_val_score, clf, X_3d, y2,
                  error_score='raise')


@pytest.mark.filterwarnings('ignore:You should specify a value for')  # 0.22
def test_cross_validate_many_jobs():
    # regression test for #12154: cv='warn' with n_jobs>1 trigger a copy of
    # the parameters leading to a failure in check_cv due to cv is 'warn'
    # instead of cv == 'warn'.
    X, y = load_iris(return_X_y=True)
    clf = SVC(gamma='auto')
    grid = GridSearchCV(clf, param_grid={'C': [1, 10]})
    cross_validate(grid, X, y, n_jobs=2)


@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_validate_invalid_scoring_param():
    X, y = make_classification(random_state=0)
    estimator = MockClassifier()

    # Test the errors
    error_message_regexp = ".*must be unique strings.*"

    # List/tuple of callables should raise a message advising users to use
    # dict of names to callables mapping
    assert_raises_regex(ValueError, error_message_regexp,
                        cross_validate, estimator, X, y,
                        scoring=(make_scorer(precision_score),
                                 make_scorer(accuracy_score)))
    assert_raises_regex(ValueError, error_message_regexp,
                        cross_validate, estimator, X, y,
                        scoring=(make_scorer(precision_score),))

    # So should empty lists/tuples
    assert_raises_regex(ValueError, error_message_regexp + "Empty list.*",
                        cross_validate, estimator, X, y, scoring=())

    # So should duplicated entries
    assert_raises_regex(ValueError, error_message_regexp + "Duplicate.*",
                        cross_validate, estimator, X, y,
                        scoring=('f1_micro', 'f1_micro'))

    # Nested Lists should raise a generic error message
    assert_raises_regex(ValueError, error_message_regexp,
                        cross_validate, estimator, X, y,
                        scoring=[[make_scorer(precision_score)]])

    error_message_regexp = (".*should either be.*string or callable.*for "
                            "single.*.*dict.*for multi.*")

    # Empty dict should raise invalid scoring error
    assert_raises_regex(ValueError, "An empty dict",
                        cross_validate, estimator, X, y, scoring=(dict()))

    # And so should any other invalid entry
    assert_raises_regex(ValueError, error_message_regexp,
                        cross_validate, estimator, X, y, scoring=5)

    multiclass_scorer = make_scorer(precision_recall_fscore_support)

    # Multiclass Scorers that return multiple values are not supported yet
    assert_raises_regex(ValueError,
                        "Classification metrics can't handle a mix of "
                        "binary and continuous targets",
                        cross_validate, estimator, X, y,
                        scoring=multiclass_scorer)
    assert_raises_regex(ValueError,
                        "Classification metrics can't handle a mix of "
                        "binary and continuous targets",
                        cross_validate, estimator, X, y,
                        scoring={"foo": multiclass_scorer})

    multivalued_scorer = make_scorer(confusion_matrix)

    # Multiclass Scorers that return multiple values are not supported yet
    assert_raises_regex(ValueError, "scoring must return a number, got",
                        cross_validate, SVC(gamma='scale'), X, y,
                        scoring=multivalued_scorer)
    assert_raises_regex(ValueError, "scoring must return a number, got",
                        cross_validate, SVC(gamma='scale'), X, y,
                        scoring={"foo": multivalued_scorer})

    assert_raises_regex(ValueError, "'mse' is not a valid scoring value.",
                        cross_validate, SVC(), X, y, scoring="mse")
def test_cross_validate():
    # Compute train and test mse/r2 scores
    cv = KFold(n_splits=5)

    # Regression
    X_reg, y_reg = make_regression(n_samples=30, random_state=0)
    reg = Ridge(random_state=0)

    # Classification
    X_clf, y_clf = make_classification(n_samples=30, random_state=0)
    clf = SVC(kernel="linear", random_state=0)

    for X, y, est in ((X_reg, y_reg, reg), (X_clf, y_clf, clf)):
        # It's okay to evaluate regression metrics on classification too
        mse_scorer = check_scoring(est, 'neg_mean_squared_error')
        r2_scorer = check_scoring(est, 'r2')
        train_mse_scores = []
        test_mse_scores = []
        train_r2_scores = []
        test_r2_scores = []
        fitted_estimators = []
        for train, test in cv.split(X, y):
            est = clone(reg).fit(X[train], y[train])
            train_mse_scores.append(mse_scorer(est, X[train], y[train]))
            train_r2_scores.append(r2_scorer(est, X[train], y[train]))
            test_mse_scores.append(mse_scorer(est, X[test], y[test]))
            test_r2_scores.append(r2_scorer(est, X[test], y[test]))
            fitted_estimators.append(est)
        train_mse_scores = np.array(train_mse_scores)
        test_mse_scores = np.array(test_mse_scores)
        train_r2_scores = np.array(train_r2_scores)
        test_r2_scores = np.array(test_r2_scores)
        fitted_estimators = np.array(fitted_estimators)

        scores = (train_mse_scores, test_mse_scores, train_r2_scores,
                  test_r2_scores, fitted_estimators)

        check_cross_validate_single_metric(est, X, y, scores)
        check_cross_validate_multi_metric(est, X, y, scores)


def test_cross_validate_return_train_score_warn():
    # Test that warnings are raised. Will be removed in 0.21
    X, y = make_classification(random_state=0)
    estimator = MockClassifier()
    result = {}
    for val in [False, True, 'warn']:
        result[val] = assert_no_warnings(cross_validate, estimator, X, y,
                                         return_train_score=val, cv=5)

    msg = (
        'You are accessing a training score ({!r}), '
        'which will not be available by default '
        'any more in 0.21. If you need training scores, '
        'please set return_train_score=True').format('train_score')

    train_score = assert_warns_message(FutureWarning, msg,
                                       result['warn'].get, 'train_score')
    assert np.allclose(train_score, result[True]['train_score'])
    assert 'train_score' not in result[False]


def check_cross_validate_single_metric(clf, X, y, scores):
    (train_mse_scores, test_mse_scores, train_r2_scores,
     test_r2_scores, fitted_estimators) = scores
    # Test single metric evaluation when scoring is string or singleton list
    for (return_train_score, dict_len) in ((True, 4), (False, 3)):
        # Single metric passed as a string
        if return_train_score:
            # It must be True by default - deprecated
            mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                             scoring='neg_mean_squared_error',
                                             return_train_score=True)
            assert_array_almost_equal(mse_scores_dict['train_score'],
                                      train_mse_scores)
        else:
            mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                             scoring='neg_mean_squared_error',
                                             return_train_score=False)
        assert isinstance(mse_scores_dict, dict)
        assert_equal(len(mse_scores_dict), dict_len)
        assert_array_almost_equal(mse_scores_dict['test_score'],
                                  test_mse_scores)

        # Single metric passed as a list
        if return_train_score:
            # It must be True by default - deprecated
            r2_scores_dict = cross_validate(clf, X, y, cv=5, scoring=['r2'],
                                            return_train_score=True)
            assert_array_almost_equal(r2_scores_dict['train_r2'],
                                      train_r2_scores, True)
        else:
            r2_scores_dict = cross_validate(clf, X, y, cv=5, scoring=['r2'],
                                            return_train_score=False)
        assert isinstance(r2_scores_dict, dict)
        assert_equal(len(r2_scores_dict), dict_len)
        assert_array_almost_equal(r2_scores_dict['test_r2'], test_r2_scores)

    # Test return_estimator option
    mse_scores_dict = cross_validate(clf, X, y, cv=5,
                                     scoring='neg_mean_squared_error',
                                     return_estimator=True)
    for k, est in enumerate(mse_scores_dict['estimator']):
        assert_almost_equal(est.coef_, fitted_estimators[k].coef_)
        assert_almost_equal(est.intercept_, fitted_estimators[k].intercept_)


def check_cross_validate_multi_metric(clf, X, y, scores):
    # Test multimetric evaluation when scoring is a list / dict
    (train_mse_scores, test_mse_scores, train_r2_scores,
     test_r2_scores, fitted_estimators) = scores

    all_scoring = (('r2', 'neg_mean_squared_error'),
                   {'r2': make_scorer(r2_score),
                    'neg_mean_squared_error': 'neg_mean_squared_error'})

    keys_sans_train = set(('test_r2', 'test_neg_mean_squared_error',
                           'fit_time', 'score_time'))
    keys_with_train = keys_sans_train.union(
        set(('train_r2', 'train_neg_mean_squared_error')))

    for return_train_score in (True, False):
        for scoring in all_scoring:
            if return_train_score:
                # return_train_score must be True by default - deprecated
                cv_results = cross_validate(clf, X, y, cv=5, scoring=scoring,
                                            return_train_score=True)
                assert_array_almost_equal(cv_results['train_r2'],
                                          train_r2_scores)
                assert_array_almost_equal(
                    cv_results['train_neg_mean_squared_error'],
                    train_mse_scores)
            else:
                cv_results = cross_validate(clf, X, y, cv=5, scoring=scoring,
                                            return_train_score=False)
            assert isinstance(cv_results, dict)
            assert_equal(set(cv_results.keys()),
                         keys_with_train if return_train_score
                         else keys_sans_train)
            assert_array_almost_equal(cv_results['test_r2'], test_r2_scores)
            assert_array_almost_equal(
                cv_results['test_neg_mean_squared_error'], test_mse_scores)

            # Make sure all the arrays are of np.ndarray type
            assert type(cv_results['test_r2']) == np.ndarray
            assert (type(cv_results['test_neg_mean_squared_error']) ==
                    np.ndarray)
            assert type(cv_results['fit_time']) == np.ndarray
            assert type(cv_results['score_time']) == np.ndarray

            # Ensure all the times are within sane limits
            assert np.all(cv_results['fit_time'] >= 0)
            assert np.all(cv_results['fit_time'] < 10)
            assert np.all(cv_results['score_time'] >= 0)
            assert np.all(cv_results['score_time'] < 10)
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_score_predict_groups():
    # Check if ValueError (when groups is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if groups is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The 'groups' parameter should not be None.",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv)


@pytest.mark.filterwarnings('ignore: Using or importing the ABCs from')
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        # 3 fold cross val is used so we need atleast 3 samples per class
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_score(clf, X_df, y_ser)


def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target

    kfold = KFold(5)
    scores_indices = cross_val_score(svm, X, y, cv=kfold)

    kfold = KFold(5)
    cv_masks = []
    for train, test in kfold.split(X, y):
        mask_train = np.zeros(len(y), dtype=np.bool)
        mask_test = np.zeros(len(y), dtype=np.bool)
        mask_train[train] = 1
        mask_test[test] = 1
        cv_masks.append((train, test))
    scores_masks = cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)


@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)

    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_linear)

    # test with callable
    svm = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T))
    score_callable = cross_val_score(svm, X, y)
    assert_array_almost_equal(score_precomputed, score_callable)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm,
                  linear_kernel.tolist(), y)


@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_score_fit_params():
    clf = MockClassifier()
    n_samples = X.shape[0]
    n_classes = len(np.unique(y))

    W_sparse = coo_matrix((np.array([1]), (np.array([1]), np.array([0]))),
                          shape=(10, 1))
    P_sparse = coo_matrix(np.eye(5))

    DUMMY_INT = 42
    DUMMY_STR = '42'
    DUMMY_OBJ = object()

    def assert_fit_params(clf):
        # Function to test that the values are passed correctly to the
        # classifier arguments for non-array type
        assert_equal(clf.dummy_int, DUMMY_INT)
        assert_equal(clf.dummy_str, DUMMY_STR)
        assert_equal(clf.dummy_obj, DUMMY_OBJ)
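
    # assert_fit_params is passed below via the 'callback' fit parameter, so
    # these checks run on the cloned estimator inside each CV fold (attributes
    # set in fit are not visible on clf itself after cross_val_score).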
    fit_params = {'sample_weight': np.ones(n_samples),
                  'class_prior': np.full(n_classes, 1. / n_classes),
                  'sparse_sample_weight': W_sparse,
                  'sparse_param': P_sparse,
                  'dummy_int': DUMMY_INT,
                  'dummy_str': DUMMY_STR,
                  'dummy_obj': DUMMY_OBJ,
                  'callback': assert_fit_params}
    cross_val_score(clf, X, y, fit_params=fit_params)


def test_cross_val_score_score_func():
    clf = MockClassifier()
    _score_func_args = []

    def score_func(y_test, y_predict):
        _score_func_args.append((y_test, y_predict))
        return 1.0

    with warnings.catch_warnings(record=True):
        scoring = make_scorer(score_func)
        score = cross_val_score(clf, X, y, scoring=scoring, cv=3)
    assert_array_equal(score, [1.0, 1.0, 1.0])
    # Test that score function is called only 3 times (for cv=3)
    assert len(_score_func_args) == 3


def test_cross_val_score_errors():
    class BrokenEstimator:
        pass

    assert_raises(TypeError, cross_val_score, BrokenEstimator(), X)


def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (class are balanced so f1_score should be equal to zero/one
    # score
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)


def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(reg, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26,
                                 -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)


def test_permutation_score():
    iris = load_iris()
    X = iris.data
    X_sparse = coo_matrix(X)
    y = iris.target
    svm = SVC(kernel='linear')
    cv = StratifiedKFold(2)

    score, scores, pvalue = permutation_test_score(
        svm, X, y, n_permutations=30, cv=cv, scoring="accuracy")
    assert_greater(score, 0.9)
    assert_almost_equal(pvalue, 0.0, 1)

    score_group, _, pvalue_group = permutation_test_score(
        svm, X, y, n_permutations=30, cv=cv, scoring="accuracy",
        groups=np.ones(y.size), random_state=0)
    assert score_group == score
    assert pvalue_group == pvalue

    # check that we obtain the same results with a sparse representation
    svm_sparse = SVC(kernel='linear')
    cv_sparse = StratifiedKFold(2)
    score_group, _, pvalue_group = permutation_test_score(
        svm_sparse, X_sparse, y, n_permutations=30, cv=cv_sparse,
        scoring="accuracy", groups=np.ones(y.size), random_state=0)

    assert score_group == score
    assert pvalue_group == pvalue

    # test with custom scoring object
    def custom_score(y_true, y_pred):
        return (((y_true == y_pred).sum() - (y_true != y_pred).sum()) /
                y_true.shape[0])
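
    # custom_score is (n_correct - n_incorrect) / n, i.e. 2 * accuracy - 1.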
    scorer = make_scorer(custom_score)
    score, _, pvalue = permutation_test_score(
        svm, X, y, n_permutations=100, scoring=scorer, cv=cv, random_state=0)
    assert_almost_equal(score, .93, 2)
    assert_almost_equal(pvalue, 0.01, 3)

    # set random y
    y = np.mod(np.arange(len(y)), 3)

    score, scores, pvalue = permutation_test_score(
        svm, X, y, n_permutations=30, cv=cv, scoring="accuracy")

    assert_less(score, 0.5)
    assert_greater(pvalue, 0.2)


def test_permutation_test_score_allow_nans():
    # Check that permutation_test_score allows input data with NaNs
    X = np.arange(200, dtype=np.float64).reshape(10, -1)
    X[2, :] = np.nan
    y = np.repeat([0, 1], X.shape[0] / 2)
    p = Pipeline([
        ('imputer', SimpleImputer(strategy='mean', missing_values=np.nan)),
        ('classifier', MockClassifier()),
    ])
    permutation_test_score(p, X, y, cv=5)


def test_cross_val_score_allow_nans():
    # Check that cross_val_score allows input data with NaNs
    X = np.arange(200, dtype=np.float64).reshape(10, -1)
    X[2, :] = np.nan
    y = np.repeat([0, 1], X.shape[0] / 2)
    p = Pipeline([
        ('imputer', SimpleImputer(strategy='mean', missing_values=np.nan)),
        ('classifier', MockClassifier()),
    ])
    cross_val_score(p, X, y, cv=5)


def test_cross_val_score_multilabel():
    X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1],
                  [-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
    y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1],
                  [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
    clf = KNeighborsClassifier(n_neighbors=1)
    scoring_micro = make_scorer(precision_score, average='micro')
    scoring_macro = make_scorer(precision_score, average='macro')
    scoring_samples = make_scorer(precision_score, average='samples')
    score_micro = cross_val_score(clf, X, y, scoring=scoring_micro, cv=5)
    score_macro = cross_val_score(clf, X, y, scoring=scoring_macro, cv=5)
    score_samples = cross_val_score(clf, X, y, scoring=scoring_samples, cv=5)
    assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
    assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
    assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])


@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_predict():
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = KFold()

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv.split(X, y):
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    preds = cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = LeaveOneOut()
    preds = cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cross_val_predict(est, Xsp, y)
    assert_array_almost_equal(len(preds), len(y))

    preds = cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    class BadCV():
        def split(self, X, y=None, groups=None):
            for i in range(4):
                yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cross_val_predict, est, X, y, cv=BadCV())

    X, y = load_iris(return_X_y=True)
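    # iris targets are ordered by class, so an unshuffled KFold(2) leaves one
    # class out of each training fold, which triggers the RuntimeWarning
    # asserted below.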
    warning_message = ('Number of classes in training fold (2) does '
                       'not match total number of classes (3). '
                       'Results may not be appropriate for your use case.')
    assert_warns_message(RuntimeWarning, warning_message,
                         cross_val_predict, LogisticRegression(),
                         X, y, method='predict_proba', cv=KFold(2))


@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_predict_decision_function_shape():
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='decision_function')
    assert_equal(preds.shape, (50,))

    X, y = load_iris(return_X_y=True)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='decision_function')
    assert_equal(preds.shape, (150, 3))

    # This specifically tests imbalanced splits for binary
    # classification with decision_function. This is only
    # applicable to classifiers that can be fit on a single
    # class.
    X = X[:100]
    y = y[:100]
    assert_raise_message(ValueError,
                         'Only 1 class/es in training fold, this'
                         ' is not supported for decision_function'
                         ' with imbalanced folds. To fix '
                         'this, use a cross-validation technique '
                         'resulting in properly stratified folds',
                         cross_val_predict, RidgeClassifier(), X, y,
                         method='decision_function', cv=KFold(2))

    X, y = load_digits(return_X_y=True)
    est = SVC(kernel='linear', decision_function_shape='ovo')

    preds = cross_val_predict(est,
                              X, y,
                              method='decision_function')
    assert_equal(preds.shape, (1797, 45))

    ind = np.argsort(y)
    X, y = X[ind], y[ind]
    assert_raises_regex(ValueError,
                        r'Output shape \(599L?, 21L?\) of decision_function '
                        r'does not match number of classes \(7\) in fold. '
                        'Irregular decision_function .*',
                        cross_val_predict, est, X, y,
                        cv=KFold(n_splits=3), method='decision_function')


@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_predict_predict_proba_shape():
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_proba')
    assert_equal(preds.shape, (50, 2))

    X, y = load_iris(return_X_y=True)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_proba')
    assert_equal(preds.shape, (150, 3))


@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_predict_predict_log_proba_shape():
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_log_proba')
    assert_equal(preds.shape, (50, 2))

    X, y = load_iris(return_X_y=True)

    preds = cross_val_predict(LogisticRegression(), X, y,
                              method='predict_log_proba')
    assert_equal(preds.shape, (150, 3))


@pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
@pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_predict_input_types():
    iris = load_iris()
    X, y = iris.data, iris.target
    X_sparse = coo_matrix(X)
    multioutput_y = np.column_stack([y, y[::-1]])

    clf = Ridge(fit_intercept=False, random_state=0)
    # 3 fold cv is used --> atleast 3 samples per class
    # Smoke test
    predictions = cross_val_predict(clf, X, y)
    assert_equal(predictions.shape, (150,))

    # test with multioutput y
    predictions = cross_val_predict(clf, X_sparse, multioutput_y)
    assert_equal(predictions.shape, (150, 2))

    predictions = cross_val_predict(clf, X_sparse, y)
    assert_array_equal(predictions.shape, (150,))

    # test with multioutput y
    predictions = cross_val_predict(clf, X_sparse, multioutput_y)
    assert_array_equal(predictions.shape, (150, 2))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    predictions = cross_val_predict(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    predictions = cross_val_predict(clf, X, y.tolist())

    # test with X and y as list and non empty method
    predictions = cross_val_predict(LogisticRegression(), X.tolist(),
                                    y.tolist(), method='decision_function')
    predictions = cross_val_predict(LogisticRegression(), X,
                                    y.tolist(), method='decision_function')

    # test with 3d X and
    X_3d = X[:, :, np.newaxis]
    check_3d = lambda x: x.ndim == 3
    clf = CheckingClassifier(check_X=check_3d)
    predictions = cross_val_predict(clf, X_3d, y)
    assert_array_equal(predictions.shape, (150,))


@pytest.mark.filterwarnings('ignore: Using or importing the ABCs from')
@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
# python3.7 deprecation warnings in pandas via matplotlib :-/
def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_predict(clf, X_df, y_ser)


@pytest.mark.filterwarnings('ignore: You should specify a value')  # 0.22
def test_cross_val_score_sparse_fit_params():
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    fit_params = {'sparse_sample_weight': coo_matrix(np.eye(X.shape[0]))}
    a = cross_val_score(clf, X, y, fit_params=fit_params)
    assert_array_equal(a, np.ones(3))


def test_learning_curve():
    n_samples = 30
    n_splits = 3
    X, y = make_classification(n_samples=n_samples, n_features=1,
                               n_informative=1, n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(n_samples * ((n_splits - 1) / n_splits))
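    # 30 samples with 3-fold CV leaves at most 20 training samples per split,
    # so the argument above evaluates to 20.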
    for shuffle_train in [False, True]:
        with warnings.catch_warnings(record=True) as w:
            train_sizes, train_scores, test_scores = learning_curve(
                estimator, X, y, cv=KFold(n_splits=n_splits),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_equal(train_scores.shape, (10, 3))
        assert_equal(test_scores.shape, (10, 3))
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))

        # Test a custom cv splitter that can iterate only once
        with warnings.catch_warnings(record=True) as w:
            train_sizes2, train_scores2, test_scores2 = learning_curve(
                estimator, X, y,
                cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_array_almost_equal(train_scores2, train_scores)
        assert_array_almost_equal(test_scores2, test_scores)


def test_learning_curve_unsupervised():
    X, _ = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))


def test_learning_curve_verbose():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        train_sizes, train_scores, test_scores = \
            learning_curve(estimator, X, y, cv=3, verbose=1)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    assert("[learning_curve]" in out)


def test_learning_curve_incremental_learning_not_possible():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    # The mockup does not have partial_fit()
    estimator = MockImprovingEstimator(1)
    assert_raises(ValueError, learning_curve, estimator, X, y,
                  exploit_incremental_learning=True)


def test_learning_curve_incremental_learning():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)
    for shuffle_train in [False, True]:
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, exploit_incremental_learning=True,
            train_sizes=np.linspace(0.1, 1.0, 10), shuffle=shuffle_train)
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))


def test_learning_curve_incremental_learning_unsupervised():
    X, _ = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, exploit_incremental_learning=True,
        train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))


# 0.23. warning about tol not having its correct default value.
@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been')
def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(max_iter=1, tol=None,
                                            shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))


def test_learning_curve_n_sample_range_out_of_bounds():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.0, 1.0])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.1, 1.1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 20])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[1, 21])


def test_learning_curve_remove_duplicate_sample_sizes():
    X, y = make_classification(n_samples=3, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(2)
    train_sizes, _, _ = assert_warns(
        RuntimeWarning, learning_curve, estimator, X, y, cv=3,
        train_sizes=np.linspace(0.33, 1.0, 3))
    assert_array_equal(train_sizes, [1, 2])


def test_learning_curve_with_boolean_indices():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    cv = KFold(n_splits=3)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))


# 0.23. warning about tol not having its correct default value.
@pytest.mark.filterwarnings('ignore:max_iter and tol parameters have been')
def test_learning_curve_with_shuffle():
    # Following test case was designed this way to verify the code
    # changes made in pull request: #7506.
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14],
                  [15, 16], [17, 18], [19, 20], [7, 8], [9, 10], [11, 12],
                  [13, 14], [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain la…
# (remainder of file truncated in this capture)