
/code/older/small.py

https://github.com/zorro4/NS_Classify
import numpy as np
from scipy import stats

def get_regions_frequency(clf):
    # `classify` (neurosynth's analysis.classify module) and `reduced_topics_2` are assumed
    # to be supplied by the surrounding NS_Classify project; they are not defined in this file.
    region_frequency = np.array([classify.get_studies_by_regions(clf.dataset, [mask[0]], threshold=clf.thresh, features=reduced_topics_2, regularization=None)[0].mean(axis=0) for mask in clf.masklist])
    # stats.pearsonr(region_frequency.flatten(), clf.feature_importances.mean(axis=0).flatten())
    return region_frequency
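
# Sketch (not part of the original file): correlate per-region study frequency with the mean
# feature importance across the region-pair classifiers, following the commented-out
# stats.pearsonr call above. Assumes `clf` exposes the same attributes used throughout
# this module (dataset, thresh, masklist, feature_importances).
def example_frequency_importance_corr(clf):
    freq = get_regions_frequency(clf)
    r, p = stats.pearsonr(freq.flatten(), clf.feature_importances.mean(axis=0).flatten())
    return r, p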

def calc_fi_progression(clf):
    """ Generates a matrix of shape regions x regions x features x number of trees.
    Calculates feature importance for each feature over time (i.e. across boosting stages).
    Features useful early should be more general, while those useful later should
    differentiate more difficult cases. """
    fis = np.empty(clf.feature_importances.shape + (clf.fit_clfs[0, 1].n_estimators,))
    for i in range(0, clf.mask_num):
        for j in range(0, clf.mask_num):
            if i == j:
                # a region paired with itself has no classifier; mark the slot as NaN
                fis[i, j] = None
            else:
                fis[i, j] = np.apply_along_axis(lambda x: x[0].feature_importances_, 1, clf.fit_clfs[i, j].estimators_).T
    clf.fi_x_estimators = np.ma.masked_array(fis, mask=np.isnan(fis))
    # np.apply_along_axis(lambda x: stats.pearsonr(x, np.arange(0, clf.fit_clfs[1, 2].n_estimators))[0], 3, clf.fi_x_estimators)
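
# Sketch (not part of the original file): summarise how each feature's importance trends
# across boosting stages, following the commented-out np.apply_along_axis call above.
# Assumes calc_fi_progression(clf) has already been run, so clf.fi_x_estimators has shape
# (regions, regions, features, n_estimators); diagonal (i == j) pairs come out as NaN.
def example_fi_trend(clf):
    stages = np.arange(clf.fi_x_estimators.shape[-1])
    # Pearson r between stage index and importance: positive means a feature matters more
    # in later (harder) trees, negative means it is mostly used early on.
    return np.apply_along_axis(lambda x: stats.pearsonr(x, stages)[0], 3, clf.fi_x_estimators)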

def calc_fi_progression_2(clf):
    """ Generates a matrix of shape regions x regions x features x number of trees.
    Calculates feature importance for each feature over time (i.e. across boosting stages).
    Features useful early should be more general, while those useful later should
    differentiate more difficult cases. Vectorized variant of calc_fi_progression. """
    import pandas

    def get_fis(fit_clf):
        if fit_clf is None:
            return None
        else:
            return np.apply_along_axis(lambda x: x[0].feature_importances_, 1, fit_clf.estimators_)

    # otypes=[object] is needed because get_fis returns a whole array per classifier
    get_fis = np.vectorize(get_fis, otypes=[object])
    fis = get_fis(clf.fit_clfs)
    clf.fi_x_estimators = np.ma.masked_array(fis, mask=pandas.isnull(fis))

def calc_avg_pdp(clf):
    """ Computes partial dependence (values and grid points) for every feature of every
    off-diagonal region-pair classifier and stores the result in clf.pdps. """
    # Note: this import path is from older scikit-learn; recent versions expose
    # partial_dependence under sklearn.inspection instead.
    from sklearn.ensemble.partial_dependence import partial_dependence
    import pandas
    pdps = np.empty(clf.fit_clfs.shape + (clf.feature_importances.shape[2], 2, 100))
    for i in range(0, clf.mask_num):
        for j in range(0, clf.mask_num):
            if i == j:
                pdps[i, j] = None
            else:
                for feature in range(0, clf.feature_importances.shape[2]):
                    pdp, a = partial_dependence(clf.fit_clfs[i, j], [feature], X=clf.c_data[i, j][0])
                    pdps[i, j, feature] = [pdp[0], a[0]]
    clf.pdps = np.ma.masked_array(pdps, mask=pandas.isnull(pdps))
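
# Sketch (not part of the original file): average one feature's partial-dependence curve
# across all region-pair classifiers, using the clf.pdps array filled in by calc_avg_pdp
# above (last-but-one axis holds [pd values, grid points]; diagonal entries are masked).
# Note that the grid points generally differ between classifiers, so the averaged grid is
# only a rough summary axis.
def example_avg_pdp_curve(clf, feature):
    curves = clf.pdps[:, :, feature, 0, :]
    grid = clf.pdps[:, :, feature, 1, :]
    # average over both region axes; masked (diagonal) entries are ignored by np.ma
    return grid.mean(axis=0).mean(axis=0), curves.mean(axis=0).mean(axis=0)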