
/python_data_analysis/affective_crossvalidation.py

https://bitbucket.org/diogo149/affective-computing
Python | 137 lines | 98 code | 20 blank | 19 comment | 18 complexity | 5d930aa4dd0ee5817de03a6fc7dff74c MD5
  1. """
  2. to read the file:
  3. import pandas as pd; tmp = pd.read_csv(<filename>, index_col=0)
  4. -use balanced error rate
  5. """
from scipy.io import loadmat
import pandas as pd
from itertools import permutations
import numpy as np
from sklearn.cross_validation import StratifiedKFold
import cPickle as pickle

perplexity = 30
games = ["escape", "sahara"]
windows = [2, 1]
window_features = {}
window_features[1] = [278, 318, 130, 126, 10, 259, 336, 246, 223, 303, 158, 332, 233, 201, 291, 162, 234, 347, 117, 227, 274, 304, 219, 263, 118, 293, 235, 346, 120, 189, 97, 292, 276, 172, 294, 288, 177, 147, 321, 236, 249, 334, 173, 187, 289, 160, 96, 178, 323, 119, 260, 99, 220, 253, 184, 202, 175, 324, 281, 98, 248, 230, 252, 176, 245, 317, 205, 74, 285, 231, 77, 218, 250, 143, 188, 68, 306, 232, 216, 310, 217, 16, 194, 14, 210, 78, 311, 17, 1, 225, 290, 85, 2, 133, 46, 181, 307, 31, 308, 207]
window_features[2] = [303, 318, 158, 126, 10, 223, 278, 259, 336, 219, 77, 97, 289, 130, 201, 304, 249, 233, 162, 332, 189, 248, 274, 227, 347, 74, 96, 117, 246, 294, 118, 234, 187, 291, 99, 16, 78, 346, 120, 288, 252, 220, 292, 263, 147, 232, 293, 236, 172, 321, 177, 276, 253, 160, 334, 184, 235, 98, 310, 119, 224, 178, 173, 230, 250, 17, 323, 199, 216, 133, 260, 143, 202, 19, 231, 306, 75, 175, 176, 217, 188, 311, 205, 183, 324, 307, 218, 221, 251, 68, 84, 194, 210, 225, 20, 317, 182, 181, 132, 245]
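
# window_features[w] appears to hold pre-selected feature column indices for
# each window size (presumably the output of an earlier feature-ranking step);
# read_mat below adds 1 to each index so they line up with the raw .mat
# columns, whose first two columns hold the subject id and emotion label.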

def get_filename(game, window):
    assert game in games
    assert window in windows
    return "../new_data/%s_all_window%d.mat" % (game, window)

def read_mat(game, window):
    """because of indexing from 0 and skipping the first 2 columns, we need
    to add 1 to each of the feature numbers
    """
    filename = get_filename(game, window)
    features = [0, 1] + map(lambda x: x + 1, window_features[window])
    return pd.DataFrame(loadmat(filename)["new"])[features]

def split_by(df, column):
    """generates data frames for each unique value in specified column.
    """
    tmp = df[column]
    for val in tmp.unique():
        yield val, df[tmp == val]

def preprocess(df):
    """scales data frame to 0 mean and 1 std, fills missing values, then
    returns as a numpy matrix
    """
    return ((df - df.mean()) / df.std()).fillna(0).as_matrix()
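
# Note that preprocess is applied per subject in load_data below, so each
# subject's features are standardized against that subject's own mean and std;
# fillna(0) then maps missing values and constant (zero-std) columns to 0.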

def load_data():
    """loads each game/window .mat file, splits it by subject, and returns
    nested dicts data[game][window][subject] -> preprocessed feature matrix
    and emotions[game][window][subject] -> label vector
    """
    data, emotions = dict(), dict()
    for game in games:
        data[game], emotions[game] = dict(), dict()
        for window in windows:
            data[game][window], emotions[game][window] = dict(), dict()
            tmp = read_mat(game, window)
            for val, df in split_by(tmp, 0):
                subject, emotion = df[0], df[1]
                del df[0], df[1]
                data[game][window][val] = preprocess(df)
                emotions[game][window][val] = np.array(emotion)
    return data, emotions

def balanced_error_rate(y_true, y_pred):
    """one minus the mean per-class recall (i.e. the balanced error rate)
    """
    # return float(sum(y_true != y_pred)) / len(y_true)
    error = []
    for cls in np.unique(y_true):
        true_positive = map(lambda x: x[0] == x[1] == cls, zip(y_true, y_pred))
        true_cls = y_true == cls
        err = float(sum(true_positive)) / sum(true_cls)
        error.append(err)
    return 1.0 - sum(error) / len(error)
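
# Worked example of the metric above:
#   y_true = np.array([0, 0, 1]); y_pred = np.array([0, 1, 1])
#   recall(class 0) = 1/2, recall(class 1) = 1/1
#   balanced_error_rate = 1 - (0.5 + 1.0) / 2 = 0.25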

def affective_crossvalidation(clf):
    """for each game and window size, trains clf on each subject and writes a
    CSV of balanced error rates: one row per training subject, one column per
    test subject from the same game, plus the same subject on the other game
    and a stratified cross-validation estimate on the training subject
    """
    # toggle to 1 to regenerate the cached pickle from the .mat files
    if 0:
        with open("../new_data/data.pickle", 'w') as outfile:
            tmp = load_data()
            pickle.dump(tmp, outfile)
    else:
        with open("../new_data/data.pickle") as infile:
            tmp = pickle.load(infile)
    data, emotions = tmp
    n_folds = 5
    for game, game2 in permutations(games):
        for window in windows:
            vals = sorted(data[game][window].keys())
            output = np.zeros((len(vals), len(vals) + 2))
            columns = vals + ["Other Game", "%d-fold CV" % n_folds]
            for idx_train, val_train in enumerate(vals):
                X_train = data[game][window][val_train]
                Y_train = emotions[game][window][val_train]
                if len(np.unique(Y_train)) == 1:
                    print val_train
                clf.fit(X_train, Y_train)
                # testing player vs other player same game
                for idx_test, val_test in enumerate(vals):
                    X_test = data[game][window][val_test]
                    Y_test = emotions[game][window][val_test]
                    Y_pred = clf.predict(X_test)
                    err = balanced_error_rate(Y_test, Y_pred)
                    output[idx_train, idx_test] = err
                # testing player vs self on other game
                if val_train in data[game2][window]:
                    X_test = data[game2][window][val_train]
                    Y_test = emotions[game2][window][val_train]
                    Y_pred = clf.predict(X_test)
                    err = balanced_error_rate(Y_test, Y_pred)
                else:
                    err = -1
                output[idx_train, -2] = err
                # testing player vs self same game
                skf = StratifiedKFold(Y_train, n_folds=n_folds)
                err = []
                for train_idx, test_idx in skf:
                    X_train2, X_test = X_train[train_idx], X_train[test_idx]
                    Y_train2, Y_test = Y_train[train_idx], Y_train[test_idx]
                    clf.fit(X_train2, Y_train2)
                    Y_pred = clf.predict(X_test)
                    err.append(balanced_error_rate(Y_test, Y_pred))
                err = sum(err) / len(err)
                output[idx_train, -1] = err
            filename = "../new_data/error_%s_%d.csv" % (game, window)
            pd.DataFrame(output, index=vals, columns=columns).to_csv(filename)

if __name__ == "__main__":
    # from sklearn.svm import SVC
    # svm = SVC()
    from sklearn.ensemble import RandomForestClassifier
    rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
    affective_crossvalidation(rf)
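
For reference, a minimal sketch of reading back one of the error matrices this script writes, following the read_csv hint in the module docstring; the filename assumes the escape / window-1 combination has been run:

    import pandas as pd

    errors = pd.read_csv("../new_data/error_escape_1.csv", index_col=0)
    # rows: training subject; columns: test subjects from the same game, then
    # "Other Game" (same subject on the other game, -1 if unavailable) and the
    # cross-validation column
    print errors["Other Game"]
    print errors.mean()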