# python_data_analysis/affective_crossvalidation.py
- """
- to read the file:
- import pandas as pd; tmp = pd.read_csv(<filename>, index_col=0)
- -use balanced error rate
- """
- from scipy.io import loadmat
- import pandas as pd
- from itertools import permutations
- import numpy as np
- from sklearn.cross_validation import StratifiedKFold
- import cPickle as pickle
# t-SNE perplexity — not referenced anywhere in this file; presumably used
# by a sibling script in the pipeline (TODO confirm).
perplexity = 30
# the two games whose recordings are cross-validated against each other
games = ["escape", "sahara"]
# sliding-window sizes; units not shown in this file — TODO confirm
windows = [2, 1]
# window_features[w]: the 100 pre-selected feature numbers for window size w.
# These are shifted by +1 in read_mat() because the raw .mat matrix carries
# two leading metadata columns (subject id, emotion label).
window_features = {}
window_features[1] = [278, 318, 130, 126, 10, 259, 336, 246, 223, 303, 158, 332, 233, 201, 291, 162, 234, 347, 117, 227, 274, 304, 219, 263, 118, 293, 235, 346, 120, 189, 97, 292, 276, 172, 294, 288, 177, 147, 321, 236, 249, 334, 173, 187, 289, 160, 96, 178, 323, 119, 260, 99, 220, 253, 184, 202, 175, 324, 281, 98, 248, 230, 252, 176, 245, 317, 205, 74, 285, 231, 77, 218, 250, 143, 188, 68, 306, 232, 216, 310, 217, 16, 194, 14, 210, 78, 311, 17, 1, 225, 290, 85, 2, 133, 46, 181, 307, 31, 308, 207]
window_features[2] = [303, 318, 158, 126, 10, 223, 278, 259, 336, 219, 77, 97, 289, 130, 201, 304, 249, 233, 162, 332, 189, 248, 274, 227, 347, 74, 96, 117, 246, 294, 118, 234, 187, 291, 99, 16, 78, 346, 120, 288, 252, 220, 292, 263, 147, 232, 293, 236, 172, 321, 177, 276, 253, 160, 334, 184, 235, 98, 310, 119, 224, 178, 173, 230, 250, 17, 323, 199, 216, 133, 260, 143, 202, 19, 231, 306, 75, 175, 176, 217, 188, 311, 205, 183, 324, 307, 218, 221, 251, 68, 84, 194, 210, 225, 20, 317, 182, 181, 132, 245]
def get_filename(game, window):
    """Return the path of the .mat file for a (game, window) combination.

    Raises ValueError when the game or window is not one of the known
    configurations in the module-level ``games`` / ``windows`` lists.
    (The original used ``assert``, which is silently stripped under
    ``python -O`` and would let bad arguments through.)
    """
    if game not in games:
        raise ValueError("unknown game: %r" % (game,))
    if window not in windows:
        raise ValueError("unknown window: %r" % (window,))
    return "../new_data/%s_all_window%d.mat" % (game, window)
def read_mat(game, window):
    """Load the .mat file for (game, window), keeping the selected columns.

    Columns 0 and 1 of the raw matrix are metadata (subject id and emotion
    label, judging by their use in load_data); the remaining columns are
    features.  Because of the 0-based indexing and those two leading
    columns, 1 is added to each feature number from ``window_features``.
    """
    filename = get_filename(game, window)
    # List comprehension instead of map(): map() is lazy on Python 3,
    # which would break the list concatenation below.
    features = [0, 1] + [x + 1 for x in window_features[window]]
    return pd.DataFrame(loadmat(filename)["new"])[features]
def split_by(df, column):
    """Yield (value, sub-frame) pairs, one per distinct value in *column*.

    Each sub-frame holds exactly the rows of *df* whose entry in *column*
    equals the yielded value, in their original row order.
    """
    key = df[column]
    for unique_value in key.unique():
        mask = key == unique_value
        yield unique_value, df[mask]
def preprocess(df):
    """Z-score each column (zero mean, unit std) and return a numpy array.

    A constant column yields 0/0 = NaN after scaling; fillna(0) turns such
    columns into all zeros.

    Fix: ``DataFrame.as_matrix()`` was removed from pandas; ``.values`` is
    the backward-compatible equivalent and works on every pandas version.
    """
    return ((df - df.mean()) / df.std()).fillna(0).values
def load_data():
    """Read every (game, window) .mat file into nested per-subject dicts.

    Returns
    -------
    (data, emotions) where
        data[game][window][subject_id]     -> preprocessed feature matrix
        emotions[game][window][subject_id] -> 1-d array of emotion labels

    Fixes over the original: the per-subject slice returned by split_by is
    no longer mutated in place (``del df[0], df[1]`` on a slice of the
    parent frame triggers pandas' SettingWithCopy hazard), and the unused
    ``subject`` variable is gone.
    """
    data, emotions = dict(), dict()
    for game in games:
        data[game], emotions[game] = dict(), dict()
        for window in windows:
            data[game][window], emotions[game][window] = dict(), dict()
            tmp = read_mat(game, window)
            for val, df in split_by(tmp, 0):
                emotion = df[1]
                # drop() returns a new frame instead of deleting columns
                # in place on a slice of tmp.
                features = df.drop([0, 1], axis=1)
                data[game][window][val] = preprocess(features)
                emotions[game][window][val] = np.array(emotion)
    return data, emotions
def balanced_error_rate(y_true, y_pred):
    """Return 1 minus the mean per-class recall (the balanced error rate).

    Both arguments are 1-d arrays of class labels of equal length.
    """
    predictions = np.asarray(y_pred)
    per_class_recall = []
    for label in np.unique(y_true):
        in_class = y_true == label
        hits = np.logical_and(in_class, predictions == label)
        per_class_recall.append(float(np.sum(hits)) / np.sum(in_class))
    return 1.0 - sum(per_class_recall) / len(per_class_recall)
def affective_crossvalidation(clf):
    """Run the full cross-game / cross-player / within-player evaluation.

    For every ordered pair of distinct games and every window size, trains
    *clf* once per subject and writes one CSV of balanced error rates:
    one row per training subject, one column per test subject of the same
    game, plus an "Other Game" column (same subject, other game; -1 when
    the subject is absent from the other game) and a within-subject
    k-fold CV column.

    Parameters
    ----------
    clf : classifier with scikit-learn style fit()/predict() methods.
    """
    # NOTE(review): dead `if 0:` branch — the cached pickle is always
    # loaded; flip the condition to regenerate the cache from the .mat files.
    if 0:
        # NOTE(review): pickle files should be opened in binary mode
        # ('wb'/'rb'); text mode only happens to work on Python 2 / Unix.
        with open("../new_data/data.pickle", 'w') as outfile:
            tmp = load_data()
            pickle.dump(tmp, outfile)
    else:
        with open("../new_data/data.pickle") as infile:
            tmp = pickle.load(infile)
    data, emotions = tmp
    # every ordered pair: train/test within `game`, cross-test on `game2`
    for game, game2 in permutations(games):
        for window in windows:
            vals = sorted(data[game][window].keys())  # subject ids
            # one row per training subject; +2 columns for the extra tests
            output = np.zeros((len(vals), len(vals) + 2))
            # NOTE(review): the label says "10-fold CV" but n_folds below
            # is 5 — one of the two is wrong; confirm the intended count.
            columns = vals + ["Other Game", "10-fold CV"]
            for idx_train, val_train in enumerate(vals):
                X_train = data[game][window][val_train]
                Y_train = emotions[game][window][val_train]
                # flag subjects with a single emotion class: stratified CV
                # below cannot split them meaningfully
                if len(np.unique(Y_train)) == 1:
                    print val_train
                clf.fit(X_train, Y_train)
                # testing player vs other player same game
                for idx_test, val_test in enumerate(vals):
                    X_test = data[game][window][val_test]
                    Y_test = emotions[game][window][val_test]
                    Y_pred = clf.predict(X_test)
                    err = balanced_error_rate(Y_test, Y_pred)
                    output[idx_train, idx_test] = err
                # testing player vs self on other game
                if val_train in data[game2][window]:
                    X_test = data[game2][window][val_train]
                    Y_test = emotions[game2][window][val_train]
                    Y_pred = clf.predict(X_test)
                    err = balanced_error_rate(Y_test, Y_pred)
                else:
                    # subject did not play the other game
                    err = -1
                output[idx_train, -2] = err
                # testing player vs self same game
                n_folds = 5
                # NOTE(review): old sklearn.cross_validation API; in
                # sklearn >= 0.18 use model_selection.StratifiedKFold
                # (constructor no longer takes y; iterate skf.split(X, y)).
                skf = StratifiedKFold(Y_train, n_folds=n_folds)
                err = []
                for train_idx, test_idx in skf:
                    X_train2, X_test = X_train[train_idx], X_train[test_idx]
                    Y_train2, Y_test = Y_train[train_idx], Y_train[test_idx]
                    clf.fit(X_train2, Y_train2)
                    Y_pred = clf.predict(X_test)
                    err.append(balanced_error_rate(Y_test, Y_pred))
                err = sum(err) / len(err)
                output[idx_train, -1] = err
            filename = "../new_data/error_%s_%d.csv" % (game, window)
            pd.DataFrame(output, index=vals, columns=columns).to_csv(filename)
- if __name__ == "__main__":
- # from sklearn.svm import SVC
- # svm = SVC()
- from sklearn.ensemble import RandomForestClassifier
- rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
- affective_crossvalidation(rf)