/engine.py
Python | 274 lines | 167 code | 80 blank | 27 comment | 39 complexity | 2e7baef81a5337f1b01258ba573a78db MD5 | raw file
- from dataprep import DataPreparation
- from model_aggregators import ModelAggregation
- from modify_scores import ModifyScores
- import pandas as pd
- from config import *
- from multiprocessing import Process, Manager
- import warnings
- from utils.dbCon import connect_db
- # warnings.filterwarnings("ignore")
- # import sys
- # out_file = open("output.txt", "a+")
- # sys.stdout = out_file
class Engine(object):
    """Run the scoring pipeline: fetch data, score it, export and store results.

    Constructing an instance does all the work (see ``__init__``). The names
    ``univariate_model_dict``, ``multivariate_model_dict``, ``cols_to_del``,
    ``metrics_to_ignore`` and ``dbParams`` are module globals, presumably
    supplied by ``from config import *`` -- confirm against ``config``.
    """

    # Columns the results table starts with, in this order.
    _RESULT_COLUMNS = ('server', 'cpu', 'memory',
                       'network-write(kb/s)', 'network-read(kb/s)')

    # Model families looked for in a column name, in matching priority order.
    _MODEL_FAMILIES = ('LOF', 'FFT', 'MAD')

    def __init__(self, smoother=None, type='multi'):
        """Fetch the data, score it, write one CSV per key and push to the DB.

        Args:
            smoother: Passed through unchanged to ``ModelAggregation.apply_``.
            type: ``'uni'`` or ``'multi'``; selects the scoring path. The name
                shadows the builtin but is kept because callers may pass it
                by keyword.

        Raises:
            AssertionError: If ``type`` is neither ``'uni'`` nor ``'multi'``.
        """
        # Defined up front so fetch_model_based_score() never hits a missing
        # attribute, whichever scoring path runs.
        self.model_based_score = dict()

        data = DataPreparation().fetch_()
        self.scores = self.prepare_models(type=type, data=data, smoother=smoother)

        # One "<key>_score.csv" per entry, written to the working directory.
        for server, frame in self.scores.items():
            frame.to_csv("{}_score.csv".format(server))

        self.push_results_to_table(self.scores)

    def push_results_to_table(self, score, data_type='dict'):
        """Stack the per-server score frames and write them to ``pt_scores``.

        Args:
            score: Mapping of server name to a DataFrame of scores. The frames
                are not modified; the ``server`` column is added to copies.
            data_type: Accepted for backward compatibility; currently unused.
        """
        # assign() returns a new frame, so the caller's frames keep their
        # original columns.
        frames = [frame.assign(server=server) for server, frame in score.items()]

        leading = list(self._RESULT_COLUMNS)
        if frames:
            # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
            combined = pd.concat(frames, sort=False)
            extras = [col for col in combined.columns if col not in leading]
            # Known columns first (created as NaN when absent), then the rest
            # in the order they were encountered.
            combined = combined.reindex(columns=leading + extras)
        else:
            combined = pd.DataFrame(columns=leading)

        connection = connect_db('write', package='sqlalchemy', assist=True)
        connection.set_params(**dbParams)
        connection.push_data(combined, 'pt_scores')

    def prepare_models(self, **kwargs):
        """Dispatch to the univariate or multivariate scoring path.

        Args:
            **kwargs: Must contain ``type`` (``'uni'`` or ``'multi'``); every
                keyword is forwarded, with ``models`` set to the matching
                model dictionary.

        Returns:
            Whatever the selected scoring method returns.

        Raises:
            KeyError: If ``type`` is not supplied.
            AssertionError: If ``type`` is not ``'uni'`` or ``'multi'``.
        """
        model_type = kwargs['type']
        if model_type == 'uni':
            kwargs['models'] = univariate_model_dict
            return self.univariate_models(**kwargs)
        if model_type == 'multi':
            kwargs['models'] = multivariate_model_dict
            return self.multivariate_models(**kwargs)
        # AssertionError is kept (rather than ValueError) because existing
        # callers may already catch it.
        raise AssertionError('Wrong param passed. Passed type can only be uni or multi')

    @staticmethod
    def _required_kwarg(kwargs, name):
        """Return ``kwargs[name]``, raising the module's KeyError if absent."""
        if name not in kwargs:
            raise KeyError(
                'No {} passed as keyword argument. '
                'Kindly check your arguments passed'.format(name)
            )
        return kwargs[name]

    @staticmethod
    def _without_columns(frame, columns):
        """Return a copy of ``frame`` without ``columns``.

        Unlike ``del frame[col]`` this leaves the caller's frame intact, so
        the same input data can be scored more than once. A column missing
        from ``frame`` still raises KeyError.
        """
        return frame.drop(list(columns), axis=1)

    @staticmethod
    def _scale_model_scores(model_scores):
        """Scale each model's scores and gather them into one frame.

        Args:
            model_scores: Mapping of model name to that model's raw scores.

        Returns:
            DataFrame indexed by ``timestamp`` holding, per model, the first
            column of the frame returned by ``ModifyScores.apply_``.

        Raises:
            KeyError: If ``model_scores`` is empty (no ``timestamp`` column
                is ever created to index on).
        """
        # The original author's note: this converts to z-scores scaled to
        # 0-100 -- that is ModifyScores' job and is not verifiable from here.
        scaler = ModifyScores()
        combined = pd.DataFrame()
        for model in model_scores.keys():
            scaled = scaler.apply_(model_scores[model])
            if 'timestamp' not in combined.columns:
                # The first model's index supplies the shared timestamps.
                combined['timestamp'] = scaled.index.tolist()
            column = scaled.columns.tolist()[0]
            # .values aligns by position, not by index label.
            combined[column] = scaled[column].values
        combined.set_index('timestamp', inplace=True)
        return combined

    def univariate_models(self, **kwargs):
        """Score every metric column on its own and average across models.

        Args:
            **kwargs: Requires ``data`` (mapping of server to DataFrame),
                ``smoother`` and ``models``.

        Returns:
            dict mapping server to a DataFrame with one column per metric,
            each holding the row-wise average of that metric's model scores.

        Raises:
            KeyError: If ``data``, ``smoother`` or ``models`` is missing.

        Side effects:
            Stores the per-model breakdown (server -> metric -> result of
            ``split_scores``) on ``self.model_based_score``.
        """
        data = self._required_kwarg(kwargs, 'data')
        smoother = self._required_kwarg(kwargs, 'smoother')
        models_to_use = kwargs['models']

        # Phase 1: raw scores per server and metric. This runs in-process, so
        # no multiprocessing Manager is created here.
        raw_scores = dict()
        for server in data.keys():
            frame = self._without_columns(data[server], cols_to_del)
            raw_scores[server] = dict()
            for metric in frame.columns.tolist():
                if metric in metrics_to_ignore:
                    continue
                raw_scores[server][metric] = ModelAggregation().apply_(
                    frame[[metric]], smoother, models_to_use
                )

        # Phase 2: scale each model's scores, then average them per row.
        score = dict()
        model_based_score = dict()
        for server, per_metric in raw_scores.items():
            averaged = pd.DataFrame()
            model_based_score[server] = dict()
            for metric, per_model in per_metric.items():
                score_df = self._scale_model_scores(per_model)
                model_based_score[server][metric] = self.split_scores(score_df)
                averaged[metric] = score_df.apply(self.average_scores, axis=1)
            score[server] = averaged

        self.model_based_score = model_based_score
        return score

    def multivariate_models(self, **kwargs):
        """Score each server's full frame in its own worker process.

        Args:
            **kwargs: Requires ``data`` (mapping of server to DataFrame),
                ``smoother`` and ``models``.

        Returns:
            dict mapping server to a DataFrame indexed by ``timestamp`` with
            one scaled-score column per model.

        Raises:
            KeyError: If ``data``, ``smoother`` or ``models`` is missing.

        Side effects:
            Sets ``self.model_based_score`` to an empty dict per server; this
            path does not compute a per-model split.
        """
        data = self._required_kwarg(kwargs, 'data')
        smoother = self._required_kwarg(kwargs, 'smoother')
        models_to_use = kwargs['models']

        manager = Manager()
        try:
            return_dict = manager.dict()
            jobs = []
            for server in data.keys():
                frame = self._without_columns(data[server], cols_to_del)
                frame = self._without_columns(frame, metrics_to_ignore)
                # Presumably apply_ stores its result in return_dict under
                # `server` -- confirm in ModelAggregation.
                worker = Process(
                    target=ModelAggregation().apply_,
                    args=(frame, smoother, models_to_use, return_dict, server),
                )
                jobs.append(worker)
                worker.start()

            for worker in jobs:
                worker.join()

            # Copy into a plain dict while the manager is still running, so
            # the results stay usable after it is shut down.
            raw_scores = {key: return_dict[key] for key in return_dict.keys()}
        finally:
            manager.shutdown()

        score = dict()
        model_based_score = dict()
        for server, per_model in raw_scores.items():
            model_based_score[server] = dict()
            score[server] = self._scale_model_scores(per_model)

        self.model_based_score = model_based_score
        return score

    def fetch_model_based_score(self):
        """Return the per-model breakdown stored by the last scoring run."""
        return self.model_based_score

    def split_scores(self, score):
        """Split a combined score frame into one frame per model family.

        A column belongs to the first of LOF, FFT, MAD whose name occurs in
        the column label; when several columns match one family the last one
        wins.

        Args:
            score: DataFrame whose column labels contain the family names.

        Returns:
            dict with keys among ``'MAD'``, ``'FFT'``, ``'LOF'`` (in that
            order), each a single-column DataFrame. Families with no matching
            column are omitted rather than raising.
        """
        found = dict()
        for col in score.columns.tolist():
            label = str(col)  # tolerate non-string column labels
            for family in self._MODEL_FAMILIES:
                if family in label:
                    found[family] = score[[col]]
                    break
        return {
            family: found[family]
            for family in ('MAD', 'FFT', 'LOF')
            if family in found
        }

    def fetch_(self):
        """Return the scores computed in ``__init__``."""
        return self.scores

    def average_scores(self, a):
        """Return the arithmetic mean of one row's values.

        Args:
            a: A pandas Series (one row when used with ``apply(axis=1)``).

        Returns:
            ``sum / count`` of the values, or NaN for an empty row. NaN
            values are not skipped, so they propagate into the result.
        """
        values = a.values
        # len() rather than truthiness: a multi-element array has no bool().
        if len(values) == 0:
            return float('nan')
        return sum(values) / len(values)
if __name__ == "__main__":
    # Script entry point: building the Engine is the whole run, because its
    # constructor performs the work. The arguments spell out the constructor's
    # own defaults.
    eng_obj = Engine(smoother=None, type="multi")