PageRenderTime 1206ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/engine.py

https://gitlab.com/debasishk/PerformanceTest-Monitoring
Python | 274 lines | 167 code | 80 blank | 27 comment | 39 complexity | 2e7baef81a5337f1b01258ba573a78db MD5 | raw file
  1. from dataprep import DataPreparation
  2. from model_aggregators import ModelAggregation
  3. from modify_scores import ModifyScores
  4. import pandas as pd
  5. from config import *
  6. from multiprocessing import Process, Manager
  7. import warnings
  8. from utils.dbCon import connect_db
  9. # warnings.filterwarnings("ignore")
  10. # import sys
  11. # out_file = open("output.txt", "a+")
  12. # sys.stdout = out_file
  13. class Engine(object):
  14. def __init__(self, smoother=None, type='multi'):
  15. data_obj = DataPreparation()
  16. data = data_obj.fetch_()
  17. score = self.prepare_models(type=type, data=data, smoother=smoother)
  18. self.scores = score
  19. for k in self.scores:
  20. fname = "{}_score.csv".format(k)
  21. self.scores[k].to_csv(fname)
  22. self.push_results_to_table(self.scores)
  23. pass
  24. def push_results_to_table(self, score, data_type='dict'):
  25. if data_type == 'dict':
  26. dict_type = True
  27. else:
  28. dict_type = False
  29. tmpDF = pd.DataFrame(columns=['server', 'cpu', 'memory', 'network-write(kb/s)', 'network-read(kb/s)'])
  30. for server in score.keys():
  31. data_len = len(score[server].index)
  32. server_col = [server for i in range(data_len)]
  33. df = score[server]
  34. # df.reset_index(inplace=True)
  35. df['server'] = server_col
  36. tmpDF = tmpDF.append(df)
  37. c = connect_db('write', package='sqlalchemy', assist=True)
  38. c.set_params(**dbParams)
  39. c.push_data(tmpDF, 'pt_scores')
  40. pass
  41. def prepare_models(self, **kwargs):
  42. type = kwargs['type']
  43. if type == 'uni':
  44. kwargs['models'] = univariate_model_dict
  45. scores = self.univariate_models(**kwargs)
  46. elif type == 'multi':
  47. kwargs['models'] = multivariate_model_dict
  48. scores = self.multivariate_models(**kwargs)
  49. else:
  50. raise AssertionError('Wrong param passed. Passed type can only be uni or multi')
  51. return scores
  52. def univariate_models(self, **kwargs):
  53. if 'data' in kwargs:
  54. data = kwargs['data']
  55. else:
  56. raise KeyError('No data passed as keyword argument. Kindly check your arguments passed')
  57. if 'smoother' in kwargs:
  58. smoother = kwargs['smoother']
  59. else:
  60. raise KeyError('No smoother passed as keyword argument. Kindly check your arguments passed')
  61. scores = dict()
  62. models_to_use = kwargs['models']
  63. manager = Manager()
  64. return_dict = manager.dict()
  65. jobs = []
  66. for k in data.keys():
  67. scores[k] = dict()
  68. v = data[k]
  69. for col in cols_to_del:
  70. del v[col]
  71. for col in v.columns.tolist():
  72. if col not in metrics_to_ignore:
  73. data_f = v[[col]]
  74. model_agg_obj = ModelAggregation()
  75. ret = model_agg_obj.apply_(data_f, smoother, models_to_use)
  76. scores[k][col] = ret
  77. # p = Process(target=model_agg_obj.apply_,
  78. # args=(data_f, smoother, models_to_use, return_dict, [k, col]))
  79. # jobs.append(p)
  80. # p.start()
  81. #
  82. # for proc in jobs:
  83. # proc.join()
  84. # scores = return_dict
  85. # scores[k][col] = score_per_metric
  86. score = dict()
  87. model_based_score = dict()
  88. # Now, converting to z-score & then scaling them to range 0-100
  89. for k in scores.keys():
  90. tmpScores = pd.DataFrame()
  91. model_based_score[k] = dict()
  92. for metric in scores[k].keys():
  93. m = metric
  94. if m not in metrics_to_ignore:
  95. modify_obj = ModifyScores()
  96. score_df = pd.DataFrame()
  97. for model in scores[k][m].keys():
  98. scaled_scores = modify_obj.apply_(scores[k][m][model])
  99. if 'timestamp' in score_df.columns.tolist():
  100. pass
  101. else:
  102. score_df['timestamp'] = scaled_scores.index.tolist()
  103. col_to_use = scaled_scores.columns.tolist()[0]
  104. score_df[col_to_use] = scaled_scores[col_to_use].values
  105. score_df.set_index('timestamp', inplace=True)
  106. s = score_df.apply(self.average_scores, axis=1)
  107. split_score = self.split_scores(score_df)
  108. model_based_score[k][m] = split_score
  109. tmpScores[m] = s
  110. score[k] = tmpScores
  111. # Modify score structure. It should be dictionary with keys as server names. Each key shall have 'metric_anomalous_score' or 'metric_algo_name_score' columns as DF
  112. return score
  113. def multivariate_models(self, **kwargs):
  114. if 'data' in kwargs:
  115. data = kwargs['data']
  116. else:
  117. raise KeyError('No data passed as keyword argument. Kindly check your arguments passed')
  118. if 'smoother' in kwargs:
  119. smoother = kwargs['smoother']
  120. else:
  121. raise KeyError('No smoother passed as keyword argument. Kindly check your arguments passed')
  122. scores = dict()
  123. models_to_use = kwargs['models']
  124. manager = Manager()
  125. return_dict = manager.dict()
  126. jobs = []
  127. for k_server in data.keys():
  128. v = data[k_server]
  129. for col in cols_to_del:
  130. del v[col]
  131. for col in metrics_to_ignore:
  132. del v[col]
  133. data_f = v
  134. model_agg_obj = ModelAggregation()
  135. # score_per_model = model_agg_obj.apply_(data_f, smoother=smoother, models_to_use=models_to_use)
  136. p = Process(target=model_agg_obj.apply_, args=(data_f, smoother, models_to_use, return_dict, k_server))
  137. jobs.append(p)
  138. p.start()
  139. for proc in jobs:
  140. proc.join()
  141. #
  142. scores = return_dict
  143. # scores[k_server] = score_per_model
  144. score = dict()
  145. model_based_score = dict()
  146. # Now, converting to z-score & then scaling them to range 0-100
  147. for k in scores.keys():
  148. # k is server
  149. tmpScores = pd.DataFrame()
  150. model_based_score[k] = dict()
  151. modify_obj = ModifyScores()
  152. score_df = pd.DataFrame()
  153. for model in scores[k].keys():
  154. scaled_scores = modify_obj.apply_(scores[k][model])
  155. if 'timestamp' in score_df.columns.tolist():
  156. pass
  157. else:
  158. score_df['timestamp'] = scaled_scores.index.tolist()
  159. col_to_use = scaled_scores.columns.tolist()[0]
  160. score_df[col_to_use] = scaled_scores[col_to_use].values
  161. score_df.set_index('timestamp', inplace=True)
  162. # print("Score_df", score_df)
  163. # print("score_df", score_df)
  164. # s = score_df.apply(self.average_scores, axis=1)
  165. # split_score = self.split_scores(score_df)
  166. # model_based_score[k] = split_score
  167. # tmpScores['score'] = score_df
  168. score[k] = score_df
  169. return score
  170. def fetch_model_based_score(self):
  171. return self.model_based_score
  172. def split_scores(self, score):
  173. cols = score.columns.tolist()
  174. for col in cols:
  175. if 'LOF' in col:
  176. lof_score = score[[col]]
  177. elif 'FFT' in col:
  178. fft_score = score[[col]]
  179. elif 'MAD' in col:
  180. mad_score = score[[col]]
  181. split_score = dict()
  182. split_score['MAD'] = mad_score
  183. split_score['FFT'] = fft_score
  184. split_score['LOF'] = lof_score
  185. return split_score
  186. def fetch_(self):
  187. return self.scores
  188. def average_scores(self, a):
  189. cols = a.index.tolist()
  190. avg = a.values
  191. avg = sum(avg)/len(avg)
  192. return avg
  193. if __name__ == '__main__':
  194. eng_obj = Engine()