
/DaCena/dacena/server/Multithreaded_server.py

https://gitlab.com/Angiolillo/SOKE
from server import Clustering_dirichlet
from sklearn.naive_bayes import MultinomialNB
import numpy as np
import pandas as pd
import pickle
from server import Preprocessing
import threading
import socket

"""
@attention: This class implements the server
"""
class ThreadedServer(object):
    session = {}
    '''
    @attention: This method extracts from associations_score.csv the data of the associations listed in "associations"
    @param associations: list of association ids
    @param article: article's id
    @param graph: flag; if True the method returns the data used to create the graph
    @return assoc_data: the associations' data from associations_score.csv
    '''
    def find(self, associations, article, graph):
        assoc_data = []
        all_score = Preprocessing.extract_association_score(article, graph)
        for i in associations:
            for j in all_score:
                if int(i) == int(j[0]):
                    assoc_data.append(j)
        return assoc_data
    """
    @attention: This method deletes the associations in "ids" from "associations"
    @param associations: list of associations
    @param ids: list of association ids to remove
    @return new_assoc: new list of associations without the associations in ids
    """
    def delete_from(self, associations, ids):
        new_assoc = []
        for i in associations:
            if int(i[0]) not in ids:
                new_assoc.append(i)
        return new_assoc
    """
    @attention: This method is used in "sort_prob" to return
    the value of an item
    @param item
    @return: item value
    """
    def getKey(self, item):
        return item[1]
    """
    @attention: It sorts the associations by predicted class and, within each class, by probability
    @param prob: list of (association id, class probabilities) pairs
    @return sort_id: list of association ids, sorted
    """
    def sort_prob(self, prob):
        sort_list_1 = []
        sort_list_2 = []
        sort_list_3 = []
        sort_list_4 = []
        sort_list_5 = []
        sort_list_6 = []
        for row in prob:
            name = row[0]
            feasibility = row[1]
            cl = 0
            max_value = 0
            for col in range(0, 6):
                if max_value < feasibility[col]:
                    max_value = feasibility[col]
                    cl = col
            if cl == 0:
                sort_list_1.append((name, max_value))
            elif cl == 1:
                sort_list_2.append((name, max_value))
            elif cl == 2:
                sort_list_3.append((name, max_value))
            elif cl == 3:
                sort_list_4.append((name, max_value))
            elif cl == 4:
                sort_list_5.append((name, max_value))
            elif cl == 5:
                sort_list_6.append((name, max_value))
        sort_list_1 = sorted(sort_list_1, key=self.getKey, reverse=True)
        sort_list_2 = sorted(sort_list_2, key=self.getKey, reverse=True)
        sort_list_3 = sorted(sort_list_3, key=self.getKey, reverse=True)
        sort_list_4 = sorted(sort_list_4, key=self.getKey, reverse=True)
        sort_list_5 = sorted(sort_list_5, key=self.getKey, reverse=True)
        sort_list_6 = sorted(sort_list_6, key=self.getKey, reverse=True)
        sort = sort_list_6 + sort_list_5 + sort_list_4 + sort_list_3 + sort_list_2 + sort_list_1
        sort = np.array(sort)[:, :1]
        sort_id = []
        for element in sort:
            sort_id.append(int(element[0]))
        return sort_id
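    # Note: associations whose highest probability falls in class 6 are ranked first, then
    # class 5, and so on down to class 1; ties inside a class are broken by that maximum
    # probability, so the head of the list holds the associations the learner is most
    # confident belong to the best-rated class.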
    """
    @attention: It calculates the entropy of a prediction; the entropy measures how uncertain a prediction is
    @param predictions: list of (association id, probabilities) pairs
    @return entropy_list: dictionary mapping each association id to its entropy
    """
    def entropy(self, predictions):
        entropy_list = {}
        for row in predictions:
            id_assoc = row[0]
            prob = row[1]
            entropy = 0
            for i in range(0, len(prob)):
                if prob[i] != 0:
                    entropy += -prob[i] * np.log2(prob[i])
            entropy_list[id_assoc] = entropy
        return entropy_list
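    # With six classes the entropy ranges from 0 (all probability mass on one class) to
    # log2(6) ~= 2.585 (uniform prediction). second_step() below asks the user about the
    # associations with the highest entropy, which is in effect an uncertainty-sampling
    # strategy for the online learner.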
    '''
    @attention: This method executes a clustering of the associations to get the centroids
    @param article: article's id
    @param user: user's id
    @return: list of centroids
    '''
    def clustering(self, article, user):
        all_score = Preprocessing.extract_association_score(article)
        associations_score = all_score[:, [0, 5, 8, 9, 6, 3, 4]]
        df = pd.DataFrame(data=associations_score[0:, 0:],
                          index=associations_score[0:, 0],
                          columns=["association_id",
                                   "localPageRankMean",
                                   "path_informativeness",
                                   "path_pattern_informativeness",
                                   "localHubMean",
                                   "relevance_score",
                                   "rarity_score"])
        df = df.set_index("association_id")
        diri = Clustering_dirichlet.DirichletClustering()
        diri.dirichlet(df, user, article)
        ids = diri.predict(df, user, article)
        return self.find(np.sort(ids), article, True), np.sort(ids)
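    # The ids returned by DirichletClustering.predict() are treated here as the cluster
    # representatives (the "centroids" in the docstring); their full data is looked up
    # with find() and, in first_step(), sent to the client as the first batch of
    # associations to rate.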
    """
    @attention: This method executes the online learning
    @param ids: ids of the associations used in the learning phase
    @param valuation: evaluation of the associations in "ids"
    @param article: article's id
    @param learner: instance of the class MultinomialNB
    @return predictions: predictions
    @return assoc_ids: ids of the predictions
    """
    def learning(self, ids, valuation, article, learner):
        # getting info about the associations that were evaluated
        data = self.find(ids, article, False)
        # learning stage
        for row in range(0, len(valuation)):
            x = np.array(data[row])
            x = x[2:]  # remove ID, article
            x = x.reshape(1, -1)  # partial_fit expects a 2D array (one sample per row)
            y = np.array([valuation[row]])
            if row == 0:
                learner.partial_fit(x, y, [1, 2, 3, 4, 5, 6])
            else:
                learner.partial_fit(x, y)
        # getting all the article's associations for the prediction
        all_assoc = Preprocessing.extract_association_score(article, False)
        # deleting the associations used for learning from all_assoc
        all_assoc = self.delete_from(all_assoc, ids)
        all_assoc = np.asarray(all_assoc)
        # removing ID and article from the associations
        assoc_for_prediction = np.zeros((len(all_assoc), 9))
        assoc_ids = []
        for i in range(0, all_assoc.shape[0]):
            assoc_for_prediction[i] = all_assoc[i][2:]
            assoc_ids.append(all_assoc[i][0])
        predictions = learner.predict(assoc_for_prediction)
        return predictions, assoc_ids
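    # Note: scikit-learn requires the full list of classes on the *first* call to
    # partial_fit (here the six possible evaluation values 1-6); subsequent calls can
    # then update the MultinomialNB incrementally, one rated association at a time.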
    """
    @attention: This method initializes the server
    @param host: IP address
    @param port: port number
    """
    def __init__(self, host, port):
        self.host = host
        self.port = port
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.sock.bind((self.host, self.port))
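    # SO_REUSEADDR lets the server rebind to the same address right after a restart,
    # without waiting for the previous socket to leave the TIME_WAIT state.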
    """
    @attention: listening method
    """
    def listen(self):
        self.sock.listen(5)
        while True:
            client, address = self.sock.accept()
            client.settimeout(300000)  # per-client socket timeout, in seconds
            threading.Thread(target=self.listenToClient, args=(client, address)).start()
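    # Each accepted connection is served by its own thread running listenToClient(), so
    # several clients can go through the evaluation loop concurrently; the shared
    # "session" dictionary keeps each user/article learner between requests.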
    """
    @attention: This method executes the first step of the online learning
    @param client: client's instance
    @param article: article's id
    @param user: user's id
    """
    # first loop (clustering)
    def first_step(self, client, article, user):
        learner = MultinomialNB()
        '''SECOND STEP: executing the clustering to find the associations to evaluate in the first loop'''
        data, ids = self.clustering(article, user)
        string_assoc_to_evaluate = ""
        for i in data:
            for j in i:
                string_assoc_to_evaluate = string_assoc_to_evaluate + "," + j
            string_assoc_to_evaluate = string_assoc_to_evaluate + "."
        # sending the associations to evaluate
        assoc = pickle.dumps(string_assoc_to_evaluate)  # serialization
        client.send(assoc)
        '''THIRD STEP: getting the evaluations from the client'''
        evaluate = client.recv(1024)
        evaluate = eval("[" + evaluate + "]")
        '''FOURTH STEP: executing the online learning'''
        predictions, assoc_ids = self.learning(ids, np.asarray(evaluate), article, learner)
        '''FIFTH STEP: finding new associations to evaluate'''
        assoc_measures_ids = self.find(assoc_ids, article, False)  # get the measures for all assoc_ids (contains id and article_id)
        # now remove the id and article_id from assoc_measures_ids
        assoc_measures = []
        for item in assoc_measures_ids:
            assoc_measures.append(item[2:])
        prob = learner.predict_proba(assoc_measures)
        id_score = []
        len_p = len(predictions)
        if len_p == len(assoc_ids):
            for i in range(0, len_p):
                id_score.append((assoc_ids[i], prob[i]))
        sorted_associations = self.sort_prob(id_score)  # the first associations are those we will select
        data = ', '.join(str(x) for x in sorted_associations[:10])
        self.session[user + str(article) + "learner"] = learner  # saving the learner
        self.session[user + str(article) + "id_score"] = id_score  # saving the id_score for the second step
        client.send(data)
    """
    @attention: This method executes the second step of the online learning
    @param client: client's instance
    @param article: article's id
    @param user: user's id
    """
    def second_step(self, client, article, user):
        id_score = self.session[user + str(article) + "id_score"]
        learner = self.session[user + str(article) + "learner"]
        entropies = self.entropy(id_score)
        entropies = sorted(entropies.items(), key=lambda x: x[1], reverse=True)
        to_be_evaluated = entropies[:2]
        ids = []
        for item in to_be_evaluated:
            ids.append(item[0])
        assoc_to_evaluate = self.find(ids, article, True)
        string_assoc_to_evaluate = ""
        for i in assoc_to_evaluate[0:2]:
            for j in i:
                string_assoc_to_evaluate = string_assoc_to_evaluate + "," + j
            string_assoc_to_evaluate = string_assoc_to_evaluate + "."
        serialized_data = pickle.dumps(string_assoc_to_evaluate)
        client.send(serialized_data)  # sending the 2 associations to be evaluated
        evaluate = client.recv(1024)
        evaluate = eval("[" + evaluate + "]")
        predictions, assoc_ids = self.learning(ids, np.asarray(evaluate), article, learner)
        '''FIFTH STEP: finding new associations to evaluate'''
        assoc_measures_ids = self.find(assoc_ids, article, False)  # get the measures for all assoc_ids (contains id and article_id)
        # now remove the id and article_id from assoc_measures_ids
        assoc_measures = []
        for item in assoc_measures_ids:
            assoc_measures.append(item[2:])
        prob = learner.predict_proba(assoc_measures)
        id_score = []
        len_p = len(predictions)
        if len_p == len(assoc_ids):
            for i in range(0, len_p):
                id_score.append((assoc_ids[i], prob[i]))
        sorted_associations = self.sort_prob(id_score)  # the first associations are those we will select
        data = ', '.join(str(x) for x in sorted_associations[:10])
        self.session[user + str(article) + "learner"] = learner  # saving the learner
        self.session[user + str(article) + "id_score"] = id_score  # saving the id_score for the next iteration
        client.send(data)
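    # Each call to second_step() is one further active-learning round: it asks the user to
    # rate the two associations the current model is most uncertain about (highest entropy),
    # updates the learner with those ratings, and returns a fresh top-10 ranking.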
    """
    @attention: Main method
    @param client: client's instance
    @param address: IP address
    """
    def listenToClient(self, client, address):
        while True:
            # getting a flag to know whether this is the first iteration
            clustering = client.recv(1024)
            '''FIRST STEP: getting the user and the article from the client'''
            # get the user
            user = client.recv(1024)
            if not user:
                break
            # get the article
            article = int(client.recv(1024))
            if not article:
                break
            if clustering == "true":
                self.first_step(client, article, user)
            else:
                self.second_step(client, article, user)

if __name__ == "__main__":
    host = "127.0.0.1"
    port_num = 6000
    ThreadedServer('', port_num).listen()  # an empty host binds the server to all local interfaces on port 6000
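# A minimal client-side sketch of the protocol above (hypothetical, for illustration only;
# it assumes each send() arrives as a separate recv() on the server, Python 2 style string
# sockets as in the server code, and that the number of ratings matches the number of
# associations returned by the clustering step):
#
#     import pickle
#     import socket
#
#     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#     s.connect(("127.0.0.1", 6000))
#     s.send("true")                        # flag: first iteration -> clustering step
#     s.send("user_1")                      # user id (hypothetical)
#     s.send("42")                          # article id (hypothetical)
#     to_rate = pickle.loads(s.recv(4096))  # pickled string of associations to evaluate
#     s.send("3, 5, 1, 6, 2")               # ratings, parsed server-side via eval("[" + ... + "]")
#     top_ids = s.recv(1024)                # comma-separated ids of the ten best predicted associations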