Multithreaded_server.py

/DaCena/dacena/server/Multithreaded_server.py

https://gitlab.com/Angiolillo/SOKE
Python | 348 lines | 223 code | 9 blank | 116 comment | 3 complexity | 534872561843264643d4ec3fdba7f636 MD5 | raw file

from server import Clustering_dirichlet
from sklearn.naive_bayes import MultinomialNB
import numpy as np
import pandas as pd
import pickle
from server import Preprocessing
import threading
import socket

"""
@attention: This class implemented the server
"""
class ThreadedServer(object):
    session = {};
    
    '''
    @attention: This method takes only the associations' data in the variable "associations" from associations_score.csv 
    @param associations: List of association's id
    @param artucle: article's id
    @param graph: Flag. If it is True the method returns the data to use to create the graph 
    @param assoc_data: It contains the association's data from associations_score.csv
    '''
    def find(self, associations, article, graph):
        assoc_data = []
        all_score = Preprocessing.extract_association_score(article, graph)
        for i in associations:
            for j in all_score:
                if int(i) == int(j[0]):
                    assoc_data.append(j)
    
        return assoc_data

    """
    @attention: This method deletes the associations in "ids" from "associations"
    @param associations: list of associations 
    @param ids: list of association's id to remove
    @return new_assoc: new list of associations without the associations in ids
    """
    def delete_from(self, associations, ids):
        new_assoc = []
        for i in associations:
            if int(i[0]) not in ids : 
                new_assoc.append(i)
        return new_assoc


    """
    @attention: This method is used in "sort_prob" to return 
    the value of a item
    @param item
    @return: item value
    """
    def getKey(self, item):
        return item[1]

    """
    @attentions: It sorts the probability
    @param prob:list of probability
    @return sort_id: list of probability sorted 
    """
    def sort_prob(self, prob):
        sort_list_1 = []
        sort_list_2 = []
        sort_list_3 = []
        sort_list_4 = []
        sort_list_5 = []
        sort_list_6 = []
        
        for row in prob:
            name = row[0]
            feasibility = row[1]
            cl = 0
            max_value = 0
            for col in range(0,6):
                if max_value < feasibility[col]:
                    max_value = feasibility[col]
                    cl = col
            if cl == 0:
                sort_list_1.append((name, max_value))
            elif cl == 1:
                sort_list_2.append((name, max_value))
            elif cl == 2:
                sort_list_3.append((name, max_value))
            elif cl == 3:
                sort_list_4.append((name, max_value))
            elif cl == 4:
                sort_list_5.append((name, max_value))
            elif cl == 5:
                sort_list_6.append((name, max_value))
        
        sort_list_1 = sorted(sort_list_1, key=self.getKey,  reverse=True)
        sort_list_2 = sorted(sort_list_2, key=self.getKey,  reverse=True)
        sort_list_3 = sorted(sort_list_3, key=self.getKey,  reverse=True)
        sort_list_4 = sorted(sort_list_4, key=self.getKey,  reverse=True)
        sort_list_5 = sorted(sort_list_5, key=self.getKey,  reverse=True)
        sort_list_6 = sorted(sort_list_6, key=self.getKey,  reverse=True)
        sort = sort_list_6 + sort_list_5 + sort_list_4 + sort_list_3 + sort_list_2 + sort_list_1 
        sort = np.array(sort)[:,:1]
        sort_id = []
        
        for element in sort:
            sort_id.append(int(element[0]))
        
        return sort_id
    
    """
    @attention: It calculates the entropy of a prediction, where the entropy shows how much a prediction is unknown
    @param predictions: list of predictions (probabilities)
    @return: entropy_list: list of entropy
    """
    def entropy(self, predictions):
        entropy_list = {}
        for row in predictions:
            id_assoc = row[0]
            prob = row[1]
            entropy = 0
            for i in range(0, len(prob)):
                if prob[i] != 0:
                    entropy += -prob[i] * np.log2(prob[i])
                
            entropy_list[id_assoc] = entropy
             
        return entropy_list
 
    '''
    @attention: This method executes a clutering of the associations to get the centroids 
    @param article: article ID
    @param user: user's id
    @return list of centroids
    '''               
    def clustering(self,article,user):            
            all_score = Preprocessing.extract_association_score(article)
            associations_score = all_score[:, [0, 5, 8, 9, 6, 3, 4]]
             
            df = pd.DataFrame(data = associations_score[0:, 0:],
                            index = associations_score[0:, 0],
                            columns = ["association_id",
                                             "localPageRankMean",
                                             "path_informativeness",
                                             "path_pattern_informativeness",
                                             "localHubMean",
                                             "relevance_score",
                                             "rarity_score"])
            df = df.set_index("association_id")
            diri = Clustering_dirichlet.DirichletClustering()
            diri.dirichlet(df,user, article)
            ids = diri.predict(df, user, article)
            return self.find(np.sort(ids), article, True), np.sort(ids) 
            
    """
    @attention: This method executes the online learning 
    @param ids: id of the associations used in the learning phase
    @param valuation: valuation of the associations in "ids"
    @param article: article's id
    @param learner: instance of the class MultinomialNB
    @return predictions: predictions 
    @return assoc_ids: id of the predictions   
    """
    def learning(self, ids, valuation, article,learner):
        #getting info about association evaluated
        data = self.find(ids, article, False)
        
        #Learning stage
        for row in range(0, len(valuation)):
            x = np.array(data[row])
            x = x[2:] # remove ID, article 
            y = np.array([valuation[row]])
            if row == 0:           
                learner.partial_fit(x, y, [1, 2, 3, 4, 5, 6])
            else:
                learner.partial_fit(x, y)
        
        #getting all the article's associations for the prediction
        all_assoc = Preprocessing.extract_association_score(article, False)
        
        #deleting associations used for learning from all_assoc
        all_assoc = self.delete_from(all_assoc, ids)
        all_assoc = np.asarray(all_assoc)
        
        #removing ID and article from associations
        assoc_for_prediction = np.zeros((len(all_assoc),9))
        assoc_ids = []
        for i in range (0, all_assoc.shape[0]):
            assoc_for_prediction[i] = all_assoc[i][2:]
            assoc_ids.append(all_assoc[i][0])
            
            
        predictions = learner.predict(assoc_for_prediction)  
        return predictions, assoc_ids
        
    """
    @attention: This method initializes the server
    @param host: IP address
    @param port: port number  
    """ 
    def __init__(self, host, port):
        self.host = host
        self.port = port
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.sock.bind((self.host, self.port))
        
    """
    @attention: listening method
    """
    def listen(self):
        self.sock.listen(5)
        while True:
            client, address = self.sock.accept()
            client.settimeout(300000) #wait up to 5 minutes
            threading.Thread(target = self.listenToClient,args = (client,address)).start()
    
    """
    @attention: This method executes the first step of the online learning 
    @param client: client's instance
    @param article: article's id
    @param user: user's id
    """
    #fist loop (clustering)
    def first_step(self,client, article, user):
        learner = MultinomialNB()
        
        '''SECOND STEP: executing the cluster to find the associations to evaluate in the first loop'''
        data, ids = self.clustering(article, user)
        string_assoc_to_evaluate = ""
        for i in data:
            for j in i:
                string_assoc_to_evaluate = string_assoc_to_evaluate + ","  + j 
         
            string_assoc_to_evaluate = string_assoc_to_evaluate + "."
            
        #sending the associations to evaluate
        assoc = pickle.dumps(string_assoc_to_evaluate)#serialization
        client.send(assoc)
        
        '''THIRD STEP: getting the evaluations from the Client
        '''
        evaluate = client.recv(1024)
        evaluate = eval("[" + evaluate + "]")
        
        ''' FOURTH STEP: executing online learning '''
        predictions, assoc_ids = self.learning(ids, np.asarray(evaluate), article, learner)
        
        ''' FIFTH STEP: find new associations to evaluate '''
        assoc_measures_ids = self.find(assoc_ids, article, False) #get the measures for all assoc_ids (contains id and article_id)
        
        #now remove ids and article_id from assoc_measures_ids
        assoc_measures = []
        for item in assoc_measures_ids:
            assoc_measures.append(item[2:])
        
        prob = learner.predict_proba(assoc_measures)             
        id_score = []
        len_p = len(predictions)
        if len_p == len(assoc_ids):
            for i in range (0, len_p):
                id_score.append((assoc_ids[i], prob[i]))
                
        sorted_associations = self.sort_prob(id_score)#first associations are those we will select
        data= ', '.join(str(x) for x in sorted_associations[:10])
        self.session[user + str(article) + "learner"] =  learner # saving the learner
        self.session[user + str(article)  + "id_score"] = id_score #saving the id_score for the second step
        client.send(data)
    
    """
    @attention: This method executes the second step of the online learning 
    @param client: client's instance
    @param article: article's id
    @param user: user's id      
    """
    def second_step(self, client, article, user):
        id_score = self.session[user + str(article) + "id_score"]
        learner = self.session[user + str(article) + "learner"]
        entropies = self.entropy(id_score)         
        entropies = sorted(entropies.items(), key=lambda x: x[1], reverse=True)
        to_be_evalueted = entropies[:2]
        ids = []
        for item in to_be_evalueted:
            ids.append(item[0])
          
        assoc_to_evaluate = self.find(ids, article, True)
        string_assoc_to_evaluate = ""
        for i in assoc_to_evaluate[0:2]:
            for j in i:
                string_assoc_to_evaluate = string_assoc_to_evaluate + ","  + j 
         
            string_assoc_to_evaluate = string_assoc_to_evaluate + "."
        
        serialized_data = pickle.dumps(string_assoc_to_evaluate)
        client.send(serialized_data)#sending the 2 association to be evaluated
        evaluate = (client.recv(1024))
        evaluate = eval("[" + evaluate + "]")
        predictions, assoc_ids = self.learning(ids, np.asarray(evaluate), article, learner)
            
        ''' FIFTH STEP: find new associations to evaluate '''
        assoc_measures_ids = self.find(assoc_ids, article, False) #get the measures for all assoc_ids (contains id and article_id)
        
        #now it removes ids and article_id from assoc_measures_ids
        assoc_measures = []
        for item in assoc_measures_ids:
            assoc_measures.append(item[2:])
        
        prob = learner.predict_proba(assoc_measures)  
        id_score = []
        len_p = len(predictions)
        if len_p == len(assoc_ids):
            for i in range (0, len_p):
                id_score.append((assoc_ids[i], prob[i])) 
                 
        sorted_associations = self.sort_prob(id_score)#first associations are those we will select
        data= ', '.join(str(x) for x in sorted_associations[:10])
        self.session[user + str(article) + "learner"] =  learner # saving the learner
        self.session[user + str(article) + "id_score"] = id_score #saving the id_score for the second step
        client.send(data)
    
    """
    @attention: Main method
    @param client: client's instance
    @param address: IP address  
    """
    def listenToClient(self, client, address):
        while True:
            
            #getting a flag to know if is the first iteration
            clustering = (client.recv(1024))
            
            ''' FIRST STEP: getting user and article from Client'''
            #get user
            user  = (client.recv(1024))
            if not user:
                break
            
            #get article
            article = int(client.recv(1024))
            if not article:
                break
            
            if clustering == "true":
                self.first_step(client, article, user) 
            else:
                self.second_step(client, article,user)
            
    
if __name__ == "__main__":
    host = "127.0.0.1"
    port_num = 6000
    ThreadedServer('',port_num).listen() #assigns a free port to client's thread