PageRenderTime 24ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/AI/test_sklearn.py

https://gitlab.com/peter__barnes/work
Python | 109 lines | 82 code | 18 blank | 9 comment | 12 complexity | d891933cc0ce8c2fee82f2dd1fc0062d MD5 | raw file
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.tree import DecisionTreeClassifier
  4. from sklearn.naive_bayes import GaussianNB
  5. from sklearn.metrics import confusion_matrix
  6. from sklearn.metrics import accuracy_score, precision_score, recall_score
  7. df_origin = pd.read_csv('heart_failure_clinical_records_dataset.csv')
  8. df = df_origin
  9. def reset_creatinine_phosphokinase(x):
  10. if x>1000:
  11. return round(11 + x/1000)
  12. return round(x/100)
  13. df['creatinine_phosphokinase'] = df['creatinine_phosphokinase'].apply(lambda x: reset_creatinine_phosphokinase(x))
  14. df['creatinine_phosphokinase']=df['creatinine_phosphokinase'].astype(int);
  15. df['time']=df['time']/100
  16. df['time']=df['time'].astype(int)
  17. df['ejection_fraction']=df['ejection_fraction']/20
  18. df['ejection_fraction']=df['ejection_fraction'].astype(int)
  19. quantile = pd.Series(np.arange(4))
  20. quantile[0] =df['platelets'].quantile(0.2)
  21. quantile[1] =df['platelets'].quantile(0.4)
  22. quantile[2] =df['platelets'].quantile(0.6)
  23. quantile[3] =df['platelets'].quantile(0.8)
  24. def reset_by_quantile(x, quant):
  25. if(x<quant[0]):
  26. return 0
  27. elif(x<quant[1]):
  28. return 1
  29. elif(x<quant[2]):
  30. return 2
  31. elif(x<quant[3]):
  32. return 3
  33. else:
  34. return 4
  35. df['platelets']=df['platelets'].apply(lambda x: reset_by_quantile(x,quantile))
  36. df['platelets']=df['platelets'].astype(int)
  37. quantile = pd.Series(np.arange(4))
  38. quantile[0] =df['serum_creatinine'].quantile(0.2)
  39. quantile[1] =df['serum_creatinine'].quantile(0.4)
  40. quantile[2] =df['serum_creatinine'].quantile(0.6)
  41. quantile[3] =df['serum_creatinine'].quantile(0.8)
  42. def reset_by_quantile(x, quant):
  43. if(x<quant[0]):
  44. return 0
  45. elif(x<quant[1]):
  46. return 1
  47. elif(x<quant[2]):
  48. return 2
  49. elif(x<quant[3]):
  50. return 3
  51. else:
  52. return 4
  53. df['serum_creatinine']=df['serum_creatinine'].apply(lambda x: reset_by_quantile(x,quantile))
  54. df['serum_creatinine']=df['serum_creatinine'].astype(int)
  55. df = df.loc[:,['anaemia','creatinine_phosphokinase', \
  56. 'serum_creatinine','ejection_fraction','diabetes','platelets', 'high_blood_pressure','sex', \
  57. 'smoking','time','DEATH_EVENT']]
  58. #realdata = df.loc[0:20,['age','anaemia','diabetes','high_blood_pressure','sex','smoking','DEATH_EVENT']]
  59. #realdata['age']=round(realdata['age']/20)
  60. #realdata['age']=realdata['age'].astype(int);
  61. #realdata = df.loc[0:20,['diabetes','sex','smoking','DEATH_EVENT']]
  62. df_train = df.sample(frac=0.8)
  63. rowlist=[]
  64. for indexs in df_train.index:
  65. rowlist.append(indexs)
  66. df_test=df.drop(rowlist,axis=0)
  67. print(df.loc[0,:])
  68. print(df.loc[1,:])
  69. print(df.loc[150,:])
  70. #print(df_train)
  71. #print(df_test)
  72. X = df_train.iloc[:,:10]
  73. Y = df_train.iloc[:,10]
  74. X_t = df_test.iloc[:,:10]
  75. Y_t = df_test.iloc[:,10]
  76. #print(X)
  77. #print(Y)
  78. #model = DecisionTreeClassifier()
  79. model = GaussianNB()
  80. model.fit(X,Y)
  81. pred = model.predict(X_t)
  82. print(pred)
  83. print(Y_t)
  84. confusion_matrix = confusion_matrix(Y_t,pred)
  85. print(confusion_matrix)
  86. accuracy = accuracy_score(Y_t,pred)
  87. precision = precision_score(Y_t,pred)
  88. recall = recall_score(Y_t,pred)
  89. print(accuracy)
  90. print(precision)
  91. print(recall)