PageRenderTime 53ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/greenavailability_1/evaluation/data/horizon_data/get_erros.py

https://gitlab.com/abushoeb/ghadoop-master
Python | 313 lines | 223 code | 52 blank | 38 comment | 60 complexity | a1600fa6a3afaaa28e40ccc1a58a8763 MD5 | raw file
  1. #!/usr/bin/python
  2. import os,sys,re,string,getopt,glob,math
  3. totalHoursInOneDay = None
  4. totalHours = 5*24-9
  5. from datetime import *
  6. noofmethods = 3
  7. error_corr_count = [0, 0, 0]
  8. probable_count = [0 , 0, 0]
  9. #horizon = [1,3,6,12,24,48]
  10. format = "%Y-%m-%d %H:%M:%S"
  11. #all_hours = {}
  12. #all_errors = {}
  13. #all_bases = {}
  14. def addtodict(date, value, base):
  15. if all_hours.has_key(date):
  16. return
  17. all_hours[date] = True
  18. #print date, value, base
  19. d = datetime.strptime(date, format)
  20. d = datetime(d.year,d.month,d.day)
  21. if not all_errors.has_key(d):
  22. all_errors[d] = 0
  23. all_errors[d] += value
  24. if not all_bases.has_key(d):
  25. all_bases[d] = 0
  26. all_bases[d] += base
  27. return
  28. def getfromdict(date):
  29. if all_errors.has_key(d):
  30. value = all_errors[d]
  31. return value
  32. def parseErrorForFile(filename,horizon=49,long=False):
  33. global totalHoursInOneDay
  34. global all_hours
  35. global all_errors
  36. global all_bases
  37. all_hours = {}
  38. all_errors = {}
  39. all_bases = {}
  40. startTime=None
  41. endTime = None
  42. totalActual = 0
  43. totalPredicted = []
  44. totalErrorProduction = []
  45. count = 0
  46. errorSum = []
  47. weightedSum=[]
  48. actualCount=0
  49. predictedCount=[]
  50. done = [False, False, False]
  51. state = [0,0,0]
  52. daily_prod = []
  53. daily_pred = []
  54. day_count = 0
  55. grand_total_predicted = 0
  56. grand_total_actual = 0
  57. avg_daily_error = 0
  58. daily_errors = []
  59. weighted_errors = [[],[],[]]
  60. for i in range(noofmethods):
  61. totalPredicted.append(0)
  62. totalErrorProduction.append(0)
  63. errorSum.append(0)
  64. weightedSum.append(0)
  65. predictedCount.append(0)
  66. fd = open(filename,'r')
  67. #call_hour = int(filename.split(".")[1].split("_")[-1])
  68. #print call_hour
  69. #if call_hour<8 or call_hour>17:
  70. #for i in range(noofmethods):
  71. # state[i]=2
  72. if long:
  73. totalHoursInOneDay=48
  74. else:
  75. totalHoursInOneDay= 24
  76. line_no = 0
  77. for line in fd:
  78. line = line.strip()
  79. if not line or line.startswith("#"):
  80. continue
  81. elements = line.split()
  82. if startTime == None:
  83. if long:
  84. startTime = elements[0]+" "+elements[1]
  85. else:
  86. startTime = elements[0]
  87. dtime = elements[0]+" "+elements[1]
  88. predicted = []
  89. #error = []
  90. #errorRate = []
  91. #weightedRate = []
  92. hour = int(elements[3])
  93. actual = float(elements[7])
  94. base = float(elements[8])
  95. tag = float(elements[9])
  96. for i in range(noofmethods):
  97. predicted.append(float(elements[i+4]))
  98. error = (abs(predicted[i]-actual))
  99. if abs(predicted[i]-base*tag/100)>0.5 and not done[i]:
  100. #print base, tag, predicted[i], base*tag/100
  101. error_corr_count[i] += 1
  102. #done[i] = True
  103. if hour>8:
  104. #print state[i], call_hour,hour
  105. if state[i]==0:
  106. probable_count[i] += 1
  107. state[i] = 1
  108. elif state[i]==1:
  109. probable_count[i] += 1
  110. elif state[i]==1:
  111. state[i] = 2
  112. errorRate = 0
  113. weightedRate = 0
  114. if(actual>1):
  115. errorRate = error/actual
  116. weightedRate = errorRate*actual
  117. totalErrorProduction[i]+=actual
  118. predictedCount[i]+=1
  119. weighted_errors[i].append(weightedRate)
  120. elif predicted[i]>1:
  121. errorRate = error/predicted[i]
  122. weightedRate = errorRate*predicted[i]
  123. totalErrorProduction[i]+=predicted[i]
  124. predictedCount[i]+=1
  125. weighted_errors[i].append(weightedRate)
  126. errorSum[i]+=errorRate
  127. weightedSum[i]+=weightedRate
  128. if i==2:
  129. #print dtime, abs(predicted[i]-actual), predicted[2], actual, base, tag
  130. addtodict(dtime, predicted[i]-actual, base)
  131. totalPredicted[i]+=predicted[i]
  132. if actual>0:
  133. actualCount+=1
  134. totalActual+=actual
  135. if line_no>0 and (line_no%24)==0:
  136. daily_prod.append(totalActual)
  137. daily_pred.append(totalPredicted[2])
  138. daily_errors.append(abs(totalPredicted[2]-totalActual)/totalActual*100)
  139. #if abs(totalPredicted[2]-totalActual)/totalActual*100>40:
  140. #print "dailyerror", filename, line_no, totalPredicted[2], totalActual, abs(totalPredicted[2]-totalActual), abs(totalPredicted[2]-totalActual)/totalActual*100
  141. avg_daily_error += abs(totalPredicted[2]-totalActual)/totalActual*100
  142. grand_total_predicted += totalPredicted[2]
  143. grand_total_actual += totalActual
  144. totalPredicted[2] = 0
  145. totalActual = 0
  146. day_count += 1
  147. line_no += 1
  148. count += 1
  149. #print i, day_count
  150. fd.close()
  151. #print filename, totalPredicted, totalActual, totalHoursInOneDay, count
  152. #print filename, avg_daily_error, day_count
  153. avg_daily_error = avg_daily_error/float(day_count)
  154. avgError=[]
  155. avgWeightedError=[]
  156. productionError=[]
  157. avgErrorIncludeingNights=[]
  158. for i in range(noofmethods):
  159. avgError.append(errorSum[i]/predictedCount[i]*100)
  160. avgWeightedError.append(weightedSum[i]/totalErrorProduction[i]*100)
  161. #productionError.append(abs(totalPredicted[i]-totalActual)/totalActual*100)
  162. productionError.append(avg_daily_error)
  163. avgErrorIncludeingNights.append(errorSum[i]/count*100)
  164. #fd = open(filename+"_weight,txt",'w')
  165. #for i in range(len(weighted_errors[2])):
  166. #dtime = startTime
  167. #for i in range(len(daily_errors)):
  168. #if weighted_errors[2][i] > 0:
  169. #print >>fd,weighted_errors[2][i]
  170. #print dtime,"\t",daily_errors[i],"\t",all_errors[dtime]
  171. #for i in range(len(daily_prod)):
  172. # if daily_prod[i]<>0:
  173. # productionError.append()
  174. print filename
  175. for i in sorted(all_errors.keys()):
  176. try:
  177. print i,"\t",i.month,"\t",abs(all_errors[i]),"\t", all_bases[i],"\t", abs(all_errors[i])/all_bases[i]*100
  178. except ZeroDivisionError:
  179. print filename, i, all_bases[i]
  180. fd.close()
  181. return (avgErrorIncludeingNights,avgError,avgWeightedError,productionError,count,actualCount,predictedCount,startTime,grand_total_predicted,grand_total_actual)
  182. if __name__=='__main__':
  183. commonOptions="f:d:hl"
  184. opts, args = getopt.getopt(sys.argv[1:], commonOptions)
  185. printHtml=False
  186. twodays = False
  187. files = []
  188. for o,a in opts:
  189. if o == '-b':
  190. a = a.strip()
  191. d = datetime.strptime(a,format)
  192. BASE_DATE = datetime(d.year,d.month,d.day)
  193. elif o == '-d':
  194. dirname = a
  195. files.extend(glob.glob(os.path.join(dirname,"*fore.txt")))
  196. elif o == '-f':
  197. files.extend(a.split(","))
  198. elif o == "-l":
  199. twodays = True
  200. elif o == "-f":
  201. shift = int(a)
  202. files.sort()
  203. filecount = 1
  204. if printHtml:
  205. print "<table border='2'>"
  206. #print "<tr><th>Day/Time</th><th>Avg Error(24 hours)</th><th>Avg Error(nonzero Actual or Prediction)</th><th>Weighted Average with Production</th><th>Daily Production Error</th></tr>"
  207. print "<tr><th>Day/Time</th><th>Avg Error(24 hours)</th><th>Avg Error(24 hours)</th><th>Avg Error(24 hours)</th><th>Avg Error(nonzero Actual)</th><th>Avg Error(nonzero Actual)</th><th>Avg Error(nonzero Actual)</th><th>Weighted Error</th><th>Weighted Error</th><th>Weighted Error</th><th>Daily Production Error</th><th>Daily Production Error</th><th>Daily Production Error</th></tr>"
  208. print "<tr><td></td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td></tr>"
  209. for f in files:
  210. (avgErrorIncludeingNights,avgError,avgWeightedError,productionError,count,actualCount,predictedCount,day, tot_pred, tot_actual) = parseErrorForFile(f,long=twodays)
  211. str1 = ""
  212. if printHtml:
  213. str1 += "<td>"+"<a href=\"#fig%d\">"%filecount+day+"</a></td>"
  214. filecount += 1
  215. else:
  216. str1 += day+"\t"
  217. for i in range(noofmethods):
  218. if printHtml:
  219. #print "<tr><td>%s</td><td>%.2f</td><td>%.2f</td><td>%.2f</td><td>%.2f</td></tr>"%(day,avgErrorIncludeingNights,avgError,avgWeightedError,productionError)
  220. str1 += "<td>%.2f</td>"%(avgErrorIncludeingNights[i])
  221. else:
  222. #print f,"%.2f\t%.2f\t%.2f\t%.2f\t%d\t%d\t%d"%(avgErrorIncludeingNights[i],avgError[i],avgWeightedError[i],productionError[i],count,actualCount,predictedCount[i])
  223. str1 += "%.2f\t"%(avgErrorIncludeingNights[i])
  224. for i in range(noofmethods):
  225. if printHtml:
  226. str1 += "<td>%.2f</td>"%(avgError[i])
  227. else:
  228. str1 += "%.2f\t"%(avgError[i])
  229. for i in range(noofmethods):
  230. if printHtml:
  231. str1 += "<td>%.2f</td>"%(avgWeightedError[i])
  232. else:
  233. str1 += "%.2f\t"%(avgWeightedError[i])
  234. for i in range(noofmethods):
  235. if printHtml:
  236. str1 += "<td>%.2f</td>"%(productionError[i])
  237. else:
  238. str1 += "%.2f\t"%(productionError[i])
  239. for i in range(noofmethods):
  240. if not printHtml:
  241. str1 += "%d\t%d\t"%(count,predictedCount[i])
  242. if printHtml:
  243. str1 += "</tr>"
  244. #print str1, tot_pred,"\t" ,tot_actual#, error_corr_count[0], error_corr_count[1], error_corr_count[2], probable_count[0], probable_count[1], probable_count[2]
  245. if printHtml:
  246. print "</table>"
  247. files = glob.glob(dirname+"/../*.png")
  248. files.sort()
  249. imgcount = 1
  250. i = 0
  251. if printHtml:
  252. print "<table border='2'>"
  253. for f in files:
  254. if i%2==0: #on every label we have two files, one prediction and another tag
  255. print "<a name=fig%d>\n"%(imgcount)
  256. imgcount += 1
  257. print "<tr><img src=\"%s\" alt=\"%s\"></img></tr>\n"%(f,f)
  258. i += 1
  259. print "</table>"
  260. if imgcount<>filecount:
  261. print imgcount, filecount