/greenavailability_1/evaluation/data/horizon_data/get_erros.py
Python | 313 lines | 223 code | 52 blank | 38 comment | 60 complexity | a1600fa6a3afaaa28e40ccc1a58a8763 MD5 | raw file
#!/usr/bin/python
"""Error statistics for prediction files (predicted vs. actual values per line)."""
import os
import sys
import re
import string
import getopt
import glob
import math
from datetime import *

# Length of the aggregation window; parseErrorForFile() sets it to 24 or 48.
totalHoursInOneDay = None
totalHours = 5*24-9
# Number of prediction columns read from every data line.
noofmethods = 3
# Per-method counters, updated as a side effect of parseErrorForFile().
error_corr_count = [0, 0, 0]
probable_count = [0, 0, 0]
# Timestamp layout parsed by addtodict().  NOTE: shadows the builtin format().
format = "%Y-%m-%d %H:%M:%S"
# Per-day accumulators shared by addtodict()/getfromdict().  They are defined
# here so those helpers work even before parseErrorForFile() resets them.
all_hours = {}
all_errors = {}
all_bases = {}
def addtodict(date, value, base):
    """Accumulate one timestamped sample into the per-day totals.

    date  -- timestamp string laid out as the module-level ``format``.
    value -- amount added to ``all_errors`` for the sample's calendar day.
    base  -- amount added to ``all_bases`` for the sample's calendar day.

    A timestamp that was already recorded in ``all_hours`` is ignored, so
    feeding the same sample twice does not count it twice.
    """
    if date in all_hours:
        return
    all_hours[date] = True

    # Truncate the timestamp to midnight so every sample of a day shares a key.
    moment = datetime.strptime(date, format)
    day = datetime(moment.year, moment.month, moment.day)

    all_errors[day] = all_errors.get(day, 0) + value
    all_bases[day] = all_bases.get(day, 0) + base
    return
def getfromdict(date):
    """Return the value stored in ``all_errors`` for the day containing *date*.

    date -- either a timestamp string laid out as the module-level ``format``
            or a ``datetime``; both are truncated to midnight, which is how
            addtodict() builds the keys of ``all_errors``.

    Returns None when nothing has been recorded for that day.
    """
    if isinstance(date, datetime):
        moment = date
    else:
        moment = datetime.strptime(date, format)
    day = datetime(moment.year, moment.month, moment.day)
    return all_errors.get(day)
def _ratio(numerator, denominator):
    """Return numerator/denominator as a float, or NaN if the denominator is 0."""
    if denominator == 0:
        return float("nan")
    return float(numerator) / denominator


def parseErrorForFile(filename,horizon=49,long=False):
    """Parse one whitespace-separated data file and compute its error metrics.

    Blank lines and lines starting with ``#`` are skipped.  On every other
    line the fields are read by position: 0 and 1 form the timestamp, 3 is the
    hour, 4..4+noofmethods-1 are the predictions, 7 is the actual value, 8 the
    base and 9 the tag.

    filename -- path of the file to read.
    horizon  -- accepted for compatibility; not used.
    long     -- when true the window constant is 48 instead of 24 and the
                returned start time includes the time of day.

    Side effects: resets the module-level ``all_hours``/``all_errors``/
    ``all_bases`` and fills them through addtodict() (third method only), sets
    ``totalHoursInOneDay``, updates ``error_corr_count``/``probable_count``,
    and prints the file name followed by one line per accumulated day.

    Returns the tuple (avgErrorIncludeingNights, avgError, avgWeightedError,
    productionError, count, actualCount, predictedCount, startTime,
    grand_total_predicted, grand_total_actual); the first four are lists with
    one percentage per method.  A metric with nothing to average over is NaN
    instead of raising ZeroDivisionError.
    """
    global totalHoursInOneDay
    global all_hours
    global all_errors
    global all_bases
    all_hours = {}
    all_errors = {}
    all_bases = {}

    startTime = None
    count = 0
    actualCount = 0
    totalActual = 0
    day_count = 0
    avg_daily_error = 0
    grand_total_predicted = 0
    grand_total_actual = 0
    state = [0] * noofmethods
    totalPredicted = [0] * noofmethods
    totalErrorProduction = [0] * noofmethods
    errorSum = [0] * noofmethods
    weightedSum = [0] * noofmethods
    predictedCount = [0] * noofmethods

    # "with" guarantees the file is closed even when a line fails to parse.
    with open(filename, 'r') as fd:
        totalHoursInOneDay = 48 if long else 24

        for line in fd:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            elements = line.split()
            dtime = elements[0] + " " + elements[1]
            if startTime is None:
                startTime = dtime if long else elements[0]

            hour = int(elements[3])
            actual = float(elements[7])
            base = float(elements[8])
            tag = float(elements[9])

            for i in range(noofmethods):
                predicted = float(elements[i + 4])
                error = abs(predicted - actual)

                # NOTE(review): the nesting of this state machine was
                # reconstructed from a listing that had lost its indentation;
                # confirm against the original file.
                if abs(predicted - base * tag / 100) > 0.5:
                    error_corr_count[i] += 1
                    if hour > 8 and state[i] in (0, 1):
                        probable_count[i] += 1
                        state[i] = 1
                elif state[i] == 1:
                    state[i] = 2

                # Relative error against the actual value, falling back to the
                # prediction; samples where both are <= 1 contribute nothing.
                if actual > 1:
                    reference = actual
                elif predicted > 1:
                    reference = predicted
                else:
                    reference = None

                errorRate = 0
                weightedRate = 0
                if reference is not None:
                    errorRate = error / reference
                    weightedRate = errorRate * reference
                    totalErrorProduction[i] += reference
                    predictedCount[i] += 1

                errorSum[i] += errorRate
                weightedSum[i] += weightedRate

                if i == 2:
                    addtodict(dtime, predicted - actual, base)

                totalPredicted[i] += predicted

            # NOTE(review): counted once per data line (reconstructed nesting).
            if actual > 0:
                actualCount += 1

            totalActual += actual
            # ``count`` is the zero-based index of the current data line here.
            # NOTE(review): the first window therefore spans 25 lines and a
            # trailing partial window is dropped -- kept as in the original.
            if count > 0 and count % 24 == 0:
                avg_daily_error += _ratio(abs(totalPredicted[2] - totalActual),
                                          totalActual) * 100
                grand_total_predicted += totalPredicted[2]
                grand_total_actual += totalActual
                totalPredicted[2] = 0
                totalActual = 0
                day_count += 1
            count += 1

    avg_daily_error = _ratio(avg_daily_error, day_count)

    avgError = []
    avgWeightedError = []
    productionError = []
    avgErrorIncludeingNights = []
    for i in range(noofmethods):
        avgError.append(_ratio(errorSum[i], predictedCount[i]) * 100)
        avgWeightedError.append(
            _ratio(weightedSum[i], totalErrorProduction[i]) * 100)
        productionError.append(avg_daily_error)
        avgErrorIncludeingNights.append(_ratio(errorSum[i], count) * 100)

    print(filename)
    for day in sorted(all_errors):
        day_error = abs(all_errors[day])
        day_base = all_bases[day]
        # Format before printing so a zero base never leaves a partial line.
        try:
            percent = day_error / day_base * 100
        except ZeroDivisionError:
            print("%s %s %s" % (filename, day, day_base))
        else:
            print("%s \t%s \t%s \t%s \t%s"
                  % (day, day.month, day_error, day_base, percent))

    return (avgErrorIncludeingNights,avgError,avgWeightedError,productionError,count,actualCount,predictedCount,startTime,grand_total_predicted,grand_total_actual)
def _summary_row(day, metric_groups, count, predictedCount, as_html, anchor):
    """Format one per-file summary row as HTML cells or tab-separated text."""
    if as_html:
        parts = ["<td>" + "<a href=\"#fig%d\">" % anchor + "%s</a></td>" % day]
        for group in metric_groups:
            parts.extend("<td>%.2f</td>" % value for value in group)
        parts.append("</tr>")
    else:
        parts = ["%s\t" % day]
        for group in metric_groups:
            parts.extend("%.2f\t" % value for value in group)
        parts.extend("%d\t%d\t" % (count, n) for n in predictedCount)
    return "".join(parts)


def main(argv=None):
    """Run parseErrorForFile() over every file selected on the command line.

    Options: ``-d DIR`` adds every ``*fore.txt`` file in DIR, ``-f A,B,...``
    adds the listed files, ``-l`` passes ``long=True`` to parseErrorForFile().
    ``-h`` is accepted and ignored.  Files are processed in sorted order.

    argv -- option list; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    commonOptions = "f:d:hl"
    opts, args = getopt.getopt(argv, commonOptions)

    # Output switches are hard-coded, as in the original script.
    # NOTE(review): "-h" may have been meant to enable the HTML output.
    printHtml = False
    # The summary row was commented out in the original; set to True to
    # print it again.
    printSummary = False
    twodays = False
    dirname = None
    files = []

    for o, a in opts:
        if o == '-d':
            dirname = a
            files.extend(glob.glob(os.path.join(dirname, "*fore.txt")))
        elif o == '-f':
            files.extend(a.split(","))
        elif o == "-l":
            twodays = True

    files.sort()
    filecount = 1

    if printHtml:
        print("<table border='2'>")
        print("<tr><th>Day/Time</th><th>Avg Error(24 hours)</th><th>Avg Error(24 hours)</th><th>Avg Error(24 hours)</th><th>Avg Error(nonzero Actual)</th><th>Avg Error(nonzero Actual)</th><th>Avg Error(nonzero Actual)</th><th>Weighted Error</th><th>Weighted Error</th><th>Weighted Error</th><th>Daily Production Error</th><th>Daily Production Error</th><th>Daily Production Error</th></tr>")
        print("<tr><td></td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td></tr>")

    for f in files:
        (avgErrorIncludeingNights, avgError, avgWeightedError, productionError,
         count, actualCount, predictedCount, day,
         tot_pred, tot_actual) = parseErrorForFile(f, long=twodays)
        if printSummary:
            row = _summary_row(
                day,
                (avgErrorIncludeingNights, avgError, avgWeightedError,
                 productionError),
                count, predictedCount, printHtml, filecount)
            print("%s %s \t%s" % (row, tot_pred, tot_actual))
        if printHtml:
            filecount += 1

    if printHtml:
        print("</table>")
        # The figures are only looked up for HTML output, and only when a
        # directory was given; the original raised NameError without "-d".
        images = []
        if dirname is not None:
            images = sorted(glob.glob(dirname + "/../*.png"))
        imgcount = 1
        print("<table border='2'>")
        for i, image in enumerate(images):
            if i % 2 == 0:  # on every label we have two files, one prediction and another tag
                print("<a name=fig%d>\n" % (imgcount))
                imgcount += 1
            print("<tr><img src=\"%s\" alt=\"%s\"></img></tr>\n" % (image, image))
        print("</table>")
        if imgcount != filecount:
            print("%s %s" % (imgcount, filecount))


if __name__=='__main__':
    main()