get_erros.py | searchcode

/greenavailability_1/evaluation/data/horizon_data/get_erros.py

https://gitlab.com/abushoeb/ghadoop-master
Python | 313 lines | 223 code | 52 blank | 38 comment | 60 complexity | a1600fa6a3afaaa28e40ccc1a58a8763 MD5 | raw file

#!/usr/bin/python

import os,sys,re,string,getopt,glob,math
# Placeholder until parseErrorForFile() runs; that function assigns 48 when
# called with long=True and 24 otherwise.
totalHoursInOneDay = None
# Evaluates to 111. NOTE(review): not read anywhere in the visible source;
# presumably five days of hourly samples minus nine -- confirm before removing.
totalHours = (5 * 24) - 9
from datetime import *

# How many prediction columns parseErrorForFile() reads from every data row.
noofmethods = 3
# One counter per method; both lists are incremented by parseErrorForFile()
# and are never reset, so they accumulate over all files parsed in a run.
error_corr_count = [0] * noofmethods
probable_count = [0] * noofmethods
#horizon = [1,3,6,12,24,48]

# strptime() pattern for the "date time" stamps handled by addtodict().
format = "%Y-%m-%d %H:%M:%S"
#all_hours = {}
#all_errors = {}
#all_bases = {}

def addtodict(date, value, base):
	"""Fold one timestamped sample into the per-day error and base totals.

	date  -- timestamp string in the module-level ``format`` pattern.
	value -- signed error of the sample; added to ``all_errors[day]``.
	base  -- base value of the sample; added to ``all_bases[day]``.

	``day`` is the timestamp truncated to midnight. A timestamp already
	present in ``all_hours`` is ignored, so each one is counted only once.
	``all_hours``, ``all_errors`` and ``all_bases`` are module globals that
	parseErrorForFile() re-creates for every file it parses.
	"""
	# ``in`` replaces dict.has_key(), which no longer exists in Python 3.
	if date in all_hours:
		return
	all_hours[date] = True

	stamp = datetime.strptime(date, format)
	day = datetime(stamp.year, stamp.month, stamp.day)

	all_errors[day] = all_errors.get(day, 0) + value
	all_bases[day] = all_bases.get(day, 0) + base

def getfromdict(date):
	"""Return the accumulated error stored in ``all_errors`` for a day.

	date -- a timestamp string in the module-level ``format`` pattern, or a
	        datetime object; either is truncated to midnight, which is how
	        addtodict() keys ``all_errors``.

	Returns None when nothing has been recorded for that day.
	"""
	# The original looked up an undefined name ``d`` instead of deriving the
	# key from the argument, and left the result unbound for unknown days.
	if not isinstance(date, datetime):
		date = datetime.strptime(date, format)
	day = datetime(date.year, date.month, date.day)
	return all_errors.get(day)

def _percent_of(numerator, denominator):
	"""Return numerator / denominator * 100, or 0.0 when the denominator is zero."""
	if not denominator:
		return 0.0
	return numerator / float(denominator) * 100


def parseErrorForFile(filename,horizon=49,long=False):
	"""Parse one forecast file and compute error statistics per method.

	Blank lines and lines starting with "#" are skipped. Every other line is
	split on whitespace and read by position:
	    [0] date, [1] time, [3] hour (int),
	    [4 .. 4+noofmethods-1] predicted value of each method,
	    [7] actual value, [8] base, [9] tag (compared as base*tag/100).

	filename -- path of the file to read.
	horizon  -- accepted for compatibility; not used by this function.
	long     -- when true, the returned start time is "date time" instead of
	            just the date, and totalHoursInOneDay is set to 48 (else 24).

	Returns a 10-tuple:
	    avgErrorIncludeingNights -- per method: summed hourly relative error
	                                divided by the number of data rows, * 100.
	    avgError                 -- per method: the same sum divided by the
	                                number of counted rows, * 100.
	    avgWeightedError         -- per method: summed weighted error divided
	                                by the summed reference values, * 100.
	    productionError          -- the mean daily percentage error of method
	                                index 2, repeated once per method.
	    count                    -- number of data rows.
	    actualCount              -- number of rows whose actual value is > 0.
	    predictedCount           -- per method: number of counted rows.
	    startTime                -- date (and time if ``long``) of the first
	                                data row, or None if there was none.
	    grand_total_predicted    -- method-2 predictions summed over the
	                                completed daily buckets.
	    grand_total_actual       -- actual values summed over those buckets.

	A row is "counted" for a method when the actual value is > 1 (the actual
	is then the reference) or, failing that, when the prediction is > 1 (the
	prediction is then the reference). Any ratio whose denominator is zero is
	reported as 0.0 instead of raising ZeroDivisionError.

	Side effects: re-creates the globals all_hours / all_errors / all_bases,
	sets totalHoursInOneDay, increments the module-level error_corr_count and
	probable_count lists, and prints the file name followed by one line per
	day recorded through addtodict().
	"""
	global totalHoursInOneDay

	global all_hours
	global all_errors
	global all_bases

	# Fresh per-file accumulators for addtodict().
	all_hours = {}
	all_errors = {}
	all_bases = {}

	startTime = None
	totalActual = 0
	count = 0
	actualCount = 0
	day_count = 0
	grand_total_predicted = 0
	grand_total_actual = 0
	avg_daily_error = 0

	totalPredicted = [0] * noofmethods
	totalErrorProduction = [0] * noofmethods
	errorSum = [0] * noofmethods
	weightedSum = [0] * noofmethods
	predictedCount = [0] * noofmethods
	# Per-method phase for probable_count: 0 = no row with hour > 8 seen yet,
	# 1 = inside the first run of such rows, 2 = that run is over.
	state = [0] * noofmethods

	# ``with`` closes the file even when a malformed row raises.
	with open(filename,'r') as fd:
		totalHoursInOneDay = 48 if long else 24

		for line in fd:
			line = line.strip()
			if not line or line.startswith("#"):
				continue

			elements = line.split()

			if startTime is None:
				if long:
					startTime = elements[0]+" "+elements[1]
				else:
					startTime = elements[0]

			dtime = elements[0]+" "+elements[1]

			hour = int(elements[3])
			actual = float(elements[7])
			base = float(elements[8])
			tag = float(elements[9])
			predicted = [float(elements[i+4]) for i in range(noofmethods)]

			for i in range(noofmethods):
				error = abs(predicted[i]-actual)

				# Prediction differs from base*tag/100 by more than 0.5.
				if abs(predicted[i]-base*tag/100) > 0.5:
					error_corr_count[i] += 1

				if hour > 8:
					if state[i] in (0, 1):
						probable_count[i] += 1
						state[i] = 1
				elif state[i] == 1:
					state[i] = 2

				# Reference value for the relative error: the actual when it
				# is above 1, otherwise the prediction when that is above 1.
				if actual > 1:
					reference = actual
				elif predicted[i] > 1:
					reference = predicted[i]
				else:
					reference = None

				errorRate = 0
				weightedRate = 0
				if reference is not None:
					errorRate = error/reference
					weightedRate = errorRate*reference
					totalErrorProduction[i] += reference
					predictedCount[i] += 1

				errorSum[i] += errorRate
				weightedSum[i] += weightedRate

				# Only the method at index 2 feeds the per-day dictionaries.
				if i == 2:
					addtodict(dtime, predicted[i]-actual, base)

				totalPredicted[i] += predicted[i]

			if actual > 0:
				actualCount += 1

			totalActual += actual

			# Close a daily bucket. The test runs after the current row has
			# been accumulated and fires when the zero-based row index is a
			# non-zero multiple of 24, so the first bucket holds 25 rows and
			# rows after the last boundary never enter the daily figures.
			# NOTE(review): kept as in the original -- confirm it is intended.
			if count > 0 and (count % 24) == 0:
				avg_daily_error += _percent_of(abs(totalPredicted[2]-totalActual), totalActual)
				grand_total_predicted += totalPredicted[2]
				grand_total_actual += totalActual
				totalPredicted[2] = 0
				totalActual = 0
				day_count += 1
			count += 1

	# A file without a completed bucket has no daily error to average.
	if day_count:
		avg_daily_error = avg_daily_error/float(day_count)
	else:
		avg_daily_error = 0.0

	avgError = []
	avgWeightedError = []
	productionError = []
	avgErrorIncludeingNights = []

	for i in range(noofmethods):
		avgError.append(_percent_of(errorSum[i], predictedCount[i]))
		avgWeightedError.append(_percent_of(weightedSum[i], totalErrorProduction[i]))
		productionError.append(avg_daily_error)
		avgErrorIncludeingNights.append(_percent_of(errorSum[i], count))

	# Each print gets one pre-formatted string: the parenthesised form emits
	# the same bytes as the original Python 2 print statements (which put a
	# space before every "\t" item) and is also valid Python 3.
	print(filename)
	for day in sorted(all_errors):
		day_error = abs(all_errors[day])
		day_base = all_bases[day]
		if day_base:
			print("%s \t%s \t%s \t%s \t%s" % (day, day.month, day_error, day_base, day_error/day_base*100))
		else:
			# No percentage can be computed for a zero base; report the day
			# on a line of its own.
			print("%s %s %s" % (filename, day, day_base))

	return (avgErrorIncludeingNights,avgError,avgWeightedError,productionError,count,actualCount,predictedCount,startTime,grand_total_predicted,grand_total_actual)



if __name__=='__main__':
    # Command line:
    #   -d DIR    parse every file matching DIR/*fore.txt
    #   -f A,B,.. parse the listed files
    #   -l        pass long=True to parseErrorForFile()
    #   -h        accepted by the option string, but has no handler (no-op)

    commonOptions="f:d:hl"
    opts, args = getopt.getopt(sys.argv[1:], commonOptions)
    printHtml=False
    twodays = False
    dirname = None
    files = []

    for o,a in opts:
        if o == '-d':
            dirname = a
            files.extend(glob.glob(os.path.join(dirname,"*fore.txt")))
        elif o == '-f':
            files.extend(a.split(","))
        elif o == "-l":
            twodays = True

    files.sort()
    filecount = 1

    if printHtml:
        print("<table border='2'>")
        print("<tr><th>Day/Time</th><th>Avg Error(24 hours)</th><th>Avg Error(24 hours)</th><th>Avg Error(24 hours)</th><th>Avg Error(nonzero Actual)</th><th>Avg Error(nonzero Actual)</th><th>Avg Error(nonzero Actual)</th><th>Weighted Error</th><th>Weighted Error</th><th>Weighted Error</th><th>Daily Production Error</th><th>Daily Production Error</th><th>Daily Production Error</th></tr>")
        print("<tr><td></td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td><td>Normal</td><td>Threshold</td><td>No threshold</td></tr>")

    for f in files:
        (avgErrorIncludeingNights,avgError,avgWeightedError,productionError,count,actualCount,predictedCount,day, tot_pred, tot_actual) = parseErrorForFile(f,long=twodays)

        # parseErrorForFile() returns None as the start time when a file has
        # no data rows.
        label = day or ""
        if printHtml:
            str1 = "<td>"+"<a href=\"#fig%d\">"%filecount+label+"</a></td>"
            filecount += 1
            cell = "<td>%.2f</td>"
        else:
            str1 = label+"\t"
            cell = "%.2f\t"

        # One group of noofmethods cells per statistic, in column order.
        for series in (avgErrorIncludeingNights, avgError, avgWeightedError, productionError):
            for i in range(noofmethods):
                str1 += cell%(series[i])

        if printHtml:
            str1 += "</tr>"
        else:
            for i in range(noofmethods):
                str1 += "%d\t%d\t"%(count,predictedCount[i])

        # NOTE(review): the summary row is assembled but its print is disabled
        # in the original, so the only per-file output is what
        # parseErrorForFile() prints itself. Left disabled here.
        #print str1, tot_pred,"\t" ,tot_actual#, error_corr_count[0], error_corr_count[1], error_corr_count[2], probable_count[0], probable_count[1], probable_count[2]

    if printHtml:
        print("</table>")

        # Figures are looked up next to the -d directory; without -d there is
        # nowhere to look.
        files = []
        if dirname is not None:
            files = glob.glob(dirname+"/../*.png")
            files.sort()

        imgcount = 1
        print("<table border='2'>")
        for i, f in enumerate(files):
            if i%2==0: #on every label we have two files, one prediction and another tag
                print("<a name=fig%d>\n"%(imgcount))
                imgcount += 1
            # Emitted for every image; in the original this line and the
            # index increment had slipped outside the loop.
            print("<tr><img src=\"%s\" alt=\"%s\"></img></tr>\n"%(f,f))
        print("</table>")
        if imgcount != filecount:
            print("%s %s" % (imgcount, filecount))