PageRenderTime 25ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/greenavailability/pastWeather.py

https://gitlab.com/abushoeb/ghadoop-v1
Python | 589 lines | 243 code | 129 blank | 217 comment | 50 complexity | 91d35db1955d09acb75f5c533a26f80d MD5 | raw file
  1. #!/usr/bin/python
  2. #import os,sys,time
  3. from datetime import datetime,timedelta
  4. import os,re,sys,glob,os.path,time,string
  5. import weatherPrediction
  6. import tempfile,subprocess
  7. #from datetime import datetime,timedelta
# Lookup tables re-exported from the weatherPrediction module.
locations = weatherPrediction.locations
# Indexed by location code in process() (``zipcodes[loc]``) to build the download URL.
zipcodes = weatherPrediction.zipcodes
timezones = weatherPrediction.timezones
# Maps a lower-cased condition string to its condition group (see parseStore/process).
conditions = weatherPrediction.conditions
# strftime/strptime pattern: abbreviated month, day, year, hour, minute.
# NOTE(review): shadows the ``format`` builtin; kept because it is a module-level name.
format = '%b_%d_%Y_%H_%M'
# Hour-resolution timestamp pattern; used by the __main__ demo to print result keys.
outputFormat = '%Y_%m_%d_%H'
# Not referenced by any code visible in this module.
currentDate = None
# Day-resolution pattern: abbreviated month, day, year.
dayFormat = "%b_%d_%Y"
  16. class WeatherCondition:
  17. def __init__(self,conditionString,conditionGroup):
  18. self.conditionString = conditionString
  19. self.conditionGroup = conditionGroup
  20. def __str__(self):
  21. return "%s\t%s"%(self.conditionString,str(self.conditionGroup))
  22. class TemperatureRecord:
  23. def __init__(self,time,filename,line):
  24. self.time = time
  25. self.filename = filename
  26. self.line = line
  27. def setAttribute(self,attribute,value):
  28. if not hasattr(self, attribute):
  29. setattr(self, attribute, value)
  30. def setCondition(self,cond):
  31. if not hasattr(self, 'condition'):
  32. self.condition=cond
  33. def setTemp(self, temp):
  34. if not hasattr(self, 'temp'):
  35. self.temp = temp
  36. def setFeelLike(self, feelLike):
  37. if not hasattr(self, 'feelLike'):
  38. self.feelLike = feelLike
  39. def setHumidity(self,hum):
  40. if not hasattr(self, 'humidity'):
  41. self.humidity = hum
  42. def setPrecipation(self,pre):
  43. if hasattr(self, 'precipitation'):
  44. print self.precipitation
  45. if not hasattr(self, 'precipitation'):
  46. self.precipation = pre
  47. def setWind(self,direction,speed):
  48. if not hasattr(self, 'direction'):
  49. self.direction=direction
  50. self.speed = speed
  51. #print str(self)
  52. # fout.write(str(self)+"\n")
  53. def check(self):
  54. try:
  55. a = self.direction
  56. a = self.speed
  57. a = self.condition
  58. a = self.feelLike
  59. a = self.temp
  60. a = self.time
  61. except AttributeError:
  62. print "bad time",self.time,self.filename
  63. raise
  64. def __str__(self):
  65. outFormat = "%Y_%m_%d_%H"#"%A_%H"
  66. try:
  67. return "%s\t%d\t%d\t%d\t%s\t%d\t%s\t%d"%(self.time.strftime(outFormat),self.temp,self.feelLike,self.humidity,self.condition,self.precipation,self.direction,self.speed)
  68. except AttributeError:
  69. print "bad",self.time,self.filename,self.line
  70. raise
  71. def getTime(self):
  72. return time.mktime(self.time.timetuple())
  73. class OneDayRecords:
  74. def __init__(self):
  75. self.map = {}
  76. # def __init__(self,day):
  77. # self.day = day
  78. # self.init()
  79. def append(self,record):
  80. if not self.map.has_key(record.time.hour):
  81. self.map[record.time.hour] = record
  82. def getRecords(self):
  83. #print self.map.values()
  84. return self.map.values()
  85. def roundToNereastHour(time):
  86. retval = datetime(time.year,time.month,time.day,time.hour)
  87. if time.minute>30:
  88. retval+=timedelta(hours=1)
  89. return retval
  90. def getTimeDelta(t1,t2):
  91. if t1>t2:
  92. return t1-t2
  93. else:
  94. return t2-t1
  95. def parseStore(htmlFile,day,timeToRecord={}):
  96. # pass
  97. #
  98. #def parseStore(htmlFile, d):
  99. retval = {}
  100. hourToTsp = {}
  101. fd = open(htmlFile,'r')
  102. # print htmlFile
  103. # ftemp = tempfile.TemporaryFile()#open("tempdata.txt",'w')
  104. # 2010-06-05 22:04:00
  105. format = "%I:%M %p"
  106. timeRe = re.compile('<td valign="middle" class="inDentA".*<b>(.*)</b></font></td>')
  107. tempRe = re.compile('<td valign="middle" align="left".*>(.+)<b>(-?\d+)&deg;F</b></td>')
  108. feltRe = re.compile('<td align="center" valign="middle" class="blueFont10" bgcolor="#.+"><b>(\-*\d+)&deg;F</b></td>')
  109. dewpRe = re.compile('<td align="center" valign="middle" class="blueFont10" bgcolor="#.+">(\-*\d+)&deg;F</td>') #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">30&deg;F</td>
  110. humiRe = re.compile('<td align="center" valign="middle" class="blueFont10" bgcolor="#.+">(\d+)%</td>') #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">92%</td>
  111. visiRe = re.compile('<td align="center" valign="middle" class="blueFont10" bgcolor="#.+">(\d+\.\d+)<BR>miles</td>') #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">9.0<BR>miles</td>
  112. presRe = re.compile('<td align="center" valign="middle" class="blueFont10" bgcolor="#.+">(\d+\.\d+)$') #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">30.01
  113. windRe = re.compile('<td align="center" valign="middle" class="blueFont10" bgcolor="#.+">(.+)</td>') #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">CALM</td>
  114. nextHour=0
  115. dayJump=False
  116. # arrayDate = []
  117. # arrayOut = []
  118. currentRecord = None
  119. discardRecord = False
  120. for line in fd:
  121. line = line.strip()
  122. if not line:
  123. continue
  124. #print line
  125. # Get time
  126. r = timeRe.search(line)
  127. if r:
  128. repeat = False
  129. #print r.group(0)
  130. tsp = r.group(1)
  131. t = datetime.strptime(tsp,format)
  132. t = datetime(day.year,day.month,day.day,t.hour,t.minute)
  133. nearestHour = roundToNereastHour(t)
  134. currentRecord = TemperatureRecord(t, htmlFile, line)
  135. if timeToRecord.has_key(nearestHour):
  136. r = timeToRecord[nearestHour]
  137. deltaPrev = getTimeDelta(r.time,nearestHour)
  138. deltaNow = getTimeDelta(t,nearestHour)
  139. if deltaNow<deltaPrev:
  140. timeToRecord[nearestHour] = currentRecord
  141. discardRecord = False
  142. else:
  143. discardRecord = True
  144. else:
  145. timeToRecord[nearestHour] = currentRecord
  146. discardRecord = False
  147. # try:
  148. # if prevNearestHour == nearestHour:
  149. # repeat = True
  150. # except:
  151. # None
  152. #
  153. # try:
  154. # prevRecord=currentRecord
  155. # except:
  156. # prevOut=None
  157. # out = str(nearestHour.year)+"\t"+str(nearestHour.month)+"\t"+str(nearestHour.day)+"\t"+str(nearestHour.hour)+"\t"+str(nearestHour)
  158. # if seconds<deltaNow.seconds:
  159. # continue
  160. continue
  161. # Get condition and temperature
  162. #<td valign="middle" align="left" class="blueFont10">Light Rain and Freezing Rain <b>32&deg;F</b></td>
  163. if discardRecord:
  164. continue
  165. r = tempRe.search(line)
  166. if(r):
  167. #print r.group(0)
  168. cond = r.group(1)
  169. temp = r.group(2)
  170. cond=cond.strip().lower()
  171. condOrig=cond
  172. if cond in conditions:
  173. cond = conditions[cond]
  174. #print cond
  175. # out+="\t"+str(cond)+"\t"+str(condOrig)+"\t"+str(temp)
  176. c = WeatherCondition(condOrig,cond)
  177. currentRecord.setTemp(temp.strip())
  178. currentRecord.setCondition(c)
  179. #print out
  180. prevNearestHour = nearestHour
  181. continue
  182. #Felt like
  183. #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5"><b>32&deg;F</b></td>
  184. r = feltRe.search(line)
  185. if(r):
  186. # out+="\t"+r.group(1)
  187. currentRecord.setFeelLike(r.group(1).strip())
  188. continue
  189. #Dew Point
  190. #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">30&deg;F</td>
  191. r = dewpRe.search(line)
  192. if(r):
  193. # out+="\t"+r.group(1)
  194. currentRecord.setAttribute("Dew", r.group(1).strip())
  195. continue
  196. #Humidity
  197. #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">92%</td>
  198. r = humiRe.search(line)
  199. if(r):
  200. # out+="\t"+r.group(1)
  201. currentRecord.setHumidity(r.group(1).strip())
  202. # currentRecord.setAttribute("Humidity", r.group(1).strip())
  203. continue
  204. #Visibility
  205. #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">9.0<BR>miles</td>
  206. r = visiRe.search(line)
  207. if(r):
  208. # out+="\t"+r.group(1)
  209. currentRecord.setAttribute("Visibility", r.group(1))
  210. continue
  211. #Pressure
  212. #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">30.01
  213. #<IMG SRC="http://image.weather.com/web/common/icons/steady_pressure.gif?20061207" WIDTH="5" HEIGHT="8" BORDER="0" ALT="steady">
  214. r = presRe.search(line)
  215. if(r):
  216. # out+="\t"+r.group(1)
  217. currentRecord.setAttribute("Pressure", r.group(1))
  218. continue
  219. #Wind
  220. #<td align="center" valign="middle" class="blueFont10" bgcolor="#f1f4f5">CALM</td>
  221. r = windRe.search(line)
  222. if(r):
  223. wind = r.group(1)
  224. if wind == 'CALM':
  225. wind = 0
  226. else:
  227. index1 = string.index(wind, '<BR>')
  228. index2 = string.index(wind, 'mph')
  229. wind = wind[index1+4:index2]
  230. currentRecord.setWind("DIRECTION", wind)
  231. # out+="\t"+str(wind)
  232. #if not repeat:
  233. #print out
  234. # Print things out
  235. #print str(nextHour)+">"+str(nearestHour.hour)+"--------"+out
  236. #if not repeat:
  237. #if nextHour==nearestHour.hour:
  238. #print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+out
  239. #else:
  240. #while nextHour<nearestHour.hour and (nextHour-nearestHour.hour)>0:
  241. #print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+out+" added"
  242. #nextHour = (nextHour+1)%24
  243. #print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+out+" after adding"
  244. #nextHour = (nextHour+1)%24
  245. if nearestHour.hour>2:
  246. dayJump=True
  247. #check = nextHour-nearestHour.hour
  248. #done=False
  249. #if check == 0:
  250. ##print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+out
  251. #print out
  252. #done=True
  253. #if check<0 and check>-10:
  254. #nextHour=nextHour+1
  255. ##print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+prevOut
  256. ##print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+out
  257. #print prevOut
  258. #print out
  259. #done=True
  260. ## Day jump
  261. #if check<=-10:
  262. #if not dayJump:
  263. ##print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+out+" extrange"
  264. #print out
  265. #dayJump=True
  266. #nextHour=nextHour-1
  267. #done=True
  268. #if check>0:
  269. ##print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+prevOut+" skipped"
  270. #nextHour=nextHour-1
  271. #done=True
  272. #if not done:
  273. ##print str(nextHour-nearestHour.hour)+" - "+str(nextHour)+" - "+str(nearestHour.hour)+"\t"+out+" other situation"
  274. #print out+" other situation"
  275. #nextHour = (nextHour+1)%24
  276. #print out
  277. # arrayOut.append(out)
  278. # arrayDate.append(nearestHour)
  279. continue
  280. fd.close()
  281. return timeToRecord
  282. # if len(arrayDate)==0:
  283. # return retval
  284. #
  285. # arrayDate2 = []
  286. # arrayOut2 = []
  287. # # Remove dupe
  288. #
  289. # #some times the first enrty may contain 12am of next day
  290. # #target = datetime(arrayDate[1].year,arrayDate[1].month,arrayDate[1].day,0)
  291. #
  292. # #print arrayDate
  293. #
  294. # i = 0
  295. # while arrayDate[i].hour<>0 or arrayDate[i].day<>arrayDate[i+1].day:
  296. # arrayDate.pop(i)
  297. # arrayOut.pop(i)
  298. #
  299. # #print arrayDate
  300. #
  301. # for i in range(0,len(arrayDate)):
  302. # #print i, arrayDate[i], arrayDate[i]-timedelta(hours=1), arrayDate[i-1]
  303. # if i==0:
  304. # #parts = arrayDate[0].split("\t")
  305. # #hour = arrayDate[0].hour#int(parts[3])
  306. # #if hour==23:
  307. # # arrayDate2.append(arrayDate[1])
  308. # # arrayOut2.append(arrayOut[1])
  309. # #else:
  310. # arrayDate2.append(arrayDate[i])
  311. # arrayOut2.append(arrayOut[i])
  312. # elif (arrayDate[i]-timedelta(hours=1)) == arrayDate[i-1]:
  313. # arrayDate2.append(arrayDate[i])
  314. # arrayOut2.append(arrayOut[i])
  315. # #if (arrayDate[i]-timedelta(hours=23))==target or (arrayDate[i]-timeDelta[hours=1])
  316. #
  317. # i=1
  318. # iniHour = arrayDate2[0]
  319. # iniHour = datetime(iniHour.year,iniHour.month,iniHour.day,0)
  320. # #print arrayOut2[0]
  321. # ftemp.write(arrayOut2[0]+"\n")
  322. # for h in range(1,24):
  323. # #print h, i, len(arrayDate2)
  324. # if (i<len(arrayDate2)) and arrayDate2[i].day == iniHour.day:
  325. # #print arrayOut2[i]
  326. # ftemp.write(arrayOut2[i]+"\n")
  327. # else:
  328. # i = i+1
  329. # if (i<len(arrayDate2)):
  330. # #print arrayOut2[i]
  331. # ftemp.write(arrayOut2[i]+"\n")
  332. #
  333. # if (i<len(arrayDate2)) and arrayDate2[i]==(iniHour+timedelta(hours=h)):
  334. # i = i+1
  335. #
  336. # #ftemp.close()
  337. #
  338. # ftemp.seek(0)# = open("tempdata.txt",'r')
  339. # for line in ftemp:
  340. # #print line.split("\t")
  341. # str1 = line.split("\t")[0]+"_"+line.split("\t")[1]+"_"+line.split("\t")[2]+"_"+line.split("\t")[3]
  342. # str1 += "\t"+line.split("\t")[7]+"\t"+line.split("\t")[8]+"\t"+line.split("\t")[10]+"\t"+line.split("\t")[6]
  343. # str1 += "\t"+"10"+"\t"+"NNW"+"\t"+line.split("\t")[13]
  344. # fout.write(str1)
  345. # ftemp.close()
  346. # Let's double check to see if we have all the required hour
  347. #startTime = datetime(day.year,day.month,day.day)
  348. #oneHour = timedelta(hours=1)
  349. #endTime = startTime + timedelta(days=1)
  350. #while(startTime<endTime):
  351. #if not retval.has_key(startTime):
  352. #print location,startTime
  353. #startTime+=oneHour
  354. return retval
  355. def fromDirToTime(x):
  356. return int(time.mktime(time.strptime(os.path.basename(x), getWeatherPrediction.format)))
  357. def execCmd(cmd):
  358. try:
  359. retcode = subprocess.call(cmd, shell=True)
  360. if retcode < 0:
  361. print >>sys.stderr,"cmd return code",retcode,cmd
  362. print >>sys.stderr, "Child was terminated by signal", -retcode
  363. raise()
  364. except OSError, e:
  365. print >>sys.stderr,"cmd failed",cmd
  366. print >>sys.stderr, "Execution failed:", e
  367. raise()
  368. def process(date, num_hours=24, path="."):
  369. global outputFormat
  370. #global fout
  371. #global d
  372. # fout = tempfile.TemporaryFile()#= open("full_fore.txt","w")
  373. loc = 'nj'
  374. url = "http://www.weather.com/weather/pastweather/hourly/"
  375. d = date
  376. if d.minute>0:
  377. d = datetime(d.year,d.month,d.day,d.hour)
  378. #d += timedelta(hours=1)
  379. date = d
  380. # date = d - timedelta(hours=2)
  381. d = datetime(date.year,date.month,date.day)#date
  382. #print date
  383. # num_hours += 2
  384. # print "process",date,num_hours,d
  385. timeToRecords = {}
  386. while d < date+timedelta(hours=num_hours+1):
  387. url = "http://www.weather.com/weather/pastweather/hourly/"
  388. url += "%s?when=%s&stn=0"%(zipcodes[loc],d.strftime("%m%d%y"))
  389. htmlFile = path+"/htmlarchive/%s_%d_%d_%d.html"%(loc,d.year,d.month,d.day)
  390. #print htmlFile
  391. # Download if they cannot be found
  392. if not os.path.isfile(htmlFile):
  393. #print url
  394. cmd = 'wget -O %s -o /dev/null "%s"'%(htmlFile,url)
  395. execCmd(cmd)
  396. else:
  397. if os.path.getsize(htmlFile)==0:
  398. cmd = 'wget -O %s -o /dev/null "%s"'%(htmlFile,url)
  399. execCmd(cmd)
  400. parseStore(htmlFile,d,timeToRecords)
  401. #print d
  402. d += timedelta(days=1)
  403. # keys = timeToRecords.keys()
  404. # keys.sort()
  405. #
  406. # print keys
  407. # print "++++++++++++++++++++++++++++"
  408. #fout.close()
  409. # finp = fout#open("full_fore.txt","r")
  410. # finp.seek(0)
  411. # fout = tempfile.TemporaryFile()#open("fore.txt","w")
  412. dates = timeToRecords.keys()
  413. dates.sort()
  414. # shift = date.hour
  415. # i = 0
  416. # print shift
  417. retval = {}
  418. lastDate = None
  419. tdelta = timedelta(hours=1)
  420. for d in dates:
  421. # if shift>0:
  422. # shift -= 1
  423. # continue
  424. #print line.split("\t")
  425. r = timeToRecords[d]
  426. if conditions.has_key(r.condition.conditionString):
  427. tagint = conditions[r.condition.conditionString]
  428. else:
  429. print "key missing", r.condition.conditionString
  430. tagint = conditions[weatherPrediction.long_substr(r.condition.conditionString)]
  431. # str1 = "%s\t%s\t%d"%(d.strftime(outputFormat),r.condition.conditionString,tagint)
  432. r.condition.conditionGroup = tagint
  433. retval[d] = r.condition
  434. if lastDate:
  435. lastDate=lastDate+tdelta
  436. while lastDate<d:
  437. retval[lastDate] = WeatherCondition("Unkown", -1)
  438. lastDate=lastDate+tdelta
  439. lastDate = d
  440. # fout.write(str1+"\n")
  441. # i += 1
  442. # if i==num_hours:
  443. # break
  444. # finp.close()
  445. # #fout.close()
  446. # fout.seek(0)
  447. #
  448. # return fout
  449. return retval
  450. if __name__ == '__main__':
  451. print "past weather"
  452. now = datetime(2010, 8, 23, 9, 0,0)
  453. retval = process(now)
  454. keys = retval.keys()
  455. keys.sort()
  456. for k in keys:
  457. print k.strftime(outputFormat),retval[k]
  458. # for line in fd:
  459. # print line.strip()
  460. # fd.close()