PageRenderTime 61ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 0ms

/plugins/video/XBMCEyetvScraper/default.py

http://xbmc-addons.googlecode.com/
Python | 1036 lines | 862 code | 80 blank | 94 comment | 87 complexity | 57934512db14be59fc782cd374894b88 MD5 | raw file
Possible License(s): AGPL-1.0, GPL-3.0, GPL-2.0, BSD-2-Clause

Large files are truncated, but you can click here to view the full file

  1. #!/usr/bin/env python
  2. # XBMCEyetvScraper
  3. # version 1.53
  4. # by prophead
  5. # ThumbnailOverlayGenerator by Nic Wolfe (midgetspy)
  6. # User variables. path, dummy and tog are overridden further down by the values from the GUI settings.
  7. # ---------------
  8. # default EyeTV archive directory; it is walked for *.eyetv recording folders
  9. path = '/Volumes/RAID/Movies/EyeTV/EyeTV Archive'
  10. # default TV show linked directory; one sub-directory per show title is created in it
  11. dummy = '/Volumes/RAID/Movies/TV'
  12. # TOG (ThumbnailOverlayGenerator)
  13. # requires ImageMagick and ffmpeg
  14. # 0=no thumbnail overlays, 1=try thumbnail overlays
  15. tog = 1
  16. # TV Show linked directory (library) support
  17. # 0=no linked directory support, 1=try to create linked directory
  18. LDS = 1
  19. # nfo file support
  20. # requires LDS
  21. # 0=no nfo file support, 1=try to create nfo files from Eyetv/thetvdb.com EPG data
  22. nfo = 1
  23. # thetvdb.com scraper support
  24. # requires nfo
  25. # 0=use Eyetv EPG data, 1=try to scrape thetvdb.com for data
  26. STVDB = 1
  27. # 0=normal logging, 1=extended (DEBUG) output in the log
  28. DEBUG = 0
  29. # ---------------
  30. import os, re, glob, sys
  31. import xbmc,xbmcgui,xbmcplugin
  32. from datetime import datetime
  33. # set settings (Eyetv folder) from the XBMC interface
  def get_settings():
      """Read the plugin settings from the XBMC GUI.

      Returns a dict with the keys 'path', 'dummy' and 'EyetvTOG'.  The
      values come from xbmcplugin.getSetting(); when the settings cannot be
      read, the hard-coded user variables (path, dummy, tog) are returned
      instead so the callers' settings[...] lookups keep working.
      """
      try:
          return {
              'path': xbmcplugin.getSetting( 'path' ),
              'dummy': xbmcplugin.getSetting( 'dummy' ),
              'EyetvTOG': xbmcplugin.getSetting( 'EyetvTOG' ),
          }
      except Exception:
          # The original bare "except:" fell through and returned None, which
          # made the very next settings['path'] lookup raise a TypeError.
          print("couldn't load settings")
          return {'path': path, 'dummy': dummy, 'EyetvTOG': tog}
  44. # test for Boxee
  45. #BoxeePath = os.getcwd()[:-1]+"/"
  46. #p=re.compile('.+(oxee).+')
  47. #m=p.match(BoxeePath)
  48. #if not m:
  49. # get settings from gui settings
  # Replace the hard-coded user variables with the values chosen in the GUI.
  settings = get_settings()
  path, dummy, tog = settings['path'], settings['dummy'], settings['EyetvTOG']
  # The overlay toggle is compared against the string "true" here, while the
  # scan loop later tests tog against the integer 1, so normalise it.
  if tog == "true":
      tog = 1
  56. #else:
  57. # print "[XBMCEyetvScraper] - Boxee"
  def go_tog(cmd, scandir):
      """Embedded ThumbnailOverlayGenerator (TOG).

      cmd     -- action to run; '-scan' is the value used by this plugin.
      scandir -- handed to scanAllFiles() for '-scan'; despite the name it is
                 treated there as a single video file path.

      The remaining actions (-revert, -force) are read from sys.argv by the
      dispatch code at the end of this function.
      """
      # Author: Nic Wolfe (midgetspy)
      # Contact: PM me on the xbmc.org forums
      # Version: 0.3.3.EM
      ###########################
      ## Configuration Options ##
      ###########################
      # height of the overlays as a fraction of the thumbnail height
      IMG_HEIGHT = 0.24
      # default to macport standards
      FFMPEG_PATH = '/opt/local/bin'
      FFMPEG_PROCESS = 'ffmpeg'
      IMAGEMAGICK_PATH = '/opt/local/bin'
      IMAGEMAGICK_IDENTIFY_PROCESS = 'identify'
      IMAGEMAGICK_COMPOSITE_PROCESS = 'composite'
      IMAGEMAGICK_CONVERT_PROCESS = 'convert'
      # Trailing comma: ('.mpg') without it is a plain string, not a tuple.
      AUTOSCAN_EXTENSIONS = ('.mpg',)
      # moved to global
      # 1=Extended output in log
      # DEBUG = 1
      # put the dimensions of your logos here (height/width)
      # Float operands: on Python 2 an integer ratio such as 300/400 would
      # truncate to 0 and later divide by zero in the audio overlay geometry.
      LOGO_RATIO = 1024.0/1024.0
      #################################################################################################################
      #################################################################################################################
      #################################################################################################################
      # Don't touch below this line unless you know what you're doing.
      # result codes shared by doFile / revertFile / checkDependencies
      SUCCESS = 1
      FAILURE = 2
      SKIPPED = 3
      # margin around the overlays as a fraction of the thumbnail width
      IMG_MARGIN = 0.02
      import sys
      import shutil
      import os
      import re
      import subprocess
  def checkDependencies ():
      """Exit via sys.exit() unless ffmpeg, identify and composite are present.

      Each tool is looked up as a regular file inside its configured
      directory; every missing tool is reported before quitting.
      """
      required = (
          (FFMPEG_PATH, FFMPEG_PROCESS,
           "ERROR: unable to find ffmpeg. Verify that it's installed and that FFMPEG_PATH is set correctly."),
          (IMAGEMAGICK_PATH, IMAGEMAGICK_IDENTIFY_PROCESS,
           "ERROR: unable to find identify (part of ImageMagick). Verify that it's installed and that IMAGEMAGICK_PATH is set correctly."),
          (IMAGEMAGICK_PATH, IMAGEMAGICK_COMPOSITE_PROCESS,
           "ERROR: unable to find composite (part of ImageMagick). Verify that it's installed and that IMAGEMAGICK_PATH is set correctly."),
      )
      status = SUCCESS
      for folder, binary, complaint in required:
          if os.path.isfile(os.path.join(folder, binary)):
              continue
          print(complaint)
          status = FAILURE
      if status == SUCCESS:
          return
      print("Quitting...")
      sys.exit()
  def doFile (filename, results=None):
      """Overlay the video and audio logos onto the thumbnail of filename.

      filename -- path of the video file; its thumbnails are looked up next
                  to it with the extensions tbn, tbn-orig and tiff.
      results  -- optional stream info in the layout returned by
                  parseffmpegInfo (index 1 names the video logo, index 3 the
                  audio logo); when None the file is probed with ffmpeg.

      Returns SUCCESS, FAILURE or SKIPPED.
      """
      print("Doing overlays for "+filename)
      if results is None:
          ffmpegResults = parseffmpegInfo(getffmpegInfo(filename))
      else:
          ffmpegResults = results
      if DEBUG:
          print("DEBUG: INFO: Scanned"+filename+"got "+str(ffmpegResults))
      if ffmpegResults is None:
          print("ERROR: unable to scan "+filename+" with ffmpeg")
          return FAILURE

      thumbName = changeExtension(filename, "tbn")
      thumbOrigName = changeExtension(filename, "tbn-orig")
      eyethumb = changeExtension(filename, "tiff")
      # if they have only tbn-orig then they want me to regenerate the thumb
      if os.path.isfile(thumbOrigName) and not os.path.isfile(thumbName):
          shutil.copyfile(thumbOrigName, thumbName)
      # if they have only a tbn (and not an eyetv tiff) I need to back it up then make the thumb
      elif os.path.isfile(thumbName) and not os.path.isfile(thumbOrigName) and not os.path.isfile(eyethumb):
          shutil.copyfile(thumbName, thumbOrigName)
      # if they have both I'm assuming it's done and leaving it alone
      elif os.path.isfile(thumbName) and os.path.isfile(eyethumb):
          print(filename+" already has a generated thumb, skipping")
          return SKIPPED
      # otherwise try to build the thumb from the eyetv tiff icon
      else:
          if not os.path.isfile(eyethumb):
              print("ERROR: no thumbnail found for"+filename)
              return FAILURE
          print(filename+" converting eyetv tiff icon to jpg icon")
          jpgName = changeExtension(filename, "jpg")
          # Argument list instead of a shell string: recording names may
          # contain quotes or other shell metacharacters.
          convProcCmd = [os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_CONVERT_PROCESS), eyethumb, jpgName]
          if DEBUG:
              print('DEBUG: Convert command: '+str(convProcCmd))
          try:
              convStatus = subprocess.call(convProcCmd)
          except OSError:
              convStatus = -1
          if convStatus != 0 or not os.path.isfile(jpgName):
              print("ERROR: unable to convert "+eyethumb+" to jpg")
              return FAILURE
          # use XBMC tbn standard
          shutil.move(jpgName, thumbName)

      # get thumb size
      identifyProcCmd = (os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_IDENTIFY_PROCESS), '-ping', thumbName)
      if DEBUG:
          print('DEBUG: Identify command:  '+str(identifyProcCmd))
      identifyProc = subprocess.Popen(identifyProcCmd, stdout=subprocess.PIPE)
      identifyOutput = identifyProc.communicate()[0]
      try:
          # identify prints "<file> <format> <W>x<H> ..."; skip the file name
          # and read the second remaining field.
          thumbSize = [int(a) for a in identifyOutput[len(thumbName)+1:-1].split(' ')[1].split('x')]
          if len(thumbSize) < 2:
              raise ValueError(thumbSize)
      except (IndexError, ValueError):
          print("ERROR: unable to read the size of "+thumbName)
          return FAILURE
      if DEBUG:
          print('DEBUG: thumbsize= '+str(thumbSize))

      compositePath = os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_COMPOSITE_PROCESS)
      overlayHeight = str(int(thumbSize[1]*IMG_HEIGHT))
      margin = thumbSize[0]*IMG_MARGIN
      yOffset = str(int(thumbSize[1]*(1-IMG_HEIGHT)-margin))

      def _overlay (logoName, xOffset, kind):
          # Composite images/<logoName>.png onto the thumb at xOffset; kind is
          # only used in the warning text ("video" or "audio").
          if logoName is None:
              logoName = "??"
          overlayFilename = os.path.join(sys.path[0], 'EyetvTOG/images', logoName + '.png')
          if not os.path.isfile(overlayFilename):
              print("WARN: Couldn't find "+overlayFilename+" skipping "+kind+" overlay")
              return
          geometry = '1000x' + overlayHeight + '+' + xOffset + '+' + yOffset
          compositeCmd = (compositePath, '-compose', 'atop', '-geometry', geometry, overlayFilename, thumbName, thumbName)
          compositeProc = subprocess.Popen(compositeCmd, stderr=subprocess.PIPE)
          compositeErrors = compositeProc.communicate()[1]
          # check stderr for errors
          if len(compositeErrors) > 0:
              print('ERROR: ')
              print(compositeErrors)

      # video logo in the bottom left corner
      _overlay(ffmpegResults[1], str(int(margin)), 'video')
      # audio logo in the bottom right corner
      _overlay(ffmpegResults[3], str(int(thumbSize[0]-thumbSize[1]*(IMG_HEIGHT/LOGO_RATIO)-margin)), 'audio')
      return SUCCESS
  def revertFile (filename):
      """Put the backed-up thumbnail of filename back in place.

      Returns SUCCESS when a tbn-orig backup was restored (the backup is
      removed afterwards), FAILURE when only a tbn exists, and SKIPPED when
      neither thumbnail file exists.
      """
      generated = changeExtension(filename, "tbn")
      backup = changeExtension(filename, "tbn-orig")
      if os.path.isfile(backup):
          # restore the original and drop the backup copy
          shutil.copyfile(backup, generated)
          os.remove(backup)
          return SUCCESS
      if not os.path.isfile(generated):
          # no thumb files at all
          print("WARN: no thumbnail for"+filename)
          return SKIPPED
      # only the generated thumb is left, nothing to restore from
      print("WARN: no tbn-orig file for "+filename+" unable to revert")
      return FAILURE
  def changeExtension (filename, newExtension):
      """Replaces filename's extension with newExtension.

      newExtension is given without the leading dot.  A name without an
      extension simply gets ".<newExtension>" appended.
      """
      # os.path.splitext only looks at the last path component, so dots in
      # directory names (e.g. "Show.eyetv/") and dot-less names are handled;
      # the former rfind(".") slicing truncated the path in both cases.
      stubName = os.path.splitext(filename)[0]
      return stubName + "." + newExtension
  def getffmpegInfo (filename):
      """Run "ffmpeg -i filename" and return what it wrote to stderr, split into lines."""
      probeCmd = (os.path.join(FFMPEG_PATH, FFMPEG_PROCESS), '-i', filename)
      if DEBUG:
          print('DEBUG: Command string: ' + str(probeCmd))
      probe = subprocess.Popen(probeCmd, stderr=subprocess.PIPE)
      # communicate() returns (stdout, stderr); only stderr is piped here
      stderrText = probe.communicate()[1]
      lines = stderrText.split('\n')
      if DEBUG:
          print("DEBUG: ffmpeg output: " + str(lines))
      return lines
  def parseffmpegInfo (output):
      """Extract codec information from the lines printed by "ffmpeg -i".

      output -- iterable of text lines (as returned by getffmpegInfo).

      Returns None when neither a usable video nor a usable audio stream
      line was found, otherwise the list
      [videoCodec, resolution label, audio codec label, audio channels]
      built from the first video and the first audio stream.  A stream that
      was not found contributes None / "??" entries instead of crashing.
      """
      # Accept both stream notations, "Stream #0.1" and "Stream #0:1".
      streamRegex = re.compile(r"\s+Stream #0[.:](\d+)(\[\w+\])?.*?: (Video|Audio): (.+)")
      videoCodec = None
      videoResolution = None
      audioCodec = None
      audioChannels = None
      for line in output:
          if DEBUG:
              print("PARSING:"+line)
          streamResult = streamRegex.match(line)
          if streamResult is None:
              if DEBUG:
                  print("PARSING: Line didn't match known results format.")
              continue
          params = streamResult.group(4).split(", ")
          # Both branches read params[0] and params[2]; the audio branch used
          # to index without this guard and could raise IndexError.
          if len(params) < 3:
              continue
          streamKind = streamResult.group(3)
          if streamKind == "Video" and videoCodec is None and videoResolution is None:
              print("PARSING: Found video line containing "+params[0]+" and "+params[2])
              videoCodec = params[0]
              videoResolution = params[2]
          elif streamKind == "Audio" and audioCodec is None and audioChannels is None:
              print("PARSING: Found audio line containing "+params[0]+" and "+params[2])
              audioCodec = params[0]
              audioChannels = params[2]
      if (videoCodec is None or videoResolution is None) and (audioCodec is None or audioChannels is None):
          return None
      # Only one of the two streams may have been found; do not hand None to
      # the sub-parsers.
      if videoResolution is None:
          resolutionLabel = "??"
      else:
          resolutionLabel = parseVideoResolution(videoResolution)
      if audioChannels is None:
          channelsLabel = "??"
      else:
          channelsLabel = parseAudioChannels(audioChannels)
      return [videoCodec, resolutionLabel, parseAudioCodec(audioCodec), channelsLabel]
  def parseAudioCodec (codec):
      """Map an audio codec token to its display label ("??" when unknown)."""
      knownCodecs = (
          ("0x0000", "AC3"),
          ("mp3", "MP3"),
          ("dca", "DTS"),
      )
      for token, label in knownCodecs:
          if codec == token:
              return label
      return "??"
  def parseAudioChannels (channels):
      """Normalise the audio channel token.

      "5:1" and "7:1" are rewritten to "5.1" and "7.1"; any other string is
      returned unchanged.  None (no audio stream found) yields "??" so the
      result can always be used to build an overlay file name.
      """
      if channels is None:
          return "??"
      if channels == "5:1":
          return "5.1"
      if channels == "7:1":
          return "7.1"
      return channels
  def parseVideoResolution (resolution):
      """Classify a "<width>x<height>..." token by its width.

      Returns "SD" below 1280, "720p" from 1280 up to 1919, "1080p" from
      1920, and "??" when no integer width can be read (None or a token
      that does not start with a number).
      """
      if DEBUG:
          print('DEBUG: Parsing resolution to tuple:%s' % (resolution,))
      try:
          width = int(resolution.split("x")[0])
      except (AttributeError, ValueError):
          # The "??" fallback was unreachable before; bad input raised here.
          return "??"
      if width < 1280:
          return "SD"
      if width < 1920:
          return "720p"
      return "1080p"
  def scanAllFiles (file):
      """Run doFile on a single file and report the outcome.

      Returns a (good, bad, skipped) tuple in which exactly one entry is 1.
      Directory recursion is disabled here; the argument is always handed
      to doFile as one file.
      """
      outcome = doFile(file)
      if outcome == SUCCESS:
          return (1, 0, 0)
      if outcome == SKIPPED:
          return (0, 0, 1)
      # anything else counts as a failure
      return (0, 1, 0)
  def revertAllFiles (files):
      """Revert the thumbnails of every entry in files, recursing into folders.

      Returns the totals as a (good, bad, skipped) tuple.
      """
      good = bad = skipped = 0
      for entry in files:
          if os.path.isdir(entry):
              # a folder: recurse into its sub-folders and its files that end
              # with AUTOSCAN_EXTENSIONS
              children = [os.path.join(entry, child) for child in os.listdir(entry)]
              candidates = [c for c in children if c.endswith(AUTOSCAN_EXTENSIONS) or os.path.isdir(c)]
              subtotals = revertAllFiles(candidates)
              good += subtotals[0]
              bad += subtotals[1]
              skipped += subtotals[2]
              continue
          outcome = revertFile(entry)
          if outcome == SUCCESS:
              good += 1
          elif outcome == SKIPPED:
              skipped += 1
          else:
              bad += 1
      return (good, bad, skipped)
  def getSyntax (name):
      """Return the two-line command-line usage text for the script called name."""
      scanUsage = 'Syntax: python ' + name + ' <-scan|-revert> [file/folder list]'
      forceUsage = '\tpython ' + name + ' -force <1080p|720p|SD> <7.1|5.1|stereo|mono> <filename>'
      return scanUsage + '\n' + forceUsage
  # main TOG function: verify the external tools, then run the requested action
  checkDependencies()
  # NOTE(review): sys.argv is tested before cmd, so with a bare argv the usage
  # text is printed even when cmd is '-scan' - confirm this is intended.
  if len(sys.argv) == 1:
      print(getSyntax(sys.argv[0]))
  elif sys.argv[1] == '-revert' and len(sys.argv) >= 3:
      results = revertAllFiles(sys.argv[2:])
      # results holds integer counts; they must be converted (and summed)
      # before being joined into the message, otherwise str + int raises.
      print('Successfully reverted overlays on '+str(results[0])+' of '+str(results[0]+results[1]+results[2])+' thumbnails (' + str(results[1]) + ' failures)')
  # this is the only option used in this context
  elif cmd == '-scan':
      results = scanAllFiles(scandir)
      print('Successfully created overlays on '+str(results[0])+' of 1 thumbnails (' + str(results[1]) + ' failures)')
  elif sys.argv[1] == '-force' and len(sys.argv) == 5:
      # argv: -force <video logo> <audio logo> <filename>
      doFile(sys.argv[4], ('', sys.argv[2], '', sys.argv[3]))
  else:
      print(getSyntax(sys.argv[0]))
  334. # EOT
  335. # check eyetv dir
  336. for dirpath, dirnames, filenames in os.walk(path):
  337. # ignore eyetvsched files
  338. if dirpath.endswith(".eyetv"):
  339. # ignore eyetv buffer (for now)
  340. if not dirpath.endswith("Live TV Buffer.eyetv"):
  341. # print 'Directory', dirpath
  342. for filename in filenames:
  343. if filename.endswith(".mpg"):
  344. fqname=dirpath+'/'+filename
  345. fqname = unicode(fqname, "utf-8" )
  346. # print fqname
  347. p=re.compile('.+/(.+).eyetv')
  348. m=p.match(dirpath)
  349. shortdirpath=m.group(1)
  350. shortdirpath = unicode( shortdirpath, "utf-8" )
  351. # match icon name
  352. p=re.compile('(.+)\.mpg')
  353. m=p.match(fqname)
  354. tbn=m.group(1)
  355. tifftbn=tbn+'.tiff'
  356. # detect tbn /integrate with tog
  357. tbn=tbn+'.tbn'
  358. if os.path.isfile(tifftbn) and not os.path.isfile(tbn):
  359. # convert Unicode string to regular string
  360. # togname = str(fqname)
  361. togname = fqname.encode( 'utf-8' )
  362. togname = togname.replace("'", "\'")
  363. if tog == 1:
  364. try:
  365. go_tog('-scan', togname)
  366. except:
  367. print "Failed to set thumbnail overlay for:"+togname
  368. pass
  369. # if TOG fails fallback to tiff tbns
  370. if os.path.isfile(tifftbn) and not os.path.isfile(tbn):
  371. tbn = tifftbn
  372. statinfo = os.stat(fqname)
  373. size = statinfo.st_size
  374. filedate = statinfo.st_ctime
  375. objdate=datetime.fromtimestamp(filedate)
  376. date=objdate.strftime("%d/%m/%Y")
  377. # %H:%M:%S")
  378. # print size, date
  379. # open Eyetv program info xml file
  380. for file in os.listdir(dirpath):
  381. if( re.search('.eyetvp$', file)):
  382. filePl = dirpath+"/"+file
  383. #fObj=dirpath+'/*.eyetvp'
  384. #filePl=unicode((str(glob.glob(fObj))), "utf-8" )
  385. ## print filePl
  386. ##filePl = filePl.replace("[", "")
  387. ##filePl = filePl.replace("]", "")
  388. #filePl = filePl.replace("\"", "")
  389. ## filePl = filePl.replace("'", "")
  390. ## print filePl
  391. #filePl = filePl.strip('[')
  392. #filePl = filePl.strip(']')
  393. #filePl = filePl.strip('\'')
  394. # print filePl
  395. if not filePl=="":
  396. title=""
  397. subtitle=""
  398. episode=""
  399. dsubtitle=""
  400. depisode=""
  401. plot=""
  402. actors=""
  403. genre=""
  404. director=""
  405. aired=""
  406. actorlist=[]
  407. file = open(filePl)
  408. pl=""
  409. for line in file:
  410. line = line.replace("\n", "")
  411. line = line.replace("\t", "")
  412. line = line.replace("'", "\'")
  413. pl = pl + line
  414. file.close()
  415. epg=unicode(pl, "utf-8" )
  416. p=re.compile('<key>TITLE</key><string>(.*?)</string>')
  417. m=p.search(epg)
  418. if m:
  419. title=m.group(1)
  420. title = title.strip()
  421. p=re.compile('<key>SUBTITLE</key><string>(.*?)</string>')
  422. m=p.search(epg)
  423. if m:
  424. subtitle=m.group(1)
  425. subtitle = subtitle.strip()
  426. if subtitle:
  427. dsubtitle='.'+subtitle
  428. # print subtitle
  429. p=re.compile('<key>EPISODENUM</key><string>(.*?)</string>')
  430. m=p.search(epg)
  431. if m:
  432. episode=m.group(1)
  433. episode = episode.strip()
  434. if episode:
  435. depisode='.'+episode
  436. # print episode
  437. p=re.compile('<key>DESCRIPTION</key><string>(.*?)</string>')
  438. m=p.search(epg)
  439. if m:
  440. plot=m.group(1)
  441. plot = plot.strip()
  442. # print plot
  443. libname=title+depisode+dsubtitle
  444. libname = libname.encode( 'utf-8' )
  445. # print libname
  446. p=re.compile('<key>ACTORS</key><string>(.*?)</string>')
  447. m=p.search(epg)
  448. if m:
  449. actors=m.group(1)
  450. actors = actors.strip()
  451. p=re.compile('<key>CONTENT</key><string>(.*?)</string>')
  452. m=p.search(epg)
  453. if m:
  454. genre=m.group(1)
  455. genre = genre.strip()
  456. p=re.compile('<key>DIRECTOR</key><string>(.*?)</string>')
  457. m=p.search(epg)
  458. if m:
  459. director=m.group(1)
  460. director = director.strip()
  461. p=re.compile('<date>(\d\d\d\d-\d\d-\d\d).*?\d\d:\d\d:\d\d.*?</date>')
  462. m=p.search(epg)
  463. if m:
  464. aired=m.group(1)
  465. aired = aired.strip()
  466. # linked directory support
  467. if LDS == 1:
  468. # build dummy dir
  469. if not os.path.isdir(dummy):
  470. os.mkdir(dummy)
  471. dummyShowName = dummy+'/'+title
  472. if not os.path.isdir(dummyShowName):
  473. os.mkdir(dummyShowName)
  474. scrapethetvdb = STVDB
  475. done = 0
  476. if DEBUG:
  477. print 'processing - '+title+'-'+subtitle
  478. # .nfo file support
  479. if nfo == 1:
  480. # thetvdb.com scraper support
  481. while scrapethetvdb == 1 and done == 0:
  482. import urllib,urllib2
  483. # scrape thetvdb.com
  484. # load search results
  485. UEtitle = urllib.quote_plus(title, safe='/')
  486. Base_URL = "http://www.thetvdb.com/index.php?seriesname="+UEtitle+"&fieldlocation=2&language=7&genre=&year=&network=&zap2it_id=&tvcom_id=&imdb_id=&order=translation&searching=Search&tab=advancedsearch"
  487. WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  488. WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  489. WebSock.close() # Closes connection to url
  490. # parse series search results page
  491. seriesLink = re.compile('<tr><td class="odd">1</td><td class="odd"><a href="(/index.php\?tab=series&amp;id=\d+&amp;lid=\d)">', re.IGNORECASE).findall(WebHTML)
  492. if len(seriesLink) == 0:
  493. if DEBUG:
  494. print 'seriesLink not match -'+title+'-'+subtitle
  495. scrapethetvdb = 0
  496. break
  497. # ...
  498. # load series page
  499. Base_URL = "http://www.thetvdb.com"+urllib.unquote(seriesLink[0])
  500. Base_URL = Base_URL.replace('&amp;', '&')
  501. if DEBUG:
  502. print 'Base_URL-'+Base_URL
  503. WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  504. WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  505. WebSock.close() # Closes connection to url
  506. # ...
  507. # parse series page
  508. m = re.compile('<a href="(/\?tab=seasonall&id=\d+&amp;lid=\d)" class="seasonlink">All</a>', re.IGNORECASE).findall(WebHTML)
  509. if len(m) == 0:
  510. if DEBUG:
  511. print 'episodesLink not matched -'+title+'-'+subtitle
  512. scrapethtvdb = 0
  513. break
  514. episodesLink = str(m[0])
  515. episodesLink = episodesLink.replace('&amp;', '&')
  516. episodesLink = 'http://www.thetvdb.com'+episodesLink
  517. if DEBUG:
  518. print 'episodesLink-'+episodesLink
  519. m = re.compile('<!-- Right upper -->\s+<td>\s+<div id="content">\s+<h1>.+</h1>\s*(.+)\s*', re.IGNORECASE).findall(WebHTML)
  520. if len(m) == 0:
  521. if DEBUG:
  522. print 'thetvdbPlot not matched'
  523. scrapethtvdb = 0
  524. break
  525. thetvdbPlot = str(m[0])
  526. if DEBUG:
  527. print 'thetvdbPlot-'+thetvdbPlot
  528. pSID = re.compile('/\?tab=seasonall&id=(\d+)&lid=\d')
  529. mSID = pSID.search(episodesLink)
  530. if mSID:
  531. SID=mSID.group(1)
  532. else:
  533. if DEBUG:
  534. print 'ShowID not matched -'+title+'-'+subtitle
  535. scrapethtvdb = 0
  536. break
  537. if DEBUG:
  538. print 'SID-'+SID
  539. actorsLink = 'http://www.thetvdb.com/?tab=actors&id='+SID
  540. if DEBUG:
  541. print 'actorsLink-'+actorsLink
  542. #bannersLink = 'http://www.thetvdb.com/?tab=seriesbanners&id='+SID
  543. #fanartLink = 'http://www.thetvdb.com/?tab=seriesfanart&id='+SID
  544. thetvdbBanners = re.compile('<img src="(.*)" class="banner" border="0"></a>', re.IGNORECASE).findall(WebHTML)
  545. if DEBUG:
  546. print 'thetvdbBanners[0]-'+thetvdbBanners[0]
  547. firstBanner = 'http://www.thetvdb.com'+thetvdbBanners[0]
  548. thetvdbFanart = re.compile('<div id="fanart" style="background-image: url\((.*)\)">', re.IGNORECASE).findall(WebHTML)
  549. if DEBUG:
  550. print 'thetvdbFanart[0]-'+thetvdbFanart[0]
  551. firstFanart = 'http://www.thetvdb.com'+thetvdbFanart[0]
  552. # ...
  553. # load actors page
  554. Base_URL = actorsLink
  555. WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  556. WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  557. WebSock.close() # Closes connection to url
  558. # ...
  559. # parse actors page
  560. actorLists= re.compile('<table cellspacing="0" cellpadding="0" border="0" width="100%" class="infotable"><tr><td>\s+<img src="(.*)" class="banner" border="0" alt=".*">\s+<h2><a href="http://www.imdb.com/find\?s=nm&q=.*" target="_blank">(.*)</a></h2>(?:as )*(.*)(?:<br>)*').findall(WebHTML)
  561. #if DEBUG:
  562. # print 'actorLists - '+str(actorLists)
  563. thetvdbActorThumbs = []
  564. thetvdbActors = []
  565. thetvdbActorRoles = []
  566. v=0
  567. if len(actorLists) == 0:
  568. print "Actors did not match"
  569. scrapethtvdb = 0
  570. break
  571. while v < len(actorLists):
  572. record = actorLists[v]
  573. #if DEBUG:
  574. # print 'record - '+str(record)
  575. thetvdbActorThumbs.append(record[0])
  576. thetvdbActors.append(record[1])
  577. thetvdbActorRoles.append(record[2])
  578. v=v+1
  579. #if DEBUG:
  580. # print "thetvdbActorThumbs - "+str(thetvdbActorThumbs)
  581. # print "thetvdbActors - "+str(thetvdbActors)
  582. # print "thetvdbActorRoles - "+str(thetvdbActorRoles)
  583. # ...
  584. # load banners page
  585. #Base_URL = bannersLink
  586. #WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  587. #WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  588. #WebSock.close() # Closes connection to url
  589. ## ...
  590. ## parse banners page
  591. #thetvdbBanners = re.compile('<img src="(.*)" class="banner" border="0"></a>', re.IGNORECASE).findall(WebHTML)
  592. # ...
  593. # load fanart page
  594. #Base_URL = fanartLink
  595. #WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  596. #WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  597. #WebSock.close() # Closes connection to url
  598. ## ...
  599. ## parse fanart page
  600. #thetvdbFanarts = re.compile('<img src="(.*)" class="banner" border="0"></a>', re.IGNORECASE).findall(WebHTML)
  601. # ...
  602. # load episodes page
  603. Base_URL = episodesLink
  604. WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  605. WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  606. WebSock.close() # Closes connection to url
  607. # ...
  608. # parse episodes page
  def fuzzy_substring(needle, haystack):
      """Calculates the fuzzy match of needle in haystack,
      using a modified version of the Levenshtein distance
      algorithm.
      The function is modified from the levenshtein function
      in the bktree module by Adam Hupp

      Returns an int: the smallest number of edits needed to turn needle
      into some substring of haystack (0 means needle occurs verbatim).
      An empty haystack yields len(needle); an empty needle yields 0.
      """
      m, n = len(needle), len(haystack)
      # base cases
      if m == 1:
          # int() keeps the result type the same as on every other path
          # (this branch used to return a bool).
          return int(needle not in haystack)
      if not n:
          return m
      # The first row is all zeros so a match may start anywhere in haystack.
      previous = [0] * (n+1)
      for i in range(m):
          current = [i+1]
          for j in range(n):
              cost = int(needle[i] != haystack[j])
              current.append(min(previous[j+1]+1,    # deletion
                                 current[j]+1,       # insertion
                                 previous[j]+cost))  # substitution
          previous = current
      # The minimum over the last row lets the match end anywhere as well.
      return min(previous)
  632. episodeList = re.compile('<tr><td class="(?:even|odd)+"><a href="/\?tab=episode&seriesid=\d+&seasonid=\d*&id=\d*&amp;lid=\d">(\d*)(?: - )*(\d*)</a></td><td class="(?:even|odd)+"><a href="/\?tab=episode&seriesid=\d+&seasonid=(\d*)&id=(\d*)&amp;lid=\d">(.+)</a></td>').findall(WebHTML)
  633. candidateSeasons = []
  634. candidateEpisodes = []
  635. candidateSeasonID = []
  636. candidateSpecificEpisodeID = []
  637. specificEpisodeCandidates = []
  638. v=0
  639. if len(episodeList) == 0:
  640. print 'Empty episodeList'
  641. scrapethtvdb = 0
  642. break
  643. while v < len(episodeList):
  644. record = episodeList[v]
  645. #if DEBUG:
  646. # print 'record - '+str(record)
  647. candidateSeasons.append(record[0])
  648. candidateEpisodes.append(record[1])
  649. candidateSeasonID.append(record[2])
  650. candidateSpecificEpisodeID.append(record[3])
  651. specificEpisodeCandidates.append(record[4])
  652. v=v+1
  653. # for specificEpisodeCandidate in specificEpisodeCandidates:
  654. i=0
  655. candidateScore = []
  656. while i < len(specificEpisodeCandidates):
  657. specificEpisodeCandidate = specificEpisodeCandidates[i]
  658. # attempt fuzzy match
  659. candidateScore.append(fuzzy_substring(subtitle, specificEpisodeCandidate))
  660. i=i+1
  661. # grab best match
  662. bestMatch = min(candidateScore)
  663. if DEBUG:
  664. print 'bestMatch-'+str(bestMatch)
  665. BMI = candidateScore.index(bestMatch)
  666. if DEBUG:
  667. print 'BMI-'+str(BMI)
  668. if bestMatch > 11:
  669. if DEBUG:
  670. print 'no specific episode found for:'+title+'-'+subtitle
  671. scrapethetvdb = 0
  672. break
  673. else:
  674. SSID = candidateSeasonID[BMI]
  675. SEID = candidateSpecificEpisodeID[BMI]
  676. SpecificEpisodeLink = 'http://www.thetvdb.com/?tab=episode&seriesid='+SID+'&seasonid='+SSID+'&id='+SEID+'&lid=7'
  677. if DEBUG:
  678. print 'SpecificEpisodeLink - '+str(SpecificEpisodeLink)
  679. # ...
  680. # load specific episode page
  681. Base_URL = SpecificEpisodeLink
  682. WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  683. WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  684. WebSock.close() # Closes connection to url
  685. # ...
  686. # parse specific episode page
  687. scrapermatch = re.compile('<input type="text" name="EpisodeName_\d+" value="(.*)" style="display: inline" >', re.IGNORECASE).findall(WebHTML)
  688. if len(scrapermatch) == 0:
  689. print 'Empty scrapersubtitle'
  690. scrapersubtitle = ''
  691. else:
  692. scrapersubtitle = scrapermatch[0]
  693. if DEBUG:
  694. print 'scrapersubtitle - '+str(scrapersubtitle)
  695. scraperseason = candidateSeasons[BMI]
  696. if DEBUG:
  697. print 'scraperseason - '+str(scraperseason)
  698. scrapermatch = re.compile('<td><input type="text" name="EpisodeNumber" value="(.*)" maxlength="45"></td>', re.IGNORECASE).findall(WebHTML)
  699. if len(scrapermatch) == 0:
  700. print 'Empty scraperepisode'
  701. scraperepisode = ''
  702. else:
  703. scraperepisode = scrapermatch[0]
  704. if DEBUG:
  705. print 'scraperepisode - '+str(scraperepisode)
  706. scrapermatch = re.compile('<textarea rows="\d+" cols="\d+" name="Overview_\d+" style="display: inline">(.*)</textarea>', re.IGNORECASE).findall(WebHTML)
  707. if len(scrapermatch) == 0:
  708. print 'Empty scraperplot'
  709. scraperplot = ''
  710. else:
  711. scraperplot = scrapermatch[0]
  712. if DEBUG:
  713. print 'scraperplot - '+str(scraperplot)
  714. scrapermatch = re.compile('<td><input name="Director" value="(.*)" maxlength="255" type="text"></td>', re.IGNORECASE).findall(WebHTML)
  715. if len(scrapermatch) == 0:
  716. print 'Empty scraperdirector'
  717. scraperdirector = ''
  718. else:
  719. scraperdirector = scrapermatch[0]
  720. if DEBUG:
  721. print 'scraperdirector - '+str(scraperdirector)
  722. scrapermatch = re.compile('<td><input name="FirstAired" value="(.*)" maxlength="255" type="text"></td>', re.IGNORECASE).findall(WebHTML)
  723. if len(scrapermatch) == 0:
  724. print 'Empty scraperaired'
  725. scraperaired = ''
  726. else:
  727. scraperaired = scrapermatch[0]
  728. if DEBUG:
  729. print 'scraperaired - '+str(scraperaired)
  730. scrapername = title+'.S'+scraperseason+'E'+scraperepisode+'.'+scrapersubtitle
  731. if DEBUG:
  732. print 'scrapername - '+str(scrapername)
  733. # ...
  734. # use thetvdb.com data for show nfo
  735. # test for existing tvshow.nfo
  736. shownfo=dummyShowName+'/tvshow.nfo'
  737. if not os.path.isfile(shownfo):
  738. if DEBUG:
  739. print 'using thetvdb.com data for show nfo - '+title
  740. # write tvshow.nfo (Series Info)
  741. shownfofile=open(shownfo, 'w')
  742. shownfofile.write('<tvshow>\n')
  743. shownfofile.write(' <title>'+title+'</title>\n')
  744. shownfofile.write(' <season>-1</season>\n')
  745. shownfofile.write(' <episode>0</episode>\n')
  746. shownfofile.write(' <displayseason>-1</displayseason>\n')
  747. shownfofile.write(' <displayepisode>-1</displayepisode>\n')
  748. if not genre == '':
  749. shownfofile.write(' <genre>'+genre+'</genre>\n')
  750. if not thetvdbPlot == '':
  751. shownfofile.write(' <plot>'+thetvdbPlot+'</plot>\n')
  752. if not thetvdbActors == '':
  753. j=0
  754. #for actor in thetvdbActors:
  755. y=len(thetvdbActors)
  756. while j<y:
  757. actor = thetvdbActors[j]
  758. if not actor == '':
  759. shownfofile.write(' <actor>\n')
  760. shownfofile.write(' <name>'+actor+'</name>\n')
  761. if not thetvdbActorRoles[j] == '':
  762. thetvdbActorRole = thetvdbActorRoles[j]
  763. if not thetvdbActorRole == '':
  764. shownfofile.write(' <role>'+thetvdbActorRole+'</role>\n')
  765. if not thetvdbActorThumbs[j] == '':
  766. thetvdbActorThumb = thetvdbActorThumbs[j]
  767. if not thetvdbActorThumb == '':
  768. if not thetvdbActorThumb == '/banners/actors/0.jpg':
  769. thetvdbActorThumb = 'http://www.thetvdb.com'+thetvdbActorThumb
  770. shownfofile.write(' <thumb>'+thetvdbActorThumb+'</thumb>\n')
  771. shownfofile.write(' </actor>\n')
  772. j=j+1
  773. shownfofile.write('</tvshow>\n')
  774. shownfofile.close()
  775. # banners and fanart
  776. if DEBUG:
  777. print 'firstBanner - '+firstBanner
  778. Base_URL = firstBanner
  779. WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  780. WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  781. WebSock.close() # Closes connection to url
  782. if not WebHTML == '':
  783. showBanner=dummyShowName+'/folder.jpg'
  784. if not os.path.isfile(showBanner):
  785. showBannerFile=open(showBanner, 'w')
  786. showBannerFile.write(WebHTML)
  787. showBannerFile.close()
  788. if DEBUG:
  789. print 'firstFanart - '+firstFanart
  790. if not firstFanart == 'http://www.thetvdb.com/banners/actors/0.jpg' or firstFanart == 'http://www.thetvdb.com':
  791. Base_URL = firstFanart
  792. WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
  793. WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
  794. WebSock.close() # Closes connection to url
  795. if not WebHTML == '':
  796. showFanart=dummyShowName+'/fanart.jpg'
  797. if not os.path.isfile(showFanart):
  798. showFanartFile=open(showFanart, 'w')
  799. showFanartFile.write(WebHTML)
  800. showFanartFile.close()
  801. done = 1
  802. # else:
  803. if not done == 1:
  804. # use Eyetv EPG data for show nfo
  805. # test for existing tvshow.nfo
  806. shownfo=dummyShowName+'/tvshow.nfo'
  807. if not os.path.isfile(shownfo):
  808. if DEBUG:
  809. print 'using Eyetv.com data for show nfo - '+title
  810. # write tvshow.nfo (Series Info)
  811. shownfofile=open(shownfo, 'w')
  812. shownfofile.write('<tvshow>\n')
  813. shownfofile.write(' <title>'+title+'</title>\n')
  814. shownfofile.write(' <season>-1</season>\n')
  815. shownfofile.write(' <episode>0</episode>\n')

Large files are truncated, but you can click here to view the full file