/plugins/video/XBMCEyetvScraper/default.py
Python | 1036 lines | 862 code | 80 blank | 94 comment | 87 complexity | 57934512db14be59fc782cd374894b88 MD5 | raw file
Possible License(s): AGPL-1.0, GPL-3.0, GPL-2.0, BSD-2-Clause
Large files files are truncated, but you can click here to view the full file
- #!/usr/bin/env python
- # XBMCEyetvScraper
- # version 1.53
- # by prophead
- # ThumbnailOverlayGenerator by Nic Wolfe (midgetspy)
- # user variables, these are overidden by the gui settings
- # ---------------
- # my default path
- path = '/Volumes/RAID/Movies/EyeTV/EyeTV Archive'
- # my TV Show linked directory path
- dummy = '/Volumes/RAID/Movies/TV'
- # TOG
- # requires Imagemagick, and ffmpeg
- # 0=no thumbail overlays, 1=try thumbnail overlays
- tog = 1
- # TV Show linked directory (library) support
- # 0=no linked directory support, 1=try to create linked directory
- LDS = 1
- # nfo file support
- # requires LDS
- # 0=no nfo file support, 1=try to create nfo files from Eyetv/thetvdb.com EPG data
- nfo = 1
- # thetvdb.com scraper support
- # requires nfo
- # 0=use Eyetv EPG data, 1=try to scrape thetvdb.com for data
- STVDB = 1
- # 1=Extended output in log
- DEBUG = 0
- # ---------------
- import os, re, glob, sys
- import xbmc,xbmcgui,xbmcplugin
- from datetime import datetime
- # set settings (Eyetv folder) from the XBMC interface
- def get_settings():
- settings = {}
- try:
- settings['path'] = xbmcplugin.getSetting( 'path' )
- settings['dummy'] = xbmcplugin.getSetting( 'dummy' )
- settings['EyetvTOG'] = xbmcplugin.getSetting( 'EyetvTOG' )
- return settings
- except:
- print "couldn't load settings"
- pass
- # test for Boxee
- #BoxeePath = os.getcwd()[:-1]+"/"
- #p=re.compile('.+(oxee).+')
- #m=p.match(BoxeePath)
- #if not m:
- # get settings from gui settings
- settings=get_settings()
- path=settings['path']
- dummy=settings['dummy']
- tog=settings['EyetvTOG']
- if tog == "true":
- tog = 1
- #else:
- # print "[XBMCEyetvScraper] - Boxee"
- def go_tog(cmd, scandir):
- # Author: Nic Wolfe (midgetspy)
- # Contact: PM me on the xbmc.org forums
- # Version: 0.3.3.EM
-
- ###########################
- ## Configuration Options ##
- ###########################
- # relative size of the overlays
- IMG_HEIGHT = 0.24
- # default to macport standards
- FFMPEG_PATH = '/opt/local/bin'
- FFMPEG_PROCESS = 'ffmpeg'
- IMAGEMAGICK_PATH = '/opt/local/bin'
- IMAGEMAGICK_IDENTIFY_PROCESS = 'identify'
- IMAGEMAGICK_COMPOSITE_PROCESS = 'composite'
- IMAGEMAGICK_CONVERT_PROCESS = 'convert'
- AUTOSCAN_EXTENSIONS = ('.mpg')
-
- # moved to global
- # 1=Extended output in log
- # DEBUG = 1
-
- # put the dimensions of your logos here (height/width)
- LOGO_RATIO = 1024/1024
-
- #################################################################################################################
- #################################################################################################################
- #################################################################################################################
- # Don't touch below this line unless you know what you're doing.
- SUCCESS = 1
- FAILURE = 2
- SKIPPED = 3
- IMG_MARGIN = 0.02
-
- import sys
- import shutil
- import os
- import re
- import subprocess
-
-
- def checkDependencies ():
- result = SUCCESS
- if not os.path.isfile(os.path.join(FFMPEG_PATH, FFMPEG_PROCESS)):
- print "ERROR: unable to find ffmpeg. Verify that it's installed and that FFMPEG_PATH is set correctly."
- result = FAILURE
- if not os.path.isfile(os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_IDENTIFY_PROCESS)):
- print "ERROR: unable to find identify (part of ImageMagick). Verify that it's installed and that IMAGEMAGICK_PATH is set correctly."
- result = FAILURE
- if not os.path.isfile(os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_COMPOSITE_PROCESS)):
- print "ERROR: unable to find composite (part of ImageMagick). Verify that it's installed and that IMAGEMAGICK_PATH is set correctly."
- result = FAILURE
- if result != SUCCESS:
- print "Quitting..."
- sys.exit()
-
- def doFile (filename, results=None):
- print "Doing overlays for "+filename
- if results == None:
- ffmpegResults = parseffmpegInfo(getffmpegInfo(filename))
- else:
- ffmpegResults = results
- if DEBUG:
- print "DEBUG: INFO: Scanned"+filename+"got", ffmpegResults
- if ffmpegResults == None:
- print "ERROR: unable to scan "+filename+" with ffmpeg"
- return FAILURE
- thumbName = changeExtension(filename, "tbn")
- thumbOrigName = changeExtension(filename, "tbn-orig")
- eyethumb = changeExtension(filename, "tiff")
-
- # if they have only tbn-orig then they want me to regenerate the thumb
- if os.path.isfile(thumbOrigName) and not os.path.isfile(thumbName):
- shutil.copyfile(thumbOrigName, thumbName)
- # if they have only a tbn (and not an eyetv tiff) I need to back it up then make the thumb
- elif os.path.isfile(thumbName) and not os.path.isfile(thumbOrigName) and not os.path.isfile(eyethumb):
- shutil.copyfile(thumbName, thumbOrigName)
- # if they have both I'm assuming it's done and leaving it alone
- elif os.path.isfile(thumbName) and os.path.isfile(eyethumb):
- print filename+" already has a generated thumb, skipping"
- return SKIPPED
- # no thumb files means nothing I can do
- # added support for no thumb, grab thumb from tiff
- else:
- thumbName = changeExtension(filename, "jpg")
- thumbOrigName = changeExtension(filename, "tiff")
- if os.path.isfile(thumbOrigName):
- print filename+" converting eyetv tiff icon to jpg icon"
- # convert to jpg
- convProcCmd = os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_CONVERT_PROCESS)+' \"'+thumbOrigName+'\" \"'+thumbName+'\"'
- # convProcCmd = convProcCmd.replace("'", "\\'")
- # convProcCmd = convProcCmd.replace('\"', '\\"')
- if DEBUG:
- print 'DEBUG: Convert command: '+convProcCmd
- os.system(convProcCmd)
- # use XBMC tbn standard
- XBMCthumbName = changeExtension(filename, "tbn")
- shutil.move(thumbName, XBMCthumbName)
- thumbName = XBMCthumbName
- else:
- print "ERROR: no thumbnail found for"+filename
- return FAILURE
- # get thumb size
- identifyProcCmd = (os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_IDENTIFY_PROCESS), '-ping', thumbName)
- if DEBUG:
- print 'DEBUG: Identify command: ', identifyProcCmd
- identifyProc = subprocess.Popen(identifyProcCmd, stdout=subprocess.PIPE)
- identifyOutput = identifyProc.communicate()[0]
- thumbSize = [int(a) for a in identifyOutput[len(thumbName)+1:-1].split(' ')[1].split('x')]
- #thumbSize[0]=int(1024)
- #thumbSize[1]=int(1024)
- if DEBUG:
- print 'DEBUG: thumbsize=', thumbSize
- # use ImageMagick to overlay "images/" + ffmpegResults[1] + ".png" onto thumbName bottom left corner
- videoOverlayFilename = os.path.join(sys.path[0], 'EyetvTOG/images', ffmpegResults[1] + '.png')
- if not os.path.isfile(videoOverlayFilename):
- print "WARN: Couldn't find "+videoOverlayFilename+" skipping video overlay"
- else:
- videoImgProcCmd = (os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_COMPOSITE_PROCESS), '-compose', 'atop', '-geometry', '1000x' + str(int(thumbSize[1]*IMG_HEIGHT)) + '+' + str(int(thumbSize[0]*IMG_MARGIN)) + '+' + str(int(thumbSize[1]*(1-IMG_HEIGHT)-thumbSize[0]*IMG_MARGIN)), videoOverlayFilename, thumbName, thumbName)
- videoImgProc = subprocess.Popen(videoImgProcCmd, stderr=subprocess.PIPE)
- videoImgProcOutput = videoImgProc.communicate()
-
- #check stderr for errors
- if len(videoImgProcOutput[1]) > 0:
- print 'ERROR: '
- print videoImgProcOutput[1]
-
- # use ImageMagick to overlay "images/" + ffmpegResults[3] + ".png" onto thumbName in bottom right corner
- audioOverlayFilename = os.path.join(sys.path[0], 'EyetvTOG/images', ffmpegResults[3] + '.png')
- if not os.path.isfile(audioOverlayFilename):
- print "WARN: Couldn't find "+audioOverlayFilename+" skipping audio overlay"
- else:
- audioImgProcCmd = (os.path.join(IMAGEMAGICK_PATH, IMAGEMAGICK_COMPOSITE_PROCESS), '-compose', 'atop', '-geometry', '1000x' + str(int(thumbSize[1]*IMG_HEIGHT)) + '+' + str(int(thumbSize[0]-thumbSize[1]*(IMG_HEIGHT/LOGO_RATIO)-thumbSize[0]*IMG_MARGIN)) + '+' + str(int(thumbSize[1]*(1.0-IMG_HEIGHT)-thumbSize[0]*IMG_MARGIN)), audioOverlayFilename, thumbName, thumbName)
- audioImgProc = subprocess.Popen(audioImgProcCmd, stderr=subprocess.PIPE)
- audioImgProcOutput = audioImgProc.communicate()
-
- # check stderr for errors
- if len(audioImgProcOutput[1]) > 0:
- print 'ERROR: '
- print audioImgProcOutput[1]
- return SUCCESS
-
- def revertFile (filename):
- thumbName = changeExtension(filename, "tbn")
- thumbOrigName = changeExtension(filename, "tbn-orig")
-
- # if they have tbn-orig then I restore for them
- if os.path.isfile(thumbOrigName):
- shutil.copyfile(thumbOrigName, thumbName)
- os.remove(thumbOrigName)
- return SUCCESS
- # if they have only a tbn there's nothing I can do
- elif os.path.isfile(thumbName):
- print "WARN: no tbn-orig file for "+filename+" unable to revert"
- return FAILURE
- # no thumb files means nothing I can do
- else:
- print "WARN: no thumbnail for"+filename
- return SKIPPED
-
- def changeExtension (filename, newExtension):
- "Replaces filename's extension with newExtension"
- stubName = filename[:filename.rfind(".")+1]
- return stubName + newExtension
-
- def getffmpegInfo (filename):
- ffmpegProcCmd = (os.path.join(FFMPEG_PATH, FFMPEG_PROCESS), '-i', filename)
- if DEBUG:
- print 'DEBUG: Command string:', ffmpegProcCmd
- ffmpegProc = subprocess.Popen(ffmpegProcCmd, stderr=subprocess.PIPE)
- output = ffmpegProc.communicate()[1].split('\n')
- if DEBUG:
- print "DEBUG: ffmpeg output:", output
- return output
-
- def parseffmpegInfo (output):
- streamRegex = re.compile("\s+Stream #0\.(\d)(\[\w+\])?.*?: (Video|Audio): (.+)")
- videoCodec = None
- videoResolution = None
- audioCodec = None
- audioChannels = None
-
- for line in output:
- if DEBUG:
- print "PARSING:"+line
- streamResult = streamRegex.match(line)
- if streamResult != None:
- params = streamResult.group(4).split(", ")
- if streamResult.group(3) == "Video" and videoCodec == None and videoResolution == None:
- if len(params) < 3:
- continue
- print "PARSING: Found video line containing "+params[0]+" and "+params[2]
- videoCodec = params[0]
- videoResolution = params[2]
- elif streamResult.group(3) == "Audio" and audioCodec == None and audioChannels == None:
- print "PARSING: Found audio line containing "+params[0]+" and "+params[2]
- audioCodec = params[0]
- audioChannels = params[2]
- elif DEBUG:
- print "PARSING: Line didn't match known results format."
- if (videoCodec == None or videoResolution == None) and (audioCodec == None or audioChannels == None):
- return None
-
- return [videoCodec, parseVideoResolution(videoResolution), parseAudioCodec(audioCodec), parseAudioChannels(audioChannels)]
-
- def parseAudioCodec (codec):
- if codec == "0x0000":
- return "AC3"
- elif codec == "mp3":
- return "MP3"
- elif codec == "dca":
- return "DTS"
- else:
- return "??"
-
- def parseAudioChannels (channels):
- if channels == "5:1":
- return "5.1"
- elif channels == "7:1":
- return "7.1"
- else:
- return channels
-
- def parseVideoResolution (resolution):
- if DEBUG:
- print 'DEBUG: Parsing resolution to tuple:'+resolution
- width = int(resolution.split("x")[0])
-
- if width < 1280:
- return "SD"
- elif 1280 <= width < 1920:
- return "720p"
- elif width >= 1920:
- return "1080p"
- else:
- return "??"
-
- def scanAllFiles (file):
- i = 0
- good = 0
- bad = 0
- skipped = 0
- # for file in files:
-
- # if it's a folder, scan it automatically for all applicable files (recursive)
- #if os.path.isdir(file):
- #
- # results = scanAllFiles (filter(lambda x: x.endswith(AUTOSCAN_EXTENSIONS) or os.path.isdir(x), [os.path.join(file, x) for x in os.listdir(file)]))
- # good += results[0]
- # bad += results[1]
- # skipped += results[2]
- #
- #else:
- i += 1
- # print "PDEBUG- file-"+file
- result = doFile(file)
- if result == SUCCESS:
- good += 1
- elif result == SKIPPED:
- skipped += 1
- else:
- bad += 1
- return (good, bad, skipped)
-
- def revertAllFiles (files):
- good = 0
- bad = 0
- skipped = 0
- for file in files:
- # if it's a folder, scan it automatically for all mkv and avi files (recursive)
- if os.path.isdir(file):
- results = revertAllFiles (filter(lambda x: x.endswith(AUTOSCAN_EXTENSIONS) or os.path.isdir(x), [os.path.join(file, x) for x in os.listdir(file)]))
- good += results[0]
- bad += results[1]
- skipped += results[2]
- else:
- result = revertFile(file);
- if result == SUCCESS:
- good += 1
- elif result == SKIPPED:
- skipped += 1
- else:
- bad += 1
-
- return (good, bad, skipped)
-
- def getSyntax (name):
- return 'Syntax: python ' + name + ' <-scan|-revert> [file/folder list]\n\tpython ' + name + ' -force <1080p|720p|SD> <7.1|5.1|stereo|mono> <filename>'
-
- # main TOG function
-
- checkDependencies()
-
- if len(sys.argv) == 1:
- print getSyntax(sys.argv[0])
- elif sys.argv[1] == '-revert' and len(sys.argv) >= 3:
- results = revertAllFiles(sys.argv[2:])
- print 'Successfully reverted overlays on'+results[0]+'of'+results[0]+results[1]+results[2]+'thumbnails (' + str(results[1]) + ' failures)'
- # this is the only option used in this context
- elif cmd == '-scan':
- results = scanAllFiles(scandir)
- print 'Successfully created overlays on '+str(results[0])+' of 1 thumbnails (' + str(results[1]) + ' failures)'
- elif sys.argv[1] == '-force' and len(sys.argv) == 5:
- doFile(sys.argv[4], ('', sys.argv[2], '', sys.argv[3]))
- else:
- print getSyntax(sys.argv[0])
- # EOT
- # check eyetv dir
- for dirpath, dirnames, filenames in os.walk(path):
- # ignore eyetvsched files
- if dirpath.endswith(".eyetv"):
- # ignore eyetv buffer (for now)
- if not dirpath.endswith("Live TV Buffer.eyetv"):
- # print 'Directory', dirpath
- for filename in filenames:
- if filename.endswith(".mpg"):
- fqname=dirpath+'/'+filename
- fqname = unicode(fqname, "utf-8" )
- # print fqname
- p=re.compile('.+/(.+).eyetv')
- m=p.match(dirpath)
- shortdirpath=m.group(1)
- shortdirpath = unicode( shortdirpath, "utf-8" )
- # match icon name
- p=re.compile('(.+)\.mpg')
- m=p.match(fqname)
- tbn=m.group(1)
- tifftbn=tbn+'.tiff'
- # detect tbn /integrate with tog
- tbn=tbn+'.tbn'
- if os.path.isfile(tifftbn) and not os.path.isfile(tbn):
- # convert Unicode string to regular string
- # togname = str(fqname)
- togname = fqname.encode( 'utf-8' )
- togname = togname.replace("'", "\'")
- if tog == 1:
- try:
- go_tog('-scan', togname)
- except:
- print "Failed to set thumbnail overlay for:"+togname
- pass
- # if TOG fails fallback to tiff tbns
- if os.path.isfile(tifftbn) and not os.path.isfile(tbn):
- tbn = tifftbn
- statinfo = os.stat(fqname)
- size = statinfo.st_size
- filedate = statinfo.st_ctime
- objdate=datetime.fromtimestamp(filedate)
- date=objdate.strftime("%d/%m/%Y")
- # %H:%M:%S")
- # print size, date
- # open Eyetv program info xml file
- for file in os.listdir(dirpath):
- if( re.search('.eyetvp$', file)):
- filePl = dirpath+"/"+file
- #fObj=dirpath+'/*.eyetvp'
- #filePl=unicode((str(glob.glob(fObj))), "utf-8" )
- ## print filePl
- ##filePl = filePl.replace("[", "")
- ##filePl = filePl.replace("]", "")
- #filePl = filePl.replace("\"", "")
- ## filePl = filePl.replace("'", "")
- ## print filePl
- #filePl = filePl.strip('[')
- #filePl = filePl.strip(']')
- #filePl = filePl.strip('\'')
- # print filePl
- if not filePl=="":
- title=""
- subtitle=""
- episode=""
- dsubtitle=""
- depisode=""
- plot=""
- actors=""
- genre=""
- director=""
- aired=""
- actorlist=[]
-
- file = open(filePl)
- pl=""
- for line in file:
- line = line.replace("\n", "")
- line = line.replace("\t", "")
- line = line.replace("'", "\'")
- pl = pl + line
- file.close()
- epg=unicode(pl, "utf-8" )
- p=re.compile('<key>TITLE</key><string>(.*?)</string>')
- m=p.search(epg)
- if m:
- title=m.group(1)
- title = title.strip()
- p=re.compile('<key>SUBTITLE</key><string>(.*?)</string>')
- m=p.search(epg)
- if m:
- subtitle=m.group(1)
- subtitle = subtitle.strip()
- if subtitle:
- dsubtitle='.'+subtitle
- # print subtitle
- p=re.compile('<key>EPISODENUM</key><string>(.*?)</string>')
- m=p.search(epg)
- if m:
- episode=m.group(1)
- episode = episode.strip()
- if episode:
- depisode='.'+episode
- # print episode
- p=re.compile('<key>DESCRIPTION</key><string>(.*?)</string>')
- m=p.search(epg)
- if m:
- plot=m.group(1)
- plot = plot.strip()
- # print plot
- libname=title+depisode+dsubtitle
- libname = libname.encode( 'utf-8' )
- # print libname
-
- p=re.compile('<key>ACTORS</key><string>(.*?)</string>')
- m=p.search(epg)
- if m:
- actors=m.group(1)
- actors = actors.strip()
- p=re.compile('<key>CONTENT</key><string>(.*?)</string>')
- m=p.search(epg)
- if m:
- genre=m.group(1)
- genre = genre.strip()
- p=re.compile('<key>DIRECTOR</key><string>(.*?)</string>')
- m=p.search(epg)
- if m:
- director=m.group(1)
- director = director.strip()
- p=re.compile('<date>(\d\d\d\d-\d\d-\d\d).*?\d\d:\d\d:\d\d.*?</date>')
- m=p.search(epg)
- if m:
- aired=m.group(1)
- aired = aired.strip()
-
- # linked directory support
- if LDS == 1:
- # build dummy dir
- if not os.path.isdir(dummy):
- os.mkdir(dummy)
- dummyShowName = dummy+'/'+title
- if not os.path.isdir(dummyShowName):
- os.mkdir(dummyShowName)
-
- scrapethetvdb = STVDB
- done = 0
- if DEBUG:
- print 'processing - '+title+'-'+subtitle
-
- # .nfo file support
- if nfo == 1:
- # thetvdb.com scraper support
- while scrapethetvdb == 1 and done == 0:
- import urllib,urllib2
- # scrape thetvdb.com
- # load search results
- UEtitle = urllib.quote_plus(title, safe='/')
- Base_URL = "http://www.thetvdb.com/index.php?seriesname="+UEtitle+"&fieldlocation=2&language=7&genre=&year=&network=&zap2it_id=&tvcom_id=&imdb_id=&order=translation&searching=Search&tab=advancedsearch"
- WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- WebSock.close() # Closes connection to url
-
- # parse series search results page
- seriesLink = re.compile('<tr><td class="odd">1</td><td class="odd"><a href="(/index.php\?tab=series&id=\d+&lid=\d)">', re.IGNORECASE).findall(WebHTML)
-
- if len(seriesLink) == 0:
- if DEBUG:
- print 'seriesLink not match -'+title+'-'+subtitle
- scrapethetvdb = 0
- break
-
- # ...
- # load series page
- Base_URL = "http://www.thetvdb.com"+urllib.unquote(seriesLink[0])
- Base_URL = Base_URL.replace('&', '&')
- if DEBUG:
- print 'Base_URL-'+Base_URL
-
- WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- WebSock.close() # Closes connection to url
- # ...
-
- # parse series page
- m = re.compile('<a href="(/\?tab=seasonall&id=\d+&lid=\d)" class="seasonlink">All</a>', re.IGNORECASE).findall(WebHTML)
- if len(m) == 0:
- if DEBUG:
- print 'episodesLink not matched -'+title+'-'+subtitle
- scrapethtvdb = 0
- break
- episodesLink = str(m[0])
- episodesLink = episodesLink.replace('&', '&')
- episodesLink = 'http://www.thetvdb.com'+episodesLink
- if DEBUG:
- print 'episodesLink-'+episodesLink
- m = re.compile('<!-- Right upper -->\s+<td>\s+<div id="content">\s+<h1>.+</h1>\s*(.+)\s*', re.IGNORECASE).findall(WebHTML)
- if len(m) == 0:
- if DEBUG:
- print 'thetvdbPlot not matched'
- scrapethtvdb = 0
- break
- thetvdbPlot = str(m[0])
- if DEBUG:
- print 'thetvdbPlot-'+thetvdbPlot
- pSID = re.compile('/\?tab=seasonall&id=(\d+)&lid=\d')
- mSID = pSID.search(episodesLink)
- if mSID:
- SID=mSID.group(1)
- else:
- if DEBUG:
- print 'ShowID not matched -'+title+'-'+subtitle
- scrapethtvdb = 0
- break
-
- if DEBUG:
- print 'SID-'+SID
-
- actorsLink = 'http://www.thetvdb.com/?tab=actors&id='+SID
- if DEBUG:
- print 'actorsLink-'+actorsLink
- #bannersLink = 'http://www.thetvdb.com/?tab=seriesbanners&id='+SID
- #fanartLink = 'http://www.thetvdb.com/?tab=seriesfanart&id='+SID
- thetvdbBanners = re.compile('<img src="(.*)" class="banner" border="0"></a>', re.IGNORECASE).findall(WebHTML)
- if DEBUG:
- print 'thetvdbBanners[0]-'+thetvdbBanners[0]
- firstBanner = 'http://www.thetvdb.com'+thetvdbBanners[0]
- thetvdbFanart = re.compile('<div id="fanart" style="background-image: url\((.*)\)">', re.IGNORECASE).findall(WebHTML)
- if DEBUG:
- print 'thetvdbFanart[0]-'+thetvdbFanart[0]
- firstFanart = 'http://www.thetvdb.com'+thetvdbFanart[0]
- # ...
- # load actors page
- Base_URL = actorsLink
- WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- WebSock.close() # Closes connection to url
- # ...
- # parse actors page
- actorLists= re.compile('<table cellspacing="0" cellpadding="0" border="0" width="100%" class="infotable"><tr><td>\s+<img src="(.*)" class="banner" border="0" alt=".*">\s+<h2><a href="http://www.imdb.com/find\?s=nm&q=.*" target="_blank">(.*)</a></h2>(?:as )*(.*)(?:<br>)*').findall(WebHTML)
-
- #if DEBUG:
- # print 'actorLists - '+str(actorLists)
-
- thetvdbActorThumbs = []
- thetvdbActors = []
- thetvdbActorRoles = []
- v=0
- if len(actorLists) == 0:
- print "Actors did not match"
- scrapethtvdb = 0
- break
- while v < len(actorLists):
- record = actorLists[v]
- #if DEBUG:
- # print 'record - '+str(record)
- thetvdbActorThumbs.append(record[0])
- thetvdbActors.append(record[1])
- thetvdbActorRoles.append(record[2])
- v=v+1
-
- #if DEBUG:
- # print "thetvdbActorThumbs - "+str(thetvdbActorThumbs)
- # print "thetvdbActors - "+str(thetvdbActors)
- # print "thetvdbActorRoles - "+str(thetvdbActorRoles)
-
- # ...
- # load banners page
- #Base_URL = bannersLink
- #WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- #WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- #WebSock.close() # Closes connection to url
- ## ...
- ## parse banners page
- #thetvdbBanners = re.compile('<img src="(.*)" class="banner" border="0"></a>', re.IGNORECASE).findall(WebHTML)
- # ...
- # load fanart page
- #Base_URL = fanartLink
- #WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- #WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- #WebSock.close() # Closes connection to url
- ## ...
- ## parse fanart page
- #thetvdbFanarts = re.compile('<img src="(.*)" class="banner" border="0"></a>', re.IGNORECASE).findall(WebHTML)
- # ...
- # load episodes page
- Base_URL = episodesLink
- WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- WebSock.close() # Closes connection to url
- # ...
-
- # parse episodes page
- def fuzzy_substring(needle, haystack):
- """Calculates the fuzzy match of needle in haystack,
- using a modified version of the Levenshtein distance
- algorithm.
- The function is modified from the levenshtein function
- in the bktree module by Adam Hupp"""
- m, n = len(needle), len(haystack)
- # base cases
- if m == 1:
- return not needle in haystack
- if not n:
- return m
- row1 = [0] * (n+1)
- for i in range(0,m):
- row2 = [i+1]
- for j in range(0,n):
- cost = ( needle[i] != haystack[j] )
- row2.append( min(row1[j+1]+1, # deletion
- row2[j]+1, #insertion
- row1[j]+cost) #substitution
- )
- row1 = row2
- return min(row1)
-
- episodeList = re.compile('<tr><td class="(?:even|odd)+"><a href="/\?tab=episode&seriesid=\d+&seasonid=\d*&id=\d*&lid=\d">(\d*)(?: - )*(\d*)</a></td><td class="(?:even|odd)+"><a href="/\?tab=episode&seriesid=\d+&seasonid=(\d*)&id=(\d*)&lid=\d">(.+)</a></td>').findall(WebHTML)
- candidateSeasons = []
- candidateEpisodes = []
- candidateSeasonID = []
- candidateSpecificEpisodeID = []
- specificEpisodeCandidates = []
- v=0
- if len(episodeList) == 0:
- print 'Empty episodeList'
- scrapethtvdb = 0
- break
- while v < len(episodeList):
- record = episodeList[v]
- #if DEBUG:
- # print 'record - '+str(record)
- candidateSeasons.append(record[0])
- candidateEpisodes.append(record[1])
- candidateSeasonID.append(record[2])
- candidateSpecificEpisodeID.append(record[3])
- specificEpisodeCandidates.append(record[4])
- v=v+1
-
- # for specificEpisodeCandidate in specificEpisodeCandidates:
- i=0
- candidateScore = []
- while i < len(specificEpisodeCandidates):
- specificEpisodeCandidate = specificEpisodeCandidates[i]
- # attempt fuzzy match
- candidateScore.append(fuzzy_substring(subtitle, specificEpisodeCandidate))
- i=i+1
- # grab best match
- bestMatch = min(candidateScore)
- if DEBUG:
- print 'bestMatch-'+str(bestMatch)
- BMI = candidateScore.index(bestMatch)
- if DEBUG:
- print 'BMI-'+str(BMI)
- if bestMatch > 11:
- if DEBUG:
- print 'no specific episode found for:'+title+'-'+subtitle
- scrapethetvdb = 0
- break
- else:
- SSID = candidateSeasonID[BMI]
- SEID = candidateSpecificEpisodeID[BMI]
- SpecificEpisodeLink = 'http://www.thetvdb.com/?tab=episode&seriesid='+SID+'&seasonid='+SSID+'&id='+SEID+'&lid=7'
- if DEBUG:
- print 'SpecificEpisodeLink - '+str(SpecificEpisodeLink)
- # ...
- # load specific episode page
- Base_URL = SpecificEpisodeLink
- WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- WebSock.close() # Closes connection to url
- # ...
- # parse specific episode page
- scrapermatch = re.compile('<input type="text" name="EpisodeName_\d+" value="(.*)" style="display: inline" >', re.IGNORECASE).findall(WebHTML)
- if len(scrapermatch) == 0:
- print 'Empty scrapersubtitle'
- scrapersubtitle = ''
- else:
- scrapersubtitle = scrapermatch[0]
- if DEBUG:
- print 'scrapersubtitle - '+str(scrapersubtitle)
- scraperseason = candidateSeasons[BMI]
- if DEBUG:
- print 'scraperseason - '+str(scraperseason)
- scrapermatch = re.compile('<td><input type="text" name="EpisodeNumber" value="(.*)" maxlength="45"></td>', re.IGNORECASE).findall(WebHTML)
- if len(scrapermatch) == 0:
- print 'Empty scraperepisode'
- scraperepisode = ''
- else:
- scraperepisode = scrapermatch[0]
- if DEBUG:
- print 'scraperepisode - '+str(scraperepisode)
- scrapermatch = re.compile('<textarea rows="\d+" cols="\d+" name="Overview_\d+" style="display: inline">(.*)</textarea>', re.IGNORECASE).findall(WebHTML)
- if len(scrapermatch) == 0:
- print 'Empty scraperplot'
- scraperplot = ''
- else:
- scraperplot = scrapermatch[0]
- if DEBUG:
- print 'scraperplot - '+str(scraperplot)
- scrapermatch = re.compile('<td><input name="Director" value="(.*)" maxlength="255" type="text"></td>', re.IGNORECASE).findall(WebHTML)
- if len(scrapermatch) == 0:
- print 'Empty scraperdirector'
- scraperdirector = ''
- else:
- scraperdirector = scrapermatch[0]
- if DEBUG:
- print 'scraperdirector - '+str(scraperdirector)
- scrapermatch = re.compile('<td><input name="FirstAired" value="(.*)" maxlength="255" type="text"></td>', re.IGNORECASE).findall(WebHTML)
- if len(scrapermatch) == 0:
- print 'Empty scraperaired'
- scraperaired = ''
- else:
- scraperaired = scrapermatch[0]
- if DEBUG:
- print 'scraperaired - '+str(scraperaired)
- scrapername = title+'.S'+scraperseason+'E'+scraperepisode+'.'+scrapersubtitle
- if DEBUG:
- print 'scrapername - '+str(scrapername)
- # ...
- # use thetvdb.com data for show nfo
- # test for existing tvshow.nfo
- shownfo=dummyShowName+'/tvshow.nfo'
- if not os.path.isfile(shownfo):
- if DEBUG:
- print 'using thetvdb.com data for show nfo - '+title
- # write tvshow.nfo (Series Info)
- shownfofile=open(shownfo, 'w')
- shownfofile.write('<tvshow>\n')
- shownfofile.write(' <title>'+title+'</title>\n')
- shownfofile.write(' <season>-1</season>\n')
- shownfofile.write(' <episode>0</episode>\n')
- shownfofile.write(' <displayseason>-1</displayseason>\n')
- shownfofile.write(' <displayepisode>-1</displayepisode>\n')
- if not genre == '':
- shownfofile.write(' <genre>'+genre+'</genre>\n')
- if not thetvdbPlot == '':
- shownfofile.write(' <plot>'+thetvdbPlot+'</plot>\n')
- if not thetvdbActors == '':
- j=0
- #for actor in thetvdbActors:
- y=len(thetvdbActors)
- while j<y:
- actor = thetvdbActors[j]
- if not actor == '':
- shownfofile.write(' <actor>\n')
- shownfofile.write(' <name>'+actor+'</name>\n')
- if not thetvdbActorRoles[j] == '':
- thetvdbActorRole = thetvdbActorRoles[j]
- if not thetvdbActorRole == '':
- shownfofile.write(' <role>'+thetvdbActorRole+'</role>\n')
- if not thetvdbActorThumbs[j] == '':
- thetvdbActorThumb = thetvdbActorThumbs[j]
- if not thetvdbActorThumb == '':
- if not thetvdbActorThumb == '/banners/actors/0.jpg':
- thetvdbActorThumb = 'http://www.thetvdb.com'+thetvdbActorThumb
- shownfofile.write(' <thumb>'+thetvdbActorThumb+'</thumb>\n')
- shownfofile.write(' </actor>\n')
- j=j+1
- shownfofile.write('</tvshow>\n')
- shownfofile.close()
-
- # banners and fanart
- if DEBUG:
- print 'firstBanner - '+firstBanner
- Base_URL = firstBanner
- WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- WebSock.close() # Closes connection to url
- if not WebHTML == '':
- showBanner=dummyShowName+'/folder.jpg'
- if not os.path.isfile(showBanner):
- showBannerFile=open(showBanner, 'w')
- showBannerFile.write(WebHTML)
- showBannerFile.close()
- if DEBUG:
- print 'firstFanart - '+firstFanart
- if not firstFanart == 'http://www.thetvdb.com/banners/actors/0.jpg' or firstFanart == 'http://www.thetvdb.com':
- Base_URL = firstFanart
- WebSock = urllib.urlopen(Base_URL) # Opens a 'Socket' to URL
- WebHTML = WebSock.read() # Reads Contents of URL and saves to Variable
- WebSock.close() # Closes connection to url
- if not WebHTML == '':
- showFanart=dummyShowName+'/fanart.jpg'
- if not os.path.isfile(showFanart):
- showFanartFile=open(showFanart, 'w')
- showFanartFile.write(WebHTML)
- showFanartFile.close()
- done = 1
- # else:
- if not done == 1:
- # use Eyetv EPG data for show nfo
- # test for existing tvshow.nfo
- shownfo=dummyShowName+'/tvshow.nfo'
- if not os.path.isfile(shownfo):
- if DEBUG:
- print 'using Eyetv.com data for show nfo - '+title
- # write tvshow.nfo (Series Info)
- shownfofile=open(shownfo, 'w')
- shownfofile.write('<tvshow>\n')
- shownfofile.write(' <title>'+title+'</title>\n')
- shownfofile.write(' <season>-1</season>\n')
- shownfofile.write(' <episode>0</episode>\n')
- …
Large files files are truncated, but you can click here to view the full file