primewire.py | searchcode

/script.module.universalscrapers/lib/universalscrapers/scraperplugins/primewire.py

https://bitbucket.org/Leia18/gmc · Python · 207 lines · 178 code · 29 blank · 0 comment · 46 complexity · e82b499e04d8044f9d7190aaee79adf0 MD5 · raw file

import base64
import re,xbmcaddon,time
import urllib
import urlparse

from BeautifulSoup import BeautifulSoup
from universalscrapers import proxy
from ..common import clean_title, replaceHTMLCodes, filter_host,send_log,error_log
from ..scraper import Scraper
import xbmcaddon

# Developer-logging switch, read once at import time from the settings of the
# 'script.module.universalscrapers' addon. The rest of this file compares it
# against the string 'true' before timing a scrape or writing log entries.
dev_log = xbmcaddon.Addon('script.module.universalscrapers').getSetting("dev_log")


class Primewire(Scraper):
    domains = ['primewire.ag']
    name = "primewire"

    def __init__(self):
        """Load the configured base URL and set up the search URL templates."""
        addon = xbmcaddon.Addon('script.module.universalscrapers')
        baseurl_setting = "%s_baseurl" % self.name
        self.base_link = addon.getSetting(baseurl_setting)

        # Page fetched by get_key() to obtain the search form key.
        self.search_link = '%s/index.php?search' % self.base_link

        # Relative search templates; the two %s slots take the quoted
        # keywords and the search key (see get_html()).
        self.moviesearch_link = '/index.php?search_keywords=%s&key=%s&search_section=1'
        self.tvsearch_link = '/index.php?search_keywords=%s&key=%s&search_section=2'

        # The start timestamp is only needed for the timing log in sources().
        if dev_log == 'true':
            self.start_time = time.time()

    def scrape_movie(self, title, year, imdb, debrid=False):
        """Search the site for a movie and return its hoster sources.

        Every link in each search result is considered when its title carries
        ``(year)``, ``(year + 1)``, ``(year - 1)`` or no parenthesis at all.
        A link whose cleaned title equals the cleaned search title and that
        names exactly ``(year)`` is scraped immediately. Any other title match
        is remembered as a fallback and scraped once all results have been
        examined without an exact-year hit.

        :param title: movie title to search for.
        :param year: release year; must be convertible with ``int()``.
        :param imdb: not used by this scraper.
        :param debrid: not used by this scraper.
        :return: the list produced by ``sources()``, or ``[]`` when nothing
            matches or the search fails.
        """
        try:
            html = BeautifulSoup(self.get_html(title, self.moviesearch_link))
            index_items = html.findAll('div', attrs={'class': 'index_item index_item_ie'})
            # The 'watch' prefix is presumably there because result link
            # titles start with "Watch ..." -- confirm against the live site.
            title = 'watch' + clean_title(title).replace(": ", "").replace("'", "")
            exact_year = '(%s)' % str(year)
            years = [exact_year, '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]
            fallback = None

            for index_item in index_items:
                try:
                    for link in index_item.findAll('a'):
                        href = link['href']
                        link_title = link['title']

                        # Skip results that name a year outside the accepted window.
                        has_wanted_year = any(x in link_title for x in years)
                        if not has_wanted_year and "(" in link_title:
                            continue

                        # If the href carries a 'u' and/or 'q' query parameter
                        # (presumably a proxy/redirect wrapper), use its value.
                        for param in ('u', 'q'):
                            try:
                                href = urlparse.parse_qs(urlparse.urlparse(href).query)[param][0]
                            except Exception:
                                pass

                        if title.lower() != clean_title(link_title):
                            continue
                        if exact_year in link_title:
                            return self.sources(href)
                        fallback = href
                except Exception:
                    # A malformed result must not abort the whole search.
                    continue

            # No exact-year hit: use the remembered near match instead of
            # discarding it.
            if fallback is not None:
                return self.sources(fallback)
            return []
        except Exception:
            if dev_log == 'true':
                error_log(self.name,'Check Search')
            return []


    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
        """Search the site for a TV show and return sources for one episode.

        The show is located by comparing the cleaned search title with the
        cleaned title of every search result link. On the matching show page
        the ``show_season`` block whose ``data-id`` equals ``season`` is
        searched for a link labelled ``E<episode>``, and that episode page is
        handed to ``sources()``.

        :param title: show title to search for (byte string or unicode).
        :param show_year: not used by this scraper.
        :param year: not used by this scraper.
        :param season: season number; normalised with ``str()`` so an int
            and a string behave the same.
        :param episode: episode number, formatted into ``"E%s"``.
        :param imdb: not used by this scraper.
        :param tvdb: not used by this scraper.
        :param debrid: not used by this scraper.
        :return: the list produced by ``sources()``, or ``[]`` when the show
            or episode is not found or the search fails.
        """
        strip_chars = '\'"?:!@#$&-,'

        def unwrap(href):
            # If the href carries a 'u' and/or 'q' query parameter (presumably
            # a proxy/redirect wrapper), replace it with that parameter's value.
            for param in ('u', 'q'):
                try:
                    href = urlparse.parse_qs(urlparse.urlparse(href).query)[param][0]
                except Exception:
                    pass
            return href

        try:
            # translate(None, chars) only exists on Python 2 byte strings.
            if isinstance(title, unicode):
                title = title.encode('utf-8')

            html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
            index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')})
            # NOTE(review): " ".join() over a string puts a space between every
            # character. The same transformation is applied to the link titles
            # below, so the comparison stays symmetric; presumably clean_title
            # strips the whitespace again -- confirm.
            title = 'watch' + clean_title(" ".join(title.translate(None, strip_chars)))
            # Tag attributes are strings, so an int season would never match.
            season = str(season)
            episode_label = "E%s" % episode

            for index_item in index_items:
                try:
                    for show_link in index_item.findAll('a'):
                        href = show_link['href']
                        link_title = show_link['title']
                        href = unwrap(href)
                        # Encode explicitly: the default codec rejects
                        # non-ASCII titles.
                        clean_link_title = clean_title(
                            " ".join(link_title.encode('utf-8').translate(None, strip_chars)))
                        if title != clean_link_title:
                            continue

                        # href is the show page relative url
                        show_url = urlparse.urljoin(self.base_link, href)
                        show_page = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))

                        for scraped_season in show_page.findAll('div', attrs={'class': 'show_season'}):
                            if scraped_season['data-id'] != season:
                                continue
                            episode_items = scraped_season.findAll('div', attrs={'class': 'tv_episode_item'})
                            for tv_episode_item in episode_items:
                                for episode_link in tv_episode_item.findAll('a'):
                                    if episode_link.contents[0].strip() == episode_label:
                                        return self.sources(unwrap(episode_link['href']))
                except Exception:
                    # A malformed result must not abort the whole search.
                    continue
            return []
        except Exception:
            if dev_log == 'true':
                error_log(self.name,'Check Search')
            return []


    def get_key(self):
        """Fetch the search page and return the value of its ``key`` input.

        The first ``<input name="key">`` element is used; indexing fails when
        the page has none, and the error propagates to the caller.
        """
        search_page = BeautifulSoup(proxy.get(self.search_link, 'searchform'))
        key_inputs = search_page.findAll('input', attrs={'name': 'key'})
        return key_inputs[0]["value"]

    def get_html(self, title, search_link):
        """Run a site search for ``title`` and return the result HTML.

        :param title: search title (byte string or unicode). Punctuation in
            ``'"?:!@#$&-,`` is removed and whitespace runs are collapsed
            before the title is URL-quoted.
        :param search_link: relative URL template with two ``%s`` slots, for
            the quoted keywords and the search key.
        :return: HTML of the first result page, with the second page appended
            when the first one links to ``page=2``; ``None`` when the first
            page does not contain ``index_item``.
        """
        # translate(None, chars) only exists on Python 2 byte strings; a
        # unicode title would raise TypeError here.
        if isinstance(title, unicode):
            title = title.encode('utf-8')

        key = self.get_key()
        keywords = " ".join(title.translate(None, '\'"?:!@#$&-,').split())
        query = search_link % (urllib.quote_plus(keywords), key)
        query = urlparse.urljoin(self.base_link, query)

        html = proxy.get(query, 'index_item')
        if 'index_item' not in html:
            return None

        # The link to the second page may appear plain or URL-encoded.
        if 'page=2' in html or 'page%3D2' in html:
            html += proxy.get(query + '&page=2', 'index_item')
        return html

    def sources(self, url):
        """Collect hoster links from a movie or episode page.

        Every ``<tbody>`` on the page is treated as one candidate. Its first
        link must carry a base64-encoded ``url`` query parameter holding the
        hoster address; rows without it, rows that fail to parse and rows
        whose host is rejected by ``filter_host`` are skipped.

        :param url: page URL, absolute or relative to ``self.base_link``;
            ``None`` yields an empty list.
        :return: list of dicts with the keys ``source``, ``quality``,
            ``scraper``, ``url`` and ``direct``. Whatever was collected so
            far is returned if fetching or parsing the page fails.
        """
        sources = []
        try:
            if url is None:
                return sources

            url = urlparse.urljoin(self.base_link, url)
            parsed_html = BeautifulSoup(proxy.get(url, 'choose_tabs'))

            count = 0
            for table_body in parsed_html.findAll('tbody'):
                try:
                    link = table_body.findAll('a')[0]["href"]

                    # Replace link with its ?u= / ?q= part if present.
                    for param in ('u', 'q'):
                        try:
                            link = urlparse.parse_qs(urlparse.urlparse(link).query)[param][0]
                        except Exception:
                            pass

                    # The ?url= part is mandatory: when it is missing the
                    # lookup raises and this row is skipped.
                    link = urlparse.parse_qs(urlparse.urlparse(link).query)['url'][0]
                    link = base64.b64decode(link)  # decode base 64

                    if link.startswith("//"):
                        link = "http:" + link
                    link = replaceHTMLCodes(link)
                    link = link.encode('utf-8')

                    # Keep only the last two dot-separated labels of the host.
                    netloc = urlparse.urlparse(link.strip().lower()).netloc
                    host = re.findall(r'([\w]+[.][\w]+)$', netloc)[0]
                    host = replaceHTMLCodes(host)
                    host = host.encode('utf-8')

                    # Any other class value is passed through unchanged.
                    quality = table_body.findAll('span')[0]["class"]
                    if quality in ('quality_cam', 'quality_ts'):
                        quality = 'CAM'
                    elif quality == 'quality_dvd':
                        quality = 'SD'

                    if not filter_host(host):
                        continue
                    count += 1
                    sources.append(
                        {'source': host, 'quality': quality, 'scraper': 'Primewire', 'url': link, 'direct': False})
                except Exception:
                    # Best effort: one bad row must not discard the others.
                    pass

            if dev_log == 'true':
                end_time = time.time() - self.start_time
                send_log(self.name, end_time, count)
            return sources
        except Exception:
            return sources

    @classmethod
    def get_settings_xml(cls):
        """Return the settings XML lines for this scraper.

        Two ``<setting>`` elements are produced, both keyed on the scraper
        name: an "Enabled" boolean and a "Base Url" text field.
        """
        enabled_setting = '<setting id="%s_enabled" type="bool" label="Enabled" default="true"/>' % cls.name
        baseurl_setting = (
            '<setting id= "%s_baseurl" type="text" label="Base Url" default="http://www.primewire.ag"/>' % cls.name
        )
        return [enabled_setting, baseurl_setting]
Tech Fingerprint

Alerts (16)

'def' Ensure functions have docstrings for documentation
27 67 121 128 202
'try:' Ensure try blocks have corresponding except or finally blocks
28
Complexity hotspot; lines 42 to 43 (total complexity: 4)
42 43
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
45 49 58 104 109 112 192 198