
/script.module.universalscrapers/lib/universalscrapers/scraperplugins/primewire.py

https://bitbucket.org/Leia18/gmc
Python | raw file
Possible License(s): GPL-3.0
import base64
import re
import time
import urllib
import urlparse

import xbmcaddon
from BeautifulSoup import BeautifulSoup

from universalscrapers import proxy
from ..common import clean_title, replaceHTMLCodes, filter_host, send_log, error_log
from ..scraper import Scraper

dev_log = xbmcaddon.Addon('script.module.universalscrapers').getSetting("dev_log")

class Primewire(Scraper):
    domains = ['primewire.ag']
    name = "primewire"

    def __init__(self):
        self.base_link = xbmcaddon.Addon('script.module.universalscrapers').getSetting("%s_baseurl" % (self.name))
        self.search_link = '%s/index.php?search' % (self.base_link)
        self.moviesearch_link = '/index.php?search_keywords=%s&key=%s&search_section=1'
        self.tvsearch_link = '/index.php?search_keywords=%s&key=%s&search_section=2'
        if dev_log == 'true':
            # record the start time so sources() can report scrape duration
            self.start_time = time.time()

    def scrape_movie(self, title, year, imdb, debrid=False):
        try:
            html = BeautifulSoup(self.get_html(title, self.moviesearch_link))
            index_items = html.findAll('div', attrs={'class': 'index_item index_item_ie'})
            title = 'watch' + clean_title(title).replace(": ", "").replace("'", "")
            # accept the requested year plus or minus one
            years = ['(%s)' % str(year), '(%s)' % str(int(year) + 1), '(%s)' % str(int(year) - 1)]
            fallback = None
            for index_item in index_items:
                try:
                    links = index_item.findAll('a')
                    for link in links:
                        href = link['href']
                        link_title = link['title']
                        if any(x in link_title for x in years) or "(" not in link_title:
                            # unwrap proxied links: the real target may sit in a ?u= or ?q= parameter
                            try:
                                href = urlparse.parse_qs(urlparse.urlparse(href).query)['u'][0]
                            except:
                                pass
                            try:
                                href = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
                            except:
                                pass
                            if title.lower() == clean_title(link_title):
                                if '(%s)' % str(year) in link_title:
                                    return self.sources(href)
                                else:
                                    fallback = href
                except:
                    continue
            # no exact-year match; fall back to the closest title match if one was seen
            if fallback:
                return self.sources(fallback)
            return []
        except Exception as argument:
            if dev_log == 'true':
                error_log(self.name, 'Check Search')
            return []

    def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
        try:
            html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
            index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')})
            title = 'watch' + clean_title(" ".join(title.translate(None, '\'"?:!@#$&-,').split()))
            for index_item in index_items:
                try:
                    links = index_item.findAll('a')
                    for link in links:
                        href = link['href']
                        link_title = link['title']
                        # unwrap proxied links: the real target may sit in a ?u= or ?q= parameter
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['u'][0]
                        except:
                            pass
                        try:
                            href = urlparse.parse_qs(urlparse.urlparse(href).query)['q'][0]
                        except:
                            pass
                        clean_link_title = clean_title(
                            " ".join(link_title.encode().translate(None, '\'"?:!@#$&-,').split()))
                        if title == clean_link_title:  # href is the show page's relative url
                            show_url = urlparse.urljoin(self.base_link, href)
                            html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))
                            seasons = html.findAll('div', attrs={'class': 'show_season'})
                            for scraped_season in seasons:
                                if scraped_season['data-id'] == str(season):
                                    tv_episode_items = scraped_season.findAll('div', attrs={'class': 'tv_episode_item'})
                                    for tv_episode_item in tv_episode_items:
                                        links = tv_episode_item.findAll('a')
                                        for link in links:
                                            if link.contents[0].strip() == "E%s" % episode:
                                                episode_href = link['href']
                                                try:
                                                    episode_href = urlparse.parse_qs(
                                                        urlparse.urlparse(episode_href).query)['u'][0]
                                                except:
                                                    pass
                                                try:
                                                    episode_href = urlparse.parse_qs(
                                                        urlparse.urlparse(episode_href).query)['q'][0]
                                                except:
                                                    pass
                                                return self.sources(episode_href)
                except:
                    continue
            return []
        except Exception as argument:
            if dev_log == 'true':
                error_log(self.name, 'Check Search')
            return []

    def get_key(self):
        # the search form embeds a per-session key that must be sent with every query
        url = self.search_link
        html = proxy.get(url, 'searchform')
        parsed_html = BeautifulSoup(html)
        key = parsed_html.findAll('input', attrs={'name': 'key'})[0]["value"]
        return key

    def get_html(self, title, search_link):
        key = self.get_key()
        query = search_link % (
            urllib.quote_plus(" ".join(title.translate(None, '\'"?:!@#$&-,').split()).rsplit(':', 1)[0]), key)
        query = urlparse.urljoin(self.base_link, query)
        html = proxy.get(query, 'index_item')
        if 'index_item' in html:
            # pull in the second page of results when pagination is present
            if 'page=2' in html or 'page%3D2' in html:
                html2 = proxy.get(query + '&page=2', 'index_item')
                html += html2
        return html

    def sources(self, url):
        sources = []
        try:
            if url is None:
                return sources
            url = urlparse.urljoin(self.base_link, url)
            html = proxy.get(url, 'choose_tabs')
            parsed_html = BeautifulSoup(html)
            table_bodies = parsed_html.findAll('tbody')
            count = 0
            for table_body in table_bodies:
                try:
                    link = table_body.findAll('a')[0]["href"]
                    try:
                        # replace link with the ?u= part if present
                        link = urlparse.parse_qs(urlparse.urlparse(link).query)['u'][0]
                    except:
                        pass
                    try:
                        # replace link with the ?q= part if present
                        link = urlparse.parse_qs(urlparse.urlparse(link).query)['q'][0]
                    except:
                        pass
                    # the final hop carries the host link base64-encoded in ?url=
                    link = urlparse.parse_qs(urlparse.urlparse(link).query)['url'][0]
                    link = base64.b64decode(link)
                    if link.startswith("//"):
                        link = "http:" + link
                    link = replaceHTMLCodes(link)
                    link = link.encode('utf-8')
                    host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(link.strip().lower()).netloc)[0]
                    host = replaceHTMLCodes(host)
                    host = host.encode('utf-8')
                    quality = table_body.findAll('span')[0]["class"]
                    if quality == 'quality_cam' or quality == 'quality_ts':
                        quality = 'CAM'
                    elif quality == 'quality_dvd':
                        quality = 'SD'
                    if not filter_host(host):
                        continue
                    count += 1
                    sources.append(
                        {'source': host, 'quality': quality, 'scraper': 'Primewire', 'url': link, 'direct': False})
                except:
                    pass
            if dev_log == 'true':
                end_time = time.time() - self.start_time
                send_log(self.name, end_time, count)
            return sources
        except:
            return sources

    @classmethod
    def get_settings_xml(cls):
        xml = [
            '<setting id="%s_enabled" type="bool" label="Enabled" default="true"/>' % (cls.name),
            '<setting id="%s_baseurl" type="text" label="Base Url" default="http://www.primewire.ag"/>' % (cls.name)
        ]
        return xml
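
The link-unwrapping pattern used throughout the file (peel off proxy ?u=/?q= redirect parameters, then base64-decode the ?url= payload and normalise scheme-relative links) can be exercised on its own. Below is a minimal standalone sketch in the same Python 2 style; the unwrap helper and the gateway URL are illustrative assumptions, not part of the module:

import base64
import urlparse

def unwrap(link):
    # peel off proxy layers: the real target may sit in a ?u= or ?q= parameter
    for param in ('u', 'q'):
        try:
            link = urlparse.parse_qs(urlparse.urlparse(link).query)[param][0]
        except Exception:
            pass
    # the final hop base64-encodes the host link in ?url=
    try:
        link = base64.b64decode(urlparse.parse_qs(urlparse.urlparse(link).query)['url'][0])
    except Exception:
        pass
    # scheme-relative links are normalised to plain http
    if link.startswith('//'):
        link = 'http:' + link
    return link

# hypothetical gateway URL wrapping a base64-encoded, scheme-relative target
wrapped = 'http://www.primewire.ag/gateway.php?url=' + base64.b64encode('//example-host.com/video/abc123')
print unwrap(wrapped)  # -> http://example-host.com/video/abc123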