PageRenderTime 242ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/subliminal/providers/tvsubtitles.py

https://github.com/junalmeida/Sick-Beard
Python | 191 lines | 133 code | 16 blank | 42 comment | 40 complexity | 7c6d22a54437114e03034cc1ea50d706 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import io
  4. import logging
  5. import re
  6. import zipfile
  7. import babelfish
  8. import bs4
  9. import requests
  10. from . import Provider
  11. from .. import __version__
  12. #from ..cache import region, SHOW_EXPIRATION_TIME, EPISODE_EXPIRATION_TIME
  13. from ..exceptions import ProviderError
  14. from ..subtitle import Subtitle, fix_line_endings, compute_guess_properties_matches
  15. from ..video import Episode
  16. logger = logging.getLogger(__name__)
  17. babelfish.language_converters.register('tvsubtitles = subliminal.converters.tvsubtitles:TVsubtitlesConverter')
  18. class TVsubtitlesSubtitle(Subtitle):
  19. provider_name = 'tvsubtitles'
  20. def __init__(self, language, series, season, episode, year, id, rip, release, page_link): # @ReservedAssignment
  21. super(TVsubtitlesSubtitle, self).__init__(language, page_link=page_link)
  22. self.series = series
  23. self.season = season
  24. self.episode = episode
  25. self.year = year
  26. self.id = id
  27. self.rip = rip
  28. self.release = release
  29. def compute_matches(self, video):
  30. matches = set()
  31. # series
  32. if video.series and self.series == video.series:
  33. matches.add('series')
  34. # season
  35. if video.season and self.season == video.season:
  36. matches.add('season')
  37. # episode
  38. if video.episode and self.episode == video.episode:
  39. matches.add('episode')
  40. # year
  41. if self.year == video.year:
  42. matches.add('year')
  43. # release_group
  44. if video.release_group and self.release and video.release_group.lower() in self.release.lower():
  45. matches.add('release_group')
  46. """
  47. # video_codec
  48. if video.video_codec and self.release and (video.video_codec in self.release.lower()
  49. or video.video_codec == 'h264' and 'x264' in self.release.lower()):
  50. matches.add('video_codec')
  51. # resolution
  52. if video.resolution and self.rip and video.resolution in self.rip.lower():
  53. matches.add('resolution')
  54. # format
  55. if video.format and self.rip and video.format in self.rip.lower():
  56. matches.add('format')
  57. """
  58. # we don't have the complete filename, so we need to guess the matches separately
  59. # guess video_codec (videoCodec in guessit)
  60. matches |= compute_guess_properties_matches(video, self.release, 'videoCodec')
  61. # guess resolution (screenSize in guessit)
  62. matches |= compute_guess_properties_matches(video, self.rip, 'screenSize')
  63. # guess format
  64. matches |= compute_guess_properties_matches(video, self.rip, 'format')
  65. return matches
  66. class TVsubtitlesProvider(Provider):
  67. languages = {babelfish.Language('por', 'BR')} | {babelfish.Language(l)
  68. for l in ['ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor',
  69. 'nld', 'pol', 'por', 'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho']}
  70. video_types = (Episode,)
  71. server = 'http://www.tvsubtitles.net'
  72. episode_id_re = re.compile('^episode-\d+\.html$')
  73. subtitle_re = re.compile('^\/subtitle-\d+\.html$')
  74. link_re = re.compile('^(?P<series>[A-Za-z0-9 \'.]+).*\((?P<first_year>\d{4})-\d{4}\)$')
  75. def initialize(self):
  76. self.session = requests.Session()
  77. self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__.split('-')[0]}
  78. def terminate(self):
  79. self.session.close()
  80. def request(self, url, params=None, data=None, method='GET'):
  81. """Make a `method` request on `url` with the given parameters
  82. :param string url: part of the URL to reach with the leading slash
  83. :param dict params: params of the request
  84. :param dict data: data of the request
  85. :param string method: method of the request
  86. :return: the response
  87. :rtype: :class:`bs4.BeautifulSoup`
  88. """
  89. r = self.session.request(method, self.server + url, params=params, data=data, timeout=10)
  90. if r.status_code != 200:
  91. raise ProviderError('Request failed with status code %d' % r.status_code)
  92. return bs4.BeautifulSoup(r.content, ['permissive'])
  93. #@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
  94. def find_show_id(self, series, year=None):
  95. """Find the show id from the `series` with optional `year`
  96. :param string series: series of the episode in lowercase
  97. :param year: year of the series, if any
  98. :type year: int or None
  99. :return: the show id, if any
  100. :rtype: int or None
  101. """
  102. data = {'q': series}
  103. logger.debug('Searching series %r', data)
  104. soup = self.request('/search.php', data=data, method='POST')
  105. links = soup.select('div.left li div a[href^="/tvshow-"]')
  106. if not links:
  107. logger.info('Series %r not found', series)
  108. return None
  109. matched_links = [link for link in links if self.link_re.match(link.string)]
  110. for link in matched_links: # first pass with exact match on series
  111. match = self.link_re.match(link.string)
  112. if match.group('series').lower().replace('.', ' ').strip() == series:
  113. if year is not None and int(match.group('first_year')) != year:
  114. continue
  115. return int(link['href'][8:-5])
  116. for link in matched_links: # less selective second pass
  117. match = self.link_re.match(link.string)
  118. if match.group('series').lower().replace('.', ' ').strip().startswith(series):
  119. if year is not None and int(match.group('first_year')) != year:
  120. continue
  121. return int(link['href'][8:-5])
  122. return None
  123. #@region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME)
  124. def find_episode_ids(self, show_id, season):
  125. """Find episode ids from the show id and the season
  126. :param int show_id: show id
  127. :param int season: season of the episode
  128. :return: episode ids per episode number
  129. :rtype: dict
  130. """
  131. params = {'show_id': show_id, 'season': season}
  132. logger.debug('Searching episodes %r', params)
  133. soup = self.request('/tvshow-{show_id}-{season}.html'.format(**params))
  134. episode_ids = {}
  135. for row in soup.select('table#table5 tr'):
  136. if not row('a', href=self.episode_id_re):
  137. continue
  138. cells = row('td')
  139. episode_ids[int(cells[0].string.split('x')[1])] = int(cells[1].a['href'][8:-5])
  140. return episode_ids
  141. def query(self, series, season, episode, year=None):
  142. show_id = self.find_show_id(series.lower(), year)
  143. if show_id is None:
  144. return []
  145. episode_ids = self.find_episode_ids(show_id, season)
  146. if episode not in episode_ids:
  147. logger.info('Episode %d not found', episode)
  148. return []
  149. params = {'episode_id': episode_ids[episode]}
  150. logger.debug('Searching episode %r', params)
  151. link = '/episode-{episode_id}.html'.format(**params)
  152. soup = self.request(link)
  153. return [TVsubtitlesSubtitle(babelfish.Language.fromtvsubtitles(row.h5.img['src'][13:-4]), series, season,
  154. episode, year if year and show_id != self.find_show_id(series.lower()) else None,
  155. int(row['href'][10:-5]), row.find('p', title='rip').text.strip() or None,
  156. row.find('p', title='release').text.strip() or None,
  157. self.server + '/subtitle-%d.html' % int(row['href'][10:-5]))
  158. for row in soup('a', href=self.subtitle_re)]
  159. def list_subtitles(self, video, languages):
  160. return [s for s in self.query(video.series, video.season, video.episode, video.year) if s.language in languages]
  161. def download_subtitle(self, subtitle):
  162. r = self.session.get(self.server + '/download-{subtitle_id}.html'.format(subtitle_id=subtitle.id),
  163. timeout=10)
  164. if r.status_code != 200:
  165. raise ProviderError('Request failed with status code %d' % r.status_code)
  166. with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
  167. if len(zf.namelist()) > 1:
  168. raise ProviderError('More than one file to unzip')
  169. subtitle.content = fix_line_endings(zf.read(zf.namelist()[0]))