PageRenderTime 34ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/film20/showtimes/base_fetcher.py

https://bitbucket.org/filmaster/filmaster-stable/
Python | 193 lines | 188 code | 5 blank | 0 comment | 1 complexity | 9be7d995e3d39cb53ec9739a92057fdf MD5 | raw file
Possible License(s): BSD-2-Clause, GPL-2.0, BSD-3-Clause, JSON
  1. from django.core.management.base import CommandError
  2. from django.db.models import Max
  3. from beautifulsoup import BeautifulSoup
  4. from optparse import make_option
  5. import logging
  6. import datetime
  7. import re
  8. import sys
  9. import pytz
  10. from film20.showtimes.models import FilmOnChannel, Screening, Country
  11. from film20.utils.texts import normalized_text
  12. from film20.core.management.base import BaseCommand
  13. logger = logging.getLogger(__name__)
  14. quote_re = re.compile(r'("[^"]*")([^\s])')
  15. BeautifulSoup.MARKUP_MASSAGE += [
  16. (re.compile(r"<[^>]+>"), lambda tag:quote_re.sub(r"\1 \2", tag.group(0))),
  17. (re.compile(r"<!\[CDATA\[.*?\]\]>",re.I|re.M|re.DOTALL),lambda x:''),
  18. (re.compile("<script[^>]*?>.*?</script>",re.I|re.M|re.DOTALL),lambda x:''),
  19. ]
  20. def class_test(class_name):
  21. def test(tag):
  22. return any((class_name in value.split(' ')) for (name, value) in tag.attrs if name=='class')
  23. return test
  24. def tag_text(tag, **kw):
  25. return (' '.join(unicode(t) for t in tag.findAll(text=True, **kw))).strip()
  26. title_re = re.compile(r'(.*)\([^\)]+\)\s*')
  27. def normalized_title(t):
  28. m = title_re.match(t)
  29. return normalized_text(m and m.group(1) or t)
  30. class BaseFetcher(BaseCommand):
  31. option_list = BaseCommand.option_list + (
  32. make_option('--all',
  33. action='store_true',
  34. dest='all',
  35. default=False,
  36. help='All locations'
  37. ),
  38. make_option('--day',
  39. action='store',
  40. dest='day',
  41. default=0,
  42. type='int',
  43. help='Day (0-today, 1-tomorrow, etc)'
  44. ),
  45. make_option('--disable-if-no-movie',
  46. action='store_true',
  47. dest='disable_no_movie',
  48. default=False,
  49. help='Disable channels without movies',
  50. ),
  51. )
  52. class ScreeningsNotFound(Exception):
  53. pass
  54. def soup(self, data):
  55. return BeautifulSoup(data)
  56. def handle_base_options(self, *args, **opt):
  57. """
  58. self.logger = logging.getLogger(__name__)
  59. self.logger.addHandler(logging.StreamHandler(sys.stdout))
  60. LEVEL = (logging.WARNING, logging.INFO, logging.DEBUG)
  61. self.logger.setLevel(LEVEL[int(opt.get('verbosity',0))])
  62. """
  63. self.logger = logger
  64. self.day = opt.get('day')
  65. self.date = datetime.date.today() + datetime.timedelta(days=self.day)
  66. self.all = opt.get('all')
  67. self.opt = opt
  68. def run(self, *args, **opt):
  69. raise NotImplementedError
  70. def handle(self, *args, **opt):
  71. self.handle_base_options(*args, **opt)
  72. self.run(*args, **opt)
  73. @classmethod
  74. def time_to_utc(cls, t, timezone_id=None):
  75. if not t.tzinfo and timezone_id:
  76. t = pytz.timezone(timezone_id).localize(t)
  77. if t.tzinfo:
  78. t=t.astimezone(pytz.utc).replace(tzinfo=None)
  79. return t
  80. def update_movie(self, channel, movie, timezone_id=None):
  81. times = [self.time_to_utc(t, timezone_id) for t in movie.get('times', ())]
  82. if channel.last_screening_time:
  83. times = filter(lambda t: t > channel.last_screening_time, times)
  84. if times:
  85. fetcher_name = getattr(self, 'name', '')
  86. film = FilmOnChannel.objects.match(movie, source=fetcher_name)
  87. logger.info("%s %s", film, "matched" if film.film else "unmatched")
  88. for t in times:
  89. Screening.objects.get_or_create(channel=channel, film=film, utc_time=t)
  90. return True
  91. else:
  92. logger.debug('no newer screening times for %r on %r', movie, channel)
  93. @classmethod
  94. def parse_tm(cls, date, t):
  95. try:
  96. hm = tuple(map(int, t.split(':')))
  97. return datetime.datetime(*(date.timetuple()[:3]+hm))
  98. except Exception, e:
  99. logger.warning("%s: cannot parse: %s", e, t)
  100. def last_screening_date(self, channel):
  101. from film20.showtimes.showtimes_helper import DAY_START_DELTA
  102. if channel.last_screening_time:
  103. return (pytz.utc.localize(channel.last_screening_time).astimezone(self.channel_timezone(channel)) - DAY_START_DELTA).date()
  104. def channel_timezone(self, channel):
  105. raise NotImplementedError()
  106. def update_channel_screenings(self, channel):
  107. logger.info("update screenings for %s", channel)
  108. # auto detect start date using existing channel screenings
  109. date = self.last_screening_date(channel)
  110. today = datetime.datetime.now(self.channel_timezone(channel)).date()
  111. if date:
  112. logger.debug("last date form %s: %s", channel, date)
  113. date = date + datetime.timedelta(days=1)
  114. date = max(date, today)
  115. else:
  116. date = today
  117. cnt = 0
  118. start_date = date
  119. err = False
  120. try:
  121. while True:
  122. for movie in self.fetch_movies(channel, date):
  123. logger.info("movie %s in %s", movie, channel)
  124. if self.update_movie(channel, movie):
  125. cnt += 1
  126. date += datetime.timedelta(days=1)
  127. except self.ScreeningsNotFound, e:
  128. logger.info("no more screenings for %s at %s", channel, date)
  129. except Exception, e:
  130. logger.exception(e)
  131. err = True
  132. if cnt:
  133. max_time = channel.screening_set.aggregate(Max('utc_time')).get('utc_time__max')
  134. if max_time:
  135. channel.last_screening_time = max_time
  136. channel.save()
  137. logger.debug("last screening time for %s: %s", channel, max_time)
  138. elif self.opt['disable_no_movie'] and (date - start_date).days >= 3 and not err:
  139. channel.is_active=False
  140. channel.save()
  141. logger.info("channel %s disabled", channel)
  142. class BaseCinemaFetcher(BaseFetcher):
  143. option_list = BaseFetcher.option_list + (
  144. make_option('--update-cinemas',
  145. action='store_true',
  146. dest='update_cinemas',
  147. default=False,
  148. help='Update cinemas at a given location'
  149. ),
  150. )
  151. class BaseTVFetcher(BaseFetcher):
  152. option_list = BaseFetcher.option_list + (
  153. make_option('--update-channels',
  154. action='store_true',
  155. dest='update_channels',
  156. default=False,
  157. help='Update channels'
  158. ),
  159. make_option('--country',
  160. dest='country',
  161. ),
  162. )
  163. def handle_base_options(self, *args, **opt):
  164. super(BaseTVFetcher, self).handle_base_options(*args, **opt)
  165. code = opt.get('country')
  166. self.country = code and Country.objects.get(code=code.upper())