PageRenderTime 2421ms CodeModel.GetById 35ms RepoModel.GetById 2ms app.codeStats 0ms

/nrlbot.py

https://gitlab.com/g.davis13/nrlbot
Python | 344 lines | 203 code | 49 blank | 92 comment | 21 complexity | 02c0082f89caf75dec6f48d7606bcc9a MD5 | raw file
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # nrlbot.py
  5. #
  6. # Copyright 2016 G. Davis <g.davis13+nrlbot@gmail.com>
  7. #
  8. # This program is free software; you can redistribute it and/or modify
  9. # it under the terms of the GNU General Public License as published by
  10. # the Free Software Foundation; either version 2 of the License, or
  11. # (at your option) any later version.
  12. #
  13. # This program is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. # GNU General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU General Public License
  19. # along with this program; if not, write to the Free Software
  20. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  21. # MA 02110-1301, USA.
  22. """
  23. /r/NRL Match Thread Assistant Bot.
  24. v0.3.0 2016-09-10
  25. TODO:
  26. - logging
  27. - error handling and custom errors
  28. - refactor
  29. - SubmissionParser - get the context of different submissions
  30. - handle reddit downtime?
  31. - more & improved templates
  32. """
  33. from bs4 import BeautifulSoup
  34. from collections import namedtuple
  35. import argparse
  36. import fuzzywuzzy.process
  37. import jinja2
  38. import livestream
  39. import m3u8
  40. import praw
  41. import re
  42. import requests
  43. import sys
  44. TEMPLATE_FILE = 'post.txt'
  45. USER_AGENT = '/r/nrl Match Thread Assistant (by /u/thephoenixfoundation)'
  46. VERSION = '0.3.0'
  47. HOST = 'http://api.new.livestream.com/'
  48. ACCOUNT_ID = 3161248
  49. HEADERS = {
  50. 'host': "api.new.livestream.com",
  51. 'connection': "keep-alive",
  52. 'accept': "*/*",
  53. 'origin': "http://livestream.com",
  54. 'x-requested-with': "XMLHttpRequest",
  55. 'user-agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.57 Safari/537.36", # noqa
  56. 'dnt': "1",
  57. 'referer': "http://livestream.com/nrl",
  58. 'accept-encoding': "gzip, deflate, sdch",
  59. 'accept-language': "en-AU,en;q=0.8,en-US;q=0.6",
  60. 'cache-control': "no-cache",
  61. }
  62. class NrlbotError(Exception):
  63. """Base class for exceptions in this module."""
  64. pass
  65. class EventNotFoundError(NrlbotError):
  66. """."""
  67. def __init__(self, message):
  68. """."""
  69. self.message = message
  70. class StreamNotFoundError(NrlbotError):
  71. """."""
  72. def __init__(self, message):
  73. """."""
  74. self.message = message
  75. class EventNotLiveError(NrlbotError):
  76. """."""
  77. def __init__(self, message):
  78. """."""
  79. self.message = message
  80. def make_soup(url):
  81. """Load page & return data for parsing."""
  82. r = requests.get(url)
  83. soup = BeautifulSoup(r.text, 'html.parser')
  84. return soup
  85. def find_config_script(soup):
  86. """."""
  87. script = soup.find('script', string=re.compile('window.config'))
  88. return script.string
  89. def clean_config_script(script):
  90. """."""
  91. return script[script.find('{'):-1]
  92. def find_streams(url):
  93. """."""
  94. playlist = m3u8.load(url)
  95. streams = [{'res': resolution_formatter(stream.stream_info.resolution),
  96. 'url': stream.uri} for stream in playlist.playlists]
  97. return streams
  98. def resolution_formatter(dims_tuple):
  99. """."""
  100. return 'x'.join(str(dim) for dim in dims_tuple)
  101. def render_output(streams, template_file=TEMPLATE_FILE):
  102. """."""
  103. with open(template_file, 'r') as f:
  104. template = f.read()
  105. j2_template = jinja2.Template(template)
  106. return j2_template.render(streams=streams)
  107. def is_match_thread(submission):
  108. """Determine if a given post is an /r/NRL match thread.
  109. This is based on the current match thread format used by the /r/NRL
  110. moderators. If/when that format changes, the checks used will need to be
  111. updated.
  112. Returns a bool.
  113. """
  114. result = True # default to true, checks will set to false if they fail
  115. # is the author the automoderator?
  116. if submission.author.name != 'NRLgamethread':
  117. print('failed on author')
  118. result = False
  119. # are we in /r/nrl?
  120. elif submission.subreddit.display_name != 'nrl':
  121. print('failed on subreddit name')
  122. result = False
  123. # is the first word in the title "round"?
  124. # commented out in 0.2.4: "fade bowl" breaks this
  125. # elif submission.title.split()[0].lower() != 'round':
  126. # print('failed on "round"')
  127. # result = False
  128. #
  129. # are the last two words in the title "discussion thread"?
  130. # TODO: break this down a bit so it can be more easily fixed if the
  131. # thread format changes
  132. elif submission.title.split("|")[1].lower()[1:] != 'match discussion thread':
  133. print('failed on "discussion thread"')
  134. result = False
  135. return result
  136. def normalise_team_name(name):
  137. """Convert a string referencing an NRL team name to the full name.
  138. For example, if name is 'tigers' or 'wests', will return 'Wests Tigers'.
  139. Also works for the three-letter acronyms in the markdown tags used to post
  140. the team logos in the match threads.
  141. """
  142. normalised_name = None
  143. # for starters, we can do a straight map from team three letter acronyms
  144. # used in the match thread to proper team name
  145. names_tlas = ('bri', 'can', 'bul', 'cro', 'gld', 'man', 'mel', 'war',
  146. 'new', 'nql', 'par', 'pen', 'sou', 'sgi', 'syd', 'wst')
  147. names_full = ('Brisbane Broncos',
  148. 'Canberra Raiders',
  149. 'Canterbury-Bankstown Bulldogs',
  150. 'Cronulla Sharks',
  151. 'Gold Coast Titans',
  152. 'Manly-Warringah Sea Eagles',
  153. 'Melbourne Storm',
  154. 'New Zealand Warriors',
  155. 'Newcastle Knights',
  156. 'North Queensland Cowboys',
  157. 'Parramatta Eels',
  158. 'Penrith Panthers',
  159. 'South Sydney Rabbitohs',
  160. 'St George-Illawarra Dragons',
  161. 'Sydney Roosters',
  162. 'Wests Tigers')
  163. # make a dictionary mapping TLAs to full names
  164. tla_to_proper = dict(zip(names_tlas, names_full))
  165. if name in tla_to_proper:
  166. # If 'name' is a TLA from a match thread this will give us the proper
  167. # team name
  168. normalised_name = tla_to_proper[name]
  169. else:
  170. # Not a TLA
  171. # Rather than go through all of livestream's source to figure out
  172. # what terms they use (Tigers? Wests? Wests Tigers?), I think some
  173. # fuzzy string matching will do the trick, and is more flexible in the
  174. # long run (if it's reliable)
  175. normalised_name = fuzzywuzzy.process.extractOne(name, names_full)[0]
  176. return normalised_name
  177. def identify_game(submission):
  178. """Attempt to determine match details from a match thread.
  179. From the match thread submission we try to extract:
  180. - home_team
  181. - away_team
  182. We actually only need one team name to figure out which match this is (as
  183. each team obviously only plays one game per round) but for the sake of
  184. completeness we find both.
  185. Returns a namedtuple of type Game with keys: home, away.
  186. """
  187. # body text of the match thread containing match details as posted by the
  188. # /r/nrl automoderator
  189. text = submission.selftext
  190. # HF 0.2.5: default round_no to None for "fade_bowl"
  191. # it's not used anywhere yet anyway & this will help with finals
  192. round_no = None
  193. # find round number by searching for 'Round' and then grab the digits
  194. for line in text.splitlines():
  195. if 'round' in line.lower():
  196. round_no = ''.join(c for c in line if c.isdigit())
  197. break
  198. # find team names
  199. # we use a regex to match the codes used for the team logos as
  200. # it's predictable and unlikely to change, eg: canberra away is (#can-a)
  201. # this regular expression will match to '#can-h' & return 'can-h'
  202. regex = re.compile('(?<=#)\w{3}(?:-\w)')
  203. teams = re.findall(regex, text)
  204. # re.findall returns a list of matching strings
  205. # loop over it & use the succeeding '-h' or '-a' to determine home & away
  206. for team in teams:
  207. if team[-1] == 'h':
  208. home_team = normalise_team_name(team[:3])
  209. if team[-1] == 'a':
  210. away_team = normalise_team_name(team[:3])
  211. # return results in a nice object
  212. Game = namedtuple('game', ['round', 'home', 'away'])
  213. return Game(round_no, home_team, away_team)
  214. def find_event_playlist(game):
  215. """."""
  216. nrl_account = livestream.Account(ACCOUNT_ID)
  217. events = nrl_account.events()
  218. print('find_event_playlist')
  219. print('Game:')
  220. print(game)
  221. for event in events:
  222. event.reload()
  223. home = event.full_name.split('v')[0].strip().lower()
  224. print('ls home (raw): ' + home)
  225. away = event.full_name.split('v')[1].strip().lower()
  226. print('ls away (raw): ' + away)
  227. home_n = normalise_team_name(home)
  228. print('ls home (normalised): ' + home_n)
  229. away_n = normalise_team_name(away)
  230. print('ls away (normalised): ' + away_n)
  231. if game.home == home_n and game.away == away_n:
  232. print('event found')
  233. if event.is_live:
  234. print('event is live')
  235. return event.m3u8_url
  236. else:
  237. print('event is not live')
  238. raise EventNotLiveError('event is not live')
  239. print('event not found')
  240. raise EventNotFoundError('event not found')
  241. def main(args):
  242. """/r/NRL Match Thread Assistant."""
  243. ag_desc = '/r/NRL Match Thread Assistant'
  244. parser = argparse.ArgumentParser(description=ag_desc)
  245. parser.add_argument('username', help='Bot account username')
  246. parser.add_argument('password', help='Bot account password')
  247. parser.add_argument('--version', action='version', version=VERSION)
  248. args = parser.parse_args()
  249. r = praw.Reddit(USER_AGENT)
  250. r.login(username=args.username, password=args.password)
  251. for c in praw.helpers.comment_stream(r, 'nrl'):
  252. if c.body == 'nrlbot streams':
  253. if is_match_thread(c.submission):
  254. game = identify_game(c.submission)
  255. if not game:
  256. print('failed to identify game from match thread')
  257. c.reply("""Sorry, I couldn't identify this match from
  258. the submission text. This is an error.""")
  259. continue
  260. try:
  261. playlist_url = find_event_playlist(game)
  262. except EventNotFoundError:
  263. c.reply('Livestream event not found :(')
  264. continue
  265. except EventNotLiveError:
  266. c.reply('This event is not currently live :(')
  267. continue
  268. streams = find_streams(playlist_url)
  269. stream_text = render_output(streams=streams)
  270. if not stream_text:
  271. print('failed to load streams')
  272. c.reply("""Sorry, I couldn't load the playlist file from
  273. livestream. This may be an error, or it it may be a
  274. temporary network problem.""")
  275. continue
  276. # post a reply
  277. c.reply(stream_text)
  278. else:
  279. print('not from a match thread')
  280. pass
  281. if __name__ == '__main__':
  282. sys.exit(main(sys.argv))