/nrlbot.py
Python | 344 lines | 203 code | 49 blank | 92 comment | 21 complexity | 02c0082f89caf75dec6f48d7606bcc9a MD5 | raw file
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- #
- # nrlbot.py
- #
- # Copyright 2016 G. Davis <g.davis13+nrlbot@gmail.com>
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- # MA 02110-1301, USA.
- """
- /r/NRL Match Thread Assistant Bot.
- v0.3.0 2016-09-10
- TODO:
- - logging
- - error handling and custom errors
- - refactor
- - SubmissionParser - get the context of different submissions
- - handle reddit downtime?
- - more & improved templates
- """
- from bs4 import BeautifulSoup
- from collections import namedtuple
- import argparse
- import fuzzywuzzy.process
- import jinja2
- import livestream
- import m3u8
- import praw
- import re
- import requests
- import sys
# --- bot configuration -------------------------------------------------------
# Default template file read by render_output().
TEMPLATE_FILE = 'post.txt'
# Identifier handed to praw.Reddit() in main().
USER_AGENT = '/r/nrl Match Thread Assistant (by /u/thephoenixfoundation)'
# Reported by the --version command line switch.
VERSION = '0.3.0'

# --- livestream configuration ------------------------------------------------
# HOST and HEADERS are not referenced by the code visible in this file.
HOST = 'http://api.new.livestream.com/'
# Account whose events are searched by find_event_playlist().
ACCOUNT_ID = 3161248
HEADERS = {
    'host': 'api.new.livestream.com',
    'connection': 'keep-alive',
    'accept': '*/*',
    'origin': 'http://livestream.com',
    'x-requested-with': 'XMLHttpRequest',
    'user-agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/53.0.2785.57 Safari/537.36'
    ),
    'dnt': '1',
    'referer': 'http://livestream.com/nrl',
    'accept-encoding': 'gzip, deflate, sdch',
    'accept-language': 'en-AU,en;q=0.8,en-US;q=0.6',
    'cache-control': 'no-cache',
}
class NrlbotError(Exception):
    """Root of this module's exception hierarchy.

    Catching NrlbotError handles every error the module raises on purpose.
    """
class EventNotFoundError(NrlbotError):
    """Raised when no livestream event matches the requested game.

    The ``message`` attribute holds a human-readable description.
    """

    def __init__(self, message):
        """Record *message* on the exception."""
        # Chain to the base class so ``args`` and ``str(exc)`` carry the
        # message however the exception was constructed.
        super().__init__(message)
        self.message = message
class StreamNotFoundError(NrlbotError):
    """Error for a stream that could not be found.

    Not raised anywhere in the code visible in this file. The ``message``
    attribute holds a human-readable description.
    """

    def __init__(self, message):
        """Record *message* on the exception."""
        # Chain to the base class so ``args`` and ``str(exc)`` carry the
        # message however the exception was constructed.
        super().__init__(message)
        self.message = message
class EventNotLiveError(NrlbotError):
    """Raised when the matching livestream event is not currently live.

    The ``message`` attribute holds a human-readable description.
    """

    def __init__(self, message):
        """Record *message* on the exception."""
        # Chain to the base class so ``args`` and ``str(exc)`` carry the
        # message however the exception was constructed.
        super().__init__(message)
        self.message = message
def make_soup(url, timeout=30):
    """Download *url* and return its body parsed for scraping.

    Args:
        url: address of the page to fetch.
        timeout: seconds handed to ``requests.get`` as its ``timeout``
            argument. Without one a stalled server would block the caller
            indefinitely.

    Returns a BeautifulSoup object built with the 'html.parser' backend.
    The HTTP status code is not checked, so error pages are parsed too.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
def find_config_script(soup):
    """Return the text of the <script> element that mentions window.config.

    *soup* is searched with ``soup.find`` for a 'script' tag whose string
    matches the regular expression ``window.config`` (the dot is unescaped,
    so it matches any character). The ``.string`` of the result is returned.

    NOTE(review): if nothing matches, ``soup.find`` presumably returns None
    and the attribute access raises AttributeError - confirm with callers.
    """
    pattern = re.compile('window.config')
    tag = soup.find('script', string=pattern)
    return tag.string
def clean_config_script(script):
    """Trim *script* down to the text starting at its first '{'.

    The final character of *script* is always dropped. When *script*
    contains no '{', ``str.find`` yields -1 and the result is the empty
    string.
    """
    start = script.find('{')
    return script[start:-1]
def find_streams(url):
    """Load the m3u8 playlist at *url* and list its variant streams.

    Returns a list with one dict per entry of ``playlist.playlists``:
    'res' is the entry's resolution formatted by resolution_formatter()
    and 'url' is the entry's ``uri``.
    """
    playlist = m3u8.load(url)
    streams = []
    for variant in playlist.playlists:
        resolution = resolution_formatter(variant.stream_info.resolution)
        streams.append({'res': resolution, 'url': variant.uri})
    return streams
def resolution_formatter(dims_tuple):
    """Join the items of *dims_tuple* with 'x', e.g. (1280, 720) -> '1280x720'.

    Each item is converted with ``str`` first. An empty iterable gives ''.
    """
    return 'x'.join(map(str, dims_tuple))
def render_output(streams, template_file=TEMPLATE_FILE):
    """Render the Jinja2 template in *template_file* for *streams*.

    The file is read in text mode and rendered with *streams* exposed to
    the template under the name ``streams``. Returns the rendered text.
    """
    with open(template_file, 'r') as handle:
        source = handle.read()
    return jinja2.Template(source).render(streams=streams)
def is_match_thread(submission):
    """Determine if a given post is an /r/NRL match thread.

    This is based on the current match thread format used by the /r/NRL
    moderators. If/when that format changes, the checks used will need to be
    updated. The checks run in order and the first failure is printed:

    1. the author is 'NRLgamethread';
    2. the subreddit is 'nrl';
    3. the text after the first '|' in the title, minus its first
       character, is 'match discussion thread' (case-insensitive).

    A former check that the title starts with "round" was removed in 0.2.4
    because "fade bowl" titles broke it.

    Returns a bool. A missing author or a title without '|' yields False
    instead of raising.
    """
    # is the author the automoderator?
    # A submission may have no author object at all (presumably a deleted
    # account), so guard before reading .name.
    author = submission.author
    if author is None or author.name != 'NRLgamethread':
        print('failed on author')
        return False

    # are we in /r/nrl?
    if submission.subreddit.display_name != 'nrl':
        print('failed on subreddit name')
        return False

    # is the part of the title after the pipe "match discussion thread"?
    # TODO: break this down a bit so it can be more easily fixed if the
    # thread format changes
    title_parts = submission.title.split('|')
    # [1:] drops the single character (normally a space) following the pipe.
    if (len(title_parts) < 2
            or title_parts[1].lower()[1:] != 'match discussion thread'):
        print('failed on "discussion thread"')
        return False

    return True
def normalise_team_name(name):
    """Map a string referring to an NRL team onto the team's full name.

    *name* may be one of the three-letter codes used by the logo markdown
    tags in match threads (e.g. 'wst'), which is looked up directly, or any
    other text (e.g. 'tigers' or 'wests'), which is resolved to the closest
    full name by fuzzy string matching. Either way 'Wests Tigers' style
    full names are returned.
    """
    # (three-letter code, full name) pairs; the order also fixes the order
    # in which candidates are offered to the fuzzy matcher.
    teams = (
        ('bri', 'Brisbane Broncos'),
        ('can', 'Canberra Raiders'),
        ('bul', 'Canterbury-Bankstown Bulldogs'),
        ('cro', 'Cronulla Sharks'),
        ('gld', 'Gold Coast Titans'),
        ('man', 'Manly-Warringah Sea Eagles'),
        ('mel', 'Melbourne Storm'),
        ('war', 'New Zealand Warriors'),
        ('new', 'Newcastle Knights'),
        ('nql', 'North Queensland Cowboys'),
        ('par', 'Parramatta Eels'),
        ('pen', 'Penrith Panthers'),
        ('sou', 'South Sydney Rabbitohs'),
        ('sgi', 'St George-Illawarra Dragons'),
        ('syd', 'Sydney Roosters'),
        ('wst', 'Wests Tigers'),
    )
    tla_to_proper = dict(teams)

    # Exact hit on a three-letter code from a match thread.
    full_name = tla_to_proper.get(name)
    if full_name is not None:
        return full_name

    # Not a code. Rather than catalogue every spelling livestream might use
    # (Tigers? Wests? Wests Tigers?), take the best fuzzy match among the
    # full names.
    names_full = tuple(full for _, full in teams)
    return fuzzywuzzy.process.extractOne(name, names_full)[0]
def identify_game(submission):
    """Attempt to determine match details from a match thread.

    From the match thread submission text we try to extract:
    - the round number (digits of the first line mentioning "round")
    - home_team
    - away_team
    We actually only need one team name to figure out which match this is (as
    each team obviously only plays one game per round) but for the sake of
    completeness we find both.

    Returns a namedtuple with fields: round, home, away. ``round`` is None
    when no line mentions "round" (e.g. finals). Returns None when the home
    or the away team cannot be found, so callers can test the result for
    truthiness.
    """
    # body text of the match thread containing match details as posted by the
    # /r/nrl automoderator
    text = submission.selftext

    # HF 0.2.5: round_no defaults to None; it's not used anywhere yet and
    # this keeps threads without a round number working.
    round_no = None
    for line in text.splitlines():
        if 'round' in line.lower():
            round_no = ''.join(c for c in line if c.isdigit())
            break

    # Team names come from the codes used for the team logos, which are
    # predictable and unlikely to change, e.g. canberra away is (#can-a).
    # The pattern matches '#can-h' and yields 'can-h'.
    home_team = None
    away_team = None
    for tag in re.findall(r'(?<=#)\w{3}(?:-\w)', text):
        side = tag[-1]  # trailing 'h' or 'a' marks home or away
        if side == 'h':
            home_team = normalise_team_name(tag[:3])
        elif side == 'a':
            away_team = normalise_team_name(tag[:3])

    # Without both teams the match cannot be identified. Previously this
    # path crashed on an unbound local variable.
    if home_team is None or away_team is None:
        return None

    # return results in a nice object
    Game = namedtuple('game', ['round', 'home', 'away'])
    return Game(round_no, home_team, away_team)
def find_event_playlist(game):
    """Find the livestream event for *game* and return its playlist URL.

    Every event of the account ACCOUNT_ID is reloaded and its ``full_name``
    is split into a home and an away side around a standalone "v" / "vs".
    Both sides are normalised with normalise_team_name() and compared to
    ``game.home`` and ``game.away``. Events whose name does not look like
    "<home> v <away>" are skipped.

    Returns ``event.m3u8_url`` of the matching event.

    Raises:
        EventNotLiveError: the matching event exists but is not live.
        EventNotFoundError: no event matches *game*.
    """
    nrl_account = livestream.Account(ACCOUNT_ID)
    events = nrl_account.events()
    print('find_event_playlist')
    print('Game:')
    print(game)

    # Split only on a whitespace-delimited "v", "vs" or "vs." so that a
    # letter "v" inside another word never cuts the name in two.
    versus = re.compile(r'\s+vs?\.?\s+', re.IGNORECASE)

    for event in events:
        event.reload()
        # NOTE(review): assumes full_name is a string; a missing name is
        # treated as empty and the event is skipped.
        full_name = event.full_name or ''
        sides = versus.split(full_name, maxsplit=1)
        if len(sides) != 2:
            print('skipping event (no "home v away" name): ' + full_name)
            continue
        home, away = (side.strip().lower() for side in sides)
        print('ls home (raw): ' + home)
        print('ls away (raw): ' + away)
        home_n = normalise_team_name(home)
        print('ls home (normalised): ' + home_n)
        away_n = normalise_team_name(away)
        print('ls away (normalised): ' + away_n)

        if game.home != home_n or game.away != away_n:
            continue

        print('event found')
        if event.is_live:
            print('event is live')
            return event.m3u8_url
        print('event is not live')
        raise EventNotLiveError('event is not live')

    print('event not found')
    raise EventNotFoundError('event not found')
def main(args):
    """/r/NRL Match Thread Assistant.

    Logs in to reddit and watches the comment stream of /r/nrl. Whenever a
    comment whose body is exactly 'nrlbot streams' appears in a match
    thread, the bot replies with the rendered list of livestream streams
    for that match, or with an apology when the match, the event or the
    playlist cannot be resolved.

    Args:
        args: full argument vector including the program name, i.e.
            ``sys.argv``. When empty or None, argparse falls back to
            ``sys.argv`` itself.

    The reddit calls used here are ``Reddit.login`` and
    ``praw.helpers.comment_stream``. The function only returns if the
    comment stream ends.
    """
    ag_desc = '/r/NRL Match Thread Assistant'
    parser = argparse.ArgumentParser(description=ag_desc)
    parser.add_argument('username', help='Bot account username')
    parser.add_argument('password', help='Bot account password')
    parser.add_argument('--version', action='version', version=VERSION)
    # Honour the argv we were given; args[0] is the program name.
    args = parser.parse_args(args[1:] if args else None)

    r = praw.Reddit(USER_AGENT)
    r.login(username=args.username, password=args.password)

    for c in praw.helpers.comment_stream(r, 'nrl'):
        if c.body != 'nrlbot streams':
            continue

        if not is_match_thread(c.submission):
            print('not from a match thread')
            continue

        game = identify_game(c.submission)
        if not game:
            print('failed to identify game from match thread')
            # Built from adjacent literals so that no source newlines or
            # indentation end up inside the posted comment.
            c.reply("Sorry, I couldn't identify this match from "
                    "the submission text. This is an error.")
            continue

        try:
            playlist_url = find_event_playlist(game)
        except EventNotFoundError:
            c.reply('Livestream event not found :(')
            continue
        except EventNotLiveError:
            c.reply('This event is not currently live :(')
            continue

        streams = find_streams(playlist_url)
        stream_text = render_output(streams=streams)
        if not stream_text:
            print('failed to load streams')
            c.reply("Sorry, I couldn't load the playlist file from "
                    "livestream. This may be an error, or it may be a "
                    "temporary network problem.")
            continue

        # post a reply
        c.reply(stream_text)
if __name__ == '__main__':
    # Script entry point: run the bot with the process arguments and use
    # whatever main() returns as the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)