nrlbot.py - Copyright 2016 G. Davis <g.davis13+nrlbot@gmail…

/nrlbot.py

https://gitlab.com/g.davis13/nrlbot · Python · 344 lines · 196 code · 52 blank · 96 comment · 28 complexity · 02c0082f89caf75dec6f48d7606bcc9a MD5 · raw file

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  nrlbot.py
#
#  Copyright 2016 G. Davis <g.davis13+nrlbot@gmail.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.

"""
/r/NRL Match Thread Assistant Bot.

v0.3.0 2016-09-10
TODO:
- logging
- error handling and custom errors
- refactor
- SubmissionParser - get the context of different submissions
- handle reddit downtime?
- more & improved templates
"""

from bs4 import BeautifulSoup
from collections import namedtuple
import argparse
import fuzzywuzzy.process
import jinja2
import livestream
import m3u8
import praw
import re
import requests
import sys

# Default template file read by render_output() to build the reply text.
TEMPLATE_FILE = 'post.txt'
# User-agent string handed to praw.Reddit() in main().
USER_AGENT = '/r/nrl Match Thread Assistant (by /u/thephoenixfoundation)'
# Version string reported by the --version command line flag.
VERSION = '0.3.0'
# Base URL of the livestream API.
# NOTE(review): not referenced by the code visible in this file - confirm it
# is still needed.
HOST = 'http://api.new.livestream.com/'
# Livestream account id passed to livestream.Account() when searching for the
# event that belongs to a game.
ACCOUNT_ID = 3161248
# Browser-like HTTP request headers for the livestream API.
# NOTE(review): not referenced by the code visible in this file - presumably
# intended for direct requests against HOST; confirm.
HEADERS = {
    'host': "api.new.livestream.com",
    'connection': "keep-alive",
    'accept': "*/*",
    'origin': "http://livestream.com",
    'x-requested-with': "XMLHttpRequest",
    'user-agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.57 Safari/537.36",  # noqa
    'dnt': "1",
    'referer': "http://livestream.com/nrl",
    'accept-encoding': "gzip, deflate, sdch",
    'accept-language': "en-AU,en;q=0.8,en-US;q=0.6",
    'cache-control': "no-cache",
    }


class NrlbotError(Exception):
    """Root of the nrlbot exception hierarchy.

    Catching this type handles every error this module raises on purpose.
    """


class EventNotFoundError(NrlbotError):
    """Signal that no livestream event matches the requested game."""

    def __init__(self, message):
        """Keep the human-readable description on ``self.message``."""
        self.message = message


class StreamNotFoundError(NrlbotError):
    """Signal that a stream could not be found.

    NOTE(review): nothing in the visible code raises this yet.
    """

    def __init__(self, message):
        """Keep the human-readable description on ``self.message``."""
        self.message = message


class EventNotLiveError(NrlbotError):
    """Signal that the matching livestream event is not live right now."""

    def __init__(self, message):
        """Keep the human-readable description on ``self.message``."""
        self.message = message


def make_soup(url, timeout=30):
    """Download *url* and return its HTML parsed for querying.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is given, so without it a stalled
            connection would block the bot indefinitely.

    Returns:
        A BeautifulSoup object built with the 'html.parser' backend.

    Raises:
        requests.exceptions.Timeout: if the server does not respond in time.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')


def find_config_script(soup):
    """Return the text of the first <script> whose content has window.config.

    Args:
        soup: Parsed page offering a BeautifulSoup-style ``find`` method.

    Returns:
        The ``string`` attribute of the matching script element.

    Raises:
        AttributeError: if no script matches, because ``find`` then returns
            None.
    """
    # The dot is escaped so only a literal "window.config" qualifies; an
    # unescaped '.' would also accept e.g. "windowXconfig".
    pattern = re.compile(r'window\.config')
    script = soup.find('script', string=pattern)
    return script.string


def clean_config_script(script):
    """Cut a config script down to the text starting at its first '{'.

    The result runs from the first '{' up to, but not including, the last
    character of *script* (NOTE(review): presumably a trailing ';' - confirm).
    When *script* contains no '{' the result is an empty string.
    """
    first_brace = script.find('{')
    wanted = slice(first_brace, -1)
    return script[wanted]


def find_streams(url):
    """Load the m3u8 playlist at *url* and describe each of its playlists.

    Returns a list with one dict per entry of the loaded playlist's
    ``playlists``: 'res' holds the entry's resolution formatted by
    resolution_formatter() and 'url' holds the entry's URI.
    """
    variants = m3u8.load(url).playlists
    streams = []
    for variant in variants:
        streams.append({
            'res': resolution_formatter(variant.stream_info.resolution),
            'url': variant.uri,
        })
    return streams


def resolution_formatter(dims_tuple):
    """Join the dimensions in *dims_tuple* with 'x', e.g. (1280, 720) -> '1280x720'."""
    return 'x'.join(map(str, dims_tuple))


def render_output(streams, template_file=TEMPLATE_FILE):
    """Render the Jinja2 template stored in *template_file*.

    The file's whole content is used as the template source and *streams* is
    exposed to it under the name ``streams``. Returns the rendered text.
    """
    with open(template_file, 'r') as handle:
        source = handle.read()

    return jinja2.Template(source).render(streams=streams)


def is_match_thread(submission):
    """Determine if a given post is an /r/NRL match thread.

    This is based on the current match thread format used by the /r/NRL
    moderators. If/when that format changes, the checks used will need to be
    updated.

    A post qualifies when all of the following hold:
    - its author is 'NRLgamethread';
    - its subreddit display name is 'nrl';
    - the second '|'-separated part of its title, lower-cased and with its
      first character dropped, is 'match discussion thread'.

    The reason for the first failing check is printed. A post with no author
    or with no '|' in its title is reported as "not a match thread" instead
    of raising, so one odd post cannot take the bot down.

    Returns a bool.
    """
    # is the author the automoderator?
    # (author may be None, e.g. when the account no longer exists)
    author = submission.author
    if author is None or author.name != 'NRLgamethread':
        print('failed on author')
        return False

    # are we in /r/nrl?
    if submission.subreddit.display_name != 'nrl':
        print('failed on subreddit name')
        return False

    # A check that the title starts with "round" was dropped in 0.2.4 because
    # the "fade bowl" thread does not follow that pattern.

    # is the part after the first "|" equal to "match discussion thread"?
    # TODO: break this down a bit so it can be more easily fixed if the
    # thread format changes
    title_parts = submission.title.split('|')
    if len(title_parts) < 2:
        print('failed on "discussion thread"')
        return False
    # [1:] drops the single space that follows the "|" separator
    if title_parts[1].lower()[1:] != 'match discussion thread':
        print('failed on "discussion thread"')
        return False

    return True


def normalise_team_name(name):
    """Map any reference to an NRL team onto that team's full name.

    Two kinds of input are understood:
    - the three-letter codes used by the logo markdown tags in match threads
      (for example 'wst'), which are looked up directly;
    - anything else (for example 'tigers' or 'wests'), which is fuzzy-matched
      against the list of full names and resolved to the best candidate.

    Both examples above resolve to 'Wests Tigers'.
    """
    # (three-letter code, full name) pairs; the order is also the order in
    # which candidates are offered to the fuzzy matcher
    teams = (
        ('bri', 'Brisbane Broncos'),
        ('can', 'Canberra Raiders'),
        ('bul', 'Canterbury-Bankstown Bulldogs'),
        ('cro', 'Cronulla Sharks'),
        ('gld', 'Gold Coast Titans'),
        ('man', 'Manly-Warringah Sea Eagles'),
        ('mel', 'Melbourne Storm'),
        ('war', 'New Zealand Warriors'),
        ('new', 'Newcastle Knights'),
        ('nql', 'North Queensland Cowboys'),
        ('par', 'Parramatta Eels'),
        ('pen', 'Penrith Panthers'),
        ('sou', 'South Sydney Rabbitohs'),
        ('sgi', 'St George-Illawarra Dragons'),
        ('syd', 'Sydney Roosters'),
        ('wst', 'Wests Tigers'),
    )
    tla_to_proper = dict(teams)

    # A code straight out of a match thread needs no guessing
    if name in tla_to_proper:
        return tla_to_proper[name]

    # Anything else: instead of cataloguing every spelling livestream might
    # use (Tigers? Wests? Wests Tigers?), take the closest full name
    names_full = tuple(full for _, full in teams)
    best_match = fuzzywuzzy.process.extractOne(name, names_full)
    return best_match[0]


def identify_game(submission):
    """Attempt to determine match details from a match thread.

    From the match thread submission we try to extract:
    - round: the digits of the first line mentioning "round" (a string), or
      None when no line does (e.g. the "fade bowl" thread or finals);
    - home: full name of the team tagged as home;
    - away: full name of the team tagged as away.
    One team would be enough to pin down the match (each team plays once per
    round) but both are collected for completeness.

    Returns a namedtuple of type Game with keys: round, home, away - or None
    when the home or the away team cannot be found in the submission text, so
    that callers can test the result for truthiness.
    """
    # body text of the match thread containing match details as posted by the
    # /r/nrl automoderator
    text = submission.selftext

    # round number: first line containing "round", keeping only its digits
    # (not used anywhere yet)
    round_no = None
    for line in text.splitlines():
        if 'round' in line.lower():
            round_no = ''.join(c for c in line if c.isdigit())
            break

    # find team names
    # the codes used for the team logos are predictable and unlikely to
    # change, eg: canberra away is (#can-a); for '#can-h' the pattern yields
    # 'can-h'. A raw string keeps '\w' from being read as a string escape.
    tag_pattern = re.compile(r'(?<=#)\w{3}(?:-\w)')

    # start from "unknown" so a thread without logo tags does not leave the
    # names undefined
    home_team = None
    away_team = None
    for tag in tag_pattern.findall(text):
        # the trailing letter says whether the team is home ('h') or away ('a')
        side = tag[-1]
        if side == 'h':
            home_team = normalise_team_name(tag[:3])
        elif side == 'a':
            away_team = normalise_team_name(tag[:3])

    if home_team is None or away_team is None:
        return None

    # return results in a nice object
    Game = namedtuple('game', ['round', 'home', 'away'])
    return Game(round_no, home_team, away_team)


def find_event_playlist(game):
    """Find the livestream event for *game* and return its playlist URL.

    Every event of the livestream account ACCOUNT_ID is reloaded and its
    ``full_name`` is read as "<home> v <away>" (or "vs"). Both sides are
    normalised with normalise_team_name() and compared with ``game.home`` and
    ``game.away``. Events whose name does not have that shape are skipped;
    previously a name without the letter "v" aborted the whole search with an
    IndexError.

    Progress is printed along the way.

    Args:
        game: Object with ``home`` and ``away`` attributes holding full team
            names, as produced by identify_game().

    Returns:
        The ``m3u8_url`` of the matching event when that event is live.

    Raises:
        EventNotLiveError: the matching event exists but is not live.
        EventNotFoundError: no event matches the game.
    """
    nrl_account = livestream.Account(ACCOUNT_ID)
    events = nrl_account.events()

    print('find_event_playlist')
    print('Game:')
    print(game)

    # Separator between the two team names: a stand-alone "v", "vs" or "vs."
    # surrounded by whitespace, so a letter "v" inside a word never splits.
    versus = re.compile(r'\s+vs?\.?\s+', re.IGNORECASE)

    for event in events:
        event.reload()
        sides = [side.strip().lower()
                 for side in versus.split(event.full_name, maxsplit=1)]
        if len(sides) != 2 or not all(sides):
            # not a "<home> v <away>" fixture, cannot be the game we want
            print('skipping event: ' + event.full_name)
            continue
        home, away = sides
        print('ls home (raw): ' + home)
        print('ls away (raw): ' + away)
        home_n = normalise_team_name(home)
        print('ls home (normalised): ' + home_n)
        away_n = normalise_team_name(away)
        print('ls away (normalised): ' + away_n)
        if game.home == home_n and game.away == away_n:
            print('event found')
            if event.is_live:
                print('event is live')
                return event.m3u8_url
            print('event is not live')
            raise EventNotLiveError('event is not live')

    print('event not found')
    raise EventNotFoundError('event not found')


def main(args):
    """Run the /r/NRL Match Thread Assistant.

    Logs in to reddit and follows the /r/nrl comment stream. Whenever a
    comment consisting of exactly 'nrlbot streams' appears in a match thread,
    the bot identifies the game, looks up the matching livestream event and
    replies with the rendered list of streams, or with an apology when a step
    fails.

    Args:
        args: Full argument vector in ``sys.argv`` form, i.e. program name
            first, followed by the bot username and password.
    """
    ag_desc = '/r/NRL Match Thread Assistant'
    parser = argparse.ArgumentParser(description=ag_desc)
    parser.add_argument('username', help='Bot account username')
    # NOTE(security): a password given on the command line can be seen by
    # other local users in the process list; consider reading it from the
    # environment or a config file instead.
    parser.add_argument('password', help='Bot account password')
    parser.add_argument('--version', action='version', version=VERSION)
    # parse the vector we were handed (minus the program name) rather than
    # silently ignoring the parameter
    args = parser.parse_args(args[1:])

    r = praw.Reddit(USER_AGENT)
    r.login(username=args.username, password=args.password)

    for c in praw.helpers.comment_stream(r, 'nrl'):
        if c.body == 'nrlbot streams':
            if is_match_thread(c.submission):
                game = identify_game(c.submission)
                # a Game tuple is always truthy, so also look at its teams
                if not game or not (game.home and game.away):
                    print('failed to identify game from match thread')
                    c.reply("""Sorry, I couldn't identify this match from
                            the submission text. This is an error.""")
                    continue
                try:
                    playlist_url = find_event_playlist(game)
                except EventNotFoundError:
                    c.reply('Livestream event not found :(')
                    continue
                except EventNotLiveError:
                    c.reply('This event is not currently live :(')
                    continue
                streams = find_streams(playlist_url)
                stream_text = render_output(streams=streams)
                if not stream_text:
                    print('failed to load streams')
                    c.reply("""Sorry, I couldn't load the playlist file from
                            livestream. This may be an error, or it may be a
                            temporary network problem.""")
                    continue
                # post a reply
                c.reply(stream_text)
            else:
                print('not from a match thread')


# Script entry point: hand the full argument vector to main() and use its
# return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))