/notebooks/get-data-2016.ipynb
Jupyter | 410 lines | 410 code | 0 blank | 0 comment | 0 complexity | 581b303f3d20c6ebd4f9b143aa5ea7f9 MD5 | raw file
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 109,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import pandas\n",
- "import requests\n",
- "import bs4\n",
- "import os"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 127,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Download the schedule page. A timeout keeps the cell from hanging on a dead\n",
- "# connection, and raise_for_status() stops us from parsing an HTTP error page.\n",
- "url = \"http://www.fftoday.com/nfl/schedule.php\"\n",
- "response = requests.get(url, timeout=30)\n",
- "response.raise_for_status()\n",
- "soup = bs4.BeautifulSoup(response.text)\n",
- "\n",
- "# Take the <table> at index 8 of the page and let pandas turn it into a frame.\n",
- "# NOTE(review): index 8 is tied to the page layout - verify if the site changes.\n",
- "df = pandas.read_html(str(soup.find_all('table')[8]))[0]\n",
- "df.columns = ['date', 'time', 'Visitor', 'Home Team']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 128,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# get rid of the pesky characters: A-circumflex, the non-breaking space, and\n",
- "# a space followed by a superscript one\n",
- "bad_chars = [u'\\xc2', u'\\xa0', u' \\xb9']\n",
- "\n",
- "for col in df.columns:\n",
- "    cleaned = df[col]\n",
- "    for ch in bad_chars:\n",
- "        cleaned = cleaned.str.replace(ch, '')\n",
- "    # The replacements alone can leave whitespace at the edges of a cell (the\n",
- "    # printed team names included e.g. 'Jacksonville Jaguars '), so trim it.\n",
- "    df[col] = cleaned.str.strip()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 129,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# fill in dates: a row whose date is an empty string inherits the (already\n",
- "# filled) date of the row above it.\n",
- "#\n",
- "# The filled column is built in a plain list and assigned back in one step.\n",
- "# Writing into the rows yielded by df.iterrows() is not guaranteed to reach the\n",
- "# DataFrame, because pandas may hand out copies of the rows.\n",
- "filled_dates = []\n",
- "for pos, date in enumerate(df['date']):\n",
- "    # the first row has nothing above it, so it is kept as it is\n",
- "    if pos > 0 and date == '':\n",
- "        date = filled_dates[-1]\n",
- "    filled_dates.append(date)\n",
- "df['date'] = filled_dates\n",
- "\n",
- "# fix the rows: remove repeated rows and rows with missing cells, then discard\n",
- "# the first remaining row (presumably a leftover header row - TODO confirm)\n",
- "df = df.drop_duplicates().dropna()\n",
- "df = df[1:]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 130,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# append the year to every date string\n",
- "df['date'] = df['date'] + ' 2016'\n",
- "\n",
- "# dates that mention 'Jan' get 2017 instead of 2016\n",
- "in_january = df['date'].str.contains('Jan')\n",
- "january_dates = df.loc[in_january, 'date']\n",
- "df.loc[in_january, 'date'] = january_dates.str.replace('2016', '2017')\n",
- "\n",
- "# parse the completed strings (weekday, month, day, year) into datetimes\n",
- "df['date'] = pandas.to_datetime(df['date'], format='%a %b %d %Y')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 131,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# Root directory of the nfl code. It is read from the MLNFL_ROOT environment\n",
- "# variable when that is set; otherwise the previous hard-coded location is used.\n",
- "# NOTE(review): the earlier comment spelled the variable $MLNLF_ROOT - confirm the name.\n",
- "rootDir = os.environ.get('MLNFL_ROOT', '/Users/amit/repos/mlnfl/nfl')\n",
- "codeDir = \"\".join([rootDir, os.path.sep])\n",
- "dataRoot = \"\".join([codeDir, \"data\", os.path.sep])\n",
- "\n",
- "# os.path.join avoids the doubled separator that dataRoot + '/...' produced\n",
- "df.to_csv(os.path.join(dataRoot, 'nfl_schedule_2016.csv'), index=False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 138,
- "metadata": {
- "collapsed": false,
- "scrolled": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- ">>Denver Broncos<<\n",
- ">>Baltimore Ravens<<\n",
- ">>Houston Texans<<\n",
- ">>New York Jets<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>Jacksonville Jaguars<<\n",
- ">>Tennessee Titans<<\n",
- ">>New Orleans Saints<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Atlanta Falcons<<\n",
- ">>Seattle Seahawks<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Dallas Cowboys<<\n",
- ">>Arizona Cardinals<<\n",
- ">>Washington Redskins<<\n",
- ">>San Francisco 49ers<<\n",
- ">>Buffalo Bills<<\n",
- ">>Cleveland Browns<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>Washington Redskins<<\n",
- ">>Houston Texans<<\n",
- ">>New England Patriots<<\n",
- ">>New York Giants<<\n",
- ">>Carolina Panthers<<\n",
- ">>Detroit Lions<<\n",
- ">>Los Angeles Rams<<\n",
- ">>Arizona Cardinals<<\n",
- ">>Oakland Raiders<<\n",
- ">>Denver Broncos<<\n",
- ">>San Diego Chargers<<\n",
- ">>Minnesota Vikings<<\n",
- ">>Chicago Bears<<\n",
- ">>New England Patriots<<\n",
- ">>Buffalo Bills<<\n",
- ">>Jacksonville Jaguars<<\n",
- ">>Miami Dolphins<<\n",
- ">>Cincinnati Bengals<<\n",
- ">>Green Bay Packers<<\n",
- ">>Carolina Panthers<<\n",
- ">>Tennessee Titans<<\n",
- ">>New York Giants<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>Seattle Seahawks<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Dallas Cowboys<<\n",
- ">>New Orleans Saints<<\n",
- ">>Cincinnati Bengals<<\n",
- ">>Jacksonville Jaguars <<\n",
- ">>New England Patriots<<\n",
- ">>Atlanta Falcons<<\n",
- ">>Washington Redskins<<\n",
- ">>Chicago Bears<<\n",
- ">>Baltimore Ravens<<\n",
- ">>New York Jets<<\n",
- ">>Houston Texans<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>San Francisco 49ers<<\n",
- ">>Arizona Cardinals<<\n",
- ">>San Diego Chargers<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>Minnesota Vikings<<\n",
- ">>San Francisco 49ers<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Minnesota Vikings<<\n",
- ">>Cleveland Browns<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>Detroit Lions<<\n",
- ">>Miami Dolphins<<\n",
- ">>Baltimore Ravens<<\n",
- ">>Denver Broncos<<\n",
- ">>Los Angeles Rams<<\n",
- ">>Dallas Cowboys<<\n",
- ">>Oakland Raiders<<\n",
- ">>Green Bay Packers<<\n",
- ">>Carolina Panthers<<\n",
- ">>San Diego Chargers<<\n",
- ">>New York Giants<<\n",
- ">>New Orleans Saints<<\n",
- ">>New England Patriots<<\n",
- ">>Tennessee Titans<<\n",
- ">>Chicago Bears<<\n",
- ">>Detroit Lions<<\n",
- ">>Washington Redskins<<\n",
- ">>Miami Dolphins<<\n",
- ">>Buffalo Bills<<\n",
- ">>Oakland Raiders<<\n",
- ">>Seattle Seahawks<<\n",
- ">>Green Bay Packers<<\n",
- ">>Houston Texans<<\n",
- ">>Arizona Cardinals<<\n",
- ">>Green Bay Packers<<\n",
- ">>Los Angeles Rams <<\n",
- ">>New York Jets<<\n",
- ">>Miami Dolphins<<\n",
- ">>Cincinnati Bengals<<\n",
- ">>Tennessee Titans<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Jacksonville Jaguars<<\n",
- ">>Detroit Lions<<\n",
- ">>Atlanta Falcons<<\n",
- ">>San Francisco 49ers<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>Arizona Cardinals<<\n",
- ">>Denver Broncos<<\n",
- ">>Tennessee Titans<<\n",
- ">>Cincinnati Bengals <<\n",
- ">>Houston Texans<<\n",
- ">>Atlanta Falcons<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Buffalo Bills<<\n",
- ">>Cleveland Browns<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>New Orleans Saints<<\n",
- ">>Denver Broncos<<\n",
- ">>Carolina Panthers<<\n",
- ">>Dallas Cowboys<<\n",
- ">>Chicago Bears<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>Cleveland Browns<<\n",
- ">>Minnesota Vikings<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Miami Dolphins<<\n",
- ">>New York Giants<<\n",
- ">>Baltimore Ravens<<\n",
- ">>Los Angeles Rams<<\n",
- ">>San Francisco 49ers<<\n",
- ">>Green Bay Packers<<\n",
- ">>San Diego Chargers<<\n",
- ">>Oakland Raiders<<\n",
- ">>Seattle Seahawks<<\n",
- ">>Baltimore Ravens<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>New Orleans Saints<<\n",
- ">>Tennessee Titans<<\n",
- ">>Jacksonville Jaguars<<\n",
- ">>Carolina Panthers<<\n",
- ">>New York Jets<<\n",
- ">>Washington Redskins<<\n",
- ">>San Diego Chargers<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>Arizona Cardinals<<\n",
- ">>New England Patriots<<\n",
- ">>New York Giants<<\n",
- ">>Carolina Panthers<<\n",
- ">>Minnesota Vikings<<\n",
- ">>Dallas Cowboys<<\n",
- ">>Cincinnati Bengals<<\n",
- ">>New York Giants<<\n",
- ">>Detroit Lions<<\n",
- ">>Cleveland Browns<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Los Angeles Rams<<\n",
- ">>San Francisco 49ers<<\n",
- ">>Seattle Seahawks<<\n",
- ">>Washington Redskins<<\n",
- ">>Oakland Raiders<<\n",
- ">>Detroit Lions<<\n",
- ">>Dallas Cowboys<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Atlanta Falcons<<\n",
- ">>Baltimore Ravens<<\n",
- ">>Buffalo Bills<<\n",
- ">>New Orleans Saints<<\n",
- ">>Cleveland Browns<<\n",
- ">>Houston Texans<<\n",
- ">>Miami Dolphins<<\n",
- ">>Chicago Bears<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>Oakland Raiders<<\n",
- ">>Denver Broncos<<\n",
- ">>New York Jets<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>Minnesota Vikings<<\n",
- ">>Jacksonville Jaguars<<\n",
- ">>New Orleans Saints<<\n",
- ">>Green Bay Packers<<\n",
- ">>Atlanta Falcons<<\n",
- ">>New England Patriots<<\n",
- ">>Baltimore Ravens<<\n",
- ">>Cincinnati Bengals<<\n",
- ">>Chicago Bears<<\n",
- ">>Oakland Raiders<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>San Diego Chargers<<\n",
- ">>Arizona Cardinals<<\n",
- ">>Seattle Seahawks<<\n",
- ">>New York Jets<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Miami Dolphins<<\n",
- ">>Detroit Lions<<\n",
- ">>Cleveland Browns<<\n",
- ">>Tennessee Titans<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Jacksonville Jaguars<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>Buffalo Bills<<\n",
- ">>Carolina Panthers<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>San Francisco 49ers<<\n",
- ">>Los Angeles Rams<<\n",
- ">>Green Bay Packers<<\n",
- ">>New York Giants<<\n",
- ">>New England Patriots<<\n",
- ">>Seattle Seahawks<<\n",
- ">>New York Jets<<\n",
- ">>Buffalo Bills<<\n",
- ">>New York Giants<<\n",
- ">>Chicago Bears<<\n",
- ">>Minnesota Vikings<<\n",
- ">>Houston Texans<<\n",
- ">>Baltimore Ravens<<\n",
- ">>Dallas Cowboys<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Arizona Cardinals<<\n",
- ">>Atlanta Falcons<<\n",
- ">>Denver Broncos<<\n",
- ">>San Diego Chargers<<\n",
- ">>Cincinnati Bengals<<\n",
- ">>Washington Redskins<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>Carolina Panthers<<\n",
- ">>Buffalo Bills<<\n",
- ">>Green Bay Packers<<\n",
- ">>New England Patriots<<\n",
- ">>Cleveland Browns<<\n",
- ">>New Orleans Saints<<\n",
- ">>Jacksonville Jaguars<<\n",
- ">>Chicago Bears<<\n",
- ">>Oakland Raiders<<\n",
- ">>Seattle Seahawks<<\n",
- ">>Los Angeles Rams<<\n",
- ">>Houston Texans<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>Kansas City Chiefs<<\n",
- ">>Dallas Cowboys<<\n",
- ">>Cincinnati Bengals<<\n",
- ">>New York Jets<<\n",
- ">>Tampa Bay Buccaneers<<\n",
- ">>Minnesota Vikings<<\n",
- ">>Pittsburgh Steelers<<\n",
- ">>Philadelphia Eagles<<\n",
- ">>Detroit Lions<<\n",
- ">>Tennessee Titans<<\n",
- ">>Indianapolis Colts<<\n",
- ">>Miami Dolphins<<\n",
- ">>Atlanta Falcons<<\n",
- ">>Washington Redskins<<\n",
- ">>Los Angeles Rams<<\n",
- ">>San Diego Chargers<<\n",
- ">>Denver Broncos<<\n",
- ">>San Francisco 49ers<<\n"
- ]
- }
- ],
- "source": [
- "# Print every home-team name wrapped in >> << markers (presumably to make stray\n",
- "# whitespace around a name visible). The single-argument print(...) form gives\n",
- "# the same output on Python 2 and also runs on Python 3.\n",
- "for team in df['Home Team']:\n",
- "    print('>>' + team + '<<')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.10"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }