tabular_comparison.ipynb

/notebooks/tabular_comparison.ipynb

https://gitlab.com/rldotai/td-variance
Jupyter | 439 lines | 439 code | 0 blank | 0 comment | 0 complexity | 08e445cd0c775db7e39caca3b28abccf MD5 | raw file

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy.linalg import pinv\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import networkx as nx\n",
    "import pydot\n",
    "from IPython.display import Image, display\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "np.set_printoptions(precision=4, suppress=True)\n",
    "pd.set_option('precision', 4)\n",
    "pd.set_option('display.float_format', lambda x: '%.4f' % x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from algorithms import TDVarTraces\n",
    "from features import Int2Unary\n",
    "from simulation import trajectory_gen, compute_return"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>true_value</th>\n",
       "      <th>exp_value</th>\n",
       "      <th>true_var</th>\n",
       "      <th>exp_var</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0312</td>\n",
       "      <td>0.0325</td>\n",
       "      <td>0.0303</td>\n",
       "      <td>0.0314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0625</td>\n",
       "      <td>0.0651</td>\n",
       "      <td>0.0586</td>\n",
       "      <td>0.0609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.1250</td>\n",
       "      <td>0.1304</td>\n",
       "      <td>0.1094</td>\n",
       "      <td>0.1134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.2500</td>\n",
       "      <td>0.2582</td>\n",
       "      <td>0.1875</td>\n",
       "      <td>0.1916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.5000</td>\n",
       "      <td>0.5063</td>\n",
       "      <td>0.2500</td>\n",
       "      <td>0.2500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   true_value  exp_value  true_var  exp_var\n",
       "0      0.0312     0.0325    0.0303   0.0314\n",
       "1      0.0625     0.0651    0.0586   0.0609\n",
       "2      0.1250     0.1304    0.1094   0.1134\n",
       "3      0.2500     0.2582    0.1875   0.1916\n",
       "4      0.5000     0.5063    0.2500   0.2500\n",
       "5      0.0000     0.0000    0.0000   0.0000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Chicken problem solved analytically\n",
    "nstates = ns = 6\n",
    "I = np.eye(ns)\n",
    "\n",
    "# Probability of transitioning from state s_i --> s_j = P[i,j]\n",
    "P = np.diag(np.ones(ns-1), 1) * 0.5\n",
    "P[:,0] = 0.5\n",
    "P[-1, 0] = 1\n",
    "\n",
    "# Expected reward for transitioning from s_i --> s_j = R[i,j]\n",
    "R = np.zeros((ns, ns)) * 0.9\n",
    "R[-2, -1] = 1.0\n",
    "r = np.sum(P*R, axis=1)\n",
    "\n",
    "# Discount\n",
    "gmvec = np.ones(ns)\n",
    "gmvec[0] = 0\n",
    "G = np.diag(gmvec)\n",
    "\n",
    "# Bootstrapping\n",
    "lmvec = np.zeros(ns)\n",
    "L = np.diag(lmvec)\n",
    "\n",
    "# Function approximation\n",
    "X = np.eye(ns)\n",
    "\n",
    "# Value function\n",
    "v_pi = pinv(I - P @ G) @ r\n",
    "\n",
    "\n",
    "# From sobel, recursive expected variance contribution?\n",
    "# T = -v_pi**2\n",
    "# for i in range(ns):\n",
    "#     for j in range(ns):\n",
    "#         T[i] += P[i,j] * (R[i,j] + gmvec[j]*v_pi[j])**2\n",
    "\n",
    "# Alternatively,\n",
    "T = np.sum(P * (R + G @ v_pi)**2, axis=1) - v_pi**2\n",
    "        \n",
    "# Variance (again from Sobel)\n",
    "v_var = pinv(I - P @ G @ G) @ T \n",
    "\n",
    "# Define the experiment\n",
    "nsteps = 100000\n",
    "gmfunc = lambda x: gmvec[x]\n",
    "lmfunc = lambda x: lmvec[x]\n",
    "phi = Int2Unary(nstates)\n",
    "\n",
    "# Generate a trajectory\n",
    "gen = trajectory_gen(P, R)\n",
    "slst = [next(gen) for i in range(nsteps)]\n",
    "\n",
    "# Compute the MC-return\n",
    "glst = compute_return(slst, gmfunc)\n",
    "xlst = [(phi(s), r, phi(sp)) for s, r, sp in slst] \n",
    "\n",
    "# Compare analytical and experimental values\n",
    "gdf = pd.DataFrame(glst, columns=['s', 'g', 'sp'])\n",
    "grouped = pd.groupby(gdf, 's')\n",
    "true_value = pd.Series(v_pi, name='true_value')\n",
    "true_var = pd.Series(v_var, name='true_var')\n",
    "exp_value = grouped.aggregate({'g': np.mean}).rename(columns={'g': 'exp_value'})\n",
    "exp_var = grouped.aggregate({'g': np.var}).rename(columns={'g': 'exp_var'})\n",
    "\n",
    "combined = pd.concat([true_value, exp_value, true_var, exp_var], axis=1)\n",
    "display(combined)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([0]),)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 0\n",
      "[ 0.0326  0.0617  0.1226  0.2289  0.5152  0.    ]\n",
      "[ 0.0021  0.0081  0.0298  0.0966  0.2424  0.    ]\n",
      "\n",
      "Epoch: 1\n",
      "[ 0.035   0.0663  0.1301  0.2453  0.5161  0.    ]\n",
      "[ 0.0022  0.0084  0.0303  0.099   0.2504  0.    ]\n",
      "\n",
      "Epoch: 2\n",
      "[ 0.0355  0.0679  0.1334  0.2534  0.5176  0.    ]\n",
      "[ 0.0022  0.0083  0.0301  0.0989  0.2504  0.    ]\n",
      "\n",
      "Epoch: 3\n",
      "[ 0.0349  0.0676  0.1338  0.2569  0.518   0.    ]\n",
      "[ 0.0021  0.0081  0.0299  0.0987  0.2503  0.    ]\n",
      "\n",
      "Epoch: 4\n",
      "[ 0.0341  0.0669  0.1333  0.2583  0.5178  0.    ]\n",
      "[ 0.0021  0.0081  0.0297  0.0984  0.2502  0.    ]\n",
      "\n",
      "Epoch: 5\n",
      "[ 0.0335  0.0662  0.1327  0.2589  0.5173  0.    ]\n",
      "[ 0.0021  0.008   0.0295  0.0982  0.2501  0.    ]\n",
      "\n",
      "Epoch: 6\n",
      "[ 0.0331  0.0657  0.1321  0.259   0.5166  0.    ]\n",
      "[ 0.0021  0.0079  0.0294  0.098   0.2501  0.    ]\n",
      "\n",
      "Epoch: 7\n",
      "[ 0.0328  0.0653  0.1316  0.259   0.516   0.    ]\n",
      "[ 0.002   0.0079  0.0293  0.0979  0.25    0.    ]\n",
      "\n",
      "Epoch: 8\n",
      "[ 0.0326  0.065   0.1312  0.2589  0.5154  0.    ]\n",
      "[ 0.002   0.0079  0.0292  0.0978  0.25    0.    ]\n",
      "\n",
      "Epoch: 9\n",
      "[ 0.0324  0.0648  0.1309  0.2588  0.5148  0.    ]\n",
      "[ 0.002   0.0078  0.0291  0.0976  0.25    0.    ]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Setup\n",
    "num_epochs = 10\n",
    "\n",
    "# Algorithm\n",
    "num_features = nstates\n",
    "alg = TDVarTraces(num_features)\n",
    "\n",
    "# Representation\n",
    "phi = Int2Unary(num_features)\n",
    "\n",
    "# Parameters\n",
    "gamma = lambda x: gmvec[x]\n",
    "lmbda = lambda x: lmvec[x]\n",
    "\n",
    "# Simulation\n",
    "for epoch in range(num_epochs):\n",
    "    alpha = 0.01/(1 + epoch)\n",
    "    for step in slst:\n",
    "        s, r, sp = step\n",
    "        x = phi(s)\n",
    "        xp = phi(sp)\n",
    "        \n",
    "        # Value update parameters\n",
    "        gm = gamma(s)\n",
    "        gm_p = gamma(sp)\n",
    "        lm = lmbda(s)\n",
    "        lm_p = lmbda(sp)\n",
    "        \n",
    "        # Variance update parameters\n",
    "        v_gm = gamma(s)\n",
    "        v_gm_p = gamma(sp)\n",
    "        v_lm = 0.5\n",
    "        v_lm_p = 0.5\n",
    "        v_alpha = 0.001\n",
    "        v_beta = 0.001\n",
    "        v_eta = 0\n",
    "        \n",
    "        # Perform the update\n",
    "        alg.update(x, r, xp, alpha, gm, gm_p, lm,\n",
    "                   v_gm, v_gm_p, v_lm, v_lm_p, v_alpha, v_beta, v_eta)\n",
    "        \n",
    "        # Tracking\n",
    "        \n",
    "    print(\"Epoch:\", epoch)\n",
    "    print(alg.theta)\n",
    "    print(alg.w)\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>exp_value</th>\n",
       "      <th>exp_var</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>s</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0325</td>\n",
       "      <td>0.0314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0651</td>\n",
       "      <td>0.0609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.1304</td>\n",
       "      <td>0.1134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.2582</td>\n",
       "      <td>0.1916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.5063</td>\n",
       "      <td>0.2500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   exp_value  exp_var\n",
       "s                    \n",
       "0     0.0325   0.0314\n",
       "1     0.0651   0.0609\n",
       "2     0.1304   0.1134\n",
       "3     0.2582   0.1916\n",
       "4     0.5063   0.2500\n",
       "5     0.0000   0.0000"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([exp_value, exp_var], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [py35]",
   "language": "python",
   "name": "Python [py35]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}