/notebooks/tabular_comparison.ipynb
Jupyter | 439 lines | 439 code | 0 blank | 0 comment | 0 complexity | 08e445cd0c775db7e39caca3b28abccf MD5 | raw file
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "from numpy.linalg import pinv\n",
- "\n",
- "import pandas as pd\n",
- "\n",
- "import networkx as nx\n",
- "import pydot\n",
- "from IPython.display import Image, display\n",
- "\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline\n",
- "\n",
- "# Display configuration: show four decimals in printed arrays and tables\n",
- "np.set_printoptions(precision=4, suppress=True)\n",
- "# Use the fully-qualified option key. The bare 'precision' pattern is matched\n",
- "# as a regex and becomes ambiguous (OptionError) on pandas releases that also\n",
- "# define 'styler.format.precision'.\n",
- "pd.set_option('display.precision', 4)\n",
- "pd.set_option('display.float_format', lambda x: '%.4f' % x)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "from algorithms import TDVarTraces\n",
- "from features import Int2Unary\n",
- "from simulation import trajectory_gen, compute_return"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>true_value</th>\n",
- " <th>exp_value</th>\n",
- " <th>true_var</th>\n",
- " <th>exp_var</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0.0312</td>\n",
- " <td>0.0325</td>\n",
- " <td>0.0303</td>\n",
- " <td>0.0314</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>0.0625</td>\n",
- " <td>0.0651</td>\n",
- " <td>0.0586</td>\n",
- " <td>0.0609</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>0.1250</td>\n",
- " <td>0.1304</td>\n",
- " <td>0.1094</td>\n",
- " <td>0.1134</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>0.2500</td>\n",
- " <td>0.2582</td>\n",
- " <td>0.1875</td>\n",
- " <td>0.1916</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>0.5000</td>\n",
- " <td>0.5063</td>\n",
- " <td>0.2500</td>\n",
- " <td>0.2500</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>0.0000</td>\n",
- " <td>0.0000</td>\n",
- " <td>0.0000</td>\n",
- " <td>0.0000</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " true_value exp_value true_var exp_var\n",
- "0 0.0312 0.0325 0.0303 0.0314\n",
- "1 0.0625 0.0651 0.0586 0.0609\n",
- "2 0.1250 0.1304 0.1094 0.1134\n",
- "3 0.2500 0.2582 0.1875 0.1916\n",
- "4 0.5000 0.5063 0.2500 0.2500\n",
- "5 0.0000 0.0000 0.0000 0.0000"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Analytic solution of the chicken problem\n",
- "nstates = ns = 6\n",
- "I = np.identity(ns)\n",
- "\n",
- "# Transition matrix: P[i, j] is the probability of moving s_i --> s_j\n",
- "P = 0.5 * np.diag(np.ones(ns - 1), k=1)  # P[i, i+1] = 0.5\n",
- "P[:, 0] = 0.5                            # every row: 0.5 into state 0\n",
- "P[-1, 0] = 1                             # last row: always into state 0\n",
- "\n",
- "# Reward matrix: R[i, j] is the expected reward on the transition s_i --> s_j\n",
- "R = np.zeros((ns, ns))\n",
- "R[-2, -1] = 1.0  # the only rewarded transition is the step into the last state\n",
- "r = (P * R).sum(axis=1)  # expected one-step reward from each state\n",
- "\n",
- "# Discount, indexed by state: zero for state 0, one everywhere else\n",
- "gmvec = np.ones(ns)\n",
- "gmvec[0] = 0\n",
- "G = np.diag(gmvec)\n",
- "\n",
- "# Bootstrapping (lambda), indexed by state: zero everywhere\n",
- "lmvec = np.zeros(ns)\n",
- "L = np.diag(lmvec)\n",
- "\n",
- "# Function approximation: identity feature matrix\n",
- "X = np.identity(ns)\n",
- "\n",
- "# Value function: v_pi = pinv(I - P G) r, i.e. the solution of\n",
- "# v = r + P G v whenever (I - P G) is invertible\n",
- "v_pi = pinv(I - P @ G) @ r\n",
- "\n",
- "\n",
- "# One-step variance contribution (after Sobel). Written element-wise:\n",
- "#   T[i] = sum_j P[i,j] * (R[i,j] + gmvec[j]*v_pi[j])**2 - v_pi[i]**2\n",
- "# The vector G @ v_pi broadcasts along the rows of R, giving the j-indexed term.\n",
- "next_return = R + G @ v_pi\n",
- "T = (P * np.square(next_return)).sum(axis=1) - np.square(v_pi)\n",
- "\n",
- "# Variance of the return (again from Sobel); the discount enters squared\n",
- "v_var = pinv(I - P @ G @ G) @ T\n",
- "\n",
- "# Define the experiment\n",
- "nsteps = 100000\n",
- "gmfunc = lambda x: gmvec[x]  # discount looked up by integer state\n",
- "lmfunc = lambda x: lmvec[x]  # lambda looked up by integer state\n",
- "phi = Int2Unary(nstates)\n",
- "\n",
- "# Generate a trajectory of nsteps transitions\n",
- "# NOTE(review): no RNG seed is set in this notebook, so the sampled trajectory\n",
- "# (and every exp_* number below) will presumably differ between runs unless\n",
- "# trajectory_gen seeds itself -- confirm and seed explicitly if needed.\n",
- "gen = trajectory_gen(P, R)\n",
- "slst = [next(gen) for _ in range(nsteps)]\n",
- "\n",
- "# Compute the MC-return\n",
- "glst = compute_return(slst, gmfunc)\n",
- "# Feature-encoded copy of the trajectory; not referenced by the visible cells\n",
- "xlst = [(phi(s), r, phi(sp)) for s, r, sp in slst]\n",
- "\n",
- "# Compare analytical and experimental values\n",
- "# presumably each row of glst is (state, return, next state) -- verify against compute_return\n",
- "gdf = pd.DataFrame(glst, columns=['s', 'g', 'sp'])\n",
- "# DataFrame.groupby replaces the removed top-level pd.groupby function\n",
- "grouped = gdf.groupby('s')\n",
- "true_value = pd.Series(v_pi, name='true_value')\n",
- "true_var = pd.Series(v_var, name='true_var')\n",
- "# String aggregator names select the groupby mean/var methods directly\n",
- "# ('var' is the sample variance, ddof=1) and avoid the callable-alias warning\n",
- "exp_value = grouped.aggregate({'g': 'mean'}).rename(columns={'g': 'exp_value'})\n",
- "exp_var = grouped.aggregate({'g': 'var'}).rename(columns={'g': 'exp_var'})\n",
- "\n",
- "# Analytic and empirical columns side by side, aligned on the state index\n",
- "combined = pd.concat([true_value, exp_value, true_var, exp_var], axis=1)\n",
- "display(combined)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(array([0]),)"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch: 0\n",
- "[ 0.0326 0.0617 0.1226 0.2289 0.5152 0. ]\n",
- "[ 0.0021 0.0081 0.0298 0.0966 0.2424 0. ]\n",
- "\n",
- "Epoch: 1\n",
- "[ 0.035 0.0663 0.1301 0.2453 0.5161 0. ]\n",
- "[ 0.0022 0.0084 0.0303 0.099 0.2504 0. ]\n",
- "\n",
- "Epoch: 2\n",
- "[ 0.0355 0.0679 0.1334 0.2534 0.5176 0. ]\n",
- "[ 0.0022 0.0083 0.0301 0.0989 0.2504 0. ]\n",
- "\n",
- "Epoch: 3\n",
- "[ 0.0349 0.0676 0.1338 0.2569 0.518 0. ]\n",
- "[ 0.0021 0.0081 0.0299 0.0987 0.2503 0. ]\n",
- "\n",
- "Epoch: 4\n",
- "[ 0.0341 0.0669 0.1333 0.2583 0.5178 0. ]\n",
- "[ 0.0021 0.0081 0.0297 0.0984 0.2502 0. ]\n",
- "\n",
- "Epoch: 5\n",
- "[ 0.0335 0.0662 0.1327 0.2589 0.5173 0. ]\n",
- "[ 0.0021 0.008 0.0295 0.0982 0.2501 0. ]\n",
- "\n",
- "Epoch: 6\n",
- "[ 0.0331 0.0657 0.1321 0.259 0.5166 0. ]\n",
- "[ 0.0021 0.0079 0.0294 0.098 0.2501 0. ]\n",
- "\n",
- "Epoch: 7\n",
- "[ 0.0328 0.0653 0.1316 0.259 0.516 0. ]\n",
- "[ 0.002 0.0079 0.0293 0.0979 0.25 0. ]\n",
- "\n",
- "Epoch: 8\n",
- "[ 0.0326 0.065 0.1312 0.2589 0.5154 0. ]\n",
- "[ 0.002 0.0079 0.0292 0.0978 0.25 0. ]\n",
- "\n",
- "Epoch: 9\n",
- "[ 0.0324 0.0648 0.1309 0.2588 0.5148 0. ]\n",
- "[ 0.002 0.0078 0.0291 0.0976 0.25 0. ]\n",
- "\n"
- ]
- }
- ],
- "source": [
- "# Setup\n",
- "num_epochs = 10\n",
- "\n",
- "# Algorithm\n",
- "num_features = nstates\n",
- "alg = TDVarTraces(num_features)\n",
- "\n",
- "# Representation\n",
- "phi = Int2Unary(num_features)\n",
- "\n",
- "# Parameters: state-indexed discount and bootstrapping lookups\n",
- "gamma = lambda x: gmvec[x]\n",
- "lmbda = lambda x: lmvec[x]\n",
- "\n",
- "# Variance update hyperparameters: constant for every step, so set them once\n",
- "# here instead of re-assigning them on each of the num_epochs * len(slst) steps\n",
- "v_lm = 0.5\n",
- "v_lm_p = 0.5\n",
- "v_alpha = 0.001\n",
- "v_beta = 0.001\n",
- "v_eta = 0\n",
- "\n",
- "# Simulation: replay the same recorded trajectory once per epoch\n",
- "for epoch in range(num_epochs):\n",
- "    # Value step size decays with the epoch number\n",
- "    alpha = 0.01/(1 + epoch)\n",
- "    # The per-step reward is named reward so that it does not overwrite the\n",
- "    # expected-reward vector r computed in the analytic cell\n",
- "    for s, reward, sp in slst:\n",
- "        x = phi(s)\n",
- "        xp = phi(sp)\n",
- "\n",
- "        # Value update parameters\n",
- "        gm = gamma(s)\n",
- "        gm_p = gamma(sp)\n",
- "        lm = lmbda(s)\n",
- "        # NOTE(review): lambda at sp is not an argument of the update call\n",
- "        # below -- confirm TDVarTraces.update does not expect it\n",
- "\n",
- "        # Variance update parameters: same discounts as the value update\n",
- "        v_gm = gm\n",
- "        v_gm_p = gm_p\n",
- "\n",
- "        # Perform the update\n",
- "        alg.update(x, reward, xp, alpha, gm, gm_p, lm,\n",
- "                   v_gm, v_gm_p, v_lm, v_lm_p, v_alpha, v_beta, v_eta)\n",
- "\n",
- "    # Tracking: weights after each full pass over the trajectory\n",
- "    # (presumably theta = value weights, w = variance weights -- confirm)\n",
- "    print(\"Epoch:\", epoch)\n",
- "    print(alg.theta)\n",
- "    print(alg.w)\n",
- "    print()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>exp_value</th>\n",
- " <th>exp_var</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>s</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>0.0325</td>\n",
- " <td>0.0314</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>0.0651</td>\n",
- " <td>0.0609</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>0.1304</td>\n",
- " <td>0.1134</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>0.2582</td>\n",
- " <td>0.1916</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>0.5063</td>\n",
- " <td>0.2500</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>0.0000</td>\n",
- " <td>0.0000</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " exp_value exp_var\n",
- "s \n",
- "0 0.0325 0.0314\n",
- "1 0.0651 0.0609\n",
- "2 0.1304 0.1134\n",
- "3 0.2582 0.1916\n",
- "4 0.5063 0.2500\n",
- "5 0.0000 0.0000"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Monte Carlo estimates only: per-state mean and variance of the return\n",
- "mc_columns = [exp_value, exp_var]\n",
- "pd.concat(mc_columns, axis=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "anaconda-cloud": {},
- "kernelspec": {
- "display_name": "Python [py35]",
- "language": "python",
- "name": "Python [py35]"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }