/AI/reinforcement/qlearningAgents.py
# qlearningAgents.py
# ------------------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html

from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *

import random, util, math

INF = float('inf')

class QLearningAgent(ReinforcementAgent):
    """
    Q-Learning Agent

    Functions you should fill in:
      - getQValue
      - getAction
      - getValue
      - getPolicy
      - update

    Instance variables you have access to:
      - self.epsilon (exploration probability)
      - self.alpha (learning rate)
      - self.gamma (discount rate)

    Functions you should use:
      - self.getLegalActions(state), which returns the legal actions
        for a state
    """
    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)

        "*** YOUR CODE HERE ***"
        # Q-values, keyed by (state, action); unseen pairs default to 0.0.
        self.q = util.Counter()

    def getQValue(self, state, action):
        """
        Returns Q(state,action).
        Should return 0.0 if we have never seen a state
        or (state, action) tuple.
        """
        "*** YOUR CODE HERE ***"
        # util.Counter returns 0.0 for missing keys, so unseen pairs are 0.0.
        return self.q[(state, action)]

    def getValue(self, state):
        """
        Returns max_action Q(state,action),
        where the max is over legal actions. Note that if
        there are no legal actions, which is the case at the
        terminal state, you should return a value of 0.0.
        """
        legalActions = self.getLegalActions(state)
        if len(legalActions) == 0:
            return 0.0
        return max(self.getQValue(state, a) for a in legalActions)

    def getPolicy(self, state):
        """
        Compute the best action to take in a state. Note that if there
        are no legal actions, which is the case at the terminal state,
        you should return None.
        """
        "*** YOUR CODE HERE ***"
        legalActions = self.getLegalActions(state)
        if len(legalActions) == 0:
            return None
        # Collect all actions tied for the best Q-value, then break the tie
        # randomly so we do not systematically favor one action. (Starting
        # from an empty list also avoids the sentinel tuple (-INF,), which
        # could be returned and raise an IndexError in a degenerate case.)
        bestQ = max(self.getQValue(state, a) for a in legalActions)
        bestActions = [a for a in legalActions if self.getQValue(state, a) == bestQ]
        return random.choice(bestActions)
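
    # Note: before any learning every Q-value is 0.0, so all legal actions tie
    # and getPolicy effectively picks uniformly at random.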

    def getAction(self, state):
        """
        Compute the action to take in the current state. With
        probability self.epsilon, we should take a random action and
        take the best policy action otherwise. Note that if there are
        no legal actions, which is the case at the terminal state, you
        should choose None as the action.

        HINT: You might want to use util.flipCoin(prob)
        HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        "*** YOUR CODE HERE ***"
        legalActions = self.getLegalActions(state)
        if len(legalActions) == 0:
            return None
        if util.flipCoin(self.epsilon):
            # Explore: take a uniformly random legal action.
            return random.choice(legalActions)
        # Exploit: follow the greedy policy.
        return self.getPolicy(state)
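
    # With the PacmanQAgent default epsilon=0.05 (see below), roughly 1 step
    # in 20 is exploratory; util.flipCoin(p) returns True with probability p.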

    def update(self, state, action, nextState, reward):
        """
        The parent class calls this to observe a
        state => action => nextState and reward transition.
        You should do your Q-Value update here.

        NOTE: You should never call this function;
        it will be called on your behalf.
        """
        "*** YOUR CODE HERE ***"
        # Standard Q-learning update:
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        q_s_a = self.q[(state, action)]
        self.q[(state, action)] = q_s_a + \
            self.alpha * (reward + self.gamma * self.getValue(nextState) - q_s_a)
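
    # Worked update example (hypothetical numbers): with alpha=0.5, gamma=0.9,
    # Q(s,a)=2.0, reward=1, and max_a' Q(s',a')=3.0, the new value is
    # 2.0 + 0.5 * (1 + 0.9*3.0 - 2.0) = 2.85.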

class PacmanQAgent(QLearningAgent):
    "Exactly the same as QLearningAgent, but with different default parameters"

    def __init__(self, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0, **args):
        """
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQAgent -a epsilon=0.1

        alpha       - learning rate
        epsilon     - exploration rate
        gamma       - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs the parent of the action for Pacman. Do not change or
        remove this method.
        """
        action = QLearningAgent.getAction(self, state)
        self.doAction(state, action)
        return action

class ApproximateQAgent(PacmanQAgent):
    """
    ApproximateQLearningAgent

    You should only have to override getQValue
    and update. All other QLearningAgent functions
    should work as is.
    """
    def __init__(self, extractor='IdentityExtractor', **args):
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)

        # You might want to initialize weights here.
        "*** YOUR CODE HERE ***"
        # Feature weights; util.Counter defaults missing weights to 0.0.
        self.w = util.Counter()
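
    # Note: util.lookup resolves the extractor class by name at runtime; e.g.
    # extractor='SimpleExtractor' (defined in featureExtractors.py) is a common
    # choice for Pacman, supplying features such as 'bias' and 'eats-food'.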

    def getQValue(self, state, action):
        """
        Should return Q(state,action) = w * featureVector,
        where * is the dotProduct operator.
        """
        "*** YOUR CODE HERE ***"
        return self.w * self.featExtractor.getFeatures(state, action)
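
    # For intuition (hypothetical numbers): if w = {'bias': 1.0, 'eats-food': 0.5}
    # and f(s,a) = {'bias': 1.0, 'eats-food': 1.0}, the dot product gives
    # Q(s,a) = 1.0*1.0 + 0.5*1.0 = 1.5. util.Counter overloads * as dotProduct.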

    def update(self, state, action, nextState, reward):
        """
        Should update your weights based on the transition.
        """
        "*** YOUR CODE HERE ***"
        f = self.featExtractor.getFeatures(state, action)
        # Compute the correction once, before any weight changes: updating
        # weights inside the loop would alter getQValue mid-update and make
        # later features use a different correction than earlier ones.
        correction = reward + self.gamma * self.getValue(nextState) - self.getQValue(state, action)
        for (i, fi) in f.items():
            self.w[i] += self.alpha * correction * fi
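
    # Worked example (hypothetical numbers): with alpha=0.2, gamma=0.8, reward=10,
    # max_a' Q(s',a') = 0, Q(s,a) = 4, and f_i = 1.0, the correction is
    # 10 + 0.8*0 - 4 = 6, so w_i increases by 0.2 * 6 * 1.0 = 1.2.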

    def final(self, state):
        "Called at the end of each game."
        # call the super-class final method
        PacmanQAgent.final(self, state)

        # did we finish training?
        if self.episodesSoFar == self.numTraining:
            # you might want to print your weights here for debugging
            "*** YOUR CODE HERE ***"
            pass
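
# Usage sketch (commands from the standard CS188 reinforcement project; exact
# flags may vary by project version):
#   python gridworld.py -a q -k 5 -m
#   python pacman.py -p PacmanQAgent -x 2000 -n 2010 -l smallGrid
#   python pacman.py -p ApproximateQAgent -a extractor=SimpleExtractor -x 50 -n 60 -l mediumGrid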