# mdp.py
# ------
# Licensing Information: Please do not distribute or publish solutions to this
# project. You are free to use and extend these projects for educational
# purposes. The Pacman AI projects were developed at UC Berkeley, primarily by
# John DeNero ([email protected]) and Dan Klein ([email protected]).
# For more info, see http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
import random


class MarkovDecisionProcess:

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        raise NotImplementedError

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        raise NotImplementedError

    def getPossibleActions(self, state):
        """
        Return a list of possible actions from 'state'.
        """
        raise NotImplementedError

    def getTransitionStatesAndProbs(self, state, action):
        """
        Return a list of (nextState, prob) pairs representing the states
        reachable from 'state' by taking 'action', along with their
        transition probabilities.

        Note that in Q-Learning, and reinforcement learning in general,
        we do not know these probabilities, nor do we directly model them.
        """
        raise NotImplementedError

    def getReward(self, state, action, nextState):
        """
        Get the reward for the (state, action, nextState) transition.
        Not available in reinforcement learning.
        """
        raise NotImplementedError

    def isTerminal(self, state):
        """
        Return True if the current state is a terminal state. By convention,
        a terminal state has zero future rewards. Sometimes the terminal
        state(s) may have no possible actions. It is also common to think of
        the terminal state as having a self-loop action 'pass' with zero
        reward; the two formulations are equivalent.
        """
        raise NotImplementedError
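

# Example: a minimal sketch of a concrete subclass, assuming a hypothetical
# two-state chain MDP ('A' -> 'B' -> terminal 'exit'). The class name, states,
# probabilities, and rewards below are illustrative only and are not part of
# the original Berkeley project code.
class SimpleChainMDP(MarkovDecisionProcess):

    def getStates(self):
        return ['A', 'B', 'exit']

    def getStartState(self):
        return 'A'

    def getPossibleActions(self, state):
        # The terminal state has no actions, matching the convention above.
        return [] if self.isTerminal(state) else ['forward']

    def getTransitionStatesAndProbs(self, state, action):
        # 'forward' usually advances, but slips back to 'A' 20% of the time.
        if state == 'A':
            return [('B', 0.8), ('A', 0.2)]
        return [('exit', 1.0)]

    def getReward(self, state, action, nextState):
        # Reward only for reaching the terminal state.
        return 10.0 if nextState == 'exit' else 0.0

    def isTerminal(self, state):
        return state == 'exit'


# Usage sketch: sample a single trajectory from the chain MDP above by
# drawing next states from the transition distribution with 'random'.
if __name__ == '__main__':
    mdp = SimpleChainMDP()
    state = mdp.getStartState()
    while not mdp.isTerminal(state):
        action = mdp.getPossibleActions(state)[0]
        # Inverse-CDF sampling over the (nextState, prob) pairs.
        r = random.random()
        cumulative = 0.0
        for nextState, prob in mdp.getTransitionStatesAndProbs(state, action):
            cumulative += prob
            if r <= cumulative:
                break
        print(state, action, '->', nextState,
              'reward:', mdp.getReward(state, action, nextState))
        state = nextState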