-
Notifications
You must be signed in to change notification settings - Fork 0
/
Q-Learning.py
79 lines (60 loc) · 2.08 KB
/
Q-Learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gym
import numpy as np
import random
# Build the FrozenLake environment (old gym API: env.step returns a 4-tuple).
env = gym.make("FrozenLake-v0")

# Initialize the Q-table: one row per state, one column per action, all zeros.
# NOTE: renamed from `action`/`state` — those names are reused later for the
# per-step action index and current state, which shadowed these sizes.
num_actions = env.action_space.n        # size of the discrete action space
num_states = env.observation_space.n    # number of discrete states
qtable = np.zeros((num_states, num_actions))

# Hyperparameters
num_episode = 1000   # total number of training episodes
alpha = 0.8          # learning rate
num_step = 100       # maximum number of steps per episode
gamma = 0.95         # discount rate

# Exploration/exploitation tradeoff (epsilon-greedy)
epsilon = 1.0        # current exploration probability (starts fully exploratory)
max_epsilon = 1.0
min_epsilon = 0.01
decay_rate = 0.005   # exponential decay rate for epsilon across episodes
# Q-Learning training loop
rewards = []  # total reward collected in each episode
for episode in range(num_episode):
    # Reset the environment at the start of each episode.
    state = env.reset()
    done = False      # whether the current episode has terminated
    sum_reward = 0
    for step in range(num_step):
        # Epsilon-greedy action selection: explore with probability epsilon,
        # otherwise exploit the current Q-table estimate.
        if random.random() < epsilon:
            action = env.action_space.sample()      # exploration
        else:
            action = np.argmax(qtable[state, :])    # exploitation
        # Take the action; old gym API returns (obs, reward, done, info).
        new_state, reward, done, info = env.step(action)
        # Temporal-difference Q-learning update:
        #   Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        qtable[state, action] = qtable[state, action] + alpha * (
            reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action]
        )
        sum_reward += reward
        if done:
            break
        state = new_state
    rewards.append(sum_reward)
    # Decay epsilon so later episodes explore less and less.
    # BUG FIX: the decay must depend on the episode index, not on epsilon
    # itself — the original `np.exp(-decay_rate * epsilon)` kept epsilon
    # nearly constant instead of following the intended exponential schedule.
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
print(qtable)
# Use the learned Q-table as a "cheat sheet" to play greedy episodes.
# (Removed a redundant extra env.reset() before the loop and a dead
# `step = 0` that the for-loop immediately overwrote.)
for episode in range(15):
    state = env.reset()   # fresh start for each playback episode
    done = False
    print("****************************************************")
    print("Episode: ", episode)
    for step in range(num_step):
        # Act purely greedily with respect to the learned Q-values.
        action = np.argmax(qtable[state, :])
        new_state, reward, done, info = env.step(action)
        if done:
            env.render()  # show the final status of agent and environment
            print("Number of steps", step)
            break
        state = new_state
env.close()