-
Notifications
You must be signed in to change notification settings - Fork 0
/
Q2.py
111 lines (84 loc) · 3.32 KB
/
Q2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
import matplotlib.pyplot as plt
import argparse
from ActionSelect import actionSelect
from Rewards import *
def main(iterations, automaton, learning_rate):
    """Run a 10-armed bandit experiment driven by a learning automaton.

    Args:
        iterations: number of time steps to simulate.
        automaton: automaton type; for 'linear' the reward-inaction scheme
            is used (no probability update on failure).
        learning_rate: step size for the automaton probability updates.

    Side effects: prints progress every 100 steps and shows three plots.
    """
    config = {}
    nArms = 10
    lr = learning_rate
    automatonType = automaton
    nTimesChosen = np.zeros((nArms,))
    # True per-arm success probabilities, drawn uniformly at random.
    trueDistrib = np.random.rand(nArms,)
    print(trueDistrib)
    estDistrib = np.full((nArms,), 1 / nArms)
    # Automaton arm-choice probabilities, initialised uniform.
    probs = np.full((nArms,), 1 / nArms)
    optimalAction = np.argmax(trueDistrib)
    nTimesOptimal = 0
    averageReward = 0
    optimalHistory = []
    rewardHistory = []
    config['nArms'] = nArms
    config['trueDistrib'] = trueDistrib
    config['estDistrib'] = estDistrib
    config['nTimesChosen'] = nTimesChosen
    config['probs'] = probs
    for t in range(1, iterations):
        config['t'] = t
        armPulled = actionSelect('automaton', config)
        config['armPulled'] = armPulled
        rewardTable = rollReward(config)
        reward = np.amax(rewardTable)
        # Incremental running-mean update of the average reward.
        averageReward = averageReward * (t - 1) / t + reward / t
        if armPulled == optimalAction:
            nTimesOptimal += 1
        # Reuse `reward` instead of recomputing np.amax(rewardTable).
        if reward == 1:
            probs = updateAutomatonSuccess(armPulled, config['probs'], lr)
            config['probs'] = probs
        else:
            if automatonType == 'linear':
                # Linear reward-inaction: probabilities unchanged on failure.
                pass
            else:
                probs = updateAutomatonFailure(armPulled, config['probs'], lr)
                config['probs'] = probs
        if t % 100 == 0:
            optPercent = (nTimesOptimal / t) * 100
            optimalHistory.append(optPercent)
            rewardHistory.append(averageReward)
            print("Optimal choice was made " + str(optPercent) + "% of the time")
            print("Average Reward is: " + str(averageReward))
    plotOptimal(optimalHistory)
    plotReward(rewardHistory)
    plotHistograms(trueDistrib, probs)
def plotHistograms(true, est):
    """Show the true reward distribution beside the learned arm-choice distribution."""
    plt.suptitle("True Reward Probability Distribution for 10-Armed Bandit Problem and i-th Arm Choice Probability Distribution Using Linear Learning Automata", fontsize=10)
    panels = [
        ("True Reward Distribution", true),
        ("i-th Arm Choice Probability Distribution", est),
    ]
    for position, (panel_title, series) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(panel_title)
        plt.plot(series)
    plt.show()
def plotOptimal(history):
    """Plot the percentage of optimal choices recorded over the run."""
    plt.suptitle("Frequency of Optimal Actions for 10-Armed Bandit Problem using Linear Learning Automata", fontsize=14)
    plt.plot(history)
    plt.show()
def plotReward(history):
    """Plot the running average reward recorded over the run."""
    plt.suptitle("Average Reward Over Time for 10-Armed Bandit Problem using Linear Learning Automata", fontsize=14)
    plt.plot(history)
    plt.show()
if __name__ == '__main__':
    # Command-line entry point: parse the experiment settings and run.
    cli = argparse.ArgumentParser()
    cli.add_argument('-i', '--iterations', default=100000, type=int)
    cli.add_argument('-a', '--automaton', default='linear')
    cli.add_argument('-lr', '--learning_rate', default=0.1, type=float)
    parsed = cli.parse_args()
    main(parsed.iterations, parsed.automaton, parsed.learning_rate)