animalBehavior.py
import MDP
import sys
import agent
import plotLearning as learn
import args
import animalMDP
def youngData():
    """Load the young-animal responses from 10279Responses.txt."""
    with open('10279Responses.txt', 'r') as yngFile:
        yng = yngFile.readlines()
    data = [d.split(',') for d in yng][0]
    return [int(num) for num in data]

def oldData():
    """Load the old-animal responses from 10282Responses.txt."""
    with open('10282Responses.txt', 'r') as oldFile:
        old = oldFile.readlines()
    data = [d.split(',') for d in old][0]
    return [int(num) for num in data]
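# Both loaders assume each response file holds a single line of
# comma-separated integers (one entry per trial), e.g. "1,0,0,1,...".
# The filenames and this format are inferred from the parsing above;
# adjust the loaders if your data files differ.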
def runEpisode(agent, environment, episode, f, animalData, trials, test):
    """
    Runs a single episode and, when test is True, documents the
    step-by-step behavior of the agent in decisions.txt.
    """
    returns = 0
    totalDiscount = 1.0
    environment.reset()
    if 'startEpisode' in dir(agent):
        agent.startEpisode()
    print("BEGINNING EPISODE: " + str(episode) + "\n")
    while True:
        state = environment.state
        action = agent.getAction(state, agent)
        # END IF IN A TERMINAL STATE
        if environment.termination(state, trials):
            print("EPISODE " + str(episode) + " COMPLETE: RETURN WAS " + str(returns) + "\n")
            if test:
                f.write('1')
            agent.accumTrainRewards = returns
            return returns
        # CHECK THE ACTION RETURNED BY THE AGENT
        if action is None:
            raise Exception('Error: Agent returned None action')
        # EXECUTE ACTION
        nextState = environment.nextState(state, action)
        reward = environment.reward(nextState, animalData, action)
        if test:
            f.write('1,' if reward == 1 else '0,')
        # UPDATE LEARNER
        if 'observeTransition' in dir(agent):
            agent.observeTransition(environment, state, action, reward)
        returns += reward * totalDiscount
        totalDiscount *= agent.getGamma(agent, state)
        environment.state = nextState
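# Note: `returns` accumulates the discounted return
#   G = r_1 + g_1*r_2 + g_1*g_2*r_3 + ...
# where each discount factor g_t comes from agent.getGamma and may vary
# by state, rather than being a single fixed gamma.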
######################################################
#### This runs q-learning with specified arguments####
######################################################
parameters = args.parseArgs(sys.argv[1:])
if parameters['m'] == 'old':
    data = oldData()
elif parameters['m'] == 'young':
    data = youngData()
else:
    raise ValueError("Unknown option for 'm': expected 'old' or 'young'")
environment = animalMDP.animalMDP(data)
rat = agent.ratAgent(environment, parameters['e'], parameters['a'], parameters['d'])
startState = MDP.State('f2', 1, 0, 0, None, 0)
iterations = 100
with open('decisions.txt', 'w') as fi:
    fi.write('0,')
    if iterations > 0:
        print()
        print("RUNNING " + str(iterations) + " EPISODES")
        print()
    returns = 0
    for episode in range(iterations):
        # Log step-by-step decisions only on the final (test) episode.
        test = (episode == iterations - 1)
        returns += runEpisode(rat, environment, episode, fi, data, len(data), test)
        rat.episodesSoFar += 1
    if iterations > 0:
        print()
        print("AVERAGE RETURNS FROM START STATE: " + str(float(returns) / iterations))
        print()
        print()
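# As written, decisions.txt ends up as one comma-separated flag per step of
# the final episode: a leading '0,', then '1,' for each rewarded step and
# '0,' otherwise, with a closing '1' written at termination.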
######################################################
###This plots results of the agent compared to rats###
######################################################
eps = args.eps(parameters)
alp = args.alp(parameters)
dis = args.dis(parameters)
# if parameters['p'] == 'polynomial':
#     learn.plot(alp, eps, dis)
if parameters['p'] == 'movAvg':
    learn.movAvg(alp, eps, dis)
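# Example invocation (the flag names are assumptions based on the parameter
# keys 'm', 'e', 'a', 'd', and 'p' used above; check args.parseArgs for the
# actual command-line interface):
#   python animalBehavior.py -m old -e 0.1 -a 0.5 -d 0.9 -p movAvg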