-
Notifications
You must be signed in to change notification settings - Fork 0
/
Elevator.m
212 lines (195 loc) · 7.01 KB
/
Elevator.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
classdef Elevator < rl.env.MATLABEnvironment
% RLENVDOUBLEINTEGRATORABSTRACT: Creates abstract class for Elevator
% control
properties
% Gain
Gain = 1.0
% time step
Ts = 1
% Final Point, will terminate if absolute position is exceeded
FinalPoint = 5
% Force to be applied
act = 0;
% Lower Bound y-axis
lby = -2
% Upper Bound y-axis
uby = 8
% Lower Bound vy
lbv = -6
% Upper Bound vy
ubv = 6
%Enabling Plotting Env Observer
PlotValue=0
% Reward Weights (in continuous time to remove dependence on sample
% time)
% reward = - integral(x'*Q*x + u'*R*u)
Q = diag([10 1]) %infinite horizon Q, used to find Qd
R = 0.01
end
properties (Access = protected)
MaxForce_ = Inf
end
properties (Dependent)
% Max force
MaxForce
end
properties
% system state [s,ds]'
State = zeros(1,2)
end
properties (Transient,Access = public)
Visualizer = []
end
properties (Access = private)
% reward = - sum_i(x_i'*Qd*x_i + u_i'*Rd*u_i)
% Note, these weights are derived from Q and R and will be updated
% after Q and R are set
Qd (2,2) double = eye(2)
Rd (1,1) double = 0.01
Nd (2,1) double = [0 0]'
% discrete system matrices
Ad (2,2) double
Bd (2,1) double
Cd (2,2) double
end
methods (Abstract, Access = protected)
force = getForce(this,force);
end
methods (Access = protected)
function setMaxForce_(this,val)
% define how the setting of max force will behave, which is
% different among continuous/discrete implementations of the
% environment
this.MaxForce_ = val;
this.ActionInfo.Values = [-val,val];
end
function updatePerformanceWeights(this)
% get the continuous linearized system
a = [0 1;0 0];
b = [0;this.Gain];
% Determine discrete equivalent of continuous cost function
% along with Ad,Bd matrices of discretized system. This is
% equivalent to the discretization in lqrd
q = this.Q;
r = this.R;
nn = [0 0]';
Nx = 2; Nu = 1;
n = Nx+Nu;
Za = zeros(Nx); Zb = zeros(Nx,Nu); Zu = zeros(Nu);
M = [ -a' Zb q nn
-b' Zu nn' r
Za Zb a b
Zb' Zu Zb' Zu];
phi = expm(M*this.Ts);
phi12 = phi(1:n,n+1:2*n);
phi22 = phi(n+1:2*n,n+1:2*n);
QQ = phi22'*phi12;
QQ = (QQ+QQ')/2; % Make sure QQ is symmetric
qd = QQ(1:Nx,1:Nx);
rd = QQ(Nx+1:n,Nx+1:n);
nd = QQ(1:Nx,Nx+1:n);
ad = phi22(1:Nx,1:Nx);
bd = phi22(1:Nx,Nx+1:n);
this.Rd = rd;
this.Qd = qd;
this.Ad = ad;
this.Bd = bd;
this.Nd = nd;
this.Cd = eye(2);
end
end
methods
function this = Elevator(ActionInfo)
% Define observation info
ObservationInfo = rlNumericSpec([2 1]);
ObservationInfo.Name = 'states';
ObservationInfo.Description = 's, ds';
this = [email protected](ObservationInfo,ActionInfo);
updatePerformanceWeights(this);
end
function set.State(this,state)
validateattributes(state,{'numeric'},{'finite','real','vector','numel',2},'','State');
this.State = state(:);
if this.PlotValue == 1
notifyEnvUpdated(this);
end
end
function set.FinalPoint(this,d)
validateattributes(d,{'numeric'},{'finite','real','positive','scalar'},'','FinalPoint');
this.FinalPoint = d;
notifyEnvUpdated(this);
end
function varargout = plot(this)
if isempty(this.Visualizer) || ~isvalid(this.Visualizer)
this.Visualizer = ElevatorVisualizer(this);
else
bringToFront(this.Visualizer);
end
if nargout
varargout{1} = this.Visualizer;
end
end
function set.MaxForce(this,val)
validateattributes(val,{'numeric'},{'real','positive','scalar'},'','MaxForce');
setMaxForce_(this,val);
end
function val = get.MaxForce(this)
val = this.MaxForce_;
end
function set.PlotValue(this,val)
validateattributes(val,{'numeric'},{'real','finite','size',[1 1]},'','PlotValue');
this.PlotValue = val;
end
function set.Q(this,val)
validateattributes(val,{'numeric'},{'real','finite','size',[2 2]},'','Q');
this.Q = val;
updatePerformanceWeights(this);
end
function set.R(this,val)
validateattributes(val,{'numeric'},{'real','finite','size',[1 1]},'','R');
this.R = val;
updatePerformanceWeights(this);
end
function set.Gain(this,val)
validateattributes(val,{'numeric'},{'finite','real','scalar'},'','Gain');
this.Gain = val;
updatePerformanceWeights(this);
end
function set.Ts(this,val)
validateattributes(val,{'numeric'},{'finite','real','positive','scalar'},'','Ts');
this.Ts = val;
updatePerformanceWeights(this);
end
function [nextobs,rwd,isTerminal] = step(this,s,action,plotValue)
this.PlotValue = plotValue;
action =getForce(this,action);
this.act = this.act+ action;
% Updating Enviroment State
x = s.';
xk1 = this.Ad*x+this.Bd*this.act;
% Saturating position and velocity
xk1(1) = min(max(xk1(1),this.lby),this.uby);
xk1(2) = min(max(xk1(2),this.lbv),this.ubv);
% Output Linearized System y(t)= [vel;pos] = C*x
nextobs = (this.Cd*xk1).';
this.State = xk1.';
% The episode will terminate under the following conditions:
% 1. the mass moves more than X units away from the origin
isdone = nextobs(1) == this.FinalPoint && nextobs(2) == 0 && this.act == 0;
%rwd = - x'*this.Qd*x - action'*this.Rd*action - 2*x'*this.Nd*action;
rwd = -1;
if nextobs(1) == this.lby || nextobs(1) == this.uby
nextobs(2) = 0;
this.act = 0;
this.State = nextobs;
rwd = -1e2;
end
if isdone == 1
this.act = 0;
isTerminal = true;
else
isTerminal = false;
end
end
end
end