-
Notifications
You must be signed in to change notification settings - Fork 1
/
ilqr_main.m
262 lines (253 loc) · 9 KB
/
ilqr_main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
clear; clc; %close all;
set(groot,'defaultLineLineWidth',1,'defaultAxesGridAlpha',.3,...
'defaultAxesGridColor','k','defaultAxesGridLineStyle',':');
addpath('ode');
%% Initialiation
opos0 = [.1;.6;0]; % [m,m,rad] initial object position
oposd = [.1;.5]; % [m,m] desired object position
% parameters
ilqr_params;
% weights
w = [1e4; 1e-3; 0; 1e-4];
% save flag
saveFlag = 0;
%% Initial configuration and control trajectory
qpos0 = [-pi/3; pi/4; pi/4; pi/2.3];
qvel0 = [0;0;0;0]; ovel = [0;0;0];
epos0(1,1) = 2/5 - (3*sin(qpos0(1) + qpos0(2) + qpos0(3) + qpos0(4)))/10 - (3*sin(qpos0(1) + qpos0(2)))/10 - (3*sin(qpos0(1)))/10 - (3*sin(qpos0(1) + qpos0(2) + qpos0(3)))/10;
epos0(2,1) = (3*cos(qpos0(1) + qpos0(2) + qpos0(3)))/10 + (3*cos(qpos0(1) + qpos0(2) + qpos0(3) + qpos0(4)))/10 + (3*cos(qpos0(1) + qpos0(2)))/10 + (3*cos(qpos0(1)))/10;
pc0(:,1:4) = opos0(1:2)*ones(1,4) + f_rot2d(opos0(3))*ec*eo/2;
nd0 = f_rot2d(opos0(3))*ec;
pv(:,1:4) = opos0(1:2)*ones(1,4) + f_rot2d(opos0(3))*vd*eo/2; pv(:,5) = pv(:,1);
for i = 1:ncc
phi_n0(i) = dot(pc0(:,i)-epos0,-nd0(:,i));
phi_e0 = norm(pc0(:,i)-epos0,2);
zeta0(i) = tanh(20*phi_e0);
phi0(i,1) = zeta0(i)*phi_e0 + (1-zeta0(i))*phi_n0(i);
end
x0(:,1) = [qpos0;qvel0;opos0;ovel;zeros(ncc,1)];
% initial controls
tau = 0*ones(ndof,1); kcon = kcon0*ones(ncc,1);
U = [tau;kcon]*ones(1,ncts);
% bounds on controls
lb = [-tau_lim*ones(size(tau)); zeros(size(kcon))];
ub = [ tau_lim*ones(size(tau)); kcon0*ones(size(kcon))];
% dimensions of the state and control vectors
global n m
n = size(x0,1); % [-] number of state variables
m = size(U,1); % [-] number of control variables
%% Iterative Linear Quadratic Regulator
% Parameters
lambda = 1; dlambda = 1; lambdaFactor = 1.6; lambdaMax = 1e10; lambdaMin = 1e-6;
Alpha = 10.^linspace(0,-3,11); zMin = 0; tolGrad = 1e-4; tolFun = 1e-6; maxIter = 100;
% Main loop
diffDone = 0;
t_ILQR = tic;
for iter = 1:maxIter
trace(iter).iter = iter;
% update the initial gamma values
gamma0 = U(5:8,1).*exp(-acon*phi0);
x0(15:18,1) = gamma0;
%% Differentiation
if ~diffDone
t_diff = tic;
[X,c,fx,fu,cx,cu,cxx,cxu,cuu] = f_diff_dyncst(x0,[U, nan(m,1)],oposd,w,h);
trace(iter).timingDiff = toc(t_diff);
diffDone = 1;
end
%% Backward pass
bwdPassDone = 0;
t_bwd_pass = tic;
while ~bwdPassDone
[diverge,kff,Kfb,Vx,Vxx,dV] = f_bwd_pass(fx,fu,cx,cu,cxx,cxu,cuu,lambda,m,n,ncts,ndts,lb,ub,U);
% Check for divergence
if diverge
fprintf('\tWARNING: Cholesky failed at timestep %d.\n',diverge);
dlambda = max(dlambda*lambdaFactor, lambdaFactor);
lambda = max(lambda*dlambda, lambdaMin);
if lambda > lambdaMax, break; end
continue
end
bwdPassDone = 1;
end
trace(iter).timingBwdPass = toc(t_bwd_pass);
% Check for small gradient
gradNorm = mean(max(abs(kff)./(abs(U)+1),[],1));
trace(iter).gradNorm = gradNorm;
if gradNorm < tolGrad && lambda < 1e-5
dlambda = min(dlambda/lambdaFactor, 1/lambdaFactor);
lambda = lambda*dlambda*(lambda>lambdaMin);
fprintf('\nSUCCESS: gradient norm < tolGrad\n');
break;
end
%% Line search
fwdPassDone = 0;
if bwdPassDone
t_fwd = tic;
na = length(Alpha);
for a = 1:na
alpha = Alpha(a);
[Xnew,Unew,cnew] = f_fwd_pass(x0,X,U,oposd,w,kff,Kfb,alpha);
dcost = c(end) - cnew(end);
expected = -alpha*(dV(1) + alpha*dV(2));
if expected > 0
z = dcost/expected;
else
z = sign(dcost);
warning('non-positive expected reduction: should not occur');
end
if z > zMin
fwdPassDone = 1;
break;
end
end
if ~fwdPassDone
alpha = nan;
end
trace(iter).timingFwdPass = toc(t_fwd);
end
%% Accept or reject changes & show results
% Print the header
if mod(iter,10) == 1
fprintf('%-12s','iteration','cost','reduction','expected','gradient','log10(lambda)');
fprintf('\n');
end
% Accept changes
if fwdPassDone
% Print iteration details
fprintf('%-12d%-12.6g%-12.3g%-12.3g%-12.3g%-12.1f\n', ...
iter, c(end), dcost, expected, gradNorm, log10(lambda));
% Decrease lambda
dlambda = min(dlambda/lambdaFactor, 1/lambdaFactor);
lambda = lambda*dlambda*(lambda>lambdaMin);
% Accept changes
U = Unew;
X = Xnew;
c = cnew;
diffDone = 0;
% Check stopping criteria
if dcost < tolFun
fprintf('\nSUCCESS: cost change < tolFun\n');
break;
end
% Reject changes
else
% Increase lambda
dlambda = max(dlambda*lambdaFactor, lambdaFactor);
lambda = max(lambda*dlambda, lambdaMin);
% Print status
fprintf('%-12d%-12s%-12.3g%-12.3g%-12.3g%-12.1f\n', ...
iter,'NO STEP', dcost, expected, gradNorm, log10(lambda));
% Check stopping criteria
if lambda > lambdaMax
fprintf('\nEXIT: lambda > lambdaMax\n');
break;
end
end
% Update trace
trace(iter).lambda = lambda;
trace(iter).dlambda = dlambda;
trace(iter).alpha = alpha;
trace(iter).improvement = dcost;
trace(iter).cost = c(end);
trace(iter).rdc_ratio = z;
% velocity cost
qvel = X(5:8,:); ovel = X(12:14,:);
trace(iter).cvel = w(2)*(qvel(:)'*qvel(:)+ovel(:)'*ovel(:));
end
trace(iter).timingILQR = toc(t_ILQR);
clear X; xl = x0;
ndpc = floor(tc/dt);
for l = 1:ncts
X(:,(l-1)*ndpc+1:l*ndpc+1) = f_ode_show(xl,U(:,l),dt,tc,acon,vd,ec,eo,mo,Jo);
xl = X(:,end);
end
ready = input('Are you ready?');
f_show_motion(X',[0:dt:tf],ndof,oposd,vd,eo);
gamma = X(15:18,1:end); disp(['integral of gamma = ',num2str(dt*sum(abs(gamma(:))))]);
trace(iter).totalGamma = dt*sum(abs(gamma(:)));
trace(iter).X = X; trace(iter).oposd = oposd;
if saveFlag
save('trace_ilqr.mat','trace');
end
%% Functions
function [Xnew,Unew,cnew] = f_fwd_pass(x0,X,U,oposd,w,kff,Kfb,alpha)
% Forward pass
ilqr_params; global n m;
Xnew = zeros(n,ndts); Unew = zeros(m,ncts); cnew = zeros(1,ndts);
Xnew(:,1) = x0;
ix = 1:n; iu = n+[1:m];
for l = 1:ncts
dx = X(:,l) - Xnew(:,l);
Unew(:,l) = U(:,l) + kff(:,l)*alpha - Kfb(:,:,l)*dx;
Xnew(:,l+1) = f_ode_test(Xnew(:,l),Unew(:,l),dt,tc,acon,vd,ec,eo,mo,Jo);
cnew(1,l) = f_ilqr_cost(Xnew(:,1:l),Unew(:,1:l),oposd,w,ndts);
end
cnew(1,ndts) = f_ilqr_cost(Xnew,Unew,oposd,w,ndts);
end
function [X,c,fx,fu,cx,cu,cxx,cxu,cuu] = f_diff_dyncst(x0,U,oposd,w,h)
% Numerical differentiation of dynamics and cost
ilqr_params; global n m;
X = x0;
ix = 1:n; iu = n+[1:m];
fJ = zeros(n,n+m,ncts); cJ = zeros(1,n+m,ndts); cJJ = zeros(n+m,n+m,ndts);
for l = 1:ndts
xl = X(:,l); ul = U(:,l); Ul = U(:,1:l);
if l < ndts
fJ_dyn = @(xu)f_ode_test(xu(ix),xu(iu),dt,tc,acon,vd,ec,eo,mo,Jo);
fJ(:,:,l) = f_num_diff(fJ_dyn,[xl;ul],h,cdiff);
end
fJ_cost = @(xu) f_ilqr_cost(xu(ix,:),xu(iu,:),oposd,w,ndts);
fJJ_cost = @(xu) squeeze(f_num_diff(fJ_cost,xu,h,cdiff));
cJ(:,:,l) = f_num_diff(fJ_cost,[X;Ul],h,cdiff);
cJJ(:,:,l) = f_num_diff(fJJ_cost,[X;Ul],h,cdiff);
c(1,l) = f_ilqr_cost(X,Ul,oposd,w,ndts);
X(:,l+1) = f_ode_test(xl,ul,dt,tc,acon,vd,ec,eo,mo,Jo);
end
fx = fJ(:,ix,:); fu = fJ(:,iu,:);
cx = squeeze(cJ(:,ix,:)); cu = squeeze(cJ(:,iu,:));
cJJ = 0.5*(cJJ+permute(cJJ,[2 1 3]));
cxx = cJJ(ix,ix,:); cxu = cJJ(ix,iu,:); cuu = cJJ(iu,iu,:);
end
function [diverge,kff,Kfb,Vx,Vxx,dV] = f_bwd_pass(fx,fu,cx,cu,cxx,cxu,cuu,lambda,m,n,ncts,ndts,lb,ub,U)
% backward pass
kff = zeros(m,ncts);
Kfb = zeros(m,n,ncts);
Vx = zeros(n,ndts);
Vxx = zeros(n,n,ndts);
Vx(:,ndts) = cx(:,ndts);
Vxx(:,:,ndts) = cxx(:,:,ndts);
dV = [0 0];
diverge = 0;
for l = ncts:-1:1
Qx = cx(:,l) + fx(:,:,l)'*Vx(:,l+1);
Qu = cu(:,l) + fu(:,:,l)'*Vx(:,l+1);
Qxx = cxx(:,:,l) + fx(:,:,l)'*Vxx(:,:,l+1)*fx(:,:,l);
Qux = cxu(:,:,l)' + fu(:,:,l)'*Vxx(:,:,l+1)*fx(:,:,l);
Quu = cuu(:,:,l) + fu(:,:,l)'*Vxx(:,:,l+1)*fu(:,:,l);
Quu_f = Quu + lambda*eye(m);
Qux_reg = cxu(:,:,l)' + fu(:,:,l)'*Vxx(:,:,l+1)*fx(:,:,l);
if isempty(lb) && isempty(ub)
[R,d] = chol(Quu_f);
if d, diverge = l; return; end
kK = -R\(R'\[Qu Qux_reg]);
kl = kK(:,1);
Kl = kK(:,2:end);
else
Ulb = lb-U(:,l); Uub = ub-U(:,l);
[kl,result,R,free] = boxQP(Quu_f,Qu,Ulb,Uub,kff);
if result < 1, diverge = l; return; end
Kl = zeros(m,n);
if any(free)
Lfree = -R\(R'\Qux_reg(free,:));
Kl(free,:) = Lfree;
end
end
dV = dV + [kl'*Qu, 0.5*kl'*Quu*kl];
Vx(:,l) = Qx + Kl'*Quu*kl + Kl'*Qu + Qux'*kl;
Vxx(:,:,l) = Qxx + Kl'*Quu*Kl + Kl'*Qux + Qux'*Kl;
Vxx(:,:,l) = 0.5*(Vxx(:,:,l) + Vxx(:,:,l)');
kff(:,l) = kl;
Kfb(:,:,l) = Kl;
end
end