Add files via upload

clementetienam · Sep 6, 2020 · a0ac8e9 · a0ac8e9
1 parent ed32cba
commit a0ac8e9
Show file tree

Hide file tree

Showing 56 changed files with 4,778 additions and 0 deletions.
diff --git a/cov/apx.m b/cov/apx.m
diff --git a/cov/apxGrid.m b/cov/apxGrid.m
diff --git a/cov/apxSparse.m b/cov/apxSparse.m
@@ -0,0 +1,61 @@
+function K = apxSparse(cov, xu, hyp, x, z)
+
+% apxSparse - Covariance function for sparse approximations.
+%
+% This is a special covariance function which handles some non-standard
+% requirements of sparse covariances
+% 1) it holds the inducing inputs in xu, and
+% 2) it returns covariances for prediction purposes. Note, that sparse
+% inference methods don't actually call this function.
+%
+% Any sparse approximation to the posterior Gaussian process is equivalent to
+% using the approximate covariance function:
+%   Kt = Q + s*diag(g); g = diag(K-Q); Q = Ku'*inv(Kuu+diag(snu2))*Ku;
+% where Ku and Kuu are covariances w.r.t. to inducing inputs xu and
+% snu2 is a vector with the noise variance of the inducing inputs.
+% As a default, we fix the standard deviation of the inducing inputs
+% snu = sfu/1e3 * ones(nu,1) to be a one per mil of the signal standard
+% deviation sfu^2 = trace(Kuu)/nu of the inducing inputs. Alternatively, the
+% noise of the inducing inputs can be treated as a hyperparameter by means
+% of the field hyp.snu that can contain a scalar value log(snu) or a vector
+% of inducing point specific log noise standard deviations.
+%
+% The parameter s from [0,1] (see [1]) interpolates between the two limiting
+% cases s=1: FITC [2] and s=0 VFE [3].
+%
+% The function is designed to be used with infGaussLik and infLaplace.
+%
+% The implementation exploits the Woodbury matrix identity
+%   inv(Kt+inv(W)) = D - D*Ku'*inv(Kuu+snu2*eye(nu)+Ku*D*Ku')*Ku*D, where
+%                    D = diag(d), and d = W./(s*g.*W+1)
+% and the Cholesky decomposition in order to be applicable to large datasets.
+% The computational complexity is O(n nu^2) where n is the number of data
+% points x and nu the number of inducing inputs in xu.
+%
+% The inducing points can be specified through
+%  1) the 2nd apxSparse parameter or by
+%  2) providing a hyp.xu hyperparameters to the inference function.
+% Note that 2) has priority over 1).
+% In case 2) is provided and derivatives dnlZ are requested, there will also be
+% a dnlZ.xu field allowing to optimise w.r.t. to the inducing points xu.
+%
+% [1] Bui, Yan & Turner, A Unifying Framework for Sparse GP Approximation
+%     using Power EP, 2016, https://arxiv.org/abs/1605.07066.
+% [2] Snelson & Ghahramani, Sparse GPs using pseudo-inputs, NIPS, 2006.
+% [3] Titsias, Var. Learning of Inducing Variables in Sparse GPs, AISTATS, 2009
+%
+% Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2016-12-13.
+%
+% See also COVFUNCTIONS.M, APX.M, INFLAPLACE.M, INFGAUSSLIK.M.
+
+if nargin<4, K = feval(cov{:}); return, end
+
+if size(xu,2) ~= size(x,2)
+  error('Dimensionality of inducing inputs must match training inputs')
+end
+
+if strcmp(z, 'diag')
+  K = feval(cov{:}, hyp, x,  z);                % normal evaluation for diagonal
+else
+  K = feval(cov{:}, hyp, xu, z);               % substitue inducing inputs for x
+end
diff --git a/cov/apxState.m b/cov/apxState.m
diff --git a/cov/covADD.m b/cov/covADD.m
@@ -0,0 +1,100 @@
+function [K,dK] = covADD(cov, hyp, x, z)
+
+% Additive covariance function using 1d base covariance functions 
+% cov_d(x_d,z_d;hyp_d) with individual hyperparameters hyp_d, d=1..D.
+%
+% k  (x,z) = \sum_{r \in R} sf^2_r k_r(x,z), where 1<=r<=D and
+% k_r(x,z) = \sum_{|I|=r} \prod_{i \in I} cov_i(x_i,z_i;hyp_i)
+%
+% hyp = [ hyp_1
+%         hyp_2
+%          ...
+%         hyp_D 
+%         log(sf_R(1))
+%          ...
+%         log(sf_R(end)) ]
+%
+% where hyp_d are the parameters of the 1d covariance function which are shared
+% over the different values of r=R(1),..,R(end) where 1<=r<=D.
+%
+% Usage: covADD({[1,3,4],cov}, ...) or
+%        covADD({[1,3,4],cov_1,..,cov_D}, ...).
+%
+% Please see the paper "Additive Gaussian Processes" by Duvenaud, Nickisch and 
+% Rasmussen, NIPS, 2011 for details.
+%
+% Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2017-02-21.
+%                  multiple covariance support contributed by Truong X. Nghiem
+%
+% See also COVFUNCTIONS.M.
+
+R = fix(cov{1});                            % only positive integers are allowed
+if min(R)<1, error('only positive R up to D allowed'), end
+nr = numel(R);                      % number of different degrees of interaction
+nc = numel(cov)-1; D = 1;         % number of provided covariances, D for indiv.
+for j=1:nc, if ~iscell(cov{j+1}), cov{j+1} = {cov{j+1}}; end, end
+if nc==1, nh = eval(feval(cov{2}{:}));  % no of hypers per individual covariance
+else nh = zeros(nc,1); for j=1:nc, nh(j) = eval(feval(cov{j+1}{:})); end, end
+
+if nargin<3                                  % report number of hyper parameters
+  if nc==1, K = ['D*', int2str(nh), '+', int2str(nr)];
+  else
+    K = ['(',int2str(nh(1))]; for ii=2:nc, K = [K,'+',int2str(nh(ii))]; end
+    K = [K, ')+', int2str(nr)];
+  end
+  return
+end
+if nargin<4, z = []; end                                   % make sure, z exists
+
+[n,D] = size(x);                                                % dimensionality
+if nc==1, nh = ones(D,1)*nh; cov = [cov(1),repmat(cov(2),1,D)]; end
+nch = sum(nh);                                      % total number of cov hypers
+sf2 = exp( 2*hyp(nch+(1:nr)) );         % signal variances of individual degrees
+
+[Kd,dKd] = Kdim(cov(2:end),nh,hyp(1:nch),x,z);  % eval dimensionwise covariances
+EE = elsympol(Kd,max(R));                 % Rth elementary symmetric polynomials
+K = 0; for ii=1:nr, K = K + sf2(ii)*EE(:,:,R(ii)+1); end      % sf2 weighted sum
+
+if nargout > 1
+  dK = @(Q) dirder(Q,Kd,dKd,EE,R,sf2);
+end
+
+function [dhyp,dx] = dirder(Q,Kd,dKd,EE,R,sf2)
+  D = numel(dKd); n = size(Q,1); nr = numel(R); dhyp = zeros(0,1);
+  if nargout > 1, dx = zeros(n,D); end                    % allocate if required
+  for j=1:D
+    % the final derivative is a sum of multilinear terms, so if only one term
+    % depends on the hyperparameter under consideration, we can factorise it 
+    % out and compute the sum with one degree less, the j-th elementary
+    % covariance depends on the hyperparameter batch hyp(j) and inputs x(:,j)
+    E = elsympol(Kd(:,:,[1:j-1,j+1:D]),max(R)-1);  %  R-1th elementary sym polyn
+    Qj = 0; for ii=1:nr, Qj = Qj + sf2(ii)*E(:,:,R(ii)); end  % sf2 weighted sum
+    if nargout > 1, [dhypj,dxj] = dKd{j}(Qj.*Q); dx(:,j) = dxj;
+    else dhypj = dKd{j}(Qj.*Q); end, dhyp = [dhyp; dhypj];
+  end
+  dhyp = [dhyp; 2*squeeze(sum(sum(bsxfun(@times,EE(:,:,R+1),Q),1),2)).*sf2];
+
+% evaluate dimensionwise covariances K
+function [K,dK] = Kdim(cov,nh,hyp,x,z)
+  [n,D] = size(x);                                              % dimensionality
+  xeqz = numel(z)==0; dg = strcmp(z,'diag') && numel(z)>0;      % determine mode
+
+  if dg                                                        % allocate memory
+    K = zeros(n,1,D);
+  else
+    if xeqz, K = zeros(n,n,D); else K = zeros(n,size(z,1),D); end
+  end
+  dK = cell(D,1);
+
+  for d=1:D
+    hyp_d = hyp(sum(nh(1:d-1))+(1:nh(d)));        % hyperparamter of dimension d
+    if dg
+      [K(:,:,d),dK{d}] = feval(cov{d}{:},hyp_d,x(:,d),'diag');
+    else
+      if xeqz
+        [K(:,:,d),dK{d}] = feval(cov{d}{:},hyp_d,x(:,d));
+      else
+        [K(:,:,d),dK{d}] = feval(cov{d}{:},hyp_d,x(:,d),z(:,d));
+      end
+    end
+  end
diff --git a/cov/covConst.m b/cov/covConst.m
@@ -0,0 +1,21 @@
+function varargout = covConst(varargin)
+
+% Wrapper for unit constant covariance function covOne.m.
+%
+% Covariance function for a constant function. The covariance function is
+% parameterized as:
+%
+% k(x,z) = sf^2
+%
+% The scalar hyperparameter is:
+%
+% hyp = [ log(sf) ]
+%
+% For more help on design of covariance functions, try "help covFunctions".
+%
+% Copyright (c) by Carl Edward Rasmussen and Hannes Nickisch, 2016-04-19.
+%
+% See also covOne.m.
+
+varargout = cell(max(nargout,1),1);
+[varargout{:}] = covScale({'covOne'},varargin{:});
diff --git a/cov/covCos.m b/cov/covCos.m
@@ -0,0 +1,56 @@
+function [K,dK] = covCos(hyp, x, z)
+
+% Stationary covariance function for a sinusoid with period p in 1d:
+%
+% k(x,z) = sf^2*cos(2*pi*(x-z)/p)
+%
+% where the hyperparameters are:
+%
+% hyp = [ log(p)
+%         log(sf) ]
+%
+% Note that covPeriodicNoDC converges to covCos as ell goes to infinity.
+%
+% Copyright (c) by James Robert Lloyd and Hannes Nickisch, 2016-11-05.
+%
+% See also COVFUNCTIONS.M, COVPERIODICNODC.M.
+
+if nargin<2, K = '2'; return; end                  % report number of parameters
+if nargin<3, z = []; end                                   % make sure, z exists
+xeqz = isempty(z); dg = strcmp(z,'diag');                       % determine mode
+
+[n,D] = size(x);
+if D~=1, error('Covariance is defined for 1d data only.'), end
+p   = exp(hyp(1));
+sf2 = exp(2*hyp(2));
+
+% precompute deviations and exploit symmetry of cos
+if dg                                                               % vector txx
+  T = zeros(size(x,1),1);
+else
+  if xeqz                                                 % symmetric matrix Txx
+    T = 2*pi/p*bsxfun(@plus,x,-x');
+  else                                                   % cross covariances Txz
+    T = 2*pi/p*bsxfun(@plus,x,-z');
+  end
+end
+
+K = sf2*cos(T);                                                    % covariances
+if nargout > 1
+  dK = @(Q) dirder(Q,K,T,x,p,sf2,dg,xeqz);
+end
+
+function [dhyp,dx] = dirder(Q,K,T,x,p,sf2,dg,xeqz)
+  dhyp = [sf2*(sin(T(:)).*T(:))'*Q(:); 2*Q(:)'*K(:)];
+  if nargout > 1
+    R = -sf2*pi/p * Q .* sin(T);
+    if dg
+      dx = zeros(size(x));
+    else
+      if xeqz
+        dx = 2*(sum(R,2)-sum(R,1)');
+      else
+        dx = 2*sum(R,2);
+      end
+    end
+  end
diff --git a/cov/covDiscrete.m b/cov/covDiscrete.m
@@ -0,0 +1,71 @@
+function [K,dK] = covDiscrete(s, hyp, x, z)
+
+% Covariance function for discrete inputs. Given a function defined on the
+% integers 1,2,3,..,s, the covariance function is parameterized as:
+%
+% k(x,z) = K_{xz},
+%
+% where K is a matrix of size (s x s).
+%
+% This implementation assumes that the inputs x and z are given as integers
+% between 1 and s, which simply index the matrix K.
+%
+% The hyperparameters specify the upper-triangular part of the Cholesky factor
+% of K, where the (positive) diagonal elements are specified by their logarithm.
+% If L = chol(K), so K = L'*L, then the hyperparameters are:
+%
+% hyp = [ log(L_11)
+%             L_21
+%         log(L_22)
+%             L_31
+%             L_32
+%         ..
+%         log(L_ss) ]
+%
+% The hyperparameters hyp can be generated from K using:
+% L = chol(K); L(1:(s+1):end) = log(diag(L));
+% hyp = L(triu(true(s)));
+%
+% The covariance matrix K is obtained from the hyperparameters hyp by:
+% L = zeros(s); L(triu(true(s))) = hyp(:);
+% L(1:(s+1):end) = exp(diag(L)); K = L'*L;
+%
+% This parametrization allows unconstrained optimization of K.
+%
+% For more help on design of covariance functions, try "help covFunctions".
+%
+% Copyright (c) by Roman Garnett, 2016-04-18.
+%
+% See also MEANDISCRETE.M, COVFUNCTIONS.M.
+
+if nargin==0, error('s must be specified.'), end           % check for dimension
+if nargin<3, K = num2str(s*(s+1)/2); return; end   % report number of parameters
+if nargin<4, z = []; end                                   % make sure, z exists
+xeqz = isempty(z); dg = strcmp(z,'diag');                       % determine mode
+if xeqz, z = x; end                                % make sure we have a valid z
+
+L = zeros(s); L(triu(true(s))) = hyp(:);                 % build Cholesky factor
+L(1:(s+1):end) = exp(diag(L));
+
+A = L'*L;   % A is a placeholder for K to avoid a name clash with the return arg
+if dg
+  K = A(sub2ind(size(A),fix(x),fix(x)));
+else
+  K = A(fix(x),fix(z));
+end
+
+if nargout > 1
+  dK = @(Q) dirder(Q,s,L,x,z,dg);                 % directional hyper derivative
+end
+
+function [dhyp,dx] = dirder(Q,s,L,x,z,dg)
+  nx = numel(x); Ix = sparse(fix(x),1:nx,ones(nx,1),s,nx);      % K==Ix'*L'*L*Iz
+  if dg
+    B = Ix*diag(Q)*Ix';
+  else
+    nz = numel(z); Iz = sparse(fix(z),1:nz,ones(nz,1),s,nz);
+    B = Iz*Q'*Ix';
+  end
+  B = L*(B+B'); B(1:(s+1):end) = diag(B).*diag(L);         % fix exp on diagonal
+  dhyp = B(triu(true(s)));
+  if nargout > 1, dx = zeros(size(x)); end