% WLRA - Weighted Low-Rank Approximation.
% [PH,LH,INFO] = WLRA(D,M,S,OPT)
% Finds a locally optimal solution to the problem:
% Minimize over DH norm(S.*(D - DH),'fro') subject to rank(DH) <= M
%
% D - data matrix of dimension qxN, q < N
% M - rank constraint, M < q
% S - element-wise nonnegative weight matrix of dimension qxN 
% OPT  - options for the optimization algorithm
%   OPT.Method has possible values
%     'ap' - alternating projections (default) and
%     'vp' - variable projections (requires Optimization Toolbox)
%   OPT.Algorithm - algorithm for the variable projections
%     'fminunc' - quasi-Newton type method 
%     'lsqnonlin' - Levenberg-Marquardt method (default)
%   OPT.P       - initial approximation (default computed via svds)
%   OPT.TolFun  - convergence tolerance for the function value
%   OPT.MaxIter - maximum number of iterations 
%   OPT.Display - level of printed information
%     'iter' - prints the cost function value per iteration
% PH, LH - PH*LH is the rank-m approximation DH of D
% INFO - exit information:
%   INFO.err   - approximation error
%   INFO.time  - execution time
%   INFO.iter  - number of iterations performed
%   Note: INFO.iter = OPT.MaxIter indicates lack of convergence
%
% Note: S(i,j) = 0 implies that D(i,j) is missing. Missing elements
% are ignored and in particular can be set to 0. This convention is 
% convenient for large sparse data sets when SPARSE repr. is used.
function [p,l,info] = wlra(d,m,s,opt)
% Weighted low-rank approximation of D by P*L with element-wise weights S.
%   d   - data matrix; entries where s is zero are never read by the
%         alternating projections iteration
%   m   - number of columns of P (rank bound)
%   s   - weight matrix of the same size as d
%   opt - optional struct; missing fields are filled with the defaults below
% Returns the factors p, l and the struct info (fields err, iter for the
% 'ap' method, and time).

tic % measure the execution time

% Fill in defaults for every option the caller did not supply.
if nargin < 4 || isempty(opt), opt = struct(); end
defaults = {'MaxIter',   100; ...
            'TolFun',    1e-5; ...
            'Display',   'off'; ...
            'Method',    'ap'; ...
            'Algorithm', 'lsqnonlin'};
for n = 1:size(defaults,1)
  if ~isfield(opt, defaults{n,1})
    opt.(defaults{n,1}) = defaults{n,2};
  end
end
verbose = strcmpi(opt.Display, 'iter'); % print progress information?

% Initial approximation: user supplied OPT.P or unweighted LRA of D.
if isfield(opt, 'P')
  p = opt.P;
else
  if verbose
    fprintf('Computing an initial approximation ...\n')
  end
  p = lra(d,m); % low-rank approximation
end

switch lower(opt.Method)
case {'altpro','ap'}
  [q,N] = size(d); % define q and N
  if verbose
    % Weighted size of D over the observed entries only, so that NaNs
    % stored at zero-weight positions cannot turn the value into NaN.
    obs = find(s);
    sd  = sum((full(s(obs)) .* full(d(obs))).^2);
  end

  % Main iteration loop
  l    = zeros(m,N); % preallocated so it is defined even when N = 0
  k    = 0;          % iteration counter
  cont = true;
  while cont
    % Sweep 1: with P fixed, solve one weighted least squares problem
    % per column of D for the corresponding column of L.
    res = cell(N,1); % per-column residuals, concatenated once below
    for j = 1:N
      J   = find(s(:,j));
      sJj = full(s(J,j));
      c   = sJj .* full(d(J,j));
      P   = sJj(:,ones(1,m)) .* p(J,:); % = diag(sJj) * p(J,:)
      l(:,j) = P \ c;
      res{j} = c - P*l(:,j);
    end
    ep = norm(vertcat(res{:}))^2; % error after the L update

    % Sweep 2: with L fixed, solve one weighted least squares problem
    % per row of D for the corresponding row of P.
    res = cell(1,q); % per-row residuals, concatenated once below
    for i = 1:q
      I   = find(s(i,:));
      sIi = full(s(i,I));
      r   = sIi .* full(d(i,I));
      L   = sIi(ones(m,1),:) .* l(:,I); % = l(:,I) * diag(sIi)
      p(i,:) = r / L;
      res{i} = r - p(i,:)*L;
    end
    el = norm(horzcat(res{:}))^2; % error after the P update

    k    = k + 1;
    re   = abs(el - ep) / el; % relative change within this iteration
    % Stop on the iteration limit, on a small relative change, or when
    % the error is numerically zero (re is NaN then and compares false).
    cont = (k < opt.MaxIter) && (re > opt.TolFun) && (el > eps);
    if verbose
      fprintf('%2d : relative error = %18.8f\n', k, el/sd)
    end
  end
  info.err  = el; % approximation error
  info.iter = k;  % number of iterations
case {'varpro','vp'}
  switch lower(opt.Algorithm)
    case {'fminunc'}
      % opt is forwarded unchanged as the solver's options argument;
      % info is the solver's fourth output.
      [p,err,f,info] = fminunc(@(p)wlra_err(p,d,s),p,opt);
    case {'lsqnonlin'}
      % Only empty bounds are passed here, no options argument, so the
      % fields of opt are not handed to this solver.
      [p,rn,r,f,info] = lsqnonlin(@(p)wlra_err_mat(p,d,s),p,[],[]);
    otherwise
      error('Unknown algorithm %s.',opt.Algorithm)
  end
  [info.err,l] = wlra_err(p,d,s); % in order to obtain the L parameter
otherwise
  error('Unknown method %s',opt.Method)
end
info.time = toc; % execution time

% LRA - Low-Rank Approximation.
% [PH,LH] = LRA(D,M)
% Finds an optimal solution to the problem:
% Minimize over DH norm(D - DH, 'fro') subject to rank(DH) <= M
%
% D  - data matrix of dimension qxN, q < N
% M  - rank constraint, M < q
% PH, LH - PH*LH is the rank-m approximation DH of D
function [p,l] = lra(d,m)
% Unweighted rank-m approximation of D computed from a truncated SVD.
%   d - data matrix; NaN entries are replaced by zeros before factoring
%   m - number of singular triplets to keep
% Returns p (the m leading left singular vectors) and, when two outputs
% are requested, l such that p*l is the rank-m approximation of d.

d(isnan(d)) = 0; % Convert missing elements (NaNs) to 0s
[q,N] = size(d); % matrix dimension

if nargout == 1
  % Only P is needed: replace D by the transposed triangular factor of
  % D', which has the same left singular vectors as D.
  d = triu(qr(d'))'; % = R,  where D = QR
  % Keep at most q columns; clamping to the available column count
  % avoids an index error when D has more rows than columns.
  d = d(:,1:min(q,size(d,2))); % = R1, where R = [R1 0]
end
[u,s,v] = svds(d,m);
p = u(:,1:m); % basis for the optimal model for D
if nargout == 2
  s = diag(s); % column vector
  l = s(1:m,ones(1,N)) .* v(:,1:m)'; % diag(S) * V'
end % needed for the initial approximation
function [ep,l] = wlra_err(p,d,s)
% Weighted approximation error for a given P, with L eliminated.
% For every column j of D the weighted least squares problem over the
% entries with nonzero weight is solved for l(:,j).
%   p - matrix whose column count sets the number of rows of l
%   d - data matrix; entries where s is zero are not read
%   s - weight matrix of the same size as d
% Returns ep, the squared 2-norm of the stacked weighted residuals, and
% l, the matrix of least squares solutions (one column per column of d).
N = size(d,2); m = size(p,2);
l   = zeros(m,N); % preallocated so it is defined even when N = 0
res = cell(N,1);  % per-column residuals, concatenated once below
for j = 1:N
  J   = find(s(:,j));                   % rows observed in column j
  sJj = full(s(J,j));
  c   = sJj .* full(d(J,j));
  P   = sJj(:,ones(1,m)) .* p(J,:); % = diag(sJj) * p(J,:)
  l(:,j) = P \ c;
  res{j} = c - P*l(:,j);
end
ep = norm(vertcat(res{:}))^2; % norm(vec(S .* (D - DH)))^2
function dd = wlra_err_mat(p,d,s)
% Weighted residual vector for a given P, with L eliminated; needed for
% the variable projections method.
% For every column j of D the weighted least squares problem over the
% entries with nonzero weight is solved and its residual is kept.
%   p - matrix defining the model (one least squares unknown per column)
%   d - data matrix; entries where s is zero are not read
%   s - weight matrix of the same size as d
% Returns dd, the column vector of all weighted residuals stacked in
% column order.
N = size(d,2); m = size(p,2);
res = cell(N,1); % per-column residuals, concatenated once below
for j = 1:N
  J   = find(s(:,j));                   % rows observed in column j
  sJj = full(s(J,j));
  c   = sJj .* full(d(J,j));
  P   = sJj(:,ones(1,m)) .* p(J,:); % = diag(sJj) * p(J,:)
  res{j} = c - P*(P \ c);               % least squares residual
end
dd = vertcat(res{:}); % vec(S .* (D - DH)) over the observed entries
