/code_oth/mds.m
MATLAB | 1067 lines | 646 code | 127 blank | 294 comment | 66 complexity | 81506a89dff561fac821079d48ac9208 MD5 | raw file
- %MDS - Multidimensional Scaling - a variant of Sammon mapping
- %
- % [W,J,stress] = MDS(D,Y,OPTIONS)
- % [W,J,stress] = MDS(D,N,OPTIONS)
- %
- % INPUT
- % D Square (M x M) dissimilarity matrix
- % Y M x N matrix containing starting configuration, or
- % N Desired output dimensionality
- % OPTIONS Various parameters of the minimization procedure put into
- % a structure consisting of the following fields: 'q', 'optim',
- % 'init','etol','maxiter', 'isratio', 'itmap', 'st' and 'inspect'
- % (default:
- % OPTIONS.q = 0
- % OPTIONS.optim = 'pn'
- % OPTIONS.init = 'cs'
- % OPTIONS.etol = 1e-6 (the precise value depends on q)
- % OPTIONS.maxiter = inf
- % OPTIONS.isratio = 0
- % OPTIONS.itmap = 'yes'
- % OPTIONS.st = 1
- % OPTIONS.inspect = 2).
- %
- % OUTPUT
- % W Multidimensional scaling mapping
- % J Index of points removed before optimization
- % stress Vector of stress values
- %
- % DESCRIPTION
- % Finds a nonlinear MDS map (a variant of the Sammon map) of objects
- % represented by a symmetric distance matrix D with zero diagonal, given
- % either the dimensionality N or the initial configuration Y. This is done
- % in an iterative manner by minimizing the Sammon stress:
- %
- % e = 1/(sum_{i<j} D_{ij}^(Q+2)) sum_{i<j} (D_{ij} - DY_{ij})^2 * D_{ij}^Q
- %
- % where DY is the distance matrix of Y, which should approximate D. If D(i,j)
- % = 0 for any different points i and j, then one of them is superfluous. The
- % indices of these points are returned in J.
- %
- % OPTIONS is an optional variable, using which the parameters for the mapping
- % can be controlled. It may contain the following fields:
- %
- % Q Stress measure to use (see above): -2,-1,0,1 or 2.
- % INIT Initialisation method for Y: 'randp', 'randnp', 'maxv', 'cs'
- % or 'kl'. See MDS_INIT for an explanation.
- % OPTIM Minimization procedure to use: 'pn' for Pseudo-Newton or
- % 'scg' for Scaled Conjugate Gradients.
- % ETOL Tolerance of the minimization procedure. Usually, it should be
- % MAXITER in the order of 1e-6. If MAXITER is given (see below), then the
- % optimization is stopped either when the error drops below ETOL or
- % MAXITER iterations are reached.
- % ISRATIO Indicates whether a ratio MDS should be performed (1) or not (0).
- % If ISRATIO is 1, then instead of fitting the dissimilarities
- % D_{ij}, A*D_{ij} is fitted in the stress function. The value A
- % is estimated analytically in each iteration.
- % ITMAP Determines the way new points are mapped, either in an iterative
- % manner ('yes') by minimizing the stress; or by a linear projection
- % ('no').
- % ST Status, determines whether after each iteration the stress should
- % INSPECT be printed on the screen (1) or not (0). When INSPECT > 0,
- % ST = 1 and the mapping is onto 2D or larger, then the progress
- % is plotted during the minimization every INSPECT iterations.
- %
- % Important:
- % 1. It is assumed that D either is or approximates a Euclidean distance
- % matrix, i.e. D_{ij} = sqrt (sum_k(x_i - x_j)^2).
- % 2. Missing values can be handled; they should be marked by 'NaN' in D.
- %
- % EXAMPLES:
- % opt.optim = 'scg';
- % opt.init = 'cs';
- % D = sqrt(distm(a)); % Compute the Euclidean distance dataset of A
- % w1 = mds(D,2,opt); % An MDS map onto 2D initialized by Classical Scaling,
- % % optimized by a Scaled Conjugate Gradients algorithm
- % n = size(D,1);
- % y = rand(n,2);
- % w2 = mds(D,y,opt); % An MDS map onto 2D initialized by random vectors
- %
- % z = rand(n,n); % Set around 40% of the random distances to NaN, i.e.
- % z = (z+z')/2; % not used in the MDS mapping
- % z = find(z <= 0.6);
- % D(z) = NaN;
- % D(1:n+1:n^2) = 0; % Set the diagonal to zero
- % opt.optim = 'pn';
- % opt.init = 'randnp';
- % opt.etol = 1e-8; % Should be high, as only some distances are used
- % w3 = mds(D,2,opt); % An MDS map onto 2D initialized by a random projection
- %
- % REFERENCES
- % 1. M.F. Moler, A Scaled Conjugate Gradient Algorithm for Fast Supervised
- % Learning', Neural Networks, vol. 6, 525-533, 1993.
- % 2. W.H. Press, S.A. Teukolsky, W.T. Vetterling and B.P. Flannery,
- % Numerical Recipes in C, Cambridge University Press, Cambridge, 1992.
- % 3. I. Borg and P. Groenen, Modern Multidimensional Scaling, Springer
- % Verlag, Berlin, 1997.
- % 4. T.F. Cox and M.A.A. Cox, Multidimensional Scaling, Chapman and Hall,
- % London, 1994.
- %
- % SEE ALSO
- % MAPPINGS, MDS_STRESS, MDS_INIT, MDS_CS
- %
- % Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela@ph.tn.tudelft.nl, 2000-2003
- % Faculty of Applied Sciences, Delft University of Technology
- %
- function [w,J,err,opt,y] = mds(D,y,options)
- if (nargin < 3)
- options = []; % Will be filled by MDS_SETOPT, below.
- end
- opt = mds_setopt(options);
- if (nargin < 2) | (isempty(y))
- prwarning(2,'no output dimensionality given, assuming N = 2.');
- y = 2;
- end
- % No arguments given: return an untrained mapping.
- if (nargin < 1) | (isempty(D))
- w = mapping(mfilename,{y,opt,[],[],[],[]});
- w = setname(w,'MDS');
- return
- end
- % YREP contains representative objects in the projected MDS space, i.e.
- % for which the mapping exists. YREP is empty for the original MDS, since
- % no projection is available yet.
- yrep = [];
- if (isdataset(D) | isa(D,'double'))
- [m,mm] = size(D);
- % Convert D to double, but retain labels in LAB.
- if (isdataset(D)), lab = getlab(D); D = +D; else, lab = ones(m,1); end;
- if (ismapping(y))
- % The MDS mapping exists; this means that YREP has already been stored.
- pars = getdata(y); [k,c] = size(y);
- y = []; % Empty, should now be found.
- yrep = pars{1}; % There exists an MDS map, hence YREP is stored.
- opt = pars{2}; % Options used for the mapping.
- II = pars{3}; % Index of non-repeated points in YREP.
- winit = pars{4}; % The Classical Scaling map, if INIT = 'cs'.
- v = pars{5}; % Weights used for adding new points if ITMAP = 'no'.
- n = c; % Number of dimensions of the projected space.
- % Initialization by 'cs' is not possible when there is no winit
- % (i.e. the CS map) and new points should be added.
- if (strcmp(opt.init,'cs')) & (isempty(winit))
- prwarning(2,'OPTIONS.init = cs is not possible when adding points; using kl.');
- opt.init = 'kl';
- end
- % If YREP is a scalar, we have an empty mapping.
- if (max(size(yrep)) == 1)
- y = yrep;
- yrep = [];
- end
- % Check whether D is a matrix with the zero diagonal for the existing map.
- if (m == mm) & (length(intersect(find(D(:)<eps),1:m+1:(m*mm))) >= m)
- w = yrep; % D is the same matrix as in the projection process;
- return % YREP is then the solution
- end
- if (length(pars) < 6) | (isempty(pars{6}))
- yinit = [];
- else
- yinit = pars{6}; % Possible initial configuration for points to
- % be added to an existing map
- if (size(yinit,1) ~= size(D,1))
- prwarning(2,'the size of the initial configuration does not match that of the dissimilarity matrix, using random initialization.')
- yinit =[];
- end
- end
- else
- % No MDS mapping available yet; perform the checks.
- if (~issym(D,1e-12))
- prwarning(2,'D is not a symmetric matrix; will average.');
- D = (D+D')/2;
- end
- % Check the number of zeros on the diagonal
- if (any(abs(diag(D)) > 0))
- error('D should have a zero diagonal');
- end
- end
- else % D is neither a dataset nor a matrix of doubles
- error('D should be a dataset or a matrix of doubles.');
- end
- if (~isempty(y))
- % Y is the initial configuration or N, no MDS map exists yet;
- % D is a square matrix.
-
- % Remove identical points, i.e. points for which D(i,j) = 0 for i ~= j.
- % I contains the indices of points left for the MDS mapping, J those
- % of the removed points and P those of the points left in I which were
- % identical to those removed.
- [I,J,P] = mds_reppoints(D);
- D = D(I,I);
- [ni,nc] = size(D);
- % NANID is an extra field in the OPTIONS structure, containing the indices
- % of NaN values (= missing values) in distance matrix D.
- opt.nanid = find(isnan(D(:)) == 1);
- % Initialise Y.
- [m2,n] = size(y);
- if (max(m2,n) == 1) % Y is a scalar, hence really is a dimensionality N.
- n = y;
- [y,winit] = mds_init(D,n,opt.init);
- else
- if (mm ~= m2)
- error('The matrix D and the starting configuration Y should have the same number of columns.');
- end
- winit = [];
- y = +y(I,:);
- end
- % The final number of true distances is:
- no_dist = (ni*(ni-1) - length(opt.nanid))/2;
- else
- % This happens only if we add extra points to an existing MDS map.
- % Remove identical points, i.e. points for which D(i,j) = 0 for i ~= j.
- % I contains the indices of points left for the MDS mapping, J those
- % of the removed points and P those of the points left in I which were
- % identical to those removed.
- [I,J,P] = mds_reppoints(D(:,II));
- D = D(I,II);
- [ni,nc] = size(D);
- yrep = yrep(II,:);
- n = size(yrep,2);
- % NANID is an extra field in the OPTIONS structure, containing the indices
- % of NaN values (= missing values) in distance matrix D.
- opt.nanid = find(isnan(D(:)));
- % Initialise Y. if the new points should be added in an iterative manner.
- [m2,n] = size(yrep);
- if (~isempty(yinit)) % An initial configuration exists.
- y = yinit;
- elseif (strcmp(opt.init, 'cs')) & (~isempty(winit))
- y = D*winit;
- else
- y = mds_init(D,n,opt.init);
- end
- if (~isempty(opt.nanid)) % Rescale.
- scale = (max(yrep)-min(yrep))./(max(y)-min(y));
- y = y .* repmat(scale,ni,1);
- end
- % The final number of true distances is:
- no_dist = (ni*nc - length(opt.nanid));
- end
- % Check whether there is enough data left.
- if (~isempty(opt.nanid))
- if (n*ni+2 > no_dist),
- error('There are too many missing distances: it is not possible to determine the MDS map.');
- end
- if (strcmp (opt.itmap,'no'))
- opt.itmap = 'yes';
- prwarning(1,'Due to the missing values, the projection can only be iterative. OPTIONS are changed appropriately.')
- end
- end
- if (opt.inspect > 0)
- opt.plotlab = lab(I,:); % Labels to be used for plotting in MDS_SAMMAP.
- end
- if (isempty(yrep)) | (~isempty(yrep) & strcmp(opt.itmap, 'yes'))
- % Either no MDS exists yet OR new points should be mapped in an iterative manner.
- printinfo(opt);
- [yy,err] = mds_sammap(D,y,yrep,opt);
- % Define the linear projection of distances.
- v = [];
- if (isempty(yrep)) & (isempty(opt.nanid))
- if (rank(D) < m)
- v = pinv(D)*yy;
- else
- v = D \ yy;
- end
- end
- else
- % New points should be added by a linear projection of dissimilarity data.
- yy = D*v;
- end
- % Establish the projected configuration including the removed points.
- y = zeros(m,n); y(I,:) = +yy;
- if (~isempty(J))
- if (~isempty(yrep))
- y(J,:) = +yrep(II(P),:);
- else
- for k=length(J):-1:1 % J: indices of removed points.
- y(J(k),:) = y(P(k),:); % P: indices of points equal to points in J.
- end
- end
- end
- % In the definition step: shift the obtained configuration such that the
- % mean lies at the origin.
- if (isempty(yrep))
- y = y - ones(length(y),1)*mean(y);
- y = dataset(y,lab);
- else
- w = dataset(y,lab);
- return;
- end
- % In the definition step: the mapping should be stored.
- opt = rmfield(opt,'nanid'); % These fields are to be used internally only;
- opt = rmfield(opt,'plotlab'); % not to be set up from outside
- w = mapping(mfilename,'trained',{y,opt,I,winit,v,[]},[],m,n);
- w = setname(w,'MDS');
- return
- % **********************************************************************************
- % Extra functions
- % **********************************************************************************
- % PRINTINFO(OPT)
- %
- % Prints progress information to the file handle given by OPT.ST.
- function printinfo(opt)
- fprintf(opt.st,'Sammon mapping, error function with the parameter q=%d\n',opt.q);
- switch (opt.optim)
- case 'pn',
- fprintf(opt.st,'Minimization by Pseudo-Newton algorithm\n');
- case 'scg',
- fprintf(opt.st,'Minimization by Scaled Conjugate Gradients algorithm\n');
- otherwise
- error(strcat('Possible initialization methods: pn (Pseudo-Newton), ',...
- 'or scg (Scaled Conjugate Gradients).'));
- end
- return
- % **********************************************************************************
- % [I,J,P] = MDS_REPPOINTS(D)
- %
- % Finds the indices of repeated/left points. J contains the indices of
- % repeated points in D. This means that for each j in J, there is a point
- % k ~= j such that D(J(j),k) = 0. I contains the indices of the remaining
- % points, and P those of the points in I that were identical to those in J.
- % Directly used in MDS routine.
- function [I,J,P] = mds_reppoints(D)
- epsilon = 1e-20; % Differences smaller than this are assumed to be zero.
- [m,mm] = size(D);
- I = 1:m; J = []; P = [];
- if (m == mm) & (all(abs(D(1:m+1:end)) <= epsilon))
- K = intersect (find (triu(ones(m),1)), find(D < epsilon));
- if (~isempty(K))
- P = mod(K,m);
- J = fix(K./m) + 1;
- I(J) = [];
- end
- else
- [J,P] = find(D<=epsilon);
- I(J) = [];
- end
- return;
- % **********************************************************************************
- % MDS_SETOPT Sets parameters for the MDS mapping
- %
- % OPT = MDS_SETOPT(OPT_GIVEN)
- %
- % INPUT
- % OPT_GIVEN Parameters for the MDS mapping, described below (default: [])
- %
- % OUTPUT
- % OPT Structure of chosen options; if OPT_GIVEN is empty, then
- % OPT.q = 0
- % OPT.optim = 'pn'
- % OPT.init = 'cs'
- % OPT.etol = 1e-6 (the precise value depends on q)
- % OPT.maxiter = inf
- % OPT.itmap = 'yes'
- % OPT.isratio = 0
- % OPT.st = 1
- % OPT.inspect = 2
- %
- % DESCRIPTION
- % Parameters for the MDS mapping can be set or changed. OPT_GIVEN consists
- % of the following fields: 'q', 'init', 'optim', 'etol', 'maxiter','itmap',
- % 'isratio', 'st' and 'inspect'. OPTIONS can include all the fields or some
- % of them only. The fields of OPT have some default values, which can be
- % changed by the OPT_GIVEN field values. If OPT_GIVEN is empty, then OPT
- % contains all default values. For a description of the fields, see MDS.
- %
- % Copyright: Elzbieta Pekalska, ela@ph.tn.tudelft.nl, 2000-2003
- % Faculty of Applied Sciences, Delft University of Technology
- %
- function opt = mds_setopt (opt_given)
- opt.q = 0;
- opt.init = 'cs';
- opt.optim = 'pn';
- opt.st = 1;
- opt.itmap = 'yes';
- opt.maxiter = inf;
- opt.inspect = 2;
- opt.etol = inf;
- opt.isratio = 0;
- opt.nanid = []; % Here are some extra values; set up in the MDS routine.
- opt.plotlab = []; % Not to be changed by the user.
- if (~isempty(opt_given))
- if (~isstruct(opt_given))
- error('OPTIONS should be a structure with at least one of the following fields: q, init, etol, optim, maxiter, itmap, isratio, st or inspect.');
- end
- fn = fieldnames(opt_given);
- if (~all(ismember(fn,fieldnames(opt))))
- error('Wrong field names; valid field names are: q, init, optim, etol, maxiter, itmap, isratio, st or inspect.')
- end
- for i = 1:length(fn)
- opt = setfield(opt,fn{i},getfield(opt_given,fn{i}));
- end
- end
- if (isempty(intersect(opt.q,-2:1:2)))
- error ('OPTIONS.q should be -2, -1, 0, 1 or 2.');
- end
- if (opt.maxiter < 2)
- error ('OPTIONS.iter should be at least 1.');
- end
- if (isinf(opt.etol))
- switch (opt.q) % Different defaults for different stresses.
- case -2,
- opt.etol = 1e-6;
- case {-1,0}
- opt.etol = 10*sqrt(eps);
- case {1,2}
- opt.etol = sqrt(eps);
- end
- elseif (opt.etol <= 0) | (opt.etol >= 0.1)
- error ('OPTIONS.etol should be positive and smaller than 0.1.');
- end
- if (~ismember(opt.optim, {'pn','scg'}))
- error('OPTIONS.optim should be pn or scg.');
- end
- if (~ismember(opt.itmap, {'yes','no'}))
- error('OPTIONS.itmap should be yes or no.');
- end
- return
- % **********************************************************************************
- % MDS_SAMMAP Sammon iterative nonlinear mapping for MDS
- %
- % [YMAP,ERR] = MDS_SAMMAP(D,Y,YREP,OPTIONS)
- %
- % INPUT
- % D Square (M x M) dissimilarity matrix
- % Y M x N matrix containing starting configuration, or
- % YREP Configuration of the representation points
- % OPTIONS Various parameters of the minimization procedure put into
- % a structure consisting of the following fields: 'q', 'optim',
- % 'init','etol','maxiter', 'isratio', 'itmap', 'st' and 'inspect'
- % (default:
- % OPTIONS.q = 0
- % OPTIONS.optim = 'pn'
- % OPTIONS.init = 'cs'
- % OPTIONS.etol = 1e-6 (the precise value depends on q)
- % OPTIONS.maxiter = inf
- % OPTIONS.isratio = 0
- % OPTIONS.itmap = 'yes'
- % OPTIONS.st = 1
- % OPTIONS.inspect = 2).
- %
- % OUTPUT
- % YMAP Mapped configuration
- % ERR Sammon stress
- %
- % DESCRIPTION
- % Maps the objects given by a symmetric distance matrix D (with a zero
- % diagonal) onto, say, an N-dimensional configuration YMAP by an iterative
- % minimization of a variant of the Sammon stress. The minimization starts
- % from the initial configuration Y; see MDS_INIT.
- %
- % YREP is the Sammon configuration of the representation set. It is used
- % when new points have to be projected. In other words, if D is an M x M
- % symmetric distance matrix, then YREP is empty; if D is an M x N matrix,
- % then YMAP is sought such that D can approximate the distances between YMAP
- % and YREP.
- %
- % Missing values can be handled by marking them by NaN in D.
- %
- % SEE ALSO
- % MAPPINGS, MDS, MDS_CS, MDS_INIT, MDS_SETOPT
- %
- % Copyright: Elzbieta Pekalska, ela@ph.tn.tudelft.nl, 2000-2003
- % Faculty of Applied Sciences, Delft University of Technology
- %
- function [y,err] = mds_sammap(Ds,y,yrep,opt)
- if (nargin < 4)
- opt = []; % Will be filled by MDS_SETOPT, below.
- end
- if isempty(opt), opt = mds_setopt(opt); end
- if (nargin < 3)
- yrep = [];
- end
- % Extract labels and calculate distance matrix.
- [m,n] = size(y);
- if (~isempty(yrep))
- replab = getlab(yrep);
- yrep = +yrep;
- D = sqrt(distm(y,yrep));
- else
- D = sqrt(distm(y));
- yrep = [];
- replab = [];
- end
- if (isempty(opt.plotlab))
- opt.plotlab = ones(m,1);
- end
- it = 0; % Iteration number.
- eold = inf; % Previous stress (error).
- % Calculate initial stress.
- [e,a]= mds_samstress(opt.q,y,yrep,Ds,D,opt.nanid,opt.isratio); err = e;
- fprintf(opt.st,'iteration: %4i stress: %3.8d\n',it,e);
- switch (opt.optim)
- case 'pn', % Pseudo-Newton minimization.
- epr = e; % Previous values of E, EOLD for line search algorithm.
- eepr = e;
- add = 1e-4; % Avoid G2 equal 0; see below.
-
- BETA = 1e-3; % Parameter for line search algorithm.
- lam1 = 0;
- lam2 = 1; % LAM (the step factor) lies in (LAM1, LAM2).
- LMIN = 1e-10; % Minimum acceptable value for LAM in line search.
- lambest = 1e-4; % LAM = LAMBEST, if LAM was not found.
- % Loop until the error change falls below the tolerance or until
- % the maximum number of iterations is reached.
- while (abs(e-eold) >= opt.etol*(1 + abs(e))) & (it < opt.maxiter)
- % Plot progress if requested.
- if (opt.st > 0) & (opt.inspect > 0) & (mod(it,opt.inspect) == 0)
- % if (it == 0), figure(1); clf; end
- mds_plot(y,yrep,opt.plotlab,replab,e);
- end
- yold = y; eold = e;
- % Calculate the stress.
- [e,a] = mds_samstress (opt.q,yold,yrep,Ds,[],opt.nanid,opt.isratio);
- % Calculate gradients and pseudo-Newton update P.
- [g1,g2,cc] = mds_gradients (opt.q,yold,yrep,a*Ds,[],opt.nanid);
- p = (g1/cc)./(abs(g2/cc) + add);
- slope = g1(:)' * p(:);
- lam = 1;
- % Search for a suitable step LAM using a line search.
- while (1)
- % Take a step and calculate the delta error DE.
- y = yold + lam .* p;
- [e,a] = mds_samstress(opt.q,y,yrep,Ds,[],opt.nanid,opt.isratio);
- de = e - eold;
- % Stop if the condition for a suitable lam is fulfilled.
- if (de < BETA * lam * slope)
- break;
- end
-
- % Try to find a suitable step LAM.
- if (lam ~= 1)
- r1 = de - lam*slope;
- r2 = epr - eepr - lam2*slope;
- aa = (2*de + r1/lam^2 - r2/lam2^2)/(lam2-lam1)^3;
- bb = ((-lam2*r1)/lam^2 +(lam*r2)/lam2^2)/(lam2-lam1)^2;
- if (abs(aa) <= eps)
- lamtmp = -0.5 * slope/bb;
- else
- lamtmp = (-bb + sqrt(max(0,(bb^2 - 3*aa*slope))))/(3*aa);
- end
- % Prevent LAM from becoming too large.
- if (lamtmp > 0.5 * lam), lamtmp = 0.5 * lam; end
- else
- lamtmp = -0.5 * slope/(e - eold - slope);
- end
- % Prevent LAM from becoming too small.
- lam2 = lam;
- lam = max(lamtmp, 0.1*lam);
- epr = e;
- eepr = eold;
- if (lam < LMIN)
- y = yold + lambest .* p;
- [e,a] = mds_samstress(opt.q,y,yrep,Ds,[],opt.nanid,opt.isratio);
- break;
- end
- end
- it = it + 1;
- err = [err; e];
- fprintf(opt.st,'iteration: %4i stress: %3.8d\n',it,e);
- end
- case 'scg', % Scaled Conjugate Gradient minimization.
- sigma0 = 1e-8;
- lambda = 1e-8; % Regularization parameter.
- lambda1 = 0;
- % Calculate initial stress and direction of decrease P.
- [e,a] = mds_samstress(opt.q,y,yrep,Ds,D,opt.nanid,opt.isratio);
- g1 = mds_gradients(opt.q,y,yrep,a*Ds,D,opt.nanid); % Gradient
- p = -g1; % Direction of decrease
- gnorm2 = g1(:)' * g1(:);
- success = 1;
- % Sanity check.
- if (gnorm2 < 1e-15)
- prwarning(2,['Gradient is nearly zero: ' gnorm2 ' (unlikely); initial configuration is the right one.']);
- return
- end
- % Loop until the error change falls below the tolerance or until
- % the maximum number of iterations is reached.
- while (abs(eold - e) > opt.etol * (1 + e)) & (it < opt.maxiter)
- g0 = g1; % Previous gradient
- pnorm2 = p(:)' * p(:);
- eold = e;
- % Plot progress if requested.
- if (opt.inspect > 0) & (mod(it,opt.inspect) == 0)
- % if (it == 0), figure(1); clf; end
- mds_plot(y,yrep,opt.plotlab,replab,e);
- end
- if (success)
- sigma = sigma0/sqrt(pnorm2); % SIGMA: a small step from y to yy
- yy = y + sigma .*p;
- [e,a] = mds_samstress(opt.q,yy,yrep,Ds,[],opt.nanid,opt.isratio);
- g2 = mds_gradients(opt.q,yy,yrep,a*Ds,[],opt.nanid); % Gradient for yy
- s = (g2-g1)/sigma; % Approximation of Hessian*P directly, instead of computing
- delta = p(:)' * s(:); % the Hessian, since only DELTA = P'*Hessian*P is in fact needed
- end
- % Regularize the Hessian indirectly to make it positive definite.
- % DELTA is now computed as P'* (Hessian + regularization * Identity) * P
- delta = delta + (lambda1 - lambda) * pnorm2;
- % Indicate if the Hessian is negative definite; the regularization above was too small.
- if (delta < 0)
- lambda1 = 2 * (lambda - delta/pnorm2); % Now the Hessian will be positive definite
- delta = -delta + lambda * pnorm2; % This is obtained after plugging lambda1
- lambda = lambda1; % into the formulation a few lines above.
- end
- mi = - p(:)' * g1(:);
- yy = y + (mi/delta) .*p; % mi/delta is a step size
- [ee,a] = mds_samstress(opt.q,yy,yrep,Ds,[],opt.nanid,opt.isratio);
- % This minimization procedure is based on the second order approximation of the
- % stress function by using the gradient and the Hessian approximation. The Hessian
- % is regularized, but maybe not sufficiently. The ratio Dc (<=1) below indicates
- % a proper approximation, if Dc is close to 1.
- Dc = 2 * delta/mi^2 * (e - ee);
- e = ee;
- % If Dc > 0, then the stress can be successfully decreased.
- success = (Dc >= 0);
- if (success)
- y = yy;
- [ee,a] = mds_samstress(opt.q,yy,yrep,Ds,[],opt.nanid,opt.isratio);
- g1 = mds_gradients(opt.q,yy,yrep,a*Ds,[],opt.nanid);
- gnorm2 = g1(:)' * g1(:);
- lambda1 = 0;
- beta = max(0,(g1(:)'*(g1(:)-g0(:)))/mi);
- p = -g1 + beta .* p; % P is a new conjugate direction
- if (g1(:)'*p(:) >= 0 | mod(it-1,n*m) == 0),
- p = -g1; % No much improvement, restart
- end
- if (Dc >= 0.75)
- lambda = 0.25 * lambda; % Make the regularization smaller
- end
- it = it + 1;
- err = [err; e];
- fprintf (opt.st,'iteration: %4i stress: %3.8d\n',it,e);
- else % Dc < 0
- % Note that for Dc < 0, the iteration number IT is not increased,
- % so the Hessian is further regularized until SUCCESS equals 1.
- lambda1 = lambda;
- end
- % The approximation of the Hessian was poor or the stress was not
- % decreased (Dc < 0), hence the regularization lambda is enlarged.
- if (Dc < 0.25)
- lambda = lambda + delta * (1 - Dc)/pnorm2;
- end
- end
- end
- return;
- % **********************************************************************************
- %MDS_STRESS - Calculate Sammon stress during optimization
- %
- % E = MDS_SAMSTRESS(Q,Y,YREP,Ds,D,NANINDEX)
- %
- % INPUT
- % Q Indicator of the Sammon stress; Q = -2,-1,0,1,2
- % Y Current lower-dimensional configuration
- % YREP Configuration of the representation objects; it should be
- % empty when no representation set is considered
- % Ds Original distance matrix
- % D Approximate distance matrix (optional; otherwise computed from Y)
- % NANINDEX Index of the missing values; marked in Ds by NaN (optional; to
- % be found in Ds)
- %
- % OUTPUT
- % E Sammon stress
- %
- % DESCRIPTION
- % Computes the Sammon stress between the original distance matrix Ds and the
- % approximated distance matrix D between the mapped configuration Y and the
- % configuration of the representation set YREP, expressed as follows:
- %
- % E = 1/(sum_{i<j} Ds_{ij}^(q+2)) sum_{i<j} (Ds_{ij} - D_{ij})^2 * Ds_{ij}^q
- %
- % It is directly used in the MDS_SAMMAP routine.
- %
- % Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela@ph.tn.tudelft.nl, 2000-2003
- % Faculty of Applied Sciences, Delft University of Technology
- %
- function [e,alpha] = mds_samstress (q,y,yrep,Ds,D,nanindex,isratio)
- % If D is not given, calculate it from Y, the current mapped points.
- if (nargin < 5) | (isempty(D))
- if (~isempty(yrep))
- D = sqrt(distm(y,yrep));
- else
- D = sqrt(distm(y));
- end
- end
- if (nargin < 6)
- nanindex = []; % Not given, so calculate below.
- end
- if (nargin < 7)
- isratio = 0; % Assume this is meant.
- end
- todefine = isempty(yrep);
- [m,n] = size(y); [mm,k] = size(Ds);
- if (m ~= mm)
- error('The sizes of Y and Ds do not match.');
- end
- if (any(size(D) ~= size(Ds)))
- error ('The sizes of D and Ds do not match.');
- end
- m2 = m*k;
- % Convert to double.
- D = +D; Ds = +Ds;
- % I is the index of non-NaN, non-zero (> eps) values to be included
- % for the computation of the stress.
- I = 1:m2;
- if (~isempty(nanindex))
- I(nanindex) = [];
- end
- O = [];
- if (todefine), O = 1:m+1:m2; end
- I = setdiff(I,O);
- % If OPTIONS.isratio is set, calculate optimal ALPHA to scale with.
- if (isratio)
- alpha = sum((Ds(I).^q).*D(I).^2)/sum((Ds(I).^(q+1)).*D(I));
- Ds = alpha*Ds;
- else
- alpha = 1;
- end
-
- % C is the normalization factor.
- c = sum(Ds(I).^(q+2));
- % If Q == 0, prevent unnecessary calculation (X^0 == 1).
- if (q ~= 0)
- e = sum(Ds(I).^q .*((Ds(I)-D(I)).^2))/c;
- else
- e = sum(((Ds(I)-D(I)).^2))/c;
- end
- return;
- % **********************************************************************************
- %MDS_GRADIENTS - Gradients for variants of the Sammon stress
- %
- % [G1,G2,CC] = MDS_GRADIENTS(Q,Y,YREP,Ds,D,NANINDEX)
- %
- % INPUT
- % Q Indicator of the Sammon stress; Q = -2,-1,0,1,2
- % Y Current lower-dimensional configuration
- % YREP Configuration of the representation objects; it should be
- % empty when no representation set is considered
- % Ds Original distance matrix
- % D Approximate distance matrix (optional; otherwise computed from Y)
- % nanindex Index of missing values; marked in Ds by NaN (optional;
- % otherwise found from Ds)
- %
- % OUTPUT
- % G1 Gradient direction
- % G2 Approximation of the Hessian by its diagonal
- %
- % DESCRIPTION
- % This is a routine used directly in the MDS_SAMMAP routine.
- %
- % SEE ALSO
- % MDS, MDS_INIT, MDS_SAMMAP, MDS_SAMSTRESS
- %
- % Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela@ph.tn.tudelft.nl, 2000-2003
- % Faculty of Applied Sciences, Delft University of Technology
- %
- function [g1,g2,c] = mds_gradients(q,y,yrep,Ds,D,nanindex)
- % If D is not given, calculate it from Y, the current mapped points.
- if (nargin < 5) | (isempty(D))
- if (~isempty(yrep))
- D = sqrt(distm(y,yrep));
- else
- D = sqrt(distm(y));
- end
- end
- % If NANINDEX is not given, find it from Ds.
- if (nargin < 6)
- nanindex = find(isnan(Ds(:))==1);
- end
- % YREP is empty if no representation set is defined yet.
- % This happens when the mapping should be defined.
- todefine = (isempty(yrep));
- if (todefine)
- yrep = y;
- end
- [m,n] = size(y); [mm,k] = size(Ds);
- if (m ~= mm)
- error('The sizes of Y and Ds do not match.');
- end
- if (any(size(D) ~= size(Ds)))
- error ('The sizes of D and Ds do not match.');
- end
- m2 = m*k;
- % Convert to doubles.
- D = +D; Ds = +Ds;
- % I is the index of non-NaN, non-zero (> eps) values to be included
- % for the computation of the gradient and the Hessians diagonal.
- I = (1:m2)';
- if (~isempty(nanindex))
- I(nanindex) = [];
- end
- K = find(Ds(I) <= eps | D(I) <= eps);
- I(K) = [];
- % C is a normalization factor.
- c = -2/sum(Ds(I).^(q+2));
- % Compute G1, the gradient.
- h1 = zeros(m2,1);
- if (q == 0) % Prevent unnecessary computation when Q = 0.
- h1(I) = (Ds(I)-D(I)) ./ D(I);
- else
- h1(I) = (Ds(I)-D(I)) ./ (D(I).*(Ds(I).^(-q)));
- end
- h1 = reshape (h1',m,k);
- g2 = h1 * ones(k,n); % Here G2 is assigned only temporarily,
- g1 = c * (g2.*y - h1*yrep); % for the computation of G1.
- % Compute G2, the diagonal of the Hessian, if requested.
- if (nargout > 1)
- h2 = zeros(m2,1);
- switch (q)
- case -2,
- h2(I) = -1./(Ds(I).*D(I).^3);
- case -1,
- h2(I) = -1./(D(I).^3);
- case 0,
- h2(I) = - Ds(I)./(D(I).^3);
- case 1,
- h2(I) = - Ds(I).^2./(D(I).^3);
- case 2,
- h2(I) = -(Ds(I)./D(I)).^3;
- end
- h2 = reshape (h2',m,k);
- g2 = c * (g2 + (h2*ones(k,n)).*y.^2 + h2*yrep.^2 - 2*(h2*yrep).*y);
- end
- return;
- % **********************************************************************************
- %MDS_PLOT Plots the results of the MDS mapping in a 2D or 3D figure
- %
- % MDS_PLOT(Y,YREP,LAB,REPLAB,E)
- %
- % INPUT
- % Y Configuration in 2D or 3D space
- % YREP Configuration of the representation points in 2D or 3D space
- % LAB Labels of Y
- % REPLAB Labels of YREP
- % E Stress value
- %
- % DESCRIPTION
- % Used directly in MDS_SAMMAP routine.
- function mds_plot (y,yrep,lab,replab,e)
- % This is done in a rather complicated way in order to speed up drawing.
- K = min(size(y,2),3);
- if (K > 1)
- y = +y;
- col = 'brmk';
- sym = ['+*xosdv^<>p']';
- ii = [1:44];
- s = [col(ii-floor((ii-1)/4)*4)' sym(ii-floor((ii-1)/11)*11)];
- [nlab,lablist] = renumlab([replab;lab]); c = max(nlab);
- [ms,ns] = size(s); if (ms == 1), s = setstr(ones(m,1)*s); end
- yy = [yrep; y];
- cla;
- if (K == 2)
- hold on;
- for i = 1:c
- J = find(nlab==i); plot(yy(J,1),yy(J,2),s(i,:));
- end
- else
- for i = 1:c
- J = find(nlab==i); plot3(yy(J,1),yy(J,2),yy(J,3),s(i,:));
- if (i == 1), hold on; grid on; end
- end
- end
- title(sprintf('STRESS: %f', e));
- axis equal;
- drawnow;
- end
- return;