pa_HartigansDipTest.m - This is a direct translation by F. …

/ThirdParty/Bimodality/pa_HartigansDipTest.m

http://p-and-a.googlecode.com/ · MATLAB · 304 lines · 210 code · 27 blank · 67 comment · 34 complexity · 0e42b490c68caab72b09fb89b51e6cfa MD5 · raw file

function	[dip,xl,xu, ifault, gcm, lcm, mn, mj] = pa_HartigansDipTest(xpdf)



% function	[dip,xl,xu, ifault, gcm, lcm, mn, mj]=HartigansDipTest(xpdf)

%

% This is a direct translation by F. Mechler (August 27 2002)

% into MATLAB from the original FORTRAN code of Hartigan's Subroutine DIPTST algorithm 

% Ref: Algorithm AS 217 APPL. STATIST. (1985) Vol. 34. No.3 pg 322-325

%

% Appended by F. Mechler (September 2 2002) to deal with a perfectly unimodal input

% This check the original Hartigan algorithm omitted, which leads to an infinite cycle

%

% HartigansDipTest, like DIPTST, does the dip calculation for an ordered vector XPDF using

% the greatest convex minorant (gcm) and the least concave majorant (lcm),

% skipping through the data using the change points of these distributions.

% It returns the 'DIP' statistic, and 7 more optional results, which include

% the modal interval (XL,XU), ann error flag IFAULT (>0 flags an error)

% as well as the minorant and majorant fits GCM, LCM, and the corresponding support indices MN, and MJ



% sort X in increasing order in column vector

x=sort(xpdf(:));

N=length(x);

mn=zeros(size(x));

mj=zeros(size(x));

lcm=zeros(size(x));

gcm=zeros(size(x));

ifault=0;



% Check that N is positive

if (N<=0) 

   ifault=1;

   fprintf(1,'\nHartigansDipTest.    InputError :  ifault=%d\n',ifault);

   return;

end;



% Check if N is one

if (N==1)

   xl=x(1);

   xu=x(N);

   dip=0.0;

   ifault=2;

   fprintf(1,'\nHartigansDipTest.    InputError :  ifault=%d\n',ifault);

   return;

end;



if (N>1)

   % Check that X is sorted

   if (x ~= sort(x))

      ifault=3;

      fprintf(1,'\nHartigansDipTest.    InputError :  ifault=%d\n',ifault);

      return;

   end;

   % Check for all values of X identical OR for case 1<N<4

   if ~((x(N)>x(1)) & (4<=N))

      xl=x(1);

      xu=x(N);

      dip=0.0;

      ifault=4;

      fprintf(1,'\nHartigansDipTest.    InputError :  ifault=%d\n',ifault);

      return;

   end;

end;



% Check if X is perfectly unimodal

% Hartigan's original DIPTST algorithm did not check for this condition

% and DIPTST runs into infinite cycle for a unimodal input

% The condition that the input is unimodal is equivalent to having 

% at most 1 sign change in the second derivative of the input p.d.f.

xsign=-sign(diff(diff(x)));

% This condition check below works even 

% if the unimodal p.d.f. has its mode in the very first or last point of the input 

% because then the boolean argument is Empty Matrix, and ANY returns 1 for an Empty Matrix

posi=find(xsign>0);

negi=find(xsign<0);

if isempty(posi) | isempty(negi) | all(posi<min(negi))

   % A unimodal function is its own best unimodal approximation, with a zero corresponding dip

   xl=x(1);

   xu=x(N);

   dip=0.0;

   ifault=5;

	%fprintf(1,'\n  The input is a perfectly UNIMODAL input function\n');

   return;

end;



% LOW  contains the index of the current estimate of the lower end of the modal interval

% HIGH contains the index of the current estimate of the upper end of the modal interval

fn=N;

low=1;

high=N;

dip=1/fn;

xl=x(low);

xu=x(high);



% establish the indices over which combination is necessary for the convex minorant fit

mn(1)=1;

for j=2:N

   mn(j)=j-1;

   % here is the beginning of a while loop

   mnj=mn(j);

   mnmnj=mn(mnj);

   a=mnj-mnmnj;

   b=j-mnj;

   while ~( (mnj==1) | ((x(j)-x(mnj))*a < (x(mnj)-x(mnmnj))*b))

      mn(j)=mnmnj;

      mnj=mn(j);

      mnmnj=mn(mnj);

      a=mnj-mnmnj;

      b=j-mnj;

   end;   % here is the end of the while loop

end; % end  for j=2:N



% establish the indices over which combination is necessary for the concave majorant fit

mj(N)=N;

na=N-1;

for jk=1:na

   k=N-jk;

   mj(k)=k+1;

   % here is the beginning of a while loop

   mjk=mj(k);

   mjmjk=mj(mjk);

   a=mjk-mjmjk;

   b=k-mjk;

   while ~( (mjk==N) | ((x(k)-x(mjk))*a < (x(mjk)-x(mjmjk))*b))

      mj(k)=mjmjk;

      mjk=mj(k);

      mjmjk=mj(mjk);

      a=mjk-mjmjk;

      b=k-mjk;

   end;   % here is the end of the while loop

end; % end  for jk=1:na



itarate_flag = 1;



% start the cycling of great RECYCLE

while itarate_flag 



% collect the change points for the GCM from HIGH to LOW

% CODE BREAK POINT 40

ic=1;

gcm(1)=high;

igcm1=gcm(ic);

ic=ic+1;

gcm(ic)=mn(igcm1);

while(gcm(ic) > low)

   igcm1=gcm(ic);

   ic=ic+1;

   gcm(ic)=mn(igcm1);

end;

icx=ic;



% collect the change points for the LCM from LOW to HIGH

ic=1;

lcm(1)=low;

lcm1=lcm(ic);

ic=ic+1;

lcm(ic)=mj(lcm1);

while(lcm(ic) < high)

   lcm1=lcm(ic);

   ic=ic+1;

   lcm(ic)=mj(lcm1);

end;

icv=ic;



% ICX, IX, IG are counters for the convex minorant

% ICV, IV, IH are counters for the concave majorant

ig=icx;

ih=icv;



% find the largest distance greater than 'DIP' between the GCM and the LCM from low to high

ix=icx-1;

iv=2;

d=0.0;



% Either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50;

if ~(icx~=2 | icv~=2)

   d=1.0/fn;

else

   iterate_BP50=1;

   while iterate_BP50

		% CODE BREAK POINT 50

		igcmx=gcm(ix);

      lcmiv=lcm(iv);

      if ~(igcmx > lcmiv)

         % if the next point of either the GCM or LCM is from the LCM then calculate distance here

         % OTHERWISE, GOTO BREAK POINT 55

         lcmiv1=lcm(iv-1);

         a=lcmiv-lcmiv1;

         b=igcmx-lcmiv1-1;

         dx=(x(igcmx)-x(lcmiv1))*a/(fn*(x(lcmiv)-x(lcmiv1)))-b/fn;

         ix=ix-1;

         if(dx < d) 

            goto60 = 1; 

         else

            d=dx;

            ig=ix+1;

            ih=iv;

            goto60 = 1;

         end;

      else

         % if the next point of either the GCM or LCM is from the GCM then calculate distance here

         % CODE BREAK POINT 55

         lcmiv=lcm(iv);

         igcm=gcm(ix);

         igcm1=gcm(ix+1);

         a=lcmiv-igcm1+1;

         b=igcm-igcm1;

         dx=a/fn-((x(lcmiv)-x(igcm1))*b)/(fn*(x(igcm)-x(igcm1)));

         iv=iv+1;

         if ~(dx < d) 

            d=dx;

            ig=ix+1;

            ih=iv-1;

         end;

         goto60 = 1;

      end;

      

      if goto60

         % CODE BREAK POINT 60

         if (ix < 1) ix=1; end;

         if (iv > icv) iv=icv; end;

         iterate_BP50 = (gcm(ix) ~= lcm(iv)); 

      end;

   end; % End of WHILE iterate_BP50

end; % End of ELSE (IF ~(icx~=2 | icv~=2)) i.e., either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50



% CODE BREAK POINT 65

itarate_flag = ~(d < dip);

if itarate_flag

% if itarate_flag is true, then continue calculations and the great iteration cycle

% if itarate_flag is NOT true, then stop calculations here, and break out of great iteration cycle to BREAK POINT 100

   

% calculate the DIPs for the corrent LOW and HIGH



% the DIP for the convex minorant

dl=0.0;

% if not true, go to CODE BREAK POINT 80

if (ig ~= icx)

   icxa=icx-1;

   for j=ig:icxa

      temp=1.0/fn;

   	jb=gcm(j+1);

      je=gcm(j);

      % if not true either, go to CODE BREAK POINT 74

      if ~(je-jb <= 1)

         if~(x(je)==x(jb))

            a=(je-jb);

            const=a/(fn*(x(je)-x(jb)));

            for jr=jb:je

               b=jr-jb+1;

               t=b/fn-(x(jr)-x(jb))*const;

               if (t>temp) temp=t; end;

            end;

         end;

      end;

      % CODE BREAK POINT 74

      if (dl < temp) dl=temp; end;

   end;

end;



% the DIP for the concave majorant

% CODE BREAK POINT 80

du=0.0;

% if not true, go to CODE BREAK POINT 90

if ~(ih==icv)

   icva=icv-1;

   for k=ih:icva

      temp=1.0/fn;

      kb=lcm(k);

      ke=lcm(k+1);

      % if not true either, go to CODE BREAK POINT 86

      if ~(ke-kb <= 1)

         if ~(x(ke)==x(kb))

            a=ke-kb;

            const=a/(fn*(x(ke)-x(kb)));

            for kr=kb:ke

               b=kr-kb-1;

               t=(x(kr)-x(kb))*const-b/fn;

               if (t>temp) temp=t; end;

            end;

         end;

      end;

      % CODE BREAK POINT 86

      if (du < temp) du=temp; end;

   end;

end;



% determine the current maximum

% CODE BREAK POINT 90

dipnew=dl;

if (du > dl) dipnew=du; end;

if (dip < dipnew) dip=dipnew; end;

low=gcm(ig);

high=lcm(ih);      



end; % end of IF(itarate_flag) CODE from BREAK POINT 65



% return to CODE BREAK POINT 40 or break out of great RECYCLE;

end; % end of WHILE of great RECYCLE



% CODE BREAK POINT 100

dip=0.5*dip;

xl=x(low);

xu=x(high);