/ThirdParty/Bimodality/pa_HartigansDipTest.m
MATLAB | 304 lines | 210 code | 27 blank | 67 comment | 34 complexity | 0e42b490c68caab72b09fb89b51e6cfa MD5 | raw file
Possible License(s): BSD-2-Clause
- function [dip,xl,xu, ifault, gcm, lcm, mn, mj] = pa_HartigansDipTest(xpdf)
-
- % function [dip,xl,xu, ifault, gcm, lcm, mn, mj]=HartigansDipTest(xpdf)
- %
- % This is a direct translation by F. Mechler (August 27 2002)
- % into MATLAB from the original FORTRAN code of Hartigan's Subroutine DIPTST algorithm
- % Ref: Algorithm AS 217 APPL. STATIST. (1985) Vol. 34. No.3 pg 322-325
- %
- % Appended by F. Mechler (September 2 2002) to deal with a perfectly unimodal input
- % This check the original Hartigan algorithm omitted, which leads to an infinite cycle
- %
- % HartigansDipTest, like DIPTST, does the dip calculation for an ordered vector XPDF using
- % the greatest convex minorant (gcm) and the least concave majorant (lcm),
- % skipping through the data using the change points of these distributions.
- % It returns the 'DIP' statistic, and 7 more optional results, which include
- % the modal interval (XL,XU), ann error flag IFAULT (>0 flags an error)
- % as well as the minorant and majorant fits GCM, LCM, and the corresponding support indices MN, and MJ
-
- % sort X in increasing order in column vector
- x=sort(xpdf(:));
- N=length(x);
- mn=zeros(size(x));
- mj=zeros(size(x));
- lcm=zeros(size(x));
- gcm=zeros(size(x));
- ifault=0;
-
- % Check that N is positive
- if (N<=0)
- ifault=1;
- fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
- return;
- end;
-
- % Check if N is one
- if (N==1)
- xl=x(1);
- xu=x(N);
- dip=0.0;
- ifault=2;
- fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
- return;
- end;
-
- if (N>1)
- % Check that X is sorted
- if (x ~= sort(x))
- ifault=3;
- fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
- return;
- end;
- % Check for all values of X identical OR for case 1<N<4
- if ~((x(N)>x(1)) & (4<=N))
- xl=x(1);
- xu=x(N);
- dip=0.0;
- ifault=4;
- fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
- return;
- end;
- end;
-
- % Check if X is perfectly unimodal
- % Hartigan's original DIPTST algorithm did not check for this condition
- % and DIPTST runs into infinite cycle for a unimodal input
- % The condition that the input is unimodal is equivalent to having
- % at most 1 sign change in the second derivative of the input p.d.f.
- xsign=-sign(diff(diff(x)));
- % This condition check below works even
- % if the unimodal p.d.f. has its mode in the very first or last point of the input
- % because then the boolean argument is Empty Matrix, and ANY returns 1 for an Empty Matrix
- posi=find(xsign>0);
- negi=find(xsign<0);
- if isempty(posi) | isempty(negi) | all(posi<min(negi))
- % A unimodal function is its own best unimodal approximation, with a zero corresponding dip
- xl=x(1);
- xu=x(N);
- dip=0.0;
- ifault=5;
- %fprintf(1,'\n The input is a perfectly UNIMODAL input function\n');
- return;
- end;
-
- % LOW contains the index of the current estimate of the lower end of the modal interval
- % HIGH contains the index of the current estimate of the upper end of the modal interval
- fn=N;
- low=1;
- high=N;
- dip=1/fn;
- xl=x(low);
- xu=x(high);
-
- % establish the indices over which combination is necessary for the convex minorant fit
- mn(1)=1;
- for j=2:N
- mn(j)=j-1;
- % here is the beginning of a while loop
- mnj=mn(j);
- mnmnj=mn(mnj);
- a=mnj-mnmnj;
- b=j-mnj;
- while ~( (mnj==1) | ((x(j)-x(mnj))*a < (x(mnj)-x(mnmnj))*b))
- mn(j)=mnmnj;
- mnj=mn(j);
- mnmnj=mn(mnj);
- a=mnj-mnmnj;
- b=j-mnj;
- end; % here is the end of the while loop
- end; % end for j=2:N
-
- % establish the indices over which combination is necessary for the concave majorant fit
- mj(N)=N;
- na=N-1;
- for jk=1:na
- k=N-jk;
- mj(k)=k+1;
- % here is the beginning of a while loop
- mjk=mj(k);
- mjmjk=mj(mjk);
- a=mjk-mjmjk;
- b=k-mjk;
- while ~( (mjk==N) | ((x(k)-x(mjk))*a < (x(mjk)-x(mjmjk))*b))
- mj(k)=mjmjk;
- mjk=mj(k);
- mjmjk=mj(mjk);
- a=mjk-mjmjk;
- b=k-mjk;
- end; % here is the end of the while loop
- end; % end for jk=1:na
-
- itarate_flag = 1;
-
- % start the cycling of great RECYCLE
- while itarate_flag
-
- % collect the change points for the GCM from HIGH to LOW
- % CODE BREAK POINT 40
- ic=1;
- gcm(1)=high;
- igcm1=gcm(ic);
- ic=ic+1;
- gcm(ic)=mn(igcm1);
- while(gcm(ic) > low)
- igcm1=gcm(ic);
- ic=ic+1;
- gcm(ic)=mn(igcm1);
- end;
- icx=ic;
-
- % collect the change points for the LCM from LOW to HIGH
- ic=1;
- lcm(1)=low;
- lcm1=lcm(ic);
- ic=ic+1;
- lcm(ic)=mj(lcm1);
- while(lcm(ic) < high)
- lcm1=lcm(ic);
- ic=ic+1;
- lcm(ic)=mj(lcm1);
- end;
- icv=ic;
-
- % ICX, IX, IG are counters for the convex minorant
- % ICV, IV, IH are counters for the concave majorant
- ig=icx;
- ih=icv;
-
- % find the largest distance greater than 'DIP' between the GCM and the LCM from low to high
- ix=icx-1;
- iv=2;
- d=0.0;
-
- % Either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50;
- if ~(icx~=2 | icv~=2)
- d=1.0/fn;
- else
- iterate_BP50=1;
- while iterate_BP50
- % CODE BREAK POINT 50
- igcmx=gcm(ix);
- lcmiv=lcm(iv);
- if ~(igcmx > lcmiv)
- % if the next point of either the GCM or LCM is from the LCM then calculate distance here
- % OTHERWISE, GOTO BREAK POINT 55
- lcmiv1=lcm(iv-1);
- a=lcmiv-lcmiv1;
- b=igcmx-lcmiv1-1;
- dx=(x(igcmx)-x(lcmiv1))*a/(fn*(x(lcmiv)-x(lcmiv1)))-b/fn;
- ix=ix-1;
- if(dx < d)
- goto60 = 1;
- else
- d=dx;
- ig=ix+1;
- ih=iv;
- goto60 = 1;
- end;
- else
- % if the next point of either the GCM or LCM is from the GCM then calculate distance here
- % CODE BREAK POINT 55
- lcmiv=lcm(iv);
- igcm=gcm(ix);
- igcm1=gcm(ix+1);
- a=lcmiv-igcm1+1;
- b=igcm-igcm1;
- dx=a/fn-((x(lcmiv)-x(igcm1))*b)/(fn*(x(igcm)-x(igcm1)));
- iv=iv+1;
- if ~(dx < d)
- d=dx;
- ig=ix+1;
- ih=iv-1;
- end;
- goto60 = 1;
- end;
-
- if goto60
- % CODE BREAK POINT 60
- if (ix < 1) ix=1; end;
- if (iv > icv) iv=icv; end;
- iterate_BP50 = (gcm(ix) ~= lcm(iv));
- end;
- end; % End of WHILE iterate_BP50
- end; % End of ELSE (IF ~(icx~=2 | icv~=2)) i.e., either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50
-
- % CODE BREAK POINT 65
- itarate_flag = ~(d < dip);
- if itarate_flag
- % if itarate_flag is true, then continue calculations and the great iteration cycle
- % if itarate_flag is NOT true, then stop calculations here, and break out of great iteration cycle to BREAK POINT 100
-
- % calculate the DIPs for the corrent LOW and HIGH
-
- % the DIP for the convex minorant
- dl=0.0;
- % if not true, go to CODE BREAK POINT 80
- if (ig ~= icx)
- icxa=icx-1;
- for j=ig:icxa
- temp=1.0/fn;
- jb=gcm(j+1);
- je=gcm(j);
- % if not true either, go to CODE BREAK POINT 74
- if ~(je-jb <= 1)
- if~(x(je)==x(jb))
- a=(je-jb);
- const=a/(fn*(x(je)-x(jb)));
- for jr=jb:je
- b=jr-jb+1;
- t=b/fn-(x(jr)-x(jb))*const;
- if (t>temp) temp=t; end;
- end;
- end;
- end;
- % CODE BREAK POINT 74
- if (dl < temp) dl=temp; end;
- end;
- end;
-
- % the DIP for the concave majorant
- % CODE BREAK POINT 80
- du=0.0;
- % if not true, go to CODE BREAK POINT 90
- if ~(ih==icv)
- icva=icv-1;
- for k=ih:icva
- temp=1.0/fn;
- kb=lcm(k);
- ke=lcm(k+1);
- % if not true either, go to CODE BREAK POINT 86
- if ~(ke-kb <= 1)
- if ~(x(ke)==x(kb))
- a=ke-kb;
- const=a/(fn*(x(ke)-x(kb)));
- for kr=kb:ke
- b=kr-kb-1;
- t=(x(kr)-x(kb))*const-b/fn;
- if (t>temp) temp=t; end;
- end;
- end;
- end;
- % CODE BREAK POINT 86
- if (du < temp) du=temp; end;
- end;
- end;
-
- % determine the current maximum
- % CODE BREAK POINT 90
- dipnew=dl;
- if (du > dl) dipnew=du; end;
- if (dip < dipnew) dip=dipnew; end;
- low=gcm(ig);
- high=lcm(ih);
-
- end; % end of IF(itarate_flag) CODE from BREAK POINT 65
-
- % return to CODE BREAK POINT 40 or break out of great RECYCLE;
- end; % end of WHILE of great RECYCLE
-
- % CODE BREAK POINT 100
- dip=0.5*dip;
- xl=x(low);
- xu=x(high);
-
-