PageRenderTime 52ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/ThirdParty/Bimodality/pa_HartigansDipTest.m

http://p-and-a.googlecode.com/
MATLAB | 304 lines | 210 code | 27 blank | 67 comment | 34 complexity | 0e42b490c68caab72b09fb89b51e6cfa MD5 | raw file
Possible License(s): BSD-2-Clause
  1. function [dip,xl,xu, ifault, gcm, lcm, mn, mj] = pa_HartigansDipTest(xpdf)
  2. % function [dip,xl,xu, ifault, gcm, lcm, mn, mj]=HartigansDipTest(xpdf)
  3. %
  4. % This is a direct translation by F. Mechler (August 27 2002)
  5. % into MATLAB from the original FORTRAN code of Hartigan's Subroutine DIPTST algorithm
  6. % Ref: Algorithm AS 217 APPL. STATIST. (1985) Vol. 34. No.3 pg 322-325
  7. %
  8. % Appended by F. Mechler (September 2 2002) to deal with a perfectly unimodal input
  9. % This check the original Hartigan algorithm omitted, which leads to an infinite cycle
  10. %
  11. % HartigansDipTest, like DIPTST, does the dip calculation for an ordered vector XPDF using
  12. % the greatest convex minorant (gcm) and the least concave majorant (lcm),
  13. % skipping through the data using the change points of these distributions.
  14. % It returns the 'DIP' statistic, and 7 more optional results, which include
  15. % the modal interval (XL,XU), ann error flag IFAULT (>0 flags an error)
  16. % as well as the minorant and majorant fits GCM, LCM, and the corresponding support indices MN, and MJ
  17. % sort X in increasing order in column vector
  18. x=sort(xpdf(:));
  19. N=length(x);
  20. mn=zeros(size(x));
  21. mj=zeros(size(x));
  22. lcm=zeros(size(x));
  23. gcm=zeros(size(x));
  24. ifault=0;
  25. % Check that N is positive
  26. if (N<=0)
  27. ifault=1;
  28. fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
  29. return;
  30. end;
  31. % Check if N is one
  32. if (N==1)
  33. xl=x(1);
  34. xu=x(N);
  35. dip=0.0;
  36. ifault=2;
  37. fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
  38. return;
  39. end;
  40. if (N>1)
  41. % Check that X is sorted
  42. if (x ~= sort(x))
  43. ifault=3;
  44. fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
  45. return;
  46. end;
  47. % Check for all values of X identical OR for case 1<N<4
  48. if ~((x(N)>x(1)) & (4<=N))
  49. xl=x(1);
  50. xu=x(N);
  51. dip=0.0;
  52. ifault=4;
  53. fprintf(1,'\nHartigansDipTest. InputError : ifault=%d\n',ifault);
  54. return;
  55. end;
  56. end;
  57. % Check if X is perfectly unimodal
  58. % Hartigan's original DIPTST algorithm did not check for this condition
  59. % and DIPTST runs into infinite cycle for a unimodal input
  60. % The condition that the input is unimodal is equivalent to having
  61. % at most 1 sign change in the second derivative of the input p.d.f.
  62. xsign=-sign(diff(diff(x)));
  63. % This condition check below works even
  64. % if the unimodal p.d.f. has its mode in the very first or last point of the input
  65. % because then the boolean argument is Empty Matrix, and ANY returns 1 for an Empty Matrix
  66. posi=find(xsign>0);
  67. negi=find(xsign<0);
  68. if isempty(posi) | isempty(negi) | all(posi<min(negi))
  69. % A unimodal function is its own best unimodal approximation, with a zero corresponding dip
  70. xl=x(1);
  71. xu=x(N);
  72. dip=0.0;
  73. ifault=5;
  74. %fprintf(1,'\n The input is a perfectly UNIMODAL input function\n');
  75. return;
  76. end;
  77. % LOW contains the index of the current estimate of the lower end of the modal interval
  78. % HIGH contains the index of the current estimate of the upper end of the modal interval
  79. fn=N;
  80. low=1;
  81. high=N;
  82. dip=1/fn;
  83. xl=x(low);
  84. xu=x(high);
  85. % establish the indices over which combination is necessary for the convex minorant fit
  86. mn(1)=1;
  87. for j=2:N
  88. mn(j)=j-1;
  89. % here is the beginning of a while loop
  90. mnj=mn(j);
  91. mnmnj=mn(mnj);
  92. a=mnj-mnmnj;
  93. b=j-mnj;
  94. while ~( (mnj==1) | ((x(j)-x(mnj))*a < (x(mnj)-x(mnmnj))*b))
  95. mn(j)=mnmnj;
  96. mnj=mn(j);
  97. mnmnj=mn(mnj);
  98. a=mnj-mnmnj;
  99. b=j-mnj;
  100. end; % here is the end of the while loop
  101. end; % end for j=2:N
  102. % establish the indices over which combination is necessary for the concave majorant fit
  103. mj(N)=N;
  104. na=N-1;
  105. for jk=1:na
  106. k=N-jk;
  107. mj(k)=k+1;
  108. % here is the beginning of a while loop
  109. mjk=mj(k);
  110. mjmjk=mj(mjk);
  111. a=mjk-mjmjk;
  112. b=k-mjk;
  113. while ~( (mjk==N) | ((x(k)-x(mjk))*a < (x(mjk)-x(mjmjk))*b))
  114. mj(k)=mjmjk;
  115. mjk=mj(k);
  116. mjmjk=mj(mjk);
  117. a=mjk-mjmjk;
  118. b=k-mjk;
  119. end; % here is the end of the while loop
  120. end; % end for jk=1:na
  121. itarate_flag = 1;
  122. % start the cycling of great RECYCLE
  123. while itarate_flag
  124. % collect the change points for the GCM from HIGH to LOW
  125. % CODE BREAK POINT 40
  126. ic=1;
  127. gcm(1)=high;
  128. igcm1=gcm(ic);
  129. ic=ic+1;
  130. gcm(ic)=mn(igcm1);
  131. while(gcm(ic) > low)
  132. igcm1=gcm(ic);
  133. ic=ic+1;
  134. gcm(ic)=mn(igcm1);
  135. end;
  136. icx=ic;
  137. % collect the change points for the LCM from LOW to HIGH
  138. ic=1;
  139. lcm(1)=low;
  140. lcm1=lcm(ic);
  141. ic=ic+1;
  142. lcm(ic)=mj(lcm1);
  143. while(lcm(ic) < high)
  144. lcm1=lcm(ic);
  145. ic=ic+1;
  146. lcm(ic)=mj(lcm1);
  147. end;
  148. icv=ic;
  149. % ICX, IX, IG are counters for the convex minorant
  150. % ICV, IV, IH are counters for the concave majorant
  151. ig=icx;
  152. ih=icv;
  153. % find the largest distance greater than 'DIP' between the GCM and the LCM from low to high
  154. ix=icx-1;
  155. iv=2;
  156. d=0.0;
  157. % Either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50;
  158. if ~(icx~=2 | icv~=2)
  159. d=1.0/fn;
  160. else
  161. iterate_BP50=1;
  162. while iterate_BP50
  163. % CODE BREAK POINT 50
  164. igcmx=gcm(ix);
  165. lcmiv=lcm(iv);
  166. if ~(igcmx > lcmiv)
  167. % if the next point of either the GCM or LCM is from the LCM then calculate distance here
  168. % OTHERWISE, GOTO BREAK POINT 55
  169. lcmiv1=lcm(iv-1);
  170. a=lcmiv-lcmiv1;
  171. b=igcmx-lcmiv1-1;
  172. dx=(x(igcmx)-x(lcmiv1))*a/(fn*(x(lcmiv)-x(lcmiv1)))-b/fn;
  173. ix=ix-1;
  174. if(dx < d)
  175. goto60 = 1;
  176. else
  177. d=dx;
  178. ig=ix+1;
  179. ih=iv;
  180. goto60 = 1;
  181. end;
  182. else
  183. % if the next point of either the GCM or LCM is from the GCM then calculate distance here
  184. % CODE BREAK POINT 55
  185. lcmiv=lcm(iv);
  186. igcm=gcm(ix);
  187. igcm1=gcm(ix+1);
  188. a=lcmiv-igcm1+1;
  189. b=igcm-igcm1;
  190. dx=a/fn-((x(lcmiv)-x(igcm1))*b)/(fn*(x(igcm)-x(igcm1)));
  191. iv=iv+1;
  192. if ~(dx < d)
  193. d=dx;
  194. ig=ix+1;
  195. ih=iv-1;
  196. end;
  197. goto60 = 1;
  198. end;
  199. if goto60
  200. % CODE BREAK POINT 60
  201. if (ix < 1) ix=1; end;
  202. if (iv > icv) iv=icv; end;
  203. iterate_BP50 = (gcm(ix) ~= lcm(iv));
  204. end;
  205. end; % End of WHILE iterate_BP50
  206. end; % End of ELSE (IF ~(icx~=2 | icv~=2)) i.e., either GOTO CODE BREAK POINT 65 OR ELSE GOTO CODE BREAK POINT 50
  207. % CODE BREAK POINT 65
  208. itarate_flag = ~(d < dip);
  209. if itarate_flag
  210. % if itarate_flag is true, then continue calculations and the great iteration cycle
  211. % if itarate_flag is NOT true, then stop calculations here, and break out of great iteration cycle to BREAK POINT 100
  212. % calculate the DIPs for the corrent LOW and HIGH
  213. % the DIP for the convex minorant
  214. dl=0.0;
  215. % if not true, go to CODE BREAK POINT 80
  216. if (ig ~= icx)
  217. icxa=icx-1;
  218. for j=ig:icxa
  219. temp=1.0/fn;
  220. jb=gcm(j+1);
  221. je=gcm(j);
  222. % if not true either, go to CODE BREAK POINT 74
  223. if ~(je-jb <= 1)
  224. if~(x(je)==x(jb))
  225. a=(je-jb);
  226. const=a/(fn*(x(je)-x(jb)));
  227. for jr=jb:je
  228. b=jr-jb+1;
  229. t=b/fn-(x(jr)-x(jb))*const;
  230. if (t>temp) temp=t; end;
  231. end;
  232. end;
  233. end;
  234. % CODE BREAK POINT 74
  235. if (dl < temp) dl=temp; end;
  236. end;
  237. end;
  238. % the DIP for the concave majorant
  239. % CODE BREAK POINT 80
  240. du=0.0;
  241. % if not true, go to CODE BREAK POINT 90
  242. if ~(ih==icv)
  243. icva=icv-1;
  244. for k=ih:icva
  245. temp=1.0/fn;
  246. kb=lcm(k);
  247. ke=lcm(k+1);
  248. % if not true either, go to CODE BREAK POINT 86
  249. if ~(ke-kb <= 1)
  250. if ~(x(ke)==x(kb))
  251. a=ke-kb;
  252. const=a/(fn*(x(ke)-x(kb)));
  253. for kr=kb:ke
  254. b=kr-kb-1;
  255. t=(x(kr)-x(kb))*const-b/fn;
  256. if (t>temp) temp=t; end;
  257. end;
  258. end;
  259. end;
  260. % CODE BREAK POINT 86
  261. if (du < temp) du=temp; end;
  262. end;
  263. end;
  264. % determine the current maximum
  265. % CODE BREAK POINT 90
  266. dipnew=dl;
  267. if (du > dl) dipnew=du; end;
  268. if (dip < dipnew) dip=dipnew; end;
  269. low=gcm(ig);
  270. high=lcm(ih);
  271. end; % end of IF(itarate_flag) CODE from BREAK POINT 65
  272. % return to CODE BREAK POINT 40 or break out of great RECYCLE;
  273. end; % end of WHILE of great RECYCLE
  274. % CODE BREAK POINT 100
  275. dip=0.5*dip;
  276. xl=x(low);
  277. xu=x(high);