PageRenderTime 58ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/cvs_final/octave-forge/main/econometrics/inst/kernel_density.m

#
MATLAB | 116 lines | 101 code | 15 blank | 0 comment | 16 complexity | bc74b051c25a2c4903e9ecccd6d92d9d MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0
  1. # Copyright (C) 2006 Michael Creel <michael.creel@uab.es>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  16. # kernel_density: multivariate kernel density estimator
  17. #
  18. # usage:
  19. # dens = kernel_density(eval_points, data, bandwidth)
  20. #
  21. # inputs:
  22. # eval_points: PxK matrix of points at which to calculate the density
  23. # data: NxK matrix of data points
  24. # bandwidth: positive scalar, the smoothing parameter. The fit
  25. # is more smooth as the bandwidth increases.
  26. # do_cv: bool (optional). default false. If true, calculate leave-1-out
  27. # density for cross validation
  28. # nslaves: int (optional, default 0). Number of compute nodes for parallel evaluation
  29. # debug: bool (optional, default false). show results on compute nodes if doing
  30. # parallel run
  31. # bandwidth_matrix (optional): nonsingular KxK matrix. Rotates data.
  32. # Default is the Cholesky factor of the covariance matrix of the data;
  33. # the data are multiplied by its inverse (scaled by the bandwidth) to
  34. # approximate independence after the transformation, which
  35. # makes a product kernel a reasonable choice.
  35. # kernel (optional): string. Name of the kernel function. Default is radial
  36. # symmetric Epanechnikov kernel.
  37. # outputs:
  38. # dens: Px1 vector: the fitted density value at each of the P evaluation points.
  39. #
  40. # References:
  41. # Wand, M.P. and Jones, M.C. (1995), 'Kernel smoothing'.
  42. # http://www.xplore-stat.de/ebooks/scripts/spm/html/spmhtmlframe73.html
  function z = kernel_density(eval_points, data, bandwidth, do_cv, nslaves, debug, bandwidth_matrix, kernel)
    # Kernel density estimate of the rows of `data`, evaluated at each row of
    # `eval_points`. The first three arguments are required; the remaining
    # ones are positional and fall back to the defaults assigned below.
    #
    # The seventh parameter used to be spelled "bandwith_matrix" in the
    # signature while the body read "bandwidth_matrix", so supplying it left
    # the variable used below undefined. The spelling is now consistent.

    if (nargin < 3)
      error("kernel_density: at least 3 arguments are required");
    endif

    # --- defaults for optional arguments ---
    # ordinary density, not leave-1-out
    if (nargin < 4)
      do_cv = false;
    endif
    # serial evaluation
    if (nargin < 5)
      nslaves = 0;
    endif
    # no debug output on compute nodes
    if (nargin < 6)
      debug = false;
    endif
    # bandwidth matrix (up to a factor of proportionality): Cholesky factor
    # of the data covariance
    if (nargin < 7)
      bandwidth_matrix = chol(cov(data));
    endif
    # kernel function name
    if (nargin < 8)
      kernel = "__kernel_epanechnikov";
    endif

    nn = rows(eval_points);
    n = rows(data);

    # Inverse bandwidth matrix H_inv
    H = bandwidth_matrix*bandwidth;
    H_inv = inv(H);

    # weight both point sets by the inverse bandwidth matrix
    eval_points = eval_points*H_inv;
    data = data*H_inv;

    # check if doing this parallel or serial
    global PARALLEL NSLAVES NEWORLD TAG
    PARALLEL = 0;
    if nslaves > 0
      PARALLEL = 1;
      NSLAVES = nslaves;
      LAM_Init(nslaves, debug);
    endif

    if !PARALLEL # ordinary serial version
      points_per_node = nn; # do them all on this node
      z = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug);
    else # parallel version
      z = zeros(nn,1);
      points_per_node = floor(nn/(NSLAVES + 1)); # number of obsns per slave

      # The command that the slave nodes will execute
      cmd=['z_on_node = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug); ',...
           'MPI_Send(z_on_node, 0, TAG, NEWORLD);'];

      # send items to slaves
      NumCmds_Send({"eval_points", "data", "do_cv", "kernel", "points_per_node", "nslaves", "debug","cmd"}, {eval_points, data, do_cv, kernel, points_per_node, nslaves, debug, cmd});

      # evaluate last block on master while slaves are busy; the master's
      # block runs from just after the slaves' blocks through the last point
      z_on_node = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug);
      startblock = NSLAVES*points_per_node + 1;
      endblock = nn;
      z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;

      # collect slaves' results
      z_on_node = zeros(points_per_node,1); # size may differ between master and compute nodes - reset here
      for i = 1:NSLAVES
        MPI_Recv(z_on_node,i,TAG,NEWORLD);
        # slave i owns the i-th block of points_per_node rows
        startblock = i*points_per_node - points_per_node + 1;
        endblock = i*points_per_node;
        z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
      endfor

      # clean up after parallel
      LAM_Finalize;
    endif

    # rescale by the determinant of the inverse bandwidth matrix
    z = z*det(H_inv);
  endfunction