/src/C/hierarchical.c

http://github.com/beechung/Latent-Factor-Models · C · 149 lines · 103 code · 12 blank · 34 comment · 16 complexity · 87c878f1948eac516c44e4fc5cfa80f0 MD5 · raw file

  1. /*
  2. Copyright (c) 2011, Yahoo! Inc. All rights reserved.
  3. Copyrights licensed under the New BSD License. See the accompanying LICENSE file for terms.
  4. Author: Bee-Chung Chen
  5. */
  6. #include <Rinternals.h>
  7. #include "util.h"
  8. #include <stdio.h>
  9. void hierarchical_smoothing_1D_2Levels(
  10. // Output
  11. double *a_mean, double *a_var, // nItems x 1
  12. double *b_mean, double *b_var, double *b_cov, // nItems x nCategories
  13. // Input
  14. const int *itemIndex, const int *categoryIndex, const double *obs, const double *var_obs, // nObs x 1
  15. const double *q, const double *var_b, // nCategories x 1
  16. const double *var_a, // 1x1
  17. const int *numObs, const int *numItems, const int *numCategories,
  18. // Control
  19. const int *verbose, const int *debug
  20. ){
  21. int nObs = (*numObs), nItems = (*numItems), nCategories = (*numCategories);
  22. // Compute sufficient statistics
  23. // b_mean[i,k] = sum_j { o_{ijk}/var_obs_{ijk} } = C_ik
  24. // b_var[i,k] = sum_j { 1/var_obs_{ijk} } = F_ik
  25. for(int n=0; n < nItems*nCategories; n++){ b_mean[n] = 0; b_var[n] = 0; }
  26. for(int j=0; j<nObs; j++){
  27. double o = obs[j];
  28. int i = itemIndex[j] - 1;
  29. int k = categoryIndex[j] - 1;
  30. CHK_C_INDEX(i,nItems); CHK_C_INDEX(k,nCategories);
  31. // double var_obs_plus_var_b = var_obs[j] + var_b[k];
  32. b_mean[C_MAT(i,k,nItems)] += o / var_obs[j];
  33. b_var[C_MAT(i,k,nItems)] += 1 / var_obs[j];
  34. }
  35. // Compute E[a[i] | obs] and Var[a[i] | obs]
  36. for(int i=0; i<nItems; i++){
  37. double sum_for_mean = 0; // sum_k E[a_i | obs_ik]/Var[a_i | obs_ik]
  38. double sum_for_var = 0; // sum_k (1/Var[a_i | obs_ik] - 1/var_a)
  39. for(int k=0; k<nCategories; k++){
  40. double F = b_var[C_MAT(i,k,nItems)];
  41. if(F == 0) continue;
  42. double C = b_mean[C_MAT(i,k,nItems)];
  43. double A = (1/var_b[k]) + F;
  44. // E[a_i | obs_ik]/Var[a_i | obs_ik] = (C * q[k]) / (A * var_b[k])
  45. // (1/Var[a_i | obs_ik] - 1/var_a) = (q[k]^2 / var_b[k]) * (1 - 1/(A * var_b[k]))
  46. sum_for_mean += (C * q[k]) / (A * var_b[k]);
  47. sum_for_var += (q[k]*q[k] / var_b[k]) * (1 - 1/(A * var_b[k]));
  48. }
  49. if((*verbose) >= 100) printf(" i=%d: sum.for.a.mean=%f, sum.for.a.var=%f\n", i+1, sum_for_mean, sum_for_var);
  50. // Compute E[a[i] | obs] and Var[a[i] | obs]
  51. a_var[i] = 1/( (1/var_a[0]) + sum_for_var );
  52. a_mean[i] = a_var[i] * sum_for_mean;
  53. }
  54. // Compute E[b[i,k] | obs], Var[b[i,k] | obs], Cov[b[i,k], a[i] | obs]
  55. for(int i=0; i<nItems; i++){ for(int k=0; k<nCategories; k++){
  56. int ik = C_MAT(i,k,nItems);
  57. double A = 1 / ( (1/var_b[k]) + b_var[ik] );
  58. double D = (A * q[k]) / var_b[k];
  59. b_mean[ik] = D * a_mean[i] + A * b_mean[ik];
  60. b_var[ik] = A + D*D*a_var[i];
  61. b_cov[ik] = D * a_var[i];
  62. }}
  63. }
  64. SEXP hierarchical_smoothing_1D_2Levels_Call(
  65. // Output
  66. SEXP a_mean, SEXP a_var, // nItems x 1
  67. SEXP b_mean, SEXP b_var, SEXP b_cov, // nItems x nCategories
  68. // Input
  69. SEXP itemIndex, SEXP categoryIndex, SEXP obs, SEXP var_obs, // nObs x 1
  70. SEXP q, SEXP var_b, // nCategories x 1
  71. SEXP var_a, // 1x1
  72. SEXP numObs, SEXP numItems, SEXP numCategories,
  73. // Control
  74. SEXP verbose, SEXP debug
  75. ){
  76. hierarchical_smoothing_1D_2Levels(
  77. // Output
  78. MY_REAL(a_mean), MY_REAL(a_var), // nItems x 1
  79. MY_REAL(b_mean), MY_REAL(b_var), MY_REAL(b_cov), // nItems x nCategories
  80. // Input
  81. MY_INTEGER(itemIndex), MY_INTEGER(categoryIndex), MY_REAL(obs), MY_REAL(var_obs), // nObs x 1
  82. MY_REAL(q), MY_REAL(var_b), // nCategories x 1
  83. MY_REAL(var_a), // 1x1
  84. MY_INTEGER(numObs), MY_INTEGER(numItems), MY_INTEGER(numCategories),
  85. // Control
  86. MY_INTEGER(verbose), MY_INTEGER(debug)
  87. );
  88. return R_NilValue;
  89. }
  90. void hierarchical_smoothing_1D_2Levels_incorrect(
  91. // Output
  92. double *a_mean, double *a_var, // nItems x 1
  93. double *b_mean, double *b_var, double *b_cov, // nItems x nCategories
  94. // Input
  95. const int *itemIndex, const int *categoryIndex, const double *obs, const double *var_obs, // nObs x 1
  96. const double *q, const double *var_b, // nCategories x 1
  97. const double *var_a, // 1x1
  98. const int *numObs, const int *numItems, const int *numCategories,
  99. // Control
  100. const int *verbose, const int *debug
  101. ){
  102. int nObs = (*numObs), nItems = (*numItems), nCategories = (*numCategories);
  103. // Compute sufficient statistics
  104. // a_mean[i] = sum{ (q_k * o_{ijk})/(var_obs_{ijk} + var_b_{k}) }
  105. // a_var[i] = sum{ (q_k * q_k)/(var_obs_{ijk} + var_b_{k}) }
  106. // b_mean[i,k] = sum{ o_{ijk}/var_obs_{ijk} }
  107. // b_var[i,k] = sum{ 1/var_obs_{ijk} }
  108. for(int i=0; i<nItems; i++){ a_mean[i] = 0; a_var[i] = 0; }
  109. for(int n=0; n < nItems*nCategories; n++){ b_mean[n] = 0; b_var[n] = 0; }
  110. for(int j=0; j<nObs; j++){
  111. double o = obs[j];
  112. int i = itemIndex[j] - 1;
  113. int k = categoryIndex[j] - 1;
  114. CHK_C_INDEX(i,nItems); CHK_C_INDEX(k,nCategories);
  115. double var_obs_plus_var_b = var_obs[j] + var_b[k];
  116. a_mean[i] += (q[k]*o) / var_obs_plus_var_b;
  117. a_var[i] += (q[k]*q[k]) / var_obs_plus_var_b;
  118. b_mean[C_MAT(i,k,nItems)] += o / var_obs[j];
  119. b_var[C_MAT(i,k,nItems)] += 1 / var_obs[j];
  120. }
  121. // Compute E[a[i] | obs] and Var[a[i] | obs]
  122. for(int i=0; i<nItems; i++){
  123. if((*verbose) >= 100) printf(" i=%d: sum.for.a.mean=%f, sum.for.a.var=%f\n", i+1, a_mean[i], a_var[i]);
  124. a_var[i] = 1/( (1/var_a[0]) + a_var[i] );
  125. a_mean[i] = a_var[i] * a_mean[i];
  126. }
  127. // Compute E[b[i,k] | obs], Var[b[i,k] | obs], Cov[b[i,k], a[i] | obs]
  128. for(int i=0; i<nItems; i++){ for(int k=0; k<nCategories; k++){
  129. int ik = C_MAT(i,k,nItems);
  130. double A = 1 / ( (1/var_b[k]) + b_var[ik] );
  131. double D = (A * q[k]) / var_b[k];
  132. b_mean[ik] = D * a_mean[i] + A * b_mean[ik];
  133. b_var[ik] = A + D*D*a_var[i];
  134. b_cov[ik] = D * a_var[i];
  135. }}
  136. }