/src/C/util.h

http://github.com/beechung/Latent-Factor-Models · C Header · 223 lines · 144 code · 34 blank · 45 comment · 20 complexity · 0f732d9c7f1fa789aad69b46ad153b50 MD5 · raw file

  1. /*
  2. Copyright (c) 2011, Yahoo! Inc. All rights reserved.
  3. Copyrights licensed under the New BSD License. See the accompanying LICENSE file for terms.
  4. Author: Bee-Chung Chen
  5. */
  6. #include <R_ext/Applic.h>
  7. #define R_VEC(i) (i-1)
  8. #define R_MAT(i,j,nrow) (((j-1)*(nrow))+(i-1))
  9. #define R_3DA(i,j,k,nrow,ncol) ((((k-1)*(ncol))+(j-1))*(nrow) + (i-1))
  10. #define R_4DA(i,j,k,m,dim1,dim2,dim3) (((((m-1)*(dim3)+(k-1))*(dim2))+(j-1))*(dim1) + (i-1))
  11. #define C_MAT(i,j,nrow) (((j)*(nrow))+(i))
  12. #define C_3DA(i,j,k,nrow,ncol) ((((k)*(ncol))+(j))*(nrow) + (i))
  13. #define C_4DA(i,j,k,m,dim1,dim2,dim3) (((((m)*(dim3)+(k))*(dim2))+(j))*(dim1) + (i))
  14. #define CHK_C_INDEX(index, num) if(index < 0 || index >= num) error("index out of bound: index=%d, bound=%d (file: %s, line: %d)", index, num, __FILE__, __LINE__)
  15. #define CHK_C_INDEX_2D(row_i, col_j, nrow, ncol) if(row_i < 0 || row_i >= nrow || col_j < 0 || col_j >= ncol) error("index out of bound: i=%d, j=%d nrow=%d ncol=%d (file: %s, line: %d)", row_i, col_j, nrow, ncol, __FILE__, __LINE__)
  16. #define CHK_R_INDEX(index, num) if(index < 1 || index > num) error("index out of bound: index=%d, bound=%d (file: %s, line: %d)", index, num, __FILE__, __LINE__)
  17. #define CHK_SYMMETRIC(A, nrow) for(int iii=0; iii<nrow; iii++) for(int jjj=0; jjj<iii; jjj++) if(fabs(A[C_MAT(iii,jjj,nrow)] - A[C_MAT(jjj,iii,nrow)]) > 1e-10) error("A symmetric matrix is not symmetric: %f vs %f, diff=%e (file: %s, line: %d)", A[C_MAT(iii,jjj,nrow)], A[C_MAT(jjj,iii,nrow)], A[C_MAT(iii,jjj,nrow)] - A[C_MAT(jjj,iii,nrow)], __FILE__, __LINE__)
  18. #define CHK_SAME_NUMBER(msg, x, y) if((x != 0 || y != 0) && (fabs(x-y) / fmax(fabs(x), fabs(y)) > 1e-8)) error("Error: %s. The two number should be the same: %f vs %f (file: %s, line: %d)", msg, x, y, __FILE__, __LINE__)
  19. #define MY_REAL(x) (TYPEOF(x) == NILSXP ? NULL : REAL(x))
  20. #define MY_INTEGER(x) (TYPEOF(x) == NILSXP ? NULL : INTEGER(x))
  21. #define CHK_MAT_SYM(msg, A, nrow) for(int CHK_MAT_SYM_i=0; CHK_MAT_SYM_i<nrow; CHK_MAT_SYM_i++) for(int CHK_MAT_SYM_j=0; CHK_MAT_SYM_j<CHK_MAT_SYM_i; CHK_MAT_SYM_j++) CHK_SAME_NUMBER(msg, A[C_MAT(CHK_MAT_SYM_i,CHK_MAT_SYM_j,nrow)], A[C_MAT(CHK_MAT_SYM_j,CHK_MAT_SYM_i,nrow)])
  22. #define MAX(x,y) ((x) > (y) ? (x) : (y))
  23. #define SQR(x) ((x) * (x))
  24. #define STOP(msg) error("Error at %s on line %d: %s\n", __FILE__, __LINE__, msg)
  25. #define STOP1(msg,x) error2(__FILE__, __LINE__, msg, x)
  26. #define STOP2(msg,x1,x2) error2(__FILE__, __LINE__, msg, x1, x2)
  27. #define STOP3(msg,x1,x2,x3) error2(__FILE__, __LINE__, msg, x1, x2, x3)
  28. #define STOP4(msg,x1,x2,x3,x4) error2(__FILE__, __LINE__, msg, x1, x2, x3, x4)
  29. #define DIE_HERE error("Error in file: %s, at line: %d", __FILE__, __LINE__)
  30. void error2(const char *filename, int lineno, const char *fmt, ...);
  31. // Matrix inversion (only for a symmetric matrix)
  32. void sym_inv_byCholesky(double *A /* n x n matrix */, const int *n, const int *check_sym);
  33. // Matrix inversion (only for a symmetric matrix) 3-dim array
  34. void sym_inv3DA_byCholesky(
  35. double *invA /* k x n x n matrix */, const double *A /* k x n x n matrix */,
  36. const int *k, const int *n, double *temp /* n x n */, const int *check_sym
  37. );
  38. // Compute the eigenvalues and vectors of a symmetric matrix x
  39. void sym_eigen(const double* x, const int *nrow, double *eigen_val, double *eigen_vec);
  40. // Compute the eigenvalues and vectors of a symmetric matrix x
  41. void sym_eigen2(const double* x, const int *nrow, double *eigen_val, double *eigen_vec, double *workspace, const int *workspace_size, const int *check_sym);
  42. int sym_eigen2_workspace_size(const int *nrow);
  43. void sum_margin(
  44. // OUTPUT
  45. double *ans,
  46. // INPUT
  47. const double *A, const int *nrow, const int *ncol,
  48. const int *side // side=1: Sum up each row and return a vector with length nrow
  49. // side=2: Sum up each column and return a vector with length ncol
  50. );
  51. // The intput/output are all R indices (start from 1, NOT 0)
  52. void generateObsIndex(
  53. //OUTPUT
  54. int* obsIndex, // E.g., consider user i (i starts from 0)
  55. int* start, // y[ obsIndex[ start[i]+(0:(num[i]-1)) ] ]
  56. int* num, // are the observations of user i
  57. //INPUT
  58. const int* effIndex /* user or item */,
  59. const int* nObs, const int* nEff,
  60. //OTHER
  61. const int* debug
  62. );
  63. void normalizeToSumUpToOne2(double *output, const double *input, const int length);
  64. void normalizeToSumUpToOne(double *vector, const int length);
  65. void indexWithQuantities(int *output, int *vector, const int *length);
  66. void print_vector(const char* prefix, const double* vector, const int length);
  67. void print_intVector(const char* prefix, const int* vector, const int length);
  68. void print_matrix(const char* prefix, const double* matrix, const int nrow, const int ncol);
  69. void compute_uBv_dense(
  70. // Output
  71. double *score, // nObs x 1
  72. // Input
  73. const int *userIndex, const int *itemIndex, // Both nObs x 1; index starts from 1 (not 0)
  74. const double *u, // nUsers x nUserFactors
  75. const double *B, // nUserFeatures x nItemFeatures
  76. const double *v, // nItems x nItemFactors
  77. const int *nObs, const int *nUsers, const int *nItems,
  78. const int *nUserFactors, const int *nItemFactors
  79. );
  80. double compute_szuBv_single_dense(
  81. int i, // user i (start from 1)
  82. int j, // item j (start from 1)
  83. const double *s, // nUsers x nUserClusters
  84. const double *z, // nItems x nItemClusters
  85. const double *u, // nUsers x nFactorsPerUser
  86. const double *B, // nUserClusters x nItemClusters x nFeaturesPerUser x nFactorsPerItem
  87. const double *v, // nItems x nFactorsPerItem
  88. const int nUsers, const int nItems,
  89. const int nUserClusters, const int nItemClusters,
  90. const int nFactorsPerUser, const int nFactorsPerItem
  91. );
  92. void compute_szuBv_dense(
  93. double *ans, // nObs x 1
  94. const int* userIndex, // nObs x 1 (start from 1)
  95. const int* itemIndex, // nObs x 1 (start from 1)
  96. const double *s, // nUsers x nUserClusters
  97. const double *z, // nItems x nItemClusters
  98. const double *u, // nUsers x nFactorsPerUser
  99. const double *B, // nUserClusters x nItemClusters x nFeaturesPerUser x nFactorsPerItem
  100. const double *v, // nItems x nFactorsPerItem
  101. const int *nObs, const int *nUsers, const int *nItems,
  102. const int *nUserClusters, const int *nItemClusters,
  103. const int *nFactorsPerUser, const int *nFactorsPerItem,
  104. const int *debug
  105. );
  106. int rdiscrete(double* probabilities, int nOutcomes, double *rnd_out);
  107. void bayesian_gaussian_regression(
  108. double *postMean, // (output) nFeatures x 1
  109. double *postVar, // (output) nFeatures x nFeatures
  110. const double *y, // (response) nObs x 1
  111. const double *X, // (features) nObs x nFeatures
  112. const double *offset, // nObs x 1
  113. const double *prior_mean, // nFeatures x 1
  114. const double *prior_var, // nFeatures x nFeatures
  115. const double *var_y, // nObs x 1
  116. const int *nObs, const int *nFeatures, const int *nOffset,
  117. const int *debug, const int *verbose
  118. );
  119. void computeMeanSumvar(
  120. // OUTPUT
  121. double *mean /* length x 1 */,
  122. double *sumvar /* 1x1 */,
  123. // INPUT
  124. const double *sum /* length x 1 */,
  125. const double *sos /* sum of squares: length x 1 */,
  126. const int length, const int nSamples
  127. );
  128. void computeMeanVar(
  129. // OUTPUT
  130. double *mean /* nLevels x nFactors */,
  131. double *sumvar /* 1x1 */,
  132. double *outputVar /*IN:sum-of-products; OUT:variance; nLevels x nFactors x nFactors*/,
  133. // INPUT
  134. const double *sum /* nLevels x nFactors */,
  135. const int nLevels, const int nFactors, const int nSamples
  136. );
  137. void computeCov(
  138. // OUTPUT
  139. double *outputCov /*IN:sum-of-products; OUT:covariance; size: num1 x num2*/,
  140. // INPUT
  141. const double *mean1, const double *mean2,
  142. const int num1, const int num2, const int nSamples
  143. );
  144. /**
  145. * output[m] = u[src_id[m], , src_ctx[m]]' v[dst_id[m, , dst_ctx[m]]
  146. */
  147. void computeMultiResponseUV(
  148. // OUTPUT
  149. double *output,
  150. // INPUT
  151. const double *u, // nSrcNodes x nFactors x nSrcContexts
  152. const double *v, // nDstNodes x nFactors x nDstContexts
  153. const int *src_id, const int *dst_id, // nObs x 1
  154. const int *src_ctx, const int *dst_ctx, // nObs x 1
  155. const int *nObs, const int *nFactors,
  156. const int *nSrcNodes, const int *nSrcContexts,
  157. const int *nDstNodes, const int *nDstContexts,
  158. const int *debug
  159. );
  160. /**
  161. * Draw a multivariate Gaussian vector
  162. * option == 0 : A is the variance-covariance matrix
  163. * option == 1 : A^-1 is the variance-covariance matrix
  164. * option == 2 : eigen_val and eigen_vec are the eigen value and vector of
  165. * the variance-covariance matrix (A is not used)
  166. * In any case, the output eigen_val and eigen_vec are the eigen value and vector
  167. * of the variance-covariance matrix.
  168. *
  169. * GetRNGstate() and PutRNGstate() are NOT called inside draw_multivar_gaussian().
  170. */
  171. void draw_multivar_gaussian(
  172. // OUTPUT
  173. double* out /* nDim x 1 */,
  174. // INPUT/OUTPUT
  175. double* eigen_val /* nDim x 1 */,
  176. double* eigen_vec /* nDim x nDim */,
  177. // INPUT
  178. const double* mean /* nDim x 1*/,
  179. const double* A /* nDim x nDim or NULL */,
  180. const int* nDim,
  181. const int* option, const int* debug,
  182. // WORK/TEMP SPACE (see workspace_size_for_draw_multivar_gaussian(nDim))
  183. double *workspace /* workspace_size x 1 */,
  184. const int* workspace_size /* 0 or at least 3*nDim or call workspace_size_for_draw_multivar_gaussian(nDim) */,
  185. double *temp1 /* nDim x 1 */,
  186. double *temp2 /* nDim x nDim */
  187. );
  188. int workspace_size_for_draw_multivar_gaussian(int nDim);