/* /include/veclib/cpx_vec.c — from https://gitlab.com/oytunistrator/QuIP */
/* complex number stuff */
/* Real only */
// Debugging helper: prints both complex source operands via advise().
// NOTE(review): multi-statement macro with no do{}while(0) guard and a
// trailing semicolon -- only safe when expanded as a bare statement; it
// also relies on error_string/advise() being in scope at the use site.
#define SHOW_CPX_3 \
sprintf(error_string,"csrc1 = %g %g",csrc1.re,csrc1.im); \
advise(error_string); \
sprintf(error_string,"csrc2 = %g %g",csrc2.re,csrc2.im); \
advise(error_string);
/* Real and imaginary parts of the complex product c1*c2.
 * Each argument is read twice -- do not pass expressions with side effects. */
#define CPX_PROD_RE(c1,c2) ( c1.re*c2.re - c1.im*c2.im )
#define CPX_PROD_IM(c1,c2) ( c1.re*c2.im + c1.im*c2.re )
// cvmov: complex copy, cdst = csrc1
_VEC_FUNC_CPX_2V( cvmov , cdst.re = csrc1.re ; cdst.im = csrc1.im )
// cvneg: complex negation, cdst = -csrc1
_VEC_FUNC_CPX_2V( cvneg , cdst.re = - csrc1.re ; cdst.im = - csrc1.im )
// cvsqr: complex square, cdst = csrc1^2 = (re^2 - im^2) + i*(2*re*im).
// Goes through tmpc (declared by the _T2 variant) so in-place use is safe.
_VEC_FUNC_CPX_2V_T2( cvsqr , tmpc.re = CPX_PROD_RE(csrc1,csrc1);
tmpc.im = csrc1.re*csrc1.im*2 ;
ASSIGN_CPX(cdst,tmpc); )
// vconj: complex conjugate, cdst = conj(csrc1)
_VEC_FUNC_CPX_2V( vconj , cdst.re = csrc1.re;
cdst.im = - csrc1.im )
// cvexp: complex exponential -- e^(a+bi) = e^a * (cos b + i sin b).
// tmpc.re captures e^a before cdst is written, so this works in-place.
_VEC_FUNC_CPX_2V_T2( cvexp , tmpc.re = exp_func(csrc1.re); \
cdst.re = tmpc.re * cos_func(csrc1.im); \
cdst.im = tmpc.re * sin_func(csrc1.im); \
)
// cvadd: cdst = csrc1 + csrc2
_VEC_FUNC_CPX_3V( cvadd , cdst.re = csrc1.re + csrc2.re ; cdst.im = csrc1.im + csrc2.im )
// NOTE(review): cvsub computes csrc2 - csrc1 (second operand minus first),
// matching qvsub below but the OPPOSITE of mvsub/pvsub -- confirm intent.
_VEC_FUNC_CPX_3V( cvsub , cdst.re = csrc2.re - csrc1.re ; cdst.im = csrc2.im - csrc1.im )
/* Store the complex product c1*c2 into cd (operands are each read twice). */
#define ASSIGN_CPX_PROD(cd,c1,c2) { cd.re = CPX_PROD_RE(c1,c2); \
cd.im = CPX_PROD_IM(c1,c2); }
// cvmul: complex multiply via tmpc so the destination may alias a source.
_VEC_FUNC_CPX_3V_T2( cvmul , ASSIGN_CPX_PROD(tmpc,csrc1,csrc2)
ASSIGN_CPX(cdst,tmpc) )
/* Squared magnitude |c|^2 (argument read twice). */
#define CPX_NORM(c) ( c.re*c.re +c.im*c.im )
// Conjugated product c1 * conj(c2): re = ac+bd, im = bc-ad.
#define CPX_CPROD_RE(c1,c2) ( c1.re*c2.re + c1.im*c2.im )
#define CPX_CPROD_IM(c1,c2) ( c2.re*c1.im - c2.im*c1.re )
#define ASSIGN_CPX_CPROD(cd,c1,c2) { cd.re = CPX_CPROD_RE(c1,c2); \
cd.im = CPX_CPROD_IM(c1,c2); }
/* Same, divided by denom: cd = c1*conj(c2)/denom. */
#define ASSIGN_CPX_CPROD_NORM(cd,c1,c2,denom) { cd.re = CPX_CPROD_RE(c1,c2)/denom; \
cd.im = CPX_CPROD_IM(c1,c2)/denom; }
// cvdiv: cdst = csrc1 / csrc2, computed as csrc1*conj(csrc2)/|csrc2|^2
// (answers the old "src1 or src2???" question: we divide BY csrc2).
// BUG: no check for |csrc2| == 0 (division by zero).
_VEC_FUNC_CPX_3V_T3( cvdiv , tmp_denom = CPX_NORM(csrc2);
ASSIGN_CPX_CPROD_NORM(tmpc,csrc1,csrc2,tmp_denom)
ASSIGN_CPX(cdst,tmpc) )
/* vcmul is not redundant with cvmul: it is the conjugate product,
 * cdst = csrc1 * conj(csrc2). */
_VEC_FUNC_CPX_3V_T2( vcmul , ASSIGN_CPX_CPROD(tmpc,csrc1,csrc2)
ASSIGN_CPX(cdst,tmpc) )
/* Mixed complex/real ops: csrc1 is complex, src2 is real.
 * add/sub touch only the real part; mul/div scale both components. */
_VEC_FUNC_CCR_3V( mvadd , cdst.re = csrc1.re + src2; cdst.im = csrc1.im )
_VEC_FUNC_CCR_3V( mvsub , cdst.re = csrc1.re - src2; cdst.im = csrc1.im )
_VEC_FUNC_CCR_3V( mvmul , cdst.re = csrc1.re * src2; cdst.im = csrc1.im * src2 )
// BUG: no divide-by-zero check on src2.
_VEC_FUNC_CCR_3V( mvdiv , cdst.re=csrc1.re/src2; cdst.im = csrc1.im / src2 )
// vmgsq: real destination gets the squared magnitude |csrc1|^2.
_VEC_FUNC_2V_MIXED( vmgsq , dst = csrc1.re*csrc1.re + csrc1.im*csrc1.im )
/* BUG check for divide by zero */
// Real source + real scalar, complex destination: the result is the real
// operation in .re with the imaginary part forced to zero.
// mvsdiv is scalar/src; mvsdiv2 is src/scalar.
_VEC_FUNC_CR_1S_2V( mvsdiv , cdst.re = scalar1_val / src1 ;
cdst.im = 0
)
_VEC_FUNC_CR_1S_2V( mvsdiv2 , cdst.re = src1 / scalar1_val ;
cdst.im = 0
)
_VEC_FUNC_CR_1S_2V( mvsmul , cdst.re = scalar1_val * src1 ;
cdst.im = 0
)
// mvssub is scalar - src (mvsadd below is src + scalar).
_VEC_FUNC_CR_1S_2V( mvssub , cdst.re = scalar1_val - src1 ;
cdst.im = 0
)
_VEC_FUNC_CR_1S_2V( mvsadd , cdst.re = src1 + scalar1_val ;
cdst.im = 0
)
// cvsadd: cdst = csrc1 + cscalar1_val
_VEC_FUNC_CPX_1S_2V( cvsadd , cdst.re = csrc1.re + cscalar1_val.re ;
cdst.im = csrc1.im + cscalar1_val.im
)
// cvssub: cdst = cscalar1_val - csrc1 (scalar minus vector).
_VEC_FUNC_CPX_1S_2V( cvssub , cdst.re = cscalar1_val.re - csrc1.re ;
cdst.im = cscalar1_val.im - csrc1.im
)
// cvsmul: cdst = csrc1 * cscalar1_val, via tmpc so in-place use is safe.
_VEC_FUNC_CPX_1S_2V_T2( cvsmul , ASSIGN_CPX_PROD(tmpc,csrc1,cscalar1_val)
ASSIGN_CPX(cdst,tmpc)
)
  78. #define QUAT_NORM(q) ( q.re * q.re + q._i * q._i + q._j * q._j + q._k * q._k )
// cvsdiv: cdst = cscalar1_val / csrc1 (scalar over vector),
// via scalar*conj(src)/|src|^2. BUG: no check for |csrc1| == 0.
_VEC_FUNC_CPX_1S_2V_T3( cvsdiv , tmp_denom=CPX_NORM(csrc1);
ASSIGN_CPX_CPROD_NORM(tmpc,cscalar1_val,csrc1,tmp_denom)
ASSIGN_CPX(cdst,tmpc)
)
// cvsdiv2: cdst = csrc1 / cscalar1_val (vector over scalar).
_VEC_FUNC_CPX_1S_2V_T3( cvsdiv2 , tmp_denom=CPX_NORM(cscalar1_val);
ASSIGN_CPX_CPROD_NORM(tmpc,csrc1,cscalar1_val,tmp_denom)
ASSIGN_CPX(cdst,tmpc)
)
/* for mixed (MDECLS2), v1 is complex and v2 is real? */
// cvset: fill the destination with the complex scalar value.
_VEC_FUNC_CPX_1S_1V( cvset , cdst.re = cscalar1_val.re ; cdst.im = cscalar1_val.im )
#ifdef NOT_YET
/* vcdot: dot product conjugating the second operand,
 * scalar = sum( csrc1 * conj(csrc2) ).  Currently disabled. */
FUNC_DECL( vcdot )
{
RCSCALAR_DECL;
CINIT2;
RSCALAR_SETUP;
scalar->re = scalar->im = 0.0;
V2LOOP( scalar->re += csrc1.re * csrc2.re + csrc1.im * csrc2.im ; scalar->im += csrc1.re * csrc2.im - csrc1.im * csrc2.re )
}
#endif /* NOT_YET */
/* We use std_tmp here so this will work if called w/ in-place argument (v3 = v1 or v2)
*/
// vscml: conjugated scalar multiply, cdst = csrc1 * conj(cscalar1_val).
_VEC_FUNC_CPX_1S_2V_T2( vscml , ASSIGN_CPX_CPROD(tmpc,csrc1,cscalar1_val)
ASSIGN_CPX(cdst,tmpc) )
/* The struct-assignment forms are clean, but don't work when destination
 * and source have different precisions:
_VEC_FUNC_SBM_CPX_3V( cvvv_slct , cdst = srcbit ? csrc1 : csrc2 )
CPX_VS_SELECTION_METHOD( cvvs_slct , cdst = srcbit ? csrc1 : cscalar1_val )
CPX_SS_SELECTION_METHOD( cvss_slct , cdst = srcbit ? cscalar1_val : cscalar2_val )
*/
// Member-wise copy instead of struct assignment (handles mixed precisions).
#define COPY_CPX( c ) { cdst.re=c.re; cdst.im=c.im; }
// Bitmap-driven selection: dst gets the first operand where srcbit is set,
// otherwise the second.
_VEC_FUNC_SBM_CPX_3V( cvvv_slct , if( srcbit ) COPY_CPX(csrc1) else COPY_CPX(csrc2) )
_VEC_FUNC_SBM_CPX_1S_2V( cvvs_slct , if( srcbit ) COPY_CPX(csrc1) else COPY_CPX(cscalar1_val) )
_VEC_FUNC_SBM_CPX_2S_1V( cvss_slct , if( srcbit ) COPY_CPX(cscalar1_val) else COPY_CPX(cscalar2_val) )
// cvsum: reduction; macro args are (name, init statements, accumulate
// statements, then pairwise-combine expressions for the re/im partial sums).
_VEC_FUNC_CPX_2V_PROJ( cvsum,
cdst.re = 0 ; cdst.im = 0 ,
cdst.re += csrc1.re; cdst.im += csrc1.im ,
psrc1.re + psrc2.re,
psrc1.im + psrc2.im
)
// cvdot: UNconjugated complex dot product, sum( csrc1 * csrc2 )
// (compare vcdot above, which conjugates the second operand).
_VEC_FUNC_CPX_3V_PROJ( cvdot, cdst.re = 0; cdst.im = 0 , cdst.re += csrc1.re * csrc2.re - csrc1.im * csrc2.im; cdst.im += csrc1.re * csrc2.im + csrc1.im * csrc2.re )
#ifndef BUILD_FOR_GPU
// cvrand: host-only; each destination component comes from rn() applied to
// the corresponding source component cast to u_long.
// NOTE(review): rn()'s contract isn't visible here -- presumably a random
// value bounded by its argument; confirm against its definition.
_VEC_FUNC_CPX_2V( cvrand , cdst.re = rn((u_long)csrc1.re); cdst.im = rn((u_long)csrc1.im) )
#endif /* ! BUILD_FOR_GPU */
#ifdef QUATERNION_SUPPORT
/* Quaternions */
/* Here is the multiplication chart:
 * note that multiplication of the imaginary terms is non-commutative...
 *
 * X 1 i j k
 *
 * 1 1 i j k
 * i i -1 k -j
 * j j -k -1 i
 * k k j -i -1
 */
/* Components of the Hamilton product q1*q2 (operands read four times each;
 * avoid arguments with side effects). */
#define QUAT_PROD_RE(q1,q2) ( q1.re * q2.re - q1._i * q2._i - q1._j * q2._j - q1._k * q2._k )
#define QUAT_PROD_I(q1,q2) ( q1.re * q2._i + q1._i * q2.re + q1._j * q2._k - q1._k * q2._j )
#define QUAT_PROD_J(q1,q2) ( q1.re * q2._j + q1._j * q2.re + q1._k * q2._i - q1._i * q2._k )
#define QUAT_PROD_K(q1,q2) ( q1.re * q2._k + q1._k * q2.re + q1._i * q2._j - q1._j * q2._i )
// qvmov: quaternion copy, qdst = qsrc1
_VEC_FUNC_QUAT_2V( qvmov , qdst.re = qsrc1.re ;
qdst._i = qsrc1._i ;
qdst._j = qsrc1._j ;
qdst._k = qsrc1._k )
// qvneg: quaternion negation, qdst = -qsrc1
_VEC_FUNC_QUAT_2V( qvneg , qdst.re = - qsrc1.re ;
qdst._i = - qsrc1._i ;
qdst._j = - qsrc1._j ;
qdst._k = - qsrc1._k )
// qvsqr: quaternion square via tmpq (in-place safe); the imaginary part
// of q^2 is 2*re*(i,j,k).
_VEC_FUNC_QUAT_2V_T4( qvsqr , tmpq.re = QUAT_PROD_RE(qsrc1,qsrc1);
tmpq._i = 2 * qsrc1.re * qsrc1._i;
tmpq._j = 2 * qsrc1.re * qsrc1._j;
tmpq._k = 2 * qsrc1.re * qsrc1._k;
ASSIGN_QUAT(qdst,tmpq) )
// qvadd: qdst = qsrc1 + qsrc2
_VEC_FUNC_QUAT_3V( qvadd , qdst.re = qsrc1.re + qsrc2.re ;
qdst._i = qsrc1._i + qsrc2._i ;
qdst._j = qsrc1._j + qsrc2._j ;
qdst._k = qsrc1._k + qsrc2._k )
// NOTE(review): qvsub computes qsrc2 - qsrc1, matching cvsub's operand
// order (but the opposite of mvsub/pvsub) -- confirm against callers.
_VEC_FUNC_QUAT_3V( qvsub , qdst.re = qsrc2.re - qsrc1.re ;
qdst._i = qsrc2._i - qsrc1._i ;
qdst._j = qsrc2._j - qsrc1._j ;
qdst._k = qsrc2._k - qsrc1._k )
/* Store the Hamilton product q1*q2 into qd. */
#define ASSIGN_QUAT_PROD(qd,q1,q2) { qd.re = QUAT_PROD_RE(q1,q2); \
qd._i = QUAT_PROD_I(q1,q2); \
qd._j = QUAT_PROD_J(q1,q2); \
qd._k = QUAT_PROD_K(q1,q2); }
// qvmul: quaternion multiply via tmpq so the destination may alias a source.
_VEC_FUNC_QUAT_3V_T4( qvmul , ASSIGN_QUAT_PROD(tmpq,qsrc1,qsrc2)
ASSIGN_QUAT(qdst,tmpq) )
// Quaternion division is like complex division: q1 / q2 = q1 * conj(q2) / normsq(q2)
// Conjugation negates all three imaginary components...
// T4 declares tmpq; T5 declares tmpq plus tmp_denom
// NOTE(review): this QUAT_NORM is an exact duplicate of the definition
// earlier in the file (legal redefinition; one copy could be dropped).
#define QUAT_NORM(q) ( q.re * q.re + q._i * q._i + q._j * q._j + q._k * q._k )
// QUAT_CPROD computes q1 * conj(q2) (second operand conjugated).
#define QUAT_CPROD_RE(q1,q2) ( q1.re * q2.re + q1._i * q2._i + q1._j * q2._j + q1._k * q2._k )
#define QUAT_CPROD_I(q1,q2) ( - q1.re * q2._i + q1._i * q2.re - q1._j * q2._k + q1._k * q2._j )
#define QUAT_CPROD_J(q1,q2) ( - q1.re * q2._j + q1._j * q2.re - q1._k * q2._i + q1._i * q2._k )
#define QUAT_CPROD_K(q1,q2) ( - q1.re * q2._k + q1._k * q2.re - q1._i * q2._j + q1._j * q2._i )
/* qd = q1 * conj(q2) / denom.  BUG: no guard against denom == 0. */
#define ASSIGN_QUAT_CPROD_NORM(qd,q1,q2,denom) { qd.re = QUAT_CPROD_RE(q1,q2)/denom; \
qd._i = QUAT_CPROD_I(q1,q2)/denom; \
qd._j = QUAT_CPROD_J(q1,q2)/denom; \
qd._k = QUAT_CPROD_K(q1,q2)/denom; }
// qvdiv: qdst = qsrc1 / qsrc2
_VEC_FUNC_QUAT_3V_T5( qvdiv , tmp_denom = QUAT_NORM(qsrc2);
ASSIGN_QUAT_CPROD_NORM(tmpq,qsrc1,qsrc2,tmp_denom)
ASSIGN_QUAT(qdst,tmpq) )
// qvsdiv: dst = scalar / src1
_VEC_FUNC_QUAT_1S_2V_T5( qvsdiv , tmp_denom = QUAT_NORM(qsrc1);
ASSIGN_QUAT_CPROD_NORM(tmpq,qscalar1_val,qsrc1,tmp_denom)
ASSIGN_QUAT(qdst,tmpq) )
// qvsdiv2: dst = src1 / scalar
_VEC_FUNC_QUAT_1S_2V_T5( qvsdiv2 , tmp_denom = QUAT_NORM(qscalar1_val);
ASSIGN_QUAT_CPROD_NORM(tmpq,qsrc1,qscalar1_val,tmp_denom)
ASSIGN_QUAT(qdst,tmpq) )
/* Mixed quaternion/real ops: qsrc1 is a quaternion, src2 is real.
 * add/sub touch only the real component; mul/div scale all four. */
_VEC_FUNC_QQR_3V( pvadd , qdst.re = qsrc1.re + src2;
qdst._i = qsrc1._i ;
qdst._j = qsrc1._j ;
qdst._k = qsrc1._k )
_VEC_FUNC_QQR_3V( pvsub , qdst.re = qsrc1.re - src2;
qdst._i = qsrc1._i ;
qdst._j = qsrc1._j ;
qdst._k = qsrc1._k )
_VEC_FUNC_QQR_3V( pvmul , qdst.re = qsrc1.re * src2;
qdst._i = qsrc1._i * src2 ;
qdst._j = qsrc1._j * src2 ;
qdst._k = qsrc1._k * src2 )
// BUG: no divide-by-zero check on src2.
_VEC_FUNC_QQR_3V( pvdiv , qdst.re = qsrc1.re / src2;
qdst._i = qsrc1._i / src2 ;
qdst._j = qsrc1._j / src2 ;
qdst._k = qsrc1._k / src2 )
/* BUG check for divide by zero */
// Real source + real scalar with a quaternion destination: result is the
// real operation in .re with all three imaginary components zeroed.
//_VF_QR_1S_2V( name, type_code, stat)
// pvsdiv is scalar/src; pvsdiv2 is src/scalar.
_VEC_FUNC_QR_1S_2V( pvsdiv , qdst.re = scalar1_val / src1 ;
qdst._i = 0;
qdst._j = 0;
qdst._k = 0
)
_VEC_FUNC_QR_1S_2V( pvsdiv2 , qdst.re = src1 / scalar1_val ;
qdst._i = 0;
qdst._j = 0;
qdst._k = 0
)
_VEC_FUNC_QR_1S_2V( pvsmul , qdst.re = scalar1_val * src1 ;
qdst._i = 0;
qdst._j = 0;
qdst._k = 0
)
// pvssub is scalar - src (pvsadd below is src + scalar).
_VEC_FUNC_QR_1S_2V( pvssub , qdst.re = scalar1_val - src1 ;
qdst._i = 0;
qdst._j = 0;
qdst._k = 0
)
_VEC_FUNC_QR_1S_2V( pvsadd , qdst.re = src1 + scalar1_val ;
qdst._i = 0;
qdst._j = 0;
qdst._k = 0
)
// qvsadd: qdst = qsrc1 + qscalar1_val
_VEC_FUNC_QUAT_1S_2V( qvsadd , qdst.re = qsrc1.re + qscalar1_val.re ;
qdst._i = qsrc1._i + qscalar1_val._i ;
qdst._j = qsrc1._j + qscalar1_val._j ;
qdst._k = qsrc1._k + qscalar1_val._k
)
// qvssub: qdst = qscalar1_val - qsrc1 (scalar minus vector).
_VEC_FUNC_QUAT_1S_2V( qvssub , qdst.re = qscalar1_val.re - qsrc1.re ;
qdst._i = qscalar1_val._i - qsrc1._i ;
qdst._j = qscalar1_val._j - qsrc1._j ;
qdst._k = qscalar1_val._k - qsrc1._k
)
/* Quaternion multiplication is non-commutative, so operand order matters:
 * qvsmul computes qsrc1 * qscalar1_val (scalar on the right). */
_VEC_FUNC_QUAT_1S_2V_T4( qvsmul , ASSIGN_QUAT_PROD(tmpq,qsrc1,qscalar1_val)
ASSIGN_QUAT(qdst,tmpq) )
/* not yet -- the opposite operand order (scalar on the left):
_VEC_FUNC_QUAT_1S_2V_T4( qvsmul2 ,ASSIGN_QUAT_PROD(tmpq,qscalar1_val,qsrc1)
ASSIGN_QUAT(qdst,tmpq) )
*/
// qvset: fill the destination with the quaternion scalar value.
_VEC_FUNC_QUAT_1S_1V( qvset , qdst.re = qscalar1_val.re ;
qdst._i = qscalar1_val._i ;
qdst._j = qscalar1_val._j ;
qdst._k = qscalar1_val._k )
/* Struct-assignment selection forms, disabled (mixed precisions unhandled):
QUAT_VV_SELECTION_METHOD( qvvv_slct , qdst = srcbit ? qsrc1 : qsrc2 )
QUAT_VS_SELECTION_METHOD( qvvs_slct , qdst = srcbit ? qsrc1 : qscalar1_val )
QUAT_SS_SELECTION_METHOD( qvss_slct , qdst = srcbit ? qscalar1_val : qscalar2_val )
*/
// Member-wise copy (works across precisions).
#define COPY_QUAT( q ) { qdst.re=q.re; qdst._i=q._i; qdst._j=q._j; qdst._k=q._k; }
// Bitmap-driven selection, as for the complex versions above.
_VEC_FUNC_SBM_QUAT_3V( qvvv_slct , if( srcbit ) COPY_QUAT(qsrc1) else COPY_QUAT(qsrc2) )
_VEC_FUNC_SBM_QUAT_1S_2V( qvvs_slct , if( srcbit ) COPY_QUAT(qsrc1) else COPY_QUAT(qscalar1_val) )
_VEC_FUNC_SBM_QUAT_2S_1V( qvss_slct , if( srcbit ) COPY_QUAT(qscalar1_val) else COPY_QUAT(qscalar2_val) )
/* BUG - gpu implementation? */
// qvsum: reduction; macro args are (name, init statements, accumulate
// statements, then pairwise-combine expressions for the four partial sums).
_VEC_FUNC_QUAT_2V_PROJ( qvsum,
qdst.re = 0 ;
qdst._i = 0 ;
qdst._j = 0 ;
qdst._k = 0
,
qdst.re += qsrc1.re ;
qdst._i += qsrc1._i ;
qdst._j += qsrc1._j ;
qdst._k += qsrc1._k
,
psrc1.re + psrc2.re,
psrc1._i + psrc2._i,
psrc1._j + psrc2._j,
psrc1._k + psrc2._k
)
#endif /* QUATERNION_SUPPORT */