
/surface/include/pcl/surface/3rdparty/poisson4/sparse_matrix.hpp

https://bitbucket.org/amaalej/pcl
/*
Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer. Redistributions in binary form must reproduce
the above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.

Neither the name of the Johns Hopkins University nor the names of its contributors
may be used to endorse or promote products derived from this software without specific
prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.
*/
#include <float.h>
#ifdef _WIN32
# ifndef WIN32_LEAN_AND_MEAN
#  define WIN32_LEAN_AND_MEAN
# endif // WIN32_LEAN_AND_MEAN
# ifndef NOMINMAX
#  define NOMINMAX
# endif // NOMINMAX
# include <windows.h>
#endif //_WIN32
//////////////////
// SparseMatrix //
//////////////////

//////////////////////////////////////////
// Static Allocator Methods and Members //
//////////////////////////////////////////
namespace pcl
{
namespace poisson
{

template< class T > int SparseMatrix< T >::UseAlloc = 0;
template< class T > Allocator< MatrixEntry< T > > SparseMatrix< T >::internalAllocator;
template< class T > int SparseMatrix< T >::UseAllocator( void ){ return UseAlloc; }
template< class T >
void SparseMatrix< T >::SetAllocator( int blockSize )
{
  if( blockSize>0 )
  {
    UseAlloc = 1;
    internalAllocator.set( blockSize );
  }
  else UseAlloc = 0;
}
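// Usage sketch (illustrative, not part of the original file): with a positive
// block size, subsequent SetRowSize() calls draw MatrixEntry storage from the
// shared internalAllocator pool instead of per-row malloc()/free(); the block
// size below is an arbitrary example value.
//
//   SparseMatrix< float >::SetAllocator( 1<<12 );
//   SparseMatrix< float > M( 100 );
//   M.SetRowSize( 0 , 8 );   // pooled allocation, no malloc()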
//////////////////////////////////////
// SparseMatrix Methods and Members //
//////////////////////////////////////
template< class T >
SparseMatrix< T >::SparseMatrix( void )
{
  _contiguous = false;
  _maxEntriesPerRow = 0;
  rows = 0;
  rowSizes = NULL;
  m_ppElements = NULL;
}
template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
template< class T >
SparseMatrix< T >::SparseMatrix( const SparseMatrix& M ) : SparseMatrix< T >()
{
  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
  else                Resize( M.rows );
  for( int i=0 ; i<rows ; i++ )
  {
    SetRowSize( i , M.rowSizes[i] );
    memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
  }
}
template< class T >
int SparseMatrix< T >::Entries( void ) const
{
  int e = 0;
  for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
  return e;
}
template< class T >
SparseMatrix< T >& SparseMatrix< T >::operator = ( const SparseMatrix< T >& M )
{
  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
  else                Resize( M.rows );
  for( int i=0 ; i<rows ; i++ )
  {
    SetRowSize( i , M.rowSizes[i] );
    memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
  }
  return *this;
}
template< class T >
SparseMatrix< T >::~SparseMatrix( void ){ Resize( 0 ); }
template< class T >
bool SparseMatrix< T >::write( const char* fileName ) const
{
  FILE* fp = fopen( fileName , "wb" );
  if( !fp ) return false;
  bool ret = write( fp );
  fclose( fp );
  return ret;
}
template< class T >
bool SparseMatrix< T >::read( const char* fileName )
{
  FILE* fp = fopen( fileName , "rb" );
  if( !fp ) return false;
  bool ret = read( fp );
  fclose( fp );
  return ret;
}
template< class T >
bool SparseMatrix< T >::write( FILE* fp ) const
{
  if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
  if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=size_t( rows ) ) return false;
  for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=size_t( rowSizes[i] ) ) return false;
  return true;
}
template< class T >
bool SparseMatrix< T >::read( FILE* fp )
{
  int r;
  if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
  Resize( r );
  if( fread( rowSizes , sizeof( int ) , rows , fp )!=size_t( rows ) ) return false;
  for( int i=0 ; i<rows ; i++ )
  {
    // SetRowSize() frees any previous storage when rowSizes[i] is non-zero,
    // so clear the size read from disk before allocating the row.
    r = rowSizes[i];
    rowSizes[i] = 0;
    SetRowSize( i , r );
    if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=size_t( rowSizes[i] ) ) return false;
  }
  return true;
}
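// Usage sketch (illustrative): round-tripping a matrix through a binary file
// with the read()/write() pair above; the file name is hypothetical.
//
//   SparseMatrix< float > A , B;
//   /* ... fill A ... */
//   if( A.write( "matrix.bin" ) && B.read( "matrix.bin" ) )
//     ;  // B now holds a row-by-row copy of A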
template< class T >
void SparseMatrix< T >::Resize( int r )
{
  if( rows>0 )
  {
    if( !UseAlloc )
    {
      if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
      else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
    }
    free( m_ppElements );
    free( rowSizes );
  }
  rows = r;
  if( r )
  {
    rowSizes = ( int* )malloc( sizeof( int ) * r );
    memset( rowSizes , 0 , sizeof( int ) * r );
    m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
  }
  _contiguous = false;
  _maxEntriesPerRow = 0;
}
template< class T >
void SparseMatrix< T >::Resize( int r , int e )
{
  if( rows>0 )
  {
    if( !UseAlloc )
    {
      if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
      else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
    }
    free( m_ppElements );
    free( rowSizes );
  }
  rows = r;
  if( r )
  {
    rowSizes = ( int* )malloc( sizeof( int ) * r );
    memset( rowSizes , 0 , sizeof( int ) * r );
    m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
    // All rows share one contiguous block: row i starts e entries after row i-1.
    m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
    for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
  }
  _contiguous = true;
  _maxEntriesPerRow = e;
}
template< class T >
void SparseMatrix< T >::SetRowSize( int row , int count )
{
  if( _contiguous )
  {
    if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d>%d\n" , count , _maxEntriesPerRow ) , exit( 0 );
    rowSizes[row] = count;
  }
  else if( row>=0 && row<rows )
  {
    if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements( count );
    else
    {
      if( rowSizes[row] ) free( m_ppElements[row] );
      if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
    }
    // Record the new size; callers (e.g. read() above) rely on it.
    rowSizes[row] = count;
  }
}
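// Usage sketch (illustrative): assembling a row of a non-contiguous matrix.
// operator[] returns the row's MatrixEntry<T> array, whose fields N (column
// index) and Value are filled directly; all names below are hypothetical.
//
//   SparseMatrix< float > M( 4 );   // 4 rows, nothing allocated yet
//   M.SetRowSize( 0 , 2 );          // reserve 2 entries in row 0
//   M[0][0].N = 1;  M[0][0].Value =  3.f;
//   M[0][1].N = 3;  M[0][1].Value = -1.f;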
template< class T >
void SparseMatrix< T >::SetZero()
{
  // Re-allocating to the current dimensions resets all row sizes to zero.
  Resize( rows , _maxEntriesPerRow );
}
template< class T >
void SparseMatrix< T >::SetIdentity()
{
  SetZero();
  for( int ij=0 ; ij<Min( this->Rows() , this->Columns() ) ; ij++ ) (*this)(ij,ij) = T(1);
}
template< class T >
SparseMatrix< T > SparseMatrix< T >::operator * ( const T& V ) const
{
  SparseMatrix< T > M( *this );
  M *= V;
  return M;
}
template< class T >
SparseMatrix< T >& SparseMatrix< T >::operator *= ( const T& V )
{
  for( int i=0 ; i<rows ; i++ )
    for( int ii=0 ; ii<rowSizes[i] ; ii++ ) m_ppElements[i][ii].Value *= V;
  return *this;
}
template< class T >
SparseMatrix< T > SparseMatrix< T >::Multiply( const SparseMatrix< T >& M ) const
{
  SparseMatrix< T > R( this->Rows() , M.Columns() );
  for( int i=0 ; i<R.rows ; i++ )
    for( int ii=0 ; ii<rowSizes[i] ; ii++ )
    {
      int N = m_ppElements[i][ii].N;
      T Value = m_ppElements[i][ii].Value;
      for( int jj=0 ; jj<M.rowSizes[N] ; jj++ )
        R( i , M.m_ppElements[N][jj].N ) += Value * M.m_ppElements[N][jj].Value;
    }
  return R;
}
template< class T >
template< class T2 >
Vector< T2 > SparseMatrix< T >::Multiply( const Vector< T2 >& V ) const
{
  Vector< T2 > R( rows );
  for( int i=0 ; i<rows ; i++ )
  {
    T2 temp = T2();
    for( int ii=0 ; ii<rowSizes[i] ; ii++ ) temp += m_ppElements[i][ii].Value * V.m_pV[ m_ppElements[i][ii].N ];
    R(i) = temp;
  }
  return R;
}
template< class T >
template< class T2 >
void SparseMatrix< T >::Multiply( const Vector< T2 >& In , Vector< T2 >& Out , int threads ) const
{
#pragma omp parallel for num_threads( threads ) schedule( static )
  for( int i=0 ; i<rows ; i++ )
  {
    T2 temp = T2();
    // Guards against T2 types whose default constructor leaves the value uninitialized.
    temp *= 0;
    for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[ m_ppElements[i][j].N ];
    Out.m_pV[i] = temp;
  }
}
template< class T >
SparseMatrix< T > SparseMatrix< T >::operator * ( const SparseMatrix< T >& M ) const { return Multiply( M ); }
template< class T >
template< class T2 >
Vector< T2 > SparseMatrix< T >::operator * ( const Vector< T2 >& V ) const { return Multiply( V ); }
template< class T >
SparseMatrix< T > SparseMatrix< T >::Transpose() const
{
  SparseMatrix< T > M( this->Columns() , this->Rows() );
  for( int i=0 ; i<rows ; i++ )
    for( int ii=0 ; ii<rowSizes[i] ; ii++ ) M( m_ppElements[i][ii].N , i ) = m_ppElements[i][ii].Value;
  return M;
}
template< class T >
template< class T2 >
int SparseMatrix< T >::SolveSymmetric( const SparseMatrix< T >& M , const Vector< T2 >& b , int iters , Vector< T2 >& solution , const T2 eps , int reset , int threads )
{
  if( reset )
  {
    solution.Resize( b.Dimensions() );
    solution.SetZero();
  }
  Vector< T2 > r;
  r.Resize( solution.Dimensions() );
  M.Multiply( solution , r );
  r = b - r;
  Vector< T2 > d = r;
  double delta_new = 0 , delta_0;
  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
  delta_0 = delta_new;
  if( delta_new<eps ) return 0;
  int ii;
  Vector< T2 > q;
  q.Resize( d.Dimensions() );
  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
  {
    M.Multiply( d , q , threads );
    double dDotQ = 0 , alpha = 0;
    for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
    alpha = delta_new / dDotQ;
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
    if( !(ii%50) )
    {
      // Periodically recompute the residual from scratch to limit round-off drift.
      r.Resize( solution.Dimensions() );
      M.Multiply( solution , r , threads );
      r = b - r;
    }
    else
    {
#pragma omp parallel for num_threads( threads ) schedule( static )
      for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2( alpha );
    }
    double delta_old = delta_new , beta;
    delta_new = 0;
    for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
    beta = delta_new / delta_old;
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
  }
  return ii;
}
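// Note: SolveSymmetric() above is a standard conjugate-gradient iteration.
// Every 50th pass the residual r is recomputed exactly as b - M(solution)
// instead of being updated incrementally as r - alpha*q; this periodic
// refresh bounds the floating-point drift that the incremental update
// accumulates. Iteration stops once the squared residual norm delta_new
// drops below eps*delta_0 or after `iters` iterations.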
// Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
template< class T >
int SparseMatrix< T >::Solve( const SparseMatrix< T >& M , const Vector< T >& b , int iters , Vector< T >& solution , const T eps )
{
  SparseMatrix mTranspose = M.Transpose();
  Vector< T > bb = mTranspose * b;
  Vector< T > d , r , Md;
  T alpha , beta , rDotR;
  int i;
  solution.Resize( M.Columns() );
  solution.SetZero();
  d = r = bb;
  rDotR = r.Dot( r );
  for( i=0 ; i<iters && rDotR>eps ; i++ )
  {
    T temp;
    Md = mTranspose * ( M * d );
    alpha = rDotR / d.Dot( Md );
    solution += d * alpha;
    r -= Md * alpha;
    temp = r.Dot( r );
    beta = temp / rDotR;
    rDotR = temp;
    d = r + d * beta;
  }
  return i;
}
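// Note: Solve() above runs conjugate gradients on the normal equations
// (CGNR): for any M, the product M^t M is symmetric positive semi-definite,
// so CG applies to M^t M x = M^t b even when M itself is not symmetric; the
// trade-off is a squared condition number relative to M.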
///////////////////////////
// SparseSymmetricMatrix //
///////////////////////////
template< class T >
template< class T2 >
Vector< T2 > SparseSymmetricMatrix< T >::operator * ( const Vector< T2 >& V ) const { return Multiply( V ); }
template< class T >
template< class T2 >
Vector< T2 > SparseSymmetricMatrix< T >::Multiply( const Vector< T2 >& V ) const
{
  Vector< T2 > R( SparseMatrix< T >::rows );
  for( int i=0 ; i<SparseMatrix< T >::rows ; i++ )
    for( int ii=0 ; ii<SparseMatrix< T >::rowSizes[i] ; ii++ )
    {
      int j = SparseMatrix< T >::m_ppElements[i][ii].N;
      R(i) += SparseMatrix< T >::m_ppElements[i][ii].Value * V.m_pV[j];
      R(j) += SparseMatrix< T >::m_ppElements[i][ii].Value * V.m_pV[i];
    }
  return R;
}
template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::Multiply( const Vector< T2 >& In , Vector< T2 >& Out , bool addDCTerm ) const
{
  Out.SetZero();
  const T2* in = &In[0];
  T2* out = &Out[0];
  T2 dcTerm = T2( 0 );
  if( addDCTerm )
  {
    // The DC term adds the mean of the input to every output coordinate.
    for( int i=0 ; i<SparseMatrix< T >::rows ; i++ ) dcTerm += in[i];
    dcTerm /= SparseMatrix< T >::rows;
  }
  for( int i=0 ; i<SparseMatrix< T >::rows ; i++ )
  {
    const MatrixEntry< T >* temp = SparseMatrix< T >::m_ppElements[i];
    const MatrixEntry< T >* end = temp + SparseMatrix< T >::rowSizes[i];
    const T2& in_i_ = in[i];
    T2 out_i = T2( 0 );
    for( ; temp!=end ; temp++ )
    {
      int j = temp->N;
      T2 v = temp->Value;
      out_i += v * in[j];
      out[j] += v * in_i_;
    }
    out[i] += out_i;
  }
  if( addDCTerm ) for( int i=0 ; i<SparseMatrix< T >::rows ; i++ ) out[i] += dcTerm;
}
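// Note: SparseSymmetricMatrix stores each symmetric coefficient pair only
// once, so the Multiply() overloads apply every stored entry (i,j) twice:
// out[i] += v*in[j] and out[j] += v*in[i]. A stored diagonal entry (j==i)
// therefore contributes 2*v*in[i], i.e. the effective diagonal is twice the
// stored value (cf. getDiagonal() at the end of this file).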
template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::Multiply( const Vector< T2 >& In , Vector< T2 >& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
{
  int dim = int( In.Dimensions() );
  const T2* in = &In[0];
  int threads = OutScratch.threads();
  if( addDCTerm )
  {
    T2 dcTerm = 0;
#pragma omp parallel for num_threads( threads ) reduction( + : dcTerm )
    for( int t=0 ; t<threads ; t++ )
    {
      T2* out = OutScratch[t];
      memset( out , 0 , sizeof( T2 ) * dim );
      for( int i=(SparseMatrix< T >::rows*t)/threads ; i<(SparseMatrix< T >::rows*(t+1))/threads ; i++ )
      {
        const T2& in_i_ = in[i];
        T2& out_i_ = out[i];
        for( const MatrixEntry< T > *temp = SparseMatrix< T >::m_ppElements[i] , *end = temp+SparseMatrix< T >::rowSizes[i] ; temp!=end ; temp++ )
        {
          int j = temp->N;
          T2 v = temp->Value;
          out_i_ += v * in[j];
          out[j] += v * in_i_;
        }
        dcTerm += in_i_;
      }
    }
    dcTerm /= dim;
    dim = int( Out.Dimensions() );
    T2* out = &Out[0];
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ )
    {
      T2 _out = dcTerm;
      for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
      out[i] = _out;
    }
  }
  else
  {
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
    {
      T2* out = OutScratch[t];
      memset( out , 0 , sizeof( T2 ) * dim );
      for( int i=(SparseMatrix< T >::rows*t)/threads ; i<(SparseMatrix< T >::rows*(t+1))/threads ; i++ )
      {
        const T2& in_i_ = in[i];
        T2& out_i_ = out[i];
        for( const MatrixEntry< T > *temp = SparseMatrix< T >::m_ppElements[i] , *end = temp+SparseMatrix< T >::rowSizes[i] ; temp!=end ; temp++ )
        {
          int j = temp->N;
          T2 v = temp->Value;
          out_i_ += v * in[j];
          out[j] += v * in_i_;
        }
      }
    }
    dim = int( Out.Dimensions() );
    T2* out = &Out[0];
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ )
    {
      T2 _out = T2( 0 );
      for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
      out[i] = _out;
    }
  }
}
template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::Multiply( const Vector< T2 >& In , Vector< T2 >& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
{
  int dim = int( In.Dimensions() );
  const T2* in = &In[0];
  int threads = int( OutScratch.size() );
#pragma omp parallel for num_threads( threads )
  for( int t=0 ; t<threads ; t++ )
    for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2( 0 );
#pragma omp parallel for num_threads( threads )
  for( int t=0 ; t<threads ; t++ )
  {
    T2* out = OutScratch[t];
    for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
    {
      const MatrixEntry< T >* temp = SparseMatrix< T >::m_ppElements[i];
      const MatrixEntry< T >* end = temp + SparseMatrix< T >::rowSizes[i];
      const T2& in_i_ = in[i];
      T2& out_i_ = out[i];
      for( ; temp!=end ; temp++ )
      {
        int j = temp->N;
        T2 v = temp->Value;
        out_i_ += v * in[j];
        out[j] += v * in_i_;
      }
    }
  }
  T2* out = &Out[0];
#pragma omp parallel for num_threads( threads ) schedule( static )
  for( int i=0 ; i<int( Out.Dimensions() ) ; i++ )
  {
    T2& _out = out[i];
    _out = T2( 0 );
    for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
  }
}
#ifdef WIN32
#ifndef _AtomicIncrement_
#define _AtomicIncrement_
inline void AtomicIncrement( volatile float* ptr , float addend )
{
  float newValue = *ptr;
  LONG& _newValue = *( (LONG*)&newValue );
  LONG _oldValue;
  for( ;; )
  {
    _oldValue = _newValue;
    newValue += addend;
    _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
    if( _newValue==_oldValue ) break;
  }
}
inline void AtomicIncrement( volatile double* ptr , double addend )
{
  double newValue = *ptr;
  LONGLONG& _newValue = *( (LONGLONG*)&newValue );
  LONGLONG _oldValue;
  do
  {
    _oldValue = _newValue;
    newValue += addend;
    _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
  }
  while( _newValue!=_oldValue );
}
#endif // _AtomicIncrement_
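// Note: the AtomicIncrement() overloads above perform a lock-free add via a
// compare-and-swap loop. InterlockedCompareExchange() stores the new bits
// only if *ptr still holds the bits observed at the start of the iteration,
// and returns the previous contents; if another thread intervened, the
// returned bits differ from _oldValue and the loop retries with the fresh
// value. The LONG/LONGLONG references reinterpret the float/double storage
// so the integer CAS primitives can be applied.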
template< class T >
void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
{
  Out.SetZero();
  const float* in = &In[0];
  float* out = &Out[0];
  if( partition )
  {
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
      for( int i=partition[t] ; i<partition[t+1] ; i++ )
      {
        const MatrixEntry< T >* temp = A[i];
        const MatrixEntry< T >* end = temp + A.rowSizes[i];
        const float& in_i = in[i];
        float out_i = 0.f;
        for( ; temp!=end ; temp++ )
        {
          int j = temp->N;
          float v = temp->Value;
          out_i += v * in[j];
          AtomicIncrement( out+j , v * in_i );
        }
        AtomicIncrement( out+i , out_i );
      }
  }
  else
  {
#pragma omp parallel for num_threads( threads )
    for( int i=0 ; i<A.rows ; i++ )
    {
      const MatrixEntry< T >* temp = A[i];
      const MatrixEntry< T >* end = temp + A.rowSizes[i];
      const float& in_i = in[i];
      float out_i = 0.f;
      for( ; temp!=end ; temp++ )
      {
        int j = temp->N;
        float v = temp->Value;
        out_i += v * in[j];
        AtomicIncrement( out+j , v * in_i );
      }
      AtomicIncrement( out+i , out_i );
    }
  }
}
template< class T >
void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
{
  Out.SetZero();
  const double* in = &In[0];
  double* out = &Out[0];
  if( partition )
  {
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
      for( int i=partition[t] ; i<partition[t+1] ; i++ )
      {
        const MatrixEntry< T >* temp = A[i];
        const MatrixEntry< T >* end = temp + A.rowSizes[i];
        const double& in_i = in[i];
        double out_i = 0.;
        for( ; temp!=end ; temp++ )
        {
          int j = temp->N;
          T v = temp->Value;
          out_i += v * in[j];
          AtomicIncrement( out+j , v * in_i );
        }
        AtomicIncrement( out+i , out_i );
      }
  }
  else
  {
#pragma omp parallel for num_threads( threads )
    for( int i=0 ; i<A.rows ; i++ )
    {
      const MatrixEntry< T >* temp = A[i];
      const MatrixEntry< T >* end = temp + A.rowSizes[i];
      const double& in_i = in[i];
      double out_i = 0.;
      for( ; temp!=end ; temp++ )
      {
        int j = temp->N;
        T v = temp->Value;
        out_i += v * in[j];
        AtomicIncrement( out+j , v * in_i );
      }
      AtomicIncrement( out+i , out_i );
    }
  }
}
template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
{
  eps *= eps;
  int dim = int( b.Dimensions() );
  if( reset )
  {
    x.Resize( dim );
    x.SetZero();
  }
  Vector< T2 > r( dim ) , d( dim ) , q( dim );
  Vector< T2 > temp;
  if( solveNormal ) temp.Resize( dim );
  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
  const T2* _b = &b[0];
  std::vector< int > partition( threads+1 );
  {
    int eCount = 0;
    for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
    partition[0] = 0;
#pragma omp parallel for num_threads( threads )
    for( int t=0 ; t<threads ; t++ )
    {
      int _eCount = 0;
      for( int i=0 ; i<A.rows ; i++ )
      {
        _eCount += A.rowSizes[i];
        if( _eCount*threads>=eCount*(t+1) )
        {
          partition[t+1] = i;
          break;
        }
      }
    }
    partition[threads] = A.rows;
  }
  if( solveNormal )
  {
    MultiplyAtomic( A , x , temp , threads , &partition[0] );
    MultiplyAtomic( A , temp , r , threads , &partition[0] );
    MultiplyAtomic( A , b , temp , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
  }
  else
  {
    MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
  }
  double delta_new = 0 , delta_0;
  for( int i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
  delta_0 = delta_new;
  if( delta_new<eps )
  {
    fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
    return 0;
  }
  int ii;
  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
  {
    if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
    else              MultiplyAtomic( A , d , q , threads , &partition[0] );
    double dDotQ = 0;
    for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
    T2 alpha = T2( delta_new / dDotQ );
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
    if( (ii%50)==(50-1) )
    {
      r.Resize( dim );
      if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
      else              MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
      for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
    }
    else
    {
#pragma omp parallel for num_threads( threads ) schedule( static )
      for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
    }
    double delta_old = delta_new;
    delta_new = 0;
    for( int i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
    T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads ) schedule( static )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
  }
  return ii;
}
#endif // WIN32
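// Note: SolveAtomic() above balances threads by nonzero count rather than by
// row count: `partition` is built so each thread's row range covers roughly
// eCount/threads matrix entries, evening out the per-thread cost of
// MultiplyAtomic() when row sizes vary widely.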
template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
{
  int threads = scratch.threads();
  eps *= eps;
  int dim = int( b.Dimensions() );
  Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
  if( reset ) x.Resize( dim );
  if( solveNormal ) temp.Resize( dim );
  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
  const T2* _b = &b[0];
  double delta_new = 0 , delta_0;
  if( solveNormal )
  {
    A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
  }
  else
  {
    A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
  }
  delta_0 = delta_new;
  if( delta_new<eps )
  {
    fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
    return 0;
  }
  int ii;
  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
  {
    if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
    else              A.Multiply( d , q , scratch , addDCTerm );
    double dDotQ = 0;
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
    for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
    T2 alpha = T2( delta_new / dDotQ );
    double delta_old = delta_new;
    delta_new = 0;
    if( (ii%50)==(50-1) )
    {
      // Advance the solution once, then recompute the residual from scratch.
#pragma omp parallel for num_threads( threads )
      for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
      r.Resize( dim );
      if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
      else              A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
      for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
    }
    else
    {
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
      for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
    }
    T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
  }
  return ii;
}
template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
{
  eps *= eps;
  int dim = int( b.Dimensions() );
  MapReduceVector< T2 > outScratch;
  if( threads<1 ) threads = 1;
  if( threads>1 ) outScratch.resize( threads , dim );
  if( reset ) x.Resize( dim );
  Vector< T2 > r( dim ) , d( dim ) , q( dim );
  Vector< T2 > temp;
  if( solveNormal ) temp.Resize( dim );
  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
  const T2* _b = &b[0];
  double delta_new = 0 , delta_0;
  if( solveNormal )
  {
    if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
    else            A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
  }
  else
  {
    if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
    else            A.Multiply( x , r , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
  }
  delta_0 = delta_new;
  if( delta_new<eps )
  {
    fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
    return 0;
  }
  int ii;
  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
  {
    if( solveNormal )
    {
      if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
      else            A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
    }
    else
    {
      if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
      else            A.Multiply( d , q , addDCTerm );
    }
    double dDotQ = 0;
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
    for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
    T2 alpha = T2( delta_new / dDotQ );
    double delta_old = delta_new;
    delta_new = 0;
    if( (ii%50)==(50-1) )
    {
      // Advance the solution once, then recompute the residual from scratch.
#pragma omp parallel for num_threads( threads )
      for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
      r.SetZero();
      if( solveNormal )
      {
        if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
        else            A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
      }
      else
      {
        if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
        else            A.Multiply( x , r , addDCTerm );
      }
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
      for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
    }
    else
    {
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
      for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
    }
    T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
    for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
  }
  return ii;
}
template< class T >
template< class T2 >
int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix< T >& M , const Vector< T2 >& diagonal , const Vector< T2 >& b , int iters , Vector< T2 >& solution , int reset )
{
  Vector< T2 > Md;
  if( reset )
  {
    solution.Resize( b.Dimensions() );
    solution.SetZero();
  }
  Md.Resize( M.rows );
  for( int i=0 ; i<iters ; i++ )
  {
    M.Multiply( solution , Md );
    // Jacobi update:
    // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
    // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
    // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
    for( int j=0 ; j<int( M.rows ) ; j++ ) solution[j] += ( b[j]-Md[j] ) / diagonal[j];
  }
  return iters;
}
template< class T >
template< class T2 >
void SparseSymmetricMatrix< T >::getDiagonal( Vector< T2 >& diagonal ) const
{
  diagonal.Resize( SparseMatrix< T >::rows );
  for( int i=0 ; i<SparseMatrix< T >::rows ; i++ )
  {
    diagonal[i] = 0.;
    for( int j=0 ; j<SparseMatrix< T >::rowSizes[i] ; j++ )
      if( SparseMatrix< T >::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix< T >::m_ppElements[i][j].Value * 2;
  }
}
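// Note: the factor of 2 in getDiagonal() above mirrors the symmetric
// Multiply() convention: a stored diagonal entry is applied to both out[i]
// and out[j] with i==j, so its effective contribution is twice the stored
// value. The returned diagonal is therefore the one the Jacobi-style
// Solve() overload above should be handed.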
}
}