PageRenderTime 28ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/cf3/math/LSS/Trilinos/TrilinosDetail.cpp

https://github.com/tlmquintino/coolfluid3
C++ | 417 lines | 337 code | 54 blank | 26 comment | 106 complexity | 18fd965d2bcd521af76ed990c0a164ad MD5 | raw file
  1. // Copyright (C) 2010 von Karman Institute for Fluid Dynamics, Belgium
  2. //
  3. // This software is distributed under the terms of the
  4. // GNU Lesser General Public License version 3 (LGPLv3).
  5. // See doc/lgpl.txt and doc/gpl.txt for the license text.
  6. ////////////////////////////////////////////////////////////////////////////////////////////
  7. #include <set>
  8. #include "common/PE/Comm.hpp"
  9. #include "common/PE/CommPattern.hpp"
  10. #include "common/Log.hpp"
  11. #include "math/VariablesDescriptor.hpp"
  12. #include "math/LSS/Trilinos/TrilinosDetail.hpp"
  13. #include "TrilinosVector.hpp"
  14. #include <Epetra_Operator.h>
  15. ////////////////////////////////////////////////////////////////////////////////////////////
  16. /**
  17. @file TrilinosDetail.hpp Shared functions between Trilinos classes
  18. @author Tamas Banyai, Bart Janssens
  19. **/
  20. ////////////////////////////////////////////////////////////////////////////////////////////
  21. namespace cf3 {
  22. namespace math {
  23. namespace LSS {
  24. namespace detail
  25. {
  26. struct GidConverter
  27. {
  28. GidConverter(const std::vector<Uint>& periodic_links_nodes, const std::vector<bool>& periodic_links_active, common::PE::CommPattern& cp)
  29. {
  30. cf3_assert(periodic_links_active.size() == periodic_links_nodes.size());
  31. const Uint nb_nodes = cp.gid()->size();
  32. gids.resize(nb_nodes);
  33. cp.gid()->pack(&gids[0]);
  34. common::PE::Comm& comm = common::PE::Comm::instance();
  35. const Uint nb_procs = comm.size();
  36. const int my_rank = comm.rank();
  37. if(periodic_links_nodes.empty())
  38. {
  39. int nb_local_nodes = 0;
  40. for(int i = 0; i != nb_nodes; ++i)
  41. {
  42. if(cp.isUpdatable()[i])
  43. {
  44. nb_local_nodes++;
  45. }
  46. }
  47. comm.all_reduce(common::PE::plus(), &nb_local_nodes, 1, &global_nb_gid);
  48. }
  49. else // When periodic links are provided, we must skip them in the GID list
  50. {
  51. cf3_assert(periodic_links_active.size() == nb_nodes);
  52. int nb_local_nodes = 0;
  53. int nb_local_periodic = 0;
  54. for(int i = 0; i != nb_nodes; ++i)
  55. {
  56. if(cp.isUpdatable()[i])
  57. {
  58. if(!periodic_links_active[i])
  59. nb_local_nodes++;
  60. else
  61. {
  62. cf3_assert_desc("Periodic link for owned node " + common::to_str(i) + "is on a different process", cp.isUpdatable()[periodic_links_nodes[i]]);
  63. nb_local_periodic++;
  64. }
  65. } else if (periodic_links_active[i])
  66. {
  67. cf3_assert_desc("Periodic link for ghost node " + common::to_str(i) + "is on a my process", !cp.isUpdatable()[periodic_links_nodes[i]]);
  68. }
  69. }
  70. std::vector<int> base_gid_distribution; base_gid_distribution.reserve(nb_procs);
  71. std::vector<int> periodic_gid_distribution; periodic_gid_distribution.reserve(nb_procs);
  72. if(comm.is_active())
  73. {
  74. // Get the total number of elements on each rank
  75. comm.all_gather(nb_local_nodes, base_gid_distribution);
  76. comm.all_gather(nb_local_periodic, periodic_gid_distribution);
  77. }
  78. else
  79. {
  80. base_gid_distribution.push_back(nb_local_nodes);
  81. periodic_gid_distribution.push_back(nb_local_periodic);
  82. }
  83. cf3_assert(base_gid_distribution.size() == nb_procs);
  84. cf3_assert(periodic_gid_distribution.size() == nb_procs);
  85. for(Uint i = 1; i != nb_procs; ++i)
  86. {
  87. base_gid_distribution[i] += base_gid_distribution[i-1];
  88. periodic_gid_distribution[i] += periodic_gid_distribution[i-1];
  89. }
  90. global_nb_gid = base_gid_distribution.back();
  91. // first gid on this rank. We renumber so all GIDs for non-periodic nodes come first
  92. Uint base_gid_counter = my_rank == 0 ? 0 : base_gid_distribution[my_rank-1];
  93. Uint periodic_gid_counter = (my_rank == 0 ? 0 : periodic_gid_distribution[my_rank-1]) + global_nb_gid;
  94. // Renumber gids
  95. for(Uint i = 0; i != nb_nodes; ++i)
  96. {
  97. if(cp.isUpdatable()[i])
  98. {
  99. gids[i] = periodic_links_active[i] ? -1 : base_gid_counter++;
  100. }
  101. }
  102. if(comm.is_active())
  103. {
  104. cp.insert("PeriodicGids", gids);
  105. cp.synchronize("PeriodicGids");
  106. cp.remove_component("PeriodicGids");
  107. }
  108. }
  109. }
  110. inline int operator[](const int i) const
  111. {
  112. cf3_assert(gids[i] >= 0);
  113. return gids[i];
  114. }
  115. std::vector<int> gids;
  116. int global_nb_gid;
  117. };
  118. }
  119. void create_map_data(common::PE::CommPattern& cp, const VariablesDescriptor& variables, std::vector< int >& p2m, std::vector< int >& my_global_elements, std::vector<Uint>& my_ranks, int& num_my_elements, const std::vector<Uint>& periodic_links_nodes, const std::vector<bool>& periodic_links_active)
  120. {
  121. // get global ids vector
  122. const detail::GidConverter gid(periodic_links_nodes, periodic_links_active, cp);
  123. num_my_elements = 0;
  124. const Uint nb_vars = variables.nb_vars();
  125. const Uint total_nb_eq = variables.size();
  126. const Uint nb_nodes_for_rank = cp.isUpdatable().size();
  127. my_global_elements.reserve(nb_nodes_for_rank*total_nb_eq);
  128. my_ranks.reserve(nb_nodes_for_rank*total_nb_eq);
  129. int nb_ghosts = 0;
  130. for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx)
  131. {
  132. const Uint neq = variables.var_length(var_idx);
  133. const Uint var_offset = variables.offset(var_idx);
  134. const int var_start_gid = var_offset * gid.global_nb_gid;
  135. for (int i=0; i<nb_nodes_for_rank; i++)
  136. {
  137. if (cp.isUpdatable()[i] && !(periodic_links_active.size() && periodic_links_active[i]))
  138. {
  139. num_my_elements += neq;
  140. const int start_gid = var_start_gid + gid[i]*neq;
  141. for(int j = 0; j != neq; ++j)
  142. {
  143. my_global_elements.push_back(start_gid+j);
  144. my_ranks.push_back(cp.rank(i));
  145. }
  146. }
  147. else if (!cp.isUpdatable()[i] && !(periodic_links_active.size() && periodic_links_active[i]))
  148. {
  149. nb_ghosts += neq;
  150. }
  151. }
  152. }
  153. // process local to matrix local numbering mapper
  154. const int nb_local_nodes = num_my_elements / total_nb_eq;
  155. nb_ghosts /= total_nb_eq;
  156. p2m.resize(nb_nodes_for_rank*total_nb_eq);
  157. for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx)
  158. {
  159. const Uint neq = variables.var_length(var_idx);
  160. const Uint var_offset = variables.offset(var_idx);
  161. int iupd=nb_local_nodes*var_offset;
  162. int ighost=num_my_elements + nb_ghosts*var_offset;
  163. for (int i=0; i<nb_nodes_for_rank; ++i)
  164. {
  165. const int p_start = i*total_nb_eq+var_offset;
  166. if (cp.isUpdatable()[i] && !(periodic_links_active.size() && periodic_links_active[i]))
  167. {
  168. for(Uint j = 0; j != neq; ++j)
  169. p2m[p_start + j] = iupd++;
  170. }
  171. else if (!cp.isUpdatable()[i] && !(periodic_links_active.size() && periodic_links_active[i]))
  172. {
  173. for(Uint j = 0; j != neq; ++j)
  174. p2m[p_start + j] = ighost++;
  175. }
  176. }
  177. if(!periodic_links_active.empty())
  178. {
  179. for (int i=0; i<nb_nodes_for_rank; ++i)
  180. {
  181. const int p_start = i*total_nb_eq+var_offset;
  182. if(periodic_links_active[i])
  183. {
  184. Uint final_linked_node = periodic_links_nodes[i];
  185. while(periodic_links_active[final_linked_node])
  186. final_linked_node = periodic_links_nodes[final_linked_node];
  187. const int p_linked = final_linked_node*total_nb_eq+var_offset;
  188. for(Uint j = 0; j != neq; ++j)
  189. p2m[p_start + j] = p2m[p_linked + j];
  190. }
  191. }
  192. }
  193. }
  194. // append the ghosts at the end of the element list
  195. for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx)
  196. {
  197. const Uint neq = variables.var_length(var_idx);
  198. const Uint var_offset = variables.offset(var_idx);
  199. const int var_start_gid = var_offset * gid.global_nb_gid;
  200. for (int i=0; i<nb_nodes_for_rank; i++)
  201. {
  202. if (!cp.isUpdatable()[i] && !(periodic_links_active.size() && periodic_links_active[i]))
  203. {
  204. const int start_gid = var_start_gid + gid[i]*neq;
  205. for(int j = 0; j != neq; ++j)
  206. {
  207. my_global_elements.push_back(start_gid+j);
  208. my_ranks.push_back(cp.rank(i));
  209. }
  210. }
  211. }
  212. }
  213. }
  214. void create_indices_per_row(cf3::common::PE::CommPattern& cp,
  215. const VariablesDescriptor& variables,
  216. const std::vector<Uint>& node_connectivity,
  217. const std::vector<Uint>& starting_indices,
  218. const std::vector<int>& p2m,
  219. std::vector<int>& num_indices_per_row,
  220. std::vector<int>& indices_per_row,
  221. const std::vector<Uint>& periodic_links_nodes,
  222. const std::vector<bool>& periodic_links_active
  223. )
  224. {
  225. const Uint nb_vars = variables.nb_vars();
  226. const Uint total_nb_eq = variables.size();
  227. const Uint nb_nodes_for_rank = cp.isUpdatable().size();
  228. cf3_assert(nb_nodes_for_rank+1 == starting_indices.size());
  229. if(periodic_links_active.empty())
  230. {
  231. // Count the entries for each row
  232. Uint total_elements = 0;
  233. for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx)
  234. {
  235. const Uint neq = variables.var_length(var_idx);
  236. for (int i=0; i<nb_nodes_for_rank; i++)
  237. {
  238. if (cp.isUpdatable()[i])
  239. {
  240. for(int j = 0; j != neq; ++j)
  241. {
  242. num_indices_per_row.push_back(total_nb_eq*(starting_indices[i+1]-starting_indices[i]));
  243. total_elements += num_indices_per_row.back();
  244. }
  245. }
  246. }
  247. }
  248. // Store the indices for each row
  249. indices_per_row.reserve(total_elements);
  250. for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx)
  251. {
  252. const Uint neq = variables.var_length(var_idx);
  253. for (int i=0; i<nb_nodes_for_rank; i++)
  254. {
  255. if (cp.isUpdatable()[i])
  256. {
  257. for(int j = 0; j != neq; ++j)
  258. {
  259. const Uint columns_begin = starting_indices[i];
  260. const Uint columns_end = starting_indices[i+1];
  261. for(Uint l = columns_begin; l != columns_end; ++l)
  262. {
  263. const Uint node_idx = node_connectivity[l]*total_nb_eq;
  264. for(int k = 0; k != total_nb_eq; ++k)
  265. {
  266. indices_per_row.push_back(p2m[node_idx+k]);
  267. }
  268. }
  269. }
  270. }
  271. }
  272. }
  273. }
  274. else
  275. {
  276. std::vector< std::vector<Uint> > inverse_periodic_links(nb_nodes_for_rank);
  277. cf3_assert(nb_nodes_for_rank == periodic_links_active.size());
  278. for(Uint i = 0; i != nb_nodes_for_rank; ++i)
  279. {
  280. if(periodic_links_active[i])
  281. {
  282. Uint final_target_node = periodic_links_nodes[i];
  283. while(periodic_links_active[final_target_node])
  284. {
  285. final_target_node = periodic_links_nodes[final_target_node];
  286. }
  287. inverse_periodic_links[final_target_node].push_back(i);
  288. }
  289. }
  290. // With periodic links, we need to make sure each column is only accounted for once, hence the use of a set.
  291. std::vector< std::set<Uint> > indices_per_row_sets;
  292. indices_per_row_sets.reserve(nb_nodes_for_rank);
  293. Uint total_elements = 0;
  294. std::vector<int> row_nodes(8); // 3D full periodic collapses at most 8 nodes
  295. for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx)
  296. {
  297. const Uint neq = variables.var_length(var_idx);
  298. for (int i=0; i<nb_nodes_for_rank; i++)
  299. {
  300. if (cp.isUpdatable()[i] && !periodic_links_active[i])
  301. {
  302. for(int j = 0; j != neq; ++j)
  303. {
  304. indices_per_row_sets.push_back( std::set<Uint>() );
  305. row_nodes[0] = i;
  306. std::copy(inverse_periodic_links[i].begin(), inverse_periodic_links[i].end(), row_nodes.begin()+1);
  307. const Uint nb_row_nodes = 1 + inverse_periodic_links[i].size();
  308. for(int m = 0; m != nb_row_nodes; ++m)
  309. {
  310. const Uint columns_begin = starting_indices[row_nodes[m]];
  311. const Uint columns_end = starting_indices[row_nodes[m]+1];
  312. for(Uint l = columns_begin; l != columns_end; ++l)
  313. {
  314. const Uint node_idx = node_connectivity[l]*total_nb_eq;
  315. for(int k = 0; k != total_nb_eq; ++k)
  316. {
  317. indices_per_row_sets.back().insert(p2m[node_idx+k]);
  318. }
  319. }
  320. }
  321. total_elements += indices_per_row_sets.back().size();
  322. }
  323. }
  324. }
  325. }
  326. indices_per_row.reserve(total_elements);
  327. BOOST_FOREACH(const std::set<Uint>& row_nodes, indices_per_row_sets)
  328. {
  329. num_indices_per_row.push_back(row_nodes.size());
  330. indices_per_row.insert(indices_per_row.end(), row_nodes.begin(), row_nodes.end());
  331. }
  332. }
  333. }
  334. void apply_matrix ( const Epetra_Operator& op, const Handle< Vector >& y, const cf3::Handle< const Vector >& x, const Real alpha, const Real beta )
  335. {
  336. Handle<TrilinosVector> y_tril(y);
  337. Handle<TrilinosVector const> x_tril(x);
  338. if(is_null(y_tril) || is_null(x_tril))
  339. throw common::SetupError(FromHere(), "Trilinos*Matrix::apply must be given TrilinosVector arguments");
  340. cf3_assert(op.OperatorDomainMap().PointSameAs(x_tril->epetra_vector()->Map()));
  341. // Store the old values of y since we need them later on
  342. Teuchos::RCP<Epetra_Vector> temp;
  343. if(beta != 0.)
  344. {
  345. temp = Teuchos::rcp(new Epetra_Vector(*y_tril->epetra_vector()));
  346. }
  347. // Apply the operator
  348. TRILINOS_THROW(op.Apply(*x_tril->epetra_vector(), *y_tril->epetra_vector()));
  349. // Scale the result if needed
  350. if(alpha != 1.)
  351. {
  352. y_tril->epetra_vector()->Scale(alpha);
  353. }
  354. // Add in old values if needed
  355. if(beta != 0.)
  356. {
  357. y_tril->epetra_vector()->Update(beta, *temp, 1.);
  358. }
  359. }
  360. } // namespace LSS
  361. } // namespace math
  362. } // namespace cf3