PageRenderTime 25ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/gnome-chemistry-utils-0.13.91/libs/gcu/molecule.cc

#
C++ | 521 lines | 451 code | 38 blank | 32 comment | 120 complexity | f512b14b6ee338f71734556e2a01ca8d MD5 | raw file
Possible License(s): GPL-3.0, CC-BY-SA-4.0, GPL-2.0
  1. // -*- C++ -*-
  2. /*
  3. * Gnome Chemistry Utils
  4. * molecule.cc
  5. *
  6. * Copyright (C) 2001-2011 Jean Br?Šfort <jean.brefort@normalesup.org>
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License as
  10. * published by the Free Software Foundation; either version 3 of the
  11. * License, or (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
  21. * USA
  22. */
  23. #include "config.h"
  24. #include "molecule.h"
  25. #include "application.h"
  26. #include "atom.h"
  27. #include "bond.h"
  28. #include "chain.h"
  29. #include "cycle.h"
  30. #include "document.h"
  31. #include "formula.h"
  32. #include "residue.h"
  33. #include <gsf/gsf-output-memory.h>
  34. #include <glib/gi18n-lib.h>
  35. #include <stack>
  36. #include <sstream>
  37. using namespace std;
  38. namespace gcu
  39. {
  40. Molecule::Molecule (TypeId Type, ContentType ct): Object (Type)
  41. {
  42. SetId ("m1");
  43. m_Content = ct;
  44. }
  45. Molecule::Molecule (Atom* pAtom, ContentType ct): Object (MoleculeType)
  46. {
  47. SetId ("m1");
  48. m_Content = ct;
  49. SetParent (pAtom->GetDocument ());
  50. AddAtom (pAtom);
  51. Chain* pChain = new Chain (this, pAtom); //will find the cycles
  52. delete pChain;
  53. }
  54. Molecule::~Molecule ()
  55. {
  56. Clear ();
  57. }
  58. void Molecule::Clear ()
  59. {
  60. std::list<Bond*>::iterator n, end = m_Bonds.end ();
  61. for (n = m_Bonds.begin (); n != end; n++)
  62. (*n)->RemoveAllCycles ();
  63. while (!m_Cycles.empty ()) {
  64. delete m_Cycles.front ();
  65. m_Cycles.pop_front ();
  66. }
  67. while (!m_Chains.empty ()) {
  68. delete m_Chains.front ();
  69. m_Chains.pop_front ();
  70. }
  71. }
  72. void Molecule::AddChild (Object* object)
  73. {
  74. switch (object->GetType ()) {
  75. case AtomType: {
  76. Atom *atom = reinterpret_cast<Atom *> (object);
  77. AddAtom (atom);
  78. break;
  79. }
  80. case gcu::BondType: {
  81. Bond *bond = reinterpret_cast<Bond *> (object);
  82. m_Bonds.remove (bond);
  83. AddBond (bond);
  84. break;
  85. }
  86. default:
  87. Object::AddChild (object);
  88. }
  89. }
  90. void Molecule::AddAtom (Atom* pAtom)
  91. {
  92. m_Atoms.remove (pAtom); // avoid duplicates
  93. m_Atoms.push_back (pAtom);
  94. Object::AddChild (pAtom);
  95. }
  96. void Molecule::AddBond (Bond* pBond)
  97. {
  98. m_Bonds.remove (pBond); // avoid duplicates
  99. m_Bonds.push_back (pBond);
  100. Object::AddChild (pBond);
  101. }
  102. void Molecule::Remove (gcu::Object* pObject)
  103. {
  104. switch (pObject->GetType ()) {
  105. case AtomType:
  106. m_Atoms.remove ((Atom*) pObject);
  107. break;
  108. case gcu::BondType:
  109. m_Bonds.remove ((Bond*) pObject);
  110. break;
  111. }
  112. pObject->SetParent (GetParent ());
  113. }
  114. void Molecule::UpdateCycles (Bond* pBond) //FIXME: function not totally implemented
  115. {
  116. Chain* pChain = new Chain (this, pBond); //will find the cycles
  117. delete pChain;
  118. }
  119. void Molecule::ClearCycles ()
  120. {
  121. std::list<Bond*>::iterator n, end = m_Bonds.end ();
  122. for (n = m_Bonds.begin (); n != end; n++)
  123. (*n)->RemoveAllCycles ();
  124. while (!m_Cycles.empty()) {
  125. delete m_Cycles.front ();
  126. m_Cycles.pop_front ();
  127. }
  128. }
  129. void Molecule::UpdateCycles ()
  130. {
  131. Lock (true);
  132. ClearCycles ();
  133. if (!m_Atoms.empty()) {
  134. std::list<Atom*>::iterator i = m_Atoms.begin (), end = m_Atoms.end ();
  135. i++;
  136. for (; i != end; i++)
  137. (*i)->SetParent(NULL);
  138. Chain* pChain = new Chain (this, *(m_Atoms.begin ())); //will find the cycles
  139. delete pChain;
  140. end = m_Atoms.end ();
  141. list<Atom*> orphans;
  142. for (i = m_Atoms.begin (); i != end; i++)
  143. if ((*i)->GetParent () == NULL)
  144. orphans.push_back (*i);
  145. end = orphans.end ();
  146. for (i = orphans.begin (); i != end; i++)
  147. (*i)->SetParent (this);
  148. }
  149. Lock (false);
  150. }
  151. bool Molecule::operator== (Molecule const& molecule) const
  152. {
  153. // first examine each atom of each molecule and sort by Z.
  154. map<int, set<Atom*> > atoms1, atoms2;
  155. list<Atom*>::const_iterator ia, enda = m_Atoms.end ();
  156. for (ia = m_Atoms.begin (); ia != enda; ia++)
  157. atoms1[(*ia)->GetZ ()].insert (*ia);
  158. enda = molecule.m_Atoms.end ();
  159. for (ia = molecule.m_Atoms.begin (); ia != enda; ia++){
  160. atoms2[(*ia)->GetZ ()].insert (*ia);
  161. }
  162. if (atoms1.size () != atoms2.size ())
  163. return false;
  164. map<int, set<Atom*> >::iterator ib, endb = atoms1.end (), ic, endc = atoms2.end ();
  165. unsigned n = m_Atoms.size (), m;
  166. int z = 200;
  167. for (ib = atoms1.begin (); ib != endb; ib++) {
  168. if ((ic = atoms2.find ((*ib).first)) == endc)
  169. return false;
  170. if ((m = (*ib).second.size ()) != (*ib).second.size ())
  171. return false;
  172. if (m < n)
  173. n = m;
  174. if (m == n && (*ib).first < z)
  175. z = (*ib).first;
  176. }
  177. AtomMatchState state;
  178. state.atoms.reserve (GetAtomsNumber ());
  179. if (z == 200) {
  180. return false; // should do something more meaningful the molecule contains no normal atoms, only groups, probably
  181. }
  182. Atom *atom = *atoms1[z].begin (); // take the firts atom, and try to match it with the other molecule atoms of same Z.
  183. set<Atom*> &starters = atoms2[z];
  184. set<Atom*>::iterator j, jend = starters.end ();
  185. for (j = starters.begin (); j != jend; j++)
  186. if (atom->Match (*j, state))
  187. return true;
  188. return false;
  189. }
  190. Molecule *Molecule::MoleculeFromFormula (Document *Doc, Formula const &formula, bool add_pseudo)
  191. {
  192. Application *app = Doc->GetApp ();
  193. Molecule *mol = reinterpret_cast <Molecule*> (app->CreateObject ("molecule", Doc));
  194. if (!mol)
  195. return NULL;
  196. stack <Atom*> atoms;
  197. Atom *atom = NULL;
  198. list<FormulaElt *> const &elts = formula.GetElements ();
  199. list<FormulaElt *>::const_reverse_iterator i, iend = elts.rend ();
  200. FormulaAtom *fatom;
  201. FormulaResidue *fresidue;
  202. int valence, in;
  203. unsigned PendingHs = 0;
  204. stack<Atom*> PendingAtoms;
  205. Bond *bond;
  206. bool done = false;
  207. for (i = elts.rbegin (); i != iend; i++) {
  208. if (done) {
  209. g_warning ("Can't interpret formula");
  210. // destroy the molecule
  211. mol->SetParent (NULL);
  212. delete mol;
  213. return NULL;
  214. }
  215. if ((fatom = dynamic_cast <FormulaAtom *> (*i))) {
  216. valence = fatom->GetValence ();
  217. if (valence == 1) {
  218. if (fatom->elt == 1)
  219. PendingHs += fatom->stoich;
  220. else for (in = 0; in < fatom->stoich; in++) {
  221. atom = reinterpret_cast <Atom*> (CreateObject ("atom", mol));
  222. atom->SetZ (fatom->elt);
  223. PendingAtoms.push (atom);
  224. }
  225. } else {
  226. int n = valence - PendingHs - PendingAtoms.size ();
  227. if (n == 1) {
  228. atom = reinterpret_cast <Atom*> (CreateObject ("atom", mol));
  229. atom->SetZ (fatom->elt);
  230. while (!PendingAtoms.empty ()) {
  231. // FIXME: we do not support multiple bonds !!!
  232. bond = reinterpret_cast <Bond*> (CreateObject ("bond", mol));
  233. bond->SetOrder (1);
  234. bond->ReplaceAtom (NULL, atom);
  235. bond->ReplaceAtom (NULL, PendingAtoms.top ());
  236. PendingAtoms.top ()->AddBond (bond);
  237. PendingAtoms.pop ();
  238. }
  239. PendingAtoms.push (atom);
  240. PendingHs = 0;
  241. } else if (n == 0) {
  242. atom = reinterpret_cast <Atom*> (CreateObject ("atom", mol));
  243. atom->SetZ (fatom->elt);
  244. while (!PendingAtoms.empty ()) {
  245. // FIXME: we do not support multiple bonds !!!
  246. bond = reinterpret_cast <Bond*> (CreateObject ("bond", mol));
  247. bond->SetOrder (1);
  248. bond->ReplaceAtom (NULL, atom);
  249. bond->ReplaceAtom (NULL, PendingAtoms.top ());
  250. PendingAtoms.top ()->AddBond (bond);
  251. PendingAtoms.pop ();
  252. }
  253. done = true;
  254. }
  255. }
  256. } else if ((fresidue = dynamic_cast <FormulaResidue *> (*i))) {
  257. // get the residue molecule and duplicate it
  258. map<Atom*, Atom*> Corr;
  259. Residue const *residue = Doc->GetResidue (fresidue->Symbol.c_str ());
  260. Molecule const *orig = residue->GetMolecule ();
  261. Atom *pseudo = NULL;
  262. std::list<Atom*>::const_iterator ai, aiend = orig->m_Atoms.end ();
  263. for (in = 0; in < fresidue->stoich; in++) {
  264. for (ai = orig->m_Atoms.begin (); ai != aiend; ai++) {
  265. if ((*ai)->GetZ () == 0)
  266. pseudo = atom = reinterpret_cast <Atom*> (CreateObject ("pseudo-atom", mol));
  267. else {
  268. atom = reinterpret_cast <Atom*> (CreateObject ("atom", mol));
  269. *atom = *(*ai);
  270. }
  271. atom->SetId ("a1");
  272. Corr[*ai] = atom;
  273. atom->SetId ("a1");
  274. }
  275. std::list<Bond*>::const_iterator bi, biend = orig->m_Bonds.end ();
  276. for (bi = orig->m_Bonds.begin (); bi != biend; bi++) {
  277. // stereochemistry is lost for now
  278. bond = reinterpret_cast <Bond*> (CreateObject ("bond", mol));
  279. bond->SetId ("b1");
  280. bond->SetOrder ((*bi)->GetOrder ());
  281. bond->ReplaceAtom (NULL, Corr[(*bi)->GetAtom (0)]);
  282. bond->ReplaceAtom (NULL, Corr[(*bi)->GetAtom (1)]);
  283. Corr[(*bi)->GetAtom (1)]->AddBond (bond);
  284. }
  285. // remove the pseudo-atom
  286. // FIXME: we drop the orientation of the bond and the positionof the pseudo-atom
  287. // which will make 2D autogeneration problematic
  288. map<Atom*, Bond*>::iterator ci;
  289. bond = pseudo->GetFirstBond (ci);
  290. atom = bond->GetAtom (pseudo);
  291. if (atom) {
  292. PendingAtoms.push (atom);
  293. atom->RemoveBond (bond);
  294. }
  295. mol->Remove (bond);
  296. delete bond;
  297. mol->Remove (pseudo);
  298. delete pseudo;
  299. }
  300. } else {
  301. // FIXME: need to support blocks as well
  302. mol->SetParent (NULL); // ensure children wil be destroyed
  303. delete mol;
  304. return NULL;
  305. }
  306. }
  307. if (add_pseudo) {
  308. if (PendingHs + PendingAtoms.size () != 1) {
  309. mol->SetParent (NULL); // ensure children wil be destroyed
  310. delete mol;
  311. return NULL;
  312. }
  313. atom = reinterpret_cast <Atom*> (app->CreateObject ("pseudo-atom", mol));
  314. bond = reinterpret_cast <Bond*> (app->CreateObject ("bond", mol));
  315. bond->SetOrder (1);
  316. bond->ReplaceAtom (NULL, atom);
  317. if (PendingAtoms.size () > 0)
  318. atom = PendingAtoms.top ();
  319. else {
  320. atom = reinterpret_cast <Atom*> (app->CreateObject ("atom", mol));
  321. atom->SetZ (1);
  322. }
  323. bond->ReplaceAtom (NULL, atom);
  324. atom->AddBond (bond);
  325. PendingAtoms.pop ();
  326. } else if (PendingHs + PendingAtoms.size () == 2) {
  327. bond = reinterpret_cast <Bond*> (app->CreateObject ("bond", mol));
  328. bond->ReplaceAtom (NULL, PendingAtoms.top ());
  329. PendingAtoms.pop ();
  330. bond->ReplaceAtom (NULL, PendingAtoms.top ());
  331. } else if (PendingHs + PendingAtoms.size () != 0) {
  332. mol->SetParent (NULL); // ensure children wil be destroyed
  333. delete mol;
  334. return NULL;
  335. }
  336. return mol;
  337. }
  338. Atom *Molecule::GetFirstAtom (std::list < Atom * >::iterator &i)
  339. {
  340. i = m_Atoms.begin ();
  341. return (i == m_Atoms.end ())? NULL: *i;
  342. }
  343. Atom const *Molecule::GetFirstAtom (std::list < Atom * >::const_iterator &i) const
  344. {
  345. i = m_Atoms.begin ();
  346. return (i == m_Atoms.end ())? NULL: *i;
  347. }
  348. Atom *Molecule::GetNextAtom (std::list < Atom* >::iterator &i)
  349. {
  350. i++;
  351. return (i == m_Atoms.end ())? NULL: *i;
  352. }
  353. Atom const *Molecule::GetNextAtom (std::list < Atom * >::const_iterator &i) const
  354. {
  355. i++;
  356. return (i == m_Atoms.end ())? NULL: *i;
  357. }
  358. Bond const *Molecule::GetFirstBond (std::list<Bond*>::const_iterator &i) const
  359. {
  360. i = m_Bonds.begin ();
  361. return (i == m_Bonds.end ())? NULL: *i;
  362. }
  363. Bond const *Molecule::GetNextBond(std::list<Bond*>::const_iterator &i) const
  364. {
  365. i++;
  366. return (i == m_Bonds.end ())? NULL: *i;
  367. }
  368. void Molecule::SetName (char const *name, char const *convention)
  369. {
  370. if (!convention)
  371. convention = "Unknown";
  372. m_Names[convention] = name;
  373. }
  374. char const *Molecule::GetName (char const *convention)
  375. {
  376. if (m_Names.empty ())
  377. return NULL;
  378. if (!convention)
  379. return (*m_Names.begin()).second.c_str ();
  380. map <string, string>::iterator it = m_Names.find (convention);
  381. return (it != m_Names.end ())? (*it).second.c_str (): NULL;
  382. }
  383. std::string Molecule::Name ()
  384. {
  385. return _("Molecule");
  386. }
  387. std::string const &Molecule::GetCML ()
  388. {
  389. if (m_CML.length () == 0) {
  390. GsfOutput *output = gsf_output_memory_new ();
  391. GetDocument ()->GetApp ()->Save (output, "chemical/x-cml", this, m_Content);
  392. size_t l = gsf_output_size (output);
  393. if (l > 0)
  394. m_CML.assign (reinterpret_cast <char const *> (gsf_output_memory_get_bytes (GSF_OUTPUT_MEMORY (output))), l);
  395. g_object_unref (output);
  396. }
  397. return m_CML;
  398. }
  399. std::string const &Molecule::GetInChI ()
  400. {
  401. if (m_InChI.length () == 0) {
  402. if (m_CML.length () == 0)
  403. GetCML ();
  404. GsfOutput *output = gsf_output_memory_new ();
  405. GetDocument ()->GetApp ()->ConvertFromCML (m_CML.c_str (), output, "inchi");
  406. size_t l = gsf_output_size (output);
  407. if (l > 0) {
  408. char const *res = reinterpret_cast <char const *> (gsf_output_memory_get_bytes (GSF_OUTPUT_MEMORY (output)));
  409. while (res[l - 1] < ' ')
  410. l--;
  411. m_InChI.assign (res, l);
  412. }
  413. g_object_unref (output);
  414. }
  415. return m_InChI;
  416. }
  417. std::string const &Molecule::GetInChIKey ()
  418. {
  419. if (m_InChIKey.length () == 0) {
  420. if (m_CML.length () == 0)
  421. GetCML ();
  422. GsfOutput *output = gsf_output_memory_new ();
  423. GetDocument ()->GetApp ()->ConvertFromCML (m_CML.c_str (), output, "inchi", "-xK");
  424. size_t l = gsf_output_size (output);
  425. if (l > 0) {
  426. char const *res = reinterpret_cast <char const *> (gsf_output_memory_get_bytes (GSF_OUTPUT_MEMORY (output)));
  427. while (res[l - 1] < ' ')
  428. l--;
  429. m_InChIKey.assign (res, l);
  430. }
  431. g_object_unref (output);
  432. }
  433. return m_InChIKey;
  434. }
  435. std::string const &Molecule::GetSMILES ()
  436. {
  437. if (m_SMILES.length () == 0) {
  438. if (m_CML.length () == 0)
  439. GetCML ();
  440. GsfOutput *output = gsf_output_memory_new ();
  441. GetDocument ()->GetApp ()->ConvertFromCML (m_CML.c_str (), output, "can");
  442. size_t l = gsf_output_size (output);
  443. if (l > 0) {
  444. char const *res = reinterpret_cast <char const *> (gsf_output_memory_get_bytes (GSF_OUTPUT_MEMORY (output)));
  445. while (res[l - 1] < ' ')
  446. l--;
  447. m_SMILES.assign (res, l);
  448. }
  449. g_object_unref (output);
  450. }
  451. return m_SMILES;
  452. }
  453. void Molecule::ResetIndentifiers ()
  454. {
  455. m_CML.clear ();
  456. m_InChI.clear ();
  457. m_InChIKey.clear ();
  458. m_SMILES.clear ();
  459. }
  460. std::string Molecule::GetRawFormula () const
  461. {
  462. std::ostringstream ofs;
  463. map<string, int> elts;
  464. list<Atom*>::const_iterator ia, enda = m_Atoms.end ();
  465. for (ia = m_Atoms.begin(); ia != enda; ia++) {
  466. if ((*ia)->GetZ () == 0)
  467. continue;
  468. elts[(*ia)->GetSymbol ()]++;
  469. }
  470. if (elts["C"] > 0) {
  471. ofs << "C" << elts["C"];
  472. elts.erase ("C");
  473. }
  474. if (elts["H"] > 0) {
  475. ofs << "H" << elts["H"];
  476. elts.erase ("H");
  477. }
  478. map<string, int>::iterator is, isend = elts.end ();
  479. for (is = elts.begin (); is != isend; is++)
  480. ofs << (*is).first << (*is).second;
  481. return ofs.str ();
  482. }
  483. } //namespace gcu