PageRenderTime 15ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/rcdkjar/src/org/guha/rcdk/util/Misc.java

http://github.com/rajarshi/cdkr
Java | 368 lines | 264 code | 43 blank | 61 comment | 42 complexity | ed00fae10417b3ff36bc910434465892 MD5 | raw file
  1. /**
  2. *
  3. */
  4. package org.guha.rcdk.util;
  5. import org.guha.rcdk.view.RcdkDepictor;
  6. import org.openscience.cdk.DefaultChemObjectBuilder;
  7. import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
  8. import org.openscience.cdk.config.IsotopeFactory;
  9. import org.openscience.cdk.config.Isotopes;
  10. import org.openscience.cdk.exception.CDKException;
  11. import org.openscience.cdk.formula.MolecularFormulaGenerator;
  12. import org.openscience.cdk.formula.MolecularFormulaRange;
  13. import org.openscience.cdk.inchi.InChIGenerator;
  14. import org.openscience.cdk.inchi.InChIGeneratorFactory;
  15. import org.openscience.cdk.interfaces.*;
  16. import org.openscience.cdk.io.ISimpleChemObjectReader;
  17. import org.openscience.cdk.io.ReaderFactory;
  18. import org.openscience.cdk.io.SDFWriter;
  19. import org.openscience.cdk.io.SMILESReader;
  20. import org.openscience.cdk.io.listener.PropertiesListener;
  21. import org.openscience.cdk.isomorphism.UniversalIsomorphismTester;
  22. import org.openscience.cdk.layout.StructureDiagramGenerator;
  23. import org.openscience.cdk.silent.SilentChemObjectBuilder;
  24. import org.openscience.cdk.smiles.SmilesGenerator;
  25. import org.openscience.cdk.smsd.Isomorphism;
  26. import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
  27. import org.openscience.cdk.tools.manipulator.ChemFileManipulator;
  28. import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;
  29. import java.io.File;
  30. import java.io.FileReader;
  31. import java.io.FileWriter;
  32. import java.io.IOException;
  33. import java.util.*;
  34. /**
  35. * @author Rajarshi Guha
  36. */
  37. public class Misc {
  38. public Misc() {
  39. }
  40. public static void writeMoleculesInOneFile(IAtomContainer[] molecules,
  41. String filename,
  42. int writeProps) throws Exception {
  43. SDFWriter writer = new SDFWriter(new FileWriter(new File(filename)));
  44. Properties props = new Properties();
  45. props.put("WriteAromaticBondTypes", "true");
  46. if (writeProps == 0) {
  47. props.put("writeProperties", "false");
  48. }
  49. PropertiesListener listener = new PropertiesListener(props);
  50. writer.addChemObjectIOListener(listener);
  51. writer.customizeJob();
  52. for (IAtomContainer molecule : molecules) {
  53. writer.write(molecule);
  54. }
  55. writer.close();
  56. }
  57. public static void writeMolecules(IAtomContainer[] molecules, String prefix, int writeProps) throws Exception {
  58. int counter = 1;
  59. for (IAtomContainer molecule : molecules) {
  60. String filename = prefix + counter + ".sdf";
  61. SDFWriter writer = new SDFWriter(new FileWriter(new File(filename)));
  62. Properties props = new Properties();
  63. props.put("WriteAromaticBondTypes", "true");
  64. if (writeProps == 0) {
  65. props.put("writeProperties", "false");
  66. }
  67. PropertiesListener listener = new PropertiesListener(props);
  68. writer.addChemObjectIOListener(listener);
  69. writer.customizeJob();
  70. writer.write(molecule);
  71. writer.close();
  72. counter += 1;
  73. }
  74. }
  75. public static void setProperty(IAtomContainer molecule, String key, Object value) {
  76. molecule.setProperty(key, value);
  77. }
  78. public static void setProperty(IAtomContainer molecule, String key, int value) {
  79. setProperty(molecule, key, new Integer(value));
  80. }
  81. public static void setProperty(IAtomContainer molecule, String key, double value) {
  82. setProperty(molecule, key, new Double(value));
  83. }
  84. public static Object getProperty(IAtomContainer molecule, String key) {
  85. return molecule.getProperty(key);
  86. }
  87. public static void removeProperty(IAtomContainer molecule, String key) {
  88. molecule.removeProperty(key);
  89. }
  90. /**
  91. * Generates a canonical SMILES string from an IAtomContainer.
  92. * <p/>
  93. * The SMILES output will include aromaticity
  94. *
  95. * @param container The molecule to convert
  96. * @return A SMILES string
  97. */
  98. public static String getSmiles(IAtomContainer container, String type, boolean aromatic, boolean atomClasses) throws CDKException {
  99. SmilesGenerator smigen;
  100. switch (type) {
  101. case "generic":
  102. smigen = SmilesGenerator.generic();
  103. break;
  104. case "unique":
  105. smigen = SmilesGenerator.unique();
  106. break;
  107. case "isomeric":
  108. smigen = SmilesGenerator.isomeric();
  109. break;
  110. default:
  111. smigen = SmilesGenerator.absolute();
  112. break;
  113. }
  114. if (aromatic) smigen = smigen.aromatic();
  115. if (atomClasses) smigen = smigen.withAtomClasses();
  116. return smigen.create(container);
  117. }
  118. /**
  119. * Loads one or more files into IAtomContainer objects.
  120. * <p/>
  121. * This method does not need knowledge of the format since it is autodetected. Note that if aromaticity detection
  122. * or atom typing is specified and fails for a specific molecule, that molecule will be set to <i>null</i>
  123. *
  124. * @param filenames An array of String's containing the filenames of the structures we want to load
  125. * @param doAromaticity If true, then aromaticity perception is performed
  126. * @param doTyping If true, atom typing and configuration is performed. This will use the internal CDK atom
  127. * typing scheme
  128. * @return An array of AtoContainer's
  129. * @throws CDKException if there is an error when reading a file
  130. */
  131. public static IAtomContainer[] loadMolecules(String[] filenames,
  132. boolean doAromaticity,
  133. boolean doTyping,
  134. boolean doIsotopes) throws CDKException, IOException {
  135. Vector<IAtomContainer> v = new Vector<IAtomContainer>();
  136. IChemObjectBuilder builder = DefaultChemObjectBuilder.getInstance();
  137. try {
  138. int i;
  139. int j;
  140. for (i = 0; i < filenames.length; i++) {
  141. File input = new File(filenames[i]);
  142. ReaderFactory readerFactory = new ReaderFactory();
  143. ISimpleChemObjectReader reader = readerFactory.createReader(new FileReader(input));
  144. if (reader == null) { // see if it's a SMI file
  145. if (filenames[i].endsWith(".smi")) {
  146. reader = new SMILESReader(new FileReader(input));
  147. }
  148. }
  149. IChemFile content = (IChemFile) reader.read(builder.newInstance(IChemFile.class));
  150. if (content == null) continue;
  151. List<IAtomContainer> c = ChemFileManipulator.getAllAtomContainers(content);
  152. // we should do this loop in case we have files
  153. // that contain multiple molecules
  154. v.addAll(c);
  155. }
  156. } catch (Exception e) {
  157. e.printStackTrace();
  158. throw new CDKException(e.toString());
  159. }
  160. // convert the vector to a simple array
  161. IAtomContainer[] retValues = new IAtomContainer[v.size()];
  162. for (int i = 0; i < v.size(); i++) {
  163. retValues[i] = v.get(i);
  164. }
  165. if (doTyping) {
  166. for (int i = 0; i < retValues.length; i++) {
  167. try {
  168. AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(retValues[i]);
  169. } catch (CDKException e) {
  170. retValues[i] = null;
  171. }
  172. }
  173. }
  174. // before returning, lets make see if we
  175. // need to perceive aromaticity and atom typing
  176. if (doAromaticity) {
  177. for (int i = 0; i < retValues.length; i++) {
  178. try {
  179. CDKHueckelAromaticityDetector.detectAromaticity(retValues[i]);
  180. } catch (CDKException e) {
  181. retValues[i] = null;
  182. }
  183. }
  184. }
  185. if (doIsotopes) {
  186. Isotopes ifac = Isotopes.getInstance();
  187. for (IAtomContainer retValue : retValues) {
  188. ifac.configureAtoms(retValue);
  189. }
  190. }
  191. return retValues;
  192. }
  193. public static IAtomContainer getMoleculeWithCoordinates(IAtomContainer molecule) throws Exception {
  194. StructureDiagramGenerator sdg = new StructureDiagramGenerator();
  195. sdg.setMolecule(molecule);
  196. sdg.generateCoordinates();
  197. return sdg.getMolecule();
  198. }
  199. public static IAtomContainer getMcsAsNewContainerUIT(IAtomContainer mol1, IAtomContainer mol2) throws CDKException, CloneNotSupportedException {
  200. UniversalIsomorphismTester uit = new UniversalIsomorphismTester();
  201. List<IAtomContainer> overlaps = uit.getOverlaps(mol1, mol2);
  202. int maxmcss = -9999999;
  203. IAtomContainer maxac = null;
  204. for (IAtomContainer ac : overlaps) {
  205. if (ac.getAtomCount() > maxmcss) {
  206. maxmcss = ac.getAtomCount();
  207. maxac = ac;
  208. }
  209. }
  210. return maxac;
  211. }
  212. public static IAtomContainer getMcsAsNewContainer(IAtomContainer mol1, IAtomContainer mol2) throws CDKException, CloneNotSupportedException {
  213. Isomorphism mcs = new Isomorphism(org.openscience.cdk.smsd.interfaces.Algorithm.DEFAULT, true);
  214. mcs.init(mol1, mol2, true, true);
  215. mcs.setChemFilters(true, true, true);
  216. mol1 = mcs.getReactantMolecule();
  217. mol2 = mcs.getProductMolecule();
  218. if (mol1 == null || mol2 == null || mcs.getFirstMapping() == null)
  219. return(null);
  220. IAtomContainer mcsmolecule = DefaultChemObjectBuilder.getInstance().newInstance(IAtomContainer.class, mol1);
  221. List<IAtom> atomsToBeRemoved = new ArrayList<IAtom>();
  222. for (IAtom atom : mcsmolecule.atoms()) {
  223. int index = mcsmolecule.getAtomNumber(atom);
  224. if (!mcs.getFirstMapping().containsKey(index)) {
  225. atomsToBeRemoved.add(atom);
  226. }
  227. }
  228. for (IAtom atom : atomsToBeRemoved) {
  229. mcsmolecule.removeAtomAndConnectedElectronContainers(atom);
  230. }
  231. return mcsmolecule;
  232. }
  233. public static int[][] getMcsAsAtomIndexMapping(IAtomContainer mol1, IAtomContainer mol2) throws CDKException {
  234. Isomorphism mcs = new Isomorphism(org.openscience.cdk.smsd.interfaces.Algorithm.DEFAULT, true);
  235. mcs.init(mol1, mol2, true, true);
  236. mcs.setChemFilters(true, true, true);
  237. int mcsSize = mcs.getFirstMapping().size();
  238. int[][] mapping = new int[mcsSize][2];
  239. int i = 0;
  240. for (Map.Entry map : mcs.getFirstMapping().entrySet()) {
  241. mapping[i][0] = (Integer) map.getKey();
  242. mapping[i][1] = (Integer) map.getValue();
  243. i++;
  244. }
  245. return mapping;
  246. }
  247. public static String getInChi(IAtomContainer mol) throws CDKException {
  248. InChIGeneratorFactory factory = InChIGeneratorFactory.getInstance();
  249. factory.setIgnoreAromaticBonds(true);
  250. InChIGenerator gen = factory.getInChIGenerator(mol);
  251. return gen.getInchi();
  252. }
  253. public static String getInChiKey(IAtomContainer mol) throws CDKException {
  254. InChIGeneratorFactory factory = InChIGeneratorFactory.getInstance();
  255. factory.setIgnoreAromaticBonds(true);
  256. InChIGenerator gen = factory.getInChIGenerator(mol);
  257. return gen.getInchiKey();
  258. }
  259. /**
  260. * Returns a depictor with default settings.
  261. *
  262. * @return A {@link RcdkDepictor} object with default values.
  263. * @throws IOException
  264. */
  265. public static RcdkDepictor getDefaultDepictor() throws IOException {
  266. return new RcdkDepictor(300, 300, 1.3, "cow", "off", "on", true, false, 100, "");
  267. }
  268. /**
  269. * Construct {@link MolecularFormulaRange} object from a text representation of ranges.
  270. * Parts of the code lifted from https://github.com/cdk/cdk-paper-3/blob/master/formula_generator_benchmark/CDK/CDKFormulaGeneratorCLI.java
  271. *
  272. * @param ranges An array of range strings, of the form <code>X min max</code>, where
  273. * <code>X</code> is the element symbol, <code>min</code> is the minimum
  274. * number of this element and <code>max</code> is the maximum
  275. * @return A {@link MolecularFormulaRange} object, other <code>null</code> if any error occurs
  276. */
  277. public static MolecularFormulaRange getMFRange(String[] ranges) throws IOException {
  278. if (ranges == null)
  279. return (null);
  280. IsotopeFactory ifac = Isotopes.getInstance();
  281. MolecularFormulaRange mfRange = new MolecularFormulaRange();
  282. for (String rstr : ranges) {
  283. String[] toks = rstr.split(" ");
  284. if (toks.length != 3)
  285. throw new IllegalArgumentException("Each range string must have three elements");
  286. String element = toks[0];
  287. int min = Integer.parseInt(toks[1]);
  288. int max = Integer.parseInt(toks[2]);
  289. IIsotope i;
  290. if (element.equals("D"))
  291. i = ifac.getIsotope("H", 2);
  292. else
  293. i = ifac.getMajorIsotope(element);
  294. mfRange.addIsotope(i, min, max);
  295. }
  296. return mfRange;
  297. }
  298. public static void main(String[] args) throws Exception, CloneNotSupportedException, IOException {
  299. // IAtomContainer[] mols = Misc.loadMolecules(new String[]{"/Users/guhar/Downloads/Benzene.sdf"}, true, true, true);
  300. //
  301. // SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
  302. //
  303. // IAtomContainer mol1 = sp.parseSmiles("c1cccc(COC(=O)NC(CC(C)C)C(=O)NC(CCc2ccccc2)C(=O)COC)c1");
  304. // IAtomContainer mol2 = sp.parseSmiles("c1cccc(COC(=O)NC(CC(C)C)C(=O)NCC#N)c1");
  305. // CDKHueckelAromaticityDetector.detectAromaticity(mol1);
  306. // CDKHueckelAromaticityDetector.detectAromaticity(mol2);
  307. // AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol2);
  308. // AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol1);
  309. // int[][] map = getMcsAsAtomIndexMapping(mol1, mol2);
  310. // for (int i = 0; i < map.length; i++) {
  311. // System.out.println(map[i][0] + " <-> " + map[i][1]);
  312. // }
  313. String[] ranges = new String[]{"C 0 50", "N 0 20", "O 0 20", "H 0 50"};
  314. MolecularFormulaRange mfr = getMFRange(ranges);
  315. IChemObjectBuilder builder = SilentChemObjectBuilder.getInstance();
  316. double mass = 300;
  317. double tol = 5e-3;
  318. MolecularFormulaGenerator gen = new MolecularFormulaGenerator(builder, mass - tol, mass + tol, mfr);
  319. IMolecularFormula formula;
  320. while ((formula = gen.getNextFormula()) != null) {
  321. String formulaString = MolecularFormulaManipulator.getString(formula);
  322. System.out.println(formulaString);
  323. }
  324. }
  325. }