PageRenderTime 186ms CodeModel.GetById 70ms app.highlight 49ms RepoModel.GetById 61ms app.codeStats 1ms

/rcdkjar/src/org/guha/rcdk/util/Misc.java

http://github.com/rajarshi/cdkr
Java | 368 lines | 264 code | 43 blank | 61 comment | 42 complexity | ed00fae10417b3ff36bc910434465892 MD5 | raw file
  1/**
  2 *
  3 */
  4package org.guha.rcdk.util;
  5
  6import org.guha.rcdk.view.RcdkDepictor;
  7import org.openscience.cdk.DefaultChemObjectBuilder;
  8import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
  9import org.openscience.cdk.config.IsotopeFactory;
 10import org.openscience.cdk.config.Isotopes;
 11import org.openscience.cdk.exception.CDKException;
 12import org.openscience.cdk.formula.MolecularFormulaGenerator;
 13import org.openscience.cdk.formula.MolecularFormulaRange;
 14import org.openscience.cdk.inchi.InChIGenerator;
 15import org.openscience.cdk.inchi.InChIGeneratorFactory;
 16import org.openscience.cdk.interfaces.*;
 17import org.openscience.cdk.io.ISimpleChemObjectReader;
 18import org.openscience.cdk.io.ReaderFactory;
 19import org.openscience.cdk.io.SDFWriter;
 20import org.openscience.cdk.io.SMILESReader;
 21import org.openscience.cdk.io.listener.PropertiesListener;
 22import org.openscience.cdk.isomorphism.UniversalIsomorphismTester;
 23import org.openscience.cdk.layout.StructureDiagramGenerator;
 24import org.openscience.cdk.silent.SilentChemObjectBuilder;
 25import org.openscience.cdk.smiles.SmilesGenerator;
 26import org.openscience.cdk.smsd.Isomorphism;
 27import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
 28import org.openscience.cdk.tools.manipulator.ChemFileManipulator;
 29import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;
 30
 31import java.io.File;
 32import java.io.FileReader;
 33import java.io.FileWriter;
 34import java.io.IOException;
 35import java.util.*;
 36
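/**
 * Static helper methods for reading, writing and annotating CDK molecules, generating
 * SMILES and InChI strings, computing maximum common substructures, and building
 * molecular formula ranges.
 *
 * @author Rajarshi Guha
 */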
 40
 41public class Misc {
 42
    /**
     * Creates an instance. The class declares no fields and all of its operations
     * are static, so the instance carries no state.
     */
    public Misc() {
    }
 45
 46    public static void writeMoleculesInOneFile(IAtomContainer[] molecules,
 47                                               String filename,
 48                                               int writeProps) throws Exception {
 49        SDFWriter writer = new SDFWriter(new FileWriter(new File(filename)));
 50
 51        Properties props = new Properties();
 52        props.put("WriteAromaticBondTypes", "true");
 53        if (writeProps == 0) {
 54            props.put("writeProperties", "false");
 55        }
 56        PropertiesListener listener = new PropertiesListener(props);
 57        writer.addChemObjectIOListener(listener);
 58        writer.customizeJob();
 59        for (IAtomContainer molecule : molecules) {
 60            writer.write(molecule);
 61        }
 62        writer.close();
 63    }
 64
 65    public static void writeMolecules(IAtomContainer[] molecules, String prefix, int writeProps) throws Exception {
 66        int counter = 1;
 67        for (IAtomContainer molecule : molecules) {
 68            String filename = prefix + counter + ".sdf";
 69            SDFWriter writer = new SDFWriter(new FileWriter(new File(filename)));
 70
 71            Properties props = new Properties();
 72            props.put("WriteAromaticBondTypes", "true");
 73            if (writeProps == 0) {
 74                props.put("writeProperties", "false");
 75            }
 76            PropertiesListener listener = new PropertiesListener(props);
 77            writer.addChemObjectIOListener(listener);
 78            writer.customizeJob();
 79
 80            writer.write(molecule);
 81            writer.close();
 82            counter += 1;
 83        }
 84    }
 85
 86
 87    public static void setProperty(IAtomContainer molecule, String key, Object value) {
 88        molecule.setProperty(key, value);
 89    }
 90
 91    public static void setProperty(IAtomContainer molecule, String key, int value) {
 92        setProperty(molecule, key, new Integer(value));
 93    }
 94
 95    public static void setProperty(IAtomContainer molecule, String key, double value) {
 96        setProperty(molecule, key, new Double(value));
 97    }
 98
 99    public static Object getProperty(IAtomContainer molecule, String key) {
100        return molecule.getProperty(key);
101    }
102
103    public static void removeProperty(IAtomContainer molecule, String key) {
104        molecule.removeProperty(key);
105    }
106
107    /**
108     * Generates a canonical SMILES string from an IAtomContainer.
109     * <p/>
110     * The SMILES output will include aromaticity
111     *
112     * @param container The molecule to convert
113     * @return A SMILES string
114     */
115    public static String getSmiles(IAtomContainer container, String type, boolean aromatic, boolean atomClasses) throws CDKException {
116        SmilesGenerator smigen;
117        switch (type) {
118            case "generic":
119                smigen = SmilesGenerator.generic();
120                break;
121            case "unique":
122                smigen = SmilesGenerator.unique();
123                break;
124            case "isomeric":
125                smigen = SmilesGenerator.isomeric();
126                break;
127            default:
128                smigen = SmilesGenerator.absolute();
129                break;
130        }
131        if (aromatic) smigen = smigen.aromatic();
132        if (atomClasses) smigen = smigen.withAtomClasses();
133        return smigen.create(container);
134    }
135
136    /**
137     * Loads one or more files into IAtomContainer objects.
138     * <p/>
139     * This method does not need knowledge of the format since it is autodetected.    Note that if aromaticity detection
140     * or atom typing is specified and fails for a specific molecule, that molecule will be set to <i>null</i>
141     *
142     * @param filenames     An array of String's containing the filenames of the structures we want to load
143     * @param doAromaticity If true, then aromaticity perception is performed
144     * @param doTyping      If true, atom typing and configuration is performed. This will use the internal CDK atom
145     *                      typing scheme
146     * @return An array of AtoContainer's
147     * @throws CDKException if there is an error when reading a file
148     */
149    public static IAtomContainer[] loadMolecules(String[] filenames,
150                                                 boolean doAromaticity,
151                                                 boolean doTyping,
152                                                 boolean doIsotopes) throws CDKException, IOException {
153        Vector<IAtomContainer> v = new Vector<IAtomContainer>();
154        IChemObjectBuilder builder = DefaultChemObjectBuilder.getInstance();
155        try {
156            int i;
157            int j;
158
159            for (i = 0; i < filenames.length; i++) {
160                File input = new File(filenames[i]);
161                ReaderFactory readerFactory = new ReaderFactory();
162                ISimpleChemObjectReader reader = readerFactory.createReader(new FileReader(input));
163
164                if (reader == null) { // see if it's a SMI file
165                    if (filenames[i].endsWith(".smi")) {
166                        reader = new SMILESReader(new FileReader(input));
167                    }
168                }
169                IChemFile content = (IChemFile) reader.read(builder.newInstance(IChemFile.class));
170                if (content == null) continue;
171
172                List<IAtomContainer> c = ChemFileManipulator.getAllAtomContainers(content);
173
174                // we should do this loop in case we have files
175                // that contain multiple molecules
176                v.addAll(c);
177            }
178
179        } catch (Exception e) {
180            e.printStackTrace();
181            throw new CDKException(e.toString());
182        }
183
184        // convert the vector to a simple array
185        IAtomContainer[] retValues = new IAtomContainer[v.size()];
186        for (int i = 0; i < v.size(); i++) {
187            retValues[i] = v.get(i);
188        }
189
190        if (doTyping) {
191            for (int i = 0; i < retValues.length; i++) {
192                try {
193                    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(retValues[i]);
194                } catch (CDKException e) {
195                    retValues[i] = null;
196                }
197            }
198        }
199
200        // before returning, lets make see if we
201        // need to perceive aromaticity and atom typing
202        if (doAromaticity) {
203            for (int i = 0; i < retValues.length; i++) {
204                try {
205                    CDKHueckelAromaticityDetector.detectAromaticity(retValues[i]);
206                } catch (CDKException e) {
207                    retValues[i] = null;
208                }
209            }
210        }
211
212        if (doIsotopes) {
213            Isotopes ifac = Isotopes.getInstance();
214            for (IAtomContainer retValue : retValues) {
215                ifac.configureAtoms(retValue);
216            }
217        }
218
219        return retValues;
220    }
221
222    public static IAtomContainer getMoleculeWithCoordinates(IAtomContainer molecule) throws Exception {
223        StructureDiagramGenerator sdg = new StructureDiagramGenerator();
224        sdg.setMolecule(molecule);
225        sdg.generateCoordinates();
226        return sdg.getMolecule();
227    }
228
229    public static IAtomContainer getMcsAsNewContainerUIT(IAtomContainer mol1, IAtomContainer mol2) throws CDKException, CloneNotSupportedException {
230        UniversalIsomorphismTester uit = new UniversalIsomorphismTester();
231        List<IAtomContainer> overlaps = uit.getOverlaps(mol1, mol2);
232        int maxmcss = -9999999;
233        IAtomContainer maxac = null;
234        for (IAtomContainer ac : overlaps) {
235            if (ac.getAtomCount() > maxmcss) {
236                maxmcss = ac.getAtomCount();
237                maxac = ac;
238            }
239        }
240        return maxac;
241    }
242
243    public static IAtomContainer getMcsAsNewContainer(IAtomContainer mol1, IAtomContainer mol2) throws CDKException, CloneNotSupportedException {
244        Isomorphism mcs = new Isomorphism(org.openscience.cdk.smsd.interfaces.Algorithm.DEFAULT, true);
245        mcs.init(mol1, mol2, true, true);
246        mcs.setChemFilters(true, true, true);
247	    
248        mol1 = mcs.getReactantMolecule();
249        mol2 = mcs.getProductMolecule();
250	if (mol1 == null || mol2 == null || mcs.getFirstMapping() == null)
251	    return(null);
252	
253        IAtomContainer mcsmolecule = DefaultChemObjectBuilder.getInstance().newInstance(IAtomContainer.class, mol1);
254
255        List<IAtom> atomsToBeRemoved = new ArrayList<IAtom>();
256        for (IAtom atom : mcsmolecule.atoms()) {
257            int index = mcsmolecule.getAtomNumber(atom);
258            if (!mcs.getFirstMapping().containsKey(index)) {
259                atomsToBeRemoved.add(atom);
260            }
261        }
262
263        for (IAtom atom : atomsToBeRemoved) {
264            mcsmolecule.removeAtomAndConnectedElectronContainers(atom);
265        }
266
267        return mcsmolecule;
268    }
269
270    public static int[][] getMcsAsAtomIndexMapping(IAtomContainer mol1, IAtomContainer mol2) throws CDKException {
271        Isomorphism mcs = new Isomorphism(org.openscience.cdk.smsd.interfaces.Algorithm.DEFAULT, true);
272        mcs.init(mol1, mol2, true, true);
273        mcs.setChemFilters(true, true, true);
274        int mcsSize = mcs.getFirstMapping().size();
275        int[][] mapping = new int[mcsSize][2];
276        int i = 0;
277        for (Map.Entry map : mcs.getFirstMapping().entrySet()) {
278            mapping[i][0] = (Integer) map.getKey();
279            mapping[i][1] = (Integer) map.getValue();
280            i++;
281        }
282        return mapping;
283    }
284
285    public static String getInChi(IAtomContainer mol) throws CDKException {
286        InChIGeneratorFactory factory = InChIGeneratorFactory.getInstance();
287        factory.setIgnoreAromaticBonds(true);
288        InChIGenerator gen = factory.getInChIGenerator(mol);
289        return gen.getInchi();
290    }
291
292    public static String getInChiKey(IAtomContainer mol) throws CDKException {
293        InChIGeneratorFactory factory = InChIGeneratorFactory.getInstance();
294        factory.setIgnoreAromaticBonds(true);
295        InChIGenerator gen = factory.getInChIGenerator(mol);
296        return gen.getInchiKey();
297    }
298
299    /**
300     * Returns a depictor with default settings.
301     *
302     * @return A {@link RcdkDepictor} object with default values.
303     * @throws IOException
304     */
305    public static RcdkDepictor getDefaultDepictor() throws IOException {
306        return new RcdkDepictor(300, 300, 1.3, "cow", "off", "on", true, false, 100, "");
307    }
308
309    /**
310     * Construct {@link MolecularFormulaRange} object from a text representation of ranges.
311     * Parts of the code lifted from https://github.com/cdk/cdk-paper-3/blob/master/formula_generator_benchmark/CDK/CDKFormulaGeneratorCLI.java
312     *
313     * @param ranges An array of range strings, of the form <code>X min max</code>, where
314     *               <code>X</code> is the element symbol, <code>min</code> is the minimum
315     *               number of this element and <code>max</code> is the maximum
316     * @return A {@link MolecularFormulaRange} object, other <code>null</code> if any error occurs
317     */
318    public static MolecularFormulaRange getMFRange(String[] ranges) throws IOException {
319        if (ranges == null)
320            return (null);
321
322        IsotopeFactory ifac = Isotopes.getInstance();
323        MolecularFormulaRange mfRange = new MolecularFormulaRange();
324        for (String rstr : ranges) {
325            String[] toks = rstr.split(" ");
326            if (toks.length != 3)
327                throw new IllegalArgumentException("Each range string must have three elements");
328            String element = toks[0];
329            int min = Integer.parseInt(toks[1]);
330            int max = Integer.parseInt(toks[2]);
331            IIsotope i;
332            if (element.equals("D"))
333                i = ifac.getIsotope("H", 2);
334            else
335                i = ifac.getMajorIsotope(element);
336            mfRange.addIsotope(i, min, max);
337        }
338        return mfRange;
339    }
340
341    public static void main(String[] args) throws Exception, CloneNotSupportedException, IOException {
342//        IAtomContainer[] mols = Misc.loadMolecules(new String[]{"/Users/guhar/Downloads/Benzene.sdf"}, true, true, true);
343//
344//        SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
345//
346//        IAtomContainer mol1 = sp.parseSmiles("c1cccc(COC(=O)NC(CC(C)C)C(=O)NC(CCc2ccccc2)C(=O)COC)c1");
347//        IAtomContainer mol2 = sp.parseSmiles("c1cccc(COC(=O)NC(CC(C)C)C(=O)NCC#N)c1");
348//        CDKHueckelAromaticityDetector.detectAromaticity(mol1);
349//        CDKHueckelAromaticityDetector.detectAromaticity(mol2);
350//        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol2);
351//        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol1);
352//        int[][] map = getMcsAsAtomIndexMapping(mol1, mol2);
353//        for (int i = 0; i < map.length; i++) {
354//            System.out.println(map[i][0] + " <-> " + map[i][1]);
355//        }
356        String[] ranges = new String[]{"C 0 50", "N 0 20", "O 0 20", "H 0 50"};
357        MolecularFormulaRange mfr = getMFRange(ranges);
358        IChemObjectBuilder builder = SilentChemObjectBuilder.getInstance();
359        double mass = 300;
360        double tol = 5e-3;
361        MolecularFormulaGenerator gen = new MolecularFormulaGenerator(builder, mass - tol, mass + tol, mfr);
362        IMolecularFormula formula;
363        while ((formula = gen.getNextFormula()) != null) {
364            String formulaString = MolecularFormulaManipulator.getString(formula);
365            System.out.println(formulaString);
366        }
367    }
368}