PageRenderTime 178ms CodeModel.GetById 80ms app.highlight 7ms RepoModel.GetById 74ms app.codeStats 0ms

/rcdk/R/smiles.R

http://github.com/rajarshi/cdkr
R | 207 lines | 85 code | 6 blank | 116 comment | 9 complexity | e96831fcd05e13133ad5552921c56658 MD5 | raw file
  1#' Generate flag for customizing SMILES generation.
  2#'
  3#' The CDK supports a variety of customizations for SMILES generation including
  4#' the use of lower case symbols for aromatic compounds to the use of the ChemAxon
  5#' \href{http://www.chemeddl.org/tools/marvin/help/formats/cxsmiles-doc.html}{CxSmiles}
  6#' format. Each 'flavor' is represented by an integer and multiple
  7#' customizations are bitwise OR'ed. This method accepts the names of one or
  8#' more customizations and returns the bitwise OR of them.
  9#' See \href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation}
 10#' for the list of flavors and what they mean.
 11#'
 12#' @param flavors A character vector of flavors. The default is \code{Generic} 
 13#' (output non-canonical SMILES without stereochemistry, atomic masses). Possible 
 14#' values are
 15#' * Absolute
 16#' * AtomAtomMap
 17#' * AtomicMass
 18#' * AtomicMassStrict
 19#' * Canonical
 20#' * Cx2dCoordinates
 21#' * Cx3dCoordinates
 22#' * CxAtomLabel
 23#' * CxAtomValue
 24#' * CxCoordinates
 25#' * CxFragmentGroup
 26#' * CxMulticenter
 27#' * CxPolymer
 28#' * CxRadical
 29#' * CxSmiles
 30#' * CxSmilesWithCoords
 31#' * Default
 32#' * Generic
 33#' * InChILabelling
 34#' * Isomeric
 35#' * Stereo
 36#' * StereoCisTrans
 37#' * StereoExTetrahedral
 38#' * StereoTetrahedral
 39#' * Unique
 40#' * UniversalSmiles
 41#' * UseAromaticSymbols
 42#' @md
 43#' @return A numeric representing the bitwise `OR` of the specified flavors
 44#' @seealso \code{\link{get.smiles}}
 45#' @references \href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation}
 46#' @examples
 47#' m <- parse.smiles('C1C=CCC1N(C)c1ccccc1')[[1]]
 48#' get.smiles(m)
 49#' get.smiles(m, smiles.flavors(c('Generic','UseAromaticSymbols')))
 50#'
 51#' m <- parse.smiles("OS(=O)(=O)c1ccc(cc1)C(CC)CC |Sg:n:13:m:ht,Sg:n:11:n:ht|")[[1]]
 52#' get.smiles(m,flavor = smiles.flavors(c("CxSmiles")))
 53#' get.smiles(m,flavor = smiles.flavors(c("CxSmiles","UseAromaticSymbols")))
 54#'
 55#' @export
 56#' @author Rajarshi Guha \email{rajarshi.guha@@gmail.com}
 57smiles.flavors <- function(flavors = c('Generic')) {
 58    valid.flavors <- c('Absolute',
 59                       'AtomAtomMap',
 60                       'AtomicMass',
 61                       'AtomicMassStrict',
 62                       'Canonical',
 63                       'Cx2dCoordinates',
 64                       'Cx3dCoordinates',
 65                       'CxAtomLabel',
 66                       'CxAtomValue',
 67                       'CxCoordinates',
 68                       'CxFragmentGroup',
 69                       'CxMulticenter',
 70                       'CxPolymer',
 71                       'CxRadical',
 72                       'CxSmiles',
 73                       'CxSmilesWithCoords',
 74                       'Default',
 75                       'Generic',
 76                       'InChILabelling',
 77                       'Isomeric',
 78                       'Stereo',
 79                       'StereoCisTrans',
 80                       'StereoExTetrahedral',
 81                       'StereoTetrahedral',
 82                       'Unique',
 83                       'UniversalSmiles',
 84                       'UseAromaticSymbols')
 85    if (any(is.na(match(flavors, valid.flavors)))) {
 86        stop("Invalid flavor specified")
 87    }
 88    vals <- sapply(flavors, function(x) {
 89        .jfield('org.openscience.cdk.smiles.SmiFlavor', 'I', x)
 90    })
 91    Reduce(bitwOr, vals, 0)
 92}
 93
 94#' Generate a SMILES representation of a molecule.
 95#' 
 96#' The function will generate a SMILES representation of an
 97#' `IAtomContainer` object. The default parameters of the CDK SMILES
 98#' generator are used. This can mean that for large ring systems the
 99#' method may fail. See CDK \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{Javadocs}
100#' for more information
101#' @param molecule The molecule to query. Should be a `jobjRef` representing an `IAtomContainer`
102#' @param flavor The type of SMILES to generate. See \code{\link{smiles.flavors}}. Default is `Generic`
103#' SMILES
104#' @param smigen A pre-existing SMILES generator object. By default, a new one is created from the specified flavor
105#' @return A character string containing the generated SMILES
106#' @seealso \code{\link{parse.smiles}}, \code{\link{smiles.flavors}}
107#' @export
108#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
109#' @references \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{SmilesGenerator} 
110#' @examples 
111#' m <- parse.smiles('C1C=CCC1N(C)c1ccccc1')[[1]]
112#' get.smiles(m)
113#' get.smiles(m, smiles.flavors(c('Generic','UseAromaticSymbols')))
get.smiles <- function(molecule, flavor = smiles.flavors(c('Generic')), smigen = NULL) {
    # Prefer the generator handed in by the caller; only when none is given
    # is a new SmilesGenerator constructed from `flavor`.
    generator <- smigen
    if (is.null(generator)) {
        generator <- .jnew("org/openscience/cdk/smiles/SmilesGenerator", flavor)
    }
    # Invoke the generator's `create` method (return signature 'S') on the molecule.
    .jcall(generator, 'S', 'create', molecule)
}
119
120#' Get a SMILES parser object.
121#' 
122#' This function returns a reference to a SMILES parser
123#' object. If you are parsing multiple SMILES strings using multiple
124#' calls to \code{\link{parse.smiles}}, it is
125#' preferable to create your own parser and supply it to
126#' \code{\link{parse.smiles}} rather than forcing that function
127#' to instantiate a new parser for each call
128#' 
129#' @return A `jobjRef` object corresponding to the CDK 
130#' \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesParser.html}{SmilesParser} class
131#' @seealso \code{\link{get.smiles}}, \code{\link{parse.smiles}}
132#' @export
133#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
get.smiles.parser <- function() {
    # The SmilesParser constructor is given the object returned by
    # get.chem.object.builder().
    builder <- get.chem.object.builder()
    .jnew("org/openscience/cdk/smiles/SmilesParser", builder)
}
137
138#' Parse SMILES strings into molecule objects.
139#' 
140#' This function parses a vector of SMILES strings to generate a list of
141#' `IAtomContainer` objects. Note that the resultant molecule will
142#' not have any 2D or 3D coordinates.
143#' Note that the molecules obtained from this method will not have any
144#' aromaticity perception (unless aromatic symbols are encountered, in which 
145#' case the relevant atoms are automatically set to aromatic), atom typing or 
146#' isotopic configuration done on them. This is in contrast to the 
147#' \code{\link{load.molecules}} method. Thus, you should
148#' perform these steps manually on the molecules.
149#' @param smiles A single SMILES string or a vector of SMILES strings
150#' @param kekulise If set to `FALSE` disables electron checking and
151#' allows for parsing of incorrect SMILES. If a SMILES does not parse by default, try
152#' setting this to `FALSE` - though the resultant molecule may not have consistent
153#' bonding. As an example, `c4ccc2c(cc1=Nc3ncccc3(Cn12))c4` will not be parsed by default
154#' because it is missing a nitrogen. With this argument set to `FALSE` it will parse
155#' successfully, but this is a hack to handle an incorrect SMILES
156#' @param omit.nulls If set to `TRUE`, omits SMILES which were parsed as `NULL`
157#' @param smiles.parser A SMILES parser object obtained from \code{\link{get.smiles.parser}}
158#' @return A `list` of `jobjRef`s to their corresponding CDK `IAtomContainer` 
159#' objects. If a SMILES string could not be parsed and `omit.nulls=TRUE` it 
160#' is omitted from the output list.
161#' @seealso \code{\link{get.smiles}}, \code{\link{get.smiles.parser}}
162#' @export
163#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
parse.smiles <-
  function(smiles,
           kekulise = TRUE,
           omit.nulls = FALSE,
           smiles.parser = NULL) {
    if (!is.character(smiles)) {
      stop("Must supply a character vector of SMILES strings")
    }

    # Use the caller's parser when one is supplied, otherwise create a new one.
    parser <- if (is.null(smiles.parser)) get.smiles.parser() else smiles.parser

    # The kekulise flag is set on the parser object itself, so a
    # caller-supplied parser is modified by this call as well.
    .jcall(parser, "V", "kekulise", kekulise)

    # Parse a single SMILES string. Any error raised by the call is turned
    # into NULL so that one bad string does not abort the whole batch.
    parse.one <- function(x) {
      mol <- tryCatch(
        .jcall(parser,
               "Lorg/openscience/cdk/interfaces/IAtomContainer;",
               "parseSmiles",
               x),
        error = function(e) NULL
      )
      if (is.null(mol)) {
        return(NULL)
      }
      .jcast(mol, "org/openscience/cdk/interfaces/IAtomContainer")
    }

    # lapply (rather than sapply) guarantees a list whatever the per-element
    # results look like. Unnamed input is named by the SMILES strings
    # themselves, which is the naming sapply applied to character input.
    parsed <- lapply(smiles, parse.one)
    if (is.null(names(parsed))) {
      names(parsed) <- smiles
    }

    # One mask drives both the failure count and the optional filtering.
    failed <- vapply(parsed, is.null, logical(1))
    n.failed <- sum(failed)
    if (n.failed > 0) {
      warning(n.failed, " out of ", length(parsed),
              " SMILES were not successfully parsed, resulting in NULLs.")
    }

    if (omit.nulls == TRUE) {
      parsed <- parsed[!failed]
    }
    parsed
  }