PageRenderTime 27ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/rcdk/R/smiles.R

http://github.com/rajarshi/cdkr
R | 207 lines | 85 code | 6 blank | 116 comment | 9 complexity | e96831fcd05e13133ad5552921c56658 MD5 | raw file
  1. #' Generate flag for customizing SMILES generation.
  2. #'
  3. #' The CDK supports a variety of customizations for SMILES generation, ranging
  4. #' from the use of lower case symbols for aromatic compounds to the use of the ChemAxon
  5. #' \href{http://www.chemeddl.org/tools/marvin/help/formats/cxsmiles-doc.html}{CxSmiles}
  6. #' format. Each 'flavor' is represented by an integer and multiple
  7. #' customizations are bitwise OR'ed. This method accepts the names of one or
  8. #' more customizations and returns the bitwise OR of them.
  9. #' See \href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation}
  10. #' for the list of flavors and what they mean.
  11. #'
  12. #' @param flavors A character vector of flavors. The default is \code{Generic}
  13. #' (output non-canonical SMILES without stereochemistry, atomic masses). Possible
  14. #' values are
  15. #' * Absolute
  16. #' * AtomAtomMap
  17. #' * AtomicMass
  18. #' * AtomicMassStrict
  19. #' * Canonical
  20. #' * Cx2dCoordinates
  21. #' * Cx3dCoordinates
  22. #' * CxAtomLabel
  23. #' * CxAtomValue
  24. #' * CxCoordinates
  25. #' * CxFragmentGroup
  26. #' * CxMulticenter
  27. #' * CxPolymer
  28. #' * CxRadical
  29. #' * CxSmiles
  30. #' * CxSmilesWithCoords
  31. #' * Default
  32. #' * Generic
  33. #' * InChILabelling
  34. #' * Isomeric
  35. #' * Stereo
  36. #' * StereoCisTrans
  37. #' * StereoExTetrahedral
  38. #' * StereoTetrahedral
  39. #' * Unique
  40. #' * UniversalSmiles
  41. #' * UseAromaticSymbols
  42. #' @md
  43. #' @return A numeric representing the bitwise `OR` of the specified flavors
  44. #' @seealso \code{\link{get.smiles}}
  45. #' @references \href{https://cdk.github.io/cdk/2.3/docs/api/index.html?org/openscience/cdk/smiles/SmiFlavor.html}{CDK documentation}
  46. #' @examples
  47. #' m <- parse.smiles('C1C=CCC1N(C)c1ccccc1')[[1]]
  48. #' get.smiles(m)
  49. #' get.smiles(m, smiles.flavors(c('Generic','UseAromaticSymbols')))
  50. #'
  51. #' m <- parse.smiles("OS(=O)(=O)c1ccc(cc1)C(CC)CC |Sg:n:13:m:ht,Sg:n:11:n:ht|")[[1]]
  52. #' get.smiles(m,flavor = smiles.flavors(c("CxSmiles")))
  53. #' get.smiles(m,flavor = smiles.flavors(c("CxSmiles","UseAromaticSymbols")))
  54. #'
  55. #' @export
  56. #' @author Rajarshi Guha \email{rajarshi.guha@@gmail.com}
  smiles.flavors <- function(flavors = c('Generic')) {
    # Flavor names accepted by this function. Each one is looked up below as a
    # static integer field of org.openscience.cdk.smiles.SmiFlavor.
    valid.flavors <- c(
      'Absolute',
      'AtomAtomMap',
      'AtomicMass',
      'AtomicMassStrict',
      'Canonical',
      'Cx2dCoordinates',
      'Cx3dCoordinates',
      'CxAtomLabel',
      'CxAtomValue',
      'CxCoordinates',
      'CxFragmentGroup',
      'CxMulticenter',
      'CxPolymer',
      'CxRadical',
      'CxSmiles',
      'CxSmilesWithCoords',
      'Default',
      'Generic',
      'InChILabelling',
      'Isomeric',
      'Stereo',
      'StereoCisTrans',
      'StereoExTetrahedral',
      'StereoTetrahedral',
      'Unique',
      'UniversalSmiles',
      'UseAromaticSymbols'
    )

    # Reject unknown names before touching Java, and report which ones were
    # wrong so the caller does not have to guess.
    unknown <- flavors[is.na(match(flavors, valid.flavors))]
    if (length(unknown) > 0) {
      stop("Invalid flavor specified: ",
           paste(unique(unknown), collapse = ", "))
    }

    # Fetch the integer constant for every requested flavor. vapply() enforces
    # one integer per flavor instead of relying on sapply() simplification.
    vals <- vapply(flavors, function(x) {
      .jfield('org.openscience.cdk.smiles.SmiFlavor', 'I', x)
    }, integer(1), USE.NAMES = FALSE)

    # Combine with bitwise OR. The integer seed keeps the result an integer
    # even when no flavors were supplied.
    Reduce(bitwOr, vals, 0L)
  }
  93. #' Generate a SMILES representation of a molecule.
  94. #'
  95. #' The function will generate a SMILES representation of an
  96. #' `IAtomContainer` object. The default parameters of the CDK SMILES
  97. #' generator are used. This can mean that for large ring systems the
  98. #' method may fail. See CDK \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{Javadocs}
  99. #' for more information
  100. #' @param molecule The molecule to query. Should be a `jobjRef` representing an `IAtomContainer`
  101. #' @param flavor The type of SMILES to generate. See \code{\link{smiles.flavors}}. Default is `Generic`
  102. #' SMILES
  103. #' @param smigen A pre-existing SMILES generator object. By default, a new one is created from the specified flavor
  104. #' @return A character string containing the generated SMILES
  105. #' @seealso \code{\link{parse.smiles}}, \code{\link{smiles.flavors}}
  106. #' @export
  107. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  108. #' @references \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesGenerator.html}{SmilesGenerator}
  109. #' @examples
  110. #' m <- parse.smiles('C1C=CCC1N(C)c1ccccc1')[[1]]
  111. #' get.smiles(m)
  112. #' get.smiles(m, smiles.flavors(c('Generic','UseAromaticSymbols')))
  get.smiles <- function(molecule, flavor = smiles.flavors(c('Generic')), smigen = NULL) {
    # Only build a generator (and only look at `flavor`) when the caller did
    # not supply one; a supplied `smigen` is used as-is.
    if (is.null(smigen)) {
      if (!is.numeric(flavor) || length(flavor) != 1L || is.na(flavor)) {
        stop("'flavor' must be a single non-missing numeric value; see smiles.flavors()")
      }
      # rJava passes R doubles and integers as different Java types, so coerce
      # to integer to make a value such as 16 behave the same as 16L.
      smigen <- .jnew("org/openscience/cdk/smiles/SmilesGenerator", as.integer(flavor))
    }
    # Ask the generator for the SMILES string ('S' = Java String return).
    .jcall(smigen, 'S', 'create', molecule)
  }
  118. #' Get a SMILES parser object.
  119. #'
  120. #' This function returns a reference to a SMILES parser
  121. #' object. If you are parsing multiple SMILES strings using multiple
  122. #' calls to \code{\link{parse.smiles}}, it is
  123. #' preferable to create your own parser and supply it to
  124. #' \code{\link{parse.smiles}} rather than forcing that function
  125. #' to instantiate a new parser for each call
  126. #'
  127. #' @return A `jobjRef` object corresponding to the CDK
  128. #' \href{http://cdk.github.io/cdk/2.2/docs/api/org/openscience/cdk/smiles/SmilesParser.html}{SmilesParser} class
  129. #' @seealso \code{\link{get.smiles}}, \code{\link{parse.smiles}}
  130. #' @export
  131. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  get.smiles.parser <- function() {
    # The CDK SmilesParser constructor is given the package's chem object
    # builder; the resulting Java object reference is returned to the caller.
    builder <- get.chem.object.builder()
    .jnew("org/openscience/cdk/smiles/SmilesParser", builder)
  }
  135. #' Parse SMILES strings into molecule objects.
  136. #'
  137. #' This function parses a vector of SMILES strings to generate a list of
  138. #' `IAtomContainer` objects. Note that the resultant molecule will
  139. #' not have any 2D or 3D coordinates.
  140. #' Note that the molecules obtained from this method will not have any
  141. #' aromaticity perception (unless aromatic symbols are encountered, in which
  142. #' case the relevant atoms are automatically set to aromatic), atom typing or
  143. #' isotopic configuration done on them. This is in contrast to the
  144. #' \code{\link{load.molecules}} method. Thus, you should
  145. #' perform these steps manually on the molecules.
  146. #' @param smiles A single SMILES string or a vector of SMILES strings
  147. #' @param kekulise If set to `FALSE` disables electron checking and
  148. #' allows for parsing of incorrect SMILES. If a SMILES does not parse by default, try
  149. #' setting this to `FALSE` - though the resultant molecule may not have consistent
  150. #' bonding. As an example, `c4ccc2c(cc1=Nc3ncccc3(Cn12))c4` will not be parsed by default
  151. #' because it is missing a nitrogen. With this argument set to `FALSE` it will parse
  152. #' successfully, but this is a hack to handle an incorrect SMILES
  153. #' @param omit.nulls If set to `TRUE`, omits SMILES which were parsed as `NULL`
  154. #' @param smiles.parser A SMILES parser object obtained from \code{\link{get.smiles.parser}}
  155. #' @return A `list` of `jobjRef`s to their corresponding CDK `IAtomContainer`
  156. #' objects. If a SMILES string could not be parsed and `omit.nulls=TRUE` it
  157. #' is omitted from the output list.
  158. #' @seealso \code{\link{get.smiles}}, \code{\link{get.smiles.parser}}
  159. #' @export
  160. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  parse.smiles <-
    function(smiles,
             kekulise = TRUE,
             omit.nulls = FALSE,
             smiles.parser = NULL) {
      if (!is.character(smiles)) {
        stop("Must supply a character vector of SMILES strings")
      }

      # Reuse the caller's parser when one is given, otherwise create one.
      parser <- if (is.null(smiles.parser)) get.smiles.parser() else smiles.parser

      # NOTE: this sets the kekulise option on the parser object itself, so a
      # parser passed in by the caller is modified as well.
      .jcall(parser, "V", "kekulise", kekulise)

      # Parse a single SMILES string; any error raised by the call is turned
      # into NULL so one bad string does not abort the whole batch.
      parse_one <- function(smi) {
        mol <- tryCatch(
          .jcall(parser, "Lorg/openscience/cdk/interfaces/IAtomContainer;", "parseSmiles", smi),
          error = function(e) NULL
        )
        if (is.null(mol)) {
          return(NULL)
        }
        .jcast(mol, "org/openscience/cdk/interfaces/IAtomContainer")
      }

      # lapply() always yields a list (one slot per input, NULL on failure),
      # so the return type never depends on sapply()'s simplification rules.
      parsed <- lapply(smiles, parse_one)
      # Same naming rule sapply() applied: keep existing names, otherwise
      # label each result with its SMILES string.
      if (is.null(names(parsed))) {
        names(parsed) <- smiles
      }

      failed <- vapply(parsed, is.null, logical(1))
      n_failed <- sum(failed)

      # Accepts TRUE as well as values that coerce to it (e.g. 1, "TRUE").
      result <- if (isTRUE(as.logical(omit.nulls))) parsed[!failed] else parsed

      if (n_failed > 0) {
        warning(n_failed, " out of ", length(parsed),
                " SMILES were not successfully parsed, resulting in NULLs.")
      }
      result
    }