PageRenderTime 109ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/rcdk/R/desc.R

http://github.com/rajarshi/cdkr
R | 324 lines | 208 code | 37 blank | 79 comment | 64 complexity | 58dceb51e46ee59785c09421339a1fd9 MD5 | raw file
  1. #' @keywords internal
  2. .get.desc.values <- function(dval, nexpected) {
  3. if (!inherits(dval, "jobjRef")) {
  4. if (is.null(dval) || is.na(dval)) return(NA)
  5. }
  6. if (!is.null(.jcall(dval, "Ljava/lang/Exception;", "getException"))) {
  7. return(rep(NA, nexpected))
  8. }
  9. nval <- numeric()
  10. if (!inherits(dval,'jobjRef') && is.na(dval)) {
  11. return(NA)
  12. }
  13. result <- .jcall(dval, "Lorg/openscience/cdk/qsar/result/IDescriptorResult;", "getValue")
  14. methods <- .jmethods(result)
  15. if ("public double org.openscience.cdk.qsar.result.DoubleArrayResult.get(int)" %in% methods) {
  16. result <- .jcast(result, "org/openscience/cdk/qsar/result/DoubleArrayResult")
  17. len <- .jcall(result, "I", "length")
  18. for (i in 1:len) nval[i] <- .jcall(result, "D", "get", as.integer(i-1))
  19. } else if ("public int org.openscience.cdk.qsar.result.IntegerArrayResult.get(int)" %in% methods) {
  20. result <- .jcast(result, "org/openscience/cdk/qsar/result/IntegerArrayResult")
  21. len <- .jcall(result, "I", "length")
  22. for (i in 1:len) nval[i] <- .jcall(result, "I", "get", as.integer(i-1))
  23. } else if ("public int org.openscience.cdk.qsar.result.IntegerResult.intValue()" %in% methods) {
  24. result <- .jcast(result, "org/openscience/cdk/qsar/result/IntegerResult")
  25. nval <- .jcall(result, "I", "intValue")
  26. } else if ("public double org.openscience.cdk.qsar.result.DoubleResult.doubleValue()" %in% methods) {
  27. result <- .jcast(result, "org/openscience/cdk/qsar/result/DoubleResult")
  28. nval <- .jcall(result, "D", "doubleValue")
  29. }
  30. return(nval)
  31. }
  32. #' @keywords internal
  33. .get.desc.engine <- function(type = 'molecular') {
  34. if (!(type %in% c('molecular', 'atomic', 'bond'))) {
  35. stop('type must bond, molecular or atomic')
  36. }
  37. if (type == 'molecular') {
  38. interface <- J("org.openscience.cdk.qsar.IMolecularDescriptor")
  39. } else if (type == 'atomic') {
  40. interface <- J("org.openscience.cdk.qsar.IAtomicDescriptor")
  41. } else if (type == 'bond') {
  42. interface <- J("org.openscience.cdk.qsar.IBondDescriptor")
  43. }
  44. dklass <- interface@jobj
  45. dcob <- get.chem.object.builder()
  46. dengine <- .jnew('org/openscience/cdk/qsar/DescriptorEngine', dklass, dcob)
  47. attr(dengine, 'descType') <- type
  48. pkg <- c('org.openscience.cdk.qsar.descriptors.atomic',
  49. 'org.openscience.cdk.qsar.descriptors.bond',
  50. 'org.openscience.cdk.qsar.descriptors.molecular')[ type ]
  51. attr(dengine, 'descPkg') <- pkg
  52. dengine
  53. }
  54. .get.desc.all.classnames <- function(type = 'molecular') {
  55. dengine <- .get.desc.engine(type)
  56. type <- attr(dengine, "descType")
  57. pkg <- attr(dengine, "descPkg")
  58. cn <- .jcall(dengine, 'Ljava/util/List;', 'getDescriptorClassNames')
  59. size <- .jcall(cn, "I", "size")
  60. cnames <- list()
  61. for (i in 1:size)
  62. cnames[[i]] <- .jsimplify(.jcast(.jcall(cn, "Ljava/lang/Object;", "get", as.integer(i-1)), "java/lang/String"))
  63. #cnames <- gsub(paste(pkg, '.', sep='',collapse=''), '', unlist(cnames))
  64. unique(unlist(cnames) )
  65. }
  66. #' Get descriptor class names
  67. #'
  68. #' @param type A string indicating which class of descriptors to return. Specifying
  69. #' `"all"` will return class names for all molecular descriptors. Options include
  70. #' * topological
  71. #' * geometrical
  72. #' * hybrid
  73. #' * constitutional
  74. #' * protein
  75. #' * electronic
  76. #' @seealso \link{get.atomic.desc.names}
  77. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  78. #' @export
  79. get.desc.names <- function(type = "all") {
  80. if (type == 'all') return(.get.desc.all.classnames())
  81. if (!(type %in% c('topological', 'geometrical', 'hybrid',
  82. 'constitutional', 'protein', 'electronic'))) {
  83. stop("Invalid descriptor category specified")
  84. }
  85. ret <- .jcall("org/guha/rcdk/descriptors/DescriptorUtilities", "[Ljava/lang/String;",
  86. "getDescriptorNamesByCategory", type)
  87. if ("org.openscience.cdk.qsar.descriptors.molecular.IPMolecularLearningDescriptor" %in% ret) {
  88. pos <- which(ret == "org.openscience.cdk.qsar.descriptors.molecular.IPMolecularLearningDescriptor")
  89. return(ret[-pos])
  90. } else {
  91. return(ret)
  92. }
  93. }
  94. #' List available descriptor categories
  95. #'
  96. #' @return A character vector listing available descriptor categories. This can be
  97. #' used in \link{get.desc.names}
  98. #' @seealso \link{get.desc.names}
  99. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  100. #' @export
  101. get.desc.categories <- function() {
  102. cats <- .jcall("org/guha/rcdk/descriptors/DescriptorUtilities", "[Ljava/lang/String;",
  103. "getDescriptorCategories");
  104. gsub("Descriptor", "", cats)
  105. }
  106. #' Compute descriptor values for a set of molecules
  107. #'
  108. #' @param molecules A `list` of molecule objects
  109. #' @param which.desc A character vector listing descriptor class names
  110. #' @param verbose If `TRUE`, verbose output
  111. #' @return A `data.frame` with molecules in the rows and descriptors in the columns. If
  112. #' a descriptor value cannot be computed for a molecule, `NA` is returned.
  113. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  114. #' @export
  115. eval.desc <- function(molecules, which.desc, verbose = FALSE) {
  116. if (class(molecules) != 'list') {
  117. jclassAttr <- attr(molecules, "jclass")
  118. if (jclassAttr != "org/openscience/cdk/interfaces/IAtomContainer") {
  119. stop("Must provide a list of molecule objects or a single molecule object")
  120. }
  121. molecules <- list(molecules)
  122. } else {
  123. jclassAttr <- lapply(molecules, attr, "jclass")
  124. if (any(jclassAttr != "org/openscience/cdk/interfaces/IAtomContainer")) {
  125. stop("molecule must be an IAtomContainer")
  126. }
  127. }
  128. dcob <- get.chem.object.builder()
  129. if (length(which.desc) == 1) {
  130. desc <- .jnew(which.desc)
  131. .jcall(desc, "V", "initialise", dcob)
  132. dnames <- .jcall(desc, "[Ljava/lang/String;", "getDescriptorNames")
  133. dnames <- gsub('-', '.', dnames)
  134. descvals <- lapply(molecules, function(a,b) {
  135. val <- tryCatch({.jcall(b, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", a)},
  136. warning = function(e) return(NA),
  137. error = function(e) return(NA))
  138. }, b=desc)
  139. vals <- lapply(descvals, .get.desc.values, nexpected = length(dnames))
  140. vals <- data.frame(do.call('rbind', vals))
  141. names(vals) <- dnames
  142. return(vals)
  143. } else {
  144. counter <- 1
  145. dl <- list()
  146. dnames <- c()
  147. for (desc in which.desc) {
  148. if (verbose) { cat("Processing ", gsub('org.openscience.cdk.qsar.descriptors.molecular.', '', desc)
  149. , "\n") }
  150. desc <- .jnew(desc)
  151. .jcall(desc, "V", "initialise", dcob)
  152. dnames <- .jcall(desc, "[Ljava/lang/String;", "getDescriptorNames")
  153. dnames <- gsub('-', '.', dnames)
  154. descvals <- lapply(molecules, function(a, check) {
  155. val <- tryCatch({.jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", a, check=check)})
  156. }, check=FALSE)
  157. vals <- lapply(descvals, .get.desc.values, nexpected = length(dnames))
  158. vals <- data.frame(do.call('rbind', vals))
  159. if (length(vals) == 1 && any(is.na(vals))) {
  160. vals <- as.data.frame(matrix(NA, nrow=1, ncol=length(dnames)))
  161. }
  162. names(vals) <- dnames
  163. ## idx <- which(is.na(names(vals)))
  164. ## if (length(idx) > 0) vals <- vals[,-idx]
  165. dl[[counter]] <- vals
  166. counter <- counter + 1
  167. }
  168. do.call('cbind', dl)
  169. }
  170. }
  171. #' Get class names for atomic descriptors
  172. #'
  173. #' @param type A string indicating which class of descriptors to return. Specifying
  174. #' `"all"` will return class names for all molecular descriptors. Options include
  175. #' * topological
  176. #' * geometrical
  177. #' * hybrid
  178. #' * constitutional
  179. #' * protein
  180. #' * electronic
  181. #' @return A character vector containing class names for atomic descriptors
  182. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  183. #' @export
  184. get.atomic.desc.names <- function(type = "all") {
  185. if (type == 'all') return(.get.desc.all.classnames('atomic'))
  186. return(.jcall("org/guha/rcdk/descriptors/DescriptorUtilities", "[Ljava/lang/String;",
  187. "getDescriptorNamesByCategory", type))
  188. }
  189. #' Compute descriptors for each atom in a molecule
  190. #'
  191. #' @param molecule A molecule object
  192. #' @param which.desc A character vector of atomic descriptor class names
  193. #' @param verbose Optional. Default \code{FALSE}. Toggle verbosity.
  194. #' @return A `data.frame` with atoms in the rows and descriptors in the columns
  195. #' @export
  196. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  197. #' @seealso \link{get.atomic.desc.names}
  198. eval.atomic.desc <- function(molecule, which.desc, verbose = FALSE) {
  199. if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
  200. stop("Must supply an IAtomContainer object")
  201. }
  202. if (length(which.desc) > 1) {
  203. counter <- 1
  204. dl <- list()
  205. for (desc in which.desc) {
  206. if (verbose) { cat("Processing ", gsub('org.openscience.cdk.qsar.descriptors.atomic.', '', desc)
  207. , "\n") }
  208. desc <- .jnew(desc)
  209. atoms = get.atoms(molecule)
  210. descvals <- lapply(atoms, function(a) {
  211. dval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", a, molecule, check=FALSE)
  212. if (!is.null(e<-.jgetEx())) {
  213. print("Java exception was raised")
  214. .jclear()
  215. dval <- NA
  216. }
  217. return(dval)
  218. })
  219. vals <- lapply(descvals, .get.desc.values)
  220. vals <- data.frame(do.call('rbind', vals))
  221. if (inherits(descvals[[1]], "jobjRef")) {
  222. names(vals) <- .jcall(descvals[[1]], "[Ljava/lang/String;", "getNames")
  223. } else {
  224. names(vals) <- gsub('org.openscience.cdk.qsar.descriptors.atomic.', '', desc)
  225. }
  226. dl[[counter]] <- vals
  227. counter <- counter + 1
  228. }
  229. do.call('cbind', dl)
  230. }
  231. }
  232. #' Compute TPSA for a molecule
  233. #' @param molecule A molecule object
  234. #' @return A double value representing the TPSA value
  235. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  236. #' @export
  237. get.tpsa <- function(molecule) {
  238. if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
  239. stop("Must supply an IAtomContainer object")
  240. }
  241. desc <- .jnew("org.openscience.cdk.qsar.descriptors.molecular.TPSADescriptor")
  242. descval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", molecule)
  243. value <- .get.desc.values(descval, 1)
  244. return(value)
  245. }
  246. #' Compute ALogP for a molecule
  247. #' @param molecule A molecule object
  248. #' @return A double value representing the ALogP value
  249. #' @export
  250. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  251. get.alogp <- function(molecule) {
  252. if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
  253. stop("Must supply an IAtomContainer object")
  254. }
  255. desc <- .jnew("org.openscience.cdk.qsar.descriptors.molecular.ALOGPDescriptor")
  256. descval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", molecule)
  257. value <- .get.desc.values(descval, 3)
  258. return(value[1])
  259. }
  260. #' Compute XLogP for a molecule
  261. #' @param molecule A molecule object
  262. #' @return A double value representing the XLogP value
  263. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  264. #' @export
  265. get.xlogp <- function(molecule) {
  266. if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
  267. stop("Must supply an IAtomContainer object")
  268. }
  269. desc <- .jnew("org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor")
  270. descval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", molecule)
  271. value <- .get.desc.values(descval, 3)
  272. return(value)
  273. }
  274. #' Compute volume of a molecule
  275. #'
  276. #' This method does not require 3D coordinates. As a result its an
  277. #' approximation
  278. #' @param molecule A molecule object
  279. #' @return A double value representing the volume
  280. #' @export
  281. #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  282. get.volume <- function(molecule) {
  283. if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
  284. stop("Must supply an IAtomContainer object")
  285. }
  286. return(J("org.openscience.cdk.geometry.volume.VABCVolume", "calculate", molecule))
  287. }