PageRenderTime 173ms CodeModel.GetById 163ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/rcdk/R/desc.R

http://github.com/rajarshi/cdkr
R | 324 lines | 208 code | 37 blank | 79 comment | 64 complexity | 58dceb51e46ee59785c09421339a1fd9 MD5 | raw file
  1
  2#' @keywords internal
  3.get.desc.values <- function(dval, nexpected) {
  4  if (!inherits(dval, "jobjRef")) {
  5    if (is.null(dval) || is.na(dval)) return(NA)
  6  }
  7
  8  if (!is.null(.jcall(dval, "Ljava/lang/Exception;", "getException"))) {
  9    return(rep(NA, nexpected))
 10  }
 11  
 12  nval <- numeric()
 13  if (!inherits(dval,'jobjRef') && is.na(dval)) {
 14    return(NA)
 15  }
 16  
 17  result <- .jcall(dval, "Lorg/openscience/cdk/qsar/result/IDescriptorResult;", "getValue")
 18  methods <- .jmethods(result)
 19
 20  if ("public double org.openscience.cdk.qsar.result.DoubleArrayResult.get(int)" %in% methods) {
 21    result <- .jcast(result, "org/openscience/cdk/qsar/result/DoubleArrayResult")
 22    len <- .jcall(result, "I", "length")
 23    for (i in 1:len) nval[i] <- .jcall(result, "D", "get", as.integer(i-1))
 24  } else if ("public int org.openscience.cdk.qsar.result.IntegerArrayResult.get(int)" %in% methods) {
 25    result <- .jcast(result, "org/openscience/cdk/qsar/result/IntegerArrayResult")    
 26    len <- .jcall(result, "I", "length")
 27    for (i in 1:len) nval[i] <- .jcall(result, "I", "get", as.integer(i-1))    
 28  }  else if ("public int org.openscience.cdk.qsar.result.IntegerResult.intValue()" %in% methods) {
 29    result <- .jcast(result, "org/openscience/cdk/qsar/result/IntegerResult")    
 30    nval <- .jcall(result, "I", "intValue")
 31  } else if ("public double org.openscience.cdk.qsar.result.DoubleResult.doubleValue()" %in% methods) {
 32    result <- .jcast(result, "org/openscience/cdk/qsar/result/DoubleResult")    
 33    nval <- .jcall(result, "D", "doubleValue")    
 34  }
 35
 36  return(nval)
 37}
 38
 39
 40#' @keywords internal
 41.get.desc.engine <- function(type = 'molecular') {
 42  if (!(type %in% c('molecular', 'atomic', 'bond'))) {
 43    stop('type must bond, molecular or atomic')
 44  }
 45  if (type == 'molecular') {
 46    interface <- J("org.openscience.cdk.qsar.IMolecularDescriptor")
 47  } else if (type == 'atomic') {
 48    interface <- J("org.openscience.cdk.qsar.IAtomicDescriptor")    
 49  } else if (type == 'bond') {
 50    interface <- J("org.openscience.cdk.qsar.IBondDescriptor")        
 51  }
 52  dklass <- interface@jobj
 53  dcob <- get.chem.object.builder()
 54  dengine <- .jnew('org/openscience/cdk/qsar/DescriptorEngine', dklass, dcob)
 55  attr(dengine, 'descType') <- type
 56  pkg <- c('org.openscience.cdk.qsar.descriptors.atomic',
 57           'org.openscience.cdk.qsar.descriptors.bond',
 58           'org.openscience.cdk.qsar.descriptors.molecular')[ type ]
 59  attr(dengine, 'descPkg') <- pkg
 60  dengine
 61}
 62
 63.get.desc.all.classnames <- function(type = 'molecular') {
 64  dengine <- .get.desc.engine(type)
 65  type <- attr(dengine, "descType")
 66  pkg <- attr(dengine, "descPkg")
 67  cn <- .jcall(dengine, 'Ljava/util/List;', 'getDescriptorClassNames')
 68  size <- .jcall(cn, "I", "size")
 69  cnames <- list()
 70  for (i in 1:size)
 71    cnames[[i]] <- .jsimplify(.jcast(.jcall(cn, "Ljava/lang/Object;", "get", as.integer(i-1)), "java/lang/String"))
 72                                        #cnames <- gsub(paste(pkg, '.', sep='',collapse=''), '',  unlist(cnames))
 73  unique(unlist(cnames)  )
 74}
 75
 76#' Get descriptor class names
 77#' 
 78#' @param type A string indicating which class of descriptors to return. Specifying
 79#' `"all"` will return class names for all molecular descriptors. Options include
 80#' * topological
 81#' * geometrical
 82#' * hybrid
 83#' * constitutional
 84#' * protein
 85#' * electronic
 86#' @seealso \link{get.atomic.desc.names}
 87#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
 88#' @export
 89get.desc.names <- function(type = "all") {
 90  if (type == 'all') return(.get.desc.all.classnames())
 91  if (!(type %in% c('topological', 'geometrical', 'hybrid',
 92                    'constitutional', 'protein', 'electronic'))) {
 93    stop("Invalid descriptor category specified")
 94  }
 95  ret <- .jcall("org/guha/rcdk/descriptors/DescriptorUtilities", "[Ljava/lang/String;",
 96                "getDescriptorNamesByCategory", type)
 97  if ("org.openscience.cdk.qsar.descriptors.molecular.IPMolecularLearningDescriptor" %in% ret) {
 98    pos <- which(ret == "org.openscience.cdk.qsar.descriptors.molecular.IPMolecularLearningDescriptor")
 99    return(ret[-pos])
100  } else {
101    return(ret)
102  }
103}
104
105#' List available descriptor categories
106#' 
107#' @return A character vector listing available descriptor categories. This can be 
108#' used in \link{get.desc.names}
109#' @seealso \link{get.desc.names}
110#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
111#' @export
112get.desc.categories <- function() {
113  cats <- .jcall("org/guha/rcdk/descriptors/DescriptorUtilities", "[Ljava/lang/String;",
114                 "getDescriptorCategories");
115  gsub("Descriptor", "", cats)
116}
117
118#' Compute descriptor values for a set of molecules
119#' 
120#' @param molecules A `list` of molecule objects
121#' @param which.desc A character vector listing descriptor class names
122#' @param verbose If `TRUE`, verbose output
123#' @return A `data.frame` with molecules in the rows and descriptors in the columns. If
124#' a descriptor value cannot be computed for a molecule, `NA` is returned.
125#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
126#' @export
127eval.desc <- function(molecules, which.desc, verbose = FALSE) {
128  if (class(molecules) != 'list') {
129    jclassAttr <- attr(molecules, "jclass")
130    if (jclassAttr != "org/openscience/cdk/interfaces/IAtomContainer") {
131      stop("Must provide a list of molecule objects or a single molecule object")
132    }
133    molecules <- list(molecules)
134  } else {
135    jclassAttr <- lapply(molecules, attr, "jclass")
136    if (any(jclassAttr != "org/openscience/cdk/interfaces/IAtomContainer")) {
137      stop("molecule must be an IAtomContainer")
138    }
139  }
140
141  dcob <- get.chem.object.builder()
142  
143  if (length(which.desc) == 1) {
144    desc <- .jnew(which.desc)
145    .jcall(desc, "V", "initialise", dcob)
146    
147    dnames <- .jcall(desc, "[Ljava/lang/String;", "getDescriptorNames")
148    dnames <- gsub('-', '.', dnames)
149    
150    descvals <- lapply(molecules, function(a,b) {
151      val <- tryCatch({.jcall(b, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", a)},
152                      warning = function(e) return(NA),
153                      error = function(e) return(NA))
154    }, b=desc)
155
156
157    vals <- lapply(descvals, .get.desc.values, nexpected = length(dnames))
158    vals <- data.frame(do.call('rbind', vals))
159    names(vals) <- dnames 
160    return(vals)
161  } else {
162    counter <- 1
163    dl <- list()
164    dnames <- c()
165    for (desc in which.desc) {
166      if (verbose) { cat("Processing ", gsub('org.openscience.cdk.qsar.descriptors.molecular.', '', desc)
167                         , "\n") }
168      desc <- .jnew(desc)
169      .jcall(desc, "V", "initialise", dcob)
170      
171      dnames <- .jcall(desc, "[Ljava/lang/String;", "getDescriptorNames")
172      dnames <- gsub('-', '.', dnames)
173
174      descvals <- lapply(molecules, function(a, check) {
175        val <- tryCatch({.jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", a, check=check)})
176      }, check=FALSE)
177
178      vals <- lapply(descvals, .get.desc.values, nexpected = length(dnames))
179      vals <- data.frame(do.call('rbind', vals))
180
181      
182      if (length(vals) == 1 && any(is.na(vals))) {
183
184        vals <- as.data.frame(matrix(NA, nrow=1, ncol=length(dnames)))
185      }
186      
187      names(vals) <- dnames
188      ## idx <- which(is.na(names(vals)))
189      ## if (length(idx) > 0) vals <- vals[,-idx]
190      
191      dl[[counter]] <- vals
192      counter <- counter + 1
193    }
194    do.call('cbind', dl)
195  }
196}
197
198#' Get class names for atomic descriptors
199#' 
200#' @param type A string indicating which class of descriptors to return. Specifying
201#' `"all"` will return class names for all molecular descriptors. Options include
202#' * topological
203#' * geometrical
204#' * hybrid
205#' * constitutional
206#' * protein
207#' * electronic
208#' @return A character vector containing class names for atomic descriptors
209#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
210#' @export
211get.atomic.desc.names <- function(type = "all") {
212  if (type == 'all') return(.get.desc.all.classnames('atomic'))
213  return(.jcall("org/guha/rcdk/descriptors/DescriptorUtilities", "[Ljava/lang/String;",
214                "getDescriptorNamesByCategory", type))
215}
216
217#' Compute descriptors for each atom in a molecule
218#' 
219#' @param molecule A molecule object
220#' @param which.desc A character vector of atomic descriptor class names
221#' @param verbose Optional. Default \code{FALSE}. Toggle verbosity.
222#' @return A `data.frame` with atoms in the rows and descriptors in the columns
223#' @export
224#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
225#' @seealso \link{get.atomic.desc.names}
226eval.atomic.desc <- function(molecule, which.desc, verbose = FALSE) {
227  if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
228    stop("Must supply an IAtomContainer object")
229  }
230
231  if (length(which.desc) > 1) {
232    counter <- 1
233    dl <- list()
234    for (desc in which.desc) {
235      if (verbose) { cat("Processing ", gsub('org.openscience.cdk.qsar.descriptors.atomic.', '', desc)
236                         , "\n") }
237      desc <- .jnew(desc)
238      atoms = get.atoms(molecule)
239      descvals <- lapply(atoms, function(a) {
240        dval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", a, molecule, check=FALSE)
241        if (!is.null(e<-.jgetEx())) {
242          print("Java exception was raised")
243          .jclear()
244          dval <- NA
245        }
246        return(dval)
247      })
248      vals <- lapply(descvals, .get.desc.values)
249      vals <- data.frame(do.call('rbind', vals))
250
251      if (inherits(descvals[[1]], "jobjRef")) {
252        names(vals) <- .jcall(descvals[[1]], "[Ljava/lang/String;", "getNames")
253      } else {
254        names(vals) <- gsub('org.openscience.cdk.qsar.descriptors.atomic.', '', desc)
255      }
256      dl[[counter]] <- vals
257      counter <- counter + 1
258    }
259    do.call('cbind', dl)
260  }
261}
262
263#' Compute TPSA for a molecule
264#' @param molecule A molecule object
265#' @return A double value representing the TPSA value
266#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
267#' @export
268get.tpsa <- function(molecule) {
269  if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
270    stop("Must supply an IAtomContainer object")
271  }
272
273  desc <- .jnew("org.openscience.cdk.qsar.descriptors.molecular.TPSADescriptor")
274  descval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", molecule)
275  value <- .get.desc.values(descval, 1)
276  return(value)
277}
278
279#' Compute ALogP for a molecule
280#' @param molecule A molecule object
281#' @return A double value representing the ALogP value
282#' @export
283#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
284get.alogp <- function(molecule) {
285  if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
286    stop("Must supply an IAtomContainer object")
287  }
288
289  desc <- .jnew("org.openscience.cdk.qsar.descriptors.molecular.ALOGPDescriptor")
290  descval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", molecule)
291  value <- .get.desc.values(descval, 3)
292  return(value[1])
293}
294
295#' Compute XLogP for a molecule
296#' @param molecule A molecule object
297#' @return A double value representing the XLogP value
298#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
299#' @export
300get.xlogp <- function(molecule) {
301  if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
302    stop("Must supply an IAtomContainer object")
303  }
304
305  desc <- .jnew("org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor")
306  descval <- .jcall(desc, "Lorg/openscience/cdk/qsar/DescriptorValue;", "calculate", molecule)
307  value <- .get.desc.values(descval, 3)
308  return(value)
309}
310
311#' Compute volume of a molecule
312#' 
313#' This method does not require 3D coordinates. As a result its an 
314#' approximation
315#' @param molecule A molecule object
316#' @return A double value representing the volume
317#' @export
318#' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
319get.volume <- function(molecule) {
320  if (attr(molecule, "jclass") != "org/openscience/cdk/interfaces/IAtomContainer") {
321    stop("Must supply an IAtomContainer object")
322  }
323  return(J("org.openscience.cdk.geometry.volume.VABCVolume", "calculate", molecule))
324}