/rcdk/R/frags.R

http://github.com/rajarshi/cdkr · R · 77 lines · 39 code · 5 blank · 33 comment · 10 complexity · e13e3d5373213417ca109a3065e64c12 MD5 · raw file

  #' Generate Bemis-Murcko Fragments
  #'
  #' Fragment the input molecule using the Bemis-Murcko scheme
  #'
  #' A variety of methods for fragmenting molecules are available, ranging from
  #' exhaustive and ring-based methods to more specific ones such as Murcko
  #' frameworks. Fragmenting a collection of molecules can be useful for a
  #' variety of analyses. In addition, fragment based analysis can be a useful
  #' and faster alternative to traditional clustering of the whole collection,
  #' especially when it is large.
  #'
  #' Note that exhaustive fragmentation of large molecules (with many single
  #' bonds) can become time consuming.
  #'
  #' @param mols A list of `jobjRef` objects of Java class `IAtomContainer`.
  #' A single object is also accepted and is wrapped in a list.
  #' @param min.frag.size The smallest fragment to consider (in terms of heavy
  #' atoms). Coerced to an integer before being passed to the fragmenter.
  #' @param as.smiles If `TRUE` return the fragments as SMILES strings. If not,
  #' then fragments are returned as `jobjRef` objects
  #' @param single.framework If `TRUE`, then a single framework (i.e., the
  #' framework consisting of the union of all ring systems and linkers) is
  #' returned for each molecule. Otherwise, all combinations of ring systems and
  #' linkers are returned
  #' @return Returns a list with each element being a list with two elements:
  #' `rings` and `frameworks`. Each of these elements is either a character
  #' vector of SMILES strings or a list of `IAtomContainer` objects.
  #' @author Rajarshi Guha (\email{rajarshi.guha@@gmail.com})
  #' @seealso [get.exhaustive.fragments()]
  #' @export
  #' @examples
  #' mol <- parse.smiles('c1ccc(cc1)CN(c2cc(ccc2[N+](=O)[O-])c3c(nc(nc3CC)N)N)C')[[1]]
  #' mf1 <- get.murcko.fragments(mol, as.smiles=TRUE, single.framework=TRUE)
  #' mf1 <- get.murcko.fragments(mol, as.smiles=TRUE, single.framework=FALSE)
  get.murcko.fragments <- function(mols, min.frag.size = 6, as.smiles = TRUE, single.framework = FALSE) {
    if (!is.list(mols)) mols <- list(mols)

    # Validate each element on its own. Gathering the `jclass` attributes with
    # unlist() silently drops elements that have no such attribute, which would
    # let non-Java objects slip through the check.
    container.class <- "org/openscience/cdk/interfaces/IAtomContainer"
    is.container <- vapply(mols, function(x) {
      klass <- attr(x, "jclass")
      is.character(klass) && length(klass) == 1L && !is.na(klass) &&
        klass == container.class
    }, logical(1))
    if (!all(is.container)) {
      stop("Must supply an IAtomContainer object")
    }

    # One fragmenter instance is reused for every molecule; its results are read
    # back immediately after each generateFragments() call.
    fragmenter <- .jnew("org/openscience/cdk/fragment/MurckoFragmenter",
                        single.framework, as.integer(min.frag.size))
    lapply(mols, function(x) {
      .jcall(fragmenter, "V", "generateFragments", x)
      if (as.smiles) {
        rings <- .jcall(fragmenter, "[S", "getRingSystems")
        frames <- .jcall(fragmenter, "[S", "getFrameworks")
      } else {
        rings <- .jcall(fragmenter, "[Lorg/openscience/cdk/interfaces/IAtomContainer;",
                        "getRingSystemsAsContainers")
        frames <- .jcall(fragmenter, "[Lorg/openscience/cdk/interfaces/IAtomContainer;",
                         "getFrameworksAsContainers")
      }
      list(rings = rings, frameworks = frames)
    })
  }
  #' Generate Fragments Exhaustively
  #'
  #' Fragment the input molecules using the CDK `ExhaustiveFragmenter`
  #'
  #' @inherit get.murcko.fragments
  #' @return returns a list of length equal to the number of input molecules. Each
  #' element is a character vector of SMILES strings or a list of `jobjRef` objects.
  #' @seealso [get.murcko.fragments()]
  #' @export
  get.exhaustive.fragments <- function(mols, min.frag.size = 6, as.smiles = TRUE) {
    if (!is.list(mols)) mols <- list(mols)

    # Validate each element on its own. Gathering the `jclass` attributes with
    # unlist() silently drops elements that have no such attribute, which would
    # let non-Java objects slip through the check.
    container.class <- "org/openscience/cdk/interfaces/IAtomContainer"
    is.container <- vapply(mols, function(x) {
      klass <- attr(x, "jclass")
      is.character(klass) && length(klass) == 1L && !is.na(klass) &&
        klass == container.class
    }, logical(1))
    if (!all(is.container)) {
      stop("Must supply an IAtomContainer object")
    }

    # One fragmenter instance is reused for every molecule; its results are read
    # back immediately after each generateFragments() call.
    fragmenter <- .jnew("org/openscience/cdk/fragment/ExhaustiveFragmenter",
                        as.integer(min.frag.size))
    lapply(mols, function(x) {
      .jcall(fragmenter, "V", "generateFragments", x)
      if (as.smiles) {
        .jcall(fragmenter, "[S", "getFragments")
      } else {
        # The container accessor on ExhaustiveFragmenter is
        # getFragmentsAsContainers(); the "...SystemsAsContainers" spelling only
        # exists for the ring systems of MurckoFragmenter.
        .jcall(fragmenter, "[Lorg/openscience/cdk/interfaces/IAtomContainer;",
               "getFragmentsAsContainers")
      }
    })
  }