PageRenderTime 25ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/BRI_RNAseq_truseq_plus_trinity_optimized/R/3.2.2/site-library/biovizBase/examples/data-gen.R

https://gitlab.com/pooja043/Globus_Docker_1
R | 172 lines | 128 code | 11 blank | 33 comment | 2 complexity | 7d2ed09dc97dbcb909a3ebca73164fbe MD5 | raw file
  1. library(biovizBase)
  2. library(GenomicRanges)
  3. ## with no
  4. hg19IdeogramCyto <- getIdeogram("hg19", cytobands = TRUE)
  5. seqinfo(hg19Ideogram)
  6. hg19Ideogram <- getIdeogram("hg19", cytobands = FALSE)
  7. ## then for genesymbol
  8. library("org.Hs.eg.db")
  9. ls("package:org.Hs.eg.db")
  10. available.dbschemas()
  11. symbol_id <- mappedRkeys(org.Hs.egSYMBOL)
  12. head(symbol_id)
  13. length(symbol_id)
  14. gene_id <- mget(symbol_id, revmap(org.Hs.egSYMBOL), ifnotfound=NA)
  15. gene_id <- unlist2(gene_id)
  16. gene_id <- gene_id[!is.na(gene_id)]
  17. chr <- toTable(org.Hs.egCHR[gene_id])
  18. chrloc <- toTable(org.Hs.egCHRLOC[gene_id])
  19. chrlocend <- toTable(org.Hs.egCHRLOCEND[gene_id])
  20. identical(chrloc$gene_id, chrlocend$gene_id)
  21. chrs <- chrloc
  22. head(chrlocend)
  23. chrs$end_location <- chrlocend$end_location
  24. head(chrs)
  25. res <- chrs
  26. sum(duplicated(gene_id))# no duplicate
  27. idx <- match(res$gene_id, gene_id)
  28. res$symbol <- names(gene_id[idx])
  29. sum(with(res, start_location > 0 & end_location < 0))# good
  30. sum(with(res, start_location < 0 & end_location > 0))# good
  31. ## merge(merge(data.frame(gene_id=gene_id, symbol=names(gene_id)), chrloc),
  32. ## chrlocend)
  33. ## df <- merge(merge(merge(data.frame(gene_id = gene_id,
  34. ## symbol = names(gene_id)), chr), chrloc), chrlocend)
  35. ## do I need to subset it? No.
  36. ## add extra column to indicate sense or antisense.
  37. ## FOXD4L2,
  38. res$sense <- with(res, start_location>=0&end_location>=0)
  39. head(res)
  40. res.sense <- res[res$sense,]
  41. sum(with(res.sense, start_location > end_location))
  42. res.antisense <- res[!res$sense,]
  43. sum(with(res.antisense, start_location < end_location))
  44. head(res)
  45. ## need to make sure you get this right
  46. res$strand <- strand(!res$sense)
  47. head(res)
  48. sum(with(res, start_location == end_location))
  49. ## df2 <- subset(df, start_location>=0&end_location>=0)
  50. ## strand <- ifelse(df2$start_location>df2$end_location, "-", "+")
  51. ## strand[df2$start_location == df2$end_location] <- "*"
  52. ## id <- strand == "-"
  53. ## tmp <- df2[id,"start_location"]
  54. ## df2[id,"start_location"] <- df2[id,"end_location"]
  55. ## df2[id,"end_location"] <- tmp
  56. ## df2$strand <- strand
  57. ## df2 <- df2[!duplicated(df2),]
  58. ## for ensembl
  59. ensembl_id <- unlist(mget(gene_id, org.Hs.egENSEMBL))
  60. head(ensembl_id)
  61. idx <- match(res$gene_id, names(ensembl_id))
  62. res$ensembl_id <- ensembl_id[idx]
  63. head(res)
  64. library(GenomicRanges)
  65. genesymbol <- with(res,GRanges(seqnames = paste("chr",Chromosome,sep = ""),
  66. IRanges(abs(start_location), abs(end_location)),
  67. strand = strand,
  68. symbol = symbol,
  69. ensembl_id = ensembl_id))
  70. names(genesymbol) <- values(genesymbol)$symbol
  71. save(genesymbol, file = "~/Datas/rdas/genesymbol.rda")
  72. save(genesymbol, file = "~/Codes/gitrepos/biovizBase/data/genesymbol.rda")
  73. save(hg19IdeogramCyto, file = "~/Codes/gitrepos/biovizBase/data/hg19IdeogramCyto.rda")
  74. save(hg19Ideogram, file = "~/Codes/gitrepos/biovizBase/data/hg19Ideogram.rda")
  75. id <- unlist2(mget("FOXD4L2", revmap(org.Hs.egSYMBOL)))
  76. id <- revmap(org.Hs.egSYMBOL)[["FOXD4L2"]]
  77. org.Hs.egCHRLOC[[id]]
  78. org.Hs.egCHRLOCEND[[id]]
  79. ## new data generation method, with seqlengths
  80. ideogramCyto <- getIdeogram(c("hg19", "hg18", "mm10", "mm9"))
  81. ideogram <- getIdeogram(c("hg19", "hg18", "mm10", "mm9"), cytoband = FALSE)
  82. ideo <- ideogram
  83. ideoCyto <- ideogramCyto
  84. save(ideo, file = "../../../../bioc-devel/biovizBase/data/ideo.rda")
  85. save(ideoCyto, file = "/Users/tengfei/Code/svnrepos/bioc-devel/biovizBase/data/ideoCyto.rda")
  86. hg19 <- ideoCyto$hg19
  87. ideoCyto$hg18 <- keepSeqlevels(ideoCyto$hg18, paste0("chr", c(1:22, "X", "Y")))
  88. ideoCyto$hg19 <- keepSeqlevels(ideoCyto$hg19, paste0("chr", c(1:22, "X", "Y")))
  89. ideoCyto$mm10 <- keepSeqlevels(ideoCyto$mm10, paste0("chr", c(1:19, "X", "Y")))
  90. ideoCyto$mm9 <- keepSeqlevels(ideoCyto$mm9, paste0("chr", c(1:19, "X", "Y")))
  91. ## for circular view
  92. crc1 <- system.file("extdata", "crc1-missense.csv", package = "biovizBase")
  93. crc1 <- read.csv(crc1)
  94. library(GenomicRanges)
  95. mut.gr <- with(crc1,GRanges(Chromosome, IRanges(Start_position, End_position),
  96. strand = Strand))
  97. values(mut.gr) <- subset(crc1, select = -c(Start_position, End_position, Chromosome))
  98. data("hg19Ideogram", package = "biovizBase")
  99. seqs <- seqlengths(hg19Ideogram)
  100. ## subset_chr
  101. chr.sub <- paste("chr", 1:22, sep = "")
  102. ## levels tweak
  103. seqlevels(mut.gr) <- c(chr.sub, "chrX")
  104. mut.gr <- keepSeqlevels(mut.gr, chr.sub)
  105. seqs.sub <- seqs[chr.sub]
  106. ## remove wrong position
  107. bidx <- end(mut.gr) <= seqs.sub[match(as.character(seqnames(mut.gr)),
  108. names(seqs.sub))]
  109. mut.gr <- mut.gr[which(bidx)]
  110. ## assign_seqlengths
  111. seqlengths(mut.gr) <- seqs.sub
  112. ## reanme to shorter names
  113. new.names <- as.character(1:22)
  114. names(new.names) <- paste("chr", new.names, sep = "")
  115. new.names
  116. mut.gr.new <- renameSeqlevels(mut.gr, new.names)
  117. head(mut.gr.new)
  118. hg19Ideo <- hg19Ideogram
  119. hg19Ideo <- keepSeqlevels(hg19Ideogram, chr.sub)
  120. hg19Ideo <- rena
  121. rearr <- read.csv(system.file("extdata", "crc-rearrangment.csv", package = "biovizBase"))
  122. ## start position
  123. gr1 <- with(rearr, GRanges(chr1, IRanges(pos1, width = 1)))
  124. ## end position
  125. gr2 <- with(rearr, GRanges(chr2, IRanges(pos2, width = 1)))
  126. ## add extra column
  127. nms <- colnames(rearr)
  128. .extra.nms <- setdiff(nms, c("chr1", "chr2", "pos1", "pos2"))
  129. values(gr1) <- rearr[,.extra.nms]
  130. ## remove out-of-limits data
  131. seqs <- as.character(seqnames(gr1))
  132. .mx <- seqlengths(hg19Ideo)[seqs]
  133. idx1 <- start(gr1) > .mx
  134. seqs <- as.character(seqnames(gr2))
  135. .mx <- seqlengths(hg19Ideo)[seqs]
  136. idx2 <- start(gr2) > .mx
  137. idx <- !idx1 & !idx2
  138. gr1 <- gr1[idx]
  139. seqlengths(gr1) <- seqlengths(hg19Ideo)
  140. gr2 <- gr2[idx]
  141. seqlengths(gr2) <- seqlengths(hg19Ideo)
  142. values(gr1)$to.gr <- gr2
  143. ## rename to gr
  144. gr <- gr1
  145. values(gr)$rearrangements <- ifelse(as.character(seqnames(gr))
  146. == as.character(seqnames((values(gr)$to.gr))),
  147. "intrachromosomal", "interchromosomal")
  148. crc.gr <- gr
  149. crc.gr
  150. mut.gr <- mut.gr.new
  151. hg19sub <- hg19Ideo
  152. save(crc.gr, mut.gr, hg19sub, file = "/Users/tengfei/Code/svnrepos/bioc-devel/biovizBase/data/CRC.rda")
  153. snp <- read.table("/Users/tengfei/Downloads/plink.assoc.txt", header = TRUE)
  154. N <- 2000
  155. set.seed(123)
  156. snp <- snp[sample(1:nrow(snp), size = N, replace = FALSE),]
  157. rownames(snp) <- NULL
  158. head(snp)
  159. write.table(snp,
  160. file = "/Users/tengfei/Code/svnrepos/bioc-devel/biovizBase/inst/extdata/plink.assoc.sub.txt", row.names = FALSE)
  161. ## https://github.com/stephenturner/qqman