/test-data/rgtestouts/rgManQQ/rgManQQtest1.R

https://bitbucket.org/h_morita_dbcls/galaxy-central · R · 158 lines · 110 code · 19 blank · 29 comment · 27 complexity · 67b374c87805140b3028668b5b533d23 MD5 · raw file

  1. # license not stated so I'm assuming LGPL is ok for my derived work?
  2. # generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics
  3. # Originally created as qqman with the following
  4. # attribution:
  5. #--------------
  6. # Stephen Turner
  7. # http://StephenTurner.us/
  8. # http://GettingGeneticsDone.blogspot.com/
  9. # Last updated: Tuesday, December 22, 2009
  10. # R code for making manhattan plots and QQ plots from plink output files.
  11. # With GWAS data this can take a lot of memory. Recommended for use on
  12. # 64bit machines only, for now.
  13. #
  14. library(ggplot2)
  15. coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen')
  16. # not too fugly but need a colour expert please...
  17. manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max",
  18. suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) {
  19. if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!")
  20. genomewideline=NULL # was genomewideline=-log10(5e-8)
  21. if (genomewide) { # use bonferroni since might be only a small region?
  22. genomewideline = -log10(0.05/length(pvals)) }
  23. d=data.frame(CHR=chrom,BP=offset,P=pvals)
  24. #limit to only chrs 1-23?
  25. d=d[d$CHR %in% 1:23, ]
  26. if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) {
  27. d=na.omit(d)
  28. d=d[d$P>0 & d$P<=1, ]
  29. d$logp = -log10(d$P)
  30. d$pos=NA
  31. ticks=NULL
  32. lastbase=0
  33. chrlist = unique(d$CHR)
  34. nchr = length(chrlist) # may be any number?
  35. for (x in c(1:nchr)) {
  36. i = chrlist[x] # need the chrom number - may not == index
  37. if (x == 1) { # first time
  38. d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP
  39. } else {
  40. lastchr = chrlist[x-1] # previous whatever the list
  41. lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1)
  42. d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase
  43. }
  44. ticks=c(ticks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1])
  45. }
  46. ticklim=c(min(d$pos),max(d$pos))
  47. if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR))
  48. } else {
  49. mycols=rep(coloursTouse,max(d$CHR))
  50. }
  51. if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y
  52. maxy = max(maxy,1.1*genomewideline)
  53. # if (maxy<8) maxy=8
  54. # only makes sense if genome wide is assumed - we could have a fine mapping region?
  55. if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ]
  56. plot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR))
  57. plot=plot+scale_x_continuous(name="Chromosome", breaks=ticks, labels=(unique(d$CHR)))
  58. plot=plot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy)
  59. plot=plot+scale_colour_manual(value=mycols)
  60. if (annotate) plot=plot + geom_point(data=d.annotate, colour=I("green3"))
  61. plot=plot + opts(legend.position = "none")
  62. plot=plot + opts(title=title)
  63. plot=plot+opts(
  64. panel.background=theme_blank(),
  65. axis.text.x=theme_text(size=size.x.labels, colour="grey50"),
  66. axis.text.y=theme_text(size=size.y.labels, colour="grey50"),
  67. axis.ticks=theme_segment(colour=NA)
  68. )
  69. #plot = plot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank())
  70. #plot = plot + opts(panel.grid.major=theme_blank())
  71. if (suggestiveline) plot=plot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3))
  72. if (genomewideline) plot=plot+geom_hline(yintercept=genomewideline,colour="red")
  73. plot
  74. } else {
  75. stop("Make sure your data frame contains columns CHR, BP, and P")
  76. }
  77. }
  78. qq = function(pvector, title=NULL, spartan=F) {
  79. # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values
  80. o = -log10(sort(pvector,decreasing=F))
  81. e = -log10( 1:length(o)/length(o) )
  82. # you could use base graphics
  83. # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))),
  84. # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e)))
  85. # lines(e,e,col="red")
  86. #You'll need ggplot2 installed to do the rest
  87. plot=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red")
  88. plot=plot+opts(title=title)
  89. plot=plot+scale_x_continuous(name=expression(Expected~~-log[10](italic(p))))
  90. plot=plot+scale_y_continuous(name=expression(Observed~~-log[10](italic(p))))
  91. if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank())
  92. plot
  93. }
  94. rgqqMan = function(infile="/share/shared/galaxy/test-data/smallwgaP.xls",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(6,8),
  95. title="rgManQQtest1",grey=0) {
  96. rawd = read.table(infile,head=T,sep=' ')
  97. dn = names(rawd)
  98. cc = dn[chromcolumn]
  99. oc = dn[offsetcolumn]
  100. nams = c(cc,oc)
  101. plen = length(rawd[,1])
  102. doreorder=1
  103. print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' '))
  104. if (plen > 0) {
  105. for (pvalscolumn in pvalscolumns) {
  106. if (pvalscolumn > 0)
  107. {
  108. cname = names(rawd)[pvalscolumn]
  109. mytitle = paste('p=',cname,', ',title,sep='')
  110. myfname = chartr(' ','_',cname)
  111. myqqplot = qq(rawd[,pvalscolumn],title=mytitle)
  112. print(paste('## qqplot on',cname,'done'))
  113. if ((chromcolumn > 0) & (offsetcolumn > 0)) {
  114. if (doreorder) {
  115. rawd = rawd[do.call(order,rawd[nams]),]
  116. # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.html
  117. # in case not yet ordered
  118. doreorder = 0
  119. }
  120. print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn))
  121. mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey)
  122. print(paste('## manhattan plot on',cname,'done'))
  123. ggsave(file=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=11,height=8,dpi=100)
  124. }
  125. else {
  126. print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn,
  127. 'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required'))
  128. }
  129. ggsave(file=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=11,dpi=100)
  130. }
  131. else {
  132. print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible'))
  133. }
  134. } # for pvalscolumn
  135. } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') }
  136. }
  137. rgqqMan()
  138. # execute with defaults as substituted
  139. #R script autogenerated by rgenetics/rgutils.py on 09/05/2010 21:23:26