PageRenderTime 22ms CodeModel.GetById 12ms app.highlight 7ms RepoModel.GetById 0ms app.codeStats 0ms

/test-data/rgtestouts/rgManQQ/rgManQQtest1.R

https://bitbucket.org/h_morita_dbcls/galaxy-central
R | 158 lines | 110 code | 19 blank | 29 comment | 27 complexity | 67b374c87805140b3028668b5b533d23 MD5 | raw file
  1
  2# license not stated so I'm assuming LGPL is ok for my derived work?
  3# generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics
  4# Originally created as qqman with the following 
  5# attribution:
  6#--------------
  7# Stephen Turner
  8# http://StephenTurner.us/
  9# http://GettingGeneticsDone.blogspot.com/
 10
 11# Last updated: Tuesday, December 22, 2009
 12# R code for making manhattan plots and QQ plots from plink output files. 
 13# With GWAS data this can take a lot of memory. Recommended for use on 
 14# 64bit machines only, for now. 
 15
 16#
 17
 18library(ggplot2)
 19
 20coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen')
 21# not too fugly but need a colour expert please...
 22
 23
 24manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max", 
 25   suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) {
 26
 27        if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!")
 28        genomewideline=NULL # was genomewideline=-log10(5e-8)
 29        if (genomewide) { # use bonferroni since might be only a small region?
 30            genomewideline = -log10(0.05/length(pvals)) }
 31        d=data.frame(CHR=chrom,BP=offset,P=pvals)
 32
 33        #limit to only chrs 1-23?
 34        d=d[d$CHR %in% 1:23, ]
 35
 36        if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) {
 37                d=na.omit(d)
 38                d=d[d$P>0 & d$P<=1, ]
 39                d$logp = -log10(d$P)
 40
 41                d$pos=NA
 42                ticks=NULL
 43                lastbase=0
 44                chrlist = unique(d$CHR)
 45                nchr = length(chrlist) # may be any number?
 46                for (x in c(1:nchr)) {
 47                        i = chrlist[x] # need the chrom number - may not == index
 48                        if (x == 1) { # first time
 49                                d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP
 50                        }       else {
 51                                lastchr = chrlist[x-1] # previous whatever the list
 52                                lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1)
 53                                d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase
 54                        }
 55                        ticks=c(ticks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1])
 56                }
 57                ticklim=c(min(d$pos),max(d$pos))
 58                if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR))
 59                           } else {
 60                           mycols=rep(coloursTouse,max(d$CHR))
 61                           }
 62
 63                if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y
 64                maxy = max(maxy,1.1*genomewideline)
 65                # if (maxy<8) maxy=8
 66                # only makes sense if genome wide is assumed - we could have a fine mapping region?  
 67                if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ]
 68
 69                plot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR))
 70                plot=plot+scale_x_continuous(name="Chromosome", breaks=ticks, labels=(unique(d$CHR)))
 71                plot=plot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy)
 72                plot=plot+scale_colour_manual(value=mycols)
 73                if (annotate)   plot=plot + geom_point(data=d.annotate, colour=I("green3")) 
 74                plot=plot + opts(legend.position = "none") 
 75                plot=plot + opts(title=title)
 76                plot=plot+opts(
 77                        panel.background=theme_blank(), 
 78                        axis.text.x=theme_text(size=size.x.labels, colour="grey50"), 
 79                        axis.text.y=theme_text(size=size.y.labels, colour="grey50"), 
 80                        axis.ticks=theme_segment(colour=NA)
 81                )
 82                #plot = plot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank())
 83                #plot = plot + opts(panel.grid.major=theme_blank())
 84                 
 85                if (suggestiveline) plot=plot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3))
 86                if (genomewideline) plot=plot+geom_hline(yintercept=genomewideline,colour="red")
 87                plot
 88        }       else {
 89                stop("Make sure your data frame contains columns CHR, BP, and P")
 90        }
 91}
 92
 93
 94
 95qq = function(pvector, title=NULL, spartan=F) {
 96        # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values
 97        o = -log10(sort(pvector,decreasing=F))
 98        e = -log10( 1:length(o)/length(o) )
 99        # you could use base graphics
100        # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))), 
101        # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e)))
102        # lines(e,e,col="red")
103        #You'll need ggplot2 installed to do the rest
104        plot=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red")
105        plot=plot+opts(title=title)
106        plot=plot+scale_x_continuous(name=expression(Expected~~-log[10](italic(p))))
107        plot=plot+scale_y_continuous(name=expression(Observed~~-log[10](italic(p))))
108        if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank())
109        plot
110}
111rgqqMan = function(infile="/share/shared/galaxy/test-data/smallwgaP.xls",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(6,8), 
112title="rgManQQtest1",grey=0) {
113rawd = read.table(infile,head=T,sep='	')
114dn = names(rawd)
115cc = dn[chromcolumn]
116oc = dn[offsetcolumn] 
117nams = c(cc,oc)
118plen = length(rawd[,1])
119doreorder=1
120print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' '))
121if (plen > 0) {
122  for (pvalscolumn in pvalscolumns) {
123  if (pvalscolumn > 0) 
124     {
125     cname = names(rawd)[pvalscolumn]
126     mytitle = paste('p=',cname,', ',title,sep='')
127     myfname = chartr(' ','_',cname)
128     myqqplot = qq(rawd[,pvalscolumn],title=mytitle)
129     print(paste('## qqplot on',cname,'done'))
130     if ((chromcolumn > 0) & (offsetcolumn > 0)) {
131         if (doreorder) {
132             rawd = rawd[do.call(order,rawd[nams]),]
133             # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.html
134             # in case not yet ordered
135             doreorder = 0
136             }
137         print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn))
138         mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey)
139         print(paste('## manhattan plot on',cname,'done'))
140         ggsave(file=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=11,height=8,dpi=100)
141         }
142         else {
143              print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn,
144              'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required'))
145              } 
146     ggsave(file=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=11,dpi=100)
147     } 
148  else {
149        print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible'))
150      }
151  } # for pvalscolumn
152 } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') }
153}
154
155rgqqMan() 
156# execute with defaults as substituted
157
158#R script autogenerated by rgenetics/rgutils.py on 09/05/2010 21:23:26