/test-data/rgtestouts/rgManQQ/rgManQQtest1.R
R | 158 lines | 110 code | 19 blank | 29 comment | 27 complexity | 67b374c87805140b3028668b5b533d23 MD5 | raw file
1 2# license not stated so I'm assuming LGPL is ok for my derived work? 3# generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics 4# Originally created as qqman with the following 5# attribution: 6#-------------- 7# Stephen Turner 8# http://StephenTurner.us/ 9# http://GettingGeneticsDone.blogspot.com/ 10 11# Last updated: Tuesday, December 22, 2009 12# R code for making manhattan plots and QQ plots from plink output files. 13# With GWAS data this can take a lot of memory. Recommended for use on 14# 64bit machines only, for now. 15 16# 17 18library(ggplot2) 19 20coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen') 21# not too fugly but need a colour expert please... 22 23 24manhattan = function(chrom=NULL,offset=NULL,pvals=NULL, title=NULL, max.y="max", 25 suggestiveline=0, genomewide=T, size.x.labels=9, size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) { 26 27 if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!") 28 genomewideline=NULL # was genomewideline=-log10(5e-8) 29 if (genomewide) { # use bonferroni since might be only a small region? 30 genomewideline = -log10(0.05/length(pvals)) } 31 d=data.frame(CHR=chrom,BP=offset,P=pvals) 32 33 #limit to only chrs 1-23? 34 d=d[d$CHR %in% 1:23, ] 35 36 if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) { 37 d=na.omit(d) 38 d=d[d$P>0 & d$P<=1, ] 39 d$logp = -log10(d$P) 40 41 d$pos=NA 42 ticks=NULL 43 lastbase=0 44 chrlist = unique(d$CHR) 45 nchr = length(chrlist) # may be any number? 46 for (x in c(1:nchr)) { 47 i = chrlist[x] # need the chrom number - may not == index 48 if (x == 1) { # first time 49 d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP 50 } else { 51 lastchr = chrlist[x-1] # previous whatever the list 52 lastbase=lastbase+tail(subset(d,CHR==lastchr)$BP, 1) 53 d[d$CHR==i, ]$pos=d[d$CHR==i, ]$BP+lastbase 54 } 55 ticks=c(ticks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]) 56 } 57 ticklim=c(min(d$pos),max(d$pos)) 58 if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR)) 59 } else { 60 mycols=rep(coloursTouse,max(d$CHR)) 61 } 62 63 if (max.y=="max") maxy=ceiling(max(d$logp)) else maxy=max.y 64 maxy = max(maxy,1.1*genomewideline) 65 # if (maxy<8) maxy=8 66 # only makes sense if genome wide is assumed - we could have a fine mapping region? 67 if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ] 68 69 plot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR)) 70 plot=plot+scale_x_continuous(name="Chromosome", breaks=ticks, labels=(unique(d$CHR))) 71 plot=plot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy) 72 plot=plot+scale_colour_manual(value=mycols) 73 if (annotate) plot=plot + geom_point(data=d.annotate, colour=I("green3")) 74 plot=plot + opts(legend.position = "none") 75 plot=plot + opts(title=title) 76 plot=plot+opts( 77 panel.background=theme_blank(), 78 axis.text.x=theme_text(size=size.x.labels, colour="grey50"), 79 axis.text.y=theme_text(size=size.y.labels, colour="grey50"), 80 axis.ticks=theme_segment(colour=NA) 81 ) 82 #plot = plot + opts(panel.grid.y.minor=theme_blank(),panel.grid.y.major=theme_blank()) 83 #plot = plot + opts(panel.grid.major=theme_blank()) 84 85 if (suggestiveline) plot=plot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3)) 86 if (genomewideline) plot=plot+geom_hline(yintercept=genomewideline,colour="red") 87 plot 88 } else { 89 stop("Make sure your data frame contains columns CHR, BP, and P") 90 } 91} 92 93 94 95qq = function(pvector, title=NULL, spartan=F) { 96 # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values 97 o = -log10(sort(pvector,decreasing=F)) 98 e = -log10( 1:length(o)/length(o) ) 99 # you could use base graphics 100 # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))), 101 # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e))) 102 # lines(e,e,col="red") 103 #You'll need ggplot2 installed to do the rest 104 plot=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red") 105 plot=plot+opts(title=title) 106 plot=plot+scale_x_continuous(name=expression(Expected~~-log[10](italic(p)))) 107 plot=plot+scale_y_continuous(name=expression(Observed~~-log[10](italic(p)))) 108 if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank()) 109 plot 110} 111rgqqMan = function(infile="/share/shared/galaxy/test-data/smallwgaP.xls",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(6,8), 112title="rgManQQtest1",grey=0) { 113rawd = read.table(infile,head=T,sep=' ') 114dn = names(rawd) 115cc = dn[chromcolumn] 116oc = dn[offsetcolumn] 117nams = c(cc,oc) 118plen = length(rawd[,1]) 119doreorder=1 120print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' ')) 121if (plen > 0) { 122 for (pvalscolumn in pvalscolumns) { 123 if (pvalscolumn > 0) 124 { 125 cname = names(rawd)[pvalscolumn] 126 mytitle = paste('p=',cname,', ',title,sep='') 127 myfname = chartr(' ','_',cname) 128 myqqplot = qq(rawd[,pvalscolumn],title=mytitle) 129 print(paste('## qqplot on',cname,'done')) 130 if ((chromcolumn > 0) & (offsetcolumn > 0)) { 131 if (doreorder) { 132 rawd = rawd[do.call(order,rawd[nams]),] 133 # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.html 134 # in case not yet ordered 135 doreorder = 0 136 } 137 print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn)) 138 mymanplot= manhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey) 139 print(paste('## manhattan plot on',cname,'done')) 140 ggsave(file=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=11,height=8,dpi=100) 141 } 142 else { 143 print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn, 144 'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required')) 145 } 146 ggsave(file=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=11,dpi=100) 147 } 148 else { 149 print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible')) 150 } 151 } # for pvalscolumn 152 } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') } 153} 154 155rgqqMan() 156# execute with defaults as substituted 157 158#R script autogenerated by rgenetics/rgutils.py on 09/05/2010 21:23:26