PageRenderTime 24ms CodeModel.GetById 16ms app.highlight 4ms RepoModel.GetById 2ms app.codeStats 0ms

/test-data/rgtestouts/rgManQQ/rgManQQtest1.html

https://bitbucket.org/cistrome/cistrome-harvard/
HTML | 255 lines | 217 code | 38 blank | 0 comment | 0 complexity | 40a1212c2e931b506f9235a0beb0aa6c MD5 | raw file
  1<?xml version="1.0" encoding="utf-8" ?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4<head>
  5<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  6<meta name="generator" content="Galaxy rgManQQ.py tool output - see http://g2.trac.bx.psu.edu/" />
  7<title></title>
  8<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
  9</head>
 10<body>
 11<div class="document">
 12
 13<h1>rgManQQtest1</h1>
 14<table>
 15
 16<tr><td><a href="Allelep_manhattan.pdf">Allelep_manhattan.pdf</a></td></tr>
 17<tr><td><a href="Allelep_manhattan.pdf"><img src="Allelep_manhattan.png" title="Allelep_manhattan.png" hspace="10" width="800"></a></td></tr>
 18<tr><td><a href="Allelep_qqplot.pdf">Allelep_qqplot.pdf</a></td></tr>
 19<tr><td><a href="Allelep_qqplot.pdf"><img src="Allelep_qqplot.png" title="Allelep_qqplot.png" hspace="10" width="800"></a></td></tr>
 20<tr><td><a href="rgManQQtest1.R">rgManQQtest1.R</a></td></tr>
 21<tr><td><a href="rgManQQtest1.R.log">rgManQQtest1.R.log</a></td></tr>
 22</table>
 23
 24<h3>R log follows below</h3><hr><pre>
 25
 26Loading required package: reshape
 27
 28Loading required package: plyr
 29
 30
 31
 32Attaching package: 'reshape'
 33
 34
 35
 36The following object(s) are masked from 'package:plyr':
 37
 38
 39
 40    rename, round_any
 41
 42
 43
 44Loading required package: grid
 45
 46Loading required package: proto
 47
 48[1] "### 101 values read from /data/tmp/tmpNaxDwH/database/files/000/dataset_1.dat read - now running plots"
 49
 50[1] "## qqplot on Allelep done"
 51
 52[1] "## manhattan on Allelep starting 2 3 8"
 53
 54[1] "## manhattan plot on Allelep done"
 55
 56## R script=
 57
 58# generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics
 59# Originally created as qqman with the following 
 60# attribution:
 61#--------------
 62# Stephen Turner
 63# http://StephenTurner.us/
 64# http://GettingGeneticsDone.blogspot.com/
 65
 66# Last updated: 19 July 2011 by Ross Lazarus
 67# R code for making manhattan plots and QQ plots from plink output files. 
 68# With GWAS data this can take a lot of memory. Recommended for use on 
 69# 64bit machines only, for now. 
 70
 71#
 72
 73library(ggplot2)
 74
 75coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen')
 76# not too ugly but need a colour expert please...
 77
 78
 79DrawManhattan = function(pvals=Null,chrom=Null,offset=Null,title=NULL, max.y="max",suggestiveline=0, genomewide=T, size.x.labels=9, 
 80              size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) {
 81        if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!")
 82        genomewideline=NULL # was genomewideline=-log10(5e-8)
 83        n = length(pvals)
 84        if (genomewide) { # use bonferroni since might be only a small region?
 85            genomewideline = -log10(0.05/n) }
 86        offset = as.integer(offset)
 87        if (n > 1000000) { offset = offset/10000 }
 88        else if (n > 10000) { offset = offset/1000}
 89        chro = as.integer(chrom) # already dealt with X and friends?
 90        pvals = as.double(pvals)
 91        d=data.frame(CHR=chro,BP=offset,P=pvals)
 92        if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) {
 93                d=d[!is.na(d$P), ]
 94                d=d[!is.na(d$BP), ]
 95                d=d[!is.na(d$CHR), ]
 96                #limit to only chrs 1-22, x=23,y=24,Mt=25?
 97                d=d[d$CHR %in% 1:25, ]
 98                d=d[d$P>0 & d$P<=1, ]
 99                d$logp = as.double(-log10(d$P))
100                dlen = length(d$P)
101                d$pos=NA
102                ticks=NULL
103                lastbase=0
104                chrlist = unique(d$CHR)
105                chrlist = as.integer(chrlist)
106                chrlist = sort(chrlist) # returns lexical ordering 
107                if (max.y=="max") { maxy = ceiling(max(d$logp)) } 
108                   else { maxy = max.y }
109                nchr = length(chrlist) # may be any number?
110                maxy = max(maxy,1.1*genomewideline)
111                if (nchr >= 2) {
112                    for (x in c(1:nchr)) {
113                        i = chrlist[x] # need the chrom number - may not == index
114                        if (x == 1) { # first time
115                            d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP # initialize to first BP of chr1
116                            dsub = subset(d,CHR==i)
117                            dlen = length(dsub$P)
118                            lastbase = max(dsub$pos) # last one
119                            tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]
120                            lastchr = i
121                        } else {
122                            d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP+lastbase # one humongous contig
123                            if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) { 
124                                cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos))
125                             }   
126                            tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1])
127                            lastchr = i
128                            dsub = subset(d,CHR==i)
129                            lastbase = max(dsub$pos) # last one
130                        }
131                    ticklim=c(min(d$pos),max(d$pos))
132                    xlabs = chrlist
133                    }
134                } else { # nchr is 1
135                   nticks = 10
136                   last = max(d$BP)
137                   first = min(d$BP)
138                   tks = c(first)
139                   t = (last-first)/nticks # units per tick
140                   for (x in c(1:(nticks))) { 
141                        tks = c(tks,round(x*t)+first) }
142                   ticklim = c(first,last)
143                } # else
144                if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR))
145                           } else {
146                           mycols=rep(coloursTouse,max(d$CHR))
147                           }
148                dlen = length(d$P)
149                d$pranks = rank(d$P)/dlen
150                d$centiles = 100*d$pranks # small are interesting
151                d$sizes = ifelse((d$centile < 1),2,1)
152                if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ]
153                if (nchr >= 2) {
154                        manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR),size=factor(sizes))
155                        manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs,limits=ticklim) 
156                        manplot=manplot+scale_size_manual(values = c(0.5,1.5)) # requires discreet scale - eg factor
157                        #manplot=manplot+scale_size(values=c(0.5,2)) # requires continuous 
158                        }
159                else {
160                        manplot=qplot(BP,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR))
161                        manplot=manplot+scale_x_continuous(name=paste("Chromosome",chrlist[1]), breaks=tks, labels=tks,limits=ticklim) 
162                     }                 
163                manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy)
164                manplot=manplot+scale_colour_manual(value=mycols)
165                if (annotate) {  manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) } 
166                manplot=manplot + opts(legend.position = "none") 
167                manplot=manplot + opts(title=title)
168                manplot=manplot+opts(
169                     panel.background=theme_blank(), 
170                     axis.text.x=theme_text(size=size.x.labels, colour="grey50"), 
171                     axis.text.y=theme_text(size=size.y.labels, colour="grey50"), 
172                     axis.ticks=theme_segment(colour=NA)
173                )
174                if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3))
175                if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red")
176                manplot
177        }       else {
178                stop("Make sure your data frame contains columns CHR, BP, and P")
179        }
180}
181
182
183
184qq = function(pvector, title=NULL, spartan=F) {
185        # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values
186        o = -log10(sort(pvector,decreasing=F))
187        e = -log10( 1:length(o)/length(o) )
188        # you could use base graphics
189        # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))), 
190        # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e)))
191        # lines(e,e,col="red")
192        #You'll need ggplot2 installed to do the rest
193        qq=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red")
194        qq=qq+opts(title=title)
195        qq=qq+scale_x_continuous(name=expression(Expected~~-log[10](italic(p))))
196        qq=qq+scale_y_continuous(name=expression(Observed~~-log[10](italic(p))))
197        if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank())
198        qq
199}
200
201rgqqMan = function(infile="/data/tmp/tmpNaxDwH/database/files/000/dataset_1.dat",chromcolumn=2, offsetcolumn=3, pvalscolumns=c(8), 
202title="rgManQQtest1",grey=0) {
203rawd = read.table(infile,head=T,sep='\t')
204dn = names(rawd)
205cc = dn[chromcolumn]
206oc = dn[offsetcolumn] 
207rawd[,cc] = sub('chr','',rawd[,cc],ignore.case = T) # just in case
208rawd[,cc] = sub(':','',rawd[,cc],ignore.case = T) # ugh
209rawd[,cc] = sub('X',23,rawd[,cc],ignore.case = T)
210rawd[,cc] = sub('Y',24,rawd[,cc],ignore.case = T)
211rawd[,cc] = sub('Mt',25,rawd[,cc], ignore.case = T)
212nams = c(cc,oc) # for sorting
213plen = length(rawd[,1])
214print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' '))
215rawd = rawd[do.call(order,rawd[nams]),]
216# mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.html
217# in case not yet ordered
218if (plen > 0) {
219  for (pvalscolumn in pvalscolumns) {
220  if (pvalscolumn > 0) 
221     {
222     cname = names(rawd)[pvalscolumn]
223     mytitle = paste('p=',cname,', ',title,sep='')
224     myfname = chartr(' ','_',cname)
225     myqqplot = qq(rawd[,pvalscolumn],title=mytitle)
226     ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=6,dpi=96)
227     ggsave(filename=paste(myfname,"qqplot.pdf",sep='_'),myqqplot,width=8,height=6,dpi=96)
228     print(paste('## qqplot on',cname,'done'))
229     if ((chromcolumn > 0) & (offsetcolumn > 0)) {
230         print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn))
231         mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey)
232         ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=8,height=6,dpi=96)
233         ggsave(filename=paste(myfname,"manhattan.pdf",sep='_'),mymanplot,width=8,height=6,dpi=96)
234         print(paste('## manhattan plot on',cname,'done'))
235         }
236         else {
237              print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn,
238              'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required'))
239              } 
240     } 
241  else {
242        print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible'))
243      }
244  } # for pvalscolumn
245 } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') }
246}
247
248rgqqMan() 
249# execute with defaults as substituted
250
251</pre>
252
253<h3><a href="http://rgenetics.org">Rgenetics</a> tool rgManQQ.py run at 04/08/2011 11:46:22</h3>
254</div></body></html>
255