PageRenderTime 15ms CodeModel.GetById 2ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 1ms

/scripts/cluster_papers.R

http://github.com/hpiwowar/alt-metrics_stats
R | 71 lines | 33 code | 17 blank | 21 comment | 0 complexity | d263d00af01b4c1b67687315a908d0e1 MD5 | raw file
 1(flip = t(dat.indep.stats.norm)
 2#flip.df = as.data.frame(flip[,sample(1:dim(flip)[2], 100, T)])
 3flip.df = as.data.frame(flip[,20000:20050])
 4
 5myhetcorr = hetcor.modified(flip.df, 
 6							use="pairwise.complete.obs", 
 7							std.err=FALSE, 
 8							pd=FALSE, 
 9							type="spearman")
10mycor.unadjusted = myhetcorr$correlations
11#write.table(mycor.unadjusted,"/Users/hpiwowar/statslink/mycor.unadjusted.txt",append=F,quote=F,sep="\t",row.names=T)
12
13# Are some correlations NA?
14which(is.na(mycor.unadjusted))
15#mycor.unadjusted[which(is.na(mycor.unadjusted))] = 1
16
17# Now fix the correlation matrix if it is not positive-definite
18mycor = adjust.to.positive.definite(mycor.unadjusted)
19
20#display
21library(gplots)
22colorRange = round(range(mycor) * 15) + 16
23colorChoices = bluered(32)[colorRange[1]:colorRange[2]]
24heatmap.2(mycor, col=colorChoices, cexRow=0.9, cexCol = .9, symm = TRUE, dend = "row", trace = "none", main = "Thesis Data", margins=c(10,10), key=FALSE, keysize=0.1)
25
26showpanel <- function(col) {
27  image(z=matrix(1:100, ncol=1), col=col, xaxt="n", yaxt="n" )
28}
29quartz()
30showpanel(colorChoices)
31showpanel(bluered(32))
32
33#pdf("heatmap.pdf", height=10, width=10)
34#heatmap.2(mycor, col=colorChoices, cexRow=0.9, cexCol = .9, symm = TRUE, dend = "row", trace = "none", main = "Thesis Data", margins=c(10,10), key=FALSE, keysize=0.1)
35#dev.off
36
37############## FIRST ORDER ANALYSIS
38
39##############  Determine number of First-Order factors
40
41# Determine Number of Factors to Extract
42library(nFactors)
43eigenvectors.1st <- eigen(mycor) # get eigenvalues
44# this line takes a long time
45aparallel.1st <- parallel(subject=nrow(dat.indep.stats.norm), var=ncol(dat.indep.stats.norm), rep=100, cent=.05)
46scree.results.1st <- nScree(eigenvectors.1st$values, aparallel.1st$eigen$qevpea)
47summary(scree.results.1st)
48plotnScree(scree.results.1st) 
49
50# Pull out the "Optimal Coordinate"
51# defined in nScree help page as 
52# The optimal coordinates (OC) corresponds to an extrapolation of the preceding eigenvalue by a regression line between the eignvalue coordinates and the last eigenvalue coordinate
53#number.factors.1st = scree.results.1st$Components$noc
54number.factors.1st = 5
55
56
57##############  Do First-Order Factor Analysis
58
59# Maximum liklihood doesn't converge because too 
60fit.ml = factanal(dat.indep.stats.norm, number.factors.1st, rotation="promax", covmat=mycor)
61print(fit.ml, sort=TRUE)
62
63
64# Use princip axis when maximum liklihood fails to converge:
65library(psych)
66fit.fa.1st = fa(mycor, number.factors.1st, fm="minres", rotate="promax", 
67                scores=FALSE, residuals=TRUE, n.obs=max(dim(dat.indep.stats.norm)))
68
69#to show the loadings sorted by absolute value
70print(fit.fa.1st, sort=TRUE)
71