/scripts/cluster_papers.R

http://github.com/hpiwowar/alt-metrics_stats · R · 71 lines · 33 code · 17 blank · 21 comment · 0 complexity · d263d00af01b4c1b67687315a908d0e1 MD5 · raw file

  1. (flip = t(dat.indep.stats.norm)
  2. #flip.df = as.data.frame(flip[,sample(1:dim(flip)[2], 100, T)])
  3. flip.df = as.data.frame(flip[,20000:20050])
  4. myhetcorr = hetcor.modified(flip.df,
  5. use="pairwise.complete.obs",
  6. std.err=FALSE,
  7. pd=FALSE,
  8. type="spearman")
  9. mycor.unadjusted = myhetcorr$correlations
  10. #write.table(mycor.unadjusted,"/Users/hpiwowar/statslink/mycor.unadjusted.txt",append=F,quote=F,sep="\t",row.names=T)
  11. # Are some correlations NA?
  12. which(is.na(mycor.unadjusted))
  13. #mycor.unadjusted[which(is.na(mycor.unadjusted))] = 1
  14. # Now fix the correlation matrix if it is not positive-definite
  15. mycor = adjust.to.positive.definite(mycor.unadjusted)
  16. #display
  17. library(gplots)
  18. colorRange = round(range(mycor) * 15) + 16
  19. colorChoices = bluered(32)[colorRange[1]:colorRange[2]]
  20. heatmap.2(mycor, col=colorChoices, cexRow=0.9, cexCol = .9, symm = TRUE, dend = "row", trace = "none", main = "Thesis Data", margins=c(10,10), key=FALSE, keysize=0.1)
  21. showpanel <- function(col) {
  22. image(z=matrix(1:100, ncol=1), col=col, xaxt="n", yaxt="n" )
  23. }
  24. quartz()
  25. showpanel(colorChoices)
  26. showpanel(bluered(32))
  27. #pdf("heatmap.pdf", height=10, width=10)
  28. #heatmap.2(mycor, col=colorChoices, cexRow=0.9, cexCol = .9, symm = TRUE, dend = "row", trace = "none", main = "Thesis Data", margins=c(10,10), key=FALSE, keysize=0.1)
  29. #dev.off
  30. ############## FIRST ORDER ANALYSIS
  31. ############## Determine number of First-Order factors
  32. # Determine Number of Factors to Extract
  33. library(nFactors)
  34. eigenvectors.1st <- eigen(mycor) # get eigenvalues
  35. # this line takes a long time
  36. aparallel.1st <- parallel(subject=nrow(dat.indep.stats.norm), var=ncol(dat.indep.stats.norm), rep=100, cent=.05)
  37. scree.results.1st <- nScree(eigenvectors.1st$values, aparallel.1st$eigen$qevpea)
  38. summary(scree.results.1st)
  39. plotnScree(scree.results.1st)
  40. # Pull out the "Optimal Coordinate"
  41. # defined in nScree help page as
  42. # The optimal coordinates (OC) corresponds to an extrapolation of the preceding eigenvalue by a regression line between the eignvalue coordinates and the last eigenvalue coordinate
  43. #number.factors.1st = scree.results.1st$Components$noc
  44. number.factors.1st = 5
  45. ############## Do First-Order Factor Analysis
  46. # Maximum liklihood doesn't converge because too
  47. fit.ml = factanal(dat.indep.stats.norm, number.factors.1st, rotation="promax", covmat=mycor)
  48. print(fit.ml, sort=TRUE)
  49. # Use princip axis when maximum liklihood fails to converge:
  50. library(psych)
  51. fit.fa.1st = fa(mycor, number.factors.1st, fm="minres", rotate="promax",
  52. scores=FALSE, residuals=TRUE, n.obs=max(dim(dat.indep.stats.norm)))
  53. #to show the loadings sorted by absolute value
  54. print(fit.fa.1st, sort=TRUE)