# PageRenderTime 14ms CodeModel.GetById 0ms RepoModel.GetById 1ms app.codeStats 0ms
#
# /scripts/descriptive_stats.R
#
# http://github.com/hpiwowar/alt-metrics_stats
# R | 43 lines | 23 code | 13 blank | 7 comment | 1 complexity | b8040296d575c41afe8ee4d1607e6973 MD5 | raw file
# Possible License(s): MIT
  library(Hmisc)  # provides %nin% used below

  # Placeholder content. Will make more browseable soon.

  # NOTE(review): dat.eventcounts is not created in this script; it must
  # already exist in the calling environment.

  # Columns that describe the article itself. Every other column of
  # dat.eventcounts is treated as an altmetric column.
  metadataColumns <- c(
    "doi", "pubDate", "daysSincePublished", "journal", "articleType",
    "authorsCount"
  )
  altmetricsColumns <- names(dat.eventcounts)[
    names(dat.eventcounts) %nin% metadataColumns
  ]

  # Copy of the data in which altmetric values above 1 are capped at 1.
  # NOTE(review): for non-negative whole-number counts this yields a 0/1
  # "has at least one event" flag -- confirm the columns hold such counts.
  dat.metricIsUsed <- dat.eventcounts
  dat.metricIsUsed[, altmetricsColumns][dat.metricIsUsed[, altmetricsColumns] > 1] <- 1

  # Printed explicitly: a bare top-level expression is not auto-printed when
  # the script is run through source() with its default arguments.
  print(summary(dat.metricIsUsed[, altmetricsColumns]))

  # Output formatting for everything printed from here on (set after the
  # summary above, so that summary still uses the previous settings):
  # avoid scientific notation, 2 significant digits, 100-character lines.
  options(scipen = 100, digits = 2, width = 100)
  # Metrics by Average number of articles with at least one event
  # (per-column mean of dat.metricIsUsed over the altmetric columns, NA ignored).
  # drop = FALSE keeps a data frame even when only one altmetric column exists,
  # which apply() needs. print() makes the result visible under source() too.
  print(apply(dat.metricIsUsed[, altmetricsColumns, drop = FALSE], 2, mean, na.rm = TRUE))

  # Number of articles by how many metrics they have with at least one event
  # (per-row sum over the altmetric columns, then tabulated).
  hist.has.events <- table(
    apply(dat.metricIsUsed[, altmetricsColumns, drop = FALSE], 1, sum, na.rm = TRUE)
  )
  print(cbind(hist.has.events))

  # pdf() does not create missing directories, so make sure the target exists.
  dir.create("results", showWarnings = FALSE, recursive = TRUE)
  pdf("results/num_articles_nonzero_event_counts.pdf")
  # Plot the table as proportions of all articles rather than raw counts.
  plot(hist.has.events / sum(hist.has.events), main = "number of articles by count of nonzero altmetric types")
  dev.off()
  # Look at the distributions
  # For every altmetric column, open a new graphics device and draw two stacked
  # histograms from dat.eventcounts: the raw values on top and the
  # tr()-transformed values below.
  # NOTE(review): tr() is not defined in this script; the second plot title
  # suggests it computes sqrt(1 + x) -- confirm against its definition.
  for (col in altmetricsColumns) {
    #pdf(paste("results/hist_has_events/", col, ".pdf", sep=""))
    # dev.new() opens the platform's default device; quartz() is only usable
    # on macOS and stops the loop everywhere else.
    dev.new()
    par(mfrow = c(2, 1))  # two rows, one column: both histograms on one page
    titletext <- paste0(col, "\nnot normalized by pubdate")
    hist(dat.eventcounts[, col], breaks = 50, main = titletext)
    hist(
      tr(dat.eventcounts[, col]),
      breaks = 50,
      main = paste0("sqrt(1+", col, ")", "\nnot normalized by pubdate")
    )
    #dev.off()
  }
  ### Write out to examine in other programs
  # write.csv() cannot open a file inside a directory that does not exist, so
  # create the output directory first (a no-op when it is already there).
  dir.create("data/derived", showWarnings = FALSE, recursive = TRUE)
  write.csv(dat.metricIsUsed, file = "data/derived/all_metrics_used.csv")