PageRenderTime 5ms CodeModel.GetById 1ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/descriptive_stats.R

http://github.com/hpiwowar/alt-metrics_stats
R | 43 lines | 23 code | 13 blank | 7 comment | 1 complexity | b8040296d575c41afe8ee4d1607e6973 MD5 | raw file
 library(Hmisc)  # provides %nin% (negated %in%)

# Descriptive statistics for the altmetrics event counts.
#
# NOTE(review): `dat.eventcounts` and the transform `tr()` are not defined in
# this script; both must already exist in the calling environment. Rows appear
# to be articles and non-metadata columns appear to be per-metric event
# counts -- confirm against the script that builds `dat.eventcounts`.
#
# Placeholder content.  Will make more browseable soon.

metadataColumns <- c(
  "doi", "pubDate", "daysSincePublished", "journal", "articleType",
  "authorsCount"
)
# Every column that is not listed as metadata is treated as an altmetric.
altmetricsColumns <- names(dat.eventcounts)[names(dat.eventcounts) %nin% metadataColumns]

# Indicator copy of the counts: every value above 1 is clamped to 1, so a
# column of non-negative integer counts becomes 0/1 ("has at least one event").
dat.metricIsUsed <- dat.eventcounts
dat.metricIsUsed[, altmetricsColumns][dat.metricIsUsed[, altmetricsColumns] > 1] <- 1
summary(dat.metricIsUsed[, altmetricsColumns])

# Printing options for the remaining output: avoid scientific notation,
# show two significant digits, use 100-character lines.
options(scipen = 100, digits = 2, width = 100)

# Metrics by average number of articles with at least one event
colMeans(dat.metricIsUsed[, altmetricsColumns], na.rm = TRUE)

# Number of articles by how many metrics they have with at least one event
hist.has.events <- table(rowSums(dat.metricIsUsed[, altmetricsColumns], na.rm = TRUE))
cbind(hist.has.events)
pdf("results/num_articles_nonzero_event_counts.pdf")
plot(hist.has.events/sum(hist.has.events), main = "number of articles by count of nonzero altmetric types")
dev.off()

# Look at the distributions: one window per metric, raw counts on top and
# the tr()-transformed counts below.
for (col in altmetricsColumns) {
  # pdf(paste0("results/hist_has_events/", col, ".pdf"))
  # dev.new() opens the platform's default device; quartz() exists only on
  # macOS builds of R and fails everywhere else.
  dev.new()
  par(mfrow = c(2, 1))
  titletext <- paste0(col, "\nnot normalized by pubdate")
  hist(dat.eventcounts[, col], breaks = 50, main = titletext)
  # NOTE(review): tr() is defined elsewhere; the title text suggests it
  # computes sqrt(1 + x) -- confirm.
  hist(tr(dat.eventcounts[, col]), breaks = 50,
       main = paste0("sqrt(1+", col, ")", "\nnot normalized by pubdate"))
  # dev.off()
}


### Write out to examine in other programs
write.csv(dat.metricIsUsed, "data/derived/all_metrics_used.csv")