descriptive_stats.R - This R code analyzes altmetric data f…

/scripts/descriptive_stats.R

http://github.com/hpiwowar/alt-metrics_stats · R · 43 lines · 23 code · 13 blank · 7 comment · 1 complexity · b8040296d575c41afe8ee4d1607e6973 MD5 · raw file


library(Hmisc)

# Placeholder content.  Will make more browseable soon.

metadataColumns = c("doi", "pubDate", "daysSincePublished", "journal", "articleType", "authorsCount")
altmetricsColumns = names(dat.eventcounts)[names(dat.eventcounts) %nin% metadataColumns]

dat.metricIsUsed = dat.eventcounts

dat.metricIsUsed[,altmetricsColumns][dat.metricIsUsed[,altmetricsColumns] > 1] = 1
summary(dat.metricIsUsed[,altmetricsColumns])

options(scipen=100)
options(digits=2)
options(width=100)

# Metrics by Average number of articles with at least one event
apply(dat.metricIsUsed[,altmetricsColumns], 2, mean, na.rm=T)

# Number of articles by how many metrics they have with at least one event
hist.has.events = table(apply(dat.metricIsUsed[,altmetricsColumns], 1, sum, na.rm=T))
cbind(hist.has.events)
pdf("results/num_articles_nonzero_event_counts.pdf")
plot(hist.has.events/sum(hist.has.events), main="number of articles by count of nonzero altmetric types")
dev.off()

# Look at the distributions
for (col in altmetricsColumns) {
	#pdf(paste("results/hist_has_events/", col, ".pdf", sep=""))
	quartz()
	par(mfrow = c(2, 1))
	titletext = paste(col, "\nnot normalized by pubdate", sep="")
	hist(dat.eventcounts[,col], breaks=50, main=titletext)
	hist(tr(dat.eventcounts[,col]), breaks=50, main=paste("sqrt(1+", col, ")", "\nnot normalized by pubdate", sep=""))
	#dev.off()
}


### Write out to examine in other programs
write.csv(dat.metricIsUsed, "data/derived/all_metrics_used.csv")