/scripts/preprocessing_wos.R
http://github.com/hpiwowar/alt-metrics_stats · R · 34 lines · 11 code · 12 blank · 11 comment · 0 complexity · ec6a9c873e86c9945545e4153b675b76 MD5 · raw file
#library(Rserve)
#Rserve(args="--no-save")

######## event_counts.txt
## This data contains info on metrics for which we only have aggregate counts
## NOTE(review): the heading above names event_counts.txt, but this script
## reads data/raw/isi_wos.txt -- confirm the heading is not a leftover.
##
## Overview:
##   1. Read the raw tab-separated file data/raw/isi_wos.txt.
##   2. Keep the DI column (as doi) and the TC column (as numeric wosCount).
##      Presumably DI is the article DOI and TC a citation count -- confirm
##      against the export format.
##   3. Write that two-column table to data/raw/isi_wos_counts.txt.
##   4. Read it back and merge it into dat.eventcounts by doi.
## dat.eventcounts is not created in this script; it must already exist in
## the workspace before the merge step runs.

### READ DATA
## quote = "" turns off quote handling, so quote characters inside a field
## are kept as literal text instead of delimiting the field.
dat.raw.wos <- read.csv(
  "data/raw/isi_wos.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE,
  quote = ""
)

## Look at it
dim(dat.raw.wos)
names(dat.raw.wos)
summary(dat.raw.wos)

## A bit of data cleaning
## as.numeric() turns TC values that cannot be parsed into NA (with a warning).
dat.wos <- data.frame(
  doi = dat.raw.wos$DI,
  wosCount = as.numeric(dat.raw.wos$TC)
)
summary(dat.wos)

## quote = FALSE is required: the file is read back below with quote = "",
## which would otherwise keep the double quotes that write.table() adds by
## default as part of the header and doi values (the column would come back
## as X.doi. and the merge on "doi" would fail).
write.table(
  dat.wos,
  "data/raw/isi_wos_counts.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# start with the data in the repository
dat.wos <- read.csv(
  "data/raw/isi_wos_counts.txt",
  header = TRUE,
  sep = "\t",
  quote = ""
)
summary(dat.wos)

# Merge with eventcounts
## eliminate columns not in use right now
## NOTE(review): no columns are actually dropped here -- confirm whether the
## comment above is stale or a step is missing.
## merge() keeps only rows whose doi appears in both data frames (all = FALSE
## is the default), so rows of dat.eventcounts without a match are dropped.
dat.eventcounts <- merge(dat.eventcounts, dat.wos, by.x = "doi", by.y = "doi")

## Look again
summary(dat.eventcounts)