PageRenderTime 21ms CodeModel.GetById 13ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/preprocessing_wos.R

http://github.com/hpiwowar/alt-metrics_stats
R | 34 lines | 11 code | 12 blank | 11 comment | 0 complexity | ec6a9c873e86c9945545e4153b675b76 MD5 | raw file
 1#library(Rserve)
 2#Rserve(args="--no-save")
 3
 4######## event_counts.txt
 5## This data contains into on metrics for which we only have aggregate counts
 6
 7### READ DATA
 8dat.raw.wos = read.csv("data/raw/isi_wos.txt", header=TRUE, sep="\t", stringsAsFactors=FALSE, quote="")
 9
10## Look at it
11dim(dat.raw.wos)
12names(dat.raw.wos)
13summary(dat.raw.wos)
14
## A bit of data cleaning
## Keep two columns from the raw table: DI, stored as doi, and TC coerced to
## numeric, stored as wosCount (presumably the times-cited count -- confirm).
dat.wos <- data.frame(
  doi = dat.raw.wos$DI,
  wosCount = as.numeric(dat.raw.wos$TC)
)
summary(dat.wos)

## Save the two-column table as tab-separated text without row names.
## quote = FALSE writes the header and the doi strings without surrounding
## double quotes. The reader later in this script uses quote = "", which does
## no quote processing: with write.table's default quoting the quote
## characters would end up inside the column names and the doi values, and
## the later merge on "doi" would not find its key column.
write.table(
  dat.wos,
  "data/raw/isi_wos_counts.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# start with the data in the repository
# Load the stored counts table: tab-separated, header row, and no quote
# processing (quote = ""), so field contents are taken literally.
dat.wos <- read.delim("data/raw/isi_wos_counts.txt", quote = "")
summary(dat.wos)

# Merge with eventcounts

## dat.eventcounts is not created in the lines visible here; it is expected to
## exist already in the session -- TODO confirm which script builds it.
## Join on the shared "doi" column. merge() defaults to all = FALSE, so only
## rows whose doi occurs in both data frames are kept; the remaining dat.wos
## columns are appended to them.
dat.eventcounts <- merge(dat.eventcounts, dat.wos, by = "doi")


## Look again
summary(dat.eventcounts)
