PageRenderTime 26ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/scripts/preprocessing_wos.R

http://github.com/hpiwowar/alt-metrics_stats
R | 34 lines | 11 code | 12 blank | 11 comment | 0 complexity | ec6a9c873e86c9945545e4153b675b76 MD5 | raw file
Possible License(s): MIT
  # library(Rserve)
  # Rserve(args = "--no-save")

  # ISI Web of Science counts (data/raw/isi_wos.txt) ----
  ## This data contains info on metrics for which we only have aggregate counts.
  ## The script reads the raw tab-separated export, keeps one count per DOI,
  ## caches that two-column table on disk, and merges it into dat.eventcounts.

  ### READ DATA
  dat.raw.wos <- read.csv(
    "data/raw/isi_wos.txt",
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE,
    quote = ""
  )

  ## Look at it
  dim(dat.raw.wos)
  names(dat.raw.wos)
  summary(dat.raw.wos)

  ## A bit of data cleaning
  ## Keep only the DI column (renamed doi) and the TC column coerced to numeric
  ## (renamed wosCount). NOTE(review): TC is presumably the WoS "times cited"
  ## field -- confirm against the export format.
  dat.wos <- data.frame(
    doi = dat.raw.wos$DI,
    wosCount = as.numeric(dat.raw.wos$TC)
  )
  summary(dat.wos)

  ## Cache the cleaned counts.
  ## quote = FALSE is required: the file is read back below with quote = "",
  ## which treats double quotes as literal characters. With write.table()'s
  ## default quoting the header would come back as X.doi. / X.wosCount. and
  ## every DOI would carry embedded quote characters, breaking the merge on
  ## "doi".
  write.table(
    dat.wos,
    "data/raw/isi_wos_counts.txt",
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )

  # Start with the data in the repository
  dat.wos <- read.csv(
    "data/raw/isi_wos_counts.txt",
    header = TRUE,
    sep = "\t",
    quote = ""
  )
  summary(dat.wos)

  # Merge with eventcounts
  ## dat.eventcounts is not created in this script; it must already exist in
  ## the session (presumably built by an earlier preprocessing script -- verify).
  stopifnot(exists("dat.eventcounts"))

  ## merge() defaults to an inner join, so rows whose doi is missing from
  ## either table are dropped from dat.eventcounts.
  dat.eventcounts <- merge(dat.eventcounts, dat.wos, by = "doi")

  ## Look again
  summary(dat.eventcounts)