/scripts/preprocessing_wos.R

http://github.com/hpiwowar/alt-metrics_stats · R · 34 lines · 11 code · 12 blank · 11 comment · 0 complexity · ec6a9c873e86c9945545e4153b675b76 MD5 · raw file

  # library(Rserve)
  # Rserve(args = "--no-save")

  ######## isi_wos.txt
  ## Preprocess the raw isi_wos.txt file: keep the DI column (as `doi`) and
  ## the TC column (as numeric `wosCount`), cache the result as a
  ## tab-separated file, and merge it into `dat.eventcounts`.
  ##
  ## NOTE: `dat.eventcounts` is not created in this script; it must already
  ## exist in the workspace (presumably built by an earlier preprocessing
  ## script -- confirm against the calling workflow).

  ### READ DATA
  ## quote = "" disables quote processing, so any quote characters in the raw
  ## file are read as ordinary text instead of delimiting fields.
  dat.raw.wos <- read.csv(
    "data/raw/isi_wos.txt",
    header = TRUE, sep = "\t", stringsAsFactors = FALSE, quote = ""
  )

  ## Look at it
  dim(dat.raw.wos)
  names(dat.raw.wos)
  summary(dat.raw.wos)

  ## A bit of data cleaning
  ## Only DI and TC are kept. as.numeric() turns TC values that cannot be
  ## parsed as numbers into NA (with a warning).
  dat.wos <- data.frame(
    doi = dat.raw.wos$DI,
    wosCount = as.numeric(dat.raw.wos$TC),
    stringsAsFactors = FALSE
  )
  summary(dat.wos)

  ## quote = FALSE is required here: the file is read back below with
  ## quote = "", which would otherwise keep the surrounding double quotes as
  ## literal text, mangling the `doi` column name (check.names rewrites it)
  ## and every DOI value, so the merge on "doi" could not work.
  write.table(
    dat.wos, "data/raw/isi_wos_counts.txt",
    sep = "\t", row.names = FALSE, quote = FALSE
  )

  # start with the data in the repository
  dat.wos <- read.csv(
    "data/raw/isi_wos_counts.txt",
    header = TRUE, sep = "\t", stringsAsFactors = FALSE, quote = ""
  )
  summary(dat.wos)

  # Merge with eventcounts
  ## merge() defaults to an inner join (all = FALSE): rows whose doi appears
  ## in only one of the two tables are dropped from dat.eventcounts.
  dat.eventcounts <- merge(dat.eventcounts, dat.wos, by.x = "doi", by.y = "doi")

  ## Look again
  summary(dat.eventcounts)