/R/cochrane2df.R

https://github.com/massimoaria/bibliometrix · R · 63 lines · 41 code · 17 blank · 5 comment · 0 complexity · ac0ac1f7d76c9cb056d32df1485482b2 MD5 · raw file

  # Convert the lines of a Cochrane Library export into a bibliographic
  # data frame with one row per record.
  #
  # Args:
  #   D: character vector of export lines (presumably produced by readLines();
  #      confirm against the caller). A record starts at a line beginning with
  #      "Record #". On every line the first four characters hold the field
  #      tag and the text from the fifth character on holds the content.
  #
  # Returns:
  #   A data.frame with one row per record and one column per tag. Tags are
  #   renamed YR->PY, ID->UT, KY->ID, US->URL, DOI->DI, NO->NR. All columns
  #   are upper-cased character vectors, except DI, which keeps its original
  #   case with spaces removed. DB is set to "COCHRANE", DE is a copy of ID,
  #   and JI/J9 are copies of SO. The columns AU, ID, PY, DI and SO are always
  #   present (filled with NA when the export does not contain the tag).
  #
  # Raises:
  #   An error when no line starts with "Record #".
  cochrane2df <- function(D) {
    D <- D[!is.na(D) & nchar(D) > 0] # remove empty rows

    # A record starts at each "Record #" line; number the lines by record.
    is_start <- substr(D, 1L, 8L) == "Record #"
    if (!any(is_start)) {
      stop("cochrane2df: no line starting with \"Record #\" found in input",
           call. = FALSE)
    }
    paper_id <- cumsum(is_start)

    # Lines that precede the first record belong to no document: drop them so
    # that tags, contents and record ids stay aligned.
    in_record <- paper_id > 0L
    D <- D[in_record]
    paper_id <- paper_id[in_record]

    DATA <- data.frame(
      Tag = gsub(" ", "", gsub(":", "", substr(D, 1L, 4L))),
      content = substr(D, 5L, nchar(D)),
      Paper = paper_id,
      stringsAsFactors = FALSE
    )

    # Repeated tags inside one record are joined with "---" so that they can
    # be turned into ";" or " " later, depending on the field.
    df <- DATA %>%
      group_by(.data$Paper, .data$Tag) %>%
      summarise(
        cont = paste(.data$content, collapse = "---"),
        .groups = "drop"
      ) %>%
      arrange(.data$Tag, .data$Paper) %>%
      pivot_wider(names_from = "Tag", values_from = "cont") %>%
      as.data.frame()

    # Rename the export tags to the target field names. All renames are applied
    # at once (ID -> UT and KY -> ID must not chain), and tags that are absent
    # from this export are simply skipped.
    tag_map <- c(
      YR = "PY", ID = "UT", KY = "ID",
      US = "URL", DOI = "DI", NO = "NR"
    )
    renamed <- names(df) %in% names(tag_map)
    names(df)[renamed] <- unname(tag_map[names(df)[renamed]])

    # The steps below read these columns unconditionally; create them as NA
    # when the export did not provide the corresponding tag.
    for (tag in setdiff(c("AU", "ID", "PY", "DI", "SO"), names(df))) {
      df[[tag]] <- NA_character_
    }

    # NOTE(review): the gsub()/toupper() passes below turn PY back into
    # character, so callers that need numeric years must coerce again.
    df$PY <- as.numeric(df$PY)

    # Multi-valued fields use ";" as separator; every other field is joined
    # with a blank and passed through trimES (helper defined elsewhere in the
    # package).
    tags_semicolon <- c("AU", "ID")
    other_tags <- setdiff(names(df), tags_semicolon)
    df <- df[c(tags_semicolon, other_tags)]
    df[tags_semicolon] <- lapply(df[tags_semicolon], function(x) {
      gsub("---", ";", x)
    })
    df[other_tags] <- lapply(df[other_tags], function(x) {
      trimES(gsub("---", " ", x))
    })

    # Strip bracketed qualifiers from the keywords and tidy the separators.
    keywords <- gsub("\\[[^\\]]*\\]", "", df$ID, perl = TRUE)
    df$ID <- gsub(" ;", ";", gsub("; ", ";", keywords))

    df$DB <- "COCHRANE"

    # Upper-case every field except the DOI, which keeps its original case.
    DI <- df$DI
    df <- data.frame(lapply(df, toupper), stringsAsFactors = FALSE)
    df$DI <- gsub(" ", "", DI)

    # Drop the helper columns: the record counter and the "Record #" line.
    df <- df[!(names(df) %in% c("Paper", "Reco"))]

    df$DE <- df$ID
    df$JI <- df$J9 <- df$SO
    df
  }