# cochrane2df.R | searchcode
#
# /R/cochrane2df.R
#
# https://github.com/massimoaria/bibliometrix · R · 63 lines · 41 code · 17 blank · 5 comment · 0 complexity · ac0ac1f7d76c9cb056d32df1485482b2 MD5 · raw file


# Convert the lines of a Cochrane export into a bibliographic data frame.
#
# Args:
#   D: character vector with the lines of the export. Every document starts
#      with a line beginning with "Record #"; the following lines carry a tag
#      in their first four characters and the content from the fifth on.
#
# Returns:
#   A data.frame with one row per document and one column per tag found in
#   the export. Tags are renamed as YR -> PY, ID -> UT, KY -> ID, US -> URL,
#   DOI -> DI and NO -> NR. Repeated tags within a document are joined with
#   ";" for AU and ID and with a single space for every other tag. All text
#   is upper-cased except DI, PY is numeric, DB is set to "COCHRANE", DE is a
#   copy of ID, and J9/JI are copies of SO.
#
# Errors:
#   Stops when D contains no "Record #" line.
cochrane2df <- function(D) {
  D <- D[!is.na(D) & nchar(D) > 0] # remove missing and empty rows

  Papers <- which(substr(D, 1, 8) == "Record #") # first row of each document
  nP <- length(Papers) # number of documents
  if (nP == 0) {
    stop("No line starting with 'Record #' found in the input.", call. = FALSE)
  }

  # Rows before the first record header belong to no document. Drop them so
  # that the row-to-document mapping below has exactly one entry per row.
  firstRow <- Papers[1]
  if (firstRow > 1) {
    D <- D[firstRow:length(D)]
    Papers <- Papers - firstRow + 1
  }

  # Number of rows of each document, then the document index of every row.
  rowPapers <- diff(c(Papers, length(D) + 1))
  numPapers <- rep(seq_len(nP), rowPapers)

  DATA <- data.frame(
    Tag = substr(D, 1, 4),
    content = substr(D, 5, nchar(D)),
    Paper = numPapers,
    stringsAsFactors = FALSE
  )
  DATA$Tag <- gsub(" ", "", gsub(":", "", DATA$Tag))

  # One cell per (document, tag); repeated tags are joined with "---" and
  # split into their final separator further down.
  # pivot_wider() takes tidyselect arguments, so columns are named with
  # strings rather than the .data pronoun.
  df <- DATA %>%
    group_by(.data$Paper, .data$Tag) %>%
    summarise(cont = paste(.data$content, collapse = "---")) %>%
    arrange(.data$Tag, .data$Paper) %>%
    pivot_wider(names_from = "Tag", values_from = "cont") %>%
    ungroup() %>%
    as.data.frame()

  # Rename only the tags that are present. The mapping is applied to the
  # original names in one step, so ID -> UT and KY -> ID do not interfere.
  tagMap <- c(YR = "PY", ID = "UT", KY = "ID", US = "URL", DOI = "DI", NO = "NR")
  toRename <- names(df) %in% names(tagMap)
  names(df)[toRename] <- unname(tagMap[names(df)[toRename]])

  ### replace "---" with ";"
  tagsComma <- intersect(c("AU", "ID"), names(df))
  ### replace "---" with " "
  otherTags <- setdiff(names(df), tagsComma)

  # Column order of the result: ";"-separated tags first, then the others.
  df <- df[c(tagsComma, otherTags)]
  for (tag in tagsComma) {
    df[[tag]] <- gsub("---", ";", df[[tag]])
  }
  for (tag in otherTags) {
    df[[tag]] <- trimES(gsub("---", " ", df[[tag]]))
  }

  # Remove bracketed qualifiers from the keywords and tidy the separators.
  if ("ID" %in% names(df)) {
    keywords <- gsub("\\[[^\\]]*\\]", "", df[["ID"]], perl = TRUE)
    df[["ID"]] <- gsub(" ;", ";", gsub("; ", ";", keywords))
  }

  df$DB <- "COCHRANE"

  # Authors
  # df$AU <- trimES(gsub("-", "", df$AU))

  # Toupper (DI keeps its original case and only loses its spaces)
  DI <- df[["DI"]]
  df <- data.frame(lapply(df, toupper), stringsAsFactors = FALSE)
  if (!is.null(DI)) {
    df[["DI"]] <- gsub(" ", "", DI)
  }

  df <- df[!(names(df) %in% c("Paper", "Reco"))]

  # Convert the year last: the clean-up and toupper() steps above return
  # character vectors, so an earlier conversion would be undone.
  if ("PY" %in% names(df)) {
    df[["PY"]] <- as.numeric(df[["PY"]])
  }

  # Assigning NULL (tag absent from the export) leaves df unchanged.
  df$DE <- df[["ID"]]
  df$J9 <- df[["SO"]]
  df$JI <- df[["SO"]]

  return(df)
}