/R/isi2df.R

https://github.com/massimoaria/bibliometrix · R · 72 lines · 48 code · 19 blank · 5 comment · 3 complexity · 70320fca8b36da8e56080fd0676a2315 MD5 · raw file

  # Convert the lines of an ISI / Web of Science-style tagged plain-text export
  # into a data frame with one row per record and one column per field tag.
  #
  # Each input line is expected to start with a three-character field tag (two
  # letters plus a space) followed by the field content; a line starting with
  # three spaces continues the field opened by the closest preceding tagged
  # line. A record starts at a line tagged "PT ".
  #
  # Args:
  #   D: character vector, one element per line of the export file.
  #
  # Returns:
  #   A data.frame with one row per record and one column per tag found in D,
  #   plus a constant column DB = "ISI". Multi-line values of AU, AF, DE, ID
  #   and CR are joined with ";", those of every other tag with a single
  #   space. All columns except DI are upper-cased.
  #   NOTE: toupper() returns character vectors, so every column (PY included)
  #   is of type character on return.
  #
  # Raises:
  #   An error when D contains no line starting with "PT ".
  #
  # Relies on the project helpers trimES() and trim() (not visible in this
  # file; presumably whitespace clean-up -- confirm against their definitions)
  # and on the dplyr / tidyr verbs already used by this file.
  isi2df <- function(D) {
    D <- D[nchar(D) > 0] # remove empty rows
    D <- D[!(substr(D, 1, 3) %in% c("FN ", "VR "))] # drop file header rows

    tag <- substr(D, 1, 3)

    # Everything before the first record cannot be assigned to a paper, so it
    # is discarded; without any "PT " line there is nothing to convert.
    first_record <- match("PT ", tag)
    if (is.na(first_record)) {
      stop("isi2df: no record found (no line starts with the 'PT ' tag).",
        call. = FALSE
      )
    }
    keep <- seq_along(D) >= first_record
    D <- D[keep]
    tag <- tag[keep]

    # Record index of every line: incremented at each "PT " line. Computed on
    # the raw tags so that a continuation line never opens a new record.
    paper_id <- cumsum(tag == "PT ")

    # Continuation lines carry three blanks instead of a tag: give each one
    # the tag of the closest preceding tagged line. The first kept line is
    # always "PT ", so cumsum(has_tag) is >= 1 everywhere.
    has_tag <- tag != strrep(" ", 3L)
    tag <- tag[has_tag][cumsum(has_tag)]

    DATA <- data.frame(
      Tag = gsub(" ", "", tag),
      content = substr(D, 4, nchar(D)),
      Paper = paper_id,
      stringsAsFactors = FALSE
    )

    # One cell per (record, tag): the lines of a field are glued with "---",
    # a placeholder that is replaced by the proper separator further down.
    df <- DATA %>%
      group_by(.data$Paper, .data$Tag) %>%
      summarise(cont = paste(.data$content, collapse = "---")) %>%
      ungroup() %>%
      arrange(.data$Tag, .data$Paper) %>%
      pivot_wider(names_from = "Tag", values_from = "cont") %>%
      as.data.frame()

    if ("PY" %in% names(df)) {
      df$PY <- as.numeric(df$PY)
    }

    ### replace "---" with ";"
    tagsComma <- c("AU", "AF", "DE", "ID", "CR")
    nolab <- setdiff(tagsComma, names(df))
    if (length(nolab) > 0) {
      cat(paste0(
        "\nWarning:\nIn your file, some mandatory metadata are missing. ",
        "Bibliometrix functions may not work properly!\n\n",
        "Please, take a look at the vignettes:\n",
        "- 'Data Importing and Converting' ",
        "(https://cran.r-project.org/web/packages/bibliometrix/vignettes/",
        "Data-Importing-and-Converting.html)\n",
        "- 'A brief introduction to bibliometrix' ",
        "(https://cran.r-project.org/web/packages/bibliometrix/vignettes/",
        "bibliometrix-vignette.html)\n\n"
      ))
      cat("\nMissing fields: ", nolab)
    }
    tagsComma <- setdiff(tagsComma, nolab)
    df1 <- data.frame(
      lapply(df[tagsComma], function(x) {
        gsub("---", ";", x)
      }),
      stringsAsFactors = FALSE
    )

    ### replace "---" with " "
    otherTags <- setdiff(names(df), tagsComma)
    df2 <- data.frame(
      lapply(df[otherTags], function(x) {
        trimES(gsub("---", " ", x))
      }),
      stringsAsFactors = FALSE
    )
    df <- cbind(df1, df2)
    rm(df1, df2)

    df$DB <- "ISI"

    # Authors: commas become blanks (skipped when the export has no AU field)
    if ("AU" %in% names(df)) {
      df$AU <- trimES(gsub(",", " ", df$AU))
    }

    # Upper-case every column; DI is saved and restored so it keeps its case
    DI <- df$DI
    df <- data.frame(lapply(df, toupper), stringsAsFactors = FALSE)
    df$DI <- DI

    # Affiliations: drop the "[...]" groups, then put ";" after every "."
    if ("C1" %in% names(df)) {
      df$C1 <- trim(gsub("\\[.*?\\]", "", df$C1)) # to test
      df$C1 <- gsub("\\.", ".;", df$C1)
    }

    # Paper was only the pivot key; it is not part of the result
    df <- df[names(df) != "Paper"]
    return(df)
  }