PageRenderTime 50ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/unixSoft/bin/OCLC-to-bibtex.awk

https://bitbucket.org/durin42/dotfiles
AWK | 244 lines | 182 code | 40 blank | 22 comment | 0 complexity | 2657f9c0278975191d0635d693241770 MD5 | raw file
  1. #
  2. # OCLC-to-bibtex.awk is a GAWK script to convert the export format of the
  3. # OCLC databases to BibTeX. It processes the input and tries to convert it into
  4. # BibTeX entries which are written to a file in /tmp. This file is then opened
  5. # using the program specified in "prog" (by default: emacsclient).
  6. #
  7. # NOTE: It does not do an extensive job of testing what kind of publications
  8. # are being processed. It has some rudimentary checks for discovering whether the
  9. # processed publications are either InBook's or Articles.
  10. #
  11. # Hedderik van Rijn, 020912-020914
  12. #
  13. # Do whatever you want with this script, but if you improve it, please send me a copy!
  14. # email: hvr-OCLC@van-rijn.org
  15. #
  # Start-up: choose the output file, the version banner and the program that
  # will be launched on the output file at the end of the run.
  BEGIN {
      # Output file for this run; systime() (a gawk extension) puts the
      # current time in seconds into the name.
      tmpfile = "/tmp/tobib." systime() ".tmp.bib";

      # AWK identifiers cannot contain "-": "oclc-version = ..." is parsed as
      # a subtraction, so the banner variable uses an underscore instead.
      oclc_version = "OCLC-to-bibtex v0.1";

      # External interactive progs
      # prog = "xless ";
      prog = "emacsclient ";
      # prog = "open -a TextEdit ";
      atEnd = "&";

      # (Indirect) Output to stdout
      # prog = "cat ";
      # atEnd = "";

      # The banner goes to standard output, not to tmpfile.
      print "# Exported from the OCLC FirstSearch PsychINFO database using " oclc_version;
  }
  # -------------------------------------------------------------------------
  # (The separator above must be a "#" comment: AWK has no /* ... */ comments,
  # and such a line is parsed as a regular-expression pattern rule.)
  #
  # Field-label line: when the first or second word contains letters directly
  # followed by ":" (e.g. "Title:"), any multi-line Descriptor or Abstract
  # that is still being collected is closed with "}".
  #
  # NOTE(review): the continuation rule that follows uses a label pattern
  # that also accepts parentheses, so lines starting with e.g. "Author(s):"
  # neither close nor extend an open field -- confirm that this is intended.
  (match($1,/[A-Za-z]+:/) || match($2,/[A-Za-z]+:/)) {
      if (inDescriptor == 1) {
          keywords = keywords "}";
          inDescriptor = 0;
      }
      if (inAbstract == 1) {
          abstract = abstract "}";
          inAbstract = 0;
      }
  }
  # Continuation line: neither of the first two words looks like a field
  # label (letters and/or parentheses directly followed by ":"). The whole
  # line is appended to whichever multi-line field is currently open.
  !(match($1,/[A-Za-z()]+:/) || match($2,/[A-Za-z()]+:/)) {
      # Abstract text is joined with a blank ...
      if (inAbstract == 1)
          abstract = abstract " " $0;
      # ... descriptor lines become further comma-separated keywords.
      if (inDescriptor == 1)
          keywords = keywords ", " $0;
  }
  # Author(s): build the BibTeX author field and collect the lower-cased
  # family name of the first author, which later becomes part of the entry key.
  $1 == "Author(s):" {
      # Drop everything from "Affiliation:" onwards; changing $0 re-splits
      # the record, so NF below reflects the shortened line.
      gsub(/Affiliation:.*/, "");

      author = "\tauthor = {";
      firstauthor = 1;
      k = 2;
      while (k <= NF) {
          # A free-standing ";" separates two authors.
          if ($k == ";") {
              $k = "and";
              firstauthor = 0;
          }
          author = author $k (k < NF ? " " : "");
          # Key stem: every word of the first author up to and including the
          # first word that contains a comma.
          if (firstauthor)
              mainauthor = mainauthor tolower($k);
          if ($k ~ /,/)
              firstauthor = 0;
          k++;
      }
      author = author "}";

      gsub(",", "", mainauthor);
  }
  # Descriptor: open the keyword list. Following continuation lines are
  # appended while inDescriptor is set.
  $1 == "Descriptor:" {
      # Remove the "Descriptor:" and "(Major):" labels from the record itself.
      gsub(/Descriptor:[ \t]+/, "");
      gsub(/\(Major\):[ \t]+/, "");
      # Two opening braces: the inner group is closed when the descriptor
      # list ends, the outer one by the Identifier rule.
      keywords = sprintf("%s%s", "\tkeywords = {{", $0);
      inDescriptor = 1;
  }
  # Identifier: append the identifier text as a second brace group and close
  # the keywords field.
  $1 == "Identifier:" {
      # The flag used by the other rules is inDescriptor; the original
      # assigned to a variable named "descriptor" that nothing reads.
      inDescriptor = 0;
      gsub(/Identifier:[ \t]+/,"")
      # Without a preceding Descriptor there is no field opener yet; start
      # the field here so the output is not a bare "{...}}".
      if (keywords == "") {
          keywords = "\tkeywords = {";
      }
      keywords = keywords "{" $0 "}}";
  }
  # Source: extract the publication details. A source starting with "In:" is
  # treated as a book chapter (type 1), anything else as a journal article
  # (type 2). Both scans for a marker word ("Ed;" / "Vol") are bounded, so a
  # line without the marker can no longer run off the field list.
  $1 == "Source:" {
      if ($2 == "In:") {
          type = 1; # In Book
          # The last word is taken as the page range.
          pages = "\tpages = {" $NF "}";
          gsub("-","--",pages)

          # Book title: the words from $(NF-2) back to the last "Ed;" marker
          # (the word just before the page range is skipped). If there is no
          # "Ed;" the scan stops after "In:" instead of reaching $0 and
          # negative field numbers.
          booktitle = "";
          for (i = NF-2; i > 2 && $i != "Ed;"; i--) {
              if (booktitle == "") {
                  booktitle = $i;
              } else {
                  booktitle = $i " " booktitle;
              }
          }
          gsub(";","",booktitle);
          booktitle = "\tbooktitle = {" booktitle "}";
          gsub("\\.}","}",booktitle);

          # Editors: everything between "In:" and the book title.
          editors = "";
          for (; i > 2; i--) {
              if (editors == "") {
                  editors = $i;
              } else {
                  editors = $i " " editors;
              }
          }
          gsub(" Ed;","",editors);
          gsub("; "," and ",editors);
          gsub(";","",editors);
          # The standard BibTeX field name is "editor"; the variable keeps
          # its name because printEntry() refers to it.
          editors = "\teditor = {" editors "}";
      } else {
          type = 2; # Journal
          # Journal name: the words before the "Vol" marker, joined by single
          # blanks (no trailing blank inside the braces).
          journal = "";
          for (i = 2; i <= NF && $i != "Vol"; i++) {
              if (journal == "") {
                  journal = $i;
              } else {
                  journal = journal " " $i;
              }
          }
          journal = "\tjournal = {" journal "}";

          if (i > NF) {
              # No "Vol" marker: the rest of the layout is unknown. Keep the
              # whole line as the journal, emit empty volume/number fields
              # and leave the year to the Accession fallback.
              volume = "\tvolume = {}";
              number = "\tnumber = {}";
          } else {
              # The word after "Vol" is expected to look like "12(3),".
              i++;
              vol = $i;
              sub(/\(.*\),/,"",vol)
              volume = "\tvolume = {" vol "}"
              sub(/.*\(/,"",$i)
              sub(/\),/,"",$i)
              number = "\tnumber = {" $i "}"
              i++;
              if ($i+1 == 1) { # Skip the month if necessary
                  i++;
              }
              sub(",","",$i);
              year = "\tyear = {" $i "}";
              # The entry key uses the year without its first two digits.
              sub("[0-9][0-9]","",$i);
              mainyear = $i;
          }
          pages = "\tpages = {" $NF "}";
          gsub("-","--",pages)
          gsub("\\.","",pages)
      }
  }
  # Title: build the BibTeX title field, protecting capitalisation with
  # braces: a word that is entirely upper case is wrapped as a whole, in any
  # other word each capital letter is wrapped on its own.
  $1 == "Title:" {
      # Remove only the title-final period. (Stripping every "." that ends
      # up in front of a "}" would also eat the period of brace-wrapped
      # words such as "U.S." in the middle of the title.)
      if (NF >= 2) {
          sub(/\.$/, "", $NF);
      }
      title = "\ttitle = {";
      for (i=2;i<=NF;i++) {
          if ($i == toupper($i)) {
              $i = "{" $i "}";
          } else {
              gsub(/[A-Z]/,"{&}",$i);
          }
          title = title ($i);
          if (i<NF) {
              title = title " ";
          }
      }
      title = title "}";
  }
  # Abstract: open the abstract field with the text on this line. Following
  # continuation lines are appended while inAbstract is set; the closing
  # brace is added when the next field label is seen.
  $1 == "Abstract:" {
      inAbstract = 1;
      # Remove the label (and any blanks after it) from the record itself.
      gsub(/Abstract:[ \t]*/, "");
      abstract = sprintf("%s%s", "\tabstract = {", $0);
  }
  ## Fallback for the year: when no year has been found so far, derive it
  ## from the third word of the "Accession" line (everything from the first
  ## "-" onwards is cut off).
  $1 == "Accession" && mainyear == "" {
      gsub(/-.*/, "", $3);
      year = "\tyear = {" $3 " (had to use heuristics to determine the year!)}";
      # Key part: the year without its first two digits, marked with "?".
      sub(/[0-9][0-9]/, "", $3);
      mainyear = $3 "?";
  }
  # _printField(text): append text followed by "," as one line to tmpfile;
  # an empty text is skipped so that no bare "," line is written.
  function _printField(text) {
      if (text != "") {
          print(text ",") >> tmpfile;
      }
  }

  # printEntry(): write the record collected so far to tmpfile as one BibTeX
  # entry, then clear all per-record state.
  #
  # Nothing is written when no author key (mainauthor) was collected. The
  # entry type is InBook for type 1 and Article otherwise; the key is
  # "<mainauthor>:<mainyear>x".
  function printEntry() {
      # A multi-line field that is still open at the record boundary never
      # saw the field label that would have closed it; balance it here.
      if (inAbstract == 1) {
          abstract = abstract "}";
      }
      if (inDescriptor == 1) {
          keywords = keywords "}}";
      }
      inAbstract = 0;
      inDescriptor = 0;

      if (mainauthor != "") {
          if (type == 1) { # In Book
              typestring = "InBook";
          } else {
              typestring = "Article";
          }
          print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
          _printField(author);
          _printField(title);
          _printField(year);
          if (type == 1) { # In Book
              _printField(booktitle);
              _printField(editors);
              _printField(pages);
          }
          if (type == 2) { # Article
              _printField(journal);
              _printField(volume);
              _printField(number);
              _printField(pages);
          }
          _printField(abstract);
          # Last field: written without a trailing comma.
          if (keywords != "") {
              print(keywords) >> tmpfile;
          }
          print("}") >> tmpfile;
          print("") >> tmpfile;
          print("") >> tmpfile;
      }

      # Clear everything that belongs to one record, so that a record that
      # lacks a field does not inherit the value of the previous record.
      mainauthor = "";
      mainyear = "";
      type = 0;
      author = "";
      title = "";
      year = "";
      booktitle = "";
      editors = "";
      pages = "";
      journal = "";
      volume = "";
      number = "";
      abstract = "";
      keywords = "";
  }
  # A line without any fields (blank line) ends the current record: write
  # out whatever has been collected.
  NF < 1 {
      printEntry()
  }
  # End of input: write the last record, then start the configured program
  # on the output file (atEnd is appended to the command line).
  END {
      printEntry();
      viewer = prog " " tmpfile " " atEnd;
      system(viewer);
  }