/unixSoft/bin/OCLC-to-bibtex.awk
AWK | 244 lines | 182 code | 40 blank | 22 comment | 0 complexity | 2657f9c0278975191d0635d693241770 MD5 | raw file
#
# OCLC-to-bibtex.awk is a GAWK script that converts the export format of the
# OCLC databases to BibTeX. It processes the input and tries to convert it into
# BibTeX entries, which are written to a file in /tmp. This file is then opened
# using the program specified in "prog" (by default: emacsclient).
#
# NOTE: It does not extensively test what kind of publication is being
# processed. It only has some rudimentary checks to discover whether a
# processed publication is an InBook or an Article.
#
# Hedderik van Rijn, 020912-020914
#
# Do whatever you want with this script, but if you improve it, please send me a copy!
# email: hvr-OCLC@van-rijn.org
#
# Program set-up: chooses the output file, the viewer command and prints a
# banner line to standard output.
BEGIN {
    # File that collects the generated BibTeX entries. The name embeds the
    # current epoch time returned by systime().
    # NOTE(review): the name is predictable; confirm this is acceptable on
    # shared machines.
    tmpfile = "/tmp/tobib." systime() ".tmp.bib";

    # Identifiers may not contain "-": the original "oclc-version = ..." was
    # parsed as a subtraction, so the banner below printed "using0".
    oclc_version = "OCLC-to-bibtex v0.1";

    # External interactive progs
    # prog = "xless ";
    prog = "emacsclient ";
    # prog = "open -a TextEdit ";

    # Appended to the viewer command line in the END rule.
    atEnd = "&";

    # (Indirect) Output to stdout
    # prog = "cat ";
    # atEnd = "";

    print "# Exported from the OCLC FirstSearch PsychINFO database using " oclc_version;
}
# -------------------------------------------------------------------------
# A line whose first or second field contains "<letters>:" is taken to start
# a new labelled field, so any multi-line value still being collected
# (abstract or descriptor list) gets its closing brace here.
($1 ~ /[A-Za-z]+:/ || $2 ~ /[A-Za-z]+:/) {
    if (inAbstract == 1) {
        inAbstract = 0;
        abstract = abstract "}";
    }
    if (inDescriptor == 1) {
        inDescriptor = 0;
        keywords = keywords "}";
    }
}
# A line without a label in its first two fields is a continuation line: its
# text is appended to whichever multi-line value is currently open.
!($1 ~ /[A-Za-z()]+:/ || $2 ~ /[A-Za-z()]+:/) {
    if (inAbstract == 1) {
        # Abstract text simply continues on the next line.
        abstract = abstract " " $0;
    }
    if (inDescriptor == 1) {
        # Each continuation line of the descriptor list is one more keyword.
        keywords = keywords ", " $0;
    }
}
# Builds the "author" field and the citation-key prefix "mainauthor".
# Stand-alone ";" tokens separate authors and become "and". The key prefix is
# the lower-cased text of the first author up to and including the first
# token that contains a comma, with the commas removed afterwards.
$1 == "Author(s):" {
    # Cut off an affiliation that follows the names on the same line.
    gsub(/Affiliation:.*/, "");

    authorNames = "";
    inFirstAuthor = 1;
    for (k = 2; k <= NF; k++) {
        word = $k;
        if (word == ";") {
            word = "and";
            inFirstAuthor = 0;
        }
        authorNames = authorNames word (k < NF ? " " : "");
        if (inFirstAuthor) {
            mainauthor = mainauthor tolower(word);
        }
        if (index(word, ",") > 0) {
            # Everything after the surname's comma is not part of the key.
            inFirstAuthor = 0;
        }
    }
    author = "\tauthor = {" authorNames "}";
    gsub(",", "", mainauthor);
}
# Starts the "keywords" field from a "Descriptor:" line. Further keywords are
# appended by the continuation rule while inDescriptor is set; the inner
# brace group is closed by the label rule when the next labelled line arrives.
$1 == "Descriptor:" {
    inDescriptor = 1;

    # Work on a copy: editing $0 in place re-splits the record, which can
    # change $1 or leave NF == 0 and make later rules fire on this line.
    descriptorText = $0;
    sub(/^[ \t]*Descriptor:[ \t]*/, "", descriptorText);
    gsub(/\(Major\):[ \t]+/, "", descriptorText);

    keywords = "\tkeywords = {{" descriptorText;
}
# Appends the "Identifier:" text as a second brace group and closes the
# "keywords" field.
$1 == "Identifier:" {
    # The original assigned to "descriptor", a variable nothing reads; the
    # flag used by the other rules is inDescriptor.
    inDescriptor = 0;

    # Work on a copy so the record ($0, $1, NF) seen by later rules is intact.
    identifierText = $0;
    sub(/^[ \t]*Identifier:[ \t]*/, "", identifierText);

    # Without a preceding "Descriptor:" line there is no field header yet.
    if (keywords == "") {
        keywords = "\tkeywords = {";
    }
    keywords = keywords "{" identifierText "}}";
}
# Joins fields first..last of the current record with single spaces.
# Returns "" when the range is empty.
function joinFields(first, last,    k, out) {
    out = "";
    for (k = first; k <= last; k++) {
        out = (out == "") ? $k : out " " $k;
    }
    return out;
}

# Parses a "Source: In: <editors> Ed; <book title> <x> <pages>" record and
# sets type, pages, booktitle and editors. The field before the page range
# is skipped, as in the original code.
function parseBookSource(    k, edIdx, names) {
    type = 1; # In Book

    pages = "\tpages = {" $NF "}";
    gsub(/-+/, "--", pages);   # a run of dashes becomes exactly one "--"
    gsub(/\./, "", pages);     # same clean-up as for journal pages

    # Find the last "Ed;" marker; the scan is bounded so that a record
    # without the marker cannot run into negative field numbers.
    edIdx = 0;
    for (k = NF - 2; k > 2; k--) {
        if ($k == "Ed;") {
            edIdx = k;
            break;
        }
    }

    # Without a marker everything after "In:" is treated as the book title.
    booktitle = joinFields((edIdx ? edIdx : 2) + 1, NF - 2);
    gsub(";", "", booktitle);
    sub(/\.$/, "", booktitle);
    booktitle = "\tbooktitle = {" booktitle "}";

    names = edIdx ? joinFields(3, edIdx) : "";
    gsub(" Ed;", "", names);
    gsub("; ", " and ", names);
    gsub(";", "", names);
    # The BibTeX field name is "editor" (singular), also for several editors.
    editors = "\teditor = {" names "}";
}

# Parses a "Source: <journal> Vol <vol>(<issue>), [<month>] <year>, <pages>"
# record and sets type, journal, volume, number, year, mainyear and pages.
function parseJournalSource(    k, volIdx, token, issue, yearText, shortYear) {
    type = 2; # Journal

    # Bounded search: the original looped forever when "Vol" was missing.
    volIdx = 0;
    for (k = 2; k <= NF; k++) {
        if ($k == "Vol") {
            volIdx = k;
            break;
        }
    }

    if (!volIdx) {
        # Unknown layout: keep the whole text, leave the other fields empty.
        journal = "\tjournal = {" joinFields(2, NF) "}";
        volume = "\tvolume = {}";
        number = "\tnumber = {}";
        year = "\tyear = {}";
        pages = "\tpages = {}";
        return;
    }

    journal = "\tjournal = {" joinFields(2, volIdx - 1) "}";

    # "<vol>(<issue>)," -> volume and number; the issue part is optional.
    token = (volIdx < NF) ? $(volIdx + 1) : "";
    sub(/,$/, "", token);
    issue = "";
    if (match(token, /\(.*\)/)) {
        issue = substr(token, RSTART + 1, RLENGTH - 2);
        token = substr(token, 1, RSTART - 1);
    }
    volume = "\tvolume = {" token "}";
    number = "\tnumber = {" issue "}";

    k = volIdx + 2;
    if (k <= NF && ($k + 0) == 0) { # Skip the month if necessary
        k++;
    }
    yearText = (k <= NF) ? $k : "";
    sub(",", "", yearText);
    year = "\tyear = {" yearText "}";

    # The citation key uses the year without its first two digits.
    shortYear = yearText;
    sub("[0-9][0-9]", "", shortYear);
    mainyear = shortYear;

    pages = "\tpages = {" $NF "}";
    gsub(/-+/, "--", pages);
    gsub(/\./, "", pages);
}

# Dispatches a "Source:" line: "In:" as second field marks a book chapter,
# anything else is treated as a journal article.
$1 == "Source:" {
    if ($2 == "In:") {
        parseBookSource();
    } else {
        parseJournalSource();
    }
}
# Protects the capitalisation of one title word for BibTeX: a word that is
# unchanged by toupper() is wrapped in braces as a whole, otherwise every
# capital letter A-Z is wrapped individually.
function protectTitleWord(word) {
    if (word == toupper(word)) {
        return "{" word "}";
    }
    gsub(/[A-Z]/, "{&}", word);
    return word;
}

# Builds the "title" field from the words following "Title:".
$1 == "Title:" {
    titleText = "";
    for (k = 2; k <= NF; k++) {
        word = $k;
        if (k == NF) {
            # Drop only the sentence-final period. The original removed every
            # ".}" in the finished field, which also ate the period of
            # braced tokens such as "U.S." in the middle of the title.
            sub(/\.$/, "", word);
        }
        titleText = titleText (k > 2 ? " " : "") protectTitleWord(word);
    }
    title = "\ttitle = {" titleText "}";
}
# Opens the "abstract" field. Following unlabelled lines are appended by the
# continuation rule while inAbstract is set; the closing brace is added by
# the label rule when the next labelled line is seen.
$1 == "Abstract:" {
    # Work on a copy: stripping the label from $0 itself leaves an empty
    # record when the label stands alone on its line, so NF becomes 0 and the
    # blank-line rule would emit the entry in the middle of the record.
    abstractText = $0;
    sub(/^[ \t]*Abstract:[ \t]*/, "", abstractText);

    abstract = "\tabstract = {" abstractText;
    inAbstract = 1;
}
## Fall back on the "Accession No:" value for the year when no year has been
## found so far (mainyear is still empty). The third field is cut at its first
## "-" and what remains is used as the year.
$1 == "Accession" {
    if (mainyear == "") {
        accessionYear = $3;
        gsub(/-.*/, "", accessionYear);
        year = "\tyear = {" accessionYear " (had to use heuristics to determine the year!)}";

        # The key part drops the first two digits and is marked with "?".
        keyYear = accessionYear;
        sub("[0-9][0-9]", "", keyYear);
        mainyear = keyYear "?";
    }
}
# Appends one "<field>," line to tmpfile unless the field is empty or has an
# empty value ("... = {}").
function emitField(field) {
    if (field != "" && field !~ /= [{][}]$/) {
        print(field ",") >> tmpfile;
    }
}

# Returns text with as many "}" appended as are needed to match the number
# of "{" it contains. Used for values that were still open when the record
# ended (for example an abstract that is the last field of a record).
function balanceBraces(text,    copy, opens, closes) {
    copy = text;
    opens = gsub(/[{]/, "", copy);
    copy = text;
    closes = gsub(/[}]/, "", copy);
    for (; closes < opens; closes++) {
        text = text "}";
    }
    return text;
}

# Writes the collected record to tmpfile as one BibTeX entry, provided an
# author was found, and then clears the per-record state.
# The citation key is "<mainauthor>:<mainyear>x".
function printEntry() {
    if (mainauthor != "") {
        typestring = (type == 1) ? "InBook" : "Article";
        print("@" typestring "{" mainauthor ":" mainyear "x,") >> tmpfile;
        emitField(author);
        emitField(title);
        emitField(year);
        if (type == 1) { # In Book
            emitField(booktitle);
            emitField(editors);
            emitField(pages);
        }
        if (type == 2) { # Article
            emitField(journal);
            emitField(volume);
            emitField(number);
            emitField(pages);
        }
        emitField(balanceBraces(abstract));
        keywords = balanceBraces(keywords);
        if (keywords != "") {
            print(keywords) >> tmpfile;
        }
        print("}") >> tmpfile;
        print("") >> tmpfile;
        print("") >> tmpfile;

        # Clear everything that belongs to the entry just written, so that a
        # following record lacking a field does not inherit the old value.
        author = title = year = "";
        booktitle = editors = pages = "";
        journal = volume = number = "";
        abstract = keywords = "";
        type = 0;
        inAbstract = 0;
        inDescriptor = 0;
    }
    mainauthor = "";
    mainyear = "";
}
# A record without any fields (a blank line) ends the current entry.
!NF {
    printEntry();
}
# Flushes the last record and opens the result file with the viewer command
# configured in BEGIN ("prog", followed by "atEnd").
END {
    printEntry();
    # Close the output file first so that all entries are on disk before
    # another program reads it.
    close(tmpfile);
    system(prog " " tmpfile " " atEnd);
}