PageRenderTime 49ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/InfoTrac.js

https://gitlab.com/mba811/translators
JavaScript | 276 lines | 239 code | 24 blank | 13 comment | 115 complexity | 4a176d8de23625aa130209ea37224734 MD5 | raw file
  1. {
  2. "translatorID": "6773a9af-5375-3224-d148-d32793884dec",
  3. "label": "InfoTrac",
  4. "creator": "Simon Kornblith",
  5. "target": "^https?://[^/]+/itw/infomark/",
  6. "minVersion": "1.0.0b3.r1",
  7. "maxVersion": "",
  8. "priority": 100,
  9. "inRepository": true,
  10. "translatorType": 4,
  11. "browserSupport": "g",
  12. "lastUpdated": "2013-06-06 00:19:59"
  13. }
  /**
   * Zotero detection hook: decides which item type (if any) the page offers.
   *
   * @param {Document} doc - The loaded page.
   * @param {string} url - The page URL (unused; part of the translator API).
   * @returns {string|boolean} "multiple" for a citation list, an item type
   *   for a single article page, or false when the page is not recognised.
   */
  function detectWeb(doc, url) {
    // ensure that there is an InfoTrac logo
    var logo = doc.evaluate('//img[substring(@alt, 1, 8) = "InfoTrac"]', doc, null,
      XPathResult.ANY_TYPE, null).iterateNext();
    if (!logo) return false;

    if (doc.title.substring(0, 10) == "Citations ") return "multiple";
    if (doc.title.substring(0, 8) != "Article ") return false;

    // Pages carrying the ncnp logo are always treated as newspaper articles.
    if (ZU.xpathText(doc, '//td//img[contains(@src, "ncnp_logo.gif")]/@title')) {
      return "newspaperArticle";
    }

    // Other article pages may announce their type in a " Genre:..." comment.
    var genre = doc.evaluate('//comment()[substring(., 1, 6) = " Genre"]', doc, null,
      XPathResult.ANY_TYPE, null).iterateNext();
    if (genre) {
      var typesByGenre = {
        article: "journalArticle",
        book: "book",
        dissertation: "thesis",
        bookitem: "bookSection"
      };
      // Skip the leading " Genre:" (7 characters). Lowercased so detection
      // agrees with extractCitation, which lowercases the same value.
      var value = Zotero.Utilities.trimInternal(genre.nodeValue.substring(7)).toLowerCase();
      if (Object.prototype.hasOwnProperty.call(typesByGenre, value)) {
        return typesByGenre[value];
      }
    }
    return "magazineArticle";
  }
  /**
   * Scrapes a single article page that carries the ncnp logo into a
   * newspaperArticle item and completes it.
   *
   * Title, author and date are read from the citation table; the newspaper
   * name comes from a " Journal:..." HTML comment in the body.
   *
   * @param {Document} doc - The article page.
   * @param {string} url - The page URL (unused; part of the callback API).
   */
  function scrape(doc, url) {
    var newItem = new Zotero.Item();
    newItem.itemType = "newspaperArticle";

    var citation = ZU.xpath(doc, '//p/table/tbody//td/table/tbody[not(./script)]');
    newItem.title = ZU.xpathText(citation, './/font/b');

    var author = ZU.xpathText(citation, './/td/i');
    if (author) newItem.creators.push(ZU.cleanAuthor(author, "author"));

    // The cell text mixes several values; keep only a "Month D, YYYY" date.
    var cellText = ZU.xpathText(citation, './/td/text()');
    var dateMatch = cellText ? cellText.match(/[A-Z][a-z]+\s\d+,\s\d{4}/) : null;
    if (dateMatch) newItem.date = dateMatch[0];

    var pdfurl = ZU.xpathText(doc, '//blockquote/a[contains(@href, "!pdf")][1]/@href');
    if (pdfurl) {
      newItem.attachments.push({url: pdfurl, title: "Infotrac Full Text PDF", mimeType: "application/pdf"});
    }
    newItem.attachments.push({document: doc, title: "Infotrac Snapshot", mimeType: "text/html"});

    // Metadata comments look like " Field:value " - strip the padding
    // character on each side and split on the first colon.
    var comments = doc.evaluate('/html/body//comment()', doc, null, XPathResult.ANY_TYPE, null);
    var comment;
    while ((comment = comments.iterateNext())) {
      var text = comment.nodeValue;
      var colon = text.indexOf(":");
      if (colon == -1) continue;
      if (text.substring(1, colon).toLowerCase() == "journal") {
        newItem.publicationTitle = text.substring(colon + 1, text.length - 1);
      }
    }

    // A title such as "The Times (London, England)" carries the place in
    // parentheses. Only split when the parentheses are actually present, and
    // tolerate pages with no Journal comment at all.
    var fullTitle = newItem.publicationTitle;
    var placeMatch = fullTitle ? fullTitle.match(/\((.+)\)/) : null;
    if (placeMatch) {
      newItem.place = placeMatch[1];
      newItem.publicationTitle = fullTitle.replace(/\(.+\).*/, "").trim();
    }

    newItem.complete();
  }
  /**
   * Fills volume/issue/pages/date (and possibly the item type) from a "pi"
   * comment value such as "Jan 1, 2000 v12 i3 p45(2)".
   *
   * Leading parts that come before any v/i/p-prefixed part are collected as
   * the date. A part starting with "v", "i" or "p" selects the field that
   * receives it (minus the prefix letter).
   *
   * @param {Object} newItem - The item being built; mutated in place.
   * @param {string} value - The raw "pi" comment value.
   */
  function applyPublicationInfo(newItem, value) {
    var parts = value.split(" ");
    var date = "";
    var target = null;
    for (var j = 0; j < parts.length; j++) {
      var part = parts[j];
      var firstChar = part.substring(0, 1);
      if (firstChar == "v") {
        newItem.itemType = "journalArticle";
        target = "volume";
      } else if (firstChar == "i") {
        target = "issue";
      } else if (firstChar == "p") {
        target = "pages";
        // "p45(2)" is rewritten as the range "45-47" (first number plus the
        // number in parentheses), but only when both numbers parse.
        var match = /p(\w+)\((\w+)\)/.exec(part);
        var firstPage = match ? parseInt(match[1], 10) : NaN;
        var pageSpan = match ? parseInt(match[2], 10) : NaN;
        if (match && !isNaN(firstPage) && !isNaN(pageSpan)) {
          part = "p" + match[1] + "-" + (firstPage + pageSpan).toString();
        } else if (!newItem.itemType) {
          // check to see if it's numeric, bc newspaper pages aren't
          var justPageNumber = part.substring(1);
          if (parseInt(justPageNumber, 10).toString() != justPageNumber) {
            newItem.itemType = "newspaperArticle";
          }
        }
      } else if (!target) {
        // date parts at the beginning, before anything else
        date += " " + part;
      }

      if (target) {
        if (part != "pNA") {
          // chop off the prefix letter
          newItem[target] = part.substring(1);
        } else if (!newItem.itemType) {
          // only newspapers are missing page numbers on infotrac
          newItem.itemType = "newspaperArticle";
        }
      }
    }

    if (!newItem.itemType) {
      newItem.itemType = "magazineArticle";
    }
    if (date != "") {
      newItem.date = date.substring(1); // drop the leading separator space
    }
  }

  /**
   * Builds an item from InfoTrac metadata comments and completes it.
   *
   * Each comment is expected to look like " Field:value "; the padding
   * character on each side is dropped and the field name is lowercased.
   *
   * @param {string} url - URL stored on the item and used for the snapshot
   *   when no document is supplied.
   * @param {Object} elmts - XPath iterator (iterateNext()) over comment nodes.
   * @param {string} [title] - Fallback title; a "title" comment overrides it.
   * @param {Document} [doc] - When given, attached as the snapshot document.
   */
  function extractCitation(url, elmts, title, doc) {
    var newItem = new Zotero.Item();
    newItem.url = url;
    if (title) {
      newItem.title = Zotero.Utilities.superCleanString(title);
    }

    var genreTypes = {
      article: "journalArticle",
      book: "book",
      dissertation: "thesis",
      bookitem: "bookSection"
    };

    var elmt;
    while ((elmt = elmts.iterateNext())) {
      var text = elmt.nodeValue;
      var colon = text.indexOf(":");
      if (colon == -1) continue; // not a "Field:value" comment
      var field = text.substring(1, colon).toLowerCase();
      var value = text.substring(colon + 1, text.length - 1);

      if (field == "title") {
        newItem.title = Zotero.Utilities.superCleanString(value);
      } else if (field == "journal" || field == "booktitle") {
        // field is already lowercased, so the book title key is "booktitle"
        newItem.publicationTitle = value;
      } else if (field == "pi") {
        applyPublicationInfo(newItem, value);
      } else if (field == "author") {
        var author = Zotero.Utilities.cleanAuthor(value, "author", true);
        // ensure author is not already there
        var isNew = true;
        for (var k = 0; k < newItem.creators.length; k++) {
          var existing = newItem.creators[k];
          if (existing.firstName == author.firstName && existing.lastName == author.lastName) {
            isNew = false;
            break;
          }
        }
        if (isNew) newItem.creators.push(author);
      } else if (field == "issue") {
        newItem.issue = value;
      } else if (field == "volume") {
        newItem.volume = value;
      } else if (field == "issn") {
        newItem.ISSN = value;
      } else if (field == "gjd") {
        var m = value.match(/\(([0-9]{4}[^\)]*)\)(?:, pp\. ([0-9\-]+))?/);
        if (m) {
          newItem.date = m[1];
          // the page group is optional; do not store undefined
          if (m[2]) newItem.pages = m[2];
        }
      } else if (field == "genre") {
        var genre = value.toLowerCase();
        if (Object.prototype.hasOwnProperty.call(genreTypes, genre)) {
          newItem.itemType = genreTypes[genre];
        }
      }
    }

    // Pages without a "pi" or recognised "genre" comment still need a type.
    if (!newItem.itemType) {
      newItem.itemType = "magazineArticle";
    }

    if (doc) {
      newItem.attachments.push({document: doc, title: "InfoTrac Snapshot"});
    } else {
      newItem.attachments.push({url: url, title: "InfoTrac Snapshot", mimeType: "text/html"});
    }
    newItem.complete();
  }
  /**
   * Zotero entry point: saves the current article, or lets the user pick
   * entries from a citation list and saves each selection.
   *
   * The only InfoTrac site known to still be up is 19th Century Newspapers
   * (pages carrying the ncnp logo); the other branches are legacy code kept
   * in case further InfoTrac sites exist.
   *
   * @param {Document} doc - The loaded page.
   * @param {string} url - The page URL.
   * @returns {boolean|undefined} true when the legacy selection is
   *   cancelled; otherwise nothing.
   */
  function doWeb(doc, url) {
    var ncnp = !!ZU.xpathText(doc, '//td//img[contains(@src, "ncnp_logo.gif")]/@title');
    var uri = doc.location.href;

    if (doc.title.substring(0, 8) == "Article ") { // article
      if (ncnp) {
        scrape(doc, url);
      } else {
        var comments = doc.evaluate('/html/body//comment()', doc, null, XPathResult.ANY_TYPE, null);
        extractCitation(uri, comments);
      }
      return;
    }

    // search results
    var items = {};
    var uris = [];
    var tableRows = doc.evaluate('/html/body//table/tbody/tr/td[b or strong]', doc, null,
      XPathResult.ANY_TYPE, null);
    var tableRow;

    if (ncnp) {
      // Article links are rebuilt from the part of the page URL before
      // "/purl=" plus the institution query string of the current URL.
      var baseurl = uri.match(/(.+)\/purl=/);
      var institution = url.match(/\?sw_aep=.+/);
      if (!baseurl || !institution) {
        throw new Error("InfoTrac: cannot build article links from " + url);
      }
      while ((tableRow = tableRows.iterateNext())) {
        var rowTitle = ZU.xpathText(tableRow, './strong');
        var href = ZU.xpathText(tableRow, './a[1]/@href');
        var hrefMatch = href ? href.match(/\(\'(\/.+)\',\'/) : null;
        // Skip rows that carry no title or no parsable article link.
        if (!rowTitle || !hrefMatch) continue;
        items[baseurl[1] + hrefMatch[1] + institution[0]] = ZU.trimInternal(rowTitle);
      }
      Zotero.selectItems(items, function (selected) {
        if (!selected) {
          return true;
        }
        for (var link in selected) {
          uris.push(link);
        }
        Zotero.Utilities.processDocuments(uris, scrape);
      });
      return;
    }

    // Legacy result lists: each row's link is a javascript: call whose two
    // quoted arguments are spliced into the article URL.
    var javaScriptRe = /'([^']*)' *, *'([^']*)'/;
    var hostMatch = uri.match(/^https?:\/\/[^\/]+/);
    var host = hostMatch ? hostMatch[0] : "";
    var elmts = [];
    var count = 0;
    while ((tableRow = tableRows.iterateNext())) {
      var anchor = doc.evaluate('./a', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
      if (!anchor) continue;
      var m = javaScriptRe.exec(anchor.href);
      var article = doc.evaluate('./b/text()|./strong/text()', anchor, null,
        XPathResult.ANY_TYPE, null).iterateNext();
      // Without both a URL and a title the row cannot be offered or saved.
      if (!m || !article) continue;

      // NOTE(review): the path segment and "olr_wad" are hard-coded in the
      // original code; they look site/session specific - confirm before reuse.
      uris[count] = host + "/itw/infomark/192/215/90714844w6" + m[1] + "?sw_aep=olr_wad" + m[2];

      var label = article.nodeValue;
      // Chop off final period
      if (label.substring(label.length - 1) == ".") {
        label = label.substring(0, label.length - 1);
      }
      items[count] = label;
      elmts[count] = doc.evaluate(".//comment()", tableRow, null, XPathResult.ANY_TYPE, null);
      count++;
    }

    // Legacy synchronous form of selectItems, kept so the comment iterators
    // collected above are consumed without yielding in between.
    items = Zotero.selectItems(items);
    if (!items) {
      return true;
    }
    for (var key in items) {
      extractCitation(uris[key], elmts[key], items[key]);
    }
  }