PageRenderTime 49ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/CNKI.js

https://gitlab.com/mba811/translators
JavaScript | 225 lines | 160 code | 28 blank | 37 comment | 35 complexity | 41405d0b04e8a7e1a30b694345a66387 MD5 | raw file
  1. {
  2. "translatorID": "5c95b67b-41c5-4f55-b71a-48d5d7183063",
  3. "label": "CNKI",
  4. "creator": "Aurimas Vinckevicius",
  5. "target": "^https?://(?:[^/]+\\.)?cnki.net",
  6. "minVersion": "3.0",
  7. "maxVersion": "",
  8. "priority": 100,
  9. "inRepository": true,
  10. "translatorType": 4,
  11. "browserSupport": "gcs",
  12. "lastUpdated": "2013-08-25 04:10:34"
  13. }
  14. /*
  15. CNKI(China National Knowledge Infrastructure) Translator
  16. Copyright (C) 2013 Aurimas Vinckevicius
  17. This program is free software: you can redistribute it and/or modify
  18. it under the terms of the GNU General Public License as published by
  19. the Free Software Foundation, either version 3 of the License, or
  20. (at your option) any later version.
  21. This program is distributed in the hope that it will be useful,
  22. but WITHOUT ANY WARRANTY; without even the implied warranty of
  23. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  24. GNU General Public License for more details.
  25. You should have received a copy of the GNU General Public License
  26. along with this program. If not, see <http://www.gnu.org/licenses/>.
  27. */
  /**
   * Fetches the Refworks export for the provided record IDs and calls `next`
   * with the cleaned-up text.
   *
   * Two POST requests are issued in sequence: the first submits the list of
   * records, the second (sent from the first one's callback) requests the
   * saved selection in REFWORKS format.
   *
   * @param {Array} ids  Records to export, in the form
   *   [{dbname: "CDFDLAST2013", filename: "1013102302.nh"}]
   * @param {Function} next  Called with the normalized Refworks text
   */
  function getRefworksByID(ids, next) {
    var postData = "";
    for (var i = 0, n = ids.length; i < n; i++) {
      postData += ids[i].dbname + "!" + ids[i].filename + "!0!0,";
    }
    postData = "formfilenames=" + encodeURIComponent(postData);

    ZU.doPost('http://epub.cnki.net/kns/ViewPage/viewsave.aspx?TablePre=SCDB', postData, function() {
      ZU.doPost(
        'http://epub.cnki.net/KNS/ViewPage/SaveSelectedNoteFormat.aspx?type=txt',
        'CurSaveModeType=REFWORKS',
        function(text) {
          text = text
            //fix item types
            .replace(/^RT\s+Dissertation\/Thesis/gmi, 'RT Dissertation')
            //Zotero doesn't do well with mixed line endings. Make everything \n
            .replace(/\r\n?/g, '\n')
            //split authors: one tag line per author.
            //Only same-line whitespace may follow the tag; otherwise an empty
            //author line would swallow the following record line.
            .replace(/^(A[1-4]|U2)[^\S\r\n]*([^\r\n]+)/gm, function(m, tag, authorList) {
              //separators: ASCII semicolon/comma plus their fullwidth forms
              //(U+FF0C, U+FF1B) -- NOTE(review): fullwidth forms added on the
              //assumption that the export uses them; confirm against real data
              var names = authorList.split(/\s*[;,\uFF0C\uFF1B]\s*/);
              //a trailing separator leaves an empty last entry
              if (!names[names.length - 1].trim()) names.pop();
              return tag + ' ' + names.join('\n' + tag + ' ');
            });
          next(text);
        }
      );
    });
  }
  /**
   * Extracts the record identifier from a URL's query string.
   *
   * @param {String} url  URL carrying `dbname` and `filename` parameters
   *   (parameter names are matched case-insensitively)
   * @return {Object|undefined}  {dbname, filename, url}, or undefined when the
   *   URL is empty or either parameter is missing or has an empty value
   */
  function getIDFromURL(url) {
    if (!url) return;

    var dbMatch = url.match(/[?&]dbname=([^&#]*)/i);
    var fileMatch = url.match(/[?&]filename=([^&#]*)/i);

    var hasDb = dbMatch && dbMatch[1];
    var hasFile = fileMatch && fileMatch[1];
    if (hasDb && hasFile) {
      return {dbname: dbMatch[1], filename: fileMatch[1], url: url};
    }
  }
  /**
   * Determines the record identifier for a page.
   *
   * The page URL is tried first; only if it yields no ID is the href of the
   * link inside the "zwjdown" div consulted.
   *
   * @param {Document} doc  Page document
   * @param {String} url  Page URL
   * @return {Object|undefined}  Whatever getIDFromURL returns for the first
   *   source that produces an ID
   */
  function getIDFromPage(doc, url) {
    var idFromAddress = getIDFromURL(url);
    if (idFromAddress) return idFromAddress;

    var downloadHref = ZU.xpathText(doc, '//div[@class="zwjdown"]/a/@href');
    return getIDFromURL(downloadHref);
  }
  /**
   * Maps a database name to an item type using its first four characters
   * (compared case-insensitively).
   *
   * @param {String} dbname  Database name, e.g. "CDFDLAST2013"
   * @return {String|undefined}  Item type, or undefined for unknown prefixes
   */
  function getTypeFromDBName(dbname) {
    var typeByPrefix = {
      CJFQ: "journalArticle",
      CJFD: "journalArticle",
      CAPJ: "journalArticle",
      CDFD: "thesis",
      CMFD: "thesis",
      CLKM: "thesis",
      CPFD: "conferencePaper",
      CCND: "newspaperArticle"
    };

    var prefix = dbname.substr(0, 4).toUpperCase();
    // own-property check so inherited Object members never count as a match
    if (Object.prototype.hasOwnProperty.call(typeByPrefix, prefix)) {
      return typeByPrefix[prefix];
    }
  }
  /**
   * Collects the selectable records from a search-results page.
   *
   * Two layouts are tried: rows containing an a.fz14 link and, failing that,
   * rows of the "GridTableContent" table whose second cell holds a link.
   *
   * @param {Document} doc  Results page
   * @param {String} url  Page URL (not used by this function)
   * @param {Object} [itemInfo]  If supplied, filled with {id: ...} per link href
   * @return {Object|undefined}  Map of link href -> title, or undefined when
   *   no row yields both a title and an ID
   */
  function getItemsFromSearchResults(doc, url, itemInfo) {
    var linkXpath = './/a[@class="fz14"]';
    var rows = ZU.xpath(doc, '//tr[not(.//tr) and .//a[@class="fz14"]]');
    if (!rows.length) {
      // fall back to the grid-table layout
      rows = ZU.xpath(doc, '//table[@class="GridTableContent"]/tbody/tr[./td[2]/a]');
      linkXpath = './td[2]/a';
    }
    if (!rows.length) return;

    var found = {};
    var hasItems = false;
    for (var r = 0, total = rows.length; r < total; r++) {
      var anchor = ZU.xpath(rows[r], linkXpath)[0];
      // title text comes from the link's child nodes, skipping SCRIPT elements
      var label = ZU.xpathText(anchor, './node()[not(name()="SCRIPT")]', null, '');
      if (label) label = ZU.trimInternal(label);
      var recordId = getIDFromURL(anchor.href);

      if (label && recordId) {
        hasItems = true;
        // (collection of PDF links from the results row is currently disabled)
        if (itemInfo) itemInfo[anchor.href] = {id: recordId};
        found[anchor.href] = label;
      }
    }

    return hasItems ? found : undefined;
  }
  /**
   * Detects what kind of page is being viewed.
   *
   * @param {Document} doc  Page document
   * @param {String} url  Page URL
   * @return {String|undefined}  The item type derived from the record's
   *   database name when the page identifies a single record; "multiple"
   *   when no record ID is found but search results are; undefined otherwise
   */
  function detectWeb(doc, url) {
    var pageId = getIDFromPage(doc, url);
    Z.debug(pageId);

    // a page with a record ID is typed by its database name, even if that
    // name is unknown (in which case the result is undefined)
    if (pageId) return getTypeFromDBName(pageId.dbname);

    return getItemsFromSearchResults(doc, url) ? "multiple" : undefined;
  }
  /**
   * Entry point for saving items from the current page.
   *
   * On a search-results page the user is asked to select items, and the
   * selected records are scraped together; otherwise the single record
   * identified by the page is scraped.
   *
   * @param {Document} doc  Page document
   * @param {String} url  Page URL
   */
  function doWeb(doc, url) {
    if (detectWeb(doc, url) === "multiple") {
      var itemInfo = {};
      var items = getItemsFromSearchResults(doc, url, itemInfo);
      Z.selectItems(items, function(selectedItems) {
        if (!selectedItems) return true;

        var itemInfoByTitle = {};
        var ids = [];
        // The loop variable must not be called `url`: a `var url` here would
        // shadow the page URL and scrape() would receive the last item's URL.
        for (var itemURL in selectedItems) {
          var info = itemInfo[itemURL];
          ids.push(info.id);
          info.url = itemURL;
          // scrape() looks item info up by title
          itemInfoByTitle[selectedItems[itemURL]] = info;
        }
        scrape(ids, doc, url, itemInfoByTitle);
      });
    } else {
      scrape([getIDFromPage(doc, url)], doc, url);
    }
  }
  /**
   * Fetches the Refworks records for `ids`, runs them through the Refworks
   * import translator and post-processes every resulting item (creator
   * names, abstract line breaks, tags, title, URL) before completing it.
   *
   * @param {Array} ids  Record IDs as accepted by getRefworksByID
   * @param {Document} doc  Page document (not used by this function)
   * @param {String} url  URL assigned to the item when `itemInfo` is not given
   * @param {Object} [itemInfo]  Map of item title -> {url: ...}; when given,
   *   each item's URL is looked up by its (whitespace-normalized) title
   */
  function scrape(ids, doc, url, itemInfo) {
    getRefworksByID(ids, function(text) {
      Z.debug(text);
      var translator = Z.loadTranslator('import');
      translator.setTranslator('1a3506da-a303-4b0a-a1cd-f216e6138d86'); //Refworks
      translator.setString(text);
      translator.setHandler('itemDone', function(obj, newItem) {
        //split names
        for (var i = 0, n = newItem.creators.length; i < n; i++) {
          var creator = newItem.creators[i];
          //already split, or nothing to split
          if (creator.firstName || !creator.lastName) continue;

          var fullName = creator.lastName;
          if (fullName.search(/[A-Za-z]/) !== -1) {
            //western name. split on last space
            var lastSpace = fullName.lastIndexOf(' ');
            if (lastSpace !== -1) {
              creator.firstName = fullName.substring(0, lastSpace);
              creator.lastName = fullName.substring(lastSpace + 1);
            }
            //a Latin-script name without a space is kept whole rather than
            //being cut after its first letter
          } else {
            //Chinese name. first character is last name, the rest are first name
            creator.firstName = fullName.substring(1);
            creator.lastName = fullName.charAt(0);
          }
        }

        //collapse each line break (and the whitespace around it) to a single \n
        if (newItem.abstractNote) {
          newItem.abstractNote = newItem.abstractNote.replace(/\s*[\r\n]\s*/g, '\n');
        }

        //clean up tags. Remove numbers from end
        for (var t = 0, tagCount = newItem.tags.length; t < tagCount; t++) {
          newItem.tags[t] = newItem.tags[t].replace(/:\d+$/, '');
        }

        if (newItem.title) {
          newItem.title = ZU.trimInternal(newItem.title);
        }

        if (itemInfo) {
          //items coming from a results page are matched back by title
          var info = itemInfo[newItem.title];
          if (!info) {
            Z.debug('No item info for "' + newItem.title + '"');
          } else {
            //(attaching a PDF URL passed from the results page is currently disabled)
            newItem.url = info.url;
          }
        } else {
          newItem.url = url;
        }

        newItem.complete();
      });
      translator.translate();
    });
  }