PageRenderTime 42ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/Gallica.js

https://gitlab.com/mba811/translators
JavaScript | 296 lines | 257 code | 23 blank | 16 comment | 74 complexity | 8438cbfa6d4984fa37cd77141e227fa9 MD5 | raw file
  1. {
  2. "translatorID": "58ab2618-4a25-4b9b-83a7-80cd0259f896",
  3. "label": "Gallica",
  4. "creator": "Sylvain Machefert",
  5. "target": "^https?://gallica\\.bnf\\.fr",
  6. "minVersion": "1.0.0b3.r1",
  7. "maxVersion": "",
  8. "priority": 100,
  9. "inRepository": true,
  10. "translatorType": 4,
  11. "browserSupport": "gcsbv",
  12. "lastUpdated": "2014-04-04 10:09:18"
  13. }
  /**
   * Tells Zotero which kind of item(s) the current Gallica page offers.
   *
   * The page kind is derived from the first path segment of the URL. Both the
   * http and https schemes are recognised, since the translator target
   * accepts either one.
   *
   * @param {Document} doc  DOM of the page being inspected.
   * @param {String}   url  Address of the page being inspected.
   * @returns {String|undefined} "multiple" for a search page without an error
   *          message, the type chosen by getDoctypeGallica() for an "ark:"
   *          page that shows a document icon, "book" for a "VisuSNE" page,
   *          and undefined in every other case.
   */
  function detectWeb(doc, url) {
    var page = /^https?:\/\/gallica\.bnf\.fr\/(Search|ark:|VisuSNE)/.exec(url.toString());
    if (!page) {
      return undefined;
    }

    if (page[1] == 'VisuSNE') {
      return "book";
    }

    if (page[1] == 'Search') {
      // A search page can be an empty result page; in that case it carries
      // an error message and there is nothing to offer.
      var errorBox = doc.evaluate('//div[@class="errorMessage"]', doc, null,
        XPathResult.ANY_TYPE, null).iterateNext();
      return errorBox ? undefined : "multiple";
    }

    // "ark:" page: the document icon decides the item type. For some records
    // the icon picture is located in another div, so both places are tried.
    var iconXpaths = [
      '//div[@class="contenu1"]/img',
      '//div[@class="titrePeriodiqueGauche"]/img'
    ];
    for (var i = 0; i < iconXpaths.length; i++) {
      var iconNode = doc.evaluate(iconXpaths[i], doc, null,
        XPathResult.ANY_TYPE, null).iterateNext();
      if (iconNode) {
        return getDoctypeGallica(iconNode.getAttribute('src'));
      }
    }
    return undefined;
  }
  /**
   * Maps a Gallica document-type icon onto a Zotero item type.
   *
   * Only the file name (the part after the last "/") of the icon path is
   * looked at.
   *
   * @param {String} img  Path or URL of the icon picture.
   * @returns {String} Zotero item type; "book" is also the fallback for icons
   *          that are not known, which are additionally reported through
   *          Zotero.debug().
   */
  function getDoctypeGallica(img)
  {
    var fileName = img.slice(img.lastIndexOf('/') + 1);

    switch (fileName) {
      case 'carte.png':
        return "map";

      case 'images.png':
        return "artwork";

      case 'docsonore.png':
        return "audioRecording";

      case 'musiquenotee.png':
        // Sheet music icon. No Zotero type matched it when this was
        // written (2010-02), so it is filed as a book.
        return "book";

      case 'livre_a.png':
      case 'picto_type_document1.png':
      case 'perio_vol_ocr.png':
        return "book";

      default:
        Zotero.debug("Undefined icon : " + fileName);
        return "book";
    }
  }
  /**
   * Translator entry point: saves the item shown on a document page, or lets
   * the user pick among the entries of a search result page.
   *
   * On a search result page every piece of information is already present in
   * the list, so the selected entries are handed straight to scrapeGallica()
   * instead of fetching each document page.
   *
   * @param {Document} doc  DOM of the current page.
   * @param {String}   url  Address of the current page.
   */
  function doWeb(doc, url) {
    var pageType = detectWeb(doc, url);

    if (pageType != "multiple") {
      var notice = doc.evaluate('//div[@class="notice"]', doc, null,
        XPathResult.ANY_TYPE, null).iterateNext();
      if (!notice) {
        Zotero.debug("Gallica: no notice block found on " + url);
        return;
      }
      scrapeGallica(doc, notice, pageType, "");
      return;
    }

    // Build the pick list. Keys are the 1-based positions of the result
    // lines, because the callback below finds a line again by position.
    var availableItems = {};
    var hasItems = false;
    var position = 0;
    var lines = doc.evaluate('//div[@class="resultats_line"]', doc, null,
      XPathResult.ANY_TYPE, null);
    var line;
    while ((line = lines.iterateNext())) {
      // Count every line, even one that is skipped, to keep positions aligned.
      position++;
      var titleLink = doc.evaluate('div[@class="resultat_desc"]/div[@class="titre"]/a',
        line, null, XPathResult.ANY_TYPE, null).iterateNext();
      if (!titleLink) {
        continue;
      }
      availableItems[position] = Zotero.Utilities.cleanTags(titleLink.getAttribute('title'));
      hasItems = true;
    }
    if (!hasItems) {
      return;
    }

    Z.selectItems(availableItems, function(items) {
      if (!items) {
        return;
      }
      for (var index in items) {
        var row = doc.evaluate('//div[@class="resultats_line"][' + index + ']', doc, null,
          XPathResult.ANY_TYPE, null).iterateNext();
        if (!row) {
          continue;
        }
        var detail = doc.evaluate('.//div[@class="notice"]', row, null,
          XPathResult.ANY_TYPE, null).iterateNext();
        if (!detail) {
          continue;
        }
        var iconNode = doc.evaluate('.//span[@class="picto"]/img', row, null,
          XPathResult.ANY_TYPE, null).iterateNext();
        var linkNode = doc.evaluate('.//div[@class="liens"]/a', row, null,
          XPathResult.ANY_TYPE, null).iterateNext();
        // Without an icon fall back to "book", the same type
        // getDoctypeGallica() uses for icons it does not know.
        var docType = iconNode ? getDoctypeGallica(iconNode.getAttribute('src')) : "book";
        var docUrl = (linkNode && linkNode.getAttribute("href")) || "";
        scrapeGallica(doc, detail, docType, docUrl);
      }
    });
  }
  /**
   * Builds one Zotero item from a Gallica "notice" block and saves it.
   *
   * Each <p> child of the notice is read as a "<label> : <value>" line. The
   * label is matched in the interface languages listed in the patterns below;
   * lines with any other label are ignored.
   *
   * @param {Document} doc         Page DOM; used to run XPath and, as a last
   *                               resort, to provide the item URL.
   * @param {Element}  div         The notice element holding the <p> lines.
   *                               When it is missing nothing is saved.
   * @param {String}   type        Zotero item type to assign.
   * @param {String}   direct_url  Site-relative link to the document, or ""
   *                               when unknown. Only used when the notice
   *                               itself does not provide a URL.
   */
  function scrapeGallica(doc, div, type, direct_url)
  {
    if (!div) {
      Zotero.debug("Gallica: no notice block to scrape");
      return;
    }

    var item = new Zotero.Item();
    item.itemType = type;

    var paragraphs = doc.evaluate('p', div, null, XPathResult.ANY_TYPE, null);
    var paragraph;
    // A notice without any <p> simply yields an item without metadata.
    while ((paragraph = paragraphs.iterateNext())) {
      var text = Zotero.Utilities.trimInternal(paragraph.textContent);
      // Splitting on "^(label) : " yields ['', label, value] on a match, so
      // index 2 is the value; it is undefined when the label does not match.
      var contenu;
      var temp;

      if ((contenu = text.split(/^(Titre|Title|Título) : /)[2])) {
        item.title = Zotero.Utilities.trimInternal(contenu);
      }
      else if ((contenu = text.split(/^(Auteur|Author|Autor) : /)[2])) {
        // Drop the "filter on this author" link text and the first
        // parenthesised part of the name.
        contenu = contenu.replace(/(See only the results matching this author|Ne voir que les résultats de cet auteur)/, '').replace(/\(.+?\)/, "");
        var creatorType = (type == 'artwork') ? "artist" : "author";
        item.creators.push(Zotero.Utilities.cleanAuthor(contenu, creatorType, true));
      }
      else if ((contenu = text.split(/^(Publisher|Éditeur|Editor) : /)[2])) {
        item.publisher = Zotero.Utilities.trimInternal(contenu);
      }
      else if ((contenu = text.split(/^(Date of publication|Date d'édition|Data de publicação|Fecha de publicación) : /)[2])) {
        item.date = Zotero.Utilities.trimInternal(contenu);
      }
      else if ((contenu = text.split(/^(Contributeur|Contributor|Contribuidor) : /)[2])) {
        item.creators.push(Zotero.Utilities.cleanAuthor(contenu, "contributor", true));
      }
      else if ((contenu = text.split(/^(Language|Langue|Língua|Idioma) : /)[2])) {
        item.language = Zotero.Utilities.trimInternal(contenu);
      }
      else if ((contenu = text.split(/^(Format|Formato) : /)[2])) {
        // This field contains e.g. "application/pdf"; nothing to store.
      }
      else if ((contenu = text.split(/^(Copyright|Droits|Direitos) : /)[2])) {
        item.rights = Zotero.Utilities.trimInternal(contenu);
      }
      else if ((contenu = text.split(/^(Identifier|Identifiant|Senha) : /)[2])) {
        if ((temp = contenu.split(/^ISSN /)[1])) {
          item.ISSN = temp;
        }
        else if (contenu.match(/^https?:\/\//)) {
          // An identifier starting with http is the URL of the document.
          item.url = contenu;
        }
        else if (contenu.match(/^ark:/)) {
          item.url = "http://gallica.bnf.fr/" + contenu;
        }
      }
      else if ((contenu = text.split(/^(Description|Descrição) : /)[2])) {
        if ((temp = contenu.split(/^Variante\(s\) de titre : /)[1])) {
          // Alternative title: not stored.
        }
        else if ((temp = contenu.split(/^Collection : /)[1])) {
          // NOTE(review): "collection" may not be a field Zotero keeps
          // (the series field could be the intended one) - confirm.
          item.collection = temp;
        }
      }
      else if ((contenu = text.split(/^(Sujet|Assunto|Tema|Subject) : /)[2])) {
        var tagList = contenu.split(/; ?/);
        // Indexed loop: for...in would also visit any enumerable property
        // added to arrays, not just the elements.
        for (var t = 0; t < tagList.length; t++) {
          item.tags.push(Zotero.Utilities.trimInternal(tagList[t]));
        }
      }
    }

    // No URL in the notice: use the link from the result list if there is
    // one, otherwise the address of the current page.
    if (!item.url) {
      if (direct_url != "") {
        item.url = "http://gallica.bnf.fr" + direct_url;
      }
      else {
        item.url = doc.location.href;
      }
    }
    item.complete();
  }
  235. /** BEGIN TEST CASES **/
  236. var testCases = [
  237. {
  238. "type": "web",
  239. "url": "http://gallica.bnf.fr/ark:/12148/bpt6k58121413.r=cervantes.langEN",
  240. "items": [
  241. {
  242. "itemType": "book",
  243. "creators": [
  244. {
  245. "firstName": "Édouard",
  246. "lastName": "Cat",
  247. "creatorType": "author"
  248. }
  249. ],
  250. "notes": [],
  251. "tags": [
  252. "Cervantes Saavedra, Miguel de (1547-1616)"
  253. ],
  254. "seeAlso": [],
  255. "attachments": [],
  256. "title": "Miguel Cervantès / par É. Cat,...",
  257. "publisher": "Gedalge (Paris)",
  258. "date": "1892",
  259. "language": "Français",
  260. "rights": "domaine public",
  261. "url": "http://gallica.bnf.fr/ark:/12148/bpt6k58121413",
  262. "libraryCatalog": "Gallica",
  263. "accessDate": "CURRENT_TIMESTAMP"
  264. }
  265. ]
  266. },
  267. {
  268. "type": "web",
  269. "url": "http://gallica.bnf.fr/Search?ArianeWireIndex=index&p=1&lang=EN&q=cervantes",
  270. "items": "multiple"
  271. }
  272. ]
  273. /** END TEST CASES **/