PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/pkp/classes/citation/parser/paracite/ParaciteRawCitationNlmCitationSchemaFilter.inc.php

https://github.com/lib-uoguelph-ca/ocs
PHP | 283 lines | 167 code | 34 blank | 82 comment | 23 complexity | ff8e2be1a5b5af4dd1d4f776fbba7cb2 MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  2. /**
  3. * @defgroup citation_parser_paracite
  4. */
  5. /**
  6. * @file classes/citation/parser/paracite/ParaciteRawCitationNlmCitationSchemaFilter.inc.php
  7. *
  8. * Copyright (c) 2000-2012 John Willinsky
  9. * Distributed under the GNU GPL v2. For full terms see the file docs/COPYING.
  10. *
  11. * @class ParaciteRawCitationNlmCitationSchemaFilter
  12. * @ingroup citation_parser_paracite
  13. *
  14. * @brief Paracite parsing filter implementation.
  15. *
  16. * The paracite parsing filter has one parameter: the citation module
  17. * to be used. This can be one of "Standard", "Citebase" or "Jiao".
  18. *
  19. * If you want to use various modules at the same time then you have
  20. * to instantiate this parser filter several times with different
  21. * configuration and chain all instances.
  22. *
  23. * NB: This filter requires perl and CPAN's Biblio::Citation::Parser
  24. * and Text::Unidecode packages to be installed on the server. It also
  25. * requires the PHP shell_exec() function to be available which is often
  26. * disabled in shared hosting environments.
  27. */
  28. // $Id$
  29. import('citation.NlmCitationSchemaFilter');
  30. import('metadata.nlm.OpenUrlNlmCitationSchemaCrosswalkFilter');
  31. import('metadata.openurl.OpenUrlBookSchema');
  32. import('metadata.openurl.OpenUrlJournalSchema');
  // Names of the Paracite parser modules that ParaciteRawCitationNlmCitationSchemaFilter accepts; 'Standard' is the constructor default.
  33. define('CITATION_PARSER_PARACITE_STANDARD', 'Standard');
  34. define('CITATION_PARSER_PARACITE_CITEBASE', 'Citebase');
  35. define('CITATION_PARSER_PARACITE_JIAO', 'Jiao');
  36. class ParaciteRawCitationNlmCitationSchemaFilter extends NlmCitationSchemaFilter {
  37. /** @var string the paracite citation parser module to be used (default: 'Standard') */
  38. var $_citationModule;
  39. /*
  40. * Constructor
  41. */
  42. function ParaciteRawCitationNlmCitationSchemaFilter($citationModule = CITATION_PARSER_PARACITE_STANDARD) {
  43. assert(in_array($citationModule, ParaciteRawCitationNlmCitationSchemaFilter::getSupportedCitationModules()));
  44. $this->_citationModule = $citationModule;
  45. parent::NlmCitationSchemaFilter();
  46. }
  47. //
  48. // Getters and Setters
  49. //
  50. /**
  51. * get the citationModule
  52. * @return string
  53. */
  54. function getCitationModule() {
  55. return $this->_citationModule;
  56. }
  57. //
  58. // Implement template methods from Filter
  59. //
  60. /**
  61. * @see Filter::supports()
  62. * @param $input mixed
  63. * @param $output mixed
  64. * @return boolean
  65. */
  66. function supports(&$input, &$output) {
  67. return parent::supports($input, $output, true);
  68. }
  69. /**
  70. * @see Filter::process()
  71. * @param $citationString string
  72. * @return MetadataDescription
  73. */
  74. function &process($citationString) {
  75. $nullVar = null;
  76. // Check the availability of perl
  77. $perlCommand = Config::getVar('cli', 'perl');
  78. if (empty($perlCommand) || !file_exists($perlCommand)) return $nullVar;
  79. // Convert to ASCII - Paracite doesn't handle UTF-8 well
  80. $citationString = String::utf8_to_ascii($citationString);
  81. // Call the paracite parser
  82. $wrapperScript = dirname(__FILE__).DIRECTORY_SEPARATOR.'paracite.pl';
  83. $paraciteCommand = $perlCommand.' '.escapeshellarg($wrapperScript).' '.
  84. $this->_citationModule.' '.escapeshellarg($citationString);
  85. $xmlResult = shell_exec($paraciteCommand);
  86. if (empty($xmlResult)) return $nullVar;
  87. if ( Config::getVar('i18n', 'charset_normalization') == 'On' && !String::utf8_compliant($xmlResult) ) {
  88. $xmlResult = String::utf8_normalize($xmlResult);
  89. }
  90. // Create a temporary DOM document
  91. $resultDOM = new DOMDocument();
  92. $resultDOM->recover = true;
  93. $resultDOM->loadXML($xmlResult);
  94. // Extract the parser results as an array
  95. $xmlHelper = new XMLHelper();
  96. $metadata = $xmlHelper->xmlToArray($resultDOM->documentElement);
  97. // We have to merge subtitle and title as neither OpenURL
  98. // nor NLM can handle subtitles.
  99. if (isset($metadata['subtitle'])) {
  100. $metadata['title'] .= '. '.$metadata['subtitle'];
  101. unset($metadata['subtitle']);
  102. }
  103. // Break up the authors field
  104. if (isset($metadata['authors'])) {
  105. $metadata['authors'] = String::trimPunctuation($metadata['authors']);
  106. $metadata['authors'] = String::iterativeExplode(array(':', ';'), $metadata['authors']);
  107. }
  108. // Convert pages to integers
  109. foreach(array('spage', 'epage') as $pageProperty) {
  110. if (isset($metadata[$pageProperty])) $metadata[$pageProperty] = (integer)$metadata[$pageProperty];
  111. }
  112. // Convert titles to title case
  113. foreach(array('title', 'chapter', 'publication') as $titleProperty) {
  114. if (isset($metadata[$titleProperty])) $metadata[$titleProperty] = String::titleCase($metadata[$titleProperty]);
  115. }
  116. // Map ParaCite results to OpenURL - null means
  117. // throw the value away.
  118. $metadataMapping = array(
  119. 'genre' => 'genre',
  120. '_class' => null,
  121. 'any' => null,
  122. 'authors' => 'au',
  123. 'aufirst' => 'aufirst',
  124. 'aufull' => null,
  125. 'auinit' => 'auinit',
  126. 'aulast' => 'aulast',
  127. 'atitle' => 'atitle',
  128. 'cappublication' => null,
  129. 'captitle' => null,
  130. 'date' => 'date',
  131. 'epage' => 'epage',
  132. 'featureID' => null,
  133. 'id' => null,
  134. 'issue' => 'issue',
  135. 'jnl_epos' => null,
  136. 'jnl_spos' => null,
  137. 'match' => null,
  138. 'marked' => null,
  139. 'num_of_fig' => null,
  140. 'pages' => 'pages',
  141. 'publisher' => 'pub',
  142. 'publoc' => 'place',
  143. 'ref' => null,
  144. 'rest_text' => null,
  145. 'spage' => 'spage',
  146. 'targetURL' => 'url',
  147. 'text' => null,
  148. 'ucpublication' => null,
  149. 'uctitle' => null,
  150. 'volume' => 'volume',
  151. 'year' => 'date'
  152. );
  153. // Ignore 'year' if 'date' is set
  154. if (isset($metadata['date'])) {
  155. $metadataMapping['year'] = null;
  156. }
  157. // Set default genre
  158. if (empty($metadata['genre'])) $metadata['genre'] = 'article';
  159. // Handle title, chapter and publication depending on
  160. // the (inferred) genre. Also instantiate the target schema.
  161. switch($metadata['genre']) {
  162. case OPENURL_GENRE_BOOK:
  163. case OPENURL_GENRE_BOOKITEM:
  164. case OPENURL_GENRE_REPORT:
  165. case OPENURL_GENRE_DOCUMENT:
  166. $metadataMapping += array(
  167. 'publication' => 'btitle',
  168. 'chapter' => 'atitle'
  169. );
  170. if (isset($metadata['title'])) {
  171. if (!isset($metadata['publication'])) {
  172. $metadata['publication'] = $metadata['title'];
  173. } elseif (!isset($metadata['chapter'])) {
  174. $metadata['chapter'] = $metadata['title'];
  175. }
  176. unset($metadata['title']);
  177. }
  178. $openUrlSchema = new OpenUrlBookSchema();
  179. break;
  180. case OPENURL_GENRE_ARTICLE:
  181. case OPENURL_GENRE_JOURNAL:
  182. case OPENURL_GENRE_ISSUE:
  183. case OPENURL_GENRE_CONFERENCE:
  184. case OPENURL_GENRE_PROCEEDING:
  185. case OPENURL_GENRE_PREPRINT:
  186. default:
  187. $metadataMapping += array('publication' => 'jtitle');
  188. if (isset($metadata['title'])) {
  189. if (!isset($metadata['publication'])) {
  190. $metadata['publication'] = $metadata['title'];
  191. } elseif (!isset($metadata['atitle'])) {
  192. $metadata['atitle'] = $metadata['title'];
  193. }
  194. unset($metadata['title']);
  195. }
  196. $openUrlSchema = new OpenUrlJournalSchema();
  197. break;
  198. }
  199. // Instantiate an OpenURL description
  200. $openUrlDescription = new MetadataDescription($openUrlSchema, ASSOC_TYPE_CITATION);
  201. // Map the ParaCite result to OpenURL
  202. foreach ($metadata as $paraciteElementName => $paraciteValue) {
  203. if (!empty($paraciteValue)) {
  204. // Trim punctuation
  205. if (is_string($paraciteValue)) $paraciteValue = String::trimPunctuation($paraciteValue);
  206. // Transfer the value to the OpenURL result array
  207. assert(array_key_exists($paraciteElementName, $metadataMapping));
  208. $openUrlPropertyName = $metadataMapping[$paraciteElementName];
  209. if (!is_null($openUrlPropertyName) && $openUrlSchema->hasProperty($openUrlPropertyName)) {
  210. if (is_array($paraciteValue)) {
  211. foreach($paraciteValue as $singleValue) {
  212. $success = $openUrlDescription->addStatement($openUrlPropertyName, $singleValue);
  213. assert($success);
  214. }
  215. } else {
  216. $success = $openUrlDescription->addStatement($openUrlPropertyName, $paraciteValue);
  217. assert($success);
  218. }
  219. }
  220. }
  221. }
  222. // Crosswalk to NLM
  223. $crosswalkFilter = new OpenUrlNlmCitationSchemaCrosswalkFilter();
  224. $nlmDescription =& $crosswalkFilter->execute($openUrlDescription);
  225. assert(is_a($nlmDescription, 'MetadataDescription'));
  226. // Add 'rest_text' as NLM comment (if given)
  227. if (isset($metadata['rest_text'])) {
  228. $nlmDescription->addStatement('comment', String::trimPunctuation($metadata['rest_text']));
  229. }
  230. return $nlmDescription;
  231. }
  232. //
  233. // Private helper methods
  234. //
  235. /**
  236. * Return supported paracite citation parser modules
  237. * NB: PHP4 work-around for a public static class member
  238. * @return array supported citation modules
  239. */
  240. function getSupportedCitationModules() {
  241. static $_supportedCitationModules = array(
  242. CITATION_PARSER_PARACITE_STANDARD,
  243. CITATION_PARSER_PARACITE_CITEBASE,
  244. CITATION_PARSER_PARACITE_JIAO
  245. );
  246. return $_supportedCitationModules;
  247. }
  248. }
  249. ?>