PageRenderTime 34ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/pkp/classes/citation/NlmCitationSchemaFilter.inc.php

https://github.com/lib-uoguelph-ca/ocs
PHP | 399 lines | 195 code | 52 blank | 152 comment | 36 complexity | 7adf24cc62de0c62107366572053cf5b MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  2. /**
  3. * @file classes/citation/NlmCitationSchemaFilter.inc.php
  4. *
  5. * Copyright (c) 2000-2012 John Willinsky
  6. * Distributed under the GNU GPL v2. For full terms see the file docs/COPYING.
  7. *
  8. * @class NlmCitationSchemaFilter
  9. * @ingroup classes_citation
  10. *
  11. * @brief Abstract base class for all filters that transform
  12. * NLM citation metadata descriptions.
  13. */
  14. // $Id$
  15. import('filter.Filter');
  16. import('metadata.MetadataDescription');
  17. import('metadata.nlm.NlmCitationSchema');
  18. import('metadata.nlm.NlmNameSchema');
  19. import('metadata.nlm.PersonStringNlmNameSchemaFilter');
  20. import('metadata.DateStringNormalizerFilter');
  21. import('webservice.XmlWebService');
  22. import('xml.XMLHelper');
  23. import('xslt.XSLTransformationFilter');
  24. class NlmCitationSchemaFilter extends Filter {
  25. /** @var array */
  26. var $_supportedPublicationTypes;
  27. /**
  28. * Constructor
  29. */
  30. function NlmCitationSchemaFilter($supportedPublicationTypes = array()) {
  31. assert(is_array($supportedPublicationTypes));
  32. foreach ($supportedPublicationTypes as $supportedPublicationType) {
  33. assert(in_array($supportedPublicationType, $this->_allowedPublicationTypes()));
  34. }
  35. $this->_supportedPublicationTypes = $supportedPublicationTypes;
  36. }
  37. //
  38. // Setters and Getters
  39. //
  40. /**
  41. * Get the supported publication types
  42. * @return array
  43. */
  44. function getSupportedPublicationTypes() {
  45. return $this->_supportedPublicationTypes;
  46. }
  47. //
  48. // Implement template methods from Filter
  49. //
  50. /**
  51. * @see Filter::supports()
  52. * @param $input mixed
  53. * @param $output mixed
  54. * @param $fromString boolean true if the filter accepts a string as input.
  55. * @param $toString boolean true if the filter produces a string as output.
  56. * @return boolean
  57. */
  58. function supports(&$input, &$output, $fromString = false, $toString = false) {
  59. // Make sure that the filter registry has correctly
  60. // checked the environment.
  61. assert(checkPhpVersion('5.0.0'));
  62. // Check the input
  63. if ($fromString) {
  64. if (!is_string($input)) return false;
  65. } else {
  66. if (!$this->isNlmCitationDescription($input)) return false;
  67. // Check that the given publication type is supported by this filter
  68. // If no publication type is given then we'll support the description
  69. // by default.
  70. $publicationType = $input->getStatement('[@publication-type]');
  71. if (!empty($publicationType) && !in_array($publicationType, $this->getSupportedPublicationTypes())) return false;
  72. }
  73. // Check the output
  74. if (is_null($output)) return true;
  75. if ($toString) {
  76. return is_string($output);
  77. } else {
  78. return $this->isNlmCitationDescription($output);
  79. }
  80. }
  81. //
  82. // Protected helper methods
  83. //
  84. /**
  85. * Checks whether a given input is a nlm citation description
  86. * @param $metadataDescription mixed
  87. * @return boolean
  88. */
  89. function isNlmCitationDescription(&$metadataDescription) {
  90. if (!is_a($metadataDescription, 'MetadataDescription')) return false;
  91. $metadataSchema =& $metadataDescription->getMetadataSchema();
  92. if ($metadataSchema->getName() != 'nlm-3.0-element-citation') return false;
  93. return true;
  94. }
  95. /**
  96. * Construct an array of search strings from a citation
  97. * description and an array of search templates.
  98. * The templates may contain the placeholders
  99. * %aulast%: the first author's surname
  100. * %au%: the first author full name
  101. * %title%: the article-title (if it exists),
  102. * otherwise the source
  103. * %date%: the publication year
  104. * %isbn%: ISBN
  105. * @param $searchTemplates an array of templates
  106. * @param $citationDescription MetadataDescription
  107. * @return array
  108. */
  109. function constructSearchStrings(&$searchTemplates, &$citationDescription) {
  110. // Retrieve the authors
  111. $firstAuthorSurname = $firstAuthor = '';
  112. $authors = $citationDescription->getStatement('person-group[@person-group-type="author"]');
  113. if (is_array($authors) && count($authors)) {
  114. $firstAuthorSurname = (string)$authors[0]->getStatement('surname');
  115. // Convert first authors' name description to a string
  116. import('metadata.nlm.NlmNameSchemaPersonStringFilter');
  117. $personStringFilter = new NlmNameSchemaPersonStringFilter();
  118. $firstAuthor = $personStringFilter->execute($authors[0]);
  119. }
  120. // Retrieve (default language) title
  121. $title = (string)($citationDescription->hasStatement('article-title') ?
  122. $citationDescription->getStatement('article-title') :
  123. $citationDescription->getStatement('source'));
  124. // Extract the year from the publication date
  125. $year = (string)$citationDescription->getStatement('date');
  126. $year = (String::strlen($year) > 4 ? String::substr($year, 0, 4) : $year);
  127. // Retrieve ISBN
  128. $isbn = (string)$citationDescription->getStatement('isbn');
  129. // Replace the placeholders in the templates
  130. $searchStrings = array();
  131. foreach($searchTemplates as $searchTemplate) {
  132. $searchStrings[] = str_replace(
  133. array('%aulast%', '%au%', '%title%', '%date%', '%isbn%'),
  134. array($firstAuthorSurname, $firstAuthor, $title, $year, $isbn),
  135. $searchTemplate
  136. );
  137. }
  138. // Remove empty or duplicate searches
  139. $searchStrings = array_map(array('String', 'trimPunctuation'), $searchStrings);
  140. $searchStrings = array_unique($searchStrings);
  141. $searchStrings = arrayClean($searchStrings);
  142. return $searchStrings;
  143. }
  144. /**
  145. * Call web service with the given parameters
  146. * @param $params array GET or POST parameters
  147. * @return DOMDocument or null in case of error
  148. */
  149. function &callWebService($url, &$params, $returnType = XSL_TRANSFORMER_DOCTYPE_DOM, $method = 'GET') {
  150. // Create a request
  151. $webServiceRequest = new WebServiceRequest($url, $params, $method);
  152. // Configure and call the web service
  153. $xmlWebService = new XmlWebService();
  154. $xmlWebService->setReturnType($returnType);
  155. $result =& $xmlWebService->call($webServiceRequest);
  156. return $result;
  157. }
  158. /**
  159. * Takes the raw xml result of a web service and
  160. * transforms it via XSL to a (preliminary) XML similar
  161. * to NLM which is then re-encoded into an array. Finally
  162. * some typical post-processing is performed.
  163. * FIXME: Rewrite parser/lookup filter XSL to produce real NLM
  164. * element-citation XML and factor this code into an NLM XML to
  165. * NLM description filter.
  166. * @param $xmlResult string or DOMDocument
  167. * @param $xslFileName string
  168. * @return array a metadata array
  169. */
  170. function &transformWebServiceResults(&$xmlResult, $xslFileName) {
  171. // Send the result through the XSL to generate a (preliminary) NLM XML.
  172. $xslFilter = new XSLTransformationFilter();
  173. $xslFilter->setXSLFilename($xslFileName);
  174. $xslFilter->setResultType(XSL_TRANSFORMER_DOCTYPE_DOM);
  175. $preliminaryNlmDOM =& $xslFilter->execute($xmlResult);
  176. if (is_null($preliminaryNlmDOM)) return $preliminaryNlmDOM;
  177. // Transform the result to an array.
  178. $xmlHelper = new XMLHelper();
  179. $preliminaryNlmArray = $xmlHelper->xmlToArray($preliminaryNlmDOM->documentElement);
  180. $preliminaryNlmArray =& $this->postProcessMetadataArray($preliminaryNlmArray);
  181. return $preliminaryNlmArray;
  182. }
  183. /**
  184. * Post processes an NLM meta-data array
  185. * @param $preliminaryNlmArray array
  186. * @return array
  187. */
  188. function &postProcessMetadataArray(&$preliminaryNlmArray) {
  189. // Clean array
  190. $preliminaryNlmArray =& arrayClean($preliminaryNlmArray);
  191. // Trim punctuation
  192. $preliminaryNlmArray =& $this->_recursivelyTrimPunctuation($preliminaryNlmArray);
  193. // Parse (=filter) author/editor strings into NLM name descriptions
  194. foreach(array('author', 'editor') as $personType) {
  195. if (isset($preliminaryNlmArray[$personType])) {
  196. // Get the author/editor strings from the result
  197. $personStrings = $preliminaryNlmArray[$personType];
  198. unset($preliminaryNlmArray[$personType]);
  199. // Parse the author/editor strings into NLM name descriptions
  200. $personStringFilter = new PersonStringNlmNameSchemaFilter(ASSOC_TYPE_AUTHOR);
  201. // Interpret a scalar as a textual authors list
  202. if (is_scalar($personStrings)) {
  203. $personStringFilter->setFilterMode(PERSON_STRING_FILTER_MULTIPLE);
  204. $persons =& $personStringFilter->execute($personStrings);
  205. } else {
  206. $persons =& array_map(array($personStringFilter, 'execute'), $personStrings);
  207. }
  208. $preliminaryNlmArray['person-group[@person-group-type="'.$personType.'"]'] = $persons;
  209. unset($persons);
  210. }
  211. }
  212. // Join comments
  213. if (isset($preliminaryNlmArray['comment']) && is_array($preliminaryNlmArray['comment'])) {
  214. // Implode comments from the result into a single string
  215. // as required by the NLM citation schema.
  216. $preliminaryNlmArray['comment'] = implode("\n", $preliminaryNlmArray['comment']);
  217. }
  218. // Normalize date strings
  219. foreach(array('date', 'conf-date', 'access-date') as $dateProperty) {
  220. if (isset($preliminaryNlmArray[$dateProperty])) {
  221. $dateFilter = new DateStringNormalizerFilter();
  222. $preliminaryNlmArray[$dateProperty] = $dateFilter->execute($preliminaryNlmArray[$dateProperty]);
  223. }
  224. }
  225. // Cast strings to integers where necessary
  226. foreach(array('fpage', 'lpage', 'size') as $integerProperty) {
  227. if (isset($preliminaryNlmArray[$integerProperty]) && is_numeric($preliminaryNlmArray[$integerProperty])) {
  228. $preliminaryNlmArray[$integerProperty] = (integer)$preliminaryNlmArray[$integerProperty];
  229. }
  230. }
  231. // Rename elements that are stored in attributes in NLM citation
  232. $elementToAttributeMap = array(
  233. 'access-date' => 'date-in-citation[@content-type="access-date"]',
  234. 'issn-ppub' => 'issn[@pub-type="ppub"]',
  235. 'issn-epub' => 'issn[@pub-type="epub"]',
  236. 'pub-id-doi' => 'pub-id[@pub-id-type="doi"]',
  237. 'pub-id-publisher-id' => 'pub-id[@pub-id-type="publisher-id"]',
  238. 'pub-id-coden' => 'pub-id[@pub-id-type="coden"]',
  239. 'pub-id-sici' => 'pub-id[@pub-id-type="sici"]',
  240. 'pub-id-pmid' => 'pub-id[@pub-id-type="pmid"]',
  241. 'publication-type' => '[@publication-type]'
  242. );
  243. foreach($elementToAttributeMap as $elementName => $nlmPropertyName) {
  244. if (isset($preliminaryNlmArray[$elementName])) {
  245. $preliminaryNlmArray[$nlmPropertyName] = $preliminaryNlmArray[$elementName];
  246. unset($preliminaryNlmArray[$elementName]);
  247. }
  248. }
  249. return $preliminaryNlmArray;
  250. }
  251. /**
  252. * Adds the data of an array of property/value pairs
  253. * as statements to an NLM citation description.
  254. * If no citation description is given, a new one will
  255. * be instantiated.
  256. * @param $metadataArray array
  257. * @param $citationDescription MetadataDescription
  258. * @return MetadataDescription
  259. */
  260. function &addMetadataArrayToNlmCitationDescription(&$metadataArray, $citationDescription = null) {
  261. // Create a new citation description if no one was given
  262. if (is_null($citationDescription)) {
  263. $metadataSchema = new NlmCitationSchema();
  264. $citationDescription = new MetadataDescription($metadataSchema, ASSOC_TYPE_CITATION);
  265. }
  266. // Add the meta-data to the description
  267. $metadataArray = arrayClean($metadataArray);
  268. if (!$citationDescription->setStatements($metadataArray)) {
  269. $nullVar = null;
  270. return $nullVar;
  271. }
  272. return $citationDescription;
  273. }
  274. /**
  275. * Take an NLM preliminary meta-data array and fix publisher-loc
  276. * and publisher-name entries:
  277. * - If there is a location but no name then try to extract a
  278. * publisher name from the location string.
  279. * - Make sure that location and name are not the same.
  280. * - Copy institution to publisher if no publisher is set,
  281. * otherwise leave the institution.
  282. * @param $metadata array
  283. * @return array
  284. */
  285. function &fixPublisherNameAndLocation(&$metadata) {
  286. if (isset($metadata['publisher-loc'])) {
  287. // Extract publisher-name from publisher-loc if we don't have a
  288. // publisher-name in the parsing result.
  289. if (empty($metadata['publisher-name'])) {
  290. $metadata['publisher-name'] = String::regexp_replace('/.*:([^,]+),?.*/', '\1', $metadata['publisher-loc']);
  291. }
  292. // Remove publisher-name from publisher-loc
  293. $metadata['publisher-loc'] = String::regexp_replace('/^(.+):.*/', '\1', $metadata['publisher-loc']);
  294. // Check that publisher-name and location are not the same
  295. if (!empty($metadata['publisher-name']) && $metadata['publisher-name'] == $metadata['publisher-loc']) unset($metadata['publisher-name']);
  296. }
  297. // Copy the institution property (if any) as the publisher-name
  298. if (isset($metadata['institution']) &&
  299. (!isset($metadata['publisher-name']) || empty($metadata['publisher-name']))) {
  300. $metadata['publisher-name'] = $metadata['institution'];
  301. }
  302. // Clean the result
  303. foreach(array('publisher-name', 'publisher-loc') as $publisherProperty) {
  304. if (isset($metadata[$publisherProperty])) {
  305. $metadata[$publisherProperty] = String::trimPunctuation($metadata[$publisherProperty]);
  306. }
  307. }
  308. return $metadata;
  309. }
  310. //
  311. // Private helper methods
  312. //
  313. /**
  314. * Recursively trim punctuation from a metadata array.
  315. */
  316. function &_recursivelyTrimPunctuation(&$metadataArray) {
  317. assert(is_array($metadataArray));
  318. foreach($metadataArray as $metadataKey => $metadataValue) {
  319. // If we find an array then we'll recurse
  320. if (is_array($metadataValue)) {
  321. $metadataArray[$metadataKey] = $this->_recursivelyTrimPunctuation($metadataValue);
  322. }
  323. // String scalars will be trimmed
  324. if (is_string($metadataValue)) {
  325. $metadataArray[$metadataKey] = String::trimPunctuation($metadataValue);
  326. }
  327. // All other value types (i.e. integers, composite values, etc.)
  328. // will be ignored.
  329. }
  330. return $metadataArray;
  331. }
  332. /**
  333. * Static method that returns a list of permitted
  334. * publication types.
  335. * NB: PHP4 workaround for static class member.
  336. */
  337. function _allowedPublicationTypes() {
  338. static $allowedPublicationTypes = array(
  339. NLM_PUBLICATION_TYPE_JOURNAL,
  340. NLM_PUBLICATION_TYPE_CONFPROC,
  341. NLM_PUBLICATION_TYPE_BOOK,
  342. NLM_PUBLICATION_TYPE_THESIS
  343. );
  344. return $allowedPublicationTypes;
  345. }
  346. }
  347. ?>