PageRenderTime 48ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/pkp/classes/citation/lookup/worldcat/WorldcatNlmCitationSchemaFilter.inc.php

https://github.com/lib-uoguelph-ca/ocs
PHP | 216 lines | 99 code | 31 blank | 86 comment | 15 complexity | 2c6c7e141494ea7f83b0b11182aa355e MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  2. /**
  3. * @defgroup citation_lookup_worldcat
  4. */
  5. /**
  6. * @file classes/citation/lookup/worldcat/WorldcatNlmCitationSchemaFilter.inc.php
  7. *
  8. * Copyright (c) 2000-2012 John Willinsky
  9. * Distributed under the GNU GPL v2. For full terms see the file docs/COPYING.
  10. *
  11. * @class WorldcatNlmCitationSchemaFilter
  12. * @ingroup citation_lookup_worldcat
  13. * @see CitationManager
  14. *
  15. * @brief Citation lookup filter that uses the OCLC Worldcat Search API
  16. * and xISBN services to search for book citation metadata.
  17. */
  18. // $Id$
  19. import('citation.NlmCitationSchemaFilter');
  20. // TODO: Might wish to change this if the publication type is NLM_PUBLICATION_TYPE_BOOK, etc. for advanced search
  21. define('WORLDCAT_WEBSERVICE_SEARCH', 'http://www.worldcat.org/search'); // Web search URL queried by process(); OCLC numbers are parsed out of the returned markup
  22. define('WORLDCAT_WEBSERVICE_OCLC', 'http://xisbn.worldcat.org/webservices/xid/oclcnum/'); // Base URL used by _oclcToIsbns(); the URL-encoded OCLC number is appended
  23. // Lookup in MARCXML which has better granularity than Dublin Core
  24. define('WORLDCAT_WEBSERVICE_EXTRACT', 'http://www.worldcat.org/webservices/catalog/content/'); // Base URL used by _lookupWorldcat(); the OCLC number is appended and the API key is sent as 'wskey'
  25. define('WORLDCAT_WEBSERVICE_XISBN', 'http://xisbn.worldcat.org/webservices/xid/isbn/'); // Base URL used by _lookupXIsbn(); the URL-encoded ISBN is appended
  26. // TODO: Should we use OCLC basic API as fallback (see <http://www.worldcat.org/devnet/wiki/BasicAPIDetails>)?
  /**
   * Citation lookup filter for books: searches Worldcat for an OCLC number,
   * resolves it to ISBNs via xISBN and then fills the citation description
   * either from xISBN (no API key) or from the Worldcat catalog (API key set).
   */
  class WorldcatNlmCitationSchemaFilter extends NlmCitationSchemaFilter {
      /** @var string Worldcat API key */
      var $_apiKey;

      /**
       * Constructor
       * @param $apiKey string Worldcat API key; when empty, process()
       *  falls back to the xISBN lookup.
       */
      function WorldcatNlmCitationSchemaFilter($apiKey = '') {
          $this->_apiKey = $apiKey;
          parent::NlmCitationSchemaFilter(array(NLM_PUBLICATION_TYPE_BOOK));
      }

      //
      // Getters and Setters
      //
      /**
       * Get the apiKey
       * @return string
       */
      function getApiKey() {
          return $this->_apiKey;
      }

      //
      // Implement template methods from Filter
      //
      /**
       * Look up the given citation in Worldcat and enrich it with the
       * meta-data that was found.
       *
       * @see Filter::process()
       * @param $citationDescription MetadataDescription
       * @return MetadataDescription the description returned by the lookup,
       *  or null if a web service call failed or nothing was found
       */
      function &process(&$citationDescription) {
          $nullVar = null;

          // Pre-initialise so that an empty list of search strings is
          // handled like "no hits".
          $matches = array();

          // Get the search strings
          $searchTemplates =& $this->_getSearchTemplates();
          $searchStrings = $this->constructSearchStrings($searchTemplates, $citationDescription);

          // Run the searches, in order, until we have a result
          $searchParams = array('qt' => 'worldcat_org_all');
          foreach ($searchStrings as $searchString) {
              $searchParams['q'] = $searchString;

              // Worldcat Web search; results are (mal-formed) XHTML
              $result = $this->callWebService(WORLDCAT_WEBSERVICE_SEARCH, $searchParams, XSL_TRANSFORMER_DOCTYPE_STRING);
              if (is_null($result)) return $nullVar;

              // parse the OCLC numbers from search results
              String::regexp_match_all('/id="itemid_(\d+)"/', $result, $matches);
              if (!empty($matches[1])) break;
          }

          // If we don't have an OCLC number, then we cannot get any metadata
          if (empty($matches[1])) return $nullVar;
          $oclcId = $matches[1][0];

          // use xISBN because it's free
          $isbns = $this->_oclcToIsbns($oclcId);

          // Both lookup paths below require an ISBN. Bail out here rather
          // than querying xISBN with an empty identifier.
          if (empty($isbns[0])) return $nullVar;

          $apiKey = $this->getApiKey();
          if (empty($apiKey)) {
              // Use the first ISBN if we have multiple
              $citationDescription =& $this->_lookupXIsbn($isbns[0], $citationDescription);
              return $citationDescription;
          }

          // Worldcat lookup only works with an API key
          $citationDescription =& $this->_lookupWorldcat($oclcId, $citationDescription);
          if (is_null($citationDescription)) return $nullVar;

          // Prefer ISBN from xISBN if possible. The property name must be
          // 'isbn', the same key the lookup methods use for this value.
          $citationDescription->addStatement('isbn', $isbns[0], null, true);
          return $citationDescription;
      }

      //
      // Private methods
      //
      /**
       * Take an OCLC number and return the associated ISBNs as an array
       * @param $oclcId string
       * @return array an array of ISBNs or an empty array if none found
       *  (also when the web service call fails)
       */
      function _oclcToIsbns($oclcId) {
          $lookupParams = array(
              'method' => 'getMetadata',
              'format' => 'xml',
              'fl' => '*'
          );
          $resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_OCLC.urlencode($oclcId), $lookupParams);
          if (is_null($resultDOM)) return array();

          // Extract ISBN from response
          $oclcNode = $resultDOM->getElementsByTagName('oclcnum')->item(0);
          if (is_null($oclcNode)) return array();

          // The attribute holds a whitespace separated list. Drop empty
          // entries so that a missing/empty attribute yields an empty array.
          $isbns = preg_split('/\s+/', $oclcNode->getAttribute('isbn'), -1, PREG_SPLIT_NO_EMPTY);
          return is_array($isbns) ? $isbns : array();
      }

      /**
       * Fills the given citation description with
       * meta-data retrieved from Worldcat
       * @param $oclcId string
       * @param $citationDescription MetadataDescription
       * @return MetadataDescription or null if the web service call or
       *  the XSL transformation of its result failed
       */
      function &_lookupWorldcat($oclcId, &$citationDescription) {
          $nullVar = null;

          $lookupParams = array('wskey' => $this->getApiKey());
          $resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_EXTRACT.urlencode($oclcId), $lookupParams);
          if (is_null($resultDOM)) return $nullVar;

          $metadata = $this->transformWebServiceResults($resultDOM, dirname(__FILE__).DIRECTORY_SEPARATOR.'worldcat.xsl');
          if (is_null($metadata)) return $nullVar;

          // FIXME: Use MARC parsed author field in XSL rather than full name

          // Clean non-numerics from ISBN
          if (!empty($metadata['isbn'])) $metadata['isbn'] = String::regexp_replace('/[^\dX]*/', '', $metadata['isbn']);

          // Clean non-numerics from issued date (year)
          if (!empty($metadata['date'])) {
              // Discard everything from the first comma onwards ...
              $metadata['date'] = String::regexp_replace('/,.*/', '', $metadata['date']);
              // ... and strip whatever is not a digit. (The former class
              // [^\d{4}] also let curly braces through.)
              $metadata['date'] = String::regexp_replace('/\D/', '', $metadata['date']);
          }

          $citationDescription =& $this->addMetadataArrayToNlmCitationDescription($metadata, $citationDescription);
          return $citationDescription;
      }

      /**
       * Fills the given citation description with
       * meta-data retrieved from xISBN
       * @param $isbn string
       * @param $citationDescription MetadataDescription
       * @return MetadataDescription or null if the web service call failed
       *  or the response contains no 'isbn' element
       */
      function &_lookupXIsbn($isbn, &$citationDescription) {
          $nullVar = null;

          $lookupParams = array(
              'method' => 'getMetadata',
              'format' => 'xml',
              'fl' => '*'
          );
          $resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_XISBN.urlencode($isbn), $lookupParams);
          if (is_null($resultDOM)) return $nullVar;

          // Extract metadata from response
          $recordNode = $resultDOM->getElementsByTagName('isbn')->item(0);
          if (is_null($recordNode)) return $nullVar;

          $metadata = array(
              'isbn' => $isbn,
              'date' => $recordNode->getAttribute('year'),
              'edition' => $recordNode->getAttribute('ed'),
              'source' => $recordNode->getAttribute('title'),
              'publisher-name' => $recordNode->getAttribute('publisher'),
              'publisher-loc' => $recordNode->getAttribute('city'),
              // Authors are of low quality in xISBN compared to Worldcat's MARC records
              'author' => $recordNode->getAttribute('author')
          );

          // Clean and process the meta-data
          $metadata =& $this->postProcessMetadataArray($metadata);
          $citationDescription =& $this->addMetadataArrayToNlmCitationDescription($metadata, $citationDescription);
          return $citationDescription;
      }

      /**
       * Return an array of search templates. process() tries them in
       * the order given here and stops at the first one that yields
       * OCLC numbers.
       * @return array
       */
      function &_getSearchTemplates() {
          $searchTemplates = array(
              '%isbn%',
              '%aulast% %title% %date%',
              '%title% %date%',
              '%aulast% %date%',
              '%aulast% %title%',
          );
          return $searchTemplates;
      }
  }
  185. ?>