PageRenderTime 144ms CodeModel.GetById 60ms app.highlight 41ms RepoModel.GetById 39ms app.codeStats 0ms

/lib/pkp/classes/citation/lookup/worldcat/WorldcatNlmCitationSchemaFilter.inc.php

https://github.com/lib-uoguelph-ca/ocs
PHP | 216 lines | 99 code | 31 blank | 86 comment | 15 complexity | 2c6c7e141494ea7f83b0b11182aa355e MD5 | raw file
  1<?php
  2
  3/**
  4 * @defgroup citation_lookup_worldcat
  5 */
  6
  7/**
  8 * @file classes/citation/lookup/worldcat/WorldcatNlmCitationSchemaFilter.inc.php
  9 *
 10 * Copyright (c) 2000-2012 John Willinsky
 11 * Distributed under the GNU GPL v2. For full terms see the file docs/COPYING.
 12 *
 13 * @class WorldcatNlmCitationSchemaFilter
 14 * @ingroup citation_lookup_worldcat
  15 * @see CitationManager
 16 *
 17 * @brief Citation lookup filter that uses the OCLC Worldcat Search API
 18 *  and xISBN services to search for book citation metadata.
 19 */
 20
 21// $Id$
 22
 23import('citation.NlmCitationSchemaFilter');
 24
  25// TODO: Might wish to use a different (advanced search) endpoint depending on the publication type, e.g. for NLM_PUBLICATION_TYPE_BOOK.
  26define('WORLDCAT_WEBSERVICE_SEARCH', 'http://www.worldcat.org/search'); // Worldcat web search endpoint
  27define('WORLDCAT_WEBSERVICE_OCLC', 'http://xisbn.worldcat.org/webservices/xid/oclcnum/'); // xID endpoint; the OCLC number is appended to this URL
  28// Look up records in MARCXML, which has better granularity than Dublin Core.
  29define('WORLDCAT_WEBSERVICE_EXTRACT', 'http://www.worldcat.org/webservices/catalog/content/'); // catalog content endpoint; the OCLC number is appended to this URL
  30define('WORLDCAT_WEBSERVICE_XISBN', 'http://xisbn.worldcat.org/webservices/xid/isbn/'); // xISBN endpoint; the ISBN is appended to this URL
  31// TODO: Should we use the OCLC basic API as a fallback (see <http://www.worldcat.org/devnet/wiki/BasicAPIDetails>)?
 32
 33class WorldcatNlmCitationSchemaFilter extends NlmCitationSchemaFilter {
 34	/** @var string Worldcat API key */
 35	var $_apiKey;
 36
 37	/**
 38	 * Constructor
 39	 * @param $apiKey string
 40	 */
 41	function WorldcatNlmCitationSchemaFilter($apiKey = '') {
 42		$this->_apiKey = $apiKey;
 43
 44		parent::NlmCitationSchemaFilter(array(NLM_PUBLICATION_TYPE_BOOK));
 45	}
 46
 47	//
 48	// Getters and Setters
 49	//
 50	/**
 51	 * Get the apiKey
 52	 * @return string
 53	 */
 54	function getApiKey() {
 55		return $this->_apiKey;
 56	}
 57
 58	//
 59	// Implement template methods from Filter
 60	//
 61	/**
 62	 * @see Filter::process()
 63	 * @param $citationDescription MetadataDescription
 64	 * @return string a DOI or null
 65	 */
 66	function &process(&$citationDescription) {
 67		$nullVar = null;
 68
 69		// Get the search strings
 70		$searchTemplates =& $this->_getSearchTemplates();
 71		$searchStrings = $this->constructSearchStrings($searchTemplates, $citationDescription);
 72
 73		// Run the searches, in order, until we have a result
 74		$searchParams = array('qt' => 'worldcat_org_all');
 75		foreach ($searchStrings as $searchString) {
 76			$searchParams['q'] = $searchString;
 77			// Worldcat Web search; results are (mal-formed) XHTML
 78			if (is_null($result = $this->callWebService(WORLDCAT_WEBSERVICE_SEARCH, $searchParams, XSL_TRANSFORMER_DOCTYPE_STRING))) return $nullVar;
 79
 80			// parse the OCLC numbers from search results
 81			String::regexp_match_all('/id="itemid_(\d+)"/', $result, $matches);
 82			if (!empty($matches[1])) break;
 83		}
 84
 85		// If we don't have an OCLC number, then we cannot get any metadata
 86		if (empty($matches[1])) return $nullVar;
 87
 88		// use xISBN because it's free
 89		$isbns = $this->_oclcToIsbns($matches[1][0]);
 90
 91		$apiKey = $this->getApiKey();
 92		if (empty($apiKey)) {
 93			// Use the first ISBN if we have multiple
 94			$citationDescription =& $this->_lookupXIsbn($isbns[0], $citationDescription);
 95			return $citationDescription;
 96		} elseif (!empty($isbns[0])) {
 97			// Worldcat lookup only works with an API key
 98			if (is_null($citationDescription =& $this->_lookupWorldcat($matches[1][0], $citationDescription))) return $nullVar;
 99
100			// Prefer ISBN from xISBN if possible
101			if (!empty($isbns[0])) $citationDescription->addStatement('ibsn', $isbns[0], null, true);
102			return $citationDescription;
103		}
104
105		// Nothing found
106		return $nullVar;
107	}
108
109	//
110	// Private methods
111	//
112	/**
113	 * Take an OCLC number and return the associated ISBNs as an array
114	 * @param $oclcId string
115	 * @return array an array of ISBNs or an empty array if none found
116	 */
117	function _oclcToIsbns($oclcId) {
118		$nullVar = null;
119		$lookupParams = array(
120			'method' => 'getMetadata',
121			'format' => 'xml',
122			'fl' => '*'
123		);
124		if (is_null($resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_OCLC.urlencode($oclcId), $lookupParams))) return $nullVar;
125
126		// Extract ISBN from response
127		$oclcNode = $resultDOM->getElementsByTagName('oclcnum')->item(0);
128
129		if (isset($oclcNode)) {
130			return explode(' ', $oclcNode->getAttribute('isbn'));
131		} else {
132			return array();
133		}
134	}
135
136	/**
137	 * Fills the given citation description with
138	 * meta-data retrieved from Worldcat
139	 * @param $oclcId string
140	 * @param $citationDescription MetadataDescription
141	 * @return MetadataDescription
142	 */
143	function &_lookupWorldcat($oclcId, &$citationDescription) {
144		$nullVar = null;
145		$lookupParams = array('wskey' => $this->getApiKey());
146		if (is_null($resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_EXTRACT.urlencode($oclcId), $lookupParams))) return $nullVar;
147
148		if (is_null($metadata = $this->transformWebServiceResults($resultDOM, dirname(__FILE__).DIRECTORY_SEPARATOR.'worldcat.xsl'))) return $nullVar;
149		// FIXME: Use MARC parsed author field in XSL rather than full name
150
151		// Clean non-numerics from ISBN
152		if (!empty($metadata['isbn'])) $metadata['isbn'] = String::regexp_replace('/[^\dX]*/', '', $metadata['isbn']);
153
154		// Clean non-numerics from issued date (year)
155		if (!empty($metadata['date'])) {
156			$metadata['date'] = String::regexp_replace('/,.*/', ', ', $metadata['date']);
157			$metadata['date'] = String::regexp_replace('/[^\d{4}]/', '', $metadata['date']);
158		}
159
160		$citationDescription =& $this->addMetadataArrayToNlmCitationDescription($metadata, $citationDescription);
161		return $citationDescription;
162	}
163
164	/**
165	 * Fills the given citation object with
166	 * meta-data retrieved from xISBN
167	 * @param $isbn string
168	 * @param $citationDescription Citation
169	 * @return Citation
170	 */
171	function &_lookupXIsbn($isbn, &$citationDescription) {
172		$nullVar = null;
173		$lookupParams = array(
174			'method' => 'getMetadata',
175			'format' => 'xml',
176			'fl' => '*'
177		);
178		if (is_null($resultDOM = $this->callWebService(WORLDCAT_WEBSERVICE_XISBN.urlencode($isbn), $lookupParams))) return $nullVar;
179
180		// Extract metadata from response
181		if (is_null($recordNode = $resultDOM->getElementsByTagName('isbn')->item(0))) return $nullVar;
182
183		$metadata['isbn'] = $isbn;
184		$metadata['date'] = $recordNode->getAttribute('year');
185		$metadata['edition'] = $recordNode->getAttribute('ed');
186		$metadata['source'] = $recordNode->getAttribute('title');
187		$metadata['publisher-name'] = $recordNode->getAttribute('publisher');
188		$metadata['publisher-loc'] = $recordNode->getAttribute('city');
189		// Authors are of low quality in xISBN compared to Worldcat's MARC records
190		$metadata['author'] = $recordNode->getAttribute('author');
191
192		// Clean and process the meta-data
193		$metadata =& $this->postProcessMetadataArray($metadata);
194		$citationDescription =& $this->addMetadataArrayToNlmCitationDescription($metadata, $citationDescription);
195		return $citationDescription;
196	}
197
198	//
199	// Private methods
200	//
201	/**
202	 * Return an array of search templates.
203	 * @return array
204	 */
205	function &_getSearchTemplates() {
206		$searchTemplates = array(
207			'%isbn%',
208			'%aulast% %title% %date%',
209			'%title% %date%',
210			'%aulast% %date%',
211			'%aulast% %title%',
212		);
213		return $searchTemplates;
214	}
215}
216?>