PageRenderTime 53ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/Xerxes/metalib/MetalibRecord.php

http://xerxes-portal.googlecode.com/
PHP | 919 lines | 527 code | 217 blank | 175 comment | 122 complexity | aa7efc3822af43e6280fb7db328df33b MD5 | raw file
  1. <?php
  2. /**
  3. * Extract multiple Marc records from Metalib X-Server response
  4. *
  5. * @author David Walker
  6. * @copyright 2009 California State University
  7. * @link http://xerxes.calstate.edu
  8. * @license http://www.gnu.org/licenses/
  9. * @version $Id: MetalibRecord.php 2054 2012-05-01 21:34:24Z dwalker.calstate@gmail.com $
  10. * @todo ->__toString() madness below due to php 5.1 object-string casting problem
  11. * @package Xerxes
  12. */
  class Xerxes_MetalibRecord_Document extends Xerxes_Marc_Document
  {
    // name of the record class used for the records in this document
    protected $record_type = 'Xerxes_MetalibRecord';
  }
  17. /**
  18. * Extract properties for books, articles, and dissertations from MARC-XML record
  19. * with special handling for Metalib X-Server response
  20. *
  21. * @author David Walker
  22. * @copyright 2009 California State University
  23. * @link http://xerxes.calstate.edu
  24. * @license http://www.gnu.org/licenses/
  25. * @version $Id: MetalibRecord.php 2054 2012-05-01 21:34:24Z dwalker.calstate@gmail.com $
  26. * @todo ->__toString() madness below due to php 5.1 object-string casting problem, remove
  27. * when redhat provides php 5.2 package, since that is keeping people from upgrading
  28. * @package Xerxes
  29. */
  30. class Xerxes_MetalibRecord extends Xerxes_Record
  31. {
  32. protected $metalib_id;
  33. protected $result_set;
  34. protected $record_number;
  /**
   * Map the Metalib X-Server MARC-XML response onto this record's properties.
   *
   * Runs in three stages:
   *
   *  1. normalise Metalib and vendor quirks in the raw MARC fields (source
   *     identifiers, broken character references, repeated 100 / 773 fields,
   *     unreliable 856 links, Gale titles, psycinfo 502 notes);
   *  2. hand over to parent::map() for the general mapping;
   *  3. apply source-specific corrections to the mapped properties (year,
   *     volume, issue, format, title, subjects, links).
   *
   * Fields that must be hidden from the parent mapping are re-tagged "XXX"
   * instead of being removed.
   *
   * @return void
   */
  public function map()
  {
    $leader = $this->leader();

    ## source database

    $sid = $this->datafield("SID");

    $this->metalib_id = $sid->subfield("d")->__toString();
    $this->record_number = $sid->subfield("j")->__toString();
    $this->result_set = $sid->subfield("s")->__toString();
    $this->database_name = $sid->subfield("t")->__toString();
    $this->source = $sid->subfield("b")->__toString();

    ## metalib weirdness

    // metalib puts the leader in a control field

    $strLeaderMetalib = $this->controlfield("LDR")->__toString();

    if ( $strLeaderMetalib != "" )
    {
      $leader->value = $strLeaderMetalib;
    }

    // character entity references de-munging code -- thanks a lot metalib!
    // a value ending in an un-terminated reference (e.g. "&#233") is glued
    // back together with the value of the field that follows it

    $demunge = array("1XX", "6XX");

    foreach ( $demunge as $field )
    {
      do // this until all references are re-combined
      {
        $authors = $this->datafield($field);
        $got_one = false; // whether we found any in the list

        for ( $x = 0; $x < $authors->length(); $x++ )
        {
          $this_datafield = $authors->item($x);
          $this_value = $this_datafield->subfield()->__toString();

          if ( preg_match('/\&\#\d{3}$/', $this_value) )
          {
            // we found an un-terminated char entity ref

            $got_one = true;
            $new_value = "";

            // grab the value out of the next field

            $x++;

            if ( $x < $authors->length() )
            {
              // not at the end, so grab the next field's value

              $next_datafield = $authors->item($x);
              $next_value = $next_datafield->subfield()->__toString();

              // add back in the terminating semi-colon

              $new_value = "$this_value;$next_value";

              // blank it so we don't re-process it

              $next_datafield->tag = "XXX";
            }
            else
            {
              // at the end, just add a terminating char to the value

              $new_value = $this_value . ";";
            }

            // now create a new datafield composed of both old and new values;
            // we'll just assume this is |a

            $fixed_subfield = new Xerxes_Marc_Subfield();
            $fixed_subfield->code = "a";
            $fixed_subfield->value = $new_value;

            $fixed_datafield = new Xerxes_Marc_DataField();
            $fixed_datafield->tag = $this_datafield->tag;
            $fixed_datafield->addSubField($fixed_subfield);

            // add it to the main record and blank the old one

            $this->addDataField($fixed_datafield);
            $this_datafield->tag = "XXX";
          }
          else
          {
            // we need to shift this to the end to keep field order intact
            // (critical for authors) so the above code works right

            $new_field = clone($this_datafield);
            $this->addDataField($new_field);
            $this_datafield->tag = "XXX";
          }
        }

        // if we found one, cycle back again to see if our now-combined
        // field(s) *also* have un-terminated references since there may have
        // been more than one broken char reference for a single author
      }
      while ( $got_one == true );
    }

    // z3950/sutrs and some screen-scrapers have multiple authors in repeating
    // 100 fields; invalid marc, so switch all but first to 700

    $authors = $this->datafield("100");

    for ( $x = 1; $x < $authors->length(); $x++ )
    {
      $authors->item($x)->tag = "700";
    }

    // there are often multiple 773's, just combine them into one so we don't
    // have to iterate over all of them in other code

    $obj773 = new Xerxes_Marc_DataField();
    $obj773->tag = "773";

    foreach ( $this->datafield("773") as $linked_entry )
    {
      // add all of its subfields to the new one

      foreach ( $linked_entry->subfield() as $linked_entry_subfield )
      {
        $obj773->addSubField($linked_entry_subfield);
      }

      // now blank this one to take it out of the mix

      $linked_entry->tag = "XXX";
    }

    // add our new one to the document

    $this->addDataField($obj773);

    ## ebsco format

    if ( strstr($this->source, "EBSCO") )
    {
      // leader appears to be hard-wired; useless

      $leader->value = "";

      // format

      array_push($this->format_array, $this->datafield("656")->subfield("a")->__toString());
      array_push($this->format_array, $this->datafield("514")->subfield("a")->__toString());

      $strEbscoType = $this->datafield("072")->subfield("a")->__toString();

      if ( strstr($this->source, "EBSCO_PSY") || strstr($this->source, "EBSCO_PDH") )
      {
        $strEbscoType = "";
      }

      array_push($this->format_array, $strEbscoType);

      // ebsco book chapter

      $strEbscoBookTitle = $this->datafield("771")->subfield("a")->__toString();

      if ( $strEbscoBookTitle != "" )
      {
        array_push($this->format_array, "Book Chapter");
      }
    }

    // gale puts issn in 773b

    if ( strstr($this->source, 'GALE') )
    {
      $strGaleIssn = $this->datafield("773")->subfield("b")->__toString();

      if ( $strGaleIssn != null )
      {
        array_push($this->issns, $strGaleIssn);
      }
    }

    // eric doc number

    $this->eric_number = $this->datafield("ERI")->subfield("a")->__toString();

    ## full-text

    // some databases have full-text but no 856
    // will capture these here and add to links array

    // pychcritiques -- no indicator of full-text either, assume all to be (9/5/07)
    // no unique metalib config either, using psycinfo, so make determination based on name. yikes!

    if ( stristr($this->database_name, "psycCRITIQUES") )
    {
      array_push($this->links, array("Full-Text in HTML", array("001" => $this->controlfield("001")->__toString()), "html"));
    }

    // factiva -- no indicator of full-text either, assume all to be (9/5/07)

    if ( stristr($this->source, "FACTIVA") )
    {
      array_push($this->links, array("Full-Text Available", array("035_a" => $this->datafield("035")->subfield("a")->__toString()), "online"));
    }

    // eric -- document is recent enough to likely contain full-text;
    // 340000 being a rough approximation of the document number after which they
    // started digitizing
    // EBSCO provides an indication of Full text available in 037 (jdwyn 01/12/2012)

    if ( strstr($this->source, "ERIC") && strlen($this->eric_number) > 3 )
    {
      $strEricType = substr($this->eric_number, 0, 2);
      $strEricNumber = (int) substr($this->eric_number, 2);
      $strEricAvailabilityNote = $this->datafield("037")->subfield("a")->__toString();

      if ( $strEricType == "ED" )
      {
        if ( $strEricNumber >= 340000 || (stristr($this->source, "EBSCO_ERIC") && $strEricAvailabilityNote != "Not available from ERIC" && $strEricAvailabilityNote != "Available on microfiche only") )
        {
          $strFullTextPdf = "http://www.eric.ed.gov/ERICWebPortal/contentdelivery/servlet/ERICServlet?accno=" . $this->eric_number;
          array_push($this->links, array("Full-text at ERIC.gov", $strFullTextPdf, "pdf"));
        }
      }
    }

    // 7 Apr 09, jrochkind. Gale Biography Resource Center
    // No 856 is included at all, but a full text link can be
    // constructed from the 001 record id.

    if ( stristr($this->source, "GALE_ZBRC") )
    {
      $url = "http://ic.galegroup.com/ic/bic1/ReferenceDetailsPage/ReferenceDetailsWindow?displayGroupName=K12-Reference&action=e&windowstate=normal&mode=view&documentId=GALE|" . $this->controlfield("001")->__toString();
      array_push($this->links, array("Full-Text in HTML", $url, "html"));
    }

    // special handling of 856

    $notes = $this->fieldArray("500", "a"); // needed for gale

    foreach ( $this->datafield("856") as $link )
    {
      $strDisplay = $link->subfield("z")->__toString();
      $strUrl = $link->subfield("u")->__toString();

      // bad links
      // records that have 856s, but are not always for full-text; in that case, specify them
      // here as original records, and remove 856 so parent code doesn't process them as full-text links
      //
      // springer (metapress): does not distinguish between things in your subscription or not (9/16/08)
      // cinahl (bzh): not only is 856 bad, but link missing http:// bah! thanks greg at upacific! (9/10/08)
      // wilson: 901|t shows an indication of full-text (9/16/10)
      // cabi: just point back to site (10/30/07)
      // google scholar: just point back to site (3/26/07)
      // amazon: just point back to site (3/20/08)
      // abc-clio: just point back to site (7/30/07)
      // engineering village (evii): has unreliable full-text links in a consortium environment (4/1/08)
      // wiley interscience: wiley does not limit full-text links only to your subscription (4/29/08)
      // oxford: only include the links that are free, otherwise just a link to abstract (5/7/08)
      // gale: only has full-text if 'text available' note in 500 field (9/7/07) BUT: Not true of Gale virtual reference library (GALE_GVRL). 10/14/08 jrochkind.
      // ieee xplore: does not distinguish between things in your subscription or not (2/13/09)
      // elsevier links are not based on subscription (6/2/10)
      // harvard business: these links in business source premiere are not part of your subscription (5/26/10)
      // proquest (umi): these links are not full-text (thanks jerry @ uni) (5/26/10)
      // proquest (gateway): there doesn't appear to be a general rule to this, so only doing it for fiaf (5/26/10)
      // primo central: no actual links here

      if ( stristr($this->source, "METAPRESS_XML") ||
        stristr($this->source, "EBSCO_RZH") ||
        ( stristr($this->source, "WILSON_") && $this->datafield("901")->subfield("t")->__toString() == "" ) ||
        stristr($this->source, "CABI") ||
        stristr($this->source, "SCOPUS4") ||
        stristr($this->source, "GOOGLE_SCH") ||
        stristr($this->source, "AMAZON") ||
        stristr($this->source, "ABCCLIO") ||
        stristr($this->source, "EVII") ||
        stristr($this->source, "WILEY_IS") ||
        ( stristr($this->source, "OXFORD_JOU") && ! strstr($strUrl, "content/full/") ) ||
        ( strstr($this->source, "GALE") && ! strstr($this->source, "GALE_GVRL") && ! in_array("Text available", $notes) ) ||
        stristr($this->source, "IEEE_XPLORE") ||
        stristr($this->source, "ELSEVIER_SCOPUS") ||
        stristr($this->source, "ELSEVIER_SCIRUS") ||
        ( stristr($this->source, "EBSCO") && $strUrl != "" && ! strstr($strUrl, "epnet") ) ||
        ( strstr($strUrl, "proquest.umi.com") && strstr($strUrl, "Fmt=2") ) ||
        ( strstr($strUrl, "gateway.proquest.com") && strstr($strUrl, "xri:fiaf:article") ) ||
        stristr($this->source, "NEWPC")
      )
      {
        // take it out so the parent class doesn't treat it as full-text

        $link->tag = "XXX";
        array_push($this->links, array($strDisplay, $strUrl, "original_record"));
      }

      // ebsco

      elseif ( stristr($this->source, "EBSCO") )
      {
        $strEbscoFullText = $link->subfield("i")->__toString();
        $ebsco_fulltext_type = "";

        // html
        // there is (a) an indicator from ebsco that the record has full-text, or
        // (b) an aberrant 856 link that doesn't work, but the constructed link will work,
        // so we take that as something of a full-text indicator

        if ( strstr($strEbscoFullText, "T") || strstr($strDisplay, "View Full Text") )
        {
          $ebsco_fulltext_type = "html";
        }
        elseif ( strstr($link->subfield("az")->__toString(), "PDF") )
        {
          $ebsco_fulltext_type = "pdf";
        }

        if ( $ebsco_fulltext_type != "" )
        {
          $str001 = $this->controlfield("001")->__toString();
          $str016 = $this->datafield("016")->subfield("a")->__toString();

          // see if the id number is 'dirty'

          $bolAlpha001 = preg_match('/^\W/', $str001);

          // if so, and there is a 016, use that instead, if not go ahead and use
          // the 001; if neither do nothing

          if ( $bolAlpha001 == true && $str016 != "" )
          {
            array_push($this->links, array($strDisplay, array("016" => $str016), $ebsco_fulltext_type));
          }
          elseif ( $bolAlpha001 == false )
          {
            array_push($this->links, array($strDisplay, array("001" => $str001), $ebsco_fulltext_type));
          }

          $link->tag = "XXX";
          array_push($this->links, array($strDisplay, $strUrl, "original_record"));
        }
      }
    }

    // Gale title clean-up, because for some reason unknown to man they put weird
    // notes and junk at the end of the title. so remove them here and add them to notes.

    if ( strstr($this->source, 'GALE_') )
    {
      $arrMatches = array();
      $strGaleRegExp = '/\(([^)]*)\)/';

      $title = $this->datafield("245");
      $title_main = $title->subfield("a");
      $title_sub = $title->subfield("b");

      $note_field = new Xerxes_Marc_DataField();
      $note_field->tag = "500";

      if ( $title_main != null )
      {
        if ( preg_match_all($strGaleRegExp, $title_main->value, $arrMatches) != 0 )
        {
          $title_main->value = preg_replace($strGaleRegExp, "", $title_main->value);
        }

        foreach ( $arrMatches[1] as $strMatch )
        {
          $subfield = new Xerxes_Marc_Subfield();
          $subfield->code = "a";
          $subfield->value = "From title: " . $strMatch;
          $note_field->addSubField($subfield);
        }
      }

      // sub title is only these wacky notes

      if ( $title_sub != null )
      {
        $subfield = new Xerxes_Marc_Subfield();
        $subfield->code = "a";
        $subfield->value = "From title: " . $title_sub->value;
        $note_field->addSubField($subfield);

        $title_sub->value = "";
      }

      if ( $note_field->subfield("a")->length() > 0 )
      {
        $this->addDataField($note_field);
      }
    }

    // psycinfo and related databases

    if ( strstr($this->source, "EBSCO_PDH") || strstr($this->source, "EBSCO_PSYH") || strstr($this->source, "EBSCO_LOH") )
    {
      // includes a 502 that is not a thesis note -- bonkers!
      // need to make this a basic note, otherwise xerxes will assume this is a thesis

      foreach ( $this->datafield("502") as $thesis )
      {
        $thesis->tag = "500";
      }
    }

    ######## PARENT MAPPING ###########

    parent::map();

    ###################################

    // metalib's own year, issue, volume fields

    $year = $this->datafield("YR ")->subfield("a")->__toString();

    if ( $year != "" )
    {
      $this->year = $year;
    }

    if ( $this->issue == null )
    {
      $this->issue = $this->datafield("ISS")->subfield("a")->__toString();
    }

    if ( $this->volume == null )
    {
      $this->volume = $this->datafield("VOL")->subfield("a")->__toString();
    }

    // book chapters

    if ( $this->journal_title != "" && count($this->isbns) > 0 && count($this->issns) == 0 )
    {
      $this->book_title = $this->journal_title;
      $this->journal_title = "";
      $this->format = "Book Chapter";
    }

    ## ebsco 77X weirdness

    if ( strstr($this->source, "EBSCO") )
    {
      // pages in $p (abbreviated title)

      $pages = $this->datafield("773")->subfield('p')->__toString();

      if ( $pages != "" )
      {
        $this->short_title = "";
      }

      // book chapter

      $btitle = $this->datafield("771")->subfield('a')->__toString();

      if ( $btitle != "" )
      {
        $this->book_title = $btitle;
        $this->format = "Book Chapter";
      }
    }

    ## oclc dissertation abstracts

    // (HACK) 10/1/2007 this assumes that the diss abs record includes the 904, which means
    // there needs to be a local search config that performs an 'add new' action rather than
    // the 'remove' action that the parser uses by default

    if ( strstr($this->source, "OCLC_DABS") )
    {
      $this->degree = $this->datafield("904")->subfield("j")->__toString();
      $this->institution = $this->datafield("904")->subfield("h")->__toString();
      $this->journal_title = $this->datafield("904")->subfield("c")->__toString();

      $this->journal = $this->journal_title . " " . $this->journal;

      if ( $this->journal_title == "MAI" )
      {
        $this->format = "Thesis";
      }
      else
      {
        $this->format = "Dissertation";
      }
    }

    // random format related changes; the first matching source wins

    if ( strstr($this->source, 'ERIC') && strstr($this->eric_number, 'ED') && ! stristr($this->title, "proceeding") )
    {
      $this->format = "Report";
    }
    elseif ( strstr($this->source, 'ERIC') && ! strstr($this->eric_number, 'ED') )
    {
      $this->format = "Article";
    }
    elseif ( strstr($this->source, 'OCLC_PAPERS') )
    {
      $this->format = "Conference Paper";
    }
    elseif ( strstr($this->source, 'PCF1') )
    {
      $this->format = "Conference Proceeding";
    }
    elseif ( stristr($this->source, "GOOGLE_B") )
    {
      $this->format = "Book";
    }
    elseif ( strstr($this->source, "EBSCO_LOH") )
    {
      $this->format = "Tests & Measures";
    }
    elseif ( strstr($this->source, "OXFORD_MUSIC_ONLINE") )
    {
      $this->format = "Article";
    }
    elseif ( strstr($this->source, "ESPACENET") ||
      strstr($this->source, "WIPO_PCT") ||
      strstr($this->source, "USPA") ||
      strstr($this->source, "DEPATIS") )
    {
      $this->format = "Patent";
    }
    elseif ( strstr($this->source, "DART") ||
      strstr($this->source, "DDI") ||
      strstr($this->source, "ETHOS") ||
      strstr($this->source, "DIVA_EXTR") ||
      strstr($this->source, "UNION_NDLTD") )
    {
      $this->format = "Thesis";
    }

    // JSTOR book review correction: title is meaningless, but subjects
    // contain the title of the books, so we'll swap them to the title here

    if ( strstr($this->source, 'JSTOR') && $this->title == "Review" )
    {
      $this->title = "";
      $this->sub_title = "";

      foreach ( $this->subjects as $subject )
      {
        $this->title .= " " . $subject->value;
      }

      $this->title = trim($this->title);

      // empty array rather than null, so later count() / foreach over the
      // subjects keeps working

      $this->subjects = array();

      $this->format = "Book Review";
    }

    // jstor links are all pdfs

    if ( strstr($this->source, 'JSTOR') )
    {
      for ( $x = 0; $x < count($this->links); $x++ )
      {
        $this->links[$x][2] = "pdf";
      }
    }

    // CSA subject term clean-up,
    // since they put an asterisk in front of each term (2009-09-30)

    if ( strstr($this->source, 'CSA_') )
    {
      foreach ( $this->subjects as $subject_object )
      {
        $subject_object->value = str_replace("*", "", $subject_object->value);
      }
    }

    // demote links based on config

    $objConfig = Xerxes_Framework_Registry::getInstance();
    $configIgnoreFullText = $objConfig->getConfig("FULLTEXT_IGNORE_SOURCES", false);

    // build the ignore list without empty entries: an unset or empty config
    // value would otherwise yield array("") and match every record whose
    // source or metalib id is empty

    $arrIgnore = array();

    foreach ( explode(",", (string) $configIgnoreFullText) as $strIgnore )
    {
      $strIgnore = str_replace(" ", "", $strIgnore);

      if ( $strIgnore != "" )
      {
        array_push($arrIgnore, $strIgnore);
      }
    }

    // the test is the same for every link of this record, so do it once

    if ( in_array($this->source, $arrIgnore, true) || in_array($this->metalib_id, $arrIgnore, true) )
    {
      for ( $x = 0; $x < count($this->links); $x++ )
      {
        $this->links[$x][2] = "original_record";
      }
    }
  }
  530. ### PROPERTIES ###
  /**
   * Get the Metalib id stored on this record
   *
   * @return string
   */
  public function getMetalibID()
  {
    return $this->metalib_id;
  }
  /**
   * Get the result set value stored on this record
   *
   * @return mixed
   */
  public function getResultSet()
  {
    return $this->result_set;
  }
  /**
   * Set the result set value stored on this record
   *
   * @param mixed $data  new result set value
   */
  public function setResultSet($data)
  {
    $this->result_set = $data;
  }
  /**
   * Get the record number stored on this record
   *
   * @return mixed
   */
  public function getRecordNumber()
  {
    return $this->record_number;
  }
  /**
   * Set the record number stored on this record
   *
   * @param mixed $data  new record number
   */
  public function setRecordNumber($data)
  {
    $this->record_number = $data;
  }
  /**
   * Get the database name stored on this record
   *
   * @return string
   */
  public function getDatabaseName()
  {
    return $this->database_name;
  }
  555. ### until we move this elsewhere
  556. static $TemplateEmptyValue = "Xerxes_Record_lookupTemplateValue_placeholder_missing";
  /**
   * Take a Metalib-style template for a URL, including $100_a style
   * placeholders, and replace the placeholders with the values looked up
   * by lookupTemplateValue()
   *
   * @param string $template
   * @return string|null   the filled-out url, or null when any placeholder
   *                       could not be resolved (or the replacement failed)
   */
  protected function resolveUrlTemplate($template)
  {
    # For some reason Metalib uses $0100 placeholder to correspond
    # to an SID field. If I understand how this works, this is nothing
    # but a synonym for $SID_c, so we'll use that. Absolutely no idea
    # why Metalib uses $0100 as syntactic sugar instead.

    $template = str_replace('$0100', '$SID_c', $template);

    $filled_out = preg_replace_callback('/\$([a-zA-Z0-9]{2,3})(_(.))?/', array($this, 'lookupTemplateValue'), $template);

    // preg_replace_callback() returns null on a regex error

    if ( $filled_out === null )
    {
      return null;
    }

    // Make sure it doesn't have our special value indicating a placeholder
    // could not be resolved. strpos() returns 0 -- which is falsy -- when the
    // marker sits at the very start of the string (a template that begins
    // with a placeholder), so the result has to be compared against false.

    if ( strpos($filled_out, self::$TemplateEmptyValue) !== false )
    {
      // Consistent with Metalib behavior, if a placeholder can't be resolved,
      // there is no link generated.

      return null;
    }

    return $filled_out;
  }
  /**
   * Callback for the preg_replace_callback() call in resolveUrlTemplate()
   *
   * Takes the $matches array of a single placeholder, with the MARC field in
   * $matches[1] and, when the placeholder names one, the subfield code in
   * $matches[3], and returns the corresponding value from this record.
   *
   * @param array $matches
   * @return string   the value, or self::$TemplateEmptyValue when the record
   *                  has no value for the placeholder
   */
  protected function lookupTemplateValue($matches)
  {
    $field = $matches[1];

    // the subfield group is absent from $matches when the placeholder has none
    $subfield = isset($matches[3]) ? $matches[3] : "";

    // compare against the empty string rather than relying on truthiness:
    // "0" is a usable subfield code, but is falsy in php

    if ( $subfield !== "" )
    {
      $value = $this->datafield($field)->subfield($subfield)->__toString();
    }
    else
    {
      // assume it's a control field, those are the only ones without subfields

      $value = $this->controlfield($field)->__toString();
    }

    // same reasoning here: empty() would also reject a legitimate value of "0"

    if ( $value === null || $value === "" )
    {
      // Couldn't resolve the placeholder, that means we should NOT
      // generate a URL. Put a special token in there, which
      // resolveUrlTemplate() looks for afterwards.

      return self::$TemplateEmptyValue;
    }

    return $value;
  }
  /**
   * Add the links produced by Metalib link templates (types 'holdings' and
   * 'original_record') to each of the supplied records
   *
   * @param array $records                   records to add links to
   * @param mixed $objRequest                passed through to Xerxes_Helper::databaseToLinksNodeset()
   * @param mixed $objRegistry               passed through to Xerxes_Helper::databaseToLinksNodeset()
   * @param DOMDocument $database_links_dom  [optional] document with a <database_links> section;
   *                                         when null it is built here, and because the parameter is
   *                                         taken by reference the caller's variable receives it
   */
  public static function completeUrlTemplates($records, $objRequest, $objRegistry, &$database_links_dom = null)
  {
    // no database_links document supplied, so assemble one from the
    // databases these records came from

    if ( $database_links_dom == null )
    {
      $ids = array();

      foreach ( $records as $record )
      {
        $ids[] = $record->getMetalibID();
      }

      $data_map = new Xerxes_DataMap();
      $found = $data_map->getDatabases($ids);

      $database_links_dom = new DOMDocument();
      $database_links_dom->loadXML("<database_links/>");

      foreach ( $found as $database )
      {
        $nodeset = Xerxes_Helper::databaseToLinksNodeset($database, $objRequest, $objRegistry);
        $imported = $database_links_dom->importNode($nodeset, true);
        $database_links_dom->documentElement->appendChild($imported);
      }
    }

    // templates keyed on metalib id, then on link type

    $templates_by_database = self::getLinkTemplates($database_links_dom);

    // add link to native record and to external holdings url too, if
    // available from a metalib template

    foreach ( $records as $record )
    {
      $id = $record->getMetalibID();

      if ( ! $id || ! array_key_exists($id, $templates_by_database) )
      {
        continue;
      }

      foreach ( $templates_by_database[$id] as $type => $template )
      {
        $url = $record->resolveUrlTemplate($template);

        if ( empty($url) )
        {
          continue; // a placeholder could not be resolved, so no link
        }

        array_push($record->links, array(null, $url, $type));
      }
    }
  }
  /**
   * Create a hash of metalib-style URL templates for a set of databases,
   * extracted from a Xerxes <database_links> structure for more convenient
   * and quicker use. Structure of the hash is:
   *
   *   { metalib_id1 => { "original_record" => template,
   *                      "holdings" => template },
   *     metalib_id2 => [...] }
   *
   * Declared static because it uses no instance state and is called as
   * self::getLinkTemplates() from the static completeUrlTemplates().
   *
   * @param DOMDocument $xml   document containing a <database_links> section
   * @return array
   */
  protected static function getLinkTemplates($xml)
  {
    $link_templates = array();

    $dbXPath = new DOMXPath($xml);
    $objDbXml = $dbXPath->evaluate('//database_links/database');

    foreach ( $objDbXml as $dbXml )
    {
      $metalib_id = $dbXml->getAttribute("metalib_id");
      $link_templates[$metalib_id] = array();

      foreach ( $dbXml->childNodes as $node )
      {
        // only elements have a tagName; skip comments, as well as text
        // nodes such as the whitespace between elements

        if ( ! ($node instanceof DOMElement) )
        {
          continue;
        }

        if ( $node->tagName == 'link_native_record' )
        {
          $link_templates[$metalib_id]["original_record"] = $node->textContent;
        }
        elseif ( $node->tagName == 'link_native_holdings' )
        {
          $link_templates[$metalib_id]["holdings"] = $node->textContent;
        }
      }
    }

    return $link_templates;
  }
  698. }
  /**
   * Exception type for a url template placeholder that cannot be resolved
   */
  class UrlTemplatePlaceholderMissing extends Exception
  {
  }
  700. ?>