PageRenderTime 31ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/php-views/ezproxy-export.php

http://xerxes-portal.googlecode.com/
PHP | 286 lines | 197 code | 52 blank | 37 comment | 37 complexity | b47f5a171e40060005042783302b349f MD5 | raw file
  1. <?php
  2. /**
  3. * Outputs an EZProxy config file from Metalib/Xerxes KB. See:
  4. * http://code.google.com/p/xerxes-portal/wiki/EzProxyExport
  5. *
  6. * @author Jonathan Rochkind
  7. * @copyright 2009 Johns Hopkins University
  8. * @link http://xerxes.calstate.edu
  9. * @license http://www.gnu.org/licenses/
  10. * @version $Id: ezproxy-export.php 1552 2010-12-07 22:54:43Z rochkind@jhu.edu $
  11. * @package Xerxes
  12. */
  13. /*
  14. For documentation of the EZProxy config format, see:
  15. http://www.oclc.org/us/en/support/documentation/ezproxy/cfg/database.htm
  16. http://www.oclc.org/us/en/support/documentation/ezproxy/cfg/groups.htm
  17. */
  18. // inherits $objRegistry and $objXml
  19. header("Content-type: text/plain");
  20. // simpleXml is a lot easier to work with than DOM
  21. $objDatabases = simplexml_import_dom($objXml->documentElement);
  22. // First sort and process
  23. $warnings = array();
  24. $exporter = new EzProxyExportGen($objRegistry);
  25. foreach ( $objDatabases->databases->database as $xmlDatabase) {
  26. try {
  27. if ($xmlDatabase->proxy == '1') {
  28. $exporter->addDbXml( $xmlDatabase );
  29. }
  30. }
  31. catch (Exception $e) {
  32. array_push($warnings, $e->getMessage());
  33. }
  34. }
  35. $exporter->createConfig($warnings);
  36. if (count($warnings) > 0 ) {
  37. print "# NOTE: Warnings for Xerxes EZProxy output: \n#\n";
  38. foreach ($warnings as $w) {
  39. print "# " . str_replace("\n", " ", $w) . "\n#\n";
  40. }
  41. }
  42. class EzProxyExportGen {
  43. private $objRegistry = null;
  44. private $index_by_domain = array();
  45. private $index_by_restriction = array();
  46. public function __construct($argRegistry) {
  47. $this->objRegistry = $argRegistry;
  48. }
  49. // Takes a simple xml object representing a database, in Xerxes.
  50. public function addDbXml($xmlDatabase) {
  51. $dbHash = $this->makeDbHash($xmlDatabase);
  52. // To begin with, index it by domain, to get urls in same
  53. // domain together.
  54. $domain = $dbHash['domain'];
  55. if (! array_key_exists($domain, $this->index_by_domain)) {
  56. $this->index_by_domain[ $domain ] = array();
  57. }
  58. array_push( $this->index_by_domain[$domain], $dbHash );
  59. }
  60. public function createConfig(&$warnings) {
  61. // We have our individual entries grouped by domains.
  62. // We need to take these domain groups, and group them
  63. // by access control.
  64. foreach ($this->index_by_domain as $domainList) {
  65. $first_entry = $domainList[0];
  66. $groups = array_merge($first_entry['group_restrictions']); // array_merge to make a copy
  67. for ($i = 1; $i < count($domainList) - 1; $i++ ) {
  68. $entry = $domainList[$i];
  69. if ( count( array_diff($entry['group_restrictions'], $groups)) > 0) {
  70. array_push($warnings, "Conflicting group restrictions in two resources with same domain can not be enforced. Ezproxy restrictions may be more generous than intended: 1) " . $first_entry["title"] . "(" . $first_entry["metalib_id"] . ") 2) " . $entry["title"] . ")" . $entry["metalib_id"] .")");
  71. $groups = array_unique(array_merge( $groups, $entry['group_restrictions']));
  72. }
  73. }
  74. // We can't combine default with other restrictions, default
  75. // trumps it.
  76. if ( in_array('Default', $groups)) {
  77. $restriction_key = 'Default';
  78. }
  79. else {
  80. sort($groups);
  81. $restriction_key = join(';',$groups);
  82. }
  83. if (! array_key_exists($restriction_key, $this->index_by_restriction)) {
  84. $this->index_by_restriction[$restriction_key] = array();
  85. }
  86. array_push( $this->index_by_restriction[$restriction_key], $domainList );
  87. }
  88. // Now we've grouped by restriction, let's output.
  89. foreach (array_keys( $this->index_by_restriction ) as $restriction_key) {
  90. $groups = explode(';', $restriction_key);
  91. $ezproxy_groups = array();
  92. foreach ($groups as $group) {
  93. array_push($ezproxy_groups, $this->getEzProxyGroup($group));
  94. }
  95. if (count($ezproxy_groups) > 0) {
  96. print "\n\n#EZProxy group for metalib secondary affiliations: $restriction_key\n";
  97. print ("Group " . implode("+", $ezproxy_groups) . "\n\n");
  98. }
  99. foreach ( $this->index_by_restriction[$restriction_key] as $domainList ) {
  100. $first_entry = $domainList[0];
  101. print "Title ". $first_entry['title'] . " (" . $first_entry['metalib_id'] . ")";
  102. if ( count($domainList) > 1 ) {
  103. print " (and others) ";
  104. print "\n# Complete list of included Metalib IRD IDs: ";
  105. foreach ($domainList as $domainHash) {
  106. print $domainHash["metalib_id"] . " ";
  107. }
  108. }
  109. print "\n";
  110. print "URL " . $first_entry['url'] . "\n";
  111. print "Domain " . $first_entry['domain'] . "\n";
  112. // Any other hosts in this domain group? Find em and unique em.
  113. $included_hosts = array( $first_entry['url_host'] );
  114. for($i = 2; $i < count($domainList) - 1 ; $i++) {
  115. $new_host = $domainList[$i]['url_host'];
  116. if ( ! in_array($new_host, $included_hosts)) {
  117. array_push($included_hosts, $new_host);
  118. print "Host " . $new_host . "\n";
  119. }
  120. }
  121. // Derived hosts.
  122. $new_host = $first_entry['domain'];
  123. if (! in_array( $new_host, $included_hosts )) {
  124. array_push($included_hosts, $new_host);
  125. print "Host " . $new_host . "\n";
  126. }
  127. $new_host = "www." . $first_entry['domain'];
  128. if (! in_array($new_host, $included_hosts)) {
  129. array_push($included_hosts, $new_host);
  130. print "Host " . $new_host . "\n";
  131. }
  132. print "\n\n";
  133. }
  134. }
  135. }
  136. // Takes a simple xml object representing a database, in Xerxes.
  137. public function makeDbHash($xmlDatabase) {
  138. $hash = array();
  139. $hash['title'] = $xmlDatabase->title_display;
  140. if (! $hash['title'] ) $hash['title'] = 'unknown/missing';
  141. if ( $this->shouldOmitResourceID( $xmlDatabase->metalib_id ) ) {
  142. throw new Exception("Omitted as per Xerxes ezp_exp_resourceid_omit config: " . $xmlDatabase->metalib_id);
  143. }
  144. if (! $xmlDatabase->link_native_home ) {
  145. throw new Exception("Could not include db in ezproxy export, missing title: " . $xmlDatabase->metalib_id);
  146. }
  147. $parsed_url = parse_url($xmlDatabase->link_native_home);
  148. if (! array_key_exists('host', $parsed_url)) {
  149. throw new Exception("Could not include db in ezproxy export. Malformed url? " . $xmlDatabase->metalib_id . " url: " . $xmlDatabase->link_native_home );
  150. }
  151. $hash['metalib_id'] = (string) $xmlDatabase->metalib_id;
  152. $hash['url'] = (string) $xmlDatabase->link_native_home;
  153. $hash['url_host'] = $parsed_url['host'];
  154. $hash['domain'] = $this->getDomain($parsed_url);
  155. // Any access restrictions?
  156. $hash['group_restrictions'] = array();
  157. foreach ($xmlDatabase->group_restriction as $restriction) {
  158. array_push( $hash['group_restrictions'], (string) $restriction);
  159. }
  160. if (count($hash['group_restrictions']) == 0) {
  161. $hash['group_restrictions'] = array('Default');
  162. }
  163. return $hash;
  164. }
  165. protected function getDomain($parsed_url) {
  166. $host = $parsed_url['host'];
  167. // If host is numeric, we can't create a domain statement.
  168. // It's bad to use a numeric host, but oh well.
  169. if (preg_match("/^(\d|\.)+$/", $host)) {
  170. return $host;
  171. }
  172. $components = explode('.', $host);
  173. if ( count($components) > 2 ) {
  174. $domain = join('.', array_slice($components, 1));
  175. // Make sure it's not on our configured avoid list
  176. if (! $this->shouldAvoidDomain($domain)) {
  177. return $domain;
  178. }
  179. elseif (! $this->shouldAvoidDomain($host)) {
  180. return $host;
  181. }
  182. else {
  183. throw new Exception("Can't include resource because domain is in ezproxy domain avoid list: ". $host );
  184. }
  185. }
  186. elseif (! $this->shouldAvoidDomain($host)) {
  187. // No domain statement needed, two-element host, like "ebsco.com".
  188. return $host;
  189. }
  190. else {
  191. throw new Exception("Can't include resource because domain is in ezproxy domain avoid list: ". $host);
  192. }
  193. }
  194. protected function shouldAvoidDomain($domain) {
  195. $avoidList = $this->objRegistry->getConfig('ezp_exp_domain_avoid', false, '');
  196. foreach( explode(',', $avoidList) as $avoid) {
  197. if (trim($avoid) == $domain) {
  198. # nevermind, can't use that domain, just use the host.
  199. return true;
  200. }
  201. }
  202. return false;
  203. }
  204. protected function shouldOmitResourceId($resourceID) {
  205. $avoidList = $this->objRegistry->getConfig('ezp_exp_resourceid_omit', false, '');
  206. foreach( explode(',', $avoidList) as $avoid) {
  207. if (trim($avoid) == $resourceID) {
  208. # nevermind, can't use that domain, just use the host.
  209. return true;
  210. }
  211. }
  212. return false;
  213. }
  214. protected function getEzProxyGroup($metalib_group) {
  215. if ($metalib_group == "Default") {
  216. $config = $this->objRegistry->getConfig("ezp_exp_default_group", false);
  217. if ( $config ) {
  218. return $config;
  219. }
  220. else {
  221. return "Default";
  222. }
  223. }
  224. else {
  225. $xml = $this->objRegistry->getGroupXml($metalib_group);
  226. if ($xml && $xml->ezp_exp_group ) {
  227. return $xml->ezp_exp_group;
  228. }
  229. else {
  230. return $metalib_group;
  231. }
  232. }
  233. }
  234. }