PageRenderTime 27ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/app/arc/ARC2_Reader.php

https://github.com/rinie/TransFormr
PHP | 400 lines | 325 code | 33 blank | 42 comment | 78 complexity | 2eb90747ea5796d14de1cb44cc3b38a8 MD5 | raw file
  1. <?php
  2. /**
  3. * ARC2 Web Client
  4. *
  5. * @author Benjamin Nowack
  6. * @license <http://arc.semsol.org/license>
  7. * @homepage <http://arc.semsol.org/>
  8. * @package ARC2
  9. * @version 2010-03-31
  10. */
  11. ARC2::inc('Class');
  12. class ARC2_Reader extends ARC2_Class {
  /**
   * Creates the reader; all set-up is delegated to the parent constructor.
   *
   * $a no longer declares a default value: a default placed in front of the
   * required by-reference $caller could never be used by a caller, and PHP 8
   * reports such a signature as deprecated. Every call that worked before
   * still works unchanged.
   *
   * @param mixed $a      Configuration, handed to the parent (presumably what
   *                      ends up in $this->a - confirm in ARC2_Class).
   * @param mixed $caller Calling component, handed to the parent by reference.
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }
  /**
   * Legacy (PHP 4 style) constructor name; simply forwards to __construct().
   *
   * The unusable default on $a was removed for the same reason as in
   * __construct(): an optional parameter in front of a required one is
   * implicitly required and deprecated in PHP 8.
   *
   * @param mixed $a      Configuration, forwarded as-is.
   * @param mixed $caller Calling component, forwarded by reference.
   */
  function ARC2_Reader($a, &$caller) {
    $this->__construct($a, $caller);
  }
  /**
   * Initialises the reader state after the parent's own initialisation.
   *
   * Options read from the configuration ($this->a): http_method, message_body,
   * http_accept_header, http_user_agent_header, http_custom_headers,
   * max_redirects, format, reader_timeout, reader_auth_infos.
   * (The original inline note also lists inc_path, proxy_host, proxy_port and
   * proxy_skip as recognised options.)
   */
  function __init() {
    parent::__init();
    /* property => array(config key, fallback); a null key marks plain runtime state */
    $setup = array(
      'http_method' => array('http_method', 'GET'),
      'message_body' => array('message_body', ''),
      'http_accept_header' => array('http_accept_header', 'Accept: application/rdf+xml; q=0.9, */*; q=0.1'),
      'http_user_agent_header' => array('http_user_agent_header', 'User-Agent: ARC Reader (http://arc.semsol.org/)'),
      'http_custom_headers' => array('http_custom_headers', ''),
      'max_redirects' => array('max_redirects', 3),
      'format' => array('format', false),
      'redirects' => array(null, array()),
      'stream_id' => array(null, ''),
      'timeout' => array('reader_timeout', 30),
      'response_headers' => array(null, array()),
      'digest_auth' => array(null, 0),
      'auth_infos' => array('reader_auth_infos', array()),
    );
    foreach ($setup as $prop => $spec) {
      if ($spec[0] === null) {
        $this->$prop = $spec[1];
      }
      else {
        $this->$prop = $this->v($spec[0], $spec[1], $this->a);
      }
    }
  }
  35. /* */
  /**
   * Sets the HTTP method for subsequent requests.
   *
   * @param string $v Method name; getHTTPSocket() upper-cases it before use.
   */
  function setHTTPMethod($v) {
    $this->http_method = $v;
  }
  /**
   * Sets the request body; getHTTPSocket() only sends it for POST requests.
   *
   * @param string $v Raw message body.
   */
  function setMessageBody($v) {
    $this->message_body = $v;
  }
  /**
   * Replaces the Accept header line sent with requests.
   *
   * @param string $v Complete header line including the "Accept: " prefix
   *                  (it is written to the request verbatim).
   */
  function setAcceptHeader($v) {
    $this->http_accept_header = $v;
  }
  /**
   * Replaces all custom request headers.
   *
   * @param string $v Header lines, CRLF-separated when more than one
   *                  (the format addCustomHeaders() produces).
   */
  function setCustomHeaders($v) {
    $this->http_custom_headers = $v;
  }
  /**
   * Appends header line(s) to the custom request headers.
   *
   * @param string $v Header line(s) to append; a CRLF separator is inserted
   *                  only when headers are already present.
   */
  function addCustomHeaders($v) {
    $separator = $this->http_custom_headers ? "\r\n" : '';
    $this->http_custom_headers .= $separator . $v;
  }
  52. /* */
  /**
   * Prepares the reader for a resource: applies credentials, opens the
   * matching stream and (unless only pinging) detects the format.
   *
   * Nothing is re-opened when path and data are identical to the previous
   * activation (compared through an md5 stream id).
   *
   * Fix: the data-URI check now also accepts URIs without a media type
   * ("data:,payload", allowed by RFC 2397) and payloads containing line
   * breaks; both previously fell through to the socket code path.
   *
   * @param string $path      URL, file path or data URI.
   * @param string $data      Inline data; when non-empty it is read instead of $path.
   * @param mixed  $ping_only Truthy to skip response-header reading and format detection.
   * @param int    $timeout   Overrides the configured timeout when non-zero.
   */
  function activate($path, $data = '', $ping_only = 0, $timeout = 0) {
    $this->setCredentials($path);
    $this->ping_only = $ping_only;
    if ($timeout) $this->timeout = $timeout;
    $id = md5($path . ' ' . $data);
    /* same resource as last time: keep the current stream */
    if ($this->stream_id == $id) return;
    $this->stream_id = $id;
    /* data uri? (media type is optional, payload may span several lines) */
    if (!$data && preg_match('/^data\:([^\,]*)\,(.*)$/s', $path, $m)) {
      $path = '';
      $data = preg_match('/base64/', $m[1]) ? base64_decode($m[2]) : rawurldecode($m[2]);
    }
    $this->base = $this->calcBase($path);
    $this->uri = $this->calcURI($path, $this->base);
    $this->stream = ($data) ? $this->getDataStream($data) : $this->getSocketStream($this->base, $ping_only);
    if ($this->stream && !$this->ping_only) {
      $this->getFormat();
    }
  }
  /**
   * Adds an Authorization header for the first configured credential whose
   * pattern matches the URL.
   *
   * Credentials come from the "arc_reader_credentials" option
   * (pattern => credential). A credential containing "::" (user::password) is
   * used for Digest auth, which needs a Digest challenge from a previous
   * response or stored auth infos; anything else is base64-encoded and sent
   * as Basic auth.
   *
   * Fixes over the previous version:
   * - the Digest header no longer contains the "uri" parameter twice;
   * - "nc" is written as 8 hex digits as required by RFC 2617;
   * - HA2 is computed from the upper-cased method, i.e. the one
   *   getHTTPSocket() actually sends;
   * - several WWW-Authenticate headers (array value) are handled by picking
   *   the first Digest challenge instead of passing an array to preg_match();
   * - an Authorization header added by an earlier call is replaced instead of
   *   being stacked, so repeated requests do not send several of them.
   *
   * @param string $url URL about to be requested.
   * @return int|null 0 when no credentials are configured, otherwise nothing.
   */
  function setCredentials($url) {
    if (!$creds = $this->v('arc_reader_credentials', array(), $this->a)) return 0;
    foreach ($creds as $pattern => $cred) {
      /* only ":", "/", "." and "?" are escaped, other regex syntax in the pattern stays active */
      $regex = '/' . preg_replace('/([\:\/\.\?])/', '\\\\\1', $pattern) . '/';
      if (!preg_match($regex, $url)) continue;
      $parts = parse_url($url);
      $path = $this->v1('path', '/', $parts);
      /* Basic auth */
      $auth = 'Basic ' . base64_encode($cred);
      /* Digest auth */
      if (preg_match('/(.*)\:\:(.*)/', $cred, $m)) {
        $username = $m[1];
        $pwd = $m[2];
        $auth = '';
        $hs = $this->getResponseHeaders();
        /* 401 received */
        $h = $this->v('www-authenticate', '', $hs);
        if (is_array($h)) {/* several challenges offered: use the first Digest one */
          $digest_h = '';
          foreach ($h as $candidate) {
            if (preg_match('/Digest/i', $candidate)) {
              $digest_h = $candidate;
              break;
            }
          }
          $h = $digest_h;
        }
        if ($h && preg_match('/Digest/i', $h)) {
          $auth = 'Digest ';
          /* Digest realm="$realm", nonce="$nonce", qop="auth", opaque="$opaque" */
          $ks = array('realm', 'nonce', 'opaque');/* skipping qop, assuming "auth" */
          foreach ($ks as $i => $k) {
            $val = preg_match('/' . $k . '=\"?([^\"]+)\"?/i', $h, $m) ? $m[1] : '';
            $auth .= ($i ? ', ' : '') . $k . '="' . $val . '"';
            $this->auth_infos[$k] = $val;
          }
          $this->auth_infos['auth'] = $auth;
          $this->auth_infos['request_count'] = 1;
        }
        /* 401 or repeated request */
        if ($this->v('auth', 0, $this->auth_infos)) {
          $qop = 'auth';
          $auth = $this->auth_infos['auth'];
          $rc = $this->auth_infos['request_count'];
          $realm = $this->auth_infos['realm'];
          $nonce = $this->auth_infos['nonce'];
          $ha1 = md5($username . ':' . $realm . ':' . $pwd);
          /* same method spelling as on the request line */
          $ha2 = md5(strtoupper($this->http_method) . ':' . $path);
          $nc = sprintf('%08x', $rc);/* nc-value is exactly 8 hex digits */
          $cnonce = dechex($rc * 2);
          $resp = md5($ha1 . ':' . $nonce . ':' . $nc . ':' . $cnonce . ':' . $qop . ':' . $ha2);
          $auth .= ', username="' . $username . '"' .
            ', uri="' . $path . '"' .
            ', qop=' . $qop .
            ', nc=' . $nc .
            ', cnonce="' . $cnonce . '"' .
            ', response="' . $resp . '"';
          $this->auth_infos['request_count'] = $rc + 1;
        }
      }
      /* add header */
      if ($auth) {
        /* drop an Authorization line left by an earlier call before adding the new one */
        $kept = preg_replace('/^Authorization\:[^\r\n]*(\r\n|\r|\n)?/mi', '', (string) $this->http_custom_headers);
        $this->http_custom_headers = trim($kept);
        $this->addCustomHeaders('Authorization: ' . $auth);
        break;
      }
    }
  }
  132. /* */
  /**
   * Tells whether a request for $url has to go through the configured proxy.
   *
   * @param string $url Target URL.
   * @return bool False when no "proxy_host" is configured or when the URL
   *              contains one of the "proxy_skip" strings, true otherwise.
   */
  function useProxy($url) {
    $proxy_host = $this->v1('proxy_host', 0, $this->a);
    if ($proxy_host) {
      foreach ($this->v1('proxy_skip', array(), $this->a) as $exempt) {
        if (strpos($url, $exempt) !== false) return false;
      }
      return true;
    }
    return false;
  }
  145. /* */
  /**
   * Opens a stream without the credential handling and format detection done
   * by activate().
   *
   * @param string $path URL or file path; its base is stored in $this->base.
   * @param string $data Inline data; when non-empty it is used instead of $path.
   */
  function createStream($path, $data = '') {
    $this->base = $this->calcBase($path);
    if ($data) {
      $this->stream = $this->getDataStream($data);
    }
    else {
      $this->stream = $this->getSocketStream($this->base);
    }
  }
  /**
   * Wraps a string in the stream structure consumed by readStream().
   *
   * @param string $data Content to expose as a stream.
   * @return array Stream of type "data" positioned at offset 0, with empty
   *               headers and an empty look-ahead buffer.
   */
  function getDataStream($data) {
    $stream = array();
    $stream['type'] = 'data';
    $stream['pos'] = 0;
    $stream['headers'] = array();
    $stream['size'] = strlen($data);
    $stream['data'] = $data;
    $stream['buffer'] = '';
    return $stream;
  }
  /**
   * Opens a socket stream for a file, http or https URL by dispatching to
   * getFileSocket() / getHTTPSocket().
   *
   * Fix: a URL without a scheme (or one parse_url() cannot parse at all) no
   * longer triggers an undefined-index / offset-on-false warning; as before,
   * nothing is returned for it.
   *
   * @param string $url Base URL of the resource.
   * @return mixed Stream array on success, the addError() result on failure,
   *               nothing (null) for unsupported or missing schemes.
   */
  function getSocketStream($url) {
    if ($url == 'file://') {
      return $this->addError('Error: file does not exists or is not accessible');
    }
    $parts = parse_url($url);
    /* parse_url() gives false for malformed input and omits "scheme" for relative references */
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $mappings = array('file' => 'File', 'http' => 'HTTP', 'https' => 'HTTP');
    if ($scheme && ($type = $this->v($scheme, '', $mappings))) {
      return $this->m('get' . $type . 'Socket', $url, $this->getDataStream(''));
    }
  }
  /**
   * Opens a local file (binary read mode) as a socket-type stream.
   *
   * @param string $url file URL; only its path component is used.
   * @return mixed Stream array (handle stored by reference, size taken from
   *               filesize()), or the addError() result when the file is
   *               missing or cannot be opened.
   */
  function getFileSocket($url) {
    $parts = parse_url($url);
    $handle = false;
    if (file_exists($parts['path'])) {
      $handle = @fopen($parts['path'], 'rb');
    }
    if (!$handle) {
      return $this->addError('Socket error: Could not open "' . $parts['path'] . '"');
    }
    return array(
      'type' => 'socket',
      'socket' => &$handle,
      'headers' => array(),
      'pos' => 0,
      'size' => filesize($parts['path']),
      'buffer' => ''
    );
  }
  /**
   * Sends an HTTP/1.0 request and returns a stream positioned at the start of
   * the response body.
   *
   * Unless only pinging, the response headers are parsed (names lower-cased,
   * repeated headers collected into arrays) and stored in
   * $this->response_headers together with the derived keys "response-code",
   * "error", "redirect", "format" and "encoding". Redirects are followed, and
   * a Digest challenge is answered once by retrying with credentials.
   *
   * Fixes over the previous version:
   * - the socket is closed on time-out, on HTTP errors and before the digest
   *   retry (it used to leak);
   * - an unsupported scheme no longer leaves the socket / error variables
   *   undefined;
   * - the digest retry keeps the redirect counter and the previous URL parts,
   *   so it also works after a relative redirect;
   * - at most max_redirects redirects are followed (it used to be one more);
   * - a host-relative redirect inherits the port of the previous request;
   * - the ssl:// target always carries an explicit port;
   * - several WWW-Authenticate headers no longer hand an array to preg_match().
   *
   * @param string       $url        Request URL (may be relative after a redirect).
   * @param int          $redirs     Number of redirects followed so far.
   * @param array|string $prev_parts parse_url() parts of the redirecting request, '' for none.
   * @return mixed Stream array on success, otherwise the addError() result.
   */
  function getHTTPSocket($url, $redirs = 0, $prev_parts = '') {
    $parts = parse_url($url);
    if (!is_array($parts)) $parts = array();/* parse_url() returns false on seriously malformed URLs */
    /* relative redirect */
    if ($prev_parts) {
      if (!isset($parts['host'])) {
        /* same authority as before, so keep its port as well */
        if (!isset($parts['port']) && isset($prev_parts['port'])) $parts['port'] = $prev_parts['port'];
        if (isset($prev_parts['host'])) $parts['host'] = $prev_parts['host'];
      }
      if (!isset($parts['scheme']) && isset($prev_parts['scheme'])) $parts['scheme'] = $prev_parts['scheme'];
    }
    /* no scheme */
    if (!$this->v('scheme', '', $parts)) return $this->addError('Socket error: Missing URI scheme.');
    /* port tweaks */
    $parts['port'] = ($parts['scheme'] == 'https') ? $this->v1('port', 443, $parts) : $this->v1('port', 80, $parts);
    $host = isset($parts['host']) ? $parts['host'] : '';
    $nl = "\r\n";
    $http_mthd = strtoupper($this->http_method);
    $via_proxy = $this->useProxy($url);
    /* request line: absolute URL for proxies and URLs with user info, path-based otherwise */
    if ($this->v1('user', 0, $parts) || $via_proxy) {
      $h_code = $http_mthd . ' ' . $url;
    }
    else {
      $h_code = $http_mthd . ' ' . $this->v1('path', '/', $parts);
      if ($v = $this->v1('query', 0, $parts)) $h_code .= '?' . $v;
      if ($v = $this->v1('fragment', 0, $parts)) $h_code .= '#' . $v;
    }
    /* request headers */
    $port_code = ($parts['port'] != 80) ? ':' . $parts['port'] : '';
    $h_code .= ' HTTP/1.0' . $nl;
    $h_code .= 'Host: ' . $host . $port_code . $nl;
    if ($this->http_accept_header) {
      $h_code .= $this->http_accept_header . $nl;
    }
    /* the default user agent yields to one supplied via custom headers */
    if ($this->http_user_agent_header && !preg_match('/User\-Agent\:/', $this->http_custom_headers)) {
      $h_code .= $this->http_user_agent_header . $nl;
    }
    if ($http_mthd == 'POST') {
      $h_code .= 'Content-Length: ' . strlen($this->message_body) . $nl;
    }
    if ($this->http_custom_headers) {
      $h_code .= trim($this->http_custom_headers) . $nl;
    }
    $h_code .= $nl;
    /* post body */
    if ($http_mthd == 'POST') {
      $h_code .= $this->message_body . $nl;
    }
    /* connect */
    $s = false;
    $errno = 0;
    $errstr = '';
    if ($via_proxy) {
      $s = @fsockopen($this->a['proxy_host'], $this->a['proxy_port'], $errno, $errstr, $this->timeout);
    }
    elseif (($parts['scheme'] == 'https') && function_exists('stream_socket_client')) {
      // SSL options via config array, code by Hannes Muehleisen (muehleis@informatik.hu-berlin.de)
      $context = stream_context_create();
      foreach ($this->a as $k => $v) {
        if (preg_match('/^arc_reader_ssl_(.+)$/', $k, $m)) {
          stream_context_set_option($context, 'ssl', $m[1], $v);
        }
      }
      $s = stream_socket_client('ssl://' . $host . ':' . $parts['port'], $errno, $errstr, $this->timeout, STREAM_CLIENT_CONNECT, $context);
    }
    elseif ($parts['scheme'] == 'https') {
      $s = @fsockopen('ssl://' . $host, $parts['port'], $errno, $errstr, $this->timeout);
    }
    elseif ($parts['scheme'] == 'http') {
      $s = @fsockopen($host, $parts['port'], $errno, $errstr, $this->timeout);
    }
    if (!$s) {
      return $this->addError('Socket error: Could not connect to "' . $url . '" (proxy: ' . ($via_proxy ? '1' : '0') . '): ' . $errstr);
    }
    /* request */
    fwrite($s, $h_code);
    /* timeout */
    if ($this->timeout) {
      stream_set_timeout($s, $this->timeout);
    }
    /* response headers */
    $h = array();
    $this->response_headers = $h;
    if (empty($this->ping_only)) {
      $error = '';
      do {
        $line = trim(fgets($s, 4096));
        $info = stream_get_meta_data($s);
        if (preg_match("/^HTTP[^\s]+\s+([0-9]{1})([0-9]{2})(.*)$/i", $line, $m)) {/* response code */
          $error = in_array($m[1], array('4', '5')) ? $m[1] . $m[2] . ' ' . $m[3] : '';
          $error = ($m[1].$m[2] == '304') ? '304 '.$m[3] : $error;
          $h['response-code'] = $m[1] . $m[2];
          $h['error'] = $error;
          $h['redirect'] = ($m[1] == '3') ? true : false;
        }
        elseif (preg_match('/^([^\:]+)\:\s*(.*)$/', $line, $m)) {/* header */
          $h_name = strtolower($m[1]);
          if (!isset($h[$h_name])) {/* 1st value */
            $h[$h_name] = trim($m[2]);
          }
          elseif (!is_array($h[$h_name])) {/* 2nd value */
            $h[$h_name] = array($h[$h_name], trim($m[2]));
          }
          else {/* more values */
            $h[$h_name][] = trim($m[2]);
          }
        }
      } while(!$info['timed_out'] && !feof($s) && $line);/* an empty line ends the header section */
      $h['format'] = strtolower(preg_replace('/^([^\s]+).*$/', '\\1', $this->v('content-type', '', $h)));
      $h['encoding'] = preg_match('/(utf\-8|iso\-8859\-1|us\-ascii)/', $this->v('content-type', '', $h), $m) ? strtoupper($m[1]) : '';
      $h['encoding'] = preg_match('/charset=\s*([^\s]+)/si', $this->v('content-type', '', $h), $m) ? strtoupper($m[1]) : $h['encoding'];
      $this->response_headers = $h;
      /* result */
      if ($info['timed_out']) {
        fclose($s);
        return $this->addError('Connection timed out after ' . $this->timeout . ' seconds');
      }
      /* error */
      if ($this->v('error', 0, $h)) {
        /* digest auth */
        /* 401 received */
        $www_auth = $this->v('www-authenticate', '', $h);
        if (is_array($www_auth)) $www_auth = implode(', ', $www_auth);
        if (preg_match('/Digest/i', $www_auth) && !$this->digest_auth) {
          fclose($s);
          $this->setCredentials($url);
          $this->digest_auth = 1;/* retry only once */
          return $this->getHTTPSocket($url, $redirs, $prev_parts);
        }
        /* quote the first bytes of the error document, then release the socket */
        $snippet = !feof($s) ? trim(strip_tags(fread($s, 128))) . '..."' : '';
        fclose($s);
        return $this->addError($error . ' "' . $snippet);
      }
      /* redirect */
      if ($this->v('redirect', 0, $h) && ($new_url = $this->v1('location', 0, $h))) {
        fclose($s);
        $this->redirects[$url] = $new_url;
        $this->base = $new_url;
        /* $redirs redirects have been followed already */
        if ($redirs >= $this->max_redirects) {
          return $this->addError('Max numbers of redirects exceeded.');
        }
        return $this->getHTTPSocket($new_url, $redirs+1, $parts);
      }
    }
    if ($this->timeout) {
      stream_set_blocking($s, true);
    }
    return array('type' => 'socket', 'url' => $url, 'socket' => &$s, 'headers' => $h, 'pos' => 0, 'size' => $this->v('content-length', 0, $h), 'buffer' => '');
  }
  /**
   * Reads the next chunk from the active stream.
   *
   * With $buffer_xml enabled the returned chunk ends at the last ">" seen;
   * whatever follows is kept in the stream buffer and prepended to the next
   * read. When a chunk contains no ">" at all and the stream is not
   * exhausted, reading continues (recursively) until one is found.
   *
   * Fix: the chunk variable is now initialised, so a stream whose type is
   * neither "data" nor "socket" yields an empty read instead of
   * undefined-variable notices.
   *
   * @param bool $buffer_xml Cut the chunk after the last closing angle bracket.
   * @param int  $d_size     Maximum number of bytes to read; capped to what is
   *                         left when the stream size is known.
   * @return mixed Buffered remainder plus newly read data, or the addError()
   *               result when there is no stream.
   */
  function readStream($buffer_xml = true, $d_size = 1024) {
    if (!$s = $this->v('stream')) return $this->addError('missing stream in "readStream" ' . $this->uri);
    $s_type = $this->v('type', '', $s);
    /* left-over from the previous read goes out first */
    $r = $s['buffer'];
    $s['buffer'] = '';
    if ($s['size']) $d_size = min($d_size, $s['size'] - $s['pos']);
    $d = '';
    /* data */
    if ($s_type == 'data') {
      if ($d_size > 0) $d = substr($s['data'], $s['pos'], $d_size);
    }
    /* socket */
    elseif ($s_type == 'socket') {
      if (($d_size > 0) && !feof($s['socket'])) $d = fread($s['socket'], $d_size);
    }
    $eof = $d ? false : true;
    /* chunked despite HTTP 1.0 request */
    if (isset($s['headers']) && isset($s['headers']['transfer-encoding']) && ($s['headers']['transfer-encoding'] == 'chunked')) {
      /* strips lines that look like chunk-size markers (1-4 hex digits) */
      $d = preg_replace('/(^|[\r\n]+)[0-9a-f]{1,4}[\r\n]+/', '', $d);
    }
    $s['pos'] += strlen($d);
    if ($buffer_xml) {/* stop after last closing xml tag (if available) */
      if (preg_match('/^(.*\>)([^\>]*)$/s', $d, $m)) {
        $d = $m[1];
        $s['buffer'] = $m[2];
      }
      elseif (!$eof) {
        /* no ">" yet: stash everything and read on */
        $s['buffer'] = $r . $d;
        $this->stream = $s;
        return $this->readStream(true, $d_size);
      }
    }
    $this->stream = $s;
    return $r . $d;
  }
  /**
   * Closes the socket of the active stream (if it has one) and discards the
   * stream.
   *
   * Fix: the stream type is compared strictly against an empty-string
   * default. The former default 0 compared loosely with 'socket' is true on
   * PHP versions before 8, so streams without a type also went through
   * fclose().
   */
  function closeStream() {
    if (!isset($this->stream)) return;
    if ($this->v('type', '', $this->stream) === 'socket') {
      @fclose($this->stream['socket']);
    }
    unset($this->stream);
  }
  336. /* */
  /**
   * Returns the format of the active stream, detecting it on first use.
   *
   * Detection reads one chunk, puts it back in front of the stream buffer so
   * no content is lost, and passes the chunk, the media type from the
   * response headers and the URI's file extension to ARC2::getFormat().
   *
   * @return mixed The configured or detected format, or the addError() result
   *               when no stream is available.
   */
  function getFormat() {
    if ($this->format) return $this->format;
    if (!$this->v('stream')) {
      return $this->addError('missing stream in "getFormat"');
    }
    $sample = $this->readStream(false);
    $media_type = $this->v('format', '', $this->stream['headers']);
    /* push the sample back so the next read starts at the beginning again */
    $this->stream['buffer'] = $sample . $this->stream['buffer'];
    $ext = '';
    if (preg_match('/\.([^\.]+)$/', $this->uri, $match)) {
      $ext = $match[1];
    }
    $this->format = ARC2::getFormat($sample, $media_type, $ext);
    return $this->format;
  }
  350. /* */
  /**
   * Returns the headers of the active stream, falling back to the headers of
   * the most recent HTTP response when the stream carries none.
   *
   * @return array Header values keyed by lower-cased header name.
   */
  function getResponseHeaders() {
    $stream_has_headers = isset($this->stream) && isset($this->stream['headers']);
    return $stream_has_headers ? $this->stream['headers'] : $this->response_headers;
  }
  /**
   * Returns the character encoding announced by the response headers.
   *
   * Fix: the headers are obtained through getResponseHeaders() instead of
   * reading $this->stream['headers'] directly, which raised warnings once the
   * stream had been closed (closeStream() unsets it) or had failed to open.
   *
   * @param string $default Value returned when no encoding was detected.
   * @return string Encoding name (upper-cased by getHTTPSocket()) or $default.
   */
  function getEncoding($default = 'UTF-8') {
    return $this->v1('encoding', $default, $this->getResponseHeaders());
  }
  /**
   * Returns the redirects recorded by getHTTPSocket().
   *
   * @return array Map of requested URL => Location value it redirected to.
   */
  function getRedirects() {
    return $this->redirects;
  }
  /**
   * Returns the stored authentication details.
   *
   * @return array Initial "reader_auth_infos" configuration plus whatever
   *               setCredentials() recorded (realm, nonce, opaque, auth,
   *               request_count).
   */
  function getAuthInfos() {
    return $this->auth_infos;
  }
  366. /* */
  367. }