PageRenderTime 59ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/s3db3.5.10/pearlib/arc/ARC2_Reader.php

https://github.com/drobbins/s3db
PHP | 254 lines | 201 code | 29 blank | 24 comment | 41 complexity | 6d1243687f4a757220420bb3a0bbedb7 MD5 | raw file
  1. <?php
  2. /*
  3. homepage: http://arc.semsol.org/
  4. license: http://arc.semsol.org/license
  5. class: ARC2 Web Reader
  6. author: Benjamin Nowack
  7. version: 2008-07-14 (Fix: readStream considers content-length header and adjusts d_size)
  8. */
  9. ARC2::inc('Class');
  10. class ARC2_Reader extends ARC2_Class {
  11. function __construct($a = '', &$caller) {
  12. parent::__construct($a, $caller);
  13. }
  14. function ARC2_Reader($a = '', &$caller) {
  15. $this->__construct($a, $caller);
  16. }
  17. function __init() {/* inc_path, proxy_host, proxy_port, proxy_skip, http_accept_header, http_user_agent_header, max_redirects */
  18. parent::__init();
  19. $this->http_method = 'GET';
  20. $this->message_body = '';
  21. $this->http_accept_header = $this->v('http_accept_header', 'Accept: application/rdf+xml; q=0.9, */*; q=0.1', $this->a);
  22. $this->http_user_agent_header = $this->v('http_user_agent_header', 'User-Agent: ARC Reader (http://arc.semsol.org/)', $this->a);
  23. $this->http_custom_headers = $this->v('http_custom_headers', '', $this->a);
  24. $this->max_redirects = $this->v('max_redirects', 3, $this->a);
  25. $this->format = $this->v('format', false, $this->a);
  26. $this->redirects = array();
  27. $this->stream_id = '';
  28. }
  29. /* */
  30. function setHTTPMethod($v) {
  31. $this->http_method = $v;
  32. }
  33. function setMessageBody($v) {
  34. $this->message_body = $v;
  35. }
  36. function setAcceptHeader($v) {
  37. $this->http_accept_header = $v;
  38. }
  39. function setCustomHeaders($v) {
  40. $this->http_custom_headers = $v;
  41. }
  42. /* */
  43. function activate($path, $data = '') {
  44. $id = md5($path . ' ' . $data);
  45. if ($this->stream_id != $id) {
  46. $this->stream_id = $id;
  47. $this->base = $this->calcBase($path);
  48. $this->uri = $this->calcURI($path, $this->base);
  49. $this->stream = ($data) ? $this->getDataStream($data) : $this->getSocketStream($this->base);
  50. if ($this->stream) {
  51. $this->getFormat();
  52. }
  53. }
  54. }
  55. /* */
  56. function useProxy($url) {
  57. if (!$this->v1('proxy_host', 0, $this->a)) {
  58. return false;
  59. }
  60. $skips = $this->v1('proxy_skip', array(), $this->a);
  61. foreach ($skips as $skip) {
  62. if (strpos($url, $skip) !== false) {
  63. return false;
  64. }
  65. }
  66. return true;
  67. }
  68. /* */
  69. function createStream($path, $data = '') {
  70. $this->base = $this->calcBase($path);
  71. $this->stream = ($data) ? $this->getDataStream($data) : $this->getSocketStream($this->base);
  72. }
  73. function getDataStream($data) {
  74. return array('type' => 'data', 'pos' => 0, 'headers' => array(), 'size' => strlen($data), 'data' => $data, 'buffer' => '');
  75. }
  76. function getSocketStream($url) {
  77. $parts = parse_url($url);
  78. #echo $url.'<br />';
  79. $mappings = array('file' => 'File', 'http' => 'HTTP', 'https' => 'HTTP');
  80. if ($scheme = $this->v(strtolower($parts['scheme']), '', $mappings)) {
  81. return $this->m('get' . $scheme . 'Socket', $url, $this->getDataStream(''));
  82. }
  83. }
  84. function getFileSocket($url) {
  85. $parts = parse_url($url);
  86. $s = file_exists($parts['path']) ? @fopen($parts['path'], 'rb') : false;
  87. if (!$s) {
  88. return $this->addError('Socket error: Could not open "' . $parts['path'] . '"');
  89. }
  90. return array('type' => 'socket', 'socket' =>& $s, 'headers' => array(), 'pos' => 0, 'size' => filesize($parts['path']), 'buffer' => '');
  91. }
  92. function getHTTPSocket($url, $redirs = 0) {
  93. $parts = parse_url($url);
  94. if (!isset($parts['scheme'])) {
  95. return $this->addError('Socket error: No supported URI scheme detected.');
  96. }
  97. $parts['port'] = ($parts['scheme'] == 'https') ? $this->v1('port', 443, $parts) : $this->v1('port', 80, $parts);
  98. $nl = "\r\n";
  99. $http_mthd = strtoupper($this->http_method);
  100. if ($this->v1('user', 0, $parts) || $this->useProxy($url)) {
  101. $h_code = $http_mthd . ' ' . $url;
  102. }
  103. else {
  104. $h_code = $http_mthd . ' ' . $this->v1('path', '/', $parts) . (($v = $this->v1('query', 0, $parts)) ? '?' . $v : '') . (($v = $this->v1('fragment', 0, $parts)) ? '#' . $v : '');
  105. }
  106. $h_code .= ' HTTP/1.0' . $nl.
  107. 'Host: ' . $parts['host'] . $nl.
  108. (($v = $this->http_accept_header) ? $v . $nl : '') .
  109. (($v = $this->http_user_agent_header) && !preg_match('/User\-Agent\:/', $this->http_custom_headers) ? $v . $nl : '') .
  110. (($http_mthd == 'POST') ? 'Content-Length: ' . strlen($this->message_body) . $nl : '') .
  111. ($this->http_custom_headers ? trim($this->http_custom_headers) . $nl : '') .
  112. $nl .
  113. '';
  114. /* post body */
  115. if ($http_mthd == 'POST') {
  116. $h_code .= $this->message_body . $nl;
  117. }
  118. /* connect */
  119. if ($this->useProxy($url)) {
  120. $s = @fsockopen($this->a['proxy_host'], $this->a['proxy_port']);
  121. }
  122. elseif ($parts['scheme'] == 'https') {
  123. $s = @fsockopen('ssl://' . $parts['host'], $parts['port']);
  124. }
  125. elseif ($parts['scheme'] == 'http') {
  126. $s = @fsockopen($parts['host'], $parts['port']);
  127. }
  128. if (!$s) {
  129. return $this->addError('Socket error: Could not connect to "' . $url . '" (proxy: ' . ($this->useProxy($url) ? '1' : '0') . ')');
  130. }
  131. /* request */
  132. fwrite($s, $h_code);
  133. /* response headers */
  134. $h = array();
  135. do {
  136. $line = trim(fgets($s, 256));
  137. if (preg_match("/^HTTP[^\s]+\s+([0-9]{1})([0-9]{2})(.*)$/i", $line, $m)) {/* response code */
  138. $error = in_array($m[1], array('4', '5')) ? $m[1] . $m[2] . ' ' . $m[3] : '';
  139. $error = ($m[1].$m[2] == '304') ? '304 '.$m[3] : $error;
  140. $h['response-code'] = $m[1] . $m[2];
  141. $h['error'] = $error;
  142. $h['redirect'] = ($m[1] == '3') ? true : false;
  143. }
  144. elseif (preg_match('/^([^\:]+)\:\s*(.*)$/', $line, $m)) {/* header */
  145. $h[strtolower($m[1])] = trim($m[2]);
  146. }
  147. } while(!feof($s) && $line);
  148. $h['format'] = strtolower(preg_replace('/^([^\s]+).*$/', '\\1', $this->v('content-type', '', $h)));
  149. $h['encoding'] = preg_match('/(utf\-8|iso\-8859\-1|us\-ascii)/', $this->v('content-type', '', $h), $m) ? strtoupper($m[1]) : '';
  150. $h['encoding'] = preg_match('/charset=\s*([^\s]+)/si', $this->v('content-type', '', $h), $m) ? strtoupper($m[1]) : $h['encoding'];
  151. /* result */
  152. if ($v = $this->v('error', 0, $h)) {
  153. return $this->addError($error);
  154. }
  155. if ($this->v('redirect', 0, $h) && ($new_url = $this->v1('location', 0, $h))) {
  156. fclose($s);
  157. $this->redirects[$url] = $new_url;
  158. $this->base = $new_url;
  159. if ($redirs > $this->max_redirects) {
  160. return $this->addError('Max numbers of redirects exceeded.');
  161. }
  162. return $this->getHTTPSocket($new_url, $redirs+1);
  163. }
  164. //stream_set_timeout($s, 5);
  165. return array('type' => 'socket', 'socket' =>& $s, 'headers' => $h, 'pos' => 0, 'size' => $this->v('content-length', 0, $h), 'buffer' => '');
  166. }
  167. function readStream($buffer_xml = true, $d_size = 1024) {
  168. if (!$s = $this->v('stream')) return $this->addError('missing stream in "readStream"');
  169. $s_type = $this->v('type', '', $s);
  170. $r = $s['buffer'];
  171. $s['buffer'] = '';
  172. if ($s['size']) $d_size = min($d_size, $s['size'] - $s['pos']);
  173. /* data */
  174. if ($s_type == 'data') {
  175. $d = ($d_size > 0) ? substr($s['data'], $s['pos'], $d_size) : '';
  176. }
  177. /* socket */
  178. elseif ($s_type == 'socket') {
  179. $d = ($d_size > 0) && !feof($s['socket']) ? fread($s['socket'], $d_size) : '';
  180. }
  181. $eof = $d ? false : true;
  182. $s['pos'] += strlen($d);
  183. if ($buffer_xml) {/* stop after last closing xml tag (if available) */
  184. if (preg_match('/^(.*\>)([^\>]*)$/s', $d, $m)) {
  185. $d = $m[1];
  186. $s['buffer'] = $m[2];
  187. }
  188. elseif (!$eof) {
  189. $s['buffer'] = $r . $d;
  190. $this->stream = $s;
  191. return $this->readStream(true, $d_size);
  192. }
  193. }
  194. $this->stream = $s;
  195. return $r . $d;
  196. }
  197. function closeStream() {
  198. if (isset($this->stream)) {
  199. if ($this->v('type', 0, $this->stream) == 'socket') {
  200. @fclose($this->stream['socket']);
  201. }
  202. unset($this->stream);
  203. }
  204. }
  205. /* */
  206. function getFormat() {
  207. if (!$this->format) {
  208. if (!$this->v('stream')) {
  209. return $this->addError('missing stream in "getFormat"');
  210. }
  211. $v = $this->readStream(false);
  212. $mtype = $this->v('format', '', $this->stream['headers']);
  213. $this->stream['buffer'] = $v . $this->stream['buffer'];
  214. $ext = preg_match('/\.([^\.]+)$/', $this->uri, $m) ? $m[1] : '';
  215. $this->format = ARC2::getFormat($v, $mtype, $ext);
  216. }
  217. return $this->format;
  218. }
  219. /* */
  220. function getEncoding($default = 'UTF-8') {
  221. return $this->v1('encoding', $default, $this->stream['headers']);
  222. }
  223. /* */
  224. }