PageRenderTime 60ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/tests/simpletest/url.php

https://github.com/quarkness/piwik
PHP | 528 lines | 261 code | 36 blank | 231 comment | 35 complexity | 141ec851cb09b15db9ea16383c9ad51a MD5 | raw file
  1. <?php
  2. /**
  3. * base include file for SimpleTest
  4. * @package SimpleTest
  5. * @subpackage WebTester
  6. * @version $Id: url.php 1723 2008-04-08 00:34:10Z lastcraft $
  7. */
  8. /**#@+
  9. * include other SimpleTest class files
  10. */
  11. require_once(dirname(__FILE__) . '/encoding.php');
  12. /**#@-*/
  /**
   * URL parser used instead of PHP's parse_url(), which got broken
   * in PHP 4.3.0. Adds some browser specific functionality such as
   * expandomatics (image map coordinates) and frame targets.
   *
   * The parser works by "chomping" one section at a time off the front
   * (or end) of the incoming string: coordinates, scheme, login, host,
   * path, request and finally fragment. It guesses a bit when trying to
   * separate the host from the path, and it keeps the raw, possibly
   * unparsable, request string for as long as possible.
   *
   * @package SimpleTest
   * @subpackage WebTester
   */
  class SimpleUrl {
      public $_scheme;
      public $_username;
      public $_password;
      public $_host;
      public $_port;
      public $_path;
      public $_request;
      public $_fragment;
      public $_x;
      public $_y;
      public $_target;
      public $_raw = false;

      /**
       * Constructor. Parses URL into sections.
       *
       * Declared as __construct() because a method that merely shares
       * the class name is no longer treated as a constructor by PHP 8.
       *
       * @param string $url        Incoming URL.
       * @access public
       */
      public function __construct($url = '') {
          // Each _chomp*() call strips the section it recognises from
          // $url, so the order of these statements matters.
          list($x, $y) = $this->_chompCoordinates($url);
          $this->setCoordinates($x, $y);
          $this->_scheme = $this->_chompScheme($url);
          $login = $this->_chompLogin($url);
          $this->_username = $login[0];
          $this->_password = $login[1];
          $this->_host = $this->_chompHost($url);
          $this->_port = false;
          // The host guess still carries any ":port" suffix at this point.
          if ($this->_host !== false
                  && preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) {
              $this->_host = $host_parts[1];
              $this->_port = (integer)$host_parts[2];
          }
          $this->_path = $this->_chompPath($url);
          $this->_request = $this->_parseRequest($this->_chompRequest($url));
          // Whatever is left can only be the fragment.
          $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
          $this->_target = false;
      }

      /**
       * Legacy PHP4 style constructor name, kept so that existing code
       * calling it explicitly keeps working. Simply re-runs the parse.
       *
       * @param string $url        Incoming URL.
       * @access public
       */
      public function SimpleUrl($url = '') {
          $this->__construct($url);
      }

      /**
       * Extracts the X, Y coordinate pair from an image map.
       *
       * @param string $url   URL so far. A trailing "?x,y" will be
       *                      removed.
       * @return array        X, Y as a pair of integers, or a pair
       *                      of false values if not present.
       * @access private
       */
      public function _chompCoordinates(&$url) {
          if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
              $url = $matches[1];
              return array((integer)$matches[2], (integer)$matches[3]);
          }
          return array(false, false);
      }

      /**
       * Extracts the scheme part of an incoming URL.
       *
       * @param string $url   URL so far. The scheme and its colon
       *                      will be removed, the "//" is left.
       * @return string       Scheme part or false.
       * @access private
       */
      public function _chompScheme(&$url) {
          if (preg_match('/^([^\/:]*):(\/\/)(.*)/', $url, $matches)) {
              $url = $matches[2] . $matches[3];
              return $matches[1];
          }
          return false;
      }

      /**
       * Extracts the username and password from the incoming URL.
       * The // prefix will be reattached to the URL after the
       * doublet is extracted.
       *
       * @param string $url    URL so far. The username and
       *                       password are removed.
       * @return array         Two item list of username and
       *                       password, both urldecode()d. The
       *                       password is false if there was none
       *                       and both are false without a login.
       * @access private
       */
      public function _chompLogin(&$url) {
          $prefix = '';
          if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
              $prefix = $matches[1];
              $url = $matches[2];
          }
          if (preg_match('/^([^\/]*)@(.*)/', $url, $matches)) {
              $url = $prefix . $matches[2];
              // Split on the first colon only, so that a password
              // which itself contains ":" is not truncated.
              $parts = explode(':', $matches[1], 2);
              return array(
                      urldecode($parts[0]),
                      isset($parts[1]) ? urldecode($parts[1]) : false);
          }
          $url = $prefix . $url;
          return array(false, false);
      }

      /**
       * Extracts the host part of an incoming URL, including
       * the port number part. Will extract the host if the URL
       * starts with // or the first section has a known top level
       * domain or it has at least two dots.
       *
       * @param string $url    URL so far. The host will be
       *                       removed.
       * @return string        Host part guess or false.
       * @access private
       */
      public function _chompHost(&$url) {
          if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
              $url = $matches[3];
              return $matches[2];
          }
          if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
              $candidate = $matches[1];
              $tlds = SimpleUrl::getAllTopLevelDomains();
              // The lookahead stops a TLD from matching as a mere prefix
              // of a longer label, e.g. the "com" in "page.community".
              $has_tld = preg_match(
                      '/[a-z0-9\-]+\.(' . $tlds . ')(?![a-z0-9\-])/i',
                      $candidate);
              $has_two_dots = preg_match(
                      '/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i',
                      $candidate);
              if ($has_tld || $has_two_dots) {
                  $url = $matches[2] . $matches[3];
                  return $candidate;
              }
          }
          return false;
      }

      /**
       * Extracts the path information from the incoming
       * URL. Strips this path from the URL.
       *
       * @param string $url     URL so far. The path will be
       *                        removed.
       * @return string         Path part, or '' if there is none.
       * @access private
       */
      public function _chompPath(&$url) {
          if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
              $url = $matches[2] . $matches[3];
              // Returned as matched; a truthiness test here would
              // turn a path of "0" into an empty one.
              return $matches[1];
          }
          return '';
      }

      /**
       * Strips off the request data.
       *
       * @param string $url  URL so far. The request will be
       *                     removed.
       * @return string      Raw request part without the "?".
       * @access private
       */
      public function _chompRequest(&$url) {
          if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
              $url = $matches[2] . $matches[3];
              return $matches[1];
          }
          return '';
      }

      /**
       * Breaks the request down into an object. The raw
       * string is remembered as well, so that it can be
       * replayed untouched by getEncodedRequest().
       *
       * @param string $raw           Raw request.
       * @return SimpleFormEncoding   Parsed data.
       * @access private
       */
      public function _parseRequest($raw) {
          $this->_raw = $raw;
          $request = new SimpleGetEncoding();
          foreach (explode('&', $raw) as $pair) {
              if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                  // NOTE(review): only the value is urldecode()d, the key
                  // is passed on as written. Confirm against how
                  // SimpleGetEncoding renders keys before changing this.
                  $request->add($matches[1], urldecode($matches[2]));
              } elseif ($pair) {
                  $request->add($pair, '');
              }
          }
          return $request;
      }

      /**
       * Accessor for protocol part.
       *
       * @param string $default    Value to use if not present.
       * @return string            Scheme name, e.g "http".
       * @access public
       */
      public function getScheme($default = false) {
          return $this->_scheme ? $this->_scheme : $default;
      }

      /**
       * Accessor for user name.
       *
       * @return string    Username preceding host.
       * @access public
       */
      public function getUsername() {
          return $this->_username;
      }

      /**
       * Accessor for password.
       *
       * @return string    Password preceding host.
       * @access public
       */
      public function getPassword() {
          return $this->_password;
      }

      /**
       * Accessor for hostname. The port is not included,
       * see getPort() for that.
       *
       * @param string $default    Value to use if not present.
       * @return string            Hostname only.
       * @access public
       */
      public function getHost($default = false) {
          return $this->_host ? $this->_host : $default;
      }

      /**
       * Accessor for top level domain.
       *
       * @return string       Last part of host, or false if the
       *                      host has no dot separated suffix.
       * @access public
       */
      public function getTld() {
          // pathinfo() is borrowed to pick out the text after the
          // last dot, treating the hostname like a file name.
          $path_parts = pathinfo($this->getHost());
          return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
      }

      /**
       * Accessor for port number.
       *
       * @return integer    TCP/IP port number, or false if the
       *                    URL did not carry one.
       * @access public
       */
      public function getPort() {
          return $this->_port;
      }

      /**
       * Accessor for path.
       *
       * @return string   Full path including leading slash if implied.
       * @access public
       */
      public function getPath() {
          // A host without any path implies the root page.
          if ($this->_path === '' && $this->_host) {
              return '/';
          }
          return $this->_path;
      }

      /**
       * Accessor for page if any. This may be a
       * directory name if ambiguous.
       *
       * @return string   Page name, the part after the last slash.
       * @access public
       */
      public function getPage() {
          if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
              return false;
          }
          return $matches[1];
      }

      /**
       * Gets the path to the page.
       *
       * @return string       Path less the page, including the
       *                      trailing slash, or false if the path
       *                      contains no slash.
       * @access public
       */
      public function getBasePath() {
          if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
              return false;
          }
          return $matches[1];
      }

      /**
       * Accessor for fragment at end of URL after the "#".
       *
       * @return string    Part after "#", or false if none.
       * @access public
       */
      public function getFragment() {
          return $this->_fragment;
      }

      /**
       * Sets image coordinates. Set to false to clear
       * them. Both are cleared if either one is false.
       *
       * @param integer $x    Horizontal position.
       * @param integer $y    Vertical position.
       * @access public
       */
      public function setCoordinates($x = false, $y = false) {
          if (($x === false) || ($y === false)) {
              $this->_x = $this->_y = false;
              return;
          }
          $this->_x = (integer)$x;
          $this->_y = (integer)$y;
      }

      /**
       * Accessor for horizontal image coordinate.
       *
       * @return integer        X value, or false if unset.
       * @access public
       */
      public function getX() {
          return $this->_x;
      }

      /**
       * Accessor for vertical image coordinate.
       *
       * @return integer        Y value, or false if unset.
       * @access public
       */
      public function getY() {
          return $this->_y;
      }

      /**
       * Accessor for current request parameters
       * in URL string form. Will return the original request
       * if at all possible even if it doesn't make much
       * sense.
       *
       * @return string   Form is string "?a=1&b=2", etc.
       *                  Empty string if there is no request.
       * @access public
       */
      public function getEncodedRequest() {
          if ($this->_raw) {
              $encoded = $this->_raw;
          } else {
              $encoded = $this->_request->asUrlRequest();
          }
          if ($encoded) {
              // Guarantees exactly one leading "?".
              return '?' . preg_replace('/^\?/', '', $encoded);
          }
          return '';
      }

      /**
       * Adds an additional parameter to the request. The raw
       * request string is discarded as it is now out of date.
       *
       * @param string $key            Name of parameter.
       * @param string $value          Value as string.
       * @access public
       */
      public function addRequestParameter($key, $value) {
          $this->_raw = false;
          $this->_request->add($key, $value);
      }

      /**
       * Adds additional parameters to the request. The raw
       * request string is discarded as it is now out of date.
       *
       * @param hash/SimpleFormEncoding $parameters   Additional
       *                                              parameters.
       * @access public
       */
      public function addRequestParameters($parameters) {
          $this->_raw = false;
          $this->_request->merge($parameters);
      }

      /**
       * Clears down all parameters.
       *
       * @access public
       */
      public function clearRequest() {
          $this->_raw = false;
          $this->_request = new SimpleGetEncoding();
      }

      /**
       * Gets the frame target if present. Although
       * not strictly part of the URL specification it
       * acts similarly to the browser.
       *
       * @return boolean/string    Frame name or false if none.
       * @access public
       */
      public function getTarget() {
          return $this->_target;
      }

      /**
       * Attaches a frame target. Note that this also discards
       * the raw request string, so the request is rendered from
       * the parsed parameters afterwards.
       *
       * @param string $frame        Name of frame.
       * @access public
       */
      public function setTarget($frame) {
          $this->_raw = false;
          $this->_target = $frame;
      }

      /**
       * Renders the URL back into a string. A scheme of "http"
       * is assumed for a URL that has a host, but no scheme.
       *
       * @return string        URL in canonical form.
       * @access public
       */
      public function asString() {
          $path = $this->_path;
          $scheme = $identity = $host = $port = '';
          if ($this->_username && $this->_password) {
              $identity = $this->_username . ':' . $this->_password . '@';
          }
          if ($this->getHost()) {
              $scheme = $this->getScheme() ? $this->getScheme() : 'http';
              $scheme .= "://";
              $host = $this->getHost();
              // Keep an explicit port, otherwise the rendered URL
              // would point at a different server than the parsed one.
              if ($this->getPort()) {
                  $port = ':' . $this->getPort();
              }
          }
          // Only absolute paths can be normalised safely.
          if (substr($this->_path, 0, 1) == '/') {
              $path = $this->normalisePath($this->_path);
          }
          $encoded = $this->getEncodedRequest();
          $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
          $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
          return "$scheme$identity$host$port$path$encoded$fragment$coords";
      }

      /**
       * Replaces unknown sections to turn a relative
       * URL into an absolute one. The base URL can
       * be either a string or a SimpleUrl object.
       * This URL is left untouched, the result is a new object.
       *
       * @param string/SimpleUrl $base       Base URL.
       * @return SimpleUrl                   Absolute version.
       * @access public
       */
      public function makeAbsolute($base) {
          if (! is_object($base)) {
              $base = new SimpleUrl($base);
          }
          if ($this->getHost()) {
              $scheme = $this->getScheme();
              $host = $this->getHost();
              $port = $this->getPort() ? ':' . $this->getPort() : '';
              $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
              // Falls back to the login of the base if we have none.
              if (! $identity) {
                  $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
              }
          } else {
              $scheme = $base->getScheme();
              $host = $base->getHost();
              $port = $base->getPort() ? ':' . $base->getPort() : '';
              $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
          }
          $path = $this->normalisePath($this->_extractAbsolutePath($base));
          $encoded = $this->getEncodedRequest();
          $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
          $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
          return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
      }

      /**
       * Replaces unknown sections of the path with base parts
       * to return a complete absolute one.
       *
       * @param SimpleUrl $base       Base URL, already parsed.
       * @return string               Absolute path.
       * @access private
       */
      public function _extractAbsolutePath($base) {
          if ($this->getHost()) {
              return $this->_path;
          }
          if (! $this->_isRelativePath($this->_path)) {
              return $this->_path;
          }
          if ($this->_path !== '') {
              return $base->getBasePath() . $this->_path;
          }
          // No path of our own at all, so we mean the base page.
          return $base->getPath();
      }

      /**
       * Simple test to see if a path part is relative.
       *
       * @param string $path        Path to test.
       * @return boolean            True unless the path starts
       *                            with a "/".
       * @access private
       */
      public function _isRelativePath($path) {
          return (substr($path, 0, 1) != '/');
      }

      /**
       * Extracts the username and password for use in rendering
       * a URL. Both have to be present.
       *
       * @return string/boolean    Form of username:password or false.
       * @access public
       */
      public function getIdentity() {
          if ($this->_username && $this->_password) {
              return $this->_username . ':' . $this->_password;
          }
          return false;
      }

      /**
       * Replaces . and .. sections of the path.
       *
       * The replacements are repeated until the path stops
       * changing, because a single pass leaves stacked sections
       * such as "/a/b/../../c" only half resolved. Every change
       * shortens the path, so the loop always finishes.
       *
       * @param string $path    Unoptimised path.
       * @return string         Path with dots removed if possible.
       * @access public
       * @static
       */
      public static function normalisePath($path) {
          do {
              $previous = $path;
              $path = preg_replace('|/\./|', '/', $path);
              $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
          } while ($path !== $previous);
          return $path;
      }

      /**
       * A pipe separated list of all TLDs that result in two part
       * domain names. The list is spliced directly into a regular
       * expression by _chompHost().
       *
       * @return string        Pipe separated list.
       * @access public
       * @static
       */
      public static function getAllTopLevelDomains() {
          return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
      }
  }
  492. ?>