PageRenderTime 52ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/core/src/main/php/peer/URL.class.php

http://github.com/xp-framework/xp-framework
PHP | 691 lines | 345 code | 66 blank | 280 comment | 55 complexity | 601a99cc49c6b332577cf636b7a13334 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /* This class is part of the XP framework
  3. *
  4. * $Id$
  5. */
  6. /**
  7. * Represents a Uniform Resource Locator
  8. *
  9. * Warning:
  10. * This class does not validate the URL, it simply tries its best
  11. * in parsing it.
  12. *
  13. * Usage example:
  14. * <code>
  15. * $u= new URL('http://user:pass@foo.bar:8081/news/1,2,6100.html?a=a#frag');
  16. * echo $u->toString();
  17. * </code>
  18. *
  19. * @test xp://net.xp_framework.unittest.peer.URLTest
  20. * @see php://parse_url
  21. */
  22. class URL extends Object {
  /**
   * Default port numbers, keyed by scheme. Consulted by isDefaultPort()
   * to decide whether a port may be left out of the canonical form.
   *
   * @var [:int]
   */
  protected static $defaultPorts= array('http' => 80, 'https' => 443);

  /**
   * Components of this URL. setURL() and the setters store their values
   * here; getURL() additionally caches the assembled string under "url".
   *
   * @var [:var]
   */
  public $_info= array();
  /**
   * Creates a new URL. When a string is passed, it is parsed right away
   * via setURL(); without an argument, no component is set.
   *
   * @param   string str default NULL
   * @throws  lang.FormatException if string is unparseable
   */
  public function __construct($str= NULL) {
    if (NULL === $str) return;
    $this->setURL($str);
  }
  /**
   * Helper to create a string representation. Used by toString() and getURL().
   *
   * Components that have not been set are left out without touching the
   * corresponding array keys, so this also works for an instance that was
   * created without a string and filled via the setters.
   *
   * @param   var $pass A function receiving the password and returning its representation
   * @return  string
   */
  protected function asString($pass) {
    $str= (isset($this->_info['scheme']) ? $this->_info['scheme'] : '').'://';

    // Credentials: the password part only appears if a password is set
    if (isset($this->_info['user'])) {
      $str.= rawurlencode($this->_info['user']);
      if (isset($this->_info['pass'])) $str.= ':'.$pass($this->_info['pass']);
      $str.= '@';
    }

    if (isset($this->_info['host'])) $str.= $this->_info['host'];
    if (isset($this->_info['port'])) $str.= ':'.$this->_info['port'];
    if (isset($this->_info['path'])) $str.= $this->_info['path'];

    // empty() instead of a plain read: "params" is absent until a query was set
    if (!empty($this->_info['params'])) $str.= '?'.$this->getQuery();

    if (isset($this->_info['fragment'])) $str.= '#'.$this->_info['fragment'];
    return $str;
  }
  /**
   * Creates a string representation of this URL in which the password,
   * if any, is replaced by eight asterisks.
   *
   * @return  string
   */
  public function toString() {
    $mask= function($pass) { return '********'; };
    return $this->asString($mask);
  }
  /**
   * Returns the scheme
   *
   * @param   var default default NULL value to return if no scheme is set
   * @return  string the scheme, or the given default
   */
  public function getScheme($default= NULL) {
    if (isset($this->_info['scheme'])) return $this->_info['scheme'];
    return $default;
  }
  /**
   * Sets the scheme and discards the cached URL string
   *
   * @param   string scheme
   * @return  peer.URL this object
   */
  public function setScheme($scheme) {
    unset($this->_info['url']);
    $this->_info['scheme']= $scheme;
    return $this;
  }
  /**
   * Returns the host
   *
   * @param   var default default NULL value to return if no host is set
   * @return  string the host, or the given default
   */
  public function getHost($default= NULL) {
    if (isset($this->_info['host'])) return $this->_info['host'];
    return $default;
  }
  /**
   * Sets the host and discards the cached URL string
   *
   * @param   string host
   * @return  peer.URL this object
   */
  public function setHost($host) {
    unset($this->_info['url']);
    $this->_info['host']= $host;
    return $this;
  }
  /**
   * Returns the path
   *
   * @param   var default default NULL value to return if no path is set
   * @return  string the path, or the given default
   */
  public function getPath($default= NULL) {
    if (isset($this->_info['path'])) return $this->_info['path'];
    return $default;
  }
  /**
   * Sets the path and discards the cached URL string
   *
   * @param   string path
   * @return  peer.URL this object
   */
  public function setPath($path) {
    unset($this->_info['url']);
    $this->_info['path']= $path;
    return $this;
  }
  /**
   * Returns the user
   *
   * @param   var default default NULL value to return if no user is set
   * @return  string the user, or the given default
   */
  public function getUser($default= NULL) {
    if (isset($this->_info['user'])) return $this->_info['user'];
    return $default;
  }
  /**
   * Sets the user and discards the cached URL string
   *
   * @param   string user
   * @return  peer.URL this object
   */
  public function setUser($user) {
    unset($this->_info['url']);
    $this->_info['user']= $user;
    return $this;
  }
  /**
   * Returns the password
   *
   * @param   var default default NULL value to return if no password is set
   * @return  string the password, or the given default
   */
  public function getPassword($default= NULL) {
    if (isset($this->_info['pass'])) return $this->_info['pass'];
    return $default;
  }
  /**
   * Sets the password and discards the cached URL string
   *
   * @param   string password
   * @return  peer.URL this object
   */
  public function setPassword($password) {
    unset($this->_info['url']);
    $this->_info['pass']= $password;
    return $this;
  }
  /**
   * Calculates the query string fragment for one parameter. Every
   * name/value pair produced is prefixed with "&"; the caller strips
   * the leading one.
   *
   * Array values are expanded recursively: if the array's current key is
   * an integer, every element is written as "name[]", otherwise as
   * "name[key]". Parameter names and array keys are URL-encoded, the
   * brackets themselves are written literally. An empty string value
   * yields the name only (no "=").
   *
   * @param   string key parameter name, NULL in recursive calls
   * @param   var value a scalar or a (nested) array
   * @param   string postfix The postfix to use for each variable (defaults to '')
   * @return  string
   */
  protected function buildQuery($key, $value, $postfix= '') {
    $name= urlencode((string)$key);

    if (is_array($value)) {
      $query= '';
      if (is_int(key($value))) {
        foreach ($value as $v) {
          $query.= $this->buildQuery(NULL, $v, $postfix.$name.'[]');
        }
      } else {
        foreach ($value as $k => $v) {
          $query.= $this->buildQuery(NULL, $v, $postfix.$name.'['.urlencode((string)$k).']');
        }
      }
      return $query;
    }

    if ('' === $value) return '&'.$name.$postfix;
    return '&'.$name.$postfix.'='.urlencode((string)$value);
  }
  /**
   * Parses a query string. Replaces builtin string parsing as that
   * breaks (by design) on query parameters with dots inside, e.g.
   * "a.b=c", which parse_str() would expose as "a_b".
   *
   * Pairs are separated by "&"; a pair without "=" yields an empty
   * string value. Keys in array notation ("a[]", "a[k]", "a[k][]")
   * are expanded into nested arrays.
   *
   * @see     php://parse_str
   * @param   string query
   * @return  [:var] parsed parameters
   * @throws  lang.FormatException if the brackets in a key do not match up
   */
  protected function parseQuery($query) {
    if ('' === $query) return array();

    $params= array();
    foreach (explode('&', $query) as $pair) {
      $key= $value= NULL;
      sscanf($pair, "%[^=]=%[^\r]", $key, $value);

      // sscanf() leaves unmatched parts NULL; decode them as empty strings
      $key= urldecode((string)$key);
      $value= urldecode((string)$value);

      if (substr_count($key, '[') !== substr_count($key, ']')) {
        throw new FormatException('Unbalanced [] in query string');
      }

      // A bracket at position 0 deliberately does not count as array notation
      if (!($start= strpos($key, '['))) {
        $params[$key]= $value;
        continue;
      }

      $base= substr($key, 0, $start);
      if (!isset($params[$base]) || !is_array($params[$base])) {
        $params[$base]= array();
      }
      $ptr= &$params[$base];
      $offset= 0;
      do {
        $end= strpos($key, ']', $offset);

        // Equal counts do not guarantee order: reject "]" ahead of its "["
        if (FALSE === $end || $end < $start) {
          throw new FormatException('Unbalanced [] in query string');
        }

        if ($start === $end- 1) {
          $ptr= &$ptr[];
        } else {

          // Nested "[" inside the subscript push the closing bracket outwards
          $end+= substr_count($key, '[', $start+ 1, $end- $start- 1);
          if ($end >= strlen($key)) {
            throw new FormatException('Unbalanced [] in query string');
          }
          $ptr= &$ptr[substr($key, $start+ 1, $end- $start- 1)];
        }
        $offset= $end+ 1;
      } while ($start= strpos($key, '[', $offset));
      $ptr= $value;
      unset($ptr);
    }
    return $params;
  }
  /**
   * Returns the query string, built from the parameters via buildQuery()
   * and without a leading "?".
   *
   * @param   var default default NULL value to return if there are no parameters
   * @return  string query or default if none is set
   */
  public function getQuery($default= NULL) {

    // empty() also covers an instance on which no parameters were ever set
    if (empty($this->_info['params'])) return $default;

    $query= '';
    foreach ($this->_info['params'] as $key => $value) {
      $query.= $this->buildQuery($key, $value);
    }

    // Every pair is prefixed with "&" - drop the first one
    return substr($query, 1);
  }
  /**
   * Replaces all parameters by those parsed from the given query string
   * and discards the cached URL string.
   *
   * @param   string query
   * @return  peer.URL this object
   */
  public function setQuery($query) {
    $parsed= $this->parseQuery((string)$query);
    $this->_info['params']= $parsed;
    unset($this->_info['url']);
    return $this;
  }
  /**
   * Returns the fragment
   *
   * @param   var default default NULL value to return if no fragment is set
   * @return  string the fragment, or the given default
   */
  public function getFragment($default= NULL) {
    if (isset($this->_info['fragment'])) return $this->_info['fragment'];
    return $default;
  }
  /**
   * Sets the fragment and discards the cached URL string
   *
   * @param   string fragment
   * @return  peer.URL this object
   */
  public function setFragment($fragment) {
    unset($this->_info['url']);
    $this->_info['fragment']= $fragment;
    return $this;
  }
  /**
   * Returns the port
   *
   * @param   var default default NULL value to return if no port is set
   * @return  int the port, or the given default
   */
  public function getPort($default= NULL) {
    if (isset($this->_info['port'])) return $this->_info['port'];
    return $default;
  }
  /**
   * Sets the port and discards the cached URL string
   *
   * @param   int port
   * @return  peer.URL this object
   */
  public function setPort($port) {
    unset($this->_info['url']);
    $this->_info['port']= $port;
    return $this;
  }
  /**
   * Returns the value of the parameter with the given name
   *
   * @param   string name
   * @param   var default default NULL value to return if the parameter is not set
   * @return  string url-decoded parameter value or default if none is set
   */
  public function getParam($name, $default= NULL) {
    if (!isset($this->_info['params'][$name])) return $default;
    return $this->_info['params'][$name];
  }
  /**
   * Returns all parameters. Yields an empty array if no parameters
   * were ever set on this instance.
   *
   * @return  array params
   */
  public function getParams() {
    return isset($this->_info['params']) ? $this->_info['params'] : array();
  }
  /**
   * Sets a parameter, replacing any existing one of the same name,
   * and discards the cached URL string.
   *
   * @param   string key
   * @param   var value either a string or a string[]
   * @return  peer.URL this object
   */
  public function setParam($key, $value= '') {
    unset($this->_info['url']);
    $this->_info['params'][$key]= $value;
    return $this;
  }
  /**
   * Sets several parameters at once by passing every key/value pair
   * of the given map to setParam().
   *
   * @param   array<string, var> hash parameters
   * @return  peer.URL this object
   */
  public function setParams($hash) {
    foreach ($hash as $name => $val) {
      $this->setParam($name, $val);
    }

    // Also discard the cached URL string when the map was empty
    unset($this->_info['url']);
    return $this;
  }
  /**
   * Adds a parameter and discards the cached URL string. In contrast
   * to setParam(), an existing parameter is not overwritten.
   *
   * @param   string key
   * @param   var value either a string or a string[]
   * @return  peer.URL this object
   * @throws  lang.IllegalArgumentException if a parameter of this name is already set
   */
  public function addParam($key, $value= '') {
    if (!isset($this->_info['params'][$key])) {
      $this->_info['params'][$key]= $value;
      unset($this->_info['url']);
      return $this;
    }
    throw new IllegalArgumentException('A parameter named "'.$key.'" already exists');
  }
  /**
   * Add parameters from an associative array. The key is taken as
   * parameter name and the value as parameter value.
   *
   * This is all-or-nothing: if one of the names is already set, the
   * parameters present before the call are restored and the exception
   * raised by addParam() is passed on.
   *
   * @param   array<string, var> hash parameters
   * @return  peer.URL this object
   * @throws  lang.IllegalArgumentException if one of the parameters is already set
   */
  public function addParams($hash) {

    // Snapshot for rollback; "params" is absent until a query was set
    $params= isset($this->_info['params']) ? $this->_info['params'] : array();
    try {
      foreach ($hash as $key => $value) {
        $this->addParam($key, $value);
      }
    } catch (IllegalArgumentException $e) {
      $this->_info['params']= $params;
      throw $e;
    }
    unset($this->_info['url']);
    return $this;
  }
  /**
   * Removes the parameter with the given name, if present, and
   * discards the cached URL string.
   *
   * @param   string key
   * @return  peer.URL this object
   */
  public function removeParam($key) {
    unset($this->_info['url'], $this->_info['params'][$key]);
    return $this;
  }
  /**
   * Tells whether a parameter with the given name is set
   *
   * @param   string name
   * @return  bool
   */
  public function hasParam($name) {
    $present= isset($this->_info['params'][$name]);
    return $present;
  }
  /**
   * Tells whether this URL has at least one parameter
   *
   * @return  bool
   */
  public function hasParams() {
    return empty($this->_info['params']) ? FALSE : TRUE;
  }
  /**
   * Returns the full URL, with the password URL-encoded. The string is
   * built on first use and then cached until a setter discards it.
   *
   * @return  string
   */
  public function getURL() {
    if (isset($this->_info['url'])) return $this->_info['url'];

    $encode= function($pass) { return rawurlencode($pass); };
    $this->_info['url']= $this->asString($encode);
    return $this->_info['url'];
  }
  /**
   * Set full URL
   *
   * Splits the string into scheme, credentials, host, port, path, query
   * parameters and fragment. User and password are URL-decoded. A path
   * of the form "/c:/..." or "/c|/..." is stored as "c:/...".
   *
   * The components are collected first and only taken over once parsing
   * has succeeded, so a failing call leaves this object untouched.
   *
   * @param   string str URL
   * @throws  lang.FormatException if string is unparseable
   */
  public function setURL($str) {
    if (!preg_match('!^([a-z][a-z0-9\+]*)://([^@/?#]+@)?([^/?#]*)(/([^#?]*))?(.*)$!', $str, $matches)) {
      throw new FormatException('Cannot parse "'.$str.'"');
    }

    $info= array('scheme' => $matches[1]);

    // Credentials
    if ('' !== $matches[2]) {
      $user= $password= NULL;
      sscanf($matches[2], '%[^:@]:%[^@]@', $user, $password);
      $info['user']= rawurldecode((string)$user);
      $info['pass']= NULL === $password ? NULL : rawurldecode($password);
    } else {
      $info['user']= NULL;
      $info['pass']= NULL;
    }

    // Host and port, optionally
    if ('' === $matches[3] && '' !== $matches[4]) {
      $info['host']= NULL;
    } else {
      if (!preg_match('!^([a-zA-Z0-9\.-]+|\[[^\]]+\])(:([0-9]+))?$!', $matches[3], $host)) {
        throw new FormatException('Cannot parse "'.$str.'": Host and/or port malformed');
      }
      $info['host']= $host[1];
      $info['port']= isset($host[2]) ? (int)$host[3] : NULL;
    }

    // Path, with "/X:" and "/X|" drive notation rewritten to "X:"
    $path= $matches[4];
    if ('' === $path) {
      $info['path']= NULL;
    } else if (strlen($path) > 3 && (':' === $path[2] || '|' === $path[2])) {
      $info['path']= $path[1].':'.substr($path, 3);
    } else {
      $info['path']= $path;
    }

    // Query string and fragment
    $rest= $matches[6];
    if ('' === $rest || '?' === $rest || '#' === $rest) {
      $info['params']= array();
      $info['fragment']= NULL;
    } else if ('#' === $rest[0]) {
      $info['params']= array();
      $info['fragment']= substr($rest, 1);
    } else if ('?' === $rest[0]) {
      $p= strcspn($rest, '#');
      $info['params']= $this->parseQuery(substr($rest, 1, $p- 1));
      $info['fragment']= $p >= strlen($rest)- 1 ? NULL : substr($rest, $p+ 1);
    }

    $this->_info= $info;
  }
  /**
   * Returns a hashcode for this URL: the MD5 of the full URL string
   *
   * @return  string
   */
  public function hashCode() {
    $url= $this->getURL();
    return md5($url);
  }
  /**
   * Returns whether a given object is equal to this: it must be a URL
   * as well, and both full URL strings must be identical.
   *
   * @param   lang.Object cmp
   * @return  bool
   */
  public function equals($cmp) {
    if (!($cmp instanceof self)) return FALSE;
    return $this->getURL() === $cmp->getURL();
  }
  /**
   * Capitalize letters in escape sequence
   *
   * Only "%" followed by two hexadecimal digits counts as an escape
   * sequence; any other text after a percent sign is left as it is.
   *
   * @param   string string
   * @return  string
   */
  protected function capitalizeLettersInEscapeSequence($string) {
    return preg_replace_callback(
      '/%[0-9A-Fa-f]{2}/',
      function($matches) { return strtoupper($matches[0]); },
      $string
    );
  }
  /**
   * Decode percent encoded octets
   *
   * Replaces every escape sequence that stands for an unreserved
   * character (letters, digits, "-", ".", "_" and "~") by that
   * character. All other escape sequences are kept as they are. The
   * string is processed in a single pass, so text produced by decoding
   * is never decoded a second time.
   *
   * @see     http://www.apps.ietf.org/rfc/rfc3986.html#sec-2.3
   * @param   string string
   * @return  string
   */
  protected function decodePercentEncodedOctets($string) {
    return preg_replace_callback(
      '/%([0-9A-Fa-f]{2})/',
      function($matches) {
        $char= chr(hexdec($matches[1]));
        return preg_match('/^[A-Za-z0-9._~-]$/', $char) ? $char : $matches[0];
      },
      $string
    );
  }
  /**
   * Remove dot segments
   *
   * Works through the input from the left, following the steps A to E
   * of the algorithm referenced below, and collects the result in an
   * output buffer. "." and ".." are only treated specially when they
   * form a complete path segment; "/..foo", for example, is an ordinary
   * segment.
   *
   * @see     http://www.apps.ietf.org/rfc/rfc3986.html#sec-5.2.4
   * @param   string path
   * @return  string
   */
  protected function removeDotSegments($path) {
    $path= (string)$path;
    $cleanPath= '';

    // Compare against '' rather than using empty(), which would discard "0"
    while ('' !== $path) {
      if (preg_match('!^(\.\./|\./)!', $path, $matches)) {

        // A. Input begins with "../" or "./": remove that prefix
        $path= (string)substr($path, strlen($matches[1]));
      } else if (preg_match('!^/\.(/|$)!', $path, $matches)) {

        // B. Input begins with "/./" or is "/.": replace that prefix with "/"
        $path= '/'.substr($path, strlen($matches[0]));
      } else if (preg_match('!^/\.\.(/|$)!', $path, $matches)) {

        // C. Input begins with "/../" or is "/..": replace that prefix with
        // "/" and remove the last segment from the output
        $path= '/'.substr($path, strlen($matches[0]));
        $cleanPath= preg_replace('!/([^/]+)$!', '', $cleanPath);
      } else if (preg_match('!^(\.|\.\.)$!', $path)) {

        // D. Input consists only of "." or "..": remove it
        $path= '';
      } else {

        // E. Move the first path segment in the input to the end of the output
        preg_match('!^(/*[^/]*)!', $path, $matches);
        $path= (string)substr($path, strlen($matches[1]));
        $cleanPath.= $matches[1];
      }
    }
    return $cleanPath;
  }
  /**
   * Checks whether the given port is the default one registered for
   * the given scheme in the default ports table. Schemes without an
   * entry never have a default port. The port is compared loosely, so
   * a numeric string is accepted as well.
   *
   * @see     http://tools.ietf.org/html/rfc3986#section-6.2.3
   * @param   string scheme
   * @param   string port
   * @return  bool
   */
  protected function isDefaultPort($scheme, $port) {
    if (!array_key_exists($scheme, self::$defaultPorts)) return FALSE;
    return $port == self::$defaultPorts[$scheme];
  }
  /**
   * Get standard URL
   *
   * Builds a normalized form of this URL: scheme (anything from a "+"
   * onwards is dropped) and host are lowercased, the port is omitted
   * if it is the default one for the scheme, escape sequences are
   * uppercased, escaped unreserved characters are decoded and dot
   * segments are removed. User and password are not part of the result.
   *
   * @see     http://tools.ietf.org/html/rfc3986#page-38
   * @return  string
   */
  public function getCanonicalURL() {
    $scheme= NULL;
    sscanf(isset($this->_info['scheme']) ? $this->_info['scheme'] : '', '%[^+]', $scheme);

    // Convert the scheme to lower case
    $scheme= strtolower((string)$scheme);
    $url= $scheme.'://';

    // Convert the host to lower case
    $url.= strtolower(isset($this->_info['host']) ? $this->_info['host'] : '');

    // Add port if exist and is not the default one for this scheme
    if (isset($this->_info['port']) && !$this->isDefaultPort($scheme, $this->_info['port'])) {
      $url.= ':'.$this->_info['port'];
    }

    // Adding trailing /
    $url.= '/';

    // Capitalize letters in escape sequences &
    // Decode percent-encoded octets of unreserved characters &
    // Remove dot-segments
    if (isset($this->_info['path'])) {
      $path= $this->capitalizeLettersInEscapeSequence($this->_info['path']);
      $path= $this->decodePercentEncodedOctets($path);
      $path= $this->removeDotSegments($path);

      // The slash was already added above - strip it from the path only if
      // the path really starts with one (a path like "c:/dir" does not)
      $url.= (0 === strpos($path, '/')) ? (string)substr($path, 1) : $path;
    }

    // Same steps as for path
    // NOTE(review): RFC 3986 defines dot-segment removal for the path only;
    // applying it to query and fragment is kept as is - confirm it is intended
    if (!empty($this->_info['params'])) {
      $query= $this->capitalizeLettersInEscapeSequence($this->getQuery());
      $query= $this->decodePercentEncodedOctets($query);
      $url.= '?'.$this->removeDotSegments($query);
    }

    // Same steps as for path
    if (isset($this->_info['fragment'])) {
      $fragment= $this->capitalizeLettersInEscapeSequence($this->_info['fragment']);
      $fragment= $this->decodePercentEncodedOctets($fragment);
      $url.= '#'.$this->removeDotSegments($fragment);
    }
    return $url;
  }
  624. }
  625. ?>