PageRenderTime 56ms CodeModel.GetById 7ms RepoModel.GetById 0ms app.codeStats 1ms

/modules/requests/Requests/IRI.php

https://gitlab.com/x33n/ampache
PHP | 1220 lines | 833 code | 78 blank | 309 comment | 154 complexity | 87db793de10599ee1affeb34dc83dc37 MD5 | raw file
  1. <?php
  2. /**
  3. * IRI parser/serialiser/normaliser
  4. *
  5. * @package Requests
  6. * @subpackage Utilities
  7. */
  8. /**
  9. * IRI parser/serialiser/normaliser
  10. *
  11. * Copyright (c) 2007-2010, Geoffrey Sneddon and Steve Minutillo.
  12. * All rights reserved.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * * Redistributions of source code must retain the above copyright notice,
  18. * this list of conditions and the following disclaimer.
  19. *
  20. * * Redistributions in binary form must reproduce the above copyright notice,
  21. * this list of conditions and the following disclaimer in the documentation
  22. * and/or other materials provided with the distribution.
  23. *
  24. * * Neither the name of the SimplePie Team nor the names of its contributors
  25. * may be used to endorse or promote products derived from this software
  26. * without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  29. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
  32. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  33. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  34. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  35. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  36. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  37. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. *
  40. * @package Requests
  41. * @subpackage Utilities
  42. * @author Geoffrey Sneddon
  43. * @author Steve Minutillo
  44. * @copyright 2007-2009 Geoffrey Sneddon and Steve Minutillo
  45. * @license http://www.opensource.org/licenses/bsd-license.php
  46. * @link http://hg.gsnedders.com/iri/
  47. */
  48. class Requests_IRI
  49. {
  50. /**
  51. * Scheme
  52. *
  53. * @var string
  54. */
  55. protected $scheme = null;
  56. /**
  57. * User Information
  58. *
  59. * @var string
  60. */
  61. protected $iuserinfo = null;
  62. /**
  63. * ihost
  64. *
  65. * @var string
  66. */
  67. protected $ihost = null;
  68. /**
  69. * Port
  70. *
  71. * @var string
  72. */
  73. protected $port = null;
  74. /**
  75. * ipath
  76. *
  77. * @var string
  78. */
  79. protected $ipath = '';
  80. /**
  81. * iquery
  82. *
  83. * @var string
  84. */
  85. protected $iquery = null;
  86. /**
  87. * ifragment
  88. *
  89. * @var string
  90. */
  91. protected $ifragment = null;
  92. /**
  93. * Normalization database
  94. *
  95. * Each key is the scheme, each value is an array with each key as the IRI
  96. * part and value as the default value for that part.
  97. */
  98. protected $normalization = array(
  99. 'acap' => array(
  100. 'port' => 674
  101. ),
  102. 'dict' => array(
  103. 'port' => 2628
  104. ),
  105. 'file' => array(
  106. 'ihost' => 'localhost'
  107. ),
  108. 'http' => array(
  109. 'port' => 80,
  110. 'ipath' => '/'
  111. ),
  112. 'https' => array(
  113. 'port' => 443,
  114. 'ipath' => '/'
  115. ),
  116. );
  /**
   * Serialise the IRI when the object is used in a string context.
   *
   * get_iri() returns false when the current components do not form a
   * valid IRI, but __toString() is required to return a string. The
   * result is therefore cast, so an invalid IRI serialises to ''.
   *
   * @return string
   */
  public function __toString()
  {
      return (string) $this->get_iri();
  }
  /**
   * Route property writes to the matching set_*() method.
   *
   * A name that has a set_<name>() method is dispatched to it directly.
   * The "i"-prefixed aliases (iauthority, iuserinfo, ihost, ipath, iquery,
   * ifragment) are dispatched to the setter named after the part without
   * its leading "i". Any other name is silently ignored.
   *
   * @param string $name Property name
   * @param mixed $value Property value
   */
  public function __set($name, $value)
  {
      $setter = 'set_' . $name;
      if (method_exists($this, $setter)) {
          $this->$setter($value);
          return;
      }

      $aliases = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
      if (in_array($name, $aliases, true)) {
          $setter = 'set_' . substr($name, 1);
          $this->$setter($value);
      }
  }
  /**
   * Route property reads to the stored parts or to the get_*() methods.
   *
   * Lookup order: the computed values (iri, uri, iauthority, authority),
   * then a property with exactly this name, then the name with an "i"
   * prepended (host -> ihost), then the name with its first character
   * removed (ischeme -> scheme). An unknown name raises an E_USER_NOTICE
   * and yields null. When the resolved value is null and the current
   * scheme has a default for that part in the normalization table, the
   * default is returned instead.
   *
   * @param string $name Property name
   * @return mixed
   */
  public function __get($name)
  {
      // array_key_exists() is used rather than isset() because a stored
      // null is a legitimate value that must still count as "present".
      $props = get_object_vars($this);
      $with_prefix = 'i' . $name;
      $without_prefix = substr($name, 1);

      if (in_array($name, array('iri', 'uri', 'iauthority', 'authority'), true)) {
          $getter = 'get_' . $name;
          $return = $this->$getter();
      } elseif (array_key_exists($name, $props)) {
          $return = $this->$name;
      } elseif (array_key_exists($with_prefix, $props)) {
          // host -> ihost
          $name = $with_prefix;
          $return = $this->$name;
      } elseif ($without_prefix && array_key_exists($without_prefix, $props)) {
          // ischeme -> scheme
          $name = $without_prefix;
          $return = $this->$name;
      } else {
          trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
          $return = null;
      }

      if ($return === null && isset($this->normalization[$this->scheme][$name])) {
          return $this->normalization[$this->scheme][$name];
      }
      return $return;
  }
  /**
   * Report whether a virtual property is available.
   *
   * True when a get_<name>() method exists, or when a property with
   * exactly this name holds a non-null value.
   *
   * @param string $name Property name
   * @return bool
   */
  public function __isset($name)
  {
      return method_exists($this, 'get_' . $name) || isset($this->$name);
  }
  /**
   * Handle unset() on a virtual property.
   *
   * When a set_<name>() method exists it is invoked with an empty string;
   * names without a setter are ignored.
   *
   * @param string $name Property name
   */
  public function __unset($name)
  {
      $setter = 'set_' . $name;
      if (!method_exists($this, $setter)) {
          return;
      }
      $this->$setter('');
  }
  /**
   * Build an IRI object, optionally initialised from a string.
   *
   * The argument is handed to set_iri(); with the default null every part
   * keeps its initial value.
   *
   * @param string|null $iri IRI to parse
   */
  public function __construct($iri = null)
  {
      $this->set_iri($iri);
  }
  /**
   * Resolve a (possibly relative) IRI reference against a base IRI.
   *
   * Returns false when the reference is invalid, or when the base has no
   * scheme or is itself invalid. A reference that already carries a scheme
   * is returned as a clone, without consulting the base.
   *
   * @param Requests_IRI|string $base (Absolute) base IRI
   * @param Requests_IRI|string $relative Relative IRI reference
   * @return Requests_IRI|false
   */
  public static function absolutize($base, $relative)
  {
      if (!($relative instanceof Requests_IRI)) {
          $relative = new Requests_IRI($relative);
      }
      if (!$relative->is_valid()) {
          return false;
      }
      if ($relative->scheme !== null) {
          return clone $relative;
      }

      if (!($base instanceof Requests_IRI)) {
          $base = new Requests_IRI($base);
      }
      if ($base->scheme === null || !$base->is_valid()) {
          return false;
      }

      if ($relative->get_iri() === '') {
          // Empty reference: the base itself, minus its fragment.
          $target = clone $base;
          $target->ifragment = null;
      } elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
          // Network-path reference: only the scheme comes from the base.
          $target = clone $relative;
          $target->scheme = $base->scheme;
      } else {
          $target = new Requests_IRI;
          $target->scheme = $base->scheme;
          $target->iuserinfo = $base->iuserinfo;
          $target->ihost = $base->ihost;
          $target->port = $base->port;

          if ($relative->ipath === '') {
              // Same document: keep the base path, prefer the reference's query.
              $target->ipath = $base->ipath;
              if ($relative->iquery !== null) {
                  $target->iquery = $relative->iquery;
              } elseif ($base->iquery !== null) {
                  $target->iquery = $base->iquery;
              }
          } else {
              $base_has_authority = $base->iuserinfo !== null || $base->ihost !== null || $base->port !== null;
              $last_slash = strrpos($base->ipath, '/');

              if ($relative->ipath[0] === '/') {
                  $merged = $relative->ipath;
              } elseif ($base_has_authority && $base->ipath === '') {
                  $merged = '/' . $relative->ipath;
              } elseif ($last_slash !== false) {
                  // Replace everything after the last "/" of the base path.
                  $merged = substr($base->ipath, 0, $last_slash + 1) . $relative->ipath;
              } else {
                  $merged = $relative->ipath;
              }

              $target->ipath = $target->remove_dot_segments($merged);
              $target->iquery = $relative->iquery;
          }
          $target->ifragment = $relative->ifragment;
      }

      $target->scheme_normalization();
      return $target;
  }
  /**
   * Split an IRI reference into scheme/authority/path/query/fragment.
   *
   * Leading and trailing space, tab, LF, FF and CR are trimmed first. The
   * returned array is the preg_match() result (numeric and named groups)
   * with the named entries normalised: an absent scheme, authority, query
   * or fragment is null, and path is always a string.
   *
   * The pattern uses the "s" modifier so that the fragment's ".*" also
   * matches line breaks. Without it, a reference with a newline inside
   * the fragment does not match at all and ends up in the fatal-error
   * branch below.
   *
   * @param string $iri
   * @return array
   */
  protected function parse_iri($iri)
  {
      $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
      $pattern = '/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/s';

      if (!preg_match($pattern, $iri, $match)) {
          // Every string matches the pattern, so this is only reachable
          // if PCRE itself fails.
          trigger_error('This should never happen', E_USER_ERROR);
          die;
      }

      // Groups 1, 3, 6 and 8 are the optional wrappers ("scheme:",
      // "//authority", "?query", "#fragment"); an empty or missing wrapper
      // means the component is absent rather than present-but-empty.
      if ($match[1] === '') {
          $match['scheme'] = null;
      }
      if (!isset($match[3]) || $match[3] === '') {
          $match['authority'] = null;
      }
      if (!isset($match[5])) {
          $match['path'] = '';
      }
      if (!isset($match[6]) || $match[6] === '') {
          $match['query'] = null;
      }
      if (!isset($match[8]) || $match[8] === '') {
          $match['fragment'] = null;
      }
      return $match;
  }
  /**
   * Collapse "." and ".." segments in a path.
   *
   * Works with an input buffer and an output buffer: dot segments are
   * stripped from the front of the input, ordinary segments are moved to
   * the output, and a ".." additionally drops the last segment already in
   * the output. The loop stops as soon as the remaining input contains no
   * dot segment; whatever is left is appended unchanged.
   *
   * @param string $input
   * @return string
   */
  protected function remove_dot_segments($input)
  {
      $output = '';
      while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
          // A: drop a leading "../" or "./".
          if (strncmp($input, '../', 3) === 0) {
              $input = substr($input, 3);
              continue;
          }
          if (strncmp($input, './', 2) === 0) {
              $input = substr($input, 2);
              continue;
          }

          // B: a leading "/./" or a lone "/." becomes "/".
          if (strncmp($input, '/./', 3) === 0) {
              $input = substr($input, 2);
              continue;
          }
          if ($input === '/.') {
              $input = '/';
              continue;
          }

          // C: a leading "/../" or a lone "/.." becomes "/" and removes the
          // last output segment together with its preceding "/".
          $parent_prefix = strncmp($input, '/../', 4) === 0;
          if ($parent_prefix || $input === '/..') {
              $input = $parent_prefix ? substr($input, 3) : '/';
              $cut = strrpos($output, '/');
              $output = ($cut === false) ? '' : substr($output, 0, $cut);
              continue;
          }

          // D: input that is only "." or ".." is discarded.
          if ($input === '.' || $input === '..') {
              $input = '';
              continue;
          }

          // E: move the first segment (with its leading "/", if any) to the
          // output, up to but excluding the next "/".
          $next_slash = strpos($input, '/', 1);
          if ($next_slash !== false) {
              $output .= substr($input, 0, $next_slash);
              $input = substr($input, $next_slash);
          } else {
              $output .= $input;
              $input = '';
          }
      }
      return $output . $input;
  }
  /**
   * Percent-encode every byte that is not allowed in the given component.
   *
   * Steps: existing runs of percent-encoded bytes are normalised through
   * remove_iunreserved_percent_encoded(); a "%" not followed by two hex
   * digits becomes "%25"; then the string is scanned and each byte
   * sequence that is not in $extra_chars, not unreserved, and not a
   * well-formed UTF-8 encoding of an allowed code point is replaced by
   * its "%XX" form, byte by byte.
   *
   * @param string $string Input string
   * @param string $extra_chars Valid characters not in iunreserved or
   *                            iprivate (this is ASCII-only)
   * @param bool $iprivate Allow iprivate
   * @return string
   */
  protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
  {
      // Normalize as many pct-encoded sections as possible
      $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

      // Replace invalid percent characters
      $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

      // "%" can be whitelisted here because every remaining "%" now starts
      // a valid pct-encoded triplet.
      $allowed = $extra_chars . 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

      $position = 0;
      $strlen = strlen($string);
      while (($position += strspn($string, $allowed, $position)) < $strlen) {
          $lead = ord($string[$position]);
          $start = $position;
          $valid = true;

          // Classify the lead byte. Allowed ASCII was skipped by strspn(),
          // so anything that is not a multi-byte lead is invalid here.
          if (($lead & 0xE0) === 0xC0) {
              $character = ($lead & 0x1F) << 6;
              $length = 2;
              $remaining = 1;
          } elseif (($lead & 0xF0) === 0xE0) {
              $character = ($lead & 0x0F) << 12;
              $length = 3;
              $remaining = 2;
          } elseif (($lead & 0xF8) === 0xF0) {
              $character = ($lead & 0x07) << 18;
              $length = 4;
              $remaining = 3;
          } else {
              $valid = false;
              $length = 1;
              $remaining = 0;
          }

          if ($remaining) {
              if ($position + $length <= $strlen) {
                  $position++;
                  while ($remaining) {
                      $byte = ord($string[$position]);
                      if (($byte & 0xC0) !== 0x80) {
                          // Not a continuation byte: the sequence is broken.
                          // Step back so this byte is reprocessed on its own.
                          $valid = false;
                          $position--;
                          break;
                      }
                      $remaining--;
                      $character |= ($byte & 0x3F) << ($remaining * 6);
                      $position++;
                  }
              } else {
                  // Truncated sequence at the end of the string.
                  $position = $strlen - 1;
                  $valid = false;
              }
          }

          if (!$valid) {
              $encode = true;
          } else {
              // Non-shortest form sequences are invalid
              $overlong = ($length > 1 && $character <= 0x7F)
                  || ($length > 2 && $character <= 0x7FF)
                  || ($length > 3 && $character <= 0xFFFF);
              // Noncharacters
              $noncharacter = ($character & 0xFFFE) === 0xFFFE
                  || ($character >= 0xFDD0 && $character <= 0xFDEF);
              // Everything else not in ucschar
              $outside_ucschar = ($character > 0xD7FF && $character < 0xF900)
                  || $character < 0xA0
                  || $character > 0xEFFFD;
              // Everything not in iprivate, if it applies
              $outside_iprivate = !$iprivate
                  || $character < 0xE000
                  || $character > 0x10FFFD;

              $encode = $overlong || $noncharacter || ($outside_ucschar && $outside_iprivate);
          }

          if ($encode) {
              // After a fully decoded character $position is one past its
              // last byte; move back onto that byte.
              if ($valid) {
                  $position--;
              }
              // Each replacement turns one byte into three, so the cursor
              // and the length bookkeeping shift along with it.
              $offset = $start;
              while ($offset <= $position) {
                  $string = substr_replace($string, sprintf('%%%02X', ord($string[$offset])), $offset, 1);
                  $offset += 3;
                  $position += 2;
                  $strlen += 2;
              }
          }
      }
      return $string;
  }
  /**
   * Callback function for preg_replace_callback.
   *
   * Receives one run of consecutive percent-encoded bytes and decodes
   * those that form a well-formed UTF-8 encoding of a character in
   * iunreserved. All other triplets are kept, with their hex digits
   * upper-cased.
   *
   * The underscore (U+005F) is an unreserved character, so "%5F" is
   * decoded to "_" like the other unreserved ASCII characters; the other
   * code points between "Z" and "a" stay encoded.
   *
   * @param array $match PCRE match
   * @return string Replacement
   */
  protected function remove_iunreserved_percent_encoded($match)
  {
      // The match consists solely of "%XX" triplets, so exploding on "%"
      // yields an empty first element followed by one hex pair per byte.
      $bytes = explode('%', $match[0]);

      $string = '';
      // Number of continuation bytes still expected for the current
      // sequence.
      $remaining = 0;

      for ($i = 1, $len = count($bytes); $i < $len; $i++) {
          $value = hexdec($bytes[$i]);

          if (!$remaining) {
              // First byte of a sequence.
              $start = $i;
              $valid = true;

              if ($value <= 0x7F) {
                  $character = $value;
                  $length = 1;
              } elseif (($value & 0xE0) === 0xC0) {
                  $character = ($value & 0x1F) << 6;
                  $length = 2;
                  $remaining = 1;
              } elseif (($value & 0xF0) === 0xE0) {
                  $character = ($value & 0x0F) << 12;
                  $length = 3;
                  $remaining = 2;
              } elseif (($value & 0xF8) === 0xF0) {
                  $character = ($value & 0x07) << 18;
                  $length = 4;
                  $remaining = 3;
              } else {
                  // Stray continuation byte or illegal lead byte.
                  $valid = false;
                  $remaining = 0;
              }
          } elseif (($value & 0xC0) === 0x80) {
              // Valid continuation byte: add its six payload bits.
              $remaining--;
              $character |= ($value & 0x3F) << ($remaining * 6);
          } else {
              // Broken sequence: flush what was collected as invalid and
              // reprocess the current byte as the start of a new sequence.
              $valid = false;
              $remaining = 0;
              $i--;
          }

          if ($remaining) {
              continue;
          }

          // A sequence has ended; decide whether it may be decoded.
          if (!$valid) {
              $keep_encoded = true;
          } else {
              $keep_encoded =
                  // Non-shortest form sequences are invalid
                  ($length > 1 && $character <= 0x7F)
                  || ($length > 2 && $character <= 0x7FF)
                  || ($length > 3 && $character <= 0xFFFF)
                  // Outside of range of iunreserved codepoints
                  || $character < 0x2D
                  || $character > 0xEFFFD
                  // Noncharacters
                  || ($character & 0xFFFE) === 0xFFFE
                  || ($character >= 0xFDD0 && $character <= 0xFDEF)
                  // Everything else not in iunreserved (this is all BMP)
                  || $character === 0x2F
                  || ($character > 0x39 && $character < 0x41)
                  // "[" "\" "]" "^" and "`", but not "_" (0x5F)
                  || ($character > 0x5A && $character < 0x5F)
                  || $character === 0x60
                  || ($character > 0x7A && $character < 0x7E)
                  || ($character > 0x7E && $character < 0xA0)
                  || ($character > 0xD7FF && $character < 0xF900);
          }

          for ($j = $start; $j <= $i; $j++) {
              if ($keep_encoded) {
                  $string .= '%' . strtoupper($bytes[$j]);
              } else {
                  $string .= chr(hexdec($bytes[$j]));
              }
          }
      }

      // Bytes left over belong to a multi-byte sequence that was cut
      // short; they stay encoded.
      if ($remaining) {
          for ($j = $start; $j < $len; $j++) {
              $string .= '%' . strtoupper($bytes[$j]);
          }
      }
      return $string;
  }
  /**
   * Drop parts that merely repeat the current scheme's defaults.
   *
   * For each part the normalization table defines for the current scheme,
   * a stored value identical (===) to that default is reset to the part's
   * empty value: '' for ipath, null for every other part.
   */
  protected function scheme_normalization()
  {
      // part => value meaning "not set"
      $empty_values = array(
          'iuserinfo' => null,
          'ihost' => null,
          'port' => null,
          'ipath' => '',
          'iquery' => null,
          'ifragment' => null,
      );

      foreach ($empty_values as $part => $empty) {
          if (
              isset($this->normalization[$this->scheme][$part])
              && $this->$part === $this->normalization[$this->scheme][$part]
          ) {
              $this->$part = $empty;
          }
      }
  }
  /**
   * Check whether the current parts form a valid IRI reference.
   *
   * The result depends on several parts at once, so it is recomputed on
   * every call. Rules applied:
   *  - an empty path is always valid;
   *  - with an authority, the path must start with "/" but not with "//";
   *  - with neither scheme nor authority, the path must not contain a ":"
   *    before its first "/" (or anywhere, if it has no "/").
   *
   * @return bool
   */
  public function is_valid()
  {
      if ($this->ipath === '') {
          return true;
      }

      $has_authority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
      if ($has_authority) {
          return $this->ipath[0] === '/' && substr($this->ipath, 0, 2) !== '//';
      }

      if ($this->scheme !== null) {
          return true;
      }

      $colon = strpos($this->ipath, ':');
      if ($colon === false) {
          return true;
      }
      $slash = strpos($this->ipath, '/');
      return $slash !== false && $colon >= $slash;
  }
  /**
   * Set the entire IRI. Returns true on success, false on failure (if there
   * are any invalid characters).
   *
   * Results are memoised per input string for the lifetime of the request.
   * The argument is cast to string before it is used as a cache key, so
   * that values which PHP would coerce to the same array key (false and
   * '0' both become 0) cannot be served each other's cached result, and
   * stringable objects are not used as array offsets.
   *
   * @param string|null $iri IRI to parse; null leaves the object untouched
   * @return bool
   */
  protected function set_iri($iri)
  {
      static $cache;
      if (!$cache) {
          $cache = array();
      }

      if ($iri === null) {
          return true;
      }

      $iri = (string) $iri;

      if (isset($cache[$iri])) {
          list($this->scheme,
              $this->iuserinfo,
              $this->ihost,
              $this->port,
              $this->ipath,
              $this->iquery,
              $this->ifragment,
              $return) = $cache[$iri];
          return $return;
      }

      $parsed = $this->parse_iri($iri);
      // Short-circuits: once one component is rejected, the remaining
      // setters are not called.
      $return = $this->set_scheme($parsed['scheme'])
          && $this->set_authority($parsed['authority'])
          && $this->set_path($parsed['path'])
          && $this->set_query($parsed['query'])
          && $this->set_fragment($parsed['fragment']);

      $cache[$iri] = array($this->scheme,
          $this->iuserinfo,
          $this->ihost,
          $this->port,
          $this->ipath,
          $this->iquery,
          $this->ifragment,
          $return);
      return $return;
  }
  /**
   * Set the scheme. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * A valid scheme starts with an ASCII letter followed by letters, digits,
   * "+", "-" or "."; it is stored lower-cased. null clears the scheme. A
   * rejected value also clears the scheme.
   *
   * @param string|null $scheme
   * @return bool
   */
  protected function set_scheme($scheme)
  {
      if ($scheme === null) {
          $this->scheme = null;
          return true;
      }

      if (preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
          $this->scheme = strtolower($scheme);
          return true;
      }

      $this->scheme = null;
      return false;
  }
  /**
   * Set the authority. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * The authority is split into userinfo (everything before the last "@"),
   * host, and port (after the first ":" that follows a closing "]", if
   * any), and each piece is handed to its own setter.
   *
   * Results are memoised. Because the individual setters apply
   * scheme-dependent normalization (e.g. "localhost" is dropped for file:,
   * port 80 for http:), the cache key includes the current scheme; keying
   * on the authority alone would replay one scheme's normalised result
   * for another scheme.
   *
   * @param string|null $authority
   * @return bool
   */
  protected function set_authority($authority)
  {
      static $cache;
      if (!$cache) {
          $cache = array();
      }

      if ($authority === null) {
          $this->iuserinfo = null;
          $this->ihost = null;
          $this->port = null;
          return true;
      }

      // A scheme never contains ":", so "<scheme>:<authority>" is
      // unambiguous (a null scheme yields a leading ":").
      $cache_key = $this->scheme . ':' . $authority;
      if (isset($cache[$cache_key])) {
          list($this->iuserinfo,
              $this->ihost,
              $this->port,
              $return) = $cache[$cache_key];
          return $return;
      }

      $remaining = $authority;

      $iuserinfo_end = strrpos($remaining, '@');
      if ($iuserinfo_end !== false) {
          $iuserinfo = substr($remaining, 0, $iuserinfo_end);
          $remaining = substr($remaining, $iuserinfo_end + 1);
      } else {
          $iuserinfo = null;
      }

      // Start looking for the port separator after an IP-literal's "]",
      // so the colons inside "[...]" are not mistaken for it.
      $bracket_end = strpos($remaining, ']');
      $port_start = strpos($remaining, ':', $bracket_end === false ? 0 : $bracket_end);
      if ($port_start !== false) {
          $port = substr($remaining, $port_start + 1);
          // "host:" has an empty port, which means "no port". substr()
          // reports that as false before PHP 8 and as '' from PHP 8 on.
          if ($port === false || $port === '') {
              $port = null;
          }
          $remaining = substr($remaining, 0, $port_start);
      } else {
          $port = null;
      }

      $return = $this->set_userinfo($iuserinfo) &&
          $this->set_host($remaining) &&
          $this->set_port($port);

      $cache[$cache_key] = array($this->iuserinfo,
          $this->ihost,
          $this->port,
          $return);
      return $return;
  }
  /**
   * Set the iuserinfo.
   *
   * null clears the part. Any other value is stored after characters not
   * permitted in userinfo have been percent-encoded, and scheme
   * normalization is re-applied. Always succeeds.
   *
   * @param string|null $iuserinfo
   * @return bool Always true
   */
  protected function set_userinfo($iuserinfo)
  {
      if ($iuserinfo === null) {
          $this->iuserinfo = null;
          return true;
      }

      $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
      $this->scheme_normalization();
      return true;
  }
  /**
   * Set the ihost. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * A value wrapped in "[" and "]" is treated as an IPv6 literal: it is
   * checked and compressed through Requests_IPv6, and rejected (host
   * cleared, false returned) when the check fails. Any other value is
   * percent-encoded where needed and its ASCII letters are lower-cased,
   * leaving pct-encoded triplets untouched.
   *
   * @param string|null $ihost
   * @return bool
   */
  protected function set_host($ihost)
  {
      if ($ihost === null) {
          $this->ihost = null;
          return true;
      }

      if (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') {
          $literal = substr($ihost, 1, -1);
          if (!Requests_IPv6::check_ipv6($literal)) {
              $this->ihost = null;
              return false;
          }
          $this->ihost = '[' . Requests_IPv6::compress($literal) . ']';
      } else {
          $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

          // Lower-casing happens after encoding, because the encoding step
          // can itself introduce unescaped characters. Triplets are
          // skipped whole so their hex digits stay upper-case.
          $length = strlen($ihost);
          $index = 0;
          while ($index < $length) {
              $char = $ihost[$index];
              if ($char === '%') {
                  $index += 3;
                  continue;
              }
              if ($char >= 'A' && $char <= 'Z') {
                  $ihost[$index] = strtolower($char);
              }
              $index++;
          }
          $this->ihost = $ihost;
      }

      $this->scheme_normalization();
      return true;
  }
  /**
   * Set the port. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * null and the empty string both mean "no port". An empty string is all
   * digits in the trivial sense, so without the explicit check it would
   * be stored as port 0 (for example after unset($iri->port)). A string of
   * digits is stored as an integer and scheme normalization is re-applied;
   * anything else clears the port and is rejected.
   *
   * @param string|int|null $port
   * @return bool
   */
  protected function set_port($port)
  {
      if ($port === null || $port === '') {
          $this->port = null;
          return true;
      }

      if (strspn($port, '0123456789') === strlen($port)) {
          $this->port = (int) $port;
          $this->scheme_normalization();
          return true;
      }

      $this->port = null;
      return false;
  }
  /**
   * Set the ipath.
   *
   * The path is percent-encoded where needed. When a scheme is set, dot
   * segments are removed as well; without a scheme the path is kept as
   * written. Both variants are memoised per input string. Scheme
   * normalization is re-applied afterwards. Always succeeds.
   *
   * @param string $ipath
   * @return bool Always true
   */
  protected function set_path($ipath)
  {
      static $cache;
      if (!$cache) {
          $cache = array();
      }

      $ipath = (string) $ipath;

      if (!isset($cache[$ipath])) {
          $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
          // index 0: encoded only; index 1: encoded and dot segments removed
          $cache[$ipath] = array($encoded, $this->remove_dot_segments($encoded));
      }

      $variant = ($this->scheme !== null) ? 1 : 0;
      $this->ipath = $cache[$ipath][$variant];

      $this->scheme_normalization();
      return true;
  }
  /**
   * Set the iquery.
   *
   * null clears the part. Any other value is stored after characters not
   * permitted in a query have been percent-encoded (private-use code
   * points are allowed here), and scheme normalization is re-applied.
   * Always succeeds.
   *
   * @param string|null $iquery
   * @return bool Always true
   */
  protected function set_query($iquery)
  {
      if ($iquery === null) {
          $this->iquery = null;
          return true;
      }

      $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
      $this->scheme_normalization();
      return true;
  }
  /**
   * Set the ifragment.
   *
   * null clears the part. Any other value is stored after characters not
   * permitted in a fragment have been percent-encoded, and scheme
   * normalization is re-applied. Always succeeds.
   *
   * @param string|null $ifragment
   * @return bool Always true
   */
  protected function set_fragment($ifragment)
  {
      if ($ifragment === null) {
          $this->ifragment = null;
          return true;
      }

      $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
      $this->scheme_normalization();
      return true;
  }
  /**
   * Convert an IRI to a URI (or parts thereof)
   *
   * Every byte in the range 0x80-0xFF is replaced by its upper-case "%XX"
   * form; ASCII bytes are left as they are.
   *
   * @param string $string IRI or IRI component
   * @return string
   */
  protected function to_uri($string)
  {
      // Built once: a string holding every non-ASCII byte value.
      static $non_ascii;
      if (!$non_ascii) {
          $non_ascii = implode('', range("\x80", "\xFF"));
      }

      $cursor = 0;
      $total = strlen($string);
      while (true) {
          // Jump to the next non-ASCII byte, if there is one.
          $cursor += strcspn($string, $non_ascii, $cursor);
          if ($cursor >= $total) {
              break;
          }
          $escaped = sprintf('%%%02X', ord($string[$cursor]));
          $string = substr_replace($string, $escaped, $cursor, 1);
          // One byte became three.
          $cursor += 3;
          $total += 2;
      }
      return $string;
  }
  /**
   * Get the complete IRI
   *
   * Assembles scheme, authority, path, query and fragment, emitting the
   * ":", "//", "?" and "#" delimiters only for parts that are set.
   *
   * @return string|false The IRI, or false if the parts do not form a valid IRI
   */
  protected function get_iri()
  {
      if (!$this->is_valid()) {
          return false;
      }

      $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';

      $iauthority = $this->get_iauthority();
      if ($iauthority !== null) {
          $iri .= '//' . $iauthority;
      }

      $iri .= $this->ipath;
      $iri .= ($this->iquery !== null) ? '?' . $this->iquery : '';
      $iri .= ($this->ifragment !== null) ? '#' . $this->ifragment : '';

      return $iri;
  }
  /**
   * Get the complete URI
   *
   * This is the IRI from get_iri() passed through to_uri(), i.e. with all
   * non-ASCII bytes percent-encoded.
   *
   * @return string
   */
  protected function get_uri()
  {
      $iri = $this->get_iri();
      return $this->to_uri($iri);
  }
  /**
   * Get the complete iauthority
   *
   * Joins the parts that are set as "userinfo@host:port".
   *
   * @return string|null The authority, or null when userinfo, host and port are all unset
   */
  protected function get_iauthority()
  {
      if ($this->iuserinfo === null && $this->ihost === null && $this->port === null) {
          return null;
      }

      $iauthority = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';
      if ($this->ihost !== null) {
          $iauthority .= $this->ihost;
      }
      if ($this->port !== null) {
          $iauthority .= ':' . $this->port;
      }
      return $iauthority;
  }
  /**
   * Get the complete authority
   *
   * The iauthority converted with to_uri(). A non-string result from
   * get_iauthority() (null when no authority is set) is returned as is.
   *
   * @return string|null
   */
  protected function get_authority()
  {
      $iauthority = $this->get_iauthority();
      if (!is_string($iauthority)) {
          return $iauthority;
      }
      return $this->to_uri($iauthority);
  }
  1142. }