PageRenderTime 51ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/joomla/libraries/joomla/utilities/string.php

https://github.com/reechalee/joomla1.6
PHP | 715 lines | 304 code | 41 blank | 370 comment | 99 complexity | 7361a023af4a3a80fc4269d5f37b780e MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, BSD-3-Clause, JSON
  1. <?php
  2. /**
  3. * @version $Id: string.php 20196 2011-01-09 02:40:25Z ian $
  4. * @package Joomla.Framework
  5. * @subpackage Utilities
  6. * @copyright Copyright (C) 2005 - 2011 Open Source Matters, Inc. All rights reserved.
  7. * @license GNU General Public License version 2 or later; see LICENSE.txt
  8. */
  9. // Check to ensure this file is within the rest of the framework
  // Refuse to run when loaded outside of the framework.
  defined('JPATH_BASE') or die;

  /**
   * PHP mbstring and iconv local configuration
   */

  // Dynamic loading with dl() is only attempted on non-Windows platforms.
  // The comparison has to be "!== 'WIN'": the previous "!strtoupper(...) === 'WIN'"
  // negated the string first and therefore was always false, so dl() was never tried.
  $jstringCanDl = (strtoupper(substr(PHP_OS, 0, 3)) !== 'WIN') && function_exists('dl');

  // Use mbstring when it is loaded, otherwise try to load it.
  if (extension_loaded('mbstring') || ($jstringCanDl && @dl('mbstring.so'))) {
      // Make sure to suppress the output in case ini_set is disabled
      @ini_set('mbstring.internal_encoding', 'UTF-8');
      @ini_set('mbstring.http_input', 'UTF-8');
      @ini_set('mbstring.http_output', 'UTF-8');
  }

  // Same for iconv
  if (function_exists('iconv') || ($jstringCanDl && @dl('iconv.so'))) {
      // These are settings that can be set inside code.
      // NOTE(review): iconv_set_encoding() is reported as deprecated by newer PHP
      // releases - confirm against the PHP versions this file must support.
      iconv_set_encoding("internal_encoding", "UTF-8");
      iconv_set_encoding("input_encoding", "UTF-8");
      iconv_set_encoding("output_encoding", "UTF-8");
  }

  unset($jstringCanDl);

  /**
   * Include the utf8 package
   */
  jimport('phputf8.utf8');
  jimport('phputf8.strcasecmp');
  33. /**
  34. * String handling class for utf-8 data
  35. * Wraps the phputf8 library
  36. * All functions assume the validity of utf-8 strings.
  37. *
  38. * @static
  39. * @package Joomla.Framework
  40. * @subpackage Utilities
  41. * @since 1.5
  42. */
  abstract class JString
  {
      /**
       * UTF-8 aware alternative to strpos.
       * Finds the position of the first occurrence of a string.
       *
       * @static
       * @access public
       * @param string $str    String being examined
       * @param string $search String being searched for
       * @param int    $offset Optional, the position from which the search should be
       *                       performed; FALSE (default) means "not supplied"
       * @return mixed Number of characters before the first match or FALSE on failure
       * @see http://www.php.net/strpos
       */
      public static function strpos($str, $search, $offset = FALSE)
      {
          if ($offset === false) {
              return utf8_strpos($str, $search);
          }

          return utf8_strpos($str, $search, $offset);
      }

      /**
       * UTF-8 aware alternative to strrpos.
       * Finds the position of the last occurrence of a string.
       *
       * @static
       * @access public
       * @param string $str    String being examined
       * @param string $search String being searched for
       * @param int    $offset Optional, the position from which the search should be
       *                       performed; FALSE (default) means "not supplied"
       * @return mixed Number of characters before the last match or FALSE on failure
       * @see http://www.php.net/strrpos
       */
      public static function strrpos($str, $search, $offset = false)
      {
          // The offset used to be accepted but silently dropped; forward it when
          // the caller actually supplied one.
          if ($offset === false || $offset === null) {
              return utf8_strrpos($str, $search);
          }

          return utf8_strrpos($str, $search, $offset);
      }

      /**
       * UTF-8 aware alternative to substr.
       * Returns part of a string given a character offset (and optionally a length).
       *
       * @static
       * @access public
       * @param string  $str    The input string
       * @param integer $offset Number of UTF-8 characters offset (from left)
       * @param integer $length (optional) length in UTF-8 characters from offset
       * @return mixed string or FALSE if failure
       * @see http://www.php.net/substr
       */
      public static function substr($str, $offset, $length = FALSE)
      {
          if ($length === false) {
              return utf8_substr($str, $offset);
          }

          return utf8_substr($str, $offset, $length);
      }

      /**
       * UTF-8 aware alternative to strtolower.
       * Makes a string lowercase.
       * Note: The concept of a character's "case" only exists in some alphabets
       * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
       * not exist in the Chinese alphabet, for example. See Unicode Standard
       * Annex #21: Case Mappings
       *
       * @static
       * @access public
       * @param string $str The input string
       * @return mixed either string in lowercase or FALSE if UTF-8 invalid
       * @see http://www.php.net/strtolower
       */
      public static function strtolower($str)
      {
          return utf8_strtolower($str);
      }

      /**
       * UTF-8 aware alternative to strtoupper.
       * Makes a string uppercase.
       * Note: The concept of a character's "case" only exists in some alphabets
       * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
       * not exist in the Chinese alphabet, for example. See Unicode Standard
       * Annex #21: Case Mappings
       *
       * @static
       * @access public
       * @param string $str The input string
       * @return mixed either string in uppercase or FALSE if UTF-8 invalid
       * @see http://www.php.net/strtoupper
       */
      public static function strtoupper($str)
      {
          return utf8_strtoupper($str);
      }

      /**
       * UTF-8 aware alternative to strlen.
       * Returns the number of characters in the string (NOT THE NUMBER OF BYTES).
       *
       * @static
       * @access public
       * @param string $str UTF-8 string
       * @return int number of UTF-8 characters in string
       * @see http://www.php.net/strlen
       */
      public static function strlen($str)
      {
          return utf8_strlen($str);
      }

      /**
       * UTF-8 aware alternative to str_ireplace.
       * Case-insensitive version of str_replace.
       *
       * @static
       * @access public
       * @param string $search  Existing string to replace
       * @param string $replace New string to replace with
       * @param string $str     String to search
       * @param int    $count   Optional value forwarded unchanged as the fourth
       *                        argument of utf8_ireplace(); it is received by value
       * @return mixed Whatever utf8_ireplace() returns
       * @see http://www.php.net/str_ireplace
       */
      public static function str_ireplace($search, $replace, $str, $count = NULL)
      {
          jimport('phputf8.str_ireplace');

          // The default is NULL, so "not supplied" must be detected with NULL
          // (FALSE is still honoured for callers that pass it explicitly).
          if ($count === null || $count === false) {
              return utf8_ireplace($search, $replace, $str);
          }

          return utf8_ireplace($search, $replace, $str, $count);
      }

      /**
       * UTF-8 aware alternative to str_split.
       * Converts a string to an array.
       *
       * @static
       * @access public
       * @param string $str       UTF-8 encoded string
       * @param int    $split_len Number of characters to split the string by
       * @return array
       * @see http://www.php.net/str_split
       */
      public static function str_split($str, $split_len = 1)
      {
          jimport('phputf8.str_split');

          return utf8_str_split($str, $split_len);
      }

      /**
       * Switches LC_COLLATE to the requested locale and works out whether strings
       * have to be recoded before they are handed to strcoll().
       *
       * The locale change is NOT undone afterwards. If the requested locale cannot
       * be set, the locale that was active before the call is inspected instead.
       *
       * @static
       * @access private
       * @param mixed $locale Locale(s) passed on to setlocale()
       * @return mixed 'CP<digits>' when the active locale name contains '_', does
       *               not mention UTF-8 and ends in '.<digits>'; FALSE when the
       *               strings should be compared without recoding
       */
      private static function _collationCodepage($locale)
      {
          $previous = setlocale(LC_COLLATE, 0);
          $active   = setlocale(LC_COLLATE, $locale);

          if (!$active) {
              $active = $previous;
          }

          if (!stristr($active, 'UTF-8') && stristr($active, '_') && preg_match('~\.(\d+)$~', $active, $m)) {
              return 'CP' . $m[1];
          }

          // Either a UTF-8 locale or something that cannot be recoded: compare as is.
          return false;
      }

      /**
       * UTF-8/LOCALE aware alternative to strcasecmp.
       * A case insensitive string comparison.
       *
       * When a locale is given, LC_COLLATE is changed via setlocale() and is left
       * changed when the method returns.
       *
       * @static
       * @access public
       * @param string $str1   String 1 to compare
       * @param string $str2   String 2 to compare
       * @param mixed  $locale The locale used by strcoll or false to use classical comparison
       * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
       * @see http://www.php.net/strcasecmp
       * @see http://www.php.net/strcoll
       * @see http://www.php.net/setlocale
       */
      public static function strcasecmp($str1, $str2, $locale = false)
      {
          if (!$locale) {
              return utf8_strcasecmp($str1, $str2);
          }

          $encoding = self::_collationCodepage($locale);
          $lower1   = utf8_strtolower($str1);
          $lower2   = utf8_strtolower($str2);

          if ($encoding === false) {
              return strcoll($lower1, $lower2);
          }

          return strcoll(
              self::transcode($lower1, 'UTF-8', $encoding),
              self::transcode($lower2, 'UTF-8', $encoding)
          );
      }

      /**
       * UTF-8/LOCALE aware alternative to strcmp.
       * A case sensitive string comparison.
       *
       * When a locale is given, LC_COLLATE is changed via setlocale() and is left
       * changed when the method returns.
       *
       * @static
       * @access public
       * @param string $str1   String 1 to compare
       * @param string $str2   String 2 to compare
       * @param mixed  $locale The locale used by strcoll or false to use classical comparison
       * @return int < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
       * @see http://www.php.net/strcmp
       * @see http://www.php.net/strcoll
       * @see http://www.php.net/setlocale
       */
      public static function strcmp($str1, $str2, $locale = false)
      {
          if (!$locale) {
              return strcmp($str1, $str2);
          }

          $encoding = self::_collationCodepage($locale);

          if ($encoding === false) {
              return strcoll($str1, $str2);
          }

          return strcoll(
              self::transcode($str1, 'UTF-8', $encoding),
              self::transcode($str2, 'UTF-8', $encoding)
          );
      }

      /**
       * UTF-8 aware alternative to strcspn.
       * Finds the length of the initial segment not matching mask.
       *
       * @static
       * @access public
       * @param string $str    The input string
       * @param string $mask   The mask
       * @param int    $start  Optional starting character position (in characters)
       * @param int    $length Optional length
       * @return int the length of the initial segment of str1 which does not contain any of the characters in str2
       * @see http://www.php.net/strcspn
       */
      public static function strcspn($str, $mask, $start = NULL, $length = NULL)
      {
          jimport('phputf8.strcspn');

          // Defaults are NULL; FALSE is still accepted as "not supplied".
          $noStart  = ($start === null || $start === false);
          $noLength = ($length === null || $length === false);

          if ($noStart && $noLength) {
              return utf8_strcspn($str, $mask);
          }

          if ($noLength) {
              return utf8_strcspn($str, $mask, $start);
          }

          return utf8_strcspn($str, $mask, $start, $length);
      }

      /**
       * UTF-8 aware alternative to stristr.
       * Returns all of haystack from the first occurrence of needle to the end.
       * Needle and haystack are examined in a case-insensitive manner.
       *
       * @static
       * @access public
       * @param string $str    The haystack
       * @param string $search The needle
       * @return string the sub string
       * @see http://www.php.net/stristr
       */
      public static function stristr($str, $search)
      {
          jimport('phputf8.stristr');

          return utf8_stristr($str, $search);
      }

      /**
       * UTF-8 aware alternative to strrev.
       * Reverses a string.
       *
       * @static
       * @access public
       * @param string $str String to be reversed
       * @return string The string in reverse character order
       * @see http://www.php.net/strrev
       */
      public static function strrev($str)
      {
          jimport('phputf8.strrev');

          return utf8_strrev($str);
      }

      /**
       * UTF-8 aware alternative to strspn.
       * Finds the length of the initial segment matching mask.
       *
       * @static
       * @access public
       * @param string $str    The haystack
       * @param string $mask   The mask
       * @param int    $start  Start (optional)
       * @param int    $length Length (optional)
       * @return mixed Whatever utf8_strspn() returns
       * @see http://www.php.net/strspn
       */
      public static function strspn($str, $mask, $start = NULL, $length = NULL)
      {
          jimport('phputf8.strspn');

          if ($start === null && $length === null) {
              return utf8_strspn($str, $mask);
          }

          if ($length === null) {
              return utf8_strspn($str, $mask, $start);
          }

          return utf8_strspn($str, $mask, $start, $length);
      }

      /**
       * UTF-8 aware substr_replace.
       * Replaces text within a portion of a string.
       *
       * @static
       * @access public
       * @param string $str    The haystack
       * @param string $repl   The replacement string
       * @param int    $start  Start
       * @param int    $length Length (optional)
       * @return mixed Whatever utf8_substr_replace() returns
       * @see http://www.php.net/substr_replace
       */
      public static function substr_replace($str, $repl, $start, $length = NULL)
      {
          // utf8_substr_replace is loaded by the library loader.
          // The default is NULL; FALSE is still accepted as "not supplied".
          if ($length === null || $length === false) {
              return utf8_substr_replace($str, $repl, $start);
          }

          return utf8_substr_replace($str, $repl, $start, $length);
      }

      /**
       * UTF-8 aware replacement for ltrim().
       * Strips whitespace (or other characters) from the beginning of a string.
       * Note: you only need to use this if you are supplying the charlist
       * optional arg and it contains UTF-8 characters. Otherwise ltrim will
       * work normally on a UTF-8 string.
       *
       * @static
       * @access public
       * @param string $str      The string to be trimmed
       * @param string $charlist The optional charlist of additional characters to trim
       * @return string the trimmed string
       * @see http://www.php.net/ltrim
       */
      public static function ltrim($str, $charlist = FALSE)
      {
          // An explicitly empty charlist means "trim nothing". This must not use
          // empty(), which would also swallow the perfectly valid charlist '0'.
          if ($charlist === null || $charlist === '' || $charlist === array()) {
              return $str;
          }

          jimport('phputf8.trim');

          if ($charlist === false) {
              return utf8_ltrim($str);
          }

          return utf8_ltrim($str, $charlist);
      }

      /**
       * UTF-8 aware replacement for rtrim().
       * Strips whitespace (or other characters) from the end of a string.
       * Note: you only need to use this if you are supplying the charlist
       * optional arg and it contains UTF-8 characters. Otherwise rtrim will
       * work normally on a UTF-8 string.
       *
       * @static
       * @access public
       * @param string $str      The string to be trimmed
       * @param string $charlist The optional charlist of additional characters to trim
       * @return string the trimmed string
       * @see http://www.php.net/rtrim
       */
      public static function rtrim($str, $charlist = FALSE)
      {
          // An explicitly empty charlist means "trim nothing". This must not use
          // empty(), which would also swallow the perfectly valid charlist '0'.
          if ($charlist === null || $charlist === '' || $charlist === array()) {
              return $str;
          }

          jimport('phputf8.trim');

          if ($charlist === false) {
              return utf8_rtrim($str);
          }

          return utf8_rtrim($str, $charlist);
      }

      /**
       * UTF-8 aware replacement for trim().
       * Strips whitespace (or other characters) from the beginning and end of a string.
       * Note: you only need to use this if you are supplying the charlist
       * optional arg and it contains UTF-8 characters. Otherwise trim will
       * work normally on a UTF-8 string.
       *
       * @static
       * @access public
       * @param string $str      The string to be trimmed
       * @param string $charlist The optional charlist of additional characters to trim
       * @return string the trimmed string
       * @see http://www.php.net/trim
       */
      public static function trim($str, $charlist = FALSE)
      {
          // An explicitly empty charlist means "trim nothing". This must not use
          // empty(), which would also swallow the perfectly valid charlist '0'.
          if ($charlist === null || $charlist === '' || $charlist === array()) {
              return $str;
          }

          jimport('phputf8.trim');

          if ($charlist === false) {
              return utf8_trim($str);
          }

          return utf8_trim($str, $charlist);
      }

      /**
       * UTF-8 aware alternative to ucfirst.
       * Makes a string's first character uppercase.
       *
       * @static
       * @access public
       * @param string $str The input string
       * @return string with first character as upper case (if applicable)
       * @see http://www.php.net/ucfirst
       */
      public static function ucfirst($str)
      {
          jimport('phputf8.ucfirst');

          return utf8_ucfirst($str);
      }

      /**
       * UTF-8 aware alternative to ucwords.
       * Uppercases the first character of each word in a string.
       *
       * @static
       * @access public
       * @param string $str The input string
       * @return string with first char of each word uppercase
       * @see http://www.php.net/ucwords
       */
      public static function ucwords($str)
      {
          jimport('phputf8.ucwords');

          return utf8_ucwords($str);
      }

      /**
       * Transcode a string.
       *
       * @static
       * @param string $source        The string to transcode.
       * @param string $from_encoding The source encoding.
       * @param string $to_encoding   The target encoding.
       * @return mixed The result of iconv() for string input, NULL when $source is not a string
       * @since 1.5
       */
      public static function transcode($source, $from_encoding, $to_encoding)
      {
          if (!is_string($source)) {
              return null;
          }

          /*
           * "//TRANSLIT" is appended to the $to_encoding to ensure that when iconv comes
           * across a character that cannot be represented in the target charset, it can
           * be approximated through one or several similarly looking characters.
           */
          return iconv($from_encoding, $to_encoding . '//TRANSLIT', $source);
      }

      /**
       * Tests a string as to whether it's valid UTF-8 and supported by the
       * Unicode standard.
       * Note: this function has been modified to simply return true or false.
       *
       * A string that ends in the middle of a multi-octet sequence is reported
       * as invalid.
       *
       * @author <hsivonen@iki.fi>
       * @param string $str UTF-8 encoded string
       * @return boolean true if valid
       * @since 1.6
       * @see http://hsivonen.iki.fi/php-utf8/
       * @see compliant
       */
      public static function valid($str)
      {
          $mState = 0; // number of continuation octets still expected
          $mUcs4  = 0; // code point assembled so far
          $mBytes = 1; // total number of octets in the current sequence

          $len = strlen($str);

          for ($i = 0; $i < $len; $i++) {
              $in = ord($str[$i]);

              if ($mState == 0) {
                  // Expect either a US-ASCII character or the lead octet of a
                  // multi-octet sequence.
                  if (0 == (0x80 & $in)) {
                      // US-ASCII, pass straight through.
                      $mBytes = 1;
                  } elseif (0xC0 == (0xE0 & $in)) {
                      // First octet of 2 octet sequence
                      $mUcs4  = ($in & 0x1F) << 6;
                      $mState = 1;
                      $mBytes = 2;
                  } elseif (0xE0 == (0xF0 & $in)) {
                      // First octet of 3 octet sequence
                      $mUcs4  = ($in & 0x0F) << 12;
                      $mState = 2;
                      $mBytes = 3;
                  } elseif (0xF0 == (0xF8 & $in)) {
                      // First octet of 4 octet sequence
                      $mUcs4  = ($in & 0x07) << 18;
                      $mState = 3;
                      $mBytes = 4;
                  } elseif (0xF8 == (0xFC & $in)) {
                      /*
                       * First octet of 5 octet sequence.
                       *
                       * This is illegal because the encoded codepoint must be either
                       * (a) not the shortest form or
                       * (b) outside the Unicode range of 0-0x10FFFF.
                       * Rather than trying to resynchronize, we carry on until the end
                       * of the sequence and let the later error handling code catch it.
                       */
                      $mUcs4  = ($in & 0x03) << 24;
                      $mState = 4;
                      $mBytes = 5;
                  } elseif (0xFC == (0xFE & $in)) {
                      // First octet of 6 octet sequence, see comments for 5 octet sequence.
                      $mUcs4  = ($in & 1) << 30;
                      $mState = 5;
                      $mBytes = 6;
                  } else {
                      // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
                      return false;
                  }

                  continue;
              }

              // Inside a multi-octet sequence only continuation octets are legal.
              if (0x80 != (0xC0 & $in)) {
                  return false;
              }

              $shift  = ($mState - 1) * 6;
              $mUcs4 |= ($in & 0x0000003F) << $shift;

              if (0 == --$mState) {
                  // End of the multi-octet sequence: $mUcs4 holds the final code point.
                  // Check for illegal sequences and codepoints.
                  if (
                      // From Unicode 3.1, non-shortest form is illegal
                      ((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                      ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                      ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                      (4 < $mBytes) ||
                      // From Unicode 3.2, surrogate characters are illegal
                      (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                      // Codepoints outside the Unicode range are illegal
                      ($mUcs4 > 0x10FFFF)
                  ) {
                      return false;
                  }

                  // Reset the cache for the next character.
                  $mState = 0;
                  $mUcs4  = 0;
                  $mBytes = 1;
              }
          }

          // A non-zero state here means the input stopped inside a sequence.
          return ($mState == 0);
      }

      /**
       * Tests whether a string complies as UTF-8. This will be much
       * faster than utf8_is_valid but will pass five and six octet
       * UTF-8 sequences, which are not supported by Unicode and
       * so cannot be displayed correctly in a browser. In other words
       * it is not as strict as utf8_is_valid but it's faster. If you use
       * it to validate user input, you place yourself at the risk that
       * attackers will be able to inject 5 and 6 byte sequences (which
       * may or may not be a significant risk, depending on what you
       * are doing).
       *
       * @see valid
       * @see http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805
       * @param string $str UTF-8 string to check
       * @return boolean TRUE if string is valid UTF-8
       * @since 1.6
       */
      public static function compliant($str)
      {
          if (strlen($str) == 0) {
              return true;
          }

          // If even just the first character can be matched, when the /u
          // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
          // invalid, nothing at all will match, even if the string contains
          // some valid sequences
          return (preg_match('/^.{1}/us', $str) == 1);
      }

      /**
       * Does a UTF-8 safe version of PHP parse_url function
       * @see http://us3.php.net/manual/en/function.parse-url.php
       *
       * @param string $url URL to parse
       * @return mixed associative array of URL-decoded parts, or false if badly formed URL.
       * @since 1.6
       */
      public static function parse_url($url)
      {
          // Build arrays of values we need to decode before parsing
          $entities = array('%21', '%2A', '%27', '%28', '%29', '%3B', '%3A', '%40', '%26', '%3D', '%24', '%2C', '%2F', '%3F', '%25', '%23', '%5B', '%5D');
          $replacements = array('!', '*', "'", "(", ")", ";", ":", "@", "&", "=", "$", ",", "/", "?", "%", "#", "[", "]");

          // Create encoded URL with special URL characters decoded so it can be parsed.
          // All other characters will be encoded
          $encodedURL = str_replace($entities, $replacements, urlencode($url));

          // Parse the encoded URL
          $encodedParts = parse_url($encodedURL);

          // parse_url() reports a badly formed URL with false; pass that on instead
          // of iterating over a non-array.
          if ($encodedParts === false) {
              return false;
          }

          // Now, decode each value of the resulting array
          $result = array();

          foreach ($encodedParts as $key => $value) {
              $result[$key] = urldecode($value);
          }

          return $result;
      }
  }