PageRenderTime 26ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/libraries/joomla/utilities/string.php

https://bitbucket.org/elin/joomla-platform
PHP | 717 lines | 307 code | 41 blank | 369 comment | 99 complexity | cadf491b588ae301ee09d136325f4259 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-2.0, LGPL-2.1
  1. <?php
  2. /**
  3. * @package Joomla.Platform
  4. * @subpackage Utilities
  5. *
  6. * @copyright Copyright (C) 2005 - 2011 Open Source Matters, Inc. All rights reserved.
  7. * @license GNU General Public License version 2 or later; see LICENSE
  8. */
  defined('JPATH_PLATFORM') or die;

  /**
   * PHP mbstring and iconv local configuration.
   *
   * Forces UTF-8 as the working encoding of both extensions. When an extension
   * is not loaded, a runtime load through dl() is attempted on non-Windows
   * hosts only.
   */

  // Use mbstring if it is loaded, otherwise try to load it (never on Windows).
  // The OS test has to be "!==": the former "!strtoupper(...) === 'WIN'" negated
  // the string first and compared a boolean with 'WIN', so it was always false
  // and dl() was never attempted. dl() does not exist in every SAPI, hence the
  // function_exists() guard; "@" keeps a failed load from emitting a warning.
  if (extension_loaded('mbstring')
      || (strtoupper(substr(PHP_OS, 0, 3)) !== 'WIN' && function_exists('dl') && @dl('mbstring.so'))) {
      // Suppress output in case ini_set is disabled.
      if (version_compare(PHP_VERSION, '5.6', '>=')) {
          // The mbstring.* encoding settings are deprecated from PHP 5.6 on;
          // default_charset is their documented replacement.
          @ini_set('default_charset', 'UTF-8');
      } else {
          @ini_set('mbstring.internal_encoding', 'UTF-8');
          @ini_set('mbstring.http_input', 'UTF-8');
          @ini_set('mbstring.http_output', 'UTF-8');
      }
  }

  // Same for iconv.
  if (function_exists('iconv')
      || (strtoupper(substr(PHP_OS, 0, 3)) !== 'WIN' && function_exists('dl') && @dl('iconv.so'))) {
      if (version_compare(PHP_VERSION, '5.6', '>=')) {
          // iconv_set_encoding() is deprecated for these settings from PHP 5.6 on.
          @ini_set('default_charset', 'UTF-8');
      } else {
          // These are settings that can be set inside code.
          iconv_set_encoding('internal_encoding', 'UTF-8');
          iconv_set_encoding('input_encoding', 'UTF-8');
          iconv_set_encoding('output_encoding', 'UTF-8');
      }
  }
  27. /**
  28. * Include the utf8 package
  29. */
  30. jimport('phputf8.utf8');
  31. jimport('phputf8.strcasecmp');
  /**
   * String handling class for UTF-8 data.
   *
   * Static facade over the phputf8 library (the utf8_* functions). All methods
   * assume that the strings they receive are already valid UTF-8; use valid()
   * or compliant() to check untrusted input first.
   *
   * Optional arguments: several wrappers only forward an optional argument to
   * phputf8 when the caller actually supplied one. Both NULL and FALSE are
   * accepted as "not supplied", because the historical signatures mix the two.
   *
   * @static
   * @package     Joomla.Platform
   * @subpackage  Utilities
   * @since       11.1
   */
  abstract class JString
  {
      /**
       * Tells whether an optional argument was left out by the caller.
       *
       * @param   mixed  $value  The argument value to inspect.
       *
       * @return  boolean  True when the value is NULL or FALSE.
       */
      private static function isOmitted($value)
      {
          return $value === null || $value === false;
      }

      /**
       * Tells whether a trim charlist was given but contains nothing to trim.
       *
       * FALSE means "no charlist supplied" and is therefore not blank. The
       * string '0' (or integer 0) is a real character to trim; it must not be
       * mistaken for emptiness the way a bare empty() check would.
       *
       * @param   mixed  $charlist  The charlist argument of ltrim/rtrim/trim.
       *
       * @return  boolean  True when the charlist is blank.
       */
      private static function isBlankCharlist($charlist)
      {
          return $charlist !== false
              && $charlist !== '0'
              && $charlist !== 0
              && empty($charlist);
      }

      /**
       * Switches LC_COLLATE to the requested locale and reports the code page
       * strings must be recoded to before they are handed to strcoll().
       *
       * NOTE: the collation locale is changed as a side effect and is NOT
       * restored afterwards; this matches the long-standing behaviour of
       * strcmp()/strcasecmp().
       *
       * @param   mixed  $locale  Locale (or list of locales) accepted by setlocale().
       *
       * @return  mixed  'CP<digits>' when the active locale names a numeric code
       *                 page, or NULL when no recoding should happen (locale is
       *                 UTF-8, or its encoding cannot be determined).
       */
      private static function collationRecodeTarget($locale)
      {
          // Remember the current locale: it is the fallback if the switch fails.
          $previous = setlocale(LC_COLLATE, '0');
          $active = setlocale(LC_COLLATE, $locale);

          if (!$active) {
              $active = $previous;
          }

          // Already UTF-8: strcoll() can work on the strings directly.
          if (stristr($active, 'UTF-8')) {
              return null;
          }

          // Locales such as "xx_YY.1252" carry their code page after the dot.
          if (stristr($active, '_') && preg_match('~\.(\d+)$~', $active, $m)) {
              return 'CP' . $m[1];
          }

          // Anything else is something we do not know how to recode to.
          return null;
      }

      /**
       * UTF-8 aware alternative to strpos.
       * Find position of first occurrence of a string.
       *
       * @param   string   $str     String being examined.
       * @param   string   $search  String being searched for.
       * @param   integer  $offset  Optional, position from which the search should be performed.
       *
       * @return  mixed  Number of characters before the first match or FALSE on failure.
       *
       * @see     http://www.php.net/strpos
       */
      public static function strpos($str, $search, $offset = false)
      {
          if ($offset === false) {
              return utf8_strpos($str, $search);
          }

          return utf8_strpos($str, $search, $offset);
      }

      /**
       * UTF-8 aware alternative to strrpos.
       * Finds position of last occurrence of a string.
       *
       * @param   string   $str     String being examined.
       * @param   string   $search  String being searched for.
       * @param   integer  $offset  Optional offset, forwarded to utf8_strrpos() when supplied.
       *
       * @return  mixed  Number of characters before the last match or FALSE on failure.
       *
       * @see     http://www.php.net/strrpos
       */
      public static function strrpos($str, $search, $offset = false)
      {
          // The offset used to be accepted but silently dropped.
          if (self::isOmitted($offset)) {
              return utf8_strrpos($str, $search);
          }

          return utf8_strrpos($str, $search, $offset);
      }

      /**
       * UTF-8 aware alternative to substr.
       * Return part of a string given character offset (and optionally length).
       *
       * @param   string   $str     The input string.
       * @param   integer  $offset  Number of UTF-8 characters offset (from left).
       * @param   integer  $length  Optional length in UTF-8 characters from offset.
       *
       * @return  mixed  String, or FALSE on failure.
       *
       * @see     http://www.php.net/substr
       */
      public static function substr($str, $offset, $length = false)
      {
          if ($length === false) {
              return utf8_substr($str, $offset);
          }

          return utf8_substr($str, $offset, $length);
      }

      /**
       * UTF-8 aware alternative to strtolower.
       * Make a string lowercase.
       *
       * Note: the concept of a character's "case" only exists in some alphabets
       * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
       * not exist in the Chinese alphabet, for example. See Unicode Standard
       * Annex #21: Case Mappings.
       *
       * @param   string  $str  The input string.
       *
       * @return  mixed  Either the string in lowercase or FALSE if UTF-8 is invalid.
       *
       * @see     http://www.php.net/strtolower
       */
      public static function strtolower($str)
      {
          return utf8_strtolower($str);
      }

      /**
       * UTF-8 aware alternative to strtoupper.
       * Make a string uppercase.
       *
       * Note: the concept of a character's "case" only exists in some alphabets
       * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
       * not exist in the Chinese alphabet, for example. See Unicode Standard
       * Annex #21: Case Mappings.
       *
       * @param   string  $str  The input string.
       *
       * @return  mixed  Either the string in uppercase or FALSE if UTF-8 is invalid.
       *
       * @see     http://www.php.net/strtoupper
       */
      public static function strtoupper($str)
      {
          return utf8_strtoupper($str);
      }

      /**
       * UTF-8 aware alternative to strlen.
       * Returns the number of characters in the string (NOT THE NUMBER OF BYTES).
       *
       * @param   string  $str  UTF-8 string.
       *
       * @return  integer  Number of UTF-8 characters in the string.
       *
       * @see     http://www.php.net/strlen
       */
      public static function strlen($str)
      {
          return utf8_strlen($str);
      }

      /**
       * UTF-8 aware alternative to str_ireplace.
       * Case-insensitive version of str_replace.
       *
       * @param   string   $search   String to search for.
       * @param   string   $replace  New string to replace with.
       * @param   string   $str      String to operate on.
       * @param   integer  $count    Optional count, forwarded to utf8_ireplace() when supplied.
       *                             It is received by value here, so nothing is reported
       *                             back to the caller through it.
       *
       * @return  mixed  Whatever utf8_ireplace() returns.
       *
       * @see     http://www.php.net/str_ireplace
       */
      public static function str_ireplace($search, $replace, $str, $count = null)
      {
          jimport('phputf8.str_ireplace');

          // The default is NULL, so NULL (not only FALSE) has to mean "omitted".
          if (self::isOmitted($count)) {
              return utf8_ireplace($search, $replace, $str);
          }

          return utf8_ireplace($search, $replace, $str, $count);
      }

      /**
       * UTF-8 aware alternative to str_split.
       * Convert a string to an array.
       *
       * @param   string   $str        UTF-8 encoded string.
       * @param   integer  $split_len  Number of characters to split the string by.
       *
       * @return  array
       *
       * @see     http://www.php.net/str_split
       */
      public static function str_split($str, $split_len = 1)
      {
          jimport('phputf8.str_split');

          return utf8_str_split($str, $split_len);
      }

      /**
       * UTF-8/LOCALE aware alternative to strcasecmp.
       * A case insensitive string comparison.
       *
       * With a locale, both strings are lowercased and compared with strcoll()
       * (recoded to the locale's code page first when one can be determined).
       * The LC_COLLATE locale stays switched after the call.
       *
       * @param   string  $str1    String 1 to compare.
       * @param   string  $str2    String 2 to compare.
       * @param   mixed   $locale  The locale used by strcoll or false to use classical comparison.
       *
       * @return  integer  < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
       *
       * @see     http://www.php.net/strcasecmp
       * @see     http://www.php.net/strcoll
       * @see     http://www.php.net/setlocale
       */
      public static function strcasecmp($str1, $str2, $locale = false)
      {
          if (!$locale) {
              return utf8_strcasecmp($str1, $str2);
          }

          $encoding = self::collationRecodeTarget($locale);
          $lower1 = utf8_strtolower($str1);
          $lower2 = utf8_strtolower($str2);

          // UTF-8 locale, or an encoding we cannot name: do not recode.
          if ($encoding === null) {
              return strcoll($lower1, $lower2);
          }

          return strcoll(
              self::transcode($lower1, 'UTF-8', $encoding),
              self::transcode($lower2, 'UTF-8', $encoding)
          );
      }

      /**
       * UTF-8/LOCALE aware alternative to strcmp.
       * A case sensitive string comparison.
       *
       * With a locale, the strings are compared with strcoll() (recoded to the
       * locale's code page first when one can be determined). The LC_COLLATE
       * locale stays switched after the call.
       *
       * @param   string  $str1    String 1 to compare.
       * @param   string  $str2    String 2 to compare.
       * @param   mixed   $locale  The locale used by strcoll or false to use classical comparison.
       *
       * @return  integer  < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
       *
       * @see     http://www.php.net/strcmp
       * @see     http://www.php.net/strcoll
       * @see     http://www.php.net/setlocale
       */
      public static function strcmp($str1, $str2, $locale = false)
      {
          if (!$locale) {
              return strcmp($str1, $str2);
          }

          $encoding = self::collationRecodeTarget($locale);

          // UTF-8 locale, or an encoding we cannot name: do not recode.
          if ($encoding === null) {
              return strcoll($str1, $str2);
          }

          return strcoll(
              self::transcode($str1, 'UTF-8', $encoding),
              self::transcode($str2, 'UTF-8', $encoding)
          );
      }

      /**
       * UTF-8 aware alternative to strcspn.
       * Find length of initial segment not matching mask.
       *
       * @param   string   $str     The input string.
       * @param   string   $mask    The mask.
       * @param   integer  $start   Optional starting character position (in characters).
       * @param   integer  $length  Optional length.
       *
       * @return  integer  The length of the initial segment of str which does not contain any of the characters in mask.
       *
       * @see     http://www.php.net/strcspn
       */
      public static function strcspn($str, $mask, $start = null, $length = null)
      {
          jimport('phputf8.strcspn');

          // Defaults are NULL; comparing them against FALSE only (as before)
          // made the shorter calls unreachable.
          $noStart = self::isOmitted($start);
          $noLength = self::isOmitted($length);

          if ($noStart && $noLength) {
              return utf8_strcspn($str, $mask);
          }

          if ($noLength) {
              return utf8_strcspn($str, $mask, $start);
          }

          return utf8_strcspn($str, $mask, $start, $length);
      }

      /**
       * UTF-8 aware alternative to stristr.
       * Returns all of haystack from the first occurrence of needle to the end.
       * Needle and haystack are examined in a case-insensitive manner.
       *
       * @param   string  $str     The haystack.
       * @param   string  $search  The needle.
       *
       * @return  string  The sub string.
       *
       * @see     http://www.php.net/stristr
       */
      public static function stristr($str, $search)
      {
          jimport('phputf8.stristr');

          return utf8_stristr($str, $search);
      }

      /**
       * UTF-8 aware alternative to strrev.
       * Reverse a string.
       *
       * @param   string  $str  String to be reversed.
       *
       * @return  string  The string in reverse character order.
       *
       * @see     http://www.php.net/strrev
       */
      public static function strrev($str)
      {
          jimport('phputf8.strrev');

          return utf8_strrev($str);
      }

      /**
       * UTF-8 aware alternative to strspn.
       * Find length of initial segment matching mask.
       *
       * @param   string   $str     The haystack.
       * @param   string   $mask    The mask.
       * @param   integer  $start   Optional start.
       * @param   integer  $length  Optional length.
       *
       * @return  mixed  Whatever utf8_strspn() returns.
       *
       * @see     http://www.php.net/strspn
       */
      public static function strspn($str, $mask, $start = null, $length = null)
      {
          jimport('phputf8.strspn');

          $noStart = self::isOmitted($start);
          $noLength = self::isOmitted($length);

          if ($noStart && $noLength) {
              return utf8_strspn($str, $mask);
          }

          if ($noLength) {
              return utf8_strspn($str, $mask, $start);
          }

          return utf8_strspn($str, $mask, $start, $length);
      }

      /**
       * UTF-8 aware substr_replace.
       * Replace text within a portion of a string.
       *
       * @param   string   $str     The haystack.
       * @param   string   $repl    The replacement string.
       * @param   integer  $start   Start position.
       * @param   integer  $length  Optional length.
       *
       * @return  mixed  Whatever utf8_substr_replace() returns.
       *
       * @see     http://www.php.net/substr_replace
       */
      public static function substr_replace($str, $repl, $start, $length = null)
      {
          // utf8_substr_replace() is loaded by the library loader.
          // The default is NULL, so NULL (not only FALSE) has to mean "omitted".
          if (self::isOmitted($length)) {
              return utf8_substr_replace($str, $repl, $start);
          }

          return utf8_substr_replace($str, $repl, $start, $length);
      }

      /**
       * UTF-8 aware replacement for ltrim().
       * Strip whitespace (or other characters) from the beginning of a string.
       *
       * Note: you only need to use this if you are supplying the charlist
       * optional arg and it contains UTF-8 characters. Otherwise ltrim will
       * work normally on a UTF-8 string.
       *
       * @param   string  $str       The string to be trimmed.
       * @param   string  $charlist  The optional charlist of additional characters to trim.
       *                             A blank charlist (e.g. '' or NULL) leaves the string untouched.
       *
       * @return  string  The trimmed string.
       *
       * @see     http://www.php.net/ltrim
       */
      public static function ltrim($str, $charlist = false)
      {
          if (self::isBlankCharlist($charlist)) {
              return $str;
          }

          jimport('phputf8.trim');

          if ($charlist === false) {
              return utf8_ltrim($str);
          }

          return utf8_ltrim($str, $charlist);
      }

      /**
       * UTF-8 aware replacement for rtrim().
       * Strip whitespace (or other characters) from the end of a string.
       *
       * Note: you only need to use this if you are supplying the charlist
       * optional arg and it contains UTF-8 characters. Otherwise rtrim will
       * work normally on a UTF-8 string.
       *
       * @param   string  $str       The string to be trimmed.
       * @param   string  $charlist  The optional charlist of additional characters to trim.
       *                             A blank charlist (e.g. '' or NULL) leaves the string untouched.
       *
       * @return  string  The trimmed string.
       *
       * @see     http://www.php.net/rtrim
       */
      public static function rtrim($str, $charlist = false)
      {
          if (self::isBlankCharlist($charlist)) {
              return $str;
          }

          jimport('phputf8.trim');

          if ($charlist === false) {
              return utf8_rtrim($str);
          }

          return utf8_rtrim($str, $charlist);
      }

      /**
       * UTF-8 aware replacement for trim().
       * Strip whitespace (or other characters) from the beginning and end of a string.
       *
       * Note: you only need to use this if you are supplying the charlist
       * optional arg and it contains UTF-8 characters. Otherwise trim will
       * work normally on a UTF-8 string.
       *
       * @param   string  $str       The string to be trimmed.
       * @param   string  $charlist  The optional charlist of additional characters to trim.
       *                             A blank charlist (e.g. '' or NULL) leaves the string untouched.
       *
       * @return  string  The trimmed string.
       *
       * @see     http://www.php.net/trim
       */
      public static function trim($str, $charlist = false)
      {
          if (self::isBlankCharlist($charlist)) {
              return $str;
          }

          jimport('phputf8.trim');

          if ($charlist === false) {
              return utf8_trim($str);
          }

          return utf8_trim($str, $charlist);
      }

      /**
       * UTF-8 aware alternative to ucfirst.
       * Make a string's first character uppercase.
       *
       * @param   string  $str  The input string.
       *
       * @return  string  String with first character as upper case (if applicable).
       *
       * @see     http://www.php.net/ucfirst
       */
      public static function ucfirst($str)
      {
          jimport('phputf8.ucfirst');

          return utf8_ucfirst($str);
      }

      /**
       * UTF-8 aware alternative to ucwords.
       * Uppercase the first character of each word in a string.
       *
       * @param   string  $str  The input string.
       *
       * @return  string  String with first char of each word uppercase.
       *
       * @see     http://www.php.net/ucwords
       */
      public static function ucwords($str)
      {
          jimport('phputf8.ucwords');

          return utf8_ucwords($str);
      }

      /**
       * Transcode a string.
       *
       * @param   string  $source         The string to transcode.
       * @param   string  $from_encoding  The source encoding.
       * @param   string  $to_encoding    The target encoding.
       *
       * @return  mixed  The result of iconv() for a string source, or NULL when
       *                 $source is not a string.
       *
       * @since   11.1
       */
      public static function transcode($source, $from_encoding, $to_encoding)
      {
          if (!is_string($source)) {
              return null;
          }

          /*
           * "//TRANSLIT" is appended to the $to_encoding to ensure that when iconv comes
           * across a character that cannot be represented in the target charset, it can
           * be approximated through one or several similarly looking characters.
           */
          return iconv($from_encoding, $to_encoding . '//TRANSLIT', $source);
      }

      /**
       * Tests a string as to whether it's valid UTF-8 and supported by the
       * Unicode standard.
       *
       * Rejected are: stray or missing continuation octets (including a
       * sequence cut off by the end of the string), non-shortest forms, five
       * and six octet sequences, surrogates and code points above 0x10FFFF.
       *
       * Note: this function has been modified to simply return true or false.
       *
       * @param   string  $str  UTF-8 encoded string.
       *
       * @return  boolean  True if valid.
       *
       * @author  <hsivonen@iki.fi>
       * @since   11.1
       * @see     http://hsivonen.iki.fi/php-utf8/
       * @see     compliant
       */
      public static function valid($str)
      {
          // Number of continuation octets still expected for the current character.
          $mState = 0;
          // Code point assembled so far.
          $mUcs4 = 0;
          // Total number of octets announced by the current lead octet.
          $mBytes = 1;

          $len = strlen($str);

          for ($i = 0; $i < $len; $i++) {
              $in = ord($str[$i]);

              if ($mState == 0) {
                  // Expecting either a US-ASCII character or a lead octet.
                  if (0 == (0x80 & $in)) {
                      // US-ASCII, pass straight through.
                      $mBytes = 1;
                  } elseif (0xC0 == (0xE0 & $in)) {
                      // First octet of 2 octet sequence.
                      $mUcs4 = ($in & 0x1F) << 6;
                      $mState = 1;
                      $mBytes = 2;
                  } elseif (0xE0 == (0xF0 & $in)) {
                      // First octet of 3 octet sequence.
                      $mUcs4 = ($in & 0x0F) << 12;
                      $mState = 2;
                      $mBytes = 3;
                  } elseif (0xF0 == (0xF8 & $in)) {
                      // First octet of 4 octet sequence.
                      $mUcs4 = ($in & 0x07) << 18;
                      $mState = 3;
                      $mBytes = 4;
                  } elseif (0xF8 == (0xFC & $in)) {
                      /*
                       * First octet of 5 octet sequence.
                       *
                       * This is illegal because the encoded codepoint must be either
                       * (a) not the shortest form or
                       * (b) outside the Unicode range of 0-0x10FFFF.
                       * Rather than trying to resynchronize, we will carry on until the end
                       * of the sequence and let the later error handling code catch it.
                       */
                      $mUcs4 = ($in & 0x03) << 24;
                      $mState = 4;
                      $mBytes = 5;
                  } elseif (0xFC == (0xFE & $in)) {
                      // First octet of 6 octet sequence, see comments for 5 octet sequence.
                      $mUcs4 = ($in & 1) << 30;
                      $mState = 5;
                      $mBytes = 6;
                  } else {
                      // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
                      return false;
                  }

                  continue;
              }

              // Inside a multi-octet sequence: only a continuation octet is legal.
              if (0x80 != (0xC0 & $in)) {
                  return false;
              }

              $shift = ($mState - 1) * 6;
              $mUcs4 |= ($in & 0x0000003F) << $shift;

              if (0 == --$mState) {
                  // End of the sequence: $mUcs4 now holds the complete code point.
                  if (
                      // From Unicode 3.1, non-shortest form is illegal.
                      ((2 == $mBytes) && ($mUcs4 < 0x0080))
                      || ((3 == $mBytes) && ($mUcs4 < 0x0800))
                      || ((4 == $mBytes) && ($mUcs4 < 0x10000))
                      || (4 < $mBytes)
                      // From Unicode 3.2, surrogate characters are illegal.
                      || (($mUcs4 & 0xFFFFF800) == 0xD800)
                      // Codepoints outside the Unicode range are illegal.
                      || ($mUcs4 > 0x10FFFF)
                  ) {
                      return false;
                  }

                  // Reset for the next character.
                  $mState = 0;
                  $mUcs4 = 0;
                  $mBytes = 1;
              }
          }

          // A string that ends while continuation octets are still expected is
          // truncated and therefore not valid.
          return $mState == 0;
      }

      /**
       * Tests whether a string complies as UTF-8. This will be much
       * faster than utf8_is_valid but will pass five and six octet
       * UTF-8 sequences, which are not supported by Unicode and
       * so cannot be displayed correctly in a browser. In other words
       * it is not as strict as utf8_is_valid but it's faster. If you use
       * it to validate user input, you place yourself at the risk that
       * attackers will be able to inject 5 and 6 byte sequences (which
       * may or may not be a significant risk, depending on what you
       * are doing).
       *
       * @param   string  $str  UTF-8 string to check.
       *
       * @return  boolean  TRUE if string is valid UTF-8.
       *
       * @see     valid
       * @see     http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805
       * @since   11.1
       */
      public static function compliant($str)
      {
          if (strlen($str) == 0) {
              return true;
          }

          // If even just the first character can be matched, when the /u
          // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
          // invalid, nothing at all will match, even if the string contains
          // some valid sequences.
          return preg_match('/^.{1}/us', $str) == 1;
      }

      /**
       * Does a UTF-8 safe version of PHP parse_url function.
       *
       * The URL is percent-encoded as a whole, the URL-structural characters are
       * then decoded again so that parse_url() can split it, and finally every
       * resulting component is urldecode()d.
       *
       * @param   string  $url  URL to parse.
       *
       * @return  array  Associative array of URL components; an empty array when
       *                 parse_url() rejects the URL as malformed.
       *
       * @see     http://us3.php.net/manual/en/function.parse-url.php
       * @since   11.1
       */
      public static function parse_url($url)
      {
          // Encoded forms of the characters that give a URL its structure ...
          $entities = array('%21', '%2A', '%27', '%28', '%29', '%3B', '%3A', '%40', '%26', '%3D', '%24', '%2C', '%2F', '%3F', '%25', '%23', '%5B', '%5D');
          // ... and the literal characters they are turned back into.
          $replacements = array('!', '*', "'", "(", ")", ";", ":", "@", "&", "=", "$", ",", "/", "?", "%", "#", "[", "]");

          // Create encoded URL with special URL characters decoded so it can be parsed.
          // All other characters stay encoded.
          $encodedURL = str_replace($entities, $replacements, urlencode($url));

          $encodedParts = parse_url($encodedURL);
          $result = array();

          // Was the string parsed or was it malformed? If it parsed, then decode.
          if ($encodedParts != false) {
              foreach ($encodedParts as $key => $value) {
                  $result[$key] = urldecode($value);
              }
          }

          return $result;
      }
  }