PageRenderTime 60ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/Cake/I18n/Multibyte.php

https://github.com/msahlihin/cakephp
PHP | 1136 lines | 660 code | 140 blank | 336 comment | 199 complexity | 082feb14489f5e9d0218dc8770c9583b MD5 | raw file
  1. <?php
  2. /**
  3. * Multibyte handling methods.
  4. *
  5. *
  6. * PHP 5
  7. *
  8. * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
  9. * Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
  10. *
  11. * Licensed under The MIT License
  12. * Redistributions of files must retain the above copyright notice.
  13. *
  14. * @copyright Copyright 2005-2012, Cake Software Foundation, Inc. (http://cakefoundation.org)
  15. * @link http://cakephp.org CakePHP(tm) Project
  16. * @package Cake.I18n
  17. * @since CakePHP(tm) v 1.2.0.6833
  18. * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
  19. */
  if (!function_exists('mb_stripos')) {
      /**
       * Polyfill for mb_stripos(): case-insensitive search for the first occurrence of a string.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::stripos().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param integer $offset The position in $haystack at which the search starts.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return integer|boolean Position of the first occurrence of $needle, or false when not found.
       */
      function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::stripos($haystack, $needle, $offset);
          return $position;
      }
  }
  if (!function_exists('mb_stristr')) {
      /**
       * Polyfill for mb_stristr(): case-insensitive search returning a portion of the haystack.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::stristr().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param boolean $part When true, the portion of $haystack before the first occurrence of
       *   $needle is requested; when false (default), the portion from that occurrence to the end.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
       */
      function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::stristr($haystack, $needle, $part);
          return $portion;
      }
  }
  if (!function_exists('mb_strlen')) {
      /**
       * Polyfill for mb_strlen(): length of a string in characters.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strlen().
       *
       * @param string $string The string to measure.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return integer Number of characters in $string; a multi-byte character counts as 1.
       */
      function mb_strlen($string, $encoding = null) {
          $characters = Multibyte::strlen($string);
          return $characters;
      }
  }
  if (!function_exists('mb_strpos')) {
      /**
       * Polyfill for mb_strpos(): position of the first occurrence of a string.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strpos().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param integer $offset The search offset; 0 when not given.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return integer|boolean Position of the first occurrence of $needle, or false when not found.
       */
      function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strpos($haystack, $needle, $offset);
          return $position;
      }
  }
  if (!function_exists('mb_strrchr')) {
      /**
       * Polyfill for mb_strrchr(): portion of a string relative to the last occurrence of a needle.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strrchr().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param boolean $part When true, the portion of $haystack before the last occurrence of
       *   $needle is requested; when false (default), the portion from that occurrence to the end.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
       */
      function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strrchr($haystack, $needle, $part);
          return $portion;
      }
  }
  if (!function_exists('mb_strrichr')) {
      /**
       * Polyfill for mb_strrichr(): case-insensitive variant of mb_strrchr().
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strrichr().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param boolean $part When true, the portion of $haystack before the last occurrence of
       *   $needle is requested; when false (default), the portion from that occurrence to the end.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
       */
      function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strrichr($haystack, $needle, $part);
          return $portion;
      }
  }
  if (!function_exists('mb_strripos')) {
      /**
       * Polyfill for mb_strripos(): case-insensitive position of the last occurrence of a string.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strripos().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param integer $offset The position in $haystack at which the search starts.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return integer|boolean Position of the last occurrence of $needle, or false when not found.
       */
      function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strripos($haystack, $needle, $offset);
          return $position;
      }
  }
  if (!function_exists('mb_strrpos')) {
      /**
       * Polyfill for mb_strrpos(): position of the last occurrence of a string.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strrpos().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param integer $offset Where the search begins, counted in characters from the start.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return integer|boolean Position of the last occurrence of $needle, or false when not found.
       */
      function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strrpos($haystack, $needle, $offset);
          return $position;
      }
  }
  if (!function_exists('mb_strstr')) {
      /**
       * Polyfill for mb_strstr(): portion of a string relative to the first occurrence of a needle.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strstr().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The string to search for.
       * @param boolean $part When true, the portion of $haystack before the first occurrence of
       *   $needle is requested; when false (default), the portion from that occurrence to the end.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
       */
      function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strstr($haystack, $needle, $part);
          return $portion;
      }
  }
  if (!function_exists('mb_strtolower')) {
      /**
       * Polyfill for mb_strtolower(): lowercase a string.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strtolower().
       *
       * @param string $string The string to lowercase.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return string $string with alphabetic characters converted to lowercase.
       */
      function mb_strtolower($string, $encoding = null) {
          $lowered = Multibyte::strtolower($string);
          return $lowered;
      }
  }
  if (!function_exists('mb_strtoupper')) {
      /**
       * Polyfill for mb_strtoupper(): uppercase a string.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::strtoupper().
       *
       * @param string $string The string to uppercase.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return string $string with alphabetic characters converted to uppercase.
       */
      function mb_strtoupper($string, $encoding = null) {
          $raised = Multibyte::strtoupper($string);
          return $raised;
      }
  }
  if (!function_exists('mb_substr_count')) {
      /**
       * Polyfill for mb_substr_count(): count occurrences of a substring.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::substrCount().
       *
       * @param string $haystack The string to search in.
       * @param string $needle The substring to count.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return integer Number of times $needle occurs in $haystack.
       */
      function mb_substr_count($haystack, $needle, $encoding = null) {
          $occurrences = Multibyte::substrCount($haystack, $needle);
          return $occurrences;
      }
  }
  if (!function_exists('mb_substr')) {
      /**
       * Polyfill for mb_substr(): extract part of a string.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::substr().
       *
       * @param string $string The string to extract from.
       * @param integer $start The first position used in $string.
       * @param integer $length The maximum length of the returned string.
       * @param string $encoding Accepted for signature compatibility only; it is not forwarded.
       * @return string The portion of $string selected by $start and $length.
       */
      function mb_substr($string, $start, $length = null, $encoding = null) {
          $slice = Multibyte::substr($string, $start, $length);
          return $slice;
      }
  }
  if (!function_exists('mb_encode_mimeheader')) {
      /**
       * Polyfill for mb_encode_mimeheader(): encode a string for use in a MIME header.
       *
       * Only defined when the mbstring extension does not already provide the function.
       * The work is delegated to Multibyte::mimeEncode(), which receives only $str, $charset
       * and $linefeed.
       *
       * @param string $str The string to encode.
       * @param string $charset Name of the character set $str is represented in.
       * @param string $transferEncoding MIME encoding scheme, "B" (Base64) or "Q" (Quoted-Printable).
       *   Accepted for signature compatibility only; it is not forwarded.
       * @param string $linefeed End-of-line marker used for line folding (an RFC term: breaking a
       *   line longer than a certain length into multiple lines). Defaults to "\r\n" (CRLF).
       * @param integer $indent Accepted for signature compatibility only; it is not forwarded.
       * @return string The encoded version of $str as returned by Multibyte::mimeEncode().
       */
      function mb_encode_mimeheader($str, $charset = 'UTF-8', $transferEncoding = 'B', $linefeed = "\r\n", $indent = 1) {
          $encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
          return $encoded;
      }
  }
  233. /**
  234. * Multibyte handling methods.
  235. *
  236. * @package Cake.I18n
  237. */
  238. class Multibyte {
  239. /**
  240. * Holds the case folding values
  241. *
  242. * @var array
  243. */
  244. protected static $_caseFold = array();
  245. /**
  246. * Holds an array of Unicode code point ranges
  247. *
  248. * @var array
  249. */
  250. protected static $_codeRange = array();
  251. /**
  252. * Holds the current code point range
  253. *
  254. * @var string
  255. */
  256. protected static $_table = null;
  /**
   * Decodes a UTF-8 byte string into a list of Unicode code points.
   *
   * Bytes below 128 are emitted as-is. A byte of 128 or above starts (or continues)
   * a multi-byte sequence whose length is derived from the lead byte: below 224 means
   * 2 bytes, below 240 means 3 bytes, anything else means 4 bytes. Four-byte sequences
   * (code points U+10000 and above) were previously decoded as three-byte sequences,
   * which produced a wrong code point and dropped the trailing byte.
   *
   * No validation of continuation bytes is performed; an incomplete trailing sequence
   * is silently discarded.
   *
   * @param string $string UTF-8 encoded input.
   * @return array List of integer code points, in input order.
   */
  public static function utf8($string) {
      $map = array();
      $values = array();
      $find = 1;
      $length = strlen($string);
      for ($i = 0; $i < $length; $i++) {
          $value = ord($string[$i]);
          if ($value < 128) {
              $map[] = $value;
              continue;
          }
          if (empty($values)) {
              // The lead byte determines how many bytes make up this character.
              if ($value < 224) {
                  $find = 2;
              } elseif ($value < 240) {
                  $find = 3;
              } else {
                  $find = 4;
              }
          }
          $values[] = $value;
          if (count($values) !== $find) {
              continue;
          }
          // Strip the marker bits from each byte and combine the 6-bit payload groups.
          if ($find === 4) {
              $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096) + (($values[2] % 64) * 64) + ($values[3] % 64);
          } elseif ($find === 3) {
              $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
          } else {
              $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
          }
          $values = array();
          $find = 1;
      }
      return $map;
  }
  /**
   * Encodes a list of Unicode code points as a UTF-8 byte string.
   *
   * Code points below 128 become one byte, below 2048 two bytes, below 65536 three
   * bytes and everything above four bytes. Code points of 65536 and above were
   * previously pushed through the three-byte branch, which overflowed the lead byte
   * and produced invalid output.
   *
   * @param array $array List of integer code points.
   * @return string The UTF-8 encoded string.
   */
  public static function ascii($array) {
      $ascii = '';
      foreach ($array as $utf8) {
          if ($utf8 < 128) {
              $ascii .= chr($utf8);
          } elseif ($utf8 < 2048) {
              $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
              $ascii .= chr(128 + ($utf8 % 64));
          } elseif ($utf8 < 65536) {
              $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
              $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
              $ascii .= chr(128 + ($utf8 % 64));
          } else {
              // Supplementary planes: 3 + 6 + 6 + 6 payload bits spread over four bytes.
              $ascii .= chr(240 + (($utf8 - ($utf8 % 262144)) / 262144));
              $ascii .= chr(128 + ((($utf8 % 262144) - ($utf8 % 4096)) / 4096));
              $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
              $ascii .= chr(128 + ($utf8 % 64));
          }
      }
      return $ascii;
  }
  /**
   * Case-insensitive search for the position of the first occurrence of a string.
   *
   * When checkMultibyte() reports a multibyte haystack, both strings are uppercased with
   * strtoupper() of this class and the search is handed to strpos() of this class.
   * Otherwise PHP's native stripos() is used.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param integer $offset The position in $haystack at which the search starts.
   * @return integer|boolean Position of the first occurrence of $needle, or false when not found.
   */
  public static function stripos($haystack, $needle, $offset = 0) {
      if (!self::checkMultibyte($haystack)) {
          return stripos($haystack, $needle, $offset);
      }
      $upperHaystack = self::strtoupper($haystack);
      $upperNeedle = self::strtoupper($needle);
      return self::strpos($upperHaystack, $upperNeedle, $offset);
  }
  /**
   * Case-insensitive search returning a portion of the haystack around the first match.
   *
   * The code-point based implementation is used when the haystack is multibyte, or when
   * $part is requested on a PHP version older than 5.3 (whose native stristr() has no
   * third argument). Otherwise PHP's native stristr() is used.
   *
   * Matching is done on an uppercased copy of the haystack, while the returned portion
   * is taken from the original haystack.
   *
   * NOTE(review): when the match starts at position 0 and $part is true, nothing has been
   * collected in front of the match and the whole haystack is returned instead of an
   * empty string. Left as-is; confirm whether callers rely on it.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param boolean $part When true, return the portion of $haystack before the first occurrence
   *   of $needle; when false (default), return the portion from that occurrence to the end.
   * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
   */
  public static function stristr($haystack, $needle, $part = false) {
      // version_compare() avoids comparing the version string against a float.
      $php = version_compare(PHP_VERSION, '5.3.0', '<');
      if (($php && $part) || self::checkMultibyte($haystack)) {
          $check = self::utf8(self::strtoupper($haystack));
          $found = false;
          $haystack = self::utf8($haystack);
          $haystackCount = count($haystack);
          $needle = self::utf8(self::strtoupper($needle));
          $needleCount = count($needle);
          $parts = array();
          $position = 0;
          while (($found === false) && ($position < $haystackCount)) {
              // Uppercasing may change the number of code points, so $check can be shorter than $haystack.
              if (isset($needle[0]) && isset($check[$position]) && $needle[0] === $check[$position]) {
                  for ($i = 1; $i < $needleCount; $i++) {
                      // Stop when the needle runs past the end of the haystack or a code point differs.
                      if (!isset($check[$position + $i]) || $needle[$i] !== $check[$position + $i]) {
                          break;
                      }
                  }
                  if ($i === $needleCount) {
                      $found = true;
                  }
              }
              if (!$found) {
                  // Move the code point from the "rest" to the "before the match" list.
                  $parts[] = $haystack[$position];
                  unset($haystack[$position]);
              }
              $position++;
          }
          if ($found && $part && !empty($parts)) {
              return self::ascii($parts);
          } elseif ($found && !empty($haystack)) {
              return self::ascii($haystack);
          }
          return false;
      }
      if (!$php) {
          return stristr($haystack, $needle, $part);
      }
      return stristr($haystack, $needle);
  }
  /**
   * Returns the length of a string in characters.
   *
   * A multibyte string (as reported by checkMultibyte()) is decoded with utf8() and its
   * code points are counted; any other string is measured with PHP's native strlen().
   *
   * @param string $string The string to measure.
   * @return integer The number of characters in $string.
   */
  public static function strlen($string) {
      if (!self::checkMultibyte($string)) {
          return strlen($string);
      }
      $codePoints = self::utf8($string);
      return count($codePoints);
  }
  /**
   * Finds the position of the first occurrence of a string.
   *
   * For a multibyte haystack both strings are decoded into code points and compared
   * element by element, so the returned position is counted in characters. Any other
   * haystack is handed to PHP's native strpos().
   *
   * In the multibyte branch an empty needle is never found, and positions outside the
   * haystack (negative offset, needle tail past the end) are skipped without reading
   * undefined array offsets.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param integer $offset The search offset; 0 when not given.
   * @return integer|boolean Position of the first occurrence of $needle, or false when not found.
   */
  public static function strpos($haystack, $needle, $offset = 0) {
      if (!self::checkMultibyte($haystack)) {
          return strpos($haystack, $needle, $offset);
      }
      $haystack = self::utf8($haystack);
      $haystackCount = count($haystack);
      $needle = self::utf8($needle);
      $needleCount = count($needle);
      if ($needleCount === 0) {
          return false;
      }
      for ($position = (int)$offset; $position < $haystackCount; $position++) {
          for ($i = 0; $i < $needleCount; $i++) {
              // Guard the lookup: the candidate window may start before 0 or run past the end.
              if (!isset($haystack[$position + $i]) || $needle[$i] !== $haystack[$position + $i]) {
                  break;
              }
          }
          if ($i === $needleCount) {
              return $position;
          }
      }
      return false;
  }
  /**
   * Returns a portion of the haystack relative to the last occurrence of a needle.
   *
   * Both strings are decoded into code points. The haystack is walked front to back;
   * an occurrence counter for the needle's first code point (built with
   * array_count_values()) is decremented on each hit so that only the final hit ends
   * the walk. Code points passed on the way are moved from the "remaining" list into
   * the "before" list.
   *
   * NOTE(review): the mismatch handling inside the inner loop (accepting a match when the
   * needle code point equals the previous haystack code point, and re-prepending a code
   * point with array_merge()) is reproduced exactly as found; its intent is not evident
   * from the code and should be confirmed against the test suite.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param boolean $part When true, return the portion of $haystack before the last occurrence
   *   of $needle; when false (default), return the portion from that occurrence to the end.
   * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
   */
  public static function strrchr($haystack, $needle, $part = false) {
      $scan = self::utf8($haystack);
      $remaining = $scan;
      $total = count($remaining);
      $occurrences = array_count_values($scan);
      $wanted = self::utf8($needle);
      $wantedCount = count($wanted);
      $before = array();
      $located = false;
      for ($cursor = 0; !$located && $cursor < $total; $cursor++) {
          if (isset($wanted[0]) && $wanted[0] === $scan[$cursor]) {
              $first = $wanted[0];
              for ($k = 1; $k < $wantedCount; $k++) {
                  if ($wanted[$k] === $scan[$cursor + $k]) {
                      continue;
                  }
                  if ($wanted[$k] === $scan[($cursor + $k) - 1]) {
                      $located = true;
                  }
                  unset($before[$cursor - 1]);
                  $remaining = array_merge(array($remaining[$cursor]), $remaining);
                  break;
              }
              if (isset($occurrences[$first]) && $occurrences[$first] > 1) {
                  // Not the last hit of the first code point yet: consume one and keep walking.
                  $occurrences[$first]--;
              } elseif ($k === $wantedCount) {
                  $located = true;
              }
          }
          if (!$located && isset($remaining[$cursor])) {
              $before[] = $remaining[$cursor];
              unset($remaining[$cursor]);
          }
      }
      if (!$located) {
          return false;
      }
      if ($part && !empty($before)) {
          return self::ascii($before);
      }
      if (!empty($remaining)) {
          return self::ascii($remaining);
      }
      return false;
  }
  /**
   * Case-insensitive variant of strrchr(): returns a portion of the haystack relative to
   * the last occurrence of a needle.
   *
   * Matching is performed on uppercased copies of both strings, while the returned
   * portion is built from the original haystack. The haystack is walked front to back;
   * an occurrence counter for the needle's first code point is decremented on each hit
   * so that only the final hit ends the walk.
   *
   * NOTE(review): the mismatch handling inside the inner loop (accepting a match when the
   * needle code point equals the previous haystack code point, and re-prepending a code
   * point with array_merge()) is reproduced exactly as found; its intent is not evident
   * from the code and should be confirmed against the test suite.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param boolean $part When true, return the portion of $haystack before the last occurrence
   *   of $needle; when false (default), return the portion from that occurrence to the end.
   * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
   */
  public static function strrichr($haystack, $needle, $part = false) {
      $scan = self::utf8(self::strtoupper($haystack));
      $remaining = self::utf8($haystack);
      $total = count($remaining);
      $occurrences = array_count_values($scan);
      $wanted = self::utf8(self::strtoupper($needle));
      $wantedCount = count($wanted);
      $before = array();
      $located = false;
      for ($cursor = 0; !$located && $cursor < $total; $cursor++) {
          if (isset($wanted[0]) && $wanted[0] === $scan[$cursor]) {
              $first = $wanted[0];
              for ($k = 1; $k < $wantedCount; $k++) {
                  if ($wanted[$k] === $scan[$cursor + $k]) {
                      continue;
                  }
                  if ($wanted[$k] === $scan[($cursor + $k) - 1]) {
                      $located = true;
                  }
                  unset($before[$cursor - 1]);
                  $remaining = array_merge(array($remaining[$cursor]), $remaining);
                  break;
              }
              if (isset($occurrences[$first]) && $occurrences[$first] > 1) {
                  // Not the last hit of the first code point yet: consume one and keep walking.
                  $occurrences[$first]--;
              } elseif ($k === $wantedCount) {
                  $located = true;
              }
          }
          if (!$located && isset($remaining[$cursor])) {
              $before[] = $remaining[$cursor];
              unset($remaining[$cursor]);
          }
      }
      if (!$located) {
          return false;
      }
      if ($part && !empty($before)) {
          return self::ascii($before);
      }
      if (!empty($remaining)) {
          return self::ascii($remaining);
      }
      return false;
  }
  /**
   * Case-insensitive search for the position of the last occurrence of a string.
   *
   * A non-multibyte haystack is handed to PHP's native strripos(). Otherwise both
   * strings are uppercased and decoded into code points, and the haystack is walked
   * forward from $offset. With a zero offset, an occurrence counter for the needle's
   * first code point is decremented on each hit so that only the final hit is reported.
   *
   * NOTE(review): the inner loop never breaks on a mismatch, so the loop index always
   * equals the needle length afterwards; the tail of the needle therefore only influences
   * the result through the "previous code point" adjustment. Reproduced exactly as found;
   * confirm the intent before relying on multi-character needles.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param integer $offset The position in $haystack at which the search starts.
   * @return integer|boolean Position of the last occurrence of $needle, or false when not found.
   */
  public static function strripos($haystack, $needle, $offset = 0) {
      if (!self::checkMultibyte($haystack)) {
          return strripos($haystack, $needle, $offset);
      }
      $points = self::utf8(self::strtoupper($haystack));
      $total = count($points);
      $remainingHits = array_count_values($points);
      $wanted = self::utf8(self::strtoupper($needle));
      $wantedCount = count($wanted);
      $located = false;
      $cursor = $offset;
      while (!$located && $cursor < $total) {
          if (isset($wanted[0]) && $wanted[0] === $points[$cursor]) {
              $first = $wanted[0];
              for ($k = 1; $k < $wantedCount; $k++) {
                  if ($wanted[$k] !== $points[$cursor + $k] && $wanted[$k] === $points[($cursor + $k) - 1]) {
                      $cursor--;
                      $located = true;
                  }
              }
              if (!$offset && isset($remainingHits[$first]) && $remainingHits[$first] > 1) {
                  // More hits of the first code point are still ahead: consume one and keep walking.
                  $remainingHits[$first]--;
              } elseif ($k === $wantedCount) {
                  $located = true;
                  // Cancel out the unconditional increment below so $cursor stays on the hit.
                  $cursor--;
              }
          }
          $cursor++;
      }
      return $located ? $cursor : false;
  }
  /**
   * Finds the position of the last occurrence of a string.
   *
   * A non-multibyte haystack is handed to PHP's native strrpos(). Otherwise both
   * strings are decoded into code points and the haystack is walked forward from
   * $offset. With a zero offset, an occurrence counter for the needle's first code
   * point is decremented on each hit so that only the final hit is reported.
   *
   * NOTE(review): the inner loop never breaks on a mismatch, so the loop index always
   * equals the needle length afterwards; the tail of the needle therefore only influences
   * the result through the "previous code point" adjustment. Reproduced exactly as found;
   * confirm the intent before relying on multi-character needles.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param integer $offset Where the search begins, counted in characters from the start.
   * @return integer|boolean Position of the last occurrence of $needle, or false when not found.
   */
  public static function strrpos($haystack, $needle, $offset = 0) {
      if (!self::checkMultibyte($haystack)) {
          return strrpos($haystack, $needle, $offset);
      }
      $points = self::utf8($haystack);
      $total = count($points);
      $remainingHits = array_count_values($points);
      $wanted = self::utf8($needle);
      $wantedCount = count($wanted);
      $located = false;
      $cursor = $offset;
      while (!$located && $cursor < $total) {
          if (isset($wanted[0]) && $wanted[0] === $points[$cursor]) {
              $first = $wanted[0];
              for ($k = 1; $k < $wantedCount; $k++) {
                  if ($wanted[$k] !== $points[$cursor + $k] && $wanted[$k] === $points[($cursor + $k) - 1]) {
                      $cursor--;
                      $located = true;
                  }
              }
              if (!$offset && isset($remainingHits[$first]) && $remainingHits[$first] > 1) {
                  // More hits of the first code point are still ahead: consume one and keep walking.
                  $remainingHits[$first]--;
              } elseif ($k === $wantedCount) {
                  $located = true;
                  // Cancel out the unconditional increment below so $cursor stays on the hit.
                  $cursor--;
              }
          }
          $cursor++;
      }
      return $located ? $cursor : false;
  }
  /**
   * Returns a portion of the haystack around the first occurrence of a needle.
   *
   * The code-point based implementation is used when the haystack is multibyte, or when
   * $part is requested on a PHP version older than 5.3 (whose native strstr() has no
   * third argument). Otherwise PHP's native strstr() is used.
   *
   * NOTE(review): when the match starts at position 0 and $part is true, nothing has been
   * collected in front of the match and the whole haystack is returned instead of an
   * empty string. Left as-is; confirm whether callers rely on it.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to search for.
   * @param boolean $part When true, return the portion of $haystack before the first occurrence
   *   of $needle; when false (default), return the portion from that occurrence to the end.
   * @return string|boolean The requested portion of $haystack, or false when $needle is not found.
   */
  public static function strstr($haystack, $needle, $part = false) {
      // version_compare() avoids comparing the version string against a float.
      $php = version_compare(PHP_VERSION, '5.3.0', '<');
      if (($php && $part) || self::checkMultibyte($haystack)) {
          $check = self::utf8($haystack);
          $found = false;
          $haystack = $check;
          $haystackCount = count($haystack);
          $needle = self::utf8($needle);
          $needleCount = count($needle);
          $parts = array();
          $position = 0;
          while (($found === false) && ($position < $haystackCount)) {
              if (isset($needle[0]) && $needle[0] === $check[$position]) {
                  for ($i = 1; $i < $needleCount; $i++) {
                      // Stop when the needle runs past the end of the haystack or a code point differs.
                      if (!isset($check[$position + $i]) || $needle[$i] !== $check[$position + $i]) {
                          break;
                      }
                  }
                  if ($i === $needleCount) {
                      $found = true;
                  }
              }
              if (!$found) {
                  // Move the code point from the "rest" to the "before the match" list.
                  $parts[] = $haystack[$position];
                  unset($haystack[$position]);
              }
              $position++;
          }
          if ($found && $part && !empty($parts)) {
              return self::ascii($parts);
          } elseif ($found && !empty($haystack)) {
              return self::ascii($haystack);
          }
          return false;
      }
      if (!$php) {
          return strstr($haystack, $needle, $part);
      }
      return strstr($haystack, $needle);
  }
  /**
   * Converts a string to lowercase.
   *
   * The string is decoded into code points. Code points below 128 go through PHP's
   * native strtolower(); all others are looked up with _find($char, 'upper') and replaced
   * by the first lowercase code point of the first entry whose 'upper' value matches.
   * Code points without a matching entry are kept unchanged.
   *
   * The previous implementation called count() on the scalar lowercase code point, which
   * raises a warning on PHP 7.2+ and a TypeError on PHP 8; the check is now an isset().
   *
   * @param string $string The string to lowercase.
   * @return string $string with alphabetic characters converted to lowercase.
   */
  public static function strtolower($string) {
      $utf8Map = self::utf8($string);
      $lowerCase = array();
      foreach ($utf8Map as $char) {
          if ($char < 128) {
              $lowerCase[] = ord(strtolower(chr($char)));
              continue;
          }
          $matched = false;
          $keys = self::_find($char, 'upper');
          if (!empty($keys)) {
              foreach ($keys as $entry) {
                  if ($entry['upper'] == $char && isset($entry['lower'][0])) {
                      $lowerCase[] = $entry['lower'][0];
                      $matched = true;
                      break;
                  }
              }
          }
          if ($matched === false) {
              $lowerCase[] = $char;
          }
      }
      return self::ascii($lowerCase);
  }
  /**
   * Converts a string to uppercase.
   *
   * The string is decoded into code points. Code points below 128 go through PHP's
   * native strtoupper(); all others are looked up with _find($char). For each entry
   * returned, three things are tried in order:
   *  - if the entry's 'lower' list has several code points, the input is compared
   *    against that whole sequence starting at the current position;
   *  - otherwise, if several entries were returned, the later entries are probed for a
   *    sequence match in the same way;
   *  - finally, a plain match of the entry's first 'lower' code point.
   * Code points consumed by a sequence match are remembered in $replaced so that they
   * are not emitted again; unmatched code points are kept unchanged.
   *
   * Look-ahead reads beyond the end of the string used to touch undefined array offsets
   * before testing them with isset(); the lookups are now guarded directly.
   *
   * @param string $string The string to uppercase.
   * @return string $string with alphabetic characters converted to uppercase.
   */
  public static function strtoupper($string) {
      $utf8Map = self::utf8($string);
      $length = count($utf8Map);
      $replaced = array();
      $upperCase = array();
      for ($i = 0; $i < $length; $i++) {
          $char = $utf8Map[$i];
          if ($char < 128) {
              $upperCase[] = ord(strtoupper(chr($char)));
              continue;
          }
          $matched = false;
          $keys = self::_find($char);
          $keyCount = count($keys);
          if (!empty($keys)) {
              foreach ($keys as $key => $value) {
                  $matched = false;
                  $replace = 0;
                  if ($length > 1 && count($keys[$key]['lower']) > 1) {
                      // Multi code point lowercase form: compare the whole sequence.
                      for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
                          if (isset($utf8Map[$i + $ii]) && ($utf8Map[$i + $ii] == $keys[$key]['lower'][$ii])) {
                              $replace++;
                          }
                      }
                      if ($replace == $count) {
                          $upperCase[] = $keys[$key]['upper'];
                          $replaced = array_merge($replaced, array_values($keys[$key]['lower']));
                          $matched = true;
                          break 1;
                      }
                  } elseif ($length > 1 && $keyCount > 1) {
                      // Probe the remaining entries for a sequence that matches at this position.
                      for ($ii = 1; $ii < $keyCount; $ii++) {
                          if (!isset($keys[$ii]['lower'])) {
                              continue;
                          }
                          $nextChar = isset($utf8Map[$i + $ii - 1]) ? $utf8Map[$i + $ii - 1] : null;
                          if (in_array($nextChar, $keys[$ii]['lower'])) {
                              for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
                                  if (isset($utf8Map[$i + $jj]) && ($utf8Map[$i + $jj] == $keys[$ii]['lower'][$jj])) {
                                      $replace++;
                                  }
                              }
                              if ($replace == $count) {
                                  $upperCase[] = $keys[$ii]['upper'];
                                  $replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
                                  $matched = true;
                                  break 2;
                              }
                          }
                      }
                  }
                  if ($keys[$key]['lower'][0] == $char) {
                      $upperCase[] = $keys[$key]['upper'];
                      $matched = true;
                      break 1;
                  }
              }
          }
          // Skip code points that were already absorbed by a multi code point replacement.
          if ($matched === false && !in_array($char, $replaced, true)) {
              $upperCase[] = $char;
          }
      }
      return self::ascii($upperCase);
  }
  795. /**
  796. * Count the number of substring occurrences
  797. *
  798. * @param string $haystack The string being checked.
  799. * @param string $needle The string being found.
  800. * @return integer The number of times the $needle substring occurs in the $haystack string.
  801. */
  public static function substrCount($haystack, $needle) {
      // Compare decimal character values so multibyte characters count as one unit.
      $haystack = Multibyte::utf8($haystack);
      $needle = Multibyte::utf8($needle);
      $haystackCount = count($haystack);
      $needleCount = count($needle);

      // An empty needle, or one longer than the haystack, can never occur.
      if ($needleCount === 0 || $needleCount > $haystackCount) {
          return 0;
      }

      $count = 0;
      // Last start position at which the whole needle still fits.
      $lastStart = $haystackCount - $needleCount;
      $i = 0;
      while ($i <= $lastStart) {
          $matched = true;
          for ($j = 0; $j < $needleCount; $j++) {
              if ($haystack[$i + $j] !== $needle[$j]) {
                  $matched = false;
                  break;
              }
          }
          if ($matched) {
              $count++;
              // Occurrences are counted without overlap: resume after this match.
              $i += $needleCount;
          } else {
              $i++;
          }
      }
      return $count;
  }
  827. /**
  828. * Get part of string
  829. *
  830. * @param string $string The string being checked.
  831. * @param integer $start The first position used in $string.
  832. * @param integer $length The maximum length of the returned string.
  833. * @return string The portion of $string specified by the $start and $length parameters.
  834. */
  public static function substr($string, $start, $length = null) {
      // Nothing to cut: hand the input back untouched.
      if ($start === 0 && $length === null) {
          return $string;
      }

      // Slice on character values, not bytes, so multibyte characters stay intact.
      $chars = Multibyte::utf8($string);
      $start = (int)$start;

      // array_slice() supplies the offset/length semantics: a negative $start
      // counts from the end, a negative $length stops that many characters
      // before the end, and out-of-range values are clamped instead of failing.
      if ($length === null) {
          return Multibyte::ascii(array_slice($chars, $start));
      }
      return Multibyte::ascii(array_slice($chars, $start, (int)$length));
  }
  853. /**
  854. * Prepare a string for mail transport, using the provided encoding
  855. *
  856. * @param string $string value to encode
  857. * @param string $charset charset to use for encoding. defaults to UTF-8
  858. * @param string $newline
  859. * @return string
  860. * @TODO: add support for 'Q'('Quoted Printable') encoding
  861. */
  public static function mimeEncode($string, $charset = null, $newline = "\r\n") {
      // Short strings without any high-bit byte are passed through unencoded.
      if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
          return $string;
      }
      if (empty($charset)) {
          $charset = Configure::read('App.encoding');
      }
      $charset = strtoupper($charset);

      $start = '=?' . $charset . '?B?';
      $end = '?=';
      // Closes one encoded word and opens the next one on a folded line.
      $spacer = $end . $newline . ' ' . $start;

      // Base64 payload allowed per encoded word so that prefix + payload +
      // suffix stay within 75 bytes, rounded down to whole 4-character groups.
      $length = 75 - strlen($start) - strlen($end);
      $length = $length - ($length % 4);

      if ($charset === 'UTF-8') {
          $parts = array();
          // Raw bytes that fit into $length base64 characters.
          $maxchars = (int)floor(($length * 3) / 4);
          while (strlen($string) > $maxchars) {
              // Back up from the byte limit to the start of a character so no
              // UTF-8 sequence is split across two encoded words.
              $i = $maxchars;
              while ($i > 0 && (ord($string[$i]) & 0xC0) === 0x80) {
                  $i--;
              }
              if ($i === 0) {
                  // Malformed input (only continuation bytes): cut at the byte
                  // limit anyway so the loop always makes progress.
                  $i = $maxchars;
              }
              $parts[] = base64_encode(substr($string, 0, $i));
              $string = substr($string, $i);
          }
          $parts[] = base64_encode($string);
          $string = implode($spacer, $parts);
      } else {
          // chunk_split() also appends the separator after the final chunk;
          // strip that trailing copy.
          $string = chunk_split(base64_encode($string), $length, $spacer);
          $string = substr($string, 0, -strlen($spacer));
      }
      return $start . $string . $end;
  }
  898. /**
  899. * Return the Code points range for Unicode characters
  900. *
  901. * @param integer $decimal
  902. * @return string
  903. */
  protected static function _codepoint($decimal) {
      // Inclusive [first, last] bounds of each range and the name used for
      // its case-folding table. Values outside every range have no table.
      $blocks = array(
          array(0x0080, 0x00FF, '0080_00ff'), // Latin-1 Supplement
          array(0x0100, 0x017F, '0100_017f'), // Latin Extended-A
          array(0x0180, 0x024F, '0180_024F'), // Latin Extended-B
          array(0x0250, 0x02AF, '0250_02af'), // IPA Extensions
          array(0x0370, 0x03FF, '0370_03ff'), // Greek and Coptic
          array(0x0400, 0x04FF, '0400_04ff'), // Cyrillic
          array(0x0500, 0x052F, '0500_052f'), // Cyrillic Supplement
          array(0x0530, 0x058F, '0530_058f'), // Armenian
          array(0x1E00, 0x1EFF, '1e00_1eff'), // Latin Extended Additional
          array(0x1F00, 0x1FFF, '1f00_1fff'), // Greek Extended
          array(0x2100, 0x214F, '2100_214f'), // Letterlike Symbols
          array(0x2150, 0x218F, '2150_218f'), // Number Forms
          array(0x2460, 0x24FF, '2460_24ff'), // Enclosed Alphanumerics
          array(0x2C00, 0x2C5F, '2c00_2c5f'), // Glagolitic
          array(0x2C60, 0x2C7F, '2c60_2c7f'), // Latin Extended-C
          array(0x2C80, 0x2CFF, '2c80_2cff'), // Coptic
          array(0xFF00, 0xFFEF, 'ff00_ffef'), // Halfwidth and Fullwidth Forms
      );

      $return = false;
      foreach ($blocks as $block) {
          if ($decimal >= $block[0] && $decimal <= $block[1]) {
              $return = $block[2];
              break;
          }
      }

      // Remember the answer (including "no range") for later lookups.
      self::$_codeRange[$decimal] = $return;
      return $return;
  }
  /**
  * Find the related code folding values for $char
  *
  * @param integer $char decimal value of character
  * @param string $type 'lower' to match on the first value of an entry's 'lower' list,
  *   'upper' to match on an entry's 'upper' value.
  * @return array Matching case-folding entries; empty when $char has no known range or no match.
  */
  protected static function _find($char, $type = 'lower') {
      if (!isset(self::$_codeRange[$char])) {
          $range = self::_codepoint($char);
          if ($range === false) {
              return array();
          }
          // Read the table for this range only once; later characters from
          // the same range reuse the cached copy.
          if (!isset(self::$_caseFold[$range])) {
              if (!Configure::configured('_cake_core_')) {
                  App::uses('PhpReader', 'Configure');
                  Configure::config('_cake_core_', new PhpReader(CAKE . 'Config' . DS));
              }
              Configure::load('unicode' . DS . 'casefolding' . DS . $range, '_cake_core_');
              self::$_caseFold[$range] = Configure::read($range);
              Configure::delete($range);
          }
      }

      // A cached false means the character belongs to no known range.
      if (!self::$_codeRange[$char]) {
          return array();
      }

      self::$_table = self::$_codeRange[$char];
      $found = array();
      foreach (self::$_caseFold[self::$_table] as $entry) {
          if ($type === 'lower' && $entry[$type][0] === $char) {
              $found[] = $entry;
          } elseif ($type === 'upper' && $entry[$type] === $char) {
              $found[] = $entry;
          }
      }
      return $found;
  }
  981. /**
  982. * Check the $string for multibyte characters
  983. * @param string $string value to test
  984. * @return boolean
  985. */
  public static function checkMultibyte($string) {
      // Any byte with the high bit set (0x80-0xFF) is outside 7-bit ASCII.
      // 0x80 itself is included: it is not an ASCII byte either.
      $length = strlen($string);
      for ($i = 0; $i < $length; $i++) {
          if (ord($string[$i]) > 127) {
              return true;
          }
      }
      return false;
  }
  996. }