PageRenderTime 44ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/Cake/I18n/Multibyte.php

https://bitbucket.org/udeshika/fake_twitter
PHP | 1106 lines | 658 code | 111 blank | 337 comment | 199 complexity | efcb867010de53d3bf4ad7b9e9731696 MD5 | raw file
  1. <?php
  2. /**
  3. * Multibyte handling methods.
  4. *
  5. *
  6. * PHP 5
  7. *
  8. * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
  9. * Copyright 2005-2011, Cake Software Foundation, Inc. (http://cakefoundation.org)
  10. *
  11. * Licensed under The MIT License
  12. * Redistributions of files must retain the above copyright notice.
  13. *
  14. * @copyright Copyright 2005-2011, Cake Software Foundation, Inc. (http://cakefoundation.org)
  15. * @link http://cakephp.org CakePHP(tm) Project
  16. * @package Cake.I18n
  17. * @since CakePHP(tm) v 1.2.0.6833
  18. * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
  19. */
  20. /**
  21. * Find position of first occurrence of a case-insensitive string.
  22. *
  23. * @param string $haystack The string from which to get the position of the first occurrence of $needle.
  24. * @param string $needle The string to find in $haystack.
  25. * @param integer $offset The position in $haystack to start searching.
  26. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  27. * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string, or false
  28. * if $needle is not found.
  29. */
  if (function_exists('mb_stripos') === false) {
      // Defined only when no mb_stripos() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::stripos().
      function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::stripos($haystack, $needle, $offset);
          return $position;
      }
  }
  35. /**
  36. * Finds first occurrence of a string within another, case insensitive.
  37. *
  38. * @param string $haystack The string from which to get the first occurrence of $needle.
  39. * @param string $needle The string to find in $haystack.
  40. * @param boolean $part Determines which portion of $haystack this function returns.
  41. * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
  42. * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
  43. * Default value is false.
  44. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  45. * @return string|boolean The portion of $haystack, or false if $needle is not found.
  46. */
  if (function_exists('mb_stristr') === false) {
      // Defined only when no mb_stristr() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::stristr().
      function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::stristr($haystack, $needle, $part);
          return $portion;
      }
  }
  52. /**
  53. * Get string length.
  54. *
  55. * @param string $string The string being checked for length.
  56. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  57. * @return integer The number of characters in string $string having character encoding encoding.
  58. * A multi-byte character is counted as 1.
  59. */
  if (function_exists('mb_strlen') === false) {
      // Defined only when no mb_strlen() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strlen().
      function mb_strlen($string, $encoding = null) {
          $characters = Multibyte::strlen($string);
          return $characters;
      }
  }
  65. /**
  66. * Find position of first occurrence of a string.
  67. *
  68. * @param string $haystack The string being checked.
  69. * @param string $needle The string to find in $haystack.
  70. * @param integer $offset The search offset. If it is not specified, 0 is used.
  71. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  72. * @return integer|boolean The numeric position of the first occurrence of $needle in the $haystack string.
  73. * If $needle is not found, it returns false.
  74. */
  if (function_exists('mb_strpos') === false) {
      // Defined only when no mb_strpos() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strpos().
      function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strpos($haystack, $needle, $offset);
          return $position;
      }
  }
  80. /**
  81. * Finds the last occurrence of a character in a string within another.
  82. *
  83. * @param string $haystack The string from which to get the last occurrence of $needle.
  84. * @param string $needle The string to find in $haystack.
  85. * @param boolean $part Determines which portion of $haystack this function returns.
  86. * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
  87. * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
  88. * Default value is false.
  89. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  90. * @return string|boolean The portion of $haystack, or false if $needle is not found.
  91. */
  if (function_exists('mb_strrchr') === false) {
      // Defined only when no mb_strrchr() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strrchr().
      function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strrchr($haystack, $needle, $part);
          return $portion;
      }
  }
  97. /**
  98. * Finds the last occurrence of a character in a string within another, case insensitive.
  99. *
  100. * @param string $haystack The string from which to get the last occurrence of $needle.
  101. * @param string $needle The string to find in $haystack.
  102. * @param boolean $part Determines which portion of $haystack this function returns.
  103. * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
  104. * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
  105. * Default value is false.
  106. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  107. * @return string|boolean The portion of $haystack, or false if $needle is not found.
  108. */
  if (function_exists('mb_strrichr') === false) {
      // Defined only when no mb_strrichr() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strrichr().
      function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strrichr($haystack, $needle, $part);
          return $portion;
      }
  }
  114. /**
  115. * Finds position of last occurrence of a string within another, case insensitive
  116. *
  117. * @param string $haystack The string from which to get the position of the last occurrence of $needle.
  118. * @param string $needle The string to find in $haystack.
  119. * @param integer $offset The position in $haystack to start searching.
  120. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  121. * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string,
  122. * or false if $needle is not found.
  123. */
  if (function_exists('mb_strripos') === false) {
      // Defined only when no mb_strripos() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strripos().
      function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strripos($haystack, $needle, $offset);
          return $position;
      }
  }
  129. /**
  130. * Find position of last occurrence of a string in a string.
  131. *
  132. * @param string $haystack The string being checked, for the last occurrence of $needle.
  133. * @param string $needle The string to find in $haystack.
  134. * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string.
  135. * Negative values will stop searching at an arbitrary point prior to the end of the string.
  136. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  137. * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string.
  138. * If $needle is not found, it returns false.
  139. */
  if (function_exists('mb_strrpos') === false) {
      // Defined only when no mb_strrpos() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strrpos().
      function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strrpos($haystack, $needle, $offset);
          return $position;
      }
  }
  145. /**
  146. * Finds first occurrence of a string within another
  147. *
  148. * @param string $haystack The string from which to get the first occurrence of $needle.
  149. * @param string $needle The string to find in $haystack
  150. * @param boolean $part Determines which portion of $haystack this function returns.
  151. * If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
  152. * If set to false, it returns all of $haystack from the first occurrence of $needle to the end,
  153. * Default value is FALSE.
  154. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  155. * @return string|boolean The portion of $haystack, or false if $needle is not found.
  156. */
  if (function_exists('mb_strstr') === false) {
      // Defined only when no mb_strstr() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strstr().
      function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strstr($haystack, $needle, $part);
          return $portion;
      }
  }
  162. /**
  163. * Make a string lowercase
  164. *
  165. * @param string $string The string being lowercased.
  166. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  167. * @return string with all alphabetic characters converted to lowercase.
  168. */
  if (function_exists('mb_strtolower') === false) {
      // Defined only when no mb_strtolower() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strtolower().
      function mb_strtolower($string, $encoding = null) {
          $lowered = Multibyte::strtolower($string);
          return $lowered;
      }
  }
  174. /**
  175. * Make a string uppercase
  176. *
  177. * @param string $string The string being uppercased.
  178. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  179. * @return string with all alphabetic characters converted to uppercase.
  180. */
  if (function_exists('mb_strtoupper') === false) {
      // Defined only when no mb_strtoupper() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::strtoupper().
      function mb_strtoupper($string, $encoding = null) {
          $raised = Multibyte::strtoupper($string);
          return $raised;
      }
  }
  186. /**
  187. * Count the number of substring occurrences
  188. *
  189. * @param string $haystack The string being checked.
  190. * @param string $needle The string being found.
  191. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  192. * @return integer The number of times the $needle substring occurs in the $haystack string.
  193. */
  if (function_exists('mb_substr_count') === false) {
      // Defined only when no mb_substr_count() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::substrCount().
      function mb_substr_count($haystack, $needle, $encoding = null) {
          $occurrences = Multibyte::substrCount($haystack, $needle);
          return $occurrences;
      }
  }
  199. /**
  200. * Get part of string
  201. *
  202. * @param string $string The string being checked.
  203. * @param integer $start The first position used in $string.
  204. * @param integer $length The maximum length of the returned string.
  205. * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
  206. * @return string The portion of $string specified by the $start and $length parameters.
  207. */
  if (function_exists('mb_substr') === false) {
      // Defined only when no mb_substr() exists yet. $encoding is accepted for
      // signature compatibility but is not forwarded to Multibyte::substr().
      function mb_substr($string, $start, $length = null, $encoding = null) {
          $portion = Multibyte::substr($string, $start, $length);
          return $portion;
      }
  }
  213. /**
  214. * Encode string for MIME header
  215. *
  216. * @param string $str The string being encoded
  217. * @param string $charset specifies the name of the character set in which str is represented in.
  218. * The default value is determined by the current NLS setting (mbstring.language).
  219. * @param string $transfer_encoding specifies the scheme of MIME encoding.
  220. * It should be either "B" (Base64) or "Q" (Quoted-Printable). Falls back to "B" if not given.
  221. * @param string $linefeed specifies the EOL (end-of-line) marker with which
  222. * mb_encode_mimeheader() performs line-folding
  223. * (an RFC term: the act of breaking a line longer than a certain length into multiple lines.
  224. * The length is currently hard-coded to 74 characters). Falls back to "\r\n" (CRLF) if not given.
  225. * @param integer $indent [definition unknown and appears to have no effect]
  226. * @return string A converted version of the string represented in ASCII.
  227. */
  if (function_exists('mb_encode_mimeheader') === false) {
      // Defined only when no mb_encode_mimeheader() exists yet. Only $str, $charset and
      // $linefeed are forwarded to Multibyte::mimeEncode(); $transfer_encoding and $indent
      // are accepted for signature compatibility but are not used.
      function mb_encode_mimeheader($str, $charset = 'UTF-8', $transfer_encoding = 'B', $linefeed = "\r\n", $indent = 1) {
          $encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
          return $encoded;
      }
  }
  233. /**
  234. * Multibyte handling methods.
  235. *
  236. *
  237. * @package Cake.I18n
  238. */
  239. class Multibyte {
  240. /**
  241. * Holds the case folding values
  242. *
  243. * @var array
  244. */
  245. protected static $_caseFold = array();
  246. /**
  247. * Holds an array of Unicode code point ranges
  248. *
  249. * @var array
  250. */
  251. protected static $_codeRange = array();
  252. /**
  253. * Holds the current code point range
  254. *
  255. * @var string
  256. */
  257. protected static $_table = null;
  /**
   * Decodes a UTF-8 string into an array of Unicode code points.
   *
   * Bytes below 128 map to themselves. A lead byte below 224 starts a 2-byte
   * sequence, below 240 a 3-byte sequence, and anything higher a 4-byte
   * sequence (previously 4-byte sequences were mis-decoded as 3-byte ones).
   * An incomplete sequence at the end of the input is dropped.
   *
   * @param string $string UTF-8 encoded input.
   * @return array List of integer code points, one per decoded character.
   */
  public static function utf8($string) {
      $map = array();
      $values = array();
      $find = 1;
      $length = strlen($string);
      for ($i = 0; $i < $length; $i++) {
          $value = ord($string[$i]);
          if ($value < 128) {
              $map[] = $value;
              continue;
          }
          if (empty($values)) {
              // First byte of a sequence: its magnitude tells how many bytes to collect.
              if ($value < 224) {
                  $find = 2;
              } elseif ($value < 240) {
                  $find = 3;
              } else {
                  $find = 4;
              }
          }
          $values[] = $value;
          if (count($values) !== $find) {
              continue;
          }
          if ($find === 4) {
              $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                  + (($values[2] % 64) * 64) + ($values[3] % 64);
          } elseif ($find === 3) {
              $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
          } else {
              $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
          }
          $values = array();
          $find = 1;
      }
      return $map;
  }
  /**
   * Encodes an array of Unicode code points as a UTF-8 byte string.
   *
   * Code points below 128 become one byte, below 2048 two bytes, below 65536
   * three bytes, and anything higher four bytes (previously such values were
   * forced through the 3-byte pattern and produced invalid output).
   *
   * @param array $array List of integer code points.
   * @return string The UTF-8 encoded string.
   */
  public static function ascii($array) {
      $ascii = '';
      foreach ($array as $utf8) {
          if ($utf8 < 128) {
              $ascii .= chr($utf8);
          } elseif ($utf8 < 2048) {
              $ascii .= chr(192 + ($utf8 >> 6));
              $ascii .= chr(128 + ($utf8 & 63));
          } elseif ($utf8 < 65536) {
              $ascii .= chr(224 + ($utf8 >> 12));
              $ascii .= chr(128 + (($utf8 >> 6) & 63));
              $ascii .= chr(128 + ($utf8 & 63));
          } else {
              $ascii .= chr(240 + ($utf8 >> 18));
              $ascii .= chr(128 + (($utf8 >> 12) & 63));
              $ascii .= chr(128 + (($utf8 >> 6) & 63));
              $ascii .= chr(128 + ($utf8 & 63));
          }
      }
      return $ascii;
  }
  /**
   * Case-insensitive search for the first position of $needle in $haystack.
   *
   * When checkMultibyte() reports the haystack as multibyte, both strings are
   * upper-cased with Multibyte::strtoupper() and searched with Multibyte::strpos();
   * otherwise the native stripos() is used.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to find in $haystack.
   * @param integer $offset The position in $haystack to start searching.
   * @return integer|boolean Position of the first occurrence, or false if $needle is not found.
   */
  public static function stripos($haystack, $needle, $offset = 0) {
      if (!Multibyte::checkMultibyte($haystack)) {
          return stripos($haystack, $needle, $offset);
      }
      $upperHaystack = Multibyte::strtoupper($haystack);
      $upperNeedle = Multibyte::strtoupper($needle);
      return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
  }
  /**
   * Finds the first occurrence of a string within another, case insensitive.
   *
   * The code-point implementation is used when the haystack is multibyte, or when
   * $part is requested on PHP older than 5.3 (whose native stristr() has no third
   * argument). Otherwise the native stristr() is called.
   *
   * @param string $haystack The string from which to get the first occurrence of $needle.
   * @param string $needle The string to find in $haystack.
   * @param boolean $part If true, return the part of $haystack before the first occurrence
   *   of $needle; if false (default), return from the first occurrence to the end.
   * @return string|boolean The portion of $haystack, or false if $needle is not found.
   */
  public static function stristr($haystack, $needle, $part = false) {
      // version_compare() replaces the string-vs-float test `PHP_VERSION < 5.3`, whose
      // result depends on PHP's loose comparison rules.
      $php = version_compare(PHP_VERSION, '5.3.0', '<');
      if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
          // $check is the upper-cased haystack used for matching; $haystack keeps the
          // original code points so the returned text preserves its case.
          $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
          $found = false;
          $haystack = Multibyte::utf8($haystack);
          $haystackCount = count($haystack);
          $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
          $needleCount = count($needle);
          $parts = array();
          $position = 0;
          while (($found === false) && ($position < $haystackCount)) {
              if (isset($needle[0], $check[$position]) && $needle[0] === $check[$position]) {
                  for ($i = 1; $i < $needleCount; $i++) {
                      // isset() stops the comparison from reading past the end of $check.
                      if (!isset($check[$position + $i]) || $needle[$i] !== $check[$position + $i]) {
                          break;
                      }
                  }
                  if ($i === $needleCount) {
                      $found = true;
                  }
              }
              if (!$found) {
                  // Move the unmatched code point from the remainder to the prefix.
                  $parts[] = $haystack[$position];
                  unset($haystack[$position]);
              }
              $position++;
          }
          // NOTE(review): when the match is at position 0 and $part is true, $parts is empty
          // and the whole haystack is returned (native stristr() returns '') - kept as-is.
          if ($found && $part && !empty($parts)) {
              return Multibyte::ascii($parts);
          } elseif ($found && !empty($haystack)) {
              return Multibyte::ascii($haystack);
          }
          return false;
      }
      if (!$php) {
          return stristr($haystack, $needle, $part);
      }
      return stristr($haystack, $needle);
  }
  /**
   * Returns the length of a string in characters.
   *
   * Multibyte input (as reported by checkMultibyte()) is decoded with utf8() and
   * its code points counted; anything else is measured with the native strlen().
   *
   * @param string $string The string being checked for length.
   * @return integer The number of characters in $string.
   */
  public static function strlen($string) {
      return Multibyte::checkMultibyte($string)
          ? count(Multibyte::utf8($string))
          : strlen($string);
  }
  /**
   * Finds the position of the first occurrence of a string.
   *
   * Multibyte haystacks are decoded to code points and scanned from $offset;
   * other input is delegated to the native strpos().
   *
   * @param string $haystack The string being checked.
   * @param string $needle The string to find in $haystack.
   * @param integer $offset The search offset. If it is not specified, 0 is used.
   * @return integer|boolean The position (in characters) of the first occurrence of $needle,
   *   or false if it is not found.
   */
  public static function strpos($haystack, $needle, $offset = 0) {
      if (!Multibyte::checkMultibyte($haystack)) {
          return strpos($haystack, $needle, $offset);
      }
      $haystack = Multibyte::utf8($haystack);
      $haystackCount = count($haystack);
      $needle = Multibyte::utf8($needle);
      $needleCount = count($needle);
      if ($needleCount === 0) {
          // An empty needle never matches (unchanged from the previous behavior).
          return false;
      }
      for ($position = $offset; $position < $haystackCount; $position++) {
          // isset() guards keep negative offsets and needles that run past the end
          // of the haystack from reading undefined array offsets.
          if (!isset($haystack[$position]) || $needle[0] !== $haystack[$position]) {
              continue;
          }
          for ($i = 1; $i < $needleCount; $i++) {
              if (!isset($haystack[$position + $i]) || $needle[$i] !== $haystack[$position + $i]) {
                  break;
              }
          }
          if ($i === $needleCount) {
              return $position;
          }
      }
      return false;
  }
  436. /**
  437. * Finds the last occurrence of a character in a string within another.
  438. *
  439. * @param string $haystack The string from which to get the last occurrence of $needle.
  440. * @param string $needle The string to find in $haystack.
  441. * @param boolean $part Determines which portion of $haystack this function returns.
  442. * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
  443. * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
  444. * Default value is false.
  445. * @return string|boolean The portion of $haystack, or false if $needle is not found.
  446. */
  447. public static function strrchr($haystack, $needle, $part = false) {
  // Both strings are decoded to code-point arrays with utf8(). $check is an untouched
  // copy used for comparisons; $haystack is consumed as the scan moves forward.
  448. $check = Multibyte::utf8($haystack);
  449. $found = false;
  450. $haystack = Multibyte::utf8($haystack);
  451. $haystackCount = count($haystack);
  // Occurrence count per code point; used below to skip every occurrence of the
  // needle's first code point except the last one.
  452. $matches = array_count_values($check);
  453. $needle = Multibyte::utf8($needle);
  454. $needleCount = count($needle);
  // $parts collects the code points that precede the match.
  455. $parts = array();
  456. $position = 0;
  457. while (($found === false) && ($position < $haystackCount)) {
  458. if (isset($needle[0]) && $needle[0] === $check[$position]) {
  459. for ($i = 1; $i < $needleCount; $i++) {
  460. if ($needle[$i] !== $check[$position + $i]) {
  // NOTE(review): on any mismatch the previous $parts entry is dropped and the current
  // code point is prepended to $haystack (array_merge() renumbers the keys); if the
  // preceding code point equals $needle[$i] the search is also flagged as found.
  // The intent is not evident from this block - confirm against tests before changing.
  461. if ($needle[$i] === $check[($position + $i) -1]) {
  462. $found = true;
  463. }
  464. unset($parts[$position - 1]);
  465. $haystack = array_merge(array($haystack[$position]), $haystack);
  466. break;
  467. }
  468. }
  // While more than one occurrence of the needle's first code point remains, count this
  // one off and keep scanning; otherwise a fully compared needle ends the search.
  469. if (isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
  470. $matches[$needle[0]] = $matches[$needle[0]] - 1;
  471. } elseif ($i === $needleCount) {
  472. $found = true;
  473. }
  474. }
  // Not matched here: move this code point from the remainder to the prefix.
  475. if (!$found && isset($haystack[$position])) {
  476. $parts[] = $haystack[$position];
  477. unset($haystack[$position]);
  478. }
  479. $position++;
  480. }
  // $part true returns the collected prefix; otherwise what is left of $haystack.
  481. if ($found && $part && !empty($parts)) {
  482. return Multibyte::ascii($parts);
  483. } elseif ($found && !empty($haystack)) {
  484. return Multibyte::ascii($haystack);
  485. }
  486. return false;
  487. }
  488. /**
  489. * Finds the last occurrence of a character in a string within another, case insensitive.
  490. *
  491. * @param string $haystack The string from which to get the last occurrence of $needle.
  492. * @param string $needle The string to find in $haystack.
  493. * @param boolean $part Determines which portion of $haystack this function returns.
  494. * If set to true, it returns all of $haystack from the beginning to the last occurrence of $needle.
  495. * If set to false, it returns all of $haystack from the last occurrence of $needle to the end,
  496. * Default value is false.
  497. * @return string|boolean The portion of $haystack, or false if $needle is not found.
  498. */
  499. public static function strrichr($haystack, $needle, $part = false) {
  // $check holds the code points of the upper-cased haystack and is used for matching;
  // $haystack keeps the original code points so returned text preserves its case.
  500. $check = Multibyte::strtoupper($haystack);
  501. $check = Multibyte::utf8($check);
  502. $found = false;
  503. $haystack = Multibyte::utf8($haystack);
  504. $haystackCount = count($haystack);
  // Occurrence count per (upper-cased) code point; used below to skip every occurrence
  // of the needle's first code point except the last one.
  505. $matches = array_count_values($check);
  506. $needle = Multibyte::strtoupper($needle);
  507. $needle = Multibyte::utf8($needle);
  508. $needleCount = count($needle);
  // $parts collects the code points that precede the match.
  509. $parts = array();
  510. $position = 0;
  511. while (($found === false) && ($position < $haystackCount)) {
  512. if (isset($needle[0]) && $needle[0] === $check[$position]) {
  513. for ($i = 1; $i < $needleCount; $i++) {
  514. if ($needle[$i] !== $check[$position + $i]) {
  // NOTE(review): on any mismatch the previous $parts entry is dropped and the current
  // code point is prepended to $haystack (array_merge() renumbers the keys); if the
  // preceding code point equals $needle[$i] the search is also flagged as found.
  // The intent is not evident from this block - confirm against tests before changing.
  515. if ($needle[$i] === $check[($position + $i) -1]) {
  516. $found = true;
  517. }
  518. unset($parts[$position - 1]);
  519. $haystack = array_merge(array($haystack[$position]), $haystack);
  520. break;
  521. }
  522. }
  // While more than one occurrence of the needle's first code point remains, count this
  // one off and keep scanning; otherwise a fully compared needle ends the search.
  523. if (isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
  524. $matches[$needle[0]] = $matches[$needle[0]] - 1;
  525. } elseif ($i === $needleCount) {
  526. $found = true;
  527. }
  528. }
  // Not matched here: move this code point from the remainder to the prefix.
  529. if (!$found && isset($haystack[$position])) {
  530. $parts[] = $haystack[$position];
  531. unset($haystack[$position]);
  532. }
  533. $position++;
  534. }
  // $part true returns the collected prefix; otherwise what is left of $haystack.
  535. if ($found && $part && !empty($parts)) {
  536. return Multibyte::ascii($parts);
  537. } elseif ($found && !empty($haystack)) {
  538. return Multibyte::ascii($haystack);
  539. }
  540. return false;
  541. }
  542. /**
  543. * Finds position of last occurrence of a string within another, case insensitive
  544. *
  545. * @param string $haystack The string from which to get the position of the last occurrence of $needle.
  546. * @param string $needle The string to find in $haystack.
  547. * @param integer $offset The position in $haystack to start searching.
  548. * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string,
  549. * or false if $needle is not found.
  550. */
  551. public static function strripos($haystack, $needle, $offset = 0) {
  // Multibyte haystacks are handled on upper-cased code-point arrays; anything else
  // falls through to the native strripos() at the bottom.
  552. if (Multibyte::checkMultibyte($haystack)) {
  553. $found = false;
  554. $haystack = Multibyte::strtoupper($haystack);
  555. $haystack = Multibyte::utf8($haystack);
  556. $haystackCount = count($haystack);
  // Occurrence count per code point; used to skip all but the last occurrence of $needle[0].
  557. $matches = array_count_values($haystack);
  558. $needle = Multibyte::strtoupper($needle);
  559. $needle = Multibyte::utf8($needle);
  560. $needleCount = count($needle);
  // The forward scan starts at $offset.
  561. $position = $offset;
  562. while (($found === false) && ($position < $haystackCount)) {
  563. if (isset($needle[0]) && $needle[0] === $haystack[$position]) {
  564. for ($i = 1; $i < $needleCount; $i++) {
  565. if ($needle[$i] !== $haystack[$position + $i]) {
  566. if ($needle[$i] === $haystack[($position + $i) -1]) {
  // NOTE(review): `continue` resumes the inner for loop, not the while loop, so $i still
  // runs up to $needleCount and the elseif below can decrement $position a second time.
  // Confirm whether that is intended.
  567. $position--;
  568. $found = true;
  569. continue;
  570. }
  571. }
  572. }
  // The occurrence-skipping counter is only consulted when $offset is 0 (falsy).
  573. if (!$offset && isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
  574. $matches[$needle[0]] = $matches[$needle[0]] - 1;
  575. } elseif ($i === $needleCount) {
  576. $found = true;
  // Cancels the unconditional $position++ below so the returned index is the match start.
  577. $position--;
  578. }
  579. }
  580. $position++;
  581. }
  582. return ($found) ? $position : false;
  583. }
  584. return strripos($haystack, $needle, $offset);
  585. }
  586. /**
  587. * Find position of last occurrence of a string in a string.
  588. *
  589. * @param string $haystack The string being checked, for the last occurrence of $needle.
  590. * @param string $needle The string to find in $haystack.
  591. * @param integer $offset May be specified to begin searching an arbitrary number of characters into the string.
  592. * Negative values will stop searching at an arbitrary point prior to the end of the string.
  593. * @return integer|boolean The numeric position of the last occurrence of $needle in the $haystack string.
  594. * If $needle is not found, it returns false.
  595. */
  596. public static function strrpos($haystack, $needle, $offset = 0) {
  // Multibyte haystacks are handled on code-point arrays; anything else falls through
  // to the native strrpos() at the bottom.
  597. if (Multibyte::checkMultibyte($haystack)) {
  598. $found = false;
  599. $haystack = Multibyte::utf8($haystack);
  600. $haystackCount = count($haystack);
  // Occurrence count per code point; used to skip all but the last occurrence of $needle[0].
  601. $matches = array_count_values($haystack);
  602. $needle = Multibyte::utf8($needle);
  603. $needleCount = count($needle);
  // The forward scan starts at $offset.
  604. $position = $offset;
  605. while (($found === false) && ($position < $haystackCount)) {
  606. if (isset($needle[0]) && $needle[0] === $haystack[$position]) {
  607. for ($i = 1; $i < $needleCount; $i++) {
  608. if ($needle[$i] !== $haystack[$position + $i]) {
  609. if ($needle[$i] === $haystack[($position + $i) -1]) {
  // NOTE(review): `continue` resumes the inner for loop, not the while loop, so $i still
  // runs up to $needleCount and the elseif below can decrement $position a second time.
  // Confirm whether that is intended.
  610. $position--;
  611. $found = true;
  612. continue;
  613. }
  614. }
  615. }
  // The occurrence-skipping counter is only consulted when $offset is 0 (falsy).
  616. if (!$offset && isset($matches[$needle[0]]) && $matches[$needle[0]] > 1) {
  617. $matches[$needle[0]] = $matches[$needle[0]] - 1;
  618. } elseif ($i === $needleCount) {
  619. $found = true;
  // Cancels the unconditional $position++ below so the returned index is the match start.
  620. $position--;
  621. }
  622. }
  623. $position++;
  624. }
  625. return ($found) ? $position : false;
  626. }
  627. return strrpos($haystack, $needle, $offset);
  628. }
  /**
   * Finds the first occurrence of a string within another.
   *
   * The code-point implementation is used when the haystack is multibyte, or when
   * $part is requested on PHP older than 5.3 (whose native strstr() has no third
   * argument). Otherwise the native strstr() is called.
   *
   * @param string $haystack The string from which to get the first occurrence of $needle.
   * @param string $needle The string to find in $haystack.
   * @param boolean $part If true, return the part of $haystack before the first occurrence
   *   of $needle; if false (default), return from the first occurrence to the end.
   * @return string|boolean The portion of $haystack, or false if $needle is not found.
   */
  public static function strstr($haystack, $needle, $part = false) {
      // version_compare() replaces the string-vs-float test `PHP_VERSION < 5.3`, whose
      // result depends on PHP's loose comparison rules.
      $php = version_compare(PHP_VERSION, '5.3.0', '<');
      if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
          // $check is a read-only copy for matching; $haystack is consumed during the scan.
          $check = Multibyte::utf8($haystack);
          $found = false;
          $haystack = Multibyte::utf8($haystack);
          $haystackCount = count($haystack);
          $needle = Multibyte::utf8($needle);
          $needleCount = count($needle);
          $parts = array();
          $position = 0;
          while (($found === false) && ($position < $haystackCount)) {
              if (isset($needle[0]) && $needle[0] === $check[$position]) {
                  for ($i = 1; $i < $needleCount; $i++) {
                      // isset() stops the comparison from reading past the end of $check.
                      if (!isset($check[$position + $i]) || $needle[$i] !== $check[$position + $i]) {
                          break;
                      }
                  }
                  if ($i === $needleCount) {
                      $found = true;
                  }
              }
              if (!$found) {
                  // Move the unmatched code point from the remainder to the prefix.
                  $parts[] = $haystack[$position];
                  unset($haystack[$position]);
              }
              $position++;
          }
          // NOTE(review): when the match is at position 0 and $part is true, $parts is empty
          // and the whole haystack is returned (native strstr() returns '') - kept as-is.
          if ($found && $part && !empty($parts)) {
              return Multibyte::ascii($parts);
          } elseif ($found && !empty($haystack)) {
              return Multibyte::ascii($haystack);
          }
          return false;
      }
      if (!$php) {
          return strstr($haystack, $needle, $part);
      }
      return strstr($haystack, $needle);
  }
  /**
   * Makes a string lowercase.
   *
   * The input is decoded to code points with utf8(). Code points below 128 go through
   * the native strtolower(); others are looked up with self::_find($char, 'upper') and
   * replaced by the first 'lower' code point of the first entry whose 'upper' equals
   * the character. Code points without a mapping are copied unchanged.
   *
   * @param string $string The string being lowercased.
   * @return string with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($string) {
      $utf8Map = Multibyte::utf8($string);
      $lowerCase = array();
      foreach ($utf8Map as $char) {
          if ($char < 128) {
              $lowerCase[] = ord(strtolower(chr($char)));
              continue;
          }
          $mapped = $char;
          $keys = self::_find($char, 'upper');
          if (!empty($keys)) {
              foreach ($keys as $entry) {
                  // isset() replaces count() on the integer code point, which warns on
                  // PHP 7.2+ and throws a TypeError on PHP 8; for an integer the old
                  // `count(...) === 1` test was always true, so the result is unchanged.
                  if ($entry['upper'] == $char && isset($entry['lower'][0])) {
                      $mapped = $entry['lower'][0];
                      break;
                  }
              }
          }
          $lowerCase[] = $mapped;
      }
      return Multibyte::ascii($lowerCase);
  }
  719. /**
  720. * Make a string uppercase
  721. *
  722. * @param string $string The string being uppercased.
  723. * @return string with all alphabetic characters converted to uppercase.
  724. */
  725. public static function strtoupper($string) {
  // Decode to code points, map each one to upper case, then re-encode with ascii().
  726. $utf8Map = Multibyte::utf8($string);
  727. $length = count($utf8Map);
  // Lower-case code points already consumed as part of a multi-code-point mapping.
  728. $replaced = array();
  729. $upperCase = array();
  730. for ($i = 0 ; $i < $length; $i++) {
  731. $char = $utf8Map[$i];
  732. if ($char < 128) {
  // Code points below 128 use the native strtoupper(); the loop leaves the ordinal
  // of the last byte of the result in $upper.
  733. $str = strtoupper(chr($char));
  734. $strlen = strlen($str);
  735. for ($ii = 0 ; $ii < $strlen; $ii++) {
  736. $upper = ord(substr($str, $ii, 1));
  737. }
  738. $upperCase[] = $upper;
  739. $matched = true;
  740. } else {
  741. $matched = false;
  // NOTE(review): _find() is not visible here; from the usage below each entry appears
  // to carry an 'upper' code point and a 'lower' array of code points - confirm.
  742. $keys = self::_find($char);
  743. $keyCount = count($keys);
  744. if (!empty($keys)) {
  745. foreach ($keys as $key => $value) {
  746. $matched = false;
  747. $replace = 0;
  748. if ($length > 1 && count($keys[$key]['lower']) > 1) {
  // This entry's 'lower' has several code points: compare them with the input starting
  // at $i and emit the single 'upper' only if every one matches.
  749. $j = 0;
  750. for ($ii = 0, $count = count($keys[$key]['lower']); $ii < $count; $ii++) {
  // NOTE(review): $i + $ii can run past the end of $utf8Map; isset() on the already
  // assigned $nextChar does not prevent the undefined-offset notice.
  751. $nextChar = $utf8Map[$i + $ii];
  752. if (isset($nextChar) && ($nextChar == $keys[$key]['lower'][$j + $ii])) {
  753. $replace++;
  754. }
  755. }
  756. if ($replace == $count) {
  757. $upperCase[] = $keys[$key]['upper'];
  758. $replaced = array_merge($replaced, array_values($keys[$key]['lower']));
  759. $matched = true;
  760. break 1;
  761. }
  762. } elseif ($length > 1 && $keyCount > 1) {
  // Several candidate entries: look through the later ones (index 1 onward) for a
  // 'lower' sequence that matches the input starting at $i.
  763. $j = 0;
  764. for ($ii = 1; $ii < $keyCount; $ii++) {
  765. $nextChar = $utf8Map[$i + $ii - 1];
  766. if (in_array($nextChar, $keys[$ii]['lower'])) {
  767. for ($jj = 0, $count = count($keys[$ii]['lower']); $jj < $count; $jj++) {
  768. $nextChar = $utf8Map[$i + $jj];
  769. if (isset($nextChar) && ($nextChar == $keys[$ii]['lower'][$j + $jj])) {
  770. $replace++;
  771. }
  772. }
  773. if ($replace == $count) {
  774. $upperCase[] = $keys[$ii]['upper'];
  775. $replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
  776. $matched = true;
  // Leaves both the candidate loop and the foreach over $keys.
  777. break 2;
  778. }
  779. }
  780. }
  781. }
  // Simple case: the entry's first 'lower' code point is the current character.
  782. if ($keys[$key]['lower'][0] == $char) {
  783. $upperCase[] = $keys[$key]['upper'];
  784. $matched = true;
  785. break 1;
  786. }
  787. }
  788. }
  789. }
  // Unmapped code points are copied through unless they were recorded in $replaced.
  790. if ($matched === false && !in_array($char, $replaced, true)) {
  791. $upperCase[] = $char;
  792. }
  793. }
  794. return Multibyte::ascii($upperCase);
  795. }
  /**
   * Count the number of substring occurrences.
   *
   * Both arguments are converted to code point lists with Multibyte::utf8() and
   * compared code point by code point. Occurrences are counted without overlap:
   * after a match the search resumes right behind the matched sequence.
   *
   * @param string $haystack The string being checked.
   * @param string $needle The string being found.
   * @return integer The number of times the $needle substring occurs in the $haystack string.
   *    An empty $needle yields 0.
   */
  public static function substrCount($haystack, $needle) {
      $haystack = Multibyte::utf8($haystack);
      $needle = Multibyte::utf8($needle);
      $needleCount = count($needle);
      if ($needleCount === 0) {
          return 0;
      }

      // Last index at which a complete needle still fits into the haystack.
      $lastStart = count($haystack) - $needleCount;
      $count = 0;
      $i = 0;
      while ($i <= $lastStart) {
          $ii = 0;
          while ($ii < $needleCount && $haystack[$i + $ii] === $needle[$ii]) {
              $ii++;
          }
          if ($ii === $needleCount) {
              $count++;
              $i += $needleCount;
          } else {
              $i++;
          }
      }
      return $count;
  }
  /**
   * Get part of string.
   *
   * Offsets are measured in characters (code points as produced by
   * Multibyte::utf8()), not bytes. Negative values are interpreted the way
   * array_slice() interprets them: a negative $start counts from the end of
   * the string, a negative $length stops that many characters before the end.
   *
   * @param string $string The string being checked.
   * @param integer $start The first position used in $string.
   * @param integer $length The maximum length of the returned string. Null means "up to the end".
   * @return string The portion of $string specified by the $start and $length parameters.
   */
  public static function substr($string, $start, $length = null) {
      if ($start === 0 && $length === null) {
          return $string;
      }
      $codePoints = Multibyte::utf8($string);
      $slice = array_slice($codePoints, (int)$start, $length === null ? null : (int)$length);
      return Multibyte::ascii($slice);
  }
  /**
   * Prepare a string for mail transport, using the provided encoding.
   *
   * The string is base64 encoded and split into '=?CHARSET?B?...?=' words that
   * are at most 75 characters long, joined by $newline followed by a space.
   * Strings without multibyte characters that are shorter than 75 bytes are
   * returned unchanged.
   *
   * @param string $string value to encode
   * @param string $charset charset to use for encoding. Defaults to the 'App.encoding' configuration value.
   * @param string $newline line break inserted between encoded words
   * @return string
   * @TODO: add support for 'Q'('Quoted Printable') encoding
   */
  public static function mimeEncode($string, $charset = null, $newline = "\r\n") {
      if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
          return $string;
      }

      if (empty($charset)) {
          $charset = Configure::read('App.encoding');
      }
      $charset = strtoupper($charset);

      $start = '=?' . $charset . '?B?';
      $end = '?=';
      $spacer = $end . $newline . ' ' . $start;

      // Room left for base64 data inside one encoded word, rounded down to a
      // whole number of 4-character base64 groups.
      $length = 75 - strlen($start) - strlen($end);
      $length = $length - ($length % 4);

      if ($charset === 'UTF-8') {
          $parts = array();
          // Number of raw bytes that encode to $length base64 characters.
          $maxchars = (int)floor(($length * 3) / 4);
          while (strlen($string) > $maxchars) {
              $i = $maxchars;
              // Never split inside a character: step back while the byte at
              // the cut position is a UTF-8 continuation byte (0x80-0xBF).
              while ($i > 0 && (ord($string[$i]) & 0xC0) === 0x80) {
                  $i--;
              }
              if ($i === 0) {
                  // Malformed input made of continuation bytes only; cut at
                  // the byte limit so the loop always makes progress.
                  $i = $maxchars;
              }
              $parts[] = base64_encode(substr($string, 0, $i));
              $string = substr($string, $i);
          }
          $parts[] = base64_encode($string);
          $string = implode($spacer, $parts);
      } else {
          $string = chunk_split(base64_encode($string), $length, $spacer);
          // chunk_split() also appends the separator after the last chunk.
          $string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
      }
      return $start . $string . $end;
  }
  /**
   * Return the Code points range for Unicode characters.
   *
   * The result is also stored in self::$_codeRange[$decimal] so that later
   * lookups for the same code point can be answered from that cache.
   *
   * @param integer $decimal decimal value of the character
   * @return string|boolean Name of the case-folding table covering $decimal,
   *    or false when no table applies.
   */
  protected static function _codepoint($decimal) {
      // Ordered list of array(inclusive lower bound|null, exclusive upper bound, table).
      // The first matching row wins. Rows with a null lower bound continue the
      // previous row and therefore also catch every smaller value that was not
      // matched earlier (including gaps between blocks); this fall-through is
      // kept on purpose so existing callers keep receiving the same tables.
      static $blocks = array(
          array(128, 256, '0080_00ff'), // Latin-1 Supplement
          array(null, 384, '0100_017f'), // Latin Extended-A
          array(null, 592, '0180_024F'), // Latin Extended-B
          array(null, 688, '0250_02af'), // IPA Extensions
          array(880, 1024, '0370_03ff'), // Greek and Coptic
          array(null, 1280, '0400_04ff'), // Cyrillic
          array(null, 1328, '0500_052f'), // Cyrillic Supplement
          array(null, 1424, '0530_058f'), // Armenian
          array(7680, 7936, '1e00_1eff'), // Latin Extended Additional
          array(null, 8192, '1f00_1fff'), // Greek Extended
          array(8448, 8528, '2100_214f'), // Letterlike Symbols
          array(null, 8592, '2150_218f'), // Number Forms
          array(9312, 9472, '2460_24ff'), // Enclosed Alphanumerics
          array(11264, 11360, '2c00_2c5f'), // Glagolitic
          array(null, 11392, '2c60_2c7f'), // Latin Extended-C
          array(null, 11520, '2c80_2cff'), // Coptic
          array(65280, 65520, 'ff00_ffef'), // Halfwidth and Fullwidth Forms
      );

      $return = false;
      foreach ($blocks as $block) {
          list($min, $max, $table) = $block;
          if (($min === null || $decimal >= $min) && $decimal < $max) {
              $return = $table;
              break;
          }
      }
      self::$_codeRange[$decimal] = $return;
      return $return;
  }
  /**
   * Find the related code folding values for $char.
   *
   * On the first lookup of a code point its table name is resolved through
   * _codepoint(). The matching case-folding table is loaded through Configure
   * once per table and kept in self::$_caseFold. self::$_table is set to the
   * table name used for the lookup.
   *
   * @param integer $char decimal value of character
   * @param string $type Side of the mapping to compare $char with: 'lower'
   *    matches the first code point of an entry's lowercase form, 'upper'
   *    matches its uppercase code point.
   * @return array List of matching case-folding entries; empty when there is
   *    no table for $char or no entry matches.
   */
  protected static function _find($char, $type = 'lower') {
      $found = array();
      if (!isset(self::$_codeRange[$char])) {
          $range = self::_codepoint($char);
          if ($range === false) {
              return $found;
          }
          // Only read the table from disk when it is not cached yet.
          if (!isset(self::$_caseFold[$range])) {
              if (!Configure::configured('_cake_core_')) {
                  App::uses('PhpReader', 'Configure');
                  Configure::config('_cake_core_', new PhpReader(CAKE . 'Config' . DS));
              }
              Configure::load('unicode' . DS . 'casefolding' . DS . $range, '_cake_core_');
              self::$_caseFold[$range] = Configure::read($range);
              Configure::delete($range);
          }
      }
      if (!self::$_codeRange[$char]) {
          return $found;
      }

      self::$_table = self::$_codeRange[$char];
      $table = self::$_caseFold[self::$_table];
      if (!is_array($table)) {
          return $found;
      }
      foreach ($table as $entry) {
          if ($type === 'lower' && $entry['lower'][0] === $char) {
              $found[] = $entry;
          } elseif ($type === 'upper' && $entry['upper'] === $char) {
              $found[] = $entry;
          }
      }
      return $found;
  }
  /**
   * Check the $string for multibyte characters.
   *
   * A string counts as multibyte as soon as it contains a byte outside the
   * 7-bit ASCII range, i.e. any byte from 0x80 to 0xFF.
   *
   * @param string $string value to test
   * @return boolean True when at least one non-ASCII byte is present.
   */
  public static function checkMultibyte($string) {
      // No "u" modifier: the pattern is matched against raw bytes.
      return preg_match('/[\x80-\xFF]/', $string) === 1;
  }
  995. }