PageRenderTime 54ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/atlas/cake/libs/multibyte.php

http://atlascakephp.googlecode.com/
PHP | 1126 lines | 674 code | 76 blank | 376 comment | 205 complexity | 486853d9fa9dfef0a2d675eaf1156433 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. /* SVN FILE: $Id$ */
  3. /**
  4. * Multibyte handling methods.
  5. *
  6. *
  7. * PHP versions 4 and 5
  8. *
  9. * CakePHP(tm) : Rapid Development Framework (http://www.cakephp.org)
  10. * Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
  11. *
  12. * Licensed under The MIT License
  13. * Redistributions of files must retain the above copyright notice.
  14. *
  15. * @filesource
  16. * @copyright Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
  17. * @link http://www.cakefoundation.org/projects/info/cakephp CakePHP(tm) Project
  18. * @package cake
  19. * @subpackage cake.cake.libs
  20. * @since CakePHP(tm) v 1.2.0.6833
  21. * @version $Revision$
  22. * @modifiedby $LastChangedBy$
  23. * @lastmodified $Date$
  24. * @license http://www.opensource.org/licenses/mit-license.php The MIT License
  25. */
  // When the mbstring extension is loaded, align its internal encoding with
  // the application-wide 'App.encoding' setting (skipped when that is empty).
  if (function_exists('mb_internal_encoding') === true) {
      $encoding = Configure::read('App.encoding');
      if ($encoding) {
          mb_internal_encoding($encoding);
      }
  }
  /**
   * Fallback for mb_stripos(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::stripos(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to look for in $haystack.
   * @param integer $offset The position in $haystack to start searching.
   * @param string $encoding Accepted but not forwarded.
   * @return integer|boolean Position of the first case-insensitive occurrence of $needle,
   *   or false if $needle is not found.
   */
  if (function_exists('mb_stripos') === false) {
      function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::stripos($haystack, $needle, $offset);
          return $position;
      }
  }
  /**
   * Fallback for mb_stristr(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::stristr(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to look for in $haystack.
   * @param boolean $part True to get the portion of $haystack before the first occurrence
   *   of $needle, false (default) to get the portion starting at that occurrence.
   * @param string $encoding Accepted but not forwarded.
   * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
   */
  if (function_exists('mb_stristr') === false) {
      function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::stristr($haystack, $needle, $part);
          return $portion;
      }
  }
  /**
   * Fallback for mb_strlen(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strlen(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $string The string being measured.
   * @param string $encoding Accepted but not forwarded.
   * @return integer The number of characters in $string; a multi-byte character counts as 1.
   */
  if (function_exists('mb_strlen') === false) {
      function mb_strlen($string, $encoding = null) {
          $characters = Multibyte::strlen($string);
          return $characters;
      }
  }
  /**
   * Fallback for mb_strpos(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strpos(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string being checked.
   * @param string $needle The string to look for in $haystack.
   * @param integer $offset The search offset. If it is not specified, 0 is used.
   * @param string $encoding Accepted but not forwarded.
   * @return integer|boolean Position of the first occurrence of $needle in $haystack,
   *   or false if $needle is not found.
   */
  if (function_exists('mb_strpos') === false) {
      function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strpos($haystack, $needle, $offset);
          return $position;
      }
  }
  /**
   * Fallback for mb_strrchr(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strrchr(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to look for in $haystack.
   * @param boolean $part True to get the portion of $haystack before the last occurrence
   *   of $needle, false (default) to get the portion starting at that occurrence.
   * @param string $encoding Accepted but not forwarded.
   * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
   */
  if (function_exists('mb_strrchr') === false) {
      function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strrchr($haystack, $needle, $part);
          return $portion;
      }
  }
  /**
   * Fallback for mb_strrichr(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strrichr(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to look for in $haystack, ignoring case.
   * @param boolean $part True to get the portion of $haystack before the last occurrence
   *   of $needle, false (default) to get the portion starting at that occurrence.
   * @param string $encoding Accepted but not forwarded.
   * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
   */
  if (function_exists('mb_strrichr') === false) {
      function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strrichr($haystack, $needle, $part);
          return $portion;
      }
  }
  /**
   * Fallback for mb_strripos(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strripos(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to look for in $haystack, ignoring case.
   * @param integer $offset The position in $haystack to start searching.
   * @param string $encoding Accepted but not forwarded.
   * @return integer|boolean Position of the last occurrence of $needle in $haystack,
   *   or false if $needle is not found.
   */
  if (function_exists('mb_strripos') === false) {
      function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strripos($haystack, $needle, $offset);
          return $position;
      }
  }
  /**
   * Fallback for mb_strrpos(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strrpos(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string being checked for the last occurrence of $needle.
   * @param string $needle The string to look for in $haystack.
   * @param integer $offset May be specified to begin searching an arbitrary number of
   *   characters into the string. Negative values stop the search at a point prior to
   *   the end of the string.
   * @param string $encoding Accepted but not forwarded.
   * @return integer|boolean Position of the last occurrence of $needle in $haystack,
   *   or false if $needle is not found.
   */
  if (function_exists('mb_strrpos') === false) {
      function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
          $position = Multibyte::strrpos($haystack, $needle, $offset);
          return $position;
      }
  }
  /**
   * Fallback for mb_strstr(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strstr(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string to search in.
   * @param string $needle The string to look for in $haystack.
   * @param boolean $part True to get the portion of $haystack before the first occurrence
   *   of $needle, false (default) to get the portion starting at that occurrence.
   * @param string $encoding Accepted but not forwarded.
   * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
   */
  if (function_exists('mb_strstr') === false) {
      function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
          $portion = Multibyte::strstr($haystack, $needle, $part);
          return $portion;
      }
  }
  /**
   * Fallback for mb_strtolower(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strtolower(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $string The string being lowercased.
   * @param string $encoding Accepted but not forwarded.
   * @return string $string with alphabetic characters converted to lowercase.
   */
  if (function_exists('mb_strtolower') === false) {
      function mb_strtolower($string, $encoding = null) {
          $lowered = Multibyte::strtolower($string);
          return $lowered;
      }
  }
  /**
   * Fallback for mb_strtoupper(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::strtoupper(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $string The string being uppercased.
   * @param string $encoding Accepted but not forwarded.
   * @return string $string with alphabetic characters converted to uppercase.
   */
  if (function_exists('mb_strtoupper') === false) {
      function mb_strtoupper($string, $encoding = null) {
          $raised = Multibyte::strtoupper($string);
          return $raised;
      }
  }
  /**
   * Fallback for mb_substr_count(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::substrCount(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $haystack The string being checked.
   * @param string $needle The string being counted.
   * @param string $encoding Accepted but not forwarded.
   * @return integer Result of Multibyte::substrCount(): the number of times $needle
   *   occurs in $haystack.
   */
  if (function_exists('mb_substr_count') === false) {
      function mb_substr_count($haystack, $needle, $encoding = null) {
          $occurrences = Multibyte::substrCount($haystack, $needle);
          return $occurrences;
      }
  }
  /**
   * Fallback for mb_substr(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::substr(). $encoding is accepted for signature
   * compatibility only; it is not passed on to the Multibyte class.
   *
   * @param string $string The string to extract from.
   * @param integer $start The first position used in $string.
   * @param integer $length The maximum length of the returned string.
   * @param string $encoding Accepted but not forwarded.
   * @return string Result of Multibyte::substr(): the portion of $string selected by
   *   $start and $length.
   */
  if (function_exists('mb_substr') === false) {
      function mb_substr($string, $start, $length = null, $encoding = null) {
          $portion = Multibyte::substr($string, $start, $length);
          return $portion;
      }
  }
  /**
   * Fallback for mb_encode_mimeheader(), declared only when no function of that name exists.
   *
   * Delegates to Multibyte::mimeEncode() with $str, $charset and $linefeed.
   * $transfer_encoding and $indent are accepted for signature compatibility only;
   * they are not passed on, so they have no effect in this fallback.
   *
   * @param string $str The string being encoded.
   * @param string $charset Name of the character set $str is represented in. Defaults to 'UTF-8'.
   * @param string $transfer_encoding MIME encoding scheme, "B" (Base64) or "Q" (Quoted-Printable).
   *   Defaults to "B". Not forwarded.
   * @param string $linefeed End-of-line marker used for line folding (breaking a line longer
   *   than a certain length into multiple lines). Defaults to "\r\n" (CRLF).
   * @param integer $indent Not forwarded.
   * @return string Result of Multibyte::mimeEncode(): a version of $str represented in ASCII.
   */
  if (function_exists('mb_encode_mimeheader') === false) {
      function mb_encode_mimeheader($str, $charset = 'UTF-8', $transfer_encoding = 'B', $linefeed = "\r\n", $indent = 1) {
          $encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
          return $encoded;
      }
  }
  237. /**
  238. * Multibyte handling methods.
  239. *
  240. *
  241. * @package cake
  242. * @subpackage cake.cake.libs
  243. */
  244. class Multibyte extends Object {
  245. /**
  246. * Holds the case folding values
  247. *
  248. * @var array
  249. * @access private
  250. */
  251. var $__caseFold = array();
  252. /**
  253. * Holds an array of Unicode code point ranges
  254. *
  255. * @var array
  256. * @access private
  257. */
  258. var $__codeRange = array();
  259. /**
  260. * Holds the current code point range
  261. *
  262. * @var string
  263. * @access private
  264. */
  265. var $__table = null;
  /**
   * Gets a reference to the shared Multibyte instance, creating it on first call.
   *
   * The instance is kept in a function-static array so that it survives between
   * calls and can be returned by reference.
   *
   * @return object Multibyte instance
   * @access public
   * @static
   */
  function &getInstance() {
      static $instance = array();
      if (!$instance) {
          // Plain assignment on purpose: "=& new" raises E_DEPRECATED on PHP 5.3
          // and is a parse error from PHP 7 on.
          $instance[0] = new Multibyte();
      }
      return $instance[0];
  }
  /**
   * Converts a UTF-8 encoded string into an array of decimal code points.
   *
   * Bytes below 128 map to themselves. A byte of 128 or above starts (or continues)
   * a multi-byte sequence whose length is taken from the lead byte: below 224 means
   * two bytes, below 240 three bytes, anything else four bytes. The code point is
   * emitted once the sequence is complete; an incomplete trailing sequence is dropped.
   *
   * @param multibyte string $string
   * @return array List of integer code points, one per character.
   * @access public
   * @static
   */
  function utf8($string) {
      $map = array();
      $values = array();
      $find = 1;
      $length = strlen($string);
      for ($i = 0; $i < $length; $i++) {
          $value = ord($string[$i]);
          if ($value < 128) {
              $map[] = $value;
              continue;
          }
          if (count($values) == 0) {
              // Lead byte: decide how many bytes this character spans.
              if ($value < 224) {
                  $find = 2;
              } elseif ($value < 240) {
                  $find = 3;
              } else {
                  $find = 4;
              }
          }
          $values[] = $value;
          if (count($values) !== $find) {
              continue;
          }
          if ($find == 4) {
              // 3 payload bits in the lead byte, 6 in each continuation byte.
              $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                  + (($values[2] % 64) * 64) + ($values[3] % 64);
          } elseif ($find == 3) {
              $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
          } else {
              $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
          }
          $values = array();
          $find = 1;
      }
      return $map;
  }
  /**
   * Converts an array of decimal code points back into a UTF-8 encoded string.
   *
   * Code points below 128 become one byte, below 2048 two bytes, below 65536
   * three bytes, and everything above that four bytes.
   *
   * @param array $array List of integer code points.
   * @return string UTF-8 encoded string.
   * @access public
   * @static
   */
  function ascii($array) {
      $ascii = '';
      foreach ($array as $utf8) {
          if ($utf8 < 128) {
              $ascii .= chr($utf8);
          } elseif ($utf8 < 2048) {
              $ascii .= chr(0xC0 | ($utf8 >> 6));
              $ascii .= chr(0x80 | ($utf8 & 0x3F));
          } elseif ($utf8 < 65536) {
              $ascii .= chr(0xE0 | ($utf8 >> 12));
              $ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
              $ascii .= chr(0x80 | ($utf8 & 0x3F));
          } else {
              // Supplementary planes need a four-byte sequence.
              $ascii .= chr(0xF0 | ($utf8 >> 18));
              $ascii .= chr(0x80 | (($utf8 >> 12) & 0x3F));
              $ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
              $ascii .= chr(0x80 | ($utf8 & 0x3F));
          }
      }
      return $ascii;
  }
  /**
   * Find position of first occurrence of a case-insensitive string.
   *
   * The native stripos() is used only when the PHP5 constant is truthy and
   * Multibyte::checkMultibyte() reports false for $haystack. Otherwise both strings
   * are uppercased with Multibyte::strtoupper() and searched with Multibyte::strpos().
   *
   * @param multi-byte string $haystack The string to search in.
   * @param multi-byte string $needle The string to find in $haystack.
   * @param integer $offset The position in $haystack to start searching.
   * @return integer|boolean Position of the first occurrence of $needle in $haystack,
   *   or false if $needle is not found.
   * @access public
   * @static
   */
  function stripos($haystack, $needle, $offset = 0) {
      $useNative = PHP5 && !Multibyte::checkMultibyte($haystack);
      if ($useNative) {
          return stripos($haystack, $needle, $offset);
      }
      $upperHaystack = Multibyte::strtoupper($haystack);
      $upperNeedle = Multibyte::strtoupper($needle);
      return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
  }
  /**
   * Finds first occurrence of a string within another, case insensitive.
   *
   * The code-point based search is used when $haystack is multibyte, or when $part is
   * requested on PHP older than 5.3 (the native stristr() only accepts its third
   * argument from 5.3 on). Matching is done on uppercased copies; the returned portion
   * is cut from the original $haystack.
   *
   * @param string $haystack The string from which to get the first occurrence of $needle.
   * @param string $needle The string to find in $haystack.
   * @param boolean $part Determines which portion of $haystack this function returns.
   *   If true, everything before the first occurrence of $needle (an empty string when
   *   $needle is at the very start). If false (default), everything from the first
   *   occurrence of $needle to the end.
   * @return string|boolean The portion of $haystack, or false if $needle is not found.
   * @access public
   * @static
   */
  function stristr($haystack, $needle, $part = false) {
      $php = version_compare(PHP_VERSION, '5.3.0', '<');
      if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
          $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
          $haystack = Multibyte::utf8($haystack);
          $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
          $needleCount = count($needle);
          if ($needleCount === 0) {
              return false;
          }
          // Stop where the needle would no longer fit, so no index is read out of range.
          $last = count($check) - $needleCount;
          for ($position = 0; $position <= $last; $position++) {
              for ($i = 0; $i < $needleCount; $i++) {
                  if ($needle[$i] !== $check[$position + $i]) {
                      break;
                  }
              }
              if ($i !== $needleCount) {
                  continue;
              }
              if ($part) {
                  return Multibyte::ascii(array_slice($haystack, 0, $position));
              }
              return Multibyte::ascii(array_slice($haystack, $position));
          }
          return false;
      }
      if (!$php) {
          return stristr($haystack, $needle, $part);
      }
      return stristr($haystack, $needle);
  }
  /**
   * Get string length.
   *
   * When Multibyte::checkMultibyte() reports true for $string, the length is the number
   * of code points produced by Multibyte::utf8(); otherwise the native strlen() is used.
   *
   * @param string $string The string being checked for length.
   * @return integer The number of characters in string $string
   * @access public
   * @static
   */
  function strlen($string) {
      if (!Multibyte::checkMultibyte($string)) {
          return strlen($string);
      }
      $codePoints = Multibyte::utf8($string);
      return count($codePoints);
  }
  /**
   * Find position of first occurrence of a string.
   *
   * When Multibyte::checkMultibyte() reports false for $haystack the native strpos()
   * is used. Otherwise both strings are converted to code points and compared
   * character by character, so the result is a character position, not a byte position.
   *
   * @param string $haystack The string being checked.
   * @param string $needle The string to find in $haystack.
   * @param integer $offset The character position to start searching from. If it is not
   *   specified, 0 is used. In the code-point search a negative value is treated as 0.
   * @return integer|boolean The numeric position of the first occurrence of $needle in the
   *   $haystack string. If $needle is not found, it returns false.
   * @access public
   * @static
   */
  function strpos($haystack, $needle, $offset = 0) {
      if (!Multibyte::checkMultibyte($haystack)) {
          return strpos($haystack, $needle, $offset);
      }
      $haystack = Multibyte::utf8($haystack);
      $needle = Multibyte::utf8($needle);
      $needleCount = count($needle);
      if ($needleCount === 0) {
          return false;
      }
      // Stop where the needle would no longer fit, so no index is read out of range.
      $last = count($haystack) - $needleCount;
      for ($position = max(0, (int)$offset); $position <= $last; $position++) {
          for ($i = 0; $i < $needleCount; $i++) {
              if ($needle[$i] !== $haystack[$position + $i]) {
                  break;
              }
          }
          if ($i === $needleCount) {
              return $position;
          }
      }
      return false;
  }
  /**
   * Finds the last occurrence of a string within another.
   *
   * Both strings are converted to code points and $haystack is scanned backwards,
   * so the first full match found is the last occurrence of $needle.
   *
   * @param string $haystack The string from which to get the last occurrence of $needle.
   * @param string $needle The string to find in $haystack.
   * @param boolean $part Determines which portion of $haystack this function returns.
   *   If true, everything before the last occurrence of $needle (an empty string when
   *   that occurrence is at the very start). If false (default), everything from the
   *   last occurrence of $needle to the end.
   * @return string|boolean The portion of $haystack, or false if $needle is not found.
   * @access public
   * @static
   */
  function strrchr($haystack, $needle, $part = false) {
      $haystack = Multibyte::utf8($haystack);
      $needle = Multibyte::utf8($needle);
      $needleCount = count($needle);
      if ($needleCount === 0) {
          return false;
      }
      // Start at the last position where the whole needle still fits.
      for ($position = count($haystack) - $needleCount; $position >= 0; $position--) {
          for ($i = 0; $i < $needleCount; $i++) {
              if ($needle[$i] !== $haystack[$position + $i]) {
                  break;
              }
          }
          if ($i !== $needleCount) {
              continue;
          }
          if ($part) {
              return Multibyte::ascii(array_slice($haystack, 0, $position));
          }
          return Multibyte::ascii(array_slice($haystack, $position));
      }
      return false;
  }
  /**
   * Finds the last occurrence of a string within another, case insensitive.
   *
   * Matching is done on uppercased code points, scanning backwards so that the first
   * full match found is the last occurrence. The returned portion is cut from the
   * original (not uppercased) $haystack.
   *
   * @param string $haystack The string from which to get the last occurrence of $needle.
   * @param string $needle The string to find in $haystack.
   * @param boolean $part Determines which portion of $haystack this function returns.
   *   If true, everything before the last occurrence of $needle (an empty string when
   *   that occurrence is at the very start). If false (default), everything from the
   *   last occurrence of $needle to the end.
   * @return string|boolean The portion of $haystack, or false if $needle is not found.
   * @access public
   * @static
   */
  function strrichr($haystack, $needle, $part = false) {
      $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
      $haystack = Multibyte::utf8($haystack);
      $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
      $needleCount = count($needle);
      if ($needleCount === 0) {
          return false;
      }
      // Start at the last position where the whole needle still fits.
      for ($position = count($check) - $needleCount; $position >= 0; $position--) {
          for ($i = 0; $i < $needleCount; $i++) {
              if ($needle[$i] !== $check[$position + $i]) {
                  break;
              }
          }
          if ($i !== $needleCount) {
              continue;
          }
          if ($part) {
              return Multibyte::ascii(array_slice($haystack, 0, $position));
          }
          return Multibyte::ascii(array_slice($haystack, $position));
      }
      return false;
  }
  /**
   * Finds position of last occurrence of a string within another, case insensitive
   *
   * The native strripos() is used only when the PHP5 constant is truthy and
   * Multibyte::checkMultibyte() reports false for $haystack. Otherwise both strings are
   * uppercased, converted to code points and scanned backwards; the result is a
   * character position within the uppercased haystack.
   *
   * @param string $haystack The string from which to get the position of the last occurrence of $needle.
   * @param string $needle The string to find in $haystack.
   * @param integer $offset Zero or positive: a match may not start before this character
   *   position. Negative: a match may not start later than that many characters before
   *   the end of the string.
   * @return integer|boolean The numeric position of the last occurrence of $needle in the
   *   $haystack string, or false if $needle is not found.
   * @access public
   * @static
   */
  function strripos($haystack, $needle, $offset = 0) {
      if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
          return strripos($haystack, $needle, $offset);
      }
      $haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
      $haystackCount = count($haystack);
      $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
      $needleCount = count($needle);
      if ($needleCount === 0) {
          return false;
      }
      $offset = (int)$offset;
      $first = 0;
      // Last position where the whole needle still fits.
      $last = $haystackCount - $needleCount;
      if ($offset > 0) {
          $first = $offset;
      } elseif ($offset < 0) {
          $last = min($last, $haystackCount + $offset);
      }
      for ($position = $last; $position >= $first; $position--) {
          for ($i = 0; $i < $needleCount; $i++) {
              if ($needle[$i] !== $haystack[$position + $i]) {
                  break;
              }
          }
          if ($i === $needleCount) {
              return $position;
          }
      }
      return false;
  }
  /**
   * Find position of last occurrence of a string in a string.
   *
   * The native strrpos() is used only when the PHP5 constant is truthy and
   * Multibyte::checkMultibyte() reports false for $haystack. Otherwise both strings are
   * converted to code points and scanned backwards, so the result is a character
   * position, not a byte position.
   *
   * @param string $haystack The string being checked, for the last occurrence of $needle.
   * @param string $needle The string to find in $haystack.
   * @param integer $offset Zero or positive: a match may not start before this character
   *   position. Negative: a match may not start later than that many characters before
   *   the end of the string.
   * @return integer|boolean The numeric position of the last occurrence of $needle in the
   *   $haystack string. If $needle is not found, it returns false.
   * @access public
   * @static
   */
  function strrpos($haystack, $needle, $offset = 0) {
      if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
          return strrpos($haystack, $needle, $offset);
      }
      $haystack = Multibyte::utf8($haystack);
      $haystackCount = count($haystack);
      $needle = Multibyte::utf8($needle);
      $needleCount = count($needle);
      if ($needleCount === 0) {
          return false;
      }
      $offset = (int)$offset;
      $first = 0;
      // Last position where the whole needle still fits.
      $last = $haystackCount - $needleCount;
      if ($offset > 0) {
          $first = $offset;
      } elseif ($offset < 0) {
          $last = min($last, $haystackCount + $offset);
      }
      for ($position = $last; $position >= $first; $position--) {
          for ($i = 0; $i < $needleCount; $i++) {
              if ($needle[$i] !== $haystack[$position + $i]) {
                  break;
              }
          }
          if ($i === $needleCount) {
              return $position;
          }
      }
      return false;
  }
  /**
   * Finds first occurrence of a string within another
   *
   * The code-point based search is used when $haystack is multibyte, or when $part is
   * requested on PHP older than 5.3 (the native strstr() only accepts its third
   * argument from 5.3 on).
   *
   * @param string $haystack The string from which to get the first occurrence of $needle.
   * @param string $needle The string to find in $haystack
   * @param boolean $part Determines which portion of $haystack this function returns.
   *   If true, everything before the first occurrence of $needle (an empty string when
   *   $needle is at the very start). If false (default), everything from the first
   *   occurrence of $needle to the end.
   * @return string|boolean The portion of $haystack, or false if $needle is not found.
   * @access public
   * @static
   */
  function strstr($haystack, $needle, $part = false) {
      $php = version_compare(PHP_VERSION, '5.3.0', '<');
      if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
          $haystack = Multibyte::utf8($haystack);
          $needle = Multibyte::utf8($needle);
          $needleCount = count($needle);
          if ($needleCount === 0) {
              return false;
          }
          // Stop where the needle would no longer fit, so no index is read out of range.
          $last = count($haystack) - $needleCount;
          for ($position = 0; $position <= $last; $position++) {
              for ($i = 0; $i < $needleCount; $i++) {
                  if ($needle[$i] !== $haystack[$position + $i]) {
                      break;
                  }
              }
              if ($i !== $needleCount) {
                  continue;
              }
              if ($part) {
                  return Multibyte::ascii(array_slice($haystack, 0, $position));
              }
              return Multibyte::ascii(array_slice($haystack, $position));
          }
          return false;
      }
      if (!$php) {
          return strstr($haystack, $needle, $part);
      }
      return strstr($haystack, $needle);
  }
  /**
   * Make a string lowercase
   *
   * The string is converted to code points. Code points below 128 go through the
   * native strtolower(). For any other code point the entries returned by
   * $_this->__find($char, 'upper') are scanned; the first entry whose 'upper' value
   * equals the code point and that has a 'lower'[0] value supplies the replacement.
   * Code points without such an entry are kept unchanged.
   *
   * @param string $string The string being lowercased.
   * @return string with all alphabetic characters converted to lowercase.
   * @access public
   * @static
   */
  function strtolower($string) {
      $_this =& Multibyte::getInstance();
      $lowerCase = array();
      foreach (Multibyte::utf8($string) as $char) {
          if ($char < 128) {
              $lowerCase[] = ord(strtolower(chr($char)));
              continue;
          }
          $lower = $char;
          $keys = $_this->__find($char, 'upper');
          if (!empty($keys)) {
              foreach ($keys as $entry) {
                  // The previous check called count() on the integer 'lower'[0]: always 1
                  // on old PHP, a warning on 7.2+ and a TypeError on 8. It amounted to
                  // "a first lowercase code point exists", which isset() states directly.
                  if ($entry['upper'] == $char && isset($entry['lower'][0])) {
                      $lower = $entry['lower'][0];
                      break;
                  }
              }
          }
          $lowerCase[] = $lower;
      }
      return Multibyte::ascii($lowerCase);
  }
  /**
   * Make a string uppercase
   *
   * Values below 128 are uppercased with PHP's native strtoupper(). Every other value
   * is looked up through __find(); an entry whose 'lower' list has several elements is
   * used when all of them match the input starting at the current position, otherwise
   * an entry whose first 'lower' element equals the value is used. Values without a
   * matching entry are copied through unchanged, unless they were already consumed by
   * an earlier multi-element match.
   *
   * @param string $string The string being uppercased.
   * @return string with all alphabetic characters converted to uppercase.
   * @access public
   * @static
   */
  function strtoupper($string) {
      $_this =& Multibyte::getInstance();
      $utf8Map = Multibyte::utf8($string);
      $length = count($utf8Map);
      // Values that took part in a multi-element replacement; they are not emitted again.
      $replaced = array();
      $upperCase = array();

      for ($i = 0; $i < $length; $i++) {
          $char = $utf8Map[$i];
          $matched = false;

          if ($char < 128) {
              // chr() yields exactly one byte, so a single ord() recovers the result.
              $upperCase[] = ord(strtoupper(chr($char)));
              $matched = true;
          } else {
              $keys = $_this->__find($char);
              // __find() may return null; only count the result once it is known to be usable.
              if (!empty($keys)) {
                  $keyCount = count($keys);
                  foreach ($keys as $key => $value) {
                      $matched = false;
                      $replace = 0;
                      $lowerCount = count($keys[$key]['lower']);

                      if ($length > 1 && $lowerCount > 1) {
                          // Compare the whole 'lower' sequence against the input, never
                          // reading past the end of the input.
                          for ($ii = 0; $ii < $lowerCount; $ii++) {
                              if (isset($utf8Map[$i + $ii]) && $utf8Map[$i + $ii] == $keys[$key]['lower'][$ii]) {
                                  $replace++;
                              }
                          }
                          if ($replace == $lowerCount) {
                              $upperCase[] = $keys[$key]['upper'];
                              $replaced = array_merge($replaced, array_values($keys[$key]['lower']));
                              $matched = true;
                              break;
                          }
                      } elseif ($length > 1 && $keyCount > 1) {
                          // Probe the remaining candidate entries for a sequence match.
                          for ($ii = 1; $ii < $keyCount; $ii++) {
                              $offset = $i + $ii - 1;
                              if (!isset($utf8Map[$offset]) || !in_array($utf8Map[$offset], $keys[$ii]['lower'])) {
                                  continue;
                              }
                              $candidateCount = count($keys[$ii]['lower']);
                              for ($jj = 0; $jj < $candidateCount; $jj++) {
                                  if (isset($utf8Map[$i + $jj]) && $utf8Map[$i + $jj] == $keys[$ii]['lower'][$jj]) {
                                      $replace++;
                                  }
                              }
                              if ($replace == $candidateCount) {
                                  $upperCase[] = $keys[$ii]['upper'];
                                  $replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
                                  $matched = true;
                                  break 2;
                              }
                          }
                      }

                      if ($keys[$key]['lower'][0] == $char) {
                          $upperCase[] = $keys[$key]['upper'];
                          $matched = true;
                          break;
                      }
                  }
              }
          }

          if ($matched === false && !in_array($char, $replaced, true)) {
              $upperCase[] = $char;
          }
      }
      return Multibyte::ascii($upperCase);
  }
  /**
   * Count the number of substring occurrences
   *
   * Both strings are converted with Multibyte::utf8() and compared element by element.
   * Occurrences are counted without overlap: after a match the scan resumes behind the
   * matched section. An empty $needle, or one longer than $haystack, yields 0.
   *
   * @param string $haystack The string being checked.
   * @param string $needle The string being found.
   * @return integer The number of times the $needle substring occurs in the $haystack string.
   * @access public
   * @static
   */
  function substrCount($haystack, $needle) {
      $haystack = Multibyte::utf8($haystack);
      $needle = Multibyte::utf8($needle);
      $haystackCount = count($haystack);
      $needleCount = count($needle);

      if ($needleCount === 0 || $needleCount > $haystackCount) {
          return 0;
      }

      $count = 0;
      // Last start position at which the needle still fits into the haystack.
      $last = $haystackCount - $needleCount;
      $i = 0;
      while ($i <= $last) {
          $ii = 0;
          while ($ii < $needleCount && $haystack[$i + $ii] === $needle[$ii]) {
              $ii++;
          }
          if ($ii === $needleCount) {
              $count++;
              // Skip the matched section so occurrences never overlap.
              $i += $needleCount;
          } else {
              $i++;
          }
      }
      return $count;
  }
  /**
   * Get part of string
   *
   * Positions and lengths are counted in elements of the array returned by
   * Multibyte::utf8(), not in bytes. Negative values follow array_slice() semantics:
   * a negative $start counts from the end of the string and a negative $length stops
   * that many elements before the end.
   *
   * @param string $string The string being checked.
   * @param integer $start The first position used in $string.
   * @param integer $length The maximum length of the returned string.
   * @return string The portion of $string specified by the $start and $length parameters.
   * @access public
   * @static
   */
  function substr($string, $start, $length = null) {
      if ($start === 0 && $length === null) {
          return $string;
      }

      $codepoints = Multibyte::utf8($string);
      // array_slice() is called without a length argument for "until the end", because
      // passing null as the length is not treated as "omitted" on older PHP versions.
      if ($length === null) {
          $slice = array_slice($codepoints, (int)$start);
      } else {
          $slice = array_slice($codepoints, (int)$start, (int)$length);
      }
      return Multibyte::ascii($slice);
  }
  /**
   * Prepare a string for mail transport, using the provided encoding
   *
   * Strings shorter than 75 bytes for which checkMultibyte() reports nothing are
   * returned untouched. Everything else is base64 encoded and wrapped in
   * "=?CHARSET?B?...?=" sections of at most 75 characters, joined by $newline and a space.
   * For UTF-8 the input is cut in front of a lead byte so that no multibyte sequence is
   * split across two sections.
   *
   * @param string $string value to encode
   * @param string $charset charset to use for encoding. Falls back to the 'App.encoding' configuration value when empty
   * @param string $newline line break inserted between encoded sections
   * @return string
   * @access public
   * @static
   * @TODO: add support for 'Q'('Quoted Printable') encoding
   */
  function mimeEncode($string, $charset = null, $newline = "\r\n") {
      if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
          return $string;
      }
      if (empty($charset)) {
          $charset = Configure::read('App.encoding');
      }
      $charset = strtoupper($charset);

      $start = '=?' . $charset . '?B?';
      $end = '?=';
      $spacer = $end . $newline . ' ' . $start;

      // Payload budget per section: 75 characters minus the delimiters, rounded down to
      // a multiple of 4 so that every section is a complete base64 group.
      $length = 75 - strlen($start) - strlen($end);
      $length = $length - ($length % 4);

      if ($charset == 'UTF-8') {
          $parts = array();
          // Number of raw bytes that fit into $length base64 characters. Cast to int,
          // because the value is used as a string offset below.
          $maxchars = (int)floor(($length * 3) / 4);
          while (strlen($string) > $maxchars) {
              $i = $maxchars;
              // Step back over continuation bytes (10xxxxxx) to land on a sequence start.
              while ($i > 0 && (ord($string[$i]) & 0xC0) === 0x80) {
                  $i--;
              }
              if ($i === 0) {
                  // Malformed input without a usable boundary: cut at the byte limit so
                  // the loop always makes progress instead of spinning on an empty chunk.
                  $i = $maxchars;
              }
              $parts[] = base64_encode(substr($string, 0, $i));
              $string = substr($string, $i);
          }
          $parts[] = base64_encode($string);
          $string = implode($spacer, $parts);
      } else {
          // chunk_split() also appends the separator after the last chunk; strip it.
          $string = chunk_split(base64_encode($string), $length, $spacer);
          $string = preg_replace('/' . preg_quote($spacer, '/') . '$/', '', $string);
      }
      return $start . $string . $end;
  }
  /**
   * Return the Code points range for Unicode characters
   *
   * The result is the name of the range that contains $decimal, or false when $decimal
   * lies outside every listed range. The result is also stored in
   * $this->__codeRange[$decimal].
   *
   * @param integer $decimal decimal value of character
   * @return string|boolean range name, or false when no range matches
   * @access private
   */
  function __codepoint ($decimal) {
      // Each entry: inclusive lower bound, exclusive upper bound, range name.
      // Explicit bounds keep values that fall between two ranges from being assigned
      // to the next range in the list.
      static $ranges = array(
          array(128, 256, '0080_00ff'), // Latin-1 Supplement
          array(256, 384, '0100_017f'), // Latin Extended-A
          array(384, 592, '0180_024F'), // Latin Extended-B
          array(592, 688, '0250_02af'), // IPA Extensions
          array(880, 1024, '0370_03ff'), // Greek and Coptic
          array(1024, 1280, '0400_04ff'), // Cyrillic
          array(1280, 1328, '0500_052f'), // Cyrillic Supplement
          array(1328, 1424, '0530_058f'), // Armenian
          array(7680, 7936, '1e00_1eff'), // Latin Extended Additional
          array(7936, 8192, '1f00_1fff'), // Greek Extended
          array(8448, 8528, '2100_214f'), // Letterlike Symbols
          array(8528, 8592, '2150_218f'), // Number Forms
          array(9312, 9472, '2460_24ff'), // Enclosed Alphanumerics
          array(11264, 11360, '2c00_2c5f'), // Glagolitic
          array(11360, 11392, '2c60_2c7f'), // Latin Extended-C
          array(11392, 11520, '2c80_2cff'), // Coptic
          array(65280, 65520, 'ff00_ffef') // Halfwidth and Fullwidth Forms
      );

      $return = false;
      foreach ($ranges as $range) {
          if ($decimal >= $range[0] && $decimal < $range[1]) {
              $return = $range[2];
              break;
          }
      }
      $this->__codeRange[$decimal] = $return;
      return $return;
  }
  /**
   * Find the related code folding values for $char
   *
   * The range of $char is resolved through __codepoint() and the matching table is
   * loaded through Configure the first time that range is needed. With $type 'lower'
   * an entry matches when the first element of its 'lower' list is identical to $char;
   * with $type 'upper' when its 'upper' value is identical to $char.
   *
   * @param integer $char decimal value of character
   * @param string $type 'lower' or 'upper'; any other value matches nothing
   * @return array matching entries, or null when $char belongs to no known range
   * @access private
   */
  function __find($char, $type = 'lower') {
      $found = array();

      if (!isset($this->__codeRange[$char])) {
          $range = $this->__codepoint($char);
          if ($range === false) {
              return null;
          }
          // Load each table only once; other characters of the same range reuse it.
          if (!isset($this->__caseFold[$range])) {
              Configure::load('unicode' . DS . 'casefolding' . DS . $range);
              $this->__caseFold[$range] = Configure::read($range);
              Configure::delete($range);
          }
      }
      if (!$this->__codeRange[$char]) {
          return null;
      }

      $this->__table = $this->__codeRange[$char];
      // Nothing to search when the table is missing or empty.
      if (empty($this->__caseFold[$this->__table])) {
          return $found;
      }

      $table = $this->__caseFold[$this->__table];
      $count = count($table);
      for ($i = 0; $i < $count; $i++) {
          if ($type === 'lower') {
              if ($table[$i][$type][0] === $char) {
                  $found[] = $table[$i];
              }
          } elseif ($type === 'upper') {
              if ($table[$i][$type] === $char) {
                  $found[] = $table[$i];
              }
          }
      }
      return $found;
  }
  /**
   * Check the $string for multibyte characters
   *
   * Returns true as soon as a byte outside the 7-bit ASCII range (value above 127)
   * is found.
   *
   * @param string $string value to test
   * @return boolean
   * @access public
   * @static
   */
  function checkMultibyte($string) {
      $length = strlen($string);
      for ($i = 0; $i < $length; $i++) {
          // 127 is the last 7-bit ASCII value; byte 128 (0x80) already lies outside it.
          if (ord($string[$i]) > 127) {
              return true;
          }
      }
      return false;
  }
  1049. }
  1050. ?>