PageRenderTime 105ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/system/classes/multibyte.php

https://github.com/HabariMag/habarimag-old
PHP | 785 lines | 353 code | 142 blank | 290 comment | 87 complexity | 39527fcfc81ab99c85b7363e0425946c MD5 | raw file
Possible License(s): Apache-2.0
  1. <?php
  2. /*
  3. * @package Habari
  4. *
  5. */
  6. /*
  7. * Habari MultiByte Class
  8. *
  9. * Provides multibyte character set services,
  10. * a necessity since all of Habari's internal string
  11. * manipulations are done in UTF-8. Currently
  12. * this class is a wrapper around mbstring functions.
  13. *
  14. */
  class MultiByte
  {
      /**
       * Identifier for the mbstring backend.
       */
      const USE_MBSTRING = 1;

      /**
       * @var string The encoding used when a method is not given an explicit one.
       */
      public static $hab_enc = 'UTF-8';

      /**
       * @var mixed The multibyte library in use: self::USE_MBSTRING, or false
       * when the native (byte-wise, not multibyte-safe) PHP functions are used.
       */
      private static $use_library = self::USE_MBSTRING;

      /**
       * An empty, private constructor: every method of this class is static.
       */
      private function __construct()
      {
      }

      /**
       * Tells whether the mbstring backend is currently selected.
       *
       * @return bool True when self::$use_library is self::USE_MBSTRING.
       */
      private static function using_mbstring()
      {
          return self::$use_library === self::USE_MBSTRING;
      }

      /**
       * Resolves the encoding a method should work with.
       *
       * @param string $use_enc The encoding requested by the caller, or null.
       * @return string $use_enc when it is not null, otherwise the internal encoding.
       */
      private static function pick_encoding( $use_enc )
      {
          return ( $use_enc !== null ) ? $use_enc : self::$hab_enc;
      }

      /**
       * Sets and returns the internal encoding.
       *
       * @param string $use_enc The encoding to be used, or null to only query.
       * @return string If $use_enc is null, the current encoding. Otherwise the
       * encoding that was in effect before the change.
       */
      public static function hab_encoding( $use_enc = null )
      {
          $current = self::$hab_enc;
          if ( $use_enc !== null ) {
              self::$hab_enc = $use_enc;
          }
          return $current;
      }

      /**
       * Sets and returns the multibyte library being used internally.
       *
       * @param mixed $new_library One of the self::USE_* constants, null to simply
       * query, or false to disable multibyte support and use the native PHP functions.
       * @return mixed If $new_library is null, the current library. If it is a valid
       * value, the previously selected library (which is itself false when multibyte
       * support was disabled). For any other value nothing changes and false is returned.
       */
      public static function library( $new_library = null )
      {
          if ( $new_library === null ) {
              return self::$use_library;
          }
          if ( $new_library !== self::USE_MBSTRING && $new_library !== false ) {
              return false;
          }
          $old_library = self::$use_library;
          self::$use_library = $new_library;
          return $old_library;
      }

      /**
       * Converts a string from one encoding to another.
       *
       * @param string $str The string whose encoding is being changed.
       * @param string $use_enc The encoding to convert to. If null, the internal encoding is used.
       * @param string $from_enc The encoding of $str. If empty, it is detected with detect_encoding().
       * @return mixed The converted string, or false when no multibyte library is
       * selected or the source encoding could not be detected.
       */
      public static function convert_encoding( $str, $use_enc = null, $from_enc = null )
      {
          if ( !self::using_mbstring() ) {
              return false;
          }
          $enc = self::pick_encoding( $use_enc );
          if ( $from_enc == null ) {
              $from_enc = self::detect_encoding( $str );
              if ( $from_enc === false ) {
                  // nothing sensible to convert from; do not hand false to mbstring
                  return false;
              }
          }
          return mb_convert_encoding( $str, $enc, $from_enc );
      }

      /**
       * Detects the encoding being used for a string.
       *
       * The detected encoding may be wrong, but it is better than guessing.
       *
       * @param string $str The string whose encoding is being detected.
       * @return mixed The detected encoding, or false when detection fails or no
       * multibyte library is selected.
       */
      public static function detect_encoding( $str )
      {
          if ( !self::using_mbstring() ) {
              return false;
          }
          // The candidate list (which makes sure ISO-8859-1 is included) is passed
          // directly, so the global mb_detect_order() setting is never modified.
          $candidates = array( 'ASCII', 'JIS', 'UTF-8', 'ISO-8859-1', 'EUC-JP', 'SJIS' );
          return mb_detect_encoding( $str, $candidates );
      }

      /**
       * Gets a section of a string.
       *
       * @param string $str The original string.
       * @param int $begin The position of the first character to return.
       * @param int $len How long the returned string should be. If null, everything
       * from $begin to the end of the string is returned.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return mixed The requested section of the string, or whatever failure value
       * the underlying mb_substr() / substr() call produces.
       */
      public static function substr( $str, $begin, $len = null, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              $enc = self::pick_encoding( $use_enc );
              if ( $len === null ) {
                  // Older PHP releases treat a null length as 0, so pass an explicit one.
                  // The full character count (measured in the SAME encoding) is never
                  // shorter than the remainder, and mb_substr() stops at the end of the string.
                  $len = mb_strlen( $str, $enc );
              }
              return mb_substr( $str, $begin, $len, $enc );
          }
          if ( $len === null ) {
              // omit the argument: a literal null length means 0 on older PHP releases
              return substr( $str, $begin );
          }
          return substr( $str, $begin, $len );
      }

      /**
       * Gets the length of a string in characters.
       *
       * @param string $str The string whose length is being returned.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return int The length in characters, or the length in bytes if no
       * multibyte library is selected.
       */
      public static function strlen( $str, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              return mb_strlen( $str, self::pick_encoding( $use_enc ) );
          }
          return strlen( $str );
      }

      /**
       * Finds the position of the first occurrence of a string in a string.
       *
       * @param string $haysack The string being searched.
       * @param string $needle The string to look for.
       * @param int $offset The position to start searching from. Defaults to 0.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return mixed The position of the first match (in characters, or in bytes if no
       * multibyte library is selected), or false if $needle was not found.
       */
      public static function strpos( $haysack, $needle, $offset = 0, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              return mb_strpos( $haysack, $needle, $offset, self::pick_encoding( $use_enc ) );
          }
          return strpos( $haysack, $needle, $offset );
      }

      /**
       * Finds the position of the first occurrence of a string in a string. Case insensitive.
       *
       * @param string $haysack The string being searched.
       * @param string $needle The string to look for.
       * @param int $offset The position to start searching from. Defaults to 0.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return mixed The position of the first match (in characters, or in bytes if no
       * multibyte library is selected), or false if $needle was not found.
       */
      public static function stripos( $haysack, $needle, $offset = 0, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              return mb_stripos( $haysack, $needle, $offset, self::pick_encoding( $use_enc ) );
          }
          return stripos( $haysack, $needle, $offset );
      }

      /**
       * Finds the position of the last occurrence of a string in a string.
       *
       * @param string $haysack The string being searched.
       * @param string $needle The string to look for.
       * @param int $offset The search offset, passed through to the underlying function. Defaults to 0.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return mixed The position of the last match (in characters, or in bytes if no
       * multibyte library is selected), or false if $needle was not found.
       */
      public static function strrpos( $haysack, $needle, $offset = 0, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              return mb_strrpos( $haysack, $needle, $offset, self::pick_encoding( $use_enc ) );
          }
          return strrpos( $haysack, $needle, $offset );
      }

      /**
       * Finds the position of the last occurrence of a string in a string. Case insensitive.
       *
       * @param string $haysack The string being searched.
       * @param string $needle The string to look for.
       * @param int $offset The search offset, passed through to the underlying function. Defaults to 0.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return mixed The position of the last match (in characters, or in bytes if no
       * multibyte library is selected), or false if $needle was not found.
       */
      public static function strripos( $haysack, $needle, $offset = 0, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              return mb_strripos( $haysack, $needle, $offset, self::pick_encoding( $use_enc ) );
          }
          return strripos( $haysack, $needle, $offset );
      }

      /**
       * Converts a multibyte string to lowercase. If no multibyte library is
       * selected, strtolower() is used, which can lead to unexpected results.
       *
       * @param string $str The string to lowercase.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return string The lowercased string.
       */
      public static function strtolower( $str, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              return mb_strtolower( $str, self::pick_encoding( $use_enc ) );
          }
          return strtolower( $str );
      }

      /**
       * Converts a multibyte string to uppercase. If no multibyte library is
       * selected, strtoupper() is used, which can lead to unexpected results.
       *
       * @param string $str The string to uppercase.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return string The uppercased string.
       */
      public static function strtoupper( $str, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              return mb_strtoupper( $str, self::pick_encoding( $use_enc ) );
          }
          return strtoupper( $str );
      }

      /**
       * Determines if the passed string is valid character data in the internal encoding.
       *
       * Note: this always calls mb_check_encoding(), whatever library() is set to.
       *
       * @param string $str The string to check.
       * @return bool
       */
      public static function valid_data( $str )
      {
          return mb_check_encoding( $str, self::$hab_enc );
      }

      /**
       * Makes a string's first character uppercase.
       *
       * @see http://php.net/ucfirst
       * @param string $str The string to capitalize.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return string The capitalized string.
       */
      public static function ucfirst ( $str, $use_enc = null )
      {
          if ( !self::using_mbstring() ) {
              return ucfirst( $str );
          }
          $enc = self::pick_encoding( $use_enc );
          // uppercase the first character, then glue the untouched remainder back on
          $first = self::strtoupper( self::substr( $str, 0, 1, $enc ), $enc );
          $rest = self::substr( $str, 1, null, $enc );
          return $first . $rest;
      }

      /**
       * Makes a string's first character lowercase.
       *
       * @see http://php.net/lcfirst
       * @param string $str The string to lowercase.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return string The string with its first character lowercased.
       */
      public static function lcfirst ( $str, $use_enc = null )
      {
          if ( self::using_mbstring() ) {
              $enc = self::pick_encoding( $use_enc );
              $first = self::strtolower( self::substr( $str, 0, 1, $enc ), $enc );
              $rest = self::substr( $str, 1, null, $enc );
              return $first . $rest;
          }
          // lcfirst() is php 5.3+ so we'll emulate it
          $first = strtolower( substr( $str, 0, 1 ) );
          $rest = substr( $str, 1 );
          return $first . $rest;
      }

      /**
       * Replace all occurrences of the search string with the replacement string.
       *
       * @see http://php.net/str_replace
       * @param mixed $search A string or an array of strings to search for.
       * @param mixed $replace A string or an array of strings to replace search values with.
       * @param mixed $subject The string (or array of strings) to perform the search and replace on.
       * @param int $count If passed, this value will hold the number of matched and replaced needles.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return mixed The subject with replaced values.
       */
      public static function str_replace ( $search, $replace, $subject, &$count = 0, $use_enc = null )
      {
          if ( !self::using_mbstring() ) {
              return str_replace( $search, $replace, $subject, $count );
          }
          // like the native function, report only the replacements made by THIS call
          $count = 0;
          return self::replace_all( $search, $replace, $subject, $count, self::pick_encoding( $use_enc ), false );
      }

      /**
       * Replace all occurrences of the search string with the replacement string. Case insensitive.
       *
       * @see http://php.net/str_ireplace
       * @param mixed $search A string or an array of strings to search for.
       * @param mixed $replace A string or an array of strings to replace search values with.
       * @param mixed $subject The string (or array of strings) to perform the search and replace on.
       * @param int $count If passed, this value will hold the number of matched and replaced needles.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return mixed The subject with replaced values.
       */
      public static function str_ireplace( $search, $replace, $subject, &$count = 0, $use_enc = null )
      {
          if ( !self::using_mbstring() ) {
              return str_ireplace( $search, $replace, $subject, $count );
          }
          // like the native function, report only the replacements made by THIS call
          $count = 0;
          return self::replace_all( $search, $replace, $subject, $count, self::pick_encoding( $use_enc ), true );
      }

      /**
       * Shared multibyte implementation behind str_replace() and str_ireplace().
       *
       * @param mixed $search A string or an array of strings to search for.
       * @param mixed $replace A string or an array of strings to replace search values with.
       * @param mixed $subject A string, or an array whose elements are processed recursively.
       * @param int $count Incremented once for every replacement made.
       * @param string $enc The (already resolved) encoding to work in.
       * @param bool $ignore_case True to match case-insensitively.
       * @return mixed The subject with replaced values.
       */
      private static function replace_all( $search, $replace, $subject, &$count, $enc, $ignore_case )
      {
          // normalise both sides into lists, in order, ignoring the original keys
          $needles = is_array( $search ) ? array_values( $search ) : array( $search );
          if ( is_array( $replace ) ) {
              // a shorter replacement list is padded with empty strings
              $replacements = array_pad( array_values( $replace ), count( $needles ), '' );
          }
          else {
              // a single replacement is used for every needle
              $replacements = array();
              foreach ( $needles as $i => $unused ) {
                  $replacements[ $i ] = $replace;
              }
          }

          // if subject is an array, recurse into each element of it
          if ( is_array( $subject ) ) {
              foreach ( $subject as $k => $v ) {
                  $subject[ $k ] = self::replace_all( $needles, $replacements, $v, $count, $enc, $ignore_case );
              }
              return $subject;
          }

          foreach ( $needles as $i => $needle ) {
              $needle_len = self::strlen( $needle, $enc );
              if ( !$needle_len ) {
                  // An empty needle can never be replaced. On PHP 8+ it is "found" at
                  // every offset, which would keep the scan below from ever advancing.
                  continue;
              }

              // Collect every match position first, so that a replacement which itself
              // contains the needle cannot cause an endless loop.
              $positions = array();
              $offset = 0;
              while ( true ) {
                  $pos = $ignore_case
                      ? self::stripos( $subject, $needle, $offset, $enc )
                      : self::strpos( $subject, $needle, $offset, $enc );
                  if ( $pos === false ) {
                      break;
                  }
                  $positions[] = $pos;
                  // skip over this match
                  $offset = $pos + $needle_len;
              }

              // Work from the last match backwards: replacing from the front would
              // shift the remaining positions whenever the lengths differ.
              foreach ( array_reverse( $positions ) as $pos ) {
                  $before = self::substr( $subject, 0, $pos, $enc );
                  $after = self::substr( $subject, $pos + $needle_len, null, $enc );
                  $subject = $before . $replacements[ $i ] . $after;
                  $count++;
              }
          }
          return $subject;
      }

      /**
       * Uppercase the first character of each word in a string.
       *
       * From php.net/ucwords:
       * The definition of a word is any string of characters that is immediately after a whitespace
       * (These are: space, form-feed, newline, carriage return, horizontal tab, and vertical tab).
       *
       * @see http://php.net/ucwords
       * @param string $str The input string.
       * @param string $use_enc The encoding to be used. If null, the internal encoding is used.
       * @return string The modified string.
       */
      public static function ucwords ( $str, $use_enc = null )
      {
          if ( !self::using_mbstring() ) {
              return ucwords( $str );
          }
          $enc = self::pick_encoding( $use_enc );
          $delimiters = array(
              chr( 32 ), // space
              chr( 12 ), // form-feed
              chr( 10 ), // newline
              chr( 13 ), // carriage return
              chr( 9 ), // horizontal tab
              chr( 11 ), // vertical tab
          );
          // Split on each delimiter in turn, capitalize every piece and join again.
          // NOTE(review): explode() splits on raw bytes; this presumes an encoding such as
          // UTF-8 in which these bytes only ever occur as the whitespace characters themselves.
          foreach ( $delimiters as $d ) {
              $pieces = explode( $d, $str );
              foreach ( $pieces as $i => $piece ) {
                  $pieces[ $i ] = self::ucfirst( $piece, $enc );
              }
              $str = implode( $d, $pieces );
          }
          return $str;
      }
  }
  643. ?>