PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/classes/text.php

https://github.com/gwamgerald/moodle-1
PHP | 716 lines | 428 code | 68 blank | 220 comment | 75 complexity | 1893fbe036894656c82fe6b7675c65cf MD5 | raw file
Possible License(s): GPL-3.0, LGPL-2.1, BSD-3-Clause, Apache-2.0
  1. <?php
  2. // This file is part of Moodle - http://moodle.org/
  3. //
  4. // Moodle is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // Moodle is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * Defines string apis
  18. *
  19. * @package core
  20. * @copyright (C) 2001-3001 Eloy Lafuente (stronk7) {@link http://contiento.com}
  21. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  22. */
  23. defined('MOODLE_INTERNAL') || die();
  24. /**
  25. * defines string api's for manipulating strings
  26. *
  27. * This class is used to manipulate strings under Moodle 1.6 an later. As
  28. * utf-8 text become mandatory a pool of safe functions under this encoding
  29. * become necessary. The name of the methods is exactly the
  30. * same than their PHP originals.
  31. *
  32. * A big part of this class acts as a wrapper over the Typo3 charset library,
  33. * really a cool group of utilities to handle texts and encoding conversion.
  34. *
  35. * Take a look to its own copyright and license details.
  36. *
  37. * IMPORTANT Note: Typo3 libraries always expect lowercase charsets to use 100%
  38. * its capabilities so, don't forget to make the conversion
  39. * from every wrapper function!
  40. *
  41. * @package core
  42. * @category string
  43. * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
  44. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  45. */
  46. class core_text {
  47. /**
  48. * Return t3lib helper class, which is used for conversion between charsets
  49. *
  50. * @param bool $reset
  51. * @return t3lib_cs
  52. */
  53. protected static function typo3($reset = false) {
  54. static $typo3cs = null;
  55. if ($reset) {
  56. $typo3cs = null;
  57. return null;
  58. }
  59. if (isset($typo3cs)) {
  60. return $typo3cs;
  61. }
  62. global $CFG;
  63. // Required files
  64. require_once($CFG->libdir.'/typo3/class.t3lib_cs.php');
  65. require_once($CFG->libdir.'/typo3/class.t3lib_div.php');
  66. require_once($CFG->libdir.'/typo3/interface.t3lib_singleton.php');
  67. require_once($CFG->libdir.'/typo3/class.t3lib_l10n_locales.php');
  68. // do not use mbstring or recode because it may return invalid results in some corner cases
  69. $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'iconv';
  70. $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = 'iconv';
  71. // Tell Typo3 we are curl enabled always (mandatory since 2.0)
  72. $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] = '1';
  73. // And this directory must exist to allow Typo to cache conversion
  74. // tables when using internal functions
  75. make_temp_directory('typo3temp/cs');
  76. // Make sure typo is using our dir permissions
  77. $GLOBALS['TYPO3_CONF_VARS']['BE']['folderCreateMask'] = decoct($CFG->directorypermissions);
  78. // Default mask for Typo
  79. $GLOBALS['TYPO3_CONF_VARS']['BE']['fileCreateMask'] = $CFG->directorypermissions;
  80. // This full path constants must be defined too, transforming backslashes
  81. // to forward slashed because Typo3 requires it.
  82. if (!defined('PATH_t3lib')) {
  83. define('PATH_t3lib', str_replace('\\','/',$CFG->libdir.'/typo3/'));
  84. define('PATH_typo3', str_replace('\\','/',$CFG->libdir.'/typo3/'));
  85. define('PATH_site', str_replace('\\','/',$CFG->tempdir.'/'));
  86. define('TYPO3_OS', stristr(PHP_OS,'win')&&!stristr(PHP_OS,'darwin')?'WIN':'');
  87. }
  88. $typo3cs = new t3lib_cs();
  89. return $typo3cs;
  90. }
  91. /**
  92. * Reset internal textlib caches.
  93. * @static
  94. */
  95. public static function reset_caches() {
  96. self::typo3(true);
  97. }
  98. /**
  99. * Standardise charset name
  100. *
  101. * Please note it does not mean the returned charset is actually supported.
  102. *
  103. * @static
  104. * @param string $charset raw charset name
  105. * @return string normalised lowercase charset name
  106. */
  107. public static function parse_charset($charset) {
  108. $charset = strtolower($charset);
  109. // shortcuts so that we do not have to load typo3 on every page
  110. if ($charset === 'utf8' or $charset === 'utf-8') {
  111. return 'utf-8';
  112. }
  113. if (preg_match('/^(cp|win|windows)-?(12[0-9]{2})$/', $charset, $matches)) {
  114. return 'windows-'.$matches[2];
  115. }
  116. if (preg_match('/^iso-8859-[0-9]+$/', $charset, $matches)) {
  117. return $charset;
  118. }
  119. if ($charset === 'euc-jp') {
  120. return 'euc-jp';
  121. }
  122. if ($charset === 'iso-2022-jp') {
  123. return 'iso-2022-jp';
  124. }
  125. if ($charset === 'shift-jis' or $charset === 'shift_jis') {
  126. return 'shift_jis';
  127. }
  128. if ($charset === 'gb2312') {
  129. return 'gb2312';
  130. }
  131. if ($charset === 'gb18030') {
  132. return 'gb18030';
  133. }
  134. // fallback to typo3
  135. return self::typo3()->parse_charset($charset);
  136. }
  137. /**
  138. * Converts the text between different encodings. It uses iconv extension with //TRANSLIT parameter,
  139. * falls back to typo3. If both source and target are utf-8 it tries to fix invalid characters only.
  140. *
  141. * @param string $text
  142. * @param string $fromCS source encoding
  143. * @param string $toCS result encoding
  144. * @return string|bool converted string or false on error
  145. */
  146. public static function convert($text, $fromCS, $toCS='utf-8') {
  147. $fromCS = self::parse_charset($fromCS);
  148. $toCS = self::parse_charset($toCS);
  149. $text = (string)$text; // we can work only with strings
  150. if ($text === '') {
  151. return '';
  152. }
  153. if ($fromCS === 'utf-8') {
  154. $text = fix_utf8($text);
  155. if ($toCS === 'utf-8') {
  156. return $text;
  157. }
  158. }
  159. if ($toCS === 'ascii') {
  160. // Try to normalize the conversion a bit.
  161. $text = self::specialtoascii($text, $fromCS);
  162. }
  163. // Prevent any error notices, do not use //IGNORE so that we get
  164. // consistent result from Typo3 if iconv fails.
  165. $result = @iconv($fromCS, $toCS.'//TRANSLIT', $text);
  166. if ($result === false or $result === '') {
  167. // note: iconv is prone to return empty string when invalid char encountered, or false if encoding unsupported
  168. $oldlevel = error_reporting(E_PARSE);
  169. $result = self::typo3()->conv((string)$text, $fromCS, $toCS);
  170. error_reporting($oldlevel);
  171. }
  172. return $result;
  173. }
  174. /**
  175. * Multibyte safe substr() function, uses mbstring or iconv for UTF-8, falls back to typo3.
  176. *
  177. * @param string $text string to truncate
  178. * @param int $start negative value means from end
  179. * @param int $len maximum length of characters beginning from start
  180. * @param string $charset encoding of the text
  181. * @return string portion of string specified by the $start and $len
  182. */
  183. public static function substr($text, $start, $len=null, $charset='utf-8') {
  184. $charset = self::parse_charset($charset);
  185. if ($charset === 'utf-8') {
  186. if (function_exists('mb_substr')) {
  187. // this is much faster than iconv - see MDL-31142
  188. if ($len === null) {
  189. $oldcharset = mb_internal_encoding();
  190. mb_internal_encoding('UTF-8');
  191. $result = mb_substr($text, $start);
  192. mb_internal_encoding($oldcharset);
  193. return $result;
  194. } else {
  195. return mb_substr($text, $start, $len, 'UTF-8');
  196. }
  197. } else {
  198. if ($len === null) {
  199. $len = iconv_strlen($text, 'UTF-8');
  200. }
  201. return iconv_substr($text, $start, $len, 'UTF-8');
  202. }
  203. }
  204. $oldlevel = error_reporting(E_PARSE);
  205. if ($len === null) {
  206. $result = self::typo3()->substr($charset, (string)$text, $start);
  207. } else {
  208. $result = self::typo3()->substr($charset, (string)$text, $start, $len);
  209. }
  210. error_reporting($oldlevel);
  211. return $result;
  212. }
  213. /**
  214. * Finds the last occurrence of a character in a string within another.
  215. * UTF-8 ONLY safe mb_strrchr().
  216. *
  217. * @param string $haystack The string from which to get the last occurrence of needle.
  218. * @param string $needle The string to find in haystack.
  219. * @param boolean $part If true, returns the portion before needle, else return the portion after (including needle).
  220. * @return string|false False when not found.
  221. * @since Moodle 2.4.6, 2.5.2, 2.6
  222. */
  223. public static function strrchr($haystack, $needle, $part = false) {
  224. if (function_exists('mb_strrchr')) {
  225. return mb_strrchr($haystack, $needle, $part, 'UTF-8');
  226. }
  227. $pos = self::strrpos($haystack, $needle);
  228. if ($pos === false) {
  229. return false;
  230. }
  231. $length = null;
  232. if ($part) {
  233. $length = $pos;
  234. $pos = 0;
  235. }
  236. return self::substr($haystack, $pos, $length, 'utf-8');
  237. }
  238. /**
  239. * Multibyte safe strlen() function, uses mbstring or iconv for UTF-8, falls back to typo3.
  240. *
  241. * @param string $text input string
  242. * @param string $charset encoding of the text
  243. * @return int number of characters
  244. */
  245. public static function strlen($text, $charset='utf-8') {
  246. $charset = self::parse_charset($charset);
  247. if ($charset === 'utf-8') {
  248. if (function_exists('mb_strlen')) {
  249. return mb_strlen($text, 'UTF-8');
  250. } else {
  251. return iconv_strlen($text, 'UTF-8');
  252. }
  253. }
  254. $oldlevel = error_reporting(E_PARSE);
  255. $result = self::typo3()->strlen($charset, (string)$text);
  256. error_reporting($oldlevel);
  257. return $result;
  258. }
  259. /**
  260. * Multibyte safe strtolower() function, uses mbstring, falls back to typo3.
  261. *
  262. * @param string $text input string
  263. * @param string $charset encoding of the text (may not work for all encodings)
  264. * @return string lower case text
  265. */
  266. public static function strtolower($text, $charset='utf-8') {
  267. $charset = self::parse_charset($charset);
  268. if ($charset === 'utf-8' and function_exists('mb_strtolower')) {
  269. return mb_strtolower($text, 'UTF-8');
  270. }
  271. $oldlevel = error_reporting(E_PARSE);
  272. $result = self::typo3()->conv_case($charset, (string)$text, 'toLower');
  273. error_reporting($oldlevel);
  274. return $result;
  275. }
  276. /**
  277. * Multibyte safe strtoupper() function, uses mbstring, falls back to typo3.
  278. *
  279. * @param string $text input string
  280. * @param string $charset encoding of the text (may not work for all encodings)
  281. * @return string upper case text
  282. */
  283. public static function strtoupper($text, $charset='utf-8') {
  284. $charset = self::parse_charset($charset);
  285. if ($charset === 'utf-8' and function_exists('mb_strtoupper')) {
  286. return mb_strtoupper($text, 'UTF-8');
  287. }
  288. $oldlevel = error_reporting(E_PARSE);
  289. $result = self::typo3()->conv_case($charset, (string)$text, 'toUpper');
  290. error_reporting($oldlevel);
  291. return $result;
  292. }
  293. /**
  294. * Find the position of the first occurrence of a substring in a string.
  295. * UTF-8 ONLY safe strpos(), uses mbstring, falls back to iconv.
  296. *
  297. * @param string $haystack the string to search in
  298. * @param string $needle one or more charachters to search for
  299. * @param int $offset offset from begining of string
  300. * @return int the numeric position of the first occurrence of needle in haystack.
  301. */
  302. public static function strpos($haystack, $needle, $offset=0) {
  303. if (function_exists('mb_strpos')) {
  304. return mb_strpos($haystack, $needle, $offset, 'UTF-8');
  305. } else {
  306. return iconv_strpos($haystack, $needle, $offset, 'UTF-8');
  307. }
  308. }
  309. /**
  310. * Find the position of the last occurrence of a substring in a string
  311. * UTF-8 ONLY safe strrpos(), uses mbstring, falls back to iconv.
  312. *
  313. * @param string $haystack the string to search in
  314. * @param string $needle one or more charachters to search for
  315. * @return int the numeric position of the last occurrence of needle in haystack
  316. */
  317. public static function strrpos($haystack, $needle) {
  318. if (function_exists('mb_strrpos')) {
  319. return mb_strrpos($haystack, $needle, null, 'UTF-8');
  320. } else {
  321. return iconv_strrpos($haystack, $needle, 'UTF-8');
  322. }
  323. }
  324. /**
  325. * Try to convert upper unicode characters to plain ascii,
  326. * the returned string may contain unconverted unicode characters.
  327. *
  328. * @param string $text input string
  329. * @param string $charset encoding of the text
  330. * @return string converted ascii string
  331. */
  332. public static function specialtoascii($text, $charset='utf-8') {
  333. $charset = self::parse_charset($charset);
  334. $oldlevel = error_reporting(E_PARSE);
  335. $result = self::typo3()->specCharsToASCII($charset, (string)$text);
  336. error_reporting($oldlevel);
  337. return $result;
  338. }
  339. /**
  340. * Generate a correct base64 encoded header to be used in MIME mail messages.
  341. * This function seems to be 100% compliant with RFC1342. Credits go to:
  342. * paravoid (http://www.php.net/manual/en/function.mb-encode-mimeheader.php#60283).
  343. *
  344. * @param string $text input string
  345. * @param string $charset encoding of the text
  346. * @return string base64 encoded header
  347. */
  348. public static function encode_mimeheader($text, $charset='utf-8') {
  349. if (empty($text)) {
  350. return (string)$text;
  351. }
  352. // Normalize charset
  353. $charset = self::parse_charset($charset);
  354. // If the text is pure ASCII, we don't need to encode it
  355. if (self::convert($text, $charset, 'ascii') == $text) {
  356. return $text;
  357. }
  358. // Although RFC says that line feed should be \r\n, it seems that
  359. // some mailers double convert \r, so we are going to use \n alone
  360. $linefeed="\n";
  361. // Define start and end of every chunk
  362. $start = "=?$charset?B?";
  363. $end = "?=";
  364. // Accumulate results
  365. $encoded = '';
  366. // Max line length is 75 (including start and end)
  367. $length = 75 - strlen($start) - strlen($end);
  368. // Multi-byte ratio
  369. $multilength = self::strlen($text, $charset);
  370. // Detect if strlen and friends supported
  371. if ($multilength === false) {
  372. if ($charset == 'GB18030' or $charset == 'gb18030') {
  373. while (strlen($text)) {
  374. // try to encode first 22 chars - we expect most chars are two bytes long
  375. if (preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,22}/m', $text, $matches)) {
  376. $chunk = $matches[0];
  377. $encchunk = base64_encode($chunk);
  378. if (strlen($encchunk) > $length) {
  379. // find first 11 chars - each char in 4 bytes - worst case scenario
  380. preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,11}/m', $text, $matches);
  381. $chunk = $matches[0];
  382. $encchunk = base64_encode($chunk);
  383. }
  384. $text = substr($text, strlen($chunk));
  385. $encoded .= ' '.$start.$encchunk.$end.$linefeed;
  386. } else {
  387. break;
  388. }
  389. }
  390. $encoded = trim($encoded);
  391. return $encoded;
  392. } else {
  393. return false;
  394. }
  395. }
  396. $ratio = $multilength / strlen($text);
  397. // Base64 ratio
  398. $magic = $avglength = floor(3 * $length * $ratio / 4);
  399. // basic infinite loop protection
  400. $maxiterations = strlen($text)*2;
  401. $iteration = 0;
  402. // Iterate over the string in magic chunks
  403. for ($i=0; $i <= $multilength; $i+=$magic) {
  404. if ($iteration++ > $maxiterations) {
  405. return false; // probably infinite loop
  406. }
  407. $magic = $avglength;
  408. $offset = 0;
  409. // Ensure the chunk fits in length, reducing magic if necessary
  410. do {
  411. $magic -= $offset;
  412. $chunk = self::substr($text, $i, $magic, $charset);
  413. $chunk = base64_encode($chunk);
  414. $offset++;
  415. } while (strlen($chunk) > $length);
  416. // This chunk doesn't break any multi-byte char. Use it.
  417. if ($chunk)
  418. $encoded .= ' '.$start.$chunk.$end.$linefeed;
  419. }
  420. // Strip the first space and the last linefeed
  421. $encoded = substr($encoded, 1, -strlen($linefeed));
  422. return $encoded;
  423. }
  424. /**
  425. * Returns HTML entity transliteration table.
  426. * @return array with (html entity => utf-8) elements
  427. */
  428. protected static function get_entities_table() {
  429. static $trans_tbl = null;
  430. // Generate/create $trans_tbl
  431. if (!isset($trans_tbl)) {
  432. if (version_compare(phpversion(), '5.3.4') < 0) {
  433. $trans_tbl = array();
  434. foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
  435. $trans_tbl[$key] = self::convert($val, 'ISO-8859-1', 'utf-8');
  436. }
  437. } else if (version_compare(phpversion(), '5.4.0') < 0) {
  438. $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
  439. $trans_tbl = array_flip($trans_tbl);
  440. } else {
  441. $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8');
  442. $trans_tbl = array_flip($trans_tbl);
  443. }
  444. }
  445. return $trans_tbl;
  446. }
  447. /**
  448. * Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
  449. * Original from laurynas dot butkus at gmail at:
  450. * http://php.net/manual/en/function.html-entity-decode.php#75153
  451. * with some custom mods to provide more functionality
  452. *
  453. * @param string $str input string
  454. * @param boolean $htmlent convert also html entities (defaults to true)
  455. * @return string encoded UTF-8 string
  456. */
  457. public static function entities_to_utf8($str, $htmlent=true) {
  458. static $callback1 = null ;
  459. static $callback2 = null ;
  460. if (!$callback1 or !$callback2) {
  461. $callback1 = create_function('$matches', 'return core_text::code2utf8(hexdec($matches[1]));');
  462. $callback2 = create_function('$matches', 'return core_text::code2utf8($matches[1]);');
  463. }
  464. $result = (string)$str;
  465. $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback1, $result);
  466. $result = preg_replace_callback('/&#([0-9]+);/', $callback2, $result);
  467. // Replace literal entities (if desired)
  468. if ($htmlent) {
  469. $trans_tbl = self::get_entities_table();
  470. // It should be safe to search for ascii strings and replace them with utf-8 here.
  471. $result = strtr($result, $trans_tbl);
  472. }
  473. // Return utf8-ised string
  474. return $result;
  475. }
  476. /**
  477. * Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
  478. *
  479. * @param string $str input string
  480. * @param boolean $dec output decadic only number entities
  481. * @param boolean $nonnum remove all non-numeric entities
  482. * @return string converted string
  483. */
  484. public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
  485. static $callback = null ;
  486. if ($nonnum) {
  487. $str = self::entities_to_utf8($str, true);
  488. }
  489. // Avoid some notices from Typo3 code
  490. $oldlevel = error_reporting(E_PARSE);
  491. $result = self::typo3()->utf8_to_entities((string)$str);
  492. error_reporting($oldlevel);
  493. if ($dec) {
  494. if (!$callback) {
  495. $callback = create_function('$matches', 'return \'&#\'.(hexdec($matches[1])).\';\';');
  496. }
  497. $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback, $result);
  498. }
  499. return $result;
  500. }
  501. /**
  502. * Removes the BOM from unicode string {@link http://unicode.org/faq/utf_bom.html}
  503. *
  504. * @param string $str input string
  505. * @return string
  506. */
  507. public static function trim_utf8_bom($str) {
  508. $bom = "\xef\xbb\xbf";
  509. if (strpos($str, $bom) === 0) {
  510. return substr($str, strlen($bom));
  511. }
  512. return $str;
  513. }
  514. /**
  515. * Returns encoding options for select boxes, utf-8 and platform encoding first
  516. *
  517. * @return array encodings
  518. */
  519. public static function get_encodings() {
  520. $encodings = array();
  521. $encodings['UTF-8'] = 'UTF-8';
  522. $winenc = strtoupper(get_string('localewincharset', 'langconfig'));
  523. if ($winenc != '') {
  524. $encodings[$winenc] = $winenc;
  525. }
  526. $nixenc = strtoupper(get_string('oldcharset', 'langconfig'));
  527. $encodings[$nixenc] = $nixenc;
  528. foreach (self::typo3()->synonyms as $enc) {
  529. $enc = strtoupper($enc);
  530. $encodings[$enc] = $enc;
  531. }
  532. return $encodings;
  533. }
  534. /**
  535. * Returns the utf8 string corresponding to the unicode value
  536. * (from php.net, courtesy - romans@void.lv)
  537. *
  538. * @param int $num one unicode value
  539. * @return string the UTF-8 char corresponding to the unicode value
  540. */
  541. public static function code2utf8($num) {
  542. if ($num < 128) {
  543. return chr($num);
  544. }
  545. if ($num < 2048) {
  546. return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
  547. }
  548. if ($num < 65536) {
  549. return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
  550. }
  551. if ($num < 2097152) {
  552. return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
  553. }
  554. return '';
  555. }
  556. /**
  557. * Returns the code of the given UTF-8 character
  558. *
  559. * @param string $utf8char one UTF-8 character
  560. * @return int the code of the given character
  561. */
  562. public static function utf8ord($utf8char) {
  563. if ($utf8char == '') {
  564. return 0;
  565. }
  566. $ord0 = ord($utf8char{0});
  567. if ($ord0 >= 0 && $ord0 <= 127) {
  568. return $ord0;
  569. }
  570. $ord1 = ord($utf8char{1});
  571. if ($ord0 >= 192 && $ord0 <= 223) {
  572. return ($ord0 - 192) * 64 + ($ord1 - 128);
  573. }
  574. $ord2 = ord($utf8char{2});
  575. if ($ord0 >= 224 && $ord0 <= 239) {
  576. return ($ord0 - 224) * 4096 + ($ord1 - 128) * 64 + ($ord2 - 128);
  577. }
  578. $ord3 = ord($utf8char{3});
  579. if ($ord0 >= 240 && $ord0 <= 247) {
  580. return ($ord0 - 240) * 262144 + ($ord1 - 128 )* 4096 + ($ord2 - 128) * 64 + ($ord3 - 128);
  581. }
  582. return false;
  583. }
  584. /**
  585. * Makes first letter of each word capital - words must be separated by spaces.
  586. * Use with care, this function does not work properly in many locales!!!
  587. *
  588. * @param string $text input string
  589. * @return string
  590. */
  591. public static function strtotitle($text) {
  592. if (empty($text)) {
  593. return $text;
  594. }
  595. if (function_exists('mb_convert_case')) {
  596. return mb_convert_case($text, MB_CASE_TITLE, 'UTF-8');
  597. }
  598. $text = self::strtolower($text);
  599. $words = explode(' ', $text);
  600. foreach ($words as $i=>$word) {
  601. $length = self::strlen($word);
  602. if (!$length) {
  603. continue;
  604. } else if ($length == 1) {
  605. $words[$i] = self::strtoupper($word);
  606. } else {
  607. $letter = self::substr($word, 0, 1);
  608. $letter = self::strtoupper($letter);
  609. $rest = self::substr($word, 1);
  610. $words[$i] = $letter.$rest;
  611. }
  612. }
  613. return implode(' ', $words);
  614. }
  615. }
  616. /**
  617. * Legacy tectlib.
  618. * @deprecated since 2.6, use core_text:: instead.
  619. */
  620. class textlib extends core_text {
  621. /**
  622. * Locale aware sorting, the key associations are kept, values are sorted alphabetically.
  623. *
  624. * @param array $arr array to be sorted (reference)
  625. * @param int $sortflag One of Collator::SORT_REGULAR, Collator::SORT_NUMERIC, Collator::SORT_STRING
  626. * @return void modifies parameter
  627. */
  628. public static function asort(array &$arr, $sortflag = null) {
  629. debugging('textlib::asort has been superseeded by collatorlib::asort please upgrade your code to use that', DEBUG_DEVELOPER);
  630. collatorlib::asort($arr, $sortflag);
  631. }
  632. }