PageRenderTime 43ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/classes/text.php

https://bitbucket.org/synergylearning/campusconnect
PHP | 728 lines | 432 code | 69 blank | 227 comment | 75 complexity | 2a8c65d4f225394e16f4a74e7571e552 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-3.0, GPL-3.0, LGPL-2.1, Apache-2.0, BSD-3-Clause, AGPL-3.0
  1. <?php
  2. // This file is part of Moodle - http://moodle.org/
  3. //
  4. // Moodle is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // Moodle is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * Defines string apis
  18. *
  19. * @package core
  20. * @copyright (C) 2001-3001 Eloy Lafuente (stronk7) {@link http://contiento.com}
  21. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  22. */
  23. defined('MOODLE_INTERNAL') || die();
  24. /**
  25. * defines string api's for manipulating strings
  26. *
  27. * This class is used to manipulate strings under Moodle 1.6 and later. As
  28. * utf-8 text became mandatory, a pool of safe functions for this encoding
  29. * became necessary. The names of the methods are exactly the
  30. * same as their PHP originals.
  31. *
  32. * A big part of this class acts as a wrapper over the Typo3 charset library,
  33. * really a cool group of utilities to handle texts and encoding conversion.
  34. *
  35. * Take a look at its own copyright and license details.
  36. *
  37. * IMPORTANT Note: Typo3 libraries always expect lowercase charsets in order to use 100%
  38. * of their capabilities, so don't forget to make the conversion
  39. * in every wrapper function!
  40. *
  41. * @package core
  42. * @category string
  43. * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
  44. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  45. */
  46. class core_text {
  /**
   * Lazily builds and returns the shared t3lib_cs helper used for charset conversion.
   *
   * The helper is created at most once and cached in a static variable. Before it is
   * instantiated the Typo3 library files are loaded, the global TYPO3_CONF_VARS entries
   * are set and, if not yet defined, the PATH_* / TYPO3_OS constants are defined.
   *
   * @param bool $reset when true, only drops the cached instance and returns null
   * @return t3lib_cs|null the shared helper, or null when $reset is true
   */
  protected static function typo3($reset = false) {
      global $CFG;
      static $typo3cs = null;

      if ($reset) {
          $typo3cs = null;
          return null;
      }

      if ($typo3cs !== null) {
          return $typo3cs;
      }

      // Required files, loaded in this exact order.
      $libraryfiles = array(
          'class.t3lib_cs.php',
          'class.t3lib_div.php',
          'interface.t3lib_singleton.php',
          'class.t3lib_l10n_locales.php',
      );
      foreach ($libraryfiles as $libraryfile) {
          require_once($CFG->libdir.'/typo3/'.$libraryfile);
      }

      // Do not use mbstring or recode because it may return invalid results in some corner cases.
      $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] = 'iconv';
      $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] = 'iconv';

      // Tell Typo3 we are curl enabled always (mandatory since 2.0).
      $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] = '1';

      // This directory must exist to allow Typo3 to cache conversion tables
      // when using its internal functions.
      make_temp_directory('typo3temp/cs');

      // Make sure Typo3 is using our directory permissions.
      $GLOBALS['TYPO3_CONF_VARS']['BE']['folderCreateMask'] = decoct($CFG->directorypermissions);

      // Default file mask for Typo3.
      $GLOBALS['TYPO3_CONF_VARS']['BE']['fileCreateMask'] = $CFG->directorypermissions;

      // Full path constants must be defined too; Typo3 requires forward slashes.
      if (!defined('PATH_t3lib')) {
          $typo3dir = str_replace('\\', '/', $CFG->libdir.'/typo3/');
          define('PATH_t3lib', $typo3dir);
          define('PATH_typo3', $typo3dir);
          define('PATH_site', str_replace('\\', '/', $CFG->tempdir.'/'));

          $iswindows = stristr(PHP_OS, 'win') && !stristr(PHP_OS, 'darwin');
          define('TYPO3_OS', $iswindows ? 'WIN' : '');
      }

      $typo3cs = new t3lib_cs();
      return $typo3cs;
  }
  /**
   * Drops the cached Typo3 helper instance held by this class.
   *
   * After this call the next operation that needs the helper builds a new one.
   *
   * @static
   * @return void
   */
  public static function reset_caches() {
      // Passing true only clears the static cache; nothing is loaded or created.
      self::typo3(true);
  }
  /**
   * Normalises a charset name to its lowercase canonical form.
   *
   * Common names are resolved locally so that Typo3 does not have to be loaded
   * on every page; anything else is delegated to the Typo3 helper.
   * Please note a returned name is not necessarily a supported charset.
   *
   * @static
   * @param string $charset raw charset name
   * @return string normalised lowercase charset name
   */
  public static function parse_charset($charset) {
      // Names that are already canonical once lowercased.
      static $passthrough = array('euc-jp', 'iso-2022-jp', 'shift_jis', 'gb2312', 'gb18030');

      $charset = strtolower($charset);

      if ($charset === 'utf-8' or $charset === 'utf8') {
          return 'utf-8';
      }

      // cp1250, win-1250, windows1250, ... all become windows-1250.
      if (preg_match('/^(cp|win|windows)-?(12[0-9]{2})$/', $charset, $parts)) {
          return 'windows-'.$parts[2];
      }

      if (preg_match('/^iso-8859-[0-9]+$/', $charset)) {
          return $charset;
      }

      if ($charset === 'shift-jis') {
          return 'shift_jis';
      }

      if (in_array($charset, $passthrough, true)) {
          return $charset;
      }

      // Fall back to Typo3 for everything else.
      return self::typo3()->parse_charset($charset);
  }
  /**
   * Converts text from one encoding to another.
   *
   * iconv with //TRANSLIT is tried first; when it fails or yields an empty string the
   * Typo3 helper is used instead. UTF-8 input is passed through fix_utf8() first, and
   * that result is returned directly when the target is UTF-8 as well.
   *
   * @param string $text
   * @param string $fromCS source encoding
   * @param string $toCS result encoding
   * @return string|bool converted string or false on error
   */
  public static function convert($text, $fromCS, $toCS='utf-8') {
      $fromCS = self::parse_charset($fromCS);
      $toCS = self::parse_charset($toCS);

      // We can work only with strings.
      $text = (string)$text;
      if ($text === '') {
          return '';
      }

      if ($fromCS === 'utf-8') {
          $text = fix_utf8($text);
          if ($toCS === 'utf-8') {
              return $text;
          }
      }

      if ($toCS === 'ascii') {
          // Try to normalise the conversion a bit.
          $text = self::specialtoascii($text, $fromCS);
      }

      // Suppress notices; //IGNORE is deliberately not used so that Typo3 gives a
      // consistent result whenever iconv fails.
      $converted = @iconv($fromCS, $toCS.'//TRANSLIT', $text);
      if ($converted !== false and $converted !== '') {
          return $converted;
      }

      // iconv tends to return an empty string on an invalid character and false on
      // an unsupported encoding; let Typo3 try in both cases.
      $previouslevel = error_reporting(E_PARSE);
      $converted = self::typo3()->conv((string)$text, $fromCS, $toCS);
      error_reporting($previouslevel);

      return $converted;
  }
  /**
   * Multibyte safe substr(); uses mbstring or iconv for UTF-8 and Typo3 for other charsets.
   *
   * @param string $text string to truncate
   * @param int $start negative value means from end
   * @param int $len maximum length of characters beginning from start
   * @param string $charset encoding of the text
   * @return string portion of string specified by the $start and $len
   */
  public static function substr($text, $start, $len=null, $charset='utf-8') {
      $charset = self::parse_charset($charset);

      if ($charset !== 'utf-8') {
          // Typo3 notices are silenced while it does the work.
          $previouslevel = error_reporting(E_PARSE);
          if ($len === null) {
              $portion = self::typo3()->substr($charset, (string)$text, $start);
          } else {
              $portion = self::typo3()->substr($charset, (string)$text, $start, $len);
          }
          error_reporting($previouslevel);
          return $portion;
      }

      if (!function_exists('mb_substr')) {
          if ($len === null) {
              $len = iconv_strlen($text, 'UTF-8');
          }
          return iconv_substr($text, $start, $len, 'UTF-8');
      }

      // mbstring is much faster than iconv - see MDL-31142.
      if ($len !== null) {
          return mb_substr($text, $start, $len, 'UTF-8');
      }

      // Without a length the charset argument cannot be supplied positionally,
      // so the internal encoding is switched temporarily instead.
      $previousencoding = mb_internal_encoding();
      mb_internal_encoding('UTF-8');
      $portion = mb_substr($text, $start);
      mb_internal_encoding($previousencoding);

      return $portion;
  }
  /**
   * Finds the last occurrence of a string within another and returns a portion around it.
   * UTF-8 ONLY safe mb_strrchr().
   *
   * @param string $haystack The string from which to get the last occurrence of needle.
   * @param string $needle The string to find in haystack.
   * @param boolean $part If true, returns the portion before needle, else returns the portion
   *      starting at needle (needle included).
   * @return string|false False when not found.
   * @since 2.4.6, 2.5.2, 2.6
   */
  public static function strrchr($haystack, $needle, $part = false) {
      if (function_exists('mb_strrchr')) {
          return mb_strrchr($haystack, $needle, $part, 'UTF-8');
      }

      // Emulation for installations without mbstring.
      $position = self::strrpos($haystack, $needle);
      if ($position === false) {
          return false;
      }

      if ($part) {
          // Everything before the needle.
          return self::substr($haystack, 0, $position, 'utf-8');
      }

      // From the needle to the end of the string.
      return self::substr($haystack, $position, null, 'utf-8');
  }
  /**
   * Multibyte safe strlen(); uses mbstring or iconv for UTF-8 and Typo3 for other charsets.
   *
   * @param string $text input string
   * @param string $charset encoding of the text
   * @return int number of characters
   */
  public static function strlen($text, $charset='utf-8') {
      $charset = self::parse_charset($charset);

      if ($charset !== 'utf-8') {
          // Typo3 notices are silenced while it counts.
          $previouslevel = error_reporting(E_PARSE);
          $count = self::typo3()->strlen($charset, (string)$text);
          error_reporting($previouslevel);
          return $count;
      }

      return function_exists('mb_strlen')
          ? mb_strlen($text, 'UTF-8')
          : iconv_strlen($text, 'UTF-8');
  }
  /**
   * Multibyte safe strtolower(); uses mbstring for UTF-8 when available, Typo3 otherwise.
   *
   * @param string $text input string
   * @param string $charset encoding of the text (may not work for all encodings)
   * @return string lower case text
   */
  public static function strtolower($text, $charset='utf-8') {
      $charset = self::parse_charset($charset);

      if ($charset !== 'utf-8' or !function_exists('mb_strtolower')) {
          // Typo3 notices are silenced while it converts the case.
          $previouslevel = error_reporting(E_PARSE);
          $lowered = self::typo3()->conv_case($charset, (string)$text, 'toLower');
          error_reporting($previouslevel);
          return $lowered;
      }

      return mb_strtolower($text, 'UTF-8');
  }
  /**
   * Multibyte safe strtoupper(); uses mbstring for UTF-8 when available, Typo3 otherwise.
   *
   * @param string $text input string
   * @param string $charset encoding of the text (may not work for all encodings)
   * @return string upper case text
   */
  public static function strtoupper($text, $charset='utf-8') {
      $charset = self::parse_charset($charset);

      if ($charset !== 'utf-8' or !function_exists('mb_strtoupper')) {
          // Typo3 notices are silenced while it converts the case.
          $previouslevel = error_reporting(E_PARSE);
          $raised = self::typo3()->conv_case($charset, (string)$text, 'toUpper');
          error_reporting($previouslevel);
          return $raised;
      }

      return mb_strtoupper($text, 'UTF-8');
  }
  /**
   * Finds the character position of the first occurrence of a substring.
   * UTF-8 ONLY safe strpos(); uses mbstring and falls back to iconv.
   *
   * @param string $haystack the string to search in
   * @param string $needle one or more characters to search for
   * @param int $offset offset from beginning of string
   * @return int|false position of the first occurrence of needle in haystack, false when not found
   */
  public static function strpos($haystack, $needle, $offset=0) {
      if (!function_exists('mb_strpos')) {
          return iconv_strpos($haystack, $needle, $offset, 'UTF-8');
      }

      return mb_strpos($haystack, $needle, $offset, 'UTF-8');
  }
  /**
   * Finds the character position of the last occurrence of a substring.
   * UTF-8 ONLY safe strrpos(); uses mbstring and falls back to iconv.
   *
   * @param string $haystack the string to search in
   * @param string $needle one or more characters to search for
   * @return int|false position of the last occurrence of needle in haystack, false when not found
   */
  public static function strrpos($haystack, $needle) {
      if (function_exists('mb_strrpos')) {
          // The offset must be an int: passing null to this parameter is deprecated
          // since PHP 8.1. Older versions coerced null to 0, so results are unchanged.
          return mb_strrpos($haystack, $needle, 0, 'UTF-8');
      }

      return iconv_strrpos($haystack, $needle, 'UTF-8');
  }
  /**
   * Reverses a UTF-8 string character by character (used for RTL languages).
   * (Needed because there is no mb_strrev or iconv_strrev.)
   *
   * @param string $str the multibyte string to reverse
   * @return string the reversed multibyte string
   */
  public static function strrev($str) {
      // With the /u modifier "." matches one whole UTF-8 character; /s lets it match newlines too.
      preg_match_all('/./us', $str, $characters);
      $reversed = array_reverse($characters[0]);

      return implode('', $reversed);
  }
  /**
   * Tries to convert upper unicode characters to plain ascii using the Typo3 helper;
   * the returned string may still contain unconverted unicode characters.
   *
   * @param string $text input string
   * @param string $charset encoding of the text
   * @return string converted ascii string
   */
  public static function specialtoascii($text, $charset='utf-8') {
      $charset = self::parse_charset($charset);

      // Typo3 notices are silenced for the duration of the call.
      $previouslevel = error_reporting(E_PARSE);
      $ascii = self::typo3()->specCharsToASCII($charset, (string)$text);
      error_reporting($previouslevel);

      return $ascii;
  }
  /**
   * Generates a base64 encoded header for use in MIME mail messages.
   * This function seems to be 100% compliant with RFC1342. Credits go to:
   * paravoid (http://www.php.net/manual/en/function.mb-encode-mimeheader.php#60283).
   *
   * Empty input and text that is unchanged by a conversion to ascii are returned as is.
   * Otherwise the text is cut into chunks so that every encoded word, including its
   * "=?charset?B?" prefix and "?=" suffix, fits into 75 characters.
   *
   * @param string $text input string
   * @param string $charset encoding of the text
   * @return string|bool base64 encoded header; false when the charset cannot be measured
   *      (and is not gb18030) or when the chunking loop exceeds its iteration limit
   */
  public static function encode_mimeheader($text, $charset='utf-8') {
      if (empty($text)) {
          return (string)$text;
      }

      // Normalise the charset.
      $charset = self::parse_charset($charset);

      // Pure ASCII text does not need to be encoded.
      if (self::convert($text, $charset, 'ascii') == $text) {
          return $text;
      }

      // Although the RFC says the line feed should be \r\n, some mailers seem to
      // double convert \r, so \n alone is used.
      $linefeed = "\n";

      // Start and end of every chunk.
      $start = '=?'.$charset.'?B?';
      $end = '?=';

      // Accumulated result.
      $encoded = '';

      // Max line length is 75 (including start and end).
      $length = 75 - strlen($start) - strlen($end);

      // Number of characters; false means strlen and friends do not support the charset.
      $multilength = self::strlen($text, $charset);

      if ($multilength === false) {
          if ($charset != 'GB18030' and $charset != 'gb18030') {
              return false;
          }

          // One GB18030 character: 1 byte, 2 bytes (two lead/trail ranges) or 4 bytes.
          $gbchar = '(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..))';

          while (strlen($text)) {
              // Try to encode the first 22 chars - most chars are expected to be two bytes long.
              if (!preg_match('/^'.$gbchar.'{1,22}/m', $text, $found)) {
                  break;
              }
              $piece = $found[0];
              $encodedpiece = base64_encode($piece);

              if (strlen($encodedpiece) > $length) {
                  // Take the first 11 chars only - each char in 4 bytes is the worst case.
                  preg_match('/^'.$gbchar.'{1,11}/m', $text, $found);
                  $piece = $found[0];
                  $encodedpiece = base64_encode($piece);
              }

              $text = substr($text, strlen($piece));
              $encoded .= ' '.$start.$encodedpiece.$end.$linefeed;
          }

          return trim($encoded);
      }

      // Multi-byte ratio (characters per byte).
      $ratio = $multilength / strlen($text);

      // Base64 ratio: average number of characters that fit into one chunk.
      $avglength = floor(3 * $length * $ratio / 4);
      $magic = $avglength;

      // Basic infinite loop protection.
      $maxiterations = strlen($text) * 2;
      $iteration = 0;

      // Walk over the string in magic-sized chunks.
      $position = 0;
      while ($position <= $multilength) {
          if ($iteration++ > $maxiterations) {
              return false; // Probably an infinite loop.
          }

          $magic = $avglength;
          $shrink = 0;

          // Make sure the chunk fits in length, reducing magic if necessary.
          do {
              $magic -= $shrink;
              $piece = base64_encode(self::substr($text, $position, $magic, $charset));
              $shrink++;
          } while (strlen($piece) > $length);

          // This chunk does not break any multi-byte char. Use it.
          if ($piece) {
              $encoded .= ' '.$start.$piece.$end.$linefeed;
          }

          // Advance by the size that was actually used for this chunk.
          $position += $magic;
      }

      // Strip the first space and the last linefeed.
      return substr($encoded, 1, -strlen($linefeed));
  }
  /**
   * Returns the HTML entity transliteration table, built once and then cached.
   *
   * How the table is obtained depends on the running PHP version: before 5.3.4 it is
   * converted from ISO-8859-1 entry by entry, later versions ask PHP for the UTF-8
   * table directly and flip it.
   *
   * @return array with (html entity => utf-8) elements
   */
  protected static function get_entities_table() {
      static $trans_tbl = null;

      if ($trans_tbl !== null) {
          return $trans_tbl;
      }

      $version = phpversion();

      if (version_compare($version, '5.3.4') < 0) {
          $trans_tbl = array();
          foreach (get_html_translation_table(HTML_ENTITIES) as $character => $entity) {
              $trans_tbl[$entity] = self::convert($character, 'ISO-8859-1', 'utf-8');
          }
          return $trans_tbl;
      }

      if (version_compare($version, '5.4.0') < 0) {
          $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
      } else {
          $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8');
      }
      $trans_tbl = array_flip($table);

      return $trans_tbl;
  }
  /**
   * Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
   * Original from laurynas dot butkus at gmail at:
   * http://php.net/manual/en/function.html-entity-decode.php#75153
   * with some custom mods to provide more functionality
   *
   * Uses closures rather than create_function(), which is deprecated since PHP 7.2
   * and removed in PHP 8.0.
   *
   * @param string $str input string
   * @param boolean $htmlent convert also html entities (defaults to true)
   * @return string encoded UTF-8 string
   */
  public static function entities_to_utf8($str, $htmlent=true) {
      $result = (string)$str;

      // The class is named explicitly because self:: is not available inside
      // closures on PHP 5.3.

      // Hexadecimal entities: &#x20AC;
      $result = preg_replace_callback('/&#x([0-9a-f]+);/i', function($matches) {
          return core_text::code2utf8(hexdec($matches[1]));
      }, $result);

      // Decimal entities: &#8364;
      $result = preg_replace_callback('/&#([0-9]+);/', function($matches) {
          return core_text::code2utf8((int)$matches[1]);
      }, $result);

      // Replace literal entities (if desired).
      if ($htmlent) {
          // It should be safe to search for ascii strings and replace them with utf-8 here.
          $result = strtr($result, self::get_entities_table());
      }

      // Return the utf8-ised string.
      return $result;
  }
  /**
   * Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
   *
   * The conversion itself is done by the Typo3 helper. Its hexadecimal entities are
   * rewritten to decimal ones when $dec is set; a closure is used for that instead of
   * create_function(), which is deprecated since PHP 7.2 and removed in PHP 8.0.
   *
   * @param string $str input string
   * @param boolean $dec output decadic only number entities
   * @param boolean $nonnum remove all non-numeric entities
   * @return string converted string
   */
  public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
      if ($nonnum) {
          // Turn every existing entity into UTF-8 first, so that only numeric
          // entities remain after the conversion below.
          $str = self::entities_to_utf8($str, true);
      }

      // Avoid some notices from Typo3 code.
      $oldlevel = error_reporting(E_PARSE);
      $result = self::typo3()->utf8_to_entities((string)$str);
      error_reporting($oldlevel);

      if ($dec) {
          // Rewrite &#xHHHH; as &#DDDD;
          $result = preg_replace_callback('/&#x([0-9a-f]+);/i', function($matches) {
              return '&#'.hexdec($matches[1]).';';
          }, $result);
      }

      return $result;
  }
  /**
   * Strips a leading UTF-8 byte order mark {@link http://unicode.org/faq/utf_bom.html}
   *
   * Only a BOM at the very beginning of the string is removed.
   *
   * @param string $str input string
   * @return string the input without its leading BOM, or the input unchanged
   */
  public static function trim_utf8_bom($str) {
      $bom = "\xef\xbb\xbf";
      $bomlength = strlen($bom);

      if (strncmp($str, $bom, $bomlength) !== 0) {
          return $str;
      }

      return substr($str, $bomlength);
  }
  /**
   * Returns encoding options for select boxes, utf-8 and platform encodings first.
   *
   * Keys and values are the same upper-cased encoding names. The list starts with UTF-8,
   * followed by the 'localewincharset' and 'oldcharset' langconfig strings, then every
   * synonym known to the Typo3 helper.
   *
   * @return array encodings
   */
  public static function get_encodings() {
      $encodings = array('UTF-8' => 'UTF-8');

      $windowsencoding = strtoupper(get_string('localewincharset', 'langconfig'));
      if ($windowsencoding != '') {
          $encodings[$windowsencoding] = $windowsencoding;
      }

      $unixencoding = strtoupper(get_string('oldcharset', 'langconfig'));
      $encodings[$unixencoding] = $unixencoding;

      foreach (self::typo3()->synonyms as $synonym) {
          $name = strtoupper($synonym);
          $encodings[$name] = $name;
      }

      return $encodings;
  }
  /**
   * Returns the UTF-8 string corresponding to the unicode value
   * (from php.net, courtesy - romans@void.lv)
   *
   * Values below 0x200000 are encoded in one to four bytes; anything larger
   * yields an empty string.
   *
   * @param int $num one unicode value
   * @return string the UTF-8 char corresponding to the unicode value
   */
  public static function code2utf8($num) {
      // 1 byte: 0xxxxxxx
      if ($num < 0x80) {
          return chr($num);
      }

      // 2 bytes: 110xxxxx 10xxxxxx
      if ($num < 0x800) {
          return chr(($num >> 6) | 0xC0)
              . chr(($num & 0x3F) | 0x80);
      }

      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      if ($num < 0x10000) {
          return chr(($num >> 12) | 0xE0)
              . chr((($num >> 6) & 0x3F) | 0x80)
              . chr(($num & 0x3F) | 0x80);
      }

      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      if ($num < 0x200000) {
          return chr(($num >> 18) | 0xF0)
              . chr((($num >> 12) & 0x3F) | 0x80)
              . chr((($num >> 6) & 0x3F) | 0x80)
              . chr(($num & 0x3F) | 0x80);
      }

      return '';
  }
  /**
   * Returns the code of the given UTF-8 character
   *
   * Only the first character of the string is examined. Bytes are read with the
   * square bracket offset syntax; the curly brace form is deprecated since PHP 7.4
   * and is a parse error on PHP 8.
   *
   * @param string $utf8char one UTF-8 character
   * @return int|bool the code of the given character, 0 for an empty string, false when
   *      the first byte is not a valid lead byte or the sequence is shorter than
   *      that lead byte announces
   */
  public static function utf8ord($utf8char) {
      if ($utf8char == '') {
          return 0;
      }
      $utf8char = (string)$utf8char;

      $ord0 = ord($utf8char[0]);
      if ($ord0 <= 127) {
          // Plain ASCII.
          return $ord0;
      }

      // The lead byte determines the length of the sequence and carries the top bits.
      if ($ord0 >= 192 && $ord0 <= 223) {
          $length = 2;
          $code = $ord0 - 192;
      } else if ($ord0 >= 224 && $ord0 <= 239) {
          $length = 3;
          $code = $ord0 - 224;
      } else if ($ord0 >= 240 && $ord0 <= 247) {
          $length = 4;
          $code = $ord0 - 240;
      } else {
          // Stray continuation byte or a lead byte outside the supported range.
          return false;
      }

      // Never read past the end of a truncated sequence.
      if (strlen($utf8char) < $length) {
          return false;
      }

      // Every continuation byte contributes six more bits.
      for ($i = 1; $i < $length; $i++) {
          $code = $code * 64 + (ord($utf8char[$i]) - 128);
      }

      return $code;
  }
  /**
   * Capitalises the first letter of each word - words must be separated by spaces.
   * Use with care, this function does not work properly in many locales!!!
   *
   * mbstring does the work when available; otherwise the text is lower-cased and the
   * first letter of every space separated word is upper-cased by hand.
   *
   * @param string $text input string
   * @return string
   */
  public static function strtotitle($text) {
      if (empty($text)) {
          return $text;
      }

      if (function_exists('mb_convert_case')) {
          return mb_convert_case($text, MB_CASE_TITLE, 'UTF-8');
      }

      $words = explode(' ', self::strtolower($text));
      foreach ($words as $index => $word) {
          $length = self::strlen($word);

          if (!$length) {
              // Consecutive spaces produce empty words; leave them alone.
              continue;
          }

          if ($length == 1) {
              $words[$index] = self::strtoupper($word);
              continue;
          }

          $initial = self::strtoupper(self::substr($word, 0, 1));
          $words[$index] = $initial.self::substr($word, 1);
      }

      return implode(' ', $words);
  }
  626. }
  /**
   * Legacy textlib.
   *
   * Kept only so that old code calling textlib:: keeps working; every string
   * method is inherited from core_text.
   *
   * @deprecated since 2.6, use core_text:: instead.
   */
  class textlib extends core_text {
      /**
       * Locale aware sorting, the key associations are kept, values are sorted alphabetically.
       *
       * Emits a developer debugging message and forwards the call to collatorlib::asort().
       *
       * @param array $arr array to be sorted (reference)
       * @param int $sortflag One of Collator::SORT_REGULAR, Collator::SORT_NUMERIC, Collator::SORT_STRING
       * @return void modifies parameter
       */
      public static function asort(array &$arr, $sortflag = null) {
          $notice = 'textlib::asort has been superseeded by collatorlib::asort please upgrade your code to use that';
          debugging($notice, DEBUG_DEVELOPER);

          collatorlib::asort($arr, $sortflag);
      }
  }