PageRenderTime 45ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/textlib.class.php

https://bitbucket.org/ceu/moodle_demo
PHP | 452 lines | 240 code | 35 blank | 177 comment | 43 complexity | c16d37ef57c20c113116e9d070c84a5c MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.0, LGPL-2.1
  1. <?php // $Id: textlib.class.php,v 1.25 2007/10/10 05:25:15 nicolasconnault Exp $
  2. ///////////////////////////////////////////////////////////////////////////
  3. // //
  4. // NOTICE OF COPYRIGHT //
  5. // //
  6. // Moodle - Modular Object-Oriented Dynamic Learning Environment //
  7. // http://moodle.com //
  8. // //
  9. // Copyright (C) 1999 onwards Martin Dougiamas http://dougiamas.com //
  10. // (C) 2001-3001 Eloy Lafuente (stronk7) http://contiento.com //
  11. // //
  12. // This program is free software; you can redistribute it and/or modify //
  13. // it under the terms of the GNU General Public License as published by //
  14. // the Free Software Foundation; either version 2 of the License, or //
  15. // (at your option) any later version. //
  16. // //
  17. // This program is distributed in the hope that it will be useful, //
  18. // but WITHOUT ANY WARRANTY; without even the implied warranty of //
  19. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
  20. // GNU General Public License for more details: //
  21. // //
  22. // http://www.gnu.org/copyleft/gpl.html //
  23. // //
  24. ///////////////////////////////////////////////////////////////////////////
  /// Typo3 charset library files wrapped by the textlib class
  require_once($CFG->libdir.'/typo3/class.t3lib_cs.php');
  require_once($CFG->libdir.'/typo3/class.t3lib_div.php');

  /// Tell Typo3 which conversion backend to use: the first loaded extension
  /// among iconv, mbstring and recode wins; an empty string is stored when
  /// none of them is loaded
  $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] =
      extension_loaded('iconv') ? 'iconv'
      : (extension_loaded('mbstring') ? 'mbstring'
      : (extension_loaded('recode') ? 'recode' : ''));

  /// Tell Typo3 to use mbstring for its string utility functions when loaded
  $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] =
      extension_loaded('mbstring') ? 'mbstring' : '';

  /// This directory must exist so that Typo3 can cache its conversion
  /// tables when using its internal functions
  make_upload_directory('temp/typo3temp/cs');

  /// Default file creation mask for Typo3
  $GLOBALS['TYPO3_CONF_VARS']['BE']['fileCreateMask'] = $CFG->directorypermissions;

  /// Full path constants required by Typo3. Backslashes are transformed
  /// into forward slashes because Typo3 requires it.
  define('PATH_t3lib', str_replace('\\', '/', $CFG->libdir.'/typo3/'));
  define('PATH_typo3', str_replace('\\', '/', $CFG->libdir.'/typo3/'));
  define('PATH_site', str_replace('\\', '/', $CFG->dataroot.'/temp/'));

  /// 'WIN' when PHP_OS contains "win" but not "darwin", empty string otherwise
  if (stristr(PHP_OS, 'win') && !stristr(PHP_OS, 'darwin')) {
      define('TYPO3_OS', 'WIN');
  } else {
      define('TYPO3_OS', '');
  }
  /// The textlib class is used following the singleton pattern (only one
  /// instance is shared globally); this helper function hands out that
  /// shared instance, creating it on first use.
  /// IMPORTANT Note: Typo3 libraries always expect lowercase charsets to use
  ///                 100% of their capabilities so, don't forget to make the
  ///                 conversion from every wrapper function!
  function textlib_get_instance() {
      static $singleton;

      /// Already built by an earlier call: reuse it
      if (is_object($singleton)) {
          return $singleton;
      }

      /// First call: build the shared object and remember it
      $singleton = new textlib();
      return $singleton;
  }
  /**
   * This class is used to manipulate strings under Moodle 1.6 and later. As
   * utf-8 text became mandatory, a pool of functions that are safe under this
   * encoding became necessary. The names of the methods are exactly the
   * same as their PHP originals.
   *
   * A big part of this class acts as a wrapper over the Typo3 charset library
   * (t3lib_cs), a group of utilities to handle texts and encoding conversion.
   *
   * Take a look at the Typo3 library for its own copyright and license details.
   */
  class textlib {

      /// t3lib_cs instance that most wrapper methods delegate to
      public $typo3cs;

      /**
       * Standard constructor of the class. All it does is to instantiate
       * a new t3lib_cs object to have all its functions ready.
       *
       * Instead of instantiating a lot of objects of this class every time
       * one of its functions is going to be used, you can invoke the
       * textlib_get_instance() function, avoiding the creation of them
       * (following the singleton pattern).
       */
      public function __construct() {
      /// Instantiate a conversor object: some of the methods in typo3
      /// reference $this and cannot be executed in a static context
          $this->typo3cs = new t3lib_cs();
      }

      /**
       * Old (PHP4 style) constructor, kept only so that existing code calling
       * it explicitly (e.g. parent::textlib() from a subclass) keeps working.
       * Same-named methods are no longer treated as constructors by PHP 8,
       * so the real initialisation lives in __construct().
       */
      public function textlib() {
          self::__construct();
      }

      /**
       * Converts the text between different encodings by delegating to the
       * Typo3 conv() function (which, depending on the configuration, relies
       * on iconv, mbstring, recode or its internal tables).
       *
       * @param string $text text to convert
       * @param string $fromCS source charset
       * @param string $toCS target charset (defaults to utf-8)
       * @return mixed whatever t3lib_cs::conv() returns (originally documented
       *               as the converted text, or false if the conversion fails)
       */
      public function convert($text, $fromCS, $toCS='utf-8') {
      /// Normalize charsets
          $fromCS = $this->typo3cs->parse_charset($fromCS);
          $toCS   = $this->typo3cs->parse_charset($toCS);
      /// Avoid some notices from Typo3 code
          $oldlevel = error_reporting(E_PARSE);
      /// Call Typo3 conv() function. It will do all the work
          $result = $this->typo3cs->conv($text, $fromCS, $toCS);
      /// Restore original debug level
          error_reporting($oldlevel);
          return $result;
      }

      /**
       * Multibyte safe substr() function, delegated to Typo3 substr().
       *
       * @param string $text input string
       * @param int $start start position
       * @param int $len length of the wanted part (null is passed through to Typo3)
       * @param string $charset charset of $text (defaults to utf-8)
       * @return mixed whatever t3lib_cs::substr() returns
       */
      public function substr($text, $start, $len=null, $charset='utf-8') {
      /// Normalize charset
          $charset = $this->typo3cs->parse_charset($charset);
      /// Avoid some notices from Typo3 code
          $oldlevel = error_reporting(E_PARSE);
      /// Call Typo3 substr() function. It will do all the work
          $result = $this->typo3cs->substr($charset, $text, $start, $len);
      /// Restore original debug level
          error_reporting($oldlevel);
          return $result;
      }

      /**
       * Multibyte safe strlen() function, delegated to Typo3 strlen().
       *
       * @param string $text input string
       * @param string $charset charset of $text (defaults to utf-8)
       * @return mixed whatever t3lib_cs::strlen() returns; encode_mimeheader()
       *               treats a false result as "charset not supported"
       */
      public function strlen($text, $charset='utf-8') {
      /// Normalize charset
          $charset = $this->typo3cs->parse_charset($charset);
      /// Avoid some notices from Typo3 code
          $oldlevel = error_reporting(E_PARSE);
      /// Call Typo3 strlen() function. It will do all the work
          $result = $this->typo3cs->strlen($charset, $text);
      /// Restore original debug level
          error_reporting($oldlevel);
          return $result;
      }

      /**
       * Multibyte safe strtolower() function, delegated to Typo3 conv_case().
       *
       * @param string $text input string
       * @param string $charset charset of $text (defaults to utf-8)
       * @return mixed whatever t3lib_cs::conv_case() returns for 'toLower'
       */
      public function strtolower($text, $charset='utf-8') {
      /// Normalize charset
          $charset = $this->typo3cs->parse_charset($charset);
      /// Avoid some notices from Typo3 code
          $oldlevel = error_reporting(E_PARSE);
      /// Call Typo3 conv_case() function. It will do all the work
          $result = $this->typo3cs->conv_case($charset, $text, 'toLower');
      /// Restore original debug level
          error_reporting($oldlevel);
          return $result;
      }

      /**
       * Multibyte safe strtoupper() function, delegated to Typo3 conv_case().
       *
       * @param string $text input string
       * @param string $charset charset of $text (defaults to utf-8)
       * @return mixed whatever t3lib_cs::conv_case() returns for 'toUpper'
       */
      public function strtoupper($text, $charset='utf-8') {
      /// Normalize charset
          $charset = $this->typo3cs->parse_charset($charset);
      /// Avoid some notices from Typo3 code
          $oldlevel = error_reporting(E_PARSE);
      /// Call Typo3 conv_case() function. It will do all the work
          $result = $this->typo3cs->conv_case($charset, $text, 'toUpper');
      /// Restore original debug level
          error_reporting($oldlevel);
          return $result;
      }

      /**
       * UTF-8 ONLY safe strpos() function, delegated to Typo3 utf8_strpos().
       *
       * @param string $haystack string to search in
       * @param string $needle string to search for
       * @param int $offset position to start searching from
       * @return mixed whatever t3lib_cs::utf8_strpos() returns
       */
      public function strpos($haystack, $needle, $offset=0) {
      /// Call Typo3 utf8_strpos() function. It will do all the work
          return $this->typo3cs->utf8_strpos($haystack, $needle, $offset);
      }

      /**
       * UTF-8 ONLY safe strrpos() function, delegated to Typo3 utf8_strrpos().
       *
       * @param string $haystack string to search in
       * @param string $needle string to search for
       * @return mixed whatever t3lib_cs::utf8_strrpos() returns
       */
      public function strrpos($haystack, $needle) {
      /// Call Typo3 utf8_strrpos() function. It will do all the work
          return $this->typo3cs->utf8_strrpos($haystack, $needle);
      }

      /**
       * Try to convert upper unicode characters to plain ascii,
       * the returned string may contain unconverted unicode characters.
       *
       * @param string $text input string
       * @param string $charset charset of $text (defaults to utf-8)
       * @return mixed whatever t3lib_cs::specCharsToASCII() returns
       */
      public function specialtoascii($text, $charset='utf-8') {
      /// Normalize charset
          $charset = $this->typo3cs->parse_charset($charset);
      /// Avoid some notices from Typo3 code
          $oldlevel = error_reporting(E_PARSE);
          $result = $this->typo3cs->specCharsToASCII($charset, $text);
      /// Restore original debug level
          error_reporting($oldlevel);
          return $result;
      }

      /**
       * Generate a base64 encoded header to be used in MIME mail messages.
       * According to its original author this function is compliant with
       * RFC1342. Credits go to:
       * paravoid (http://www.php.net/manual/en/function.mb-encode-mimeheader.php#60283).
       *
       * @param string $text header text to encode
       * @param string $charset charset of $text (defaults to utf-8)
       * @return mixed the text itself when it is empty or survives a conversion
       *               to ascii unchanged; the encoded header (chunks separated
       *               by "\n ") otherwise; false when the charset is not
       *               supported or the chunking loop does not terminate
       */
      public function encode_mimeheader($text, $charset='utf-8') {
          if (empty($text)) {
              return (string)$text;
          }
      /// Normalize charset
          $charset = $this->typo3cs->parse_charset($charset);
      /// If the text is pure ASCII, we don't need to encode it
          if ($this->convert($text, $charset, 'ascii') == $text) {
              return $text;
          }
      /// Although RFC says that line feed should be \r\n, it seems that
      /// some mailers double convert \r, so we are going to use \n alone
          $linefeed = "\n";
      /// Define start and end of every chunk
          $start = "=?$charset?B?";
          $end = "?=";
      /// Accumulate results
          $encoded = '';
      /// Max line length is 75 (including start and end)
          $length = 75 - strlen($start) - strlen($end);
      /// Length of the text in characters (not bytes)
          $multilength = $this->strlen($text, $charset);
      /// Detect if strlen and friends are supported for this charset
          if ($multilength === false) {
              if ($charset == 'GB18030' or $charset == 'gb18030') {
              /// Typo3 cannot handle this charset, so split the text into
              /// characters with a regular expression instead
                  while (strlen($text)) {
                      // try to encode first 22 chars - we expect most chars are two bytes long
                      if (preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,22}/m', $text, $matches)) {
                          $chunk = $matches[0];
                          $encchunk = base64_encode($chunk);
                          if (strlen($encchunk) > $length) {
                              // find first 11 chars - each char in 4 bytes - worst case scenario
                              preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,11}/m', $text, $matches);
                              $chunk = $matches[0];
                              $encchunk = base64_encode($chunk);
                          }
                          $text = substr($text, strlen($chunk));
                          $encoded .= ' '.$start.$encchunk.$end.$linefeed;
                      } else {
                          // the rest of the text does not look like GB18030: stop here
                          break;
                      }
                  }
                  $encoded = trim($encoded);
                  return $encoded;
              } else {
                  return false;
              }
          }
      /// Characters per byte ($text is not empty here, so no division by zero)
          $ratio = $multilength / strlen($text);
      /// Estimated number of characters fitting in one base64 chunk
          $magic = $avglength = floor(3 * $length * $ratio / 4);
      /// basic infinite loop protection
          $maxiterations = strlen($text)*2;
          $iteration = 0;
      /// Iterate over the string in magic chunks
          for ($i=0; $i <= $multilength; $i+=$magic) {
              if ($iteration++ > $maxiterations) {
                  return false; // probably infinite loop
              }
              $magic = $avglength;
              $offset = 0;
          /// Ensure the chunk fits in length, reducing magic if necessary
              do {
                  $magic -= $offset;
                  $chunk = $this->substr($text, $i, $magic, $charset);
                  $chunk = base64_encode($chunk);
                  $offset++;
              } while (strlen($chunk) > $length);
          /// This chunk doesn't break any multi-byte char. Use it.
              if ($chunk)
                  $encoded .= ' '.$start.$chunk.$end.$linefeed;
          }
      /// Strip the first space and the last linefeed
          $encoded = substr($encoded, 1, -strlen($linefeed));
          return $encoded;
      }

      /**
       * Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
       * Original from laurynas dot butkus at gmail at:
       * http://php.net/manual/en/function.html-entity-decode.php#75153
       * with some custom mods to provide more functionality
       *
       * Declared static (the method never needed an instance); calling it
       * through an object, as in $textlib->entities_to_utf8(), keeps working.
       *
       * @param string $str input string
       * @param boolean $htmlent convert also html entities (defaults to true)
       * @return string the string with the entities replaced by UTF-8 characters
       *
       * NOTE: we could have used typo3 entities_to_utf8() here
       *       but the direct alternative used runs 400% quicker
       *       and uses 0.5Mb less memory, so, let's use it
       *       (tested against 10^6 conversions)
       */
      public static function entities_to_utf8($str, $htmlent=true) {

          static $trans_tbl; /// Going to use static translit table

      /// Replace numeric entities. Callbacks are used instead of the /e
      /// modifier, which was deprecated in PHP 5.5 and removed in PHP 7
          $result = preg_replace_callback('~&#x([0-9a-f]+);~i', array(__CLASS__, 'hex_entity_to_utf8'), $str);
          $result = preg_replace_callback('~&#([0-9]+);~', array(__CLASS__, 'dec_entity_to_utf8'), $result);

      /// Replace literal entities (if desired)
          if ($htmlent) {
          /// Generate/create $trans_tbl
              if (!isset($trans_tbl)) {
                  if (version_compare(PHP_VERSION, '5.3.4', '>=')) {
                  /// Ask explicitly for the UTF-8 table: since PHP 5.4 the default
                  /// table is already UTF-8, so utf8_encode() would double-encode it
                      $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
                  } else {
                  /// Older PHP has no encoding parameter and always returns the
                  /// ISO-8859-1 table, so every value must be converted to UTF-8
                      $trans_tbl = array();
                      foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
                          $trans_tbl[$key] = utf8_encode($val);
                      }
                  }
              }
              $result = strtr($result, $trans_tbl);
          }

      /// Return utf8-ised string
          return $result;
      }

      /**
       * Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
       *
       * @param string $str input string
       * @param boolean $dec output decadic only number entities
       * @param boolean $nonnum remove all nonumeric entities (done by running the
       *                text through Typo3 entities_to_utf8() before encoding)
       * @return string converted string
       */
      public function utf8_to_entities($str, $dec=false, $nonnum=false) {
      /// Avoid some notices from Typo3 code
          $oldlevel = error_reporting(E_PARSE);
          if ($nonnum) {
              $str = $this->typo3cs->entities_to_utf8($str, true);
          }
          $result = $this->typo3cs->utf8_to_entities($str);
          if ($dec) {
          /// Rewrite hexadecimal entities as decimal ones (callback instead
          /// of the /e modifier, which was removed in PHP 7)
              $result = preg_replace_callback('/&#x([0-9a-f]+);/i', array(__CLASS__, 'hex_entity_to_dec_entity'), $result);
          }
      /// Restore original debug level
          error_reporting($oldlevel);
          return $result;
      }

      /**
       * Removes the BOM from unicode string - see http://unicode.org/faq/utf_bom.html
       *
       * @param string $str input string
       * @return string $str without a leading UTF-8 byte order mark
       */
      public static function trim_utf8_bom($str) {
          $bom = "\xef\xbb\xbf";
      /// Byte-wise PHP functions are fine here: the BOM is a fixed byte sequence
          if (strpos($str, $bom) === 0) {
              return substr($str, strlen($bom));
          }
          return $str;
      }

      /**
       * Returns encoding options for select boxes, utf-8 and platform encoding first
       *
       * @return array encodings, keys and values being the upper-cased names
       */
      public function get_encodings() {
          $encodings = array();
          $encodings['UTF-8'] = 'UTF-8';
      /// Charsets declared by the current language pack go next
          $winenc = strtoupper(get_string('localewincharset'));
          if ($winenc != '') {
              $encodings[$winenc] = $winenc;
          }
          $nixenc = strtoupper(get_string('oldcharset'));
          $encodings[$nixenc] = $nixenc;

      /// And finally everything listed in the Typo3 synonyms table
          foreach ($this->typo3cs->synonyms as $enc) {
              $enc = strtoupper($enc);
              $encodings[$enc] = $enc;
          }
          return $encodings;
      }

      /**
       * Returns the utf8 string corresponding to the unicode value
       * (from php.net, courtesy - romans@void.lv)
       *
       * Declared static because entities_to_utf8() calls it statically; calling
       * it through an object keeps working.
       *
       * @param int $num one unicode value
       * @return string the UTF-8 char corresponding to the unicode value, or an
       *                empty string when $num is not a valid Unicode scalar
       *                value (negative, above U+10FFFF or a surrogate)
       */
      public static function code2utf8($num) {
      /// Anything outside the Unicode range would produce an invalid sequence
          if ($num < 0 || $num > 0x10FFFF) {
              return '';
          }
          $num = (int)$num;
      /// Surrogates (U+D800..U+DFFF) are not encodable in UTF-8 either
          if ($num >= 0xD800 && $num <= 0xDFFF) {
              return '';
          }
          if ($num < 128) {
              return chr($num);
          }
          if ($num < 2048) {
              return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
          }
          if ($num < 65536) {
              return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
          }
          return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
      }

      /**
       * Makes first letter of each word capital - words must be separated by spaces.
       * Use with care, this function does not work properly in many locales!!!
       *
       * @param string $text
       * @return string
       */
      public function strtotitle($text) {
          if (empty($text)) {
              return $text;
          }

      /// mbstring does the whole job when it is available
          if (function_exists('mb_convert_case')) {
              return mb_convert_case($text, MB_CASE_TITLE,"UTF-8");
          }

      /// Otherwise lowercase everything and capitalise each word by hand
          $text = $this->strtolower($text);
          $words = explode(' ', $text);
          foreach ($words as $i=>$word) {
              $length = $this->strlen($word);
              if (!$length) {
                  continue;

              } else if ($length == 1) {
                  $words[$i] = $this->strtoupper($word);

              } else {
                  $letter = $this->substr($word, 0, 1);
                  $letter = $this->strtoupper($letter);
                  $rest   = $this->substr($word, 1);
                  $words[$i] = $letter.$rest;
              }
          }
          return implode(' ', $words);
      }

      /**
       * preg_replace_callback() helper for entities_to_utf8(): turns one
       * hexadecimal entity match into its UTF-8 character.
       *
       * @param array $matches $matches[1] holds the hexadecimal digits
       * @return string
       */
      private static function hex_entity_to_utf8($matches) {
          return self::code2utf8(hexdec($matches[1]));
      }

      /**
       * preg_replace_callback() helper for entities_to_utf8(): turns one
       * decimal entity match into its UTF-8 character.
       *
       * @param array $matches $matches[1] holds the decimal digits
       * @return string
       */
      private static function dec_entity_to_utf8($matches) {
      /// Numeric conversion is always base 10, so leading zeros (&#065;) are harmless
          return self::code2utf8($matches[1] + 0);
      }

      /**
       * preg_replace_callback() helper for utf8_to_entities(): rewrites one
       * hexadecimal entity match as the equivalent decimal entity.
       *
       * @param array $matches $matches[1] holds the hexadecimal digits
       * @return string
       */
      private static function hex_entity_to_dec_entity($matches) {
          return '&#'.hexdec($matches[1]).';';
      }
  }
  414. ?>