PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/leganto/libs/texy/texy.php

http://preader.googlecode.com/
PHP | 973 lines | 485 code | 218 blank | 270 comment | 51 complexity | 157568a83bd00d3c25d6838342ec3778 MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  2. /**
  3. * Texy! - web text markup-language
  4. * --------------------------------
  5. *
  6. * Copyright (c) 2004, 2009 David Grudl (http://davidgrudl.com)
  7. *
  8. * This source file is subject to the GNU GPL license that is bundled
  9. * with this package in the file license.txt.
  10. *
  11. * For more information please see http://texy.info
  12. *
  13. * @copyright Copyright (c) 2004, 2009 David Grudl
  14. * @license GNU GENERAL PUBLIC LICENSE version 2 or 3
  15. * @link http://texy.info
  16. * @package Texy
  17. */
  define('TEXY_VERSION', '2.0');


  /**
   * Check PHP configuration.
   *
   * When mbstring overloads the string functions (bit 2 of
   * mbstring.func_overload) and the internal encoding name starts with "U",
   * the internal encoding is switched to 'pass' and a warning is raised.
   * Texy! also refuses to load under zend.ze1_compatibility_mode.
   */
  if (extension_loaded('mbstring')) {
      // The overload setting is read from the ini directive instead of
      // mb_get_info('func_overload'): that type went away together with the
      // overloading feature in PHP 8.0, whereas ini_get() just yields FALSE
      // (cast to 0 here) for a directive that no longer exists.
      $overloadsStrings = ((int) ini_get('mbstring.func_overload')) & 2;
      if ($overloadsStrings && substr(mb_get_info('internal_encoding'), 0, 1) === 'U') { // U??
          mb_internal_encoding('pass');
          trigger_error("Texy: mb_internal_encoding changed to 'pass'", E_USER_WARNING);
      }
      unset($overloadsStrings); // do not leak a helper variable into the including scope
  }

  // the directive may hold a number or a textual on/true/yes value
  if (ini_get('zend.ze1_compatibility_mode') % 256 ||
      preg_match('#on$|true$|yes$#iA', ini_get('zend.ze1_compatibility_mode'))) {
      throw new RuntimeException("Texy cannot run with zend.ze1_compatibility_mode enabled.");
  }
  32. // Texy! libraries
  33. require_once dirname(__FILE__) . '/libs/RegExp.Patterns.php';
  34. require_once dirname(__FILE__) . '/libs/TexyObject.php';
  35. require_once dirname(__FILE__) . '/libs/TexyHtml.php';
  36. require_once dirname(__FILE__) . '/libs/TexyModifier.php';
  37. require_once dirname(__FILE__) . '/libs/TexyModule.php';
  38. require_once dirname(__FILE__) . '/libs/TexyParser.php';
  39. require_once dirname(__FILE__) . '/libs/TexyUtf.php';
  40. require_once dirname(__FILE__) . '/libs/TexyConfigurator.php';
  41. require_once dirname(__FILE__) . '/libs/TexyHandlerInvocation.php';
  42. require_once dirname(__FILE__) . '/modules/TexyParagraphModule.php';
  43. require_once dirname(__FILE__) . '/modules/TexyBlockModule.php';
  44. require_once dirname(__FILE__) . '/modules/TexyHeadingModule.php';
  45. require_once dirname(__FILE__) . '/modules/TexyHorizLineModule.php';
  46. require_once dirname(__FILE__) . '/modules/TexyHtmlModule.php';
  47. require_once dirname(__FILE__) . '/modules/TexyFigureModule.php';
  48. require_once dirname(__FILE__) . '/modules/TexyImageModule.php';
  49. require_once dirname(__FILE__) . '/modules/TexyLinkModule.php';
  50. require_once dirname(__FILE__) . '/modules/TexyListModule.php';
  51. require_once dirname(__FILE__) . '/modules/TexyLongWordsModule.php';
  52. require_once dirname(__FILE__) . '/modules/TexyPhraseModule.php';
  53. require_once dirname(__FILE__) . '/modules/TexyBlockQuoteModule.php';
  54. require_once dirname(__FILE__) . '/modules/TexyScriptModule.php';
  55. require_once dirname(__FILE__) . '/modules/TexyEmoticonModule.php';
  56. require_once dirname(__FILE__) . '/modules/TexyTableModule.php';
  57. require_once dirname(__FILE__) . '/modules/TexyTypographyModule.php';
  58. require_once dirname(__FILE__) . '/modules/TexyHtmlOutputModule.php';
  /**
   * Compatibility with PHP < 5.1.
   *
   * Each class is declared only when it does not exist yet; the second
   * argument of class_exists() keeps autoloaders from being triggered.
   */
  if (!class_exists('LogicException', FALSE)) {
      class LogicException extends Exception
      {
      }
  }

  if (!class_exists('InvalidArgumentException', FALSE)) {
      class InvalidArgumentException extends LogicException
      {
      }
  }

  if (!class_exists('RuntimeException', FALSE)) {
      class RuntimeException extends Exception
      {
      }
  }

  if (!class_exists('UnexpectedValueException', FALSE)) {
      class UnexpectedValueException extends RuntimeException
      {
      }
  }


  /**
   * Compatibility with Nette
   *
   * Exception types declared here only when nothing else has declared them.
   */
  if (!class_exists('NotSupportedException', FALSE)) {
      class NotSupportedException extends LogicException
      {
      }
  }

  if (!class_exists('MemberAccessException', FALSE)) {
      class MemberAccessException extends LogicException
      {
      }
  }

  if (!class_exists('InvalidStateException', FALSE)) {
      class InvalidStateException extends RuntimeException
      {
      }
  }
  /**
   * For Texy 1 backward compatibility.
   *
   * Global constants carrying the same values as the Texy::ALL, Texy::NONE
   * and Texy::CONTENT_* class constants.
   */
  // configuration directives
  define('TEXY_ALL', TRUE);
  define('TEXY_NONE', FALSE);

  // types of protection marks
  define('TEXY_CONTENT_MARKUP', "\x17");
  define('TEXY_CONTENT_REPLACED', "\x16");
  define('TEXY_CONTENT_TEXTUAL', "\x15");
  define('TEXY_CONTENT_BLOCK', "\x14");
  95. /**
  96. * Texy! - Convert plain text to XHTML format using {@link process()}.
  97. *
  98. * <code>
  99. * $texy = new Texy();
  100. * $html = $texy->process($text);
  101. * </code>
  102. *
  103. * @author David Grudl
  104. * @copyright Copyright (c) 2004, 2009 David Grudl
  105. * @package Texy
  106. */
  107. class Texy extends TexyObject
  108. {
  109. // configuration directives
  110. const ALL = TRUE;
  111. const NONE = FALSE;
  112. // Texy version
  113. const VERSION = TEXY_VERSION;
  114. const REVISION = '8da0750 released on 2009-09-26';
  115. // types of protection marks
  116. const CONTENT_MARKUP = "\x17";
  117. const CONTENT_REPLACED = "\x16";
  118. const CONTENT_TEXTUAL = "\x15";
  119. const CONTENT_BLOCK = "\x14";
  120. // url filters
  121. const FILTER_ANCHOR = 'anchor';
  122. const FILTER_IMAGE = 'image';
  123. // HTML minor-modes
  124. const XML = 2;
  125. // HTML modes
  126. const HTML4_TRANSITIONAL = 0;
  127. const HTML4_STRICT = 1;
  128. const HTML5 = 4;
  129. const XHTML1_TRANSITIONAL = 2; // Texy::HTML4_TRANSITIONAL | Texy::XML;
  130. const XHTML1_STRICT = 3; // Texy::HTML4_STRICT | Texy::XML;
  131. const XHTML5 = 6; // Texy::HTML5 | Texy::XML;
  132. /** @var string input & output text encoding */
  133. public $encoding = 'utf-8';
  134. /** @var array Texy! syntax configuration */
  135. public $allowed = array();
  136. /** @var TRUE|FALSE|array Allowed HTML tags */
  137. public $allowedTags;
  138. /** @var TRUE|FALSE|array Allowed classes */
  139. public $allowedClasses = Texy::ALL; // all classes and id are allowed
  140. /** @var TRUE|FALSE|array Allowed inline CSS style */
  141. public $allowedStyles = Texy::ALL; // all inline styles are allowed
  142. /** @var int TAB width (for converting tabs to spaces) */
  143. public $tabWidth = 8;
  144. /** @var boolean Do obfuscate e-mail addresses? */
  145. public $obfuscateEmail = TRUE;
  146. /** @var array regexps to check URL schemes */
  147. public $urlSchemeFilters = NULL; // disable URL scheme filter
  148. /** @var bool Paragraph merging mode */
  149. public $mergeLines = TRUE;
  150. /** @var array Parsing summary */
  151. public $summary = array(
  152. 'images' => array(),
  153. 'links' => array(),
  154. 'preload' => array(),
  155. );
  156. /** @var string Generated stylesheet */
  157. public $styleSheet = '';
  158. /** @var array CSS classes for align modifiers */
  159. public $alignClasses = array(
  160. 'left' => NULL,
  161. 'right' => NULL,
  162. 'center' => NULL,
  163. 'justify' => NULL,
  164. 'top' => NULL,
  165. 'middle' => NULL,
  166. 'bottom' => NULL,
  167. );
  168. /** @var bool remove soft hyphens (SHY)? */
  169. public $removeSoftHyphens = TRUE;
  170. /** @var mixed */
  171. public static $advertisingNotice = 'once';
  172. /** @var string */
  173. public $nontextParagraph = 'div';
  174. /** @var TexyScriptModule */
  175. public $scriptModule;
  176. /** @var TexyParagraphModule */
  177. public $paragraphModule;
  178. /** @var TexyHtmlModule */
  179. public $htmlModule;
  180. /** @var TexyImageModule */
  181. public $imageModule;
  182. /** @var TexyLinkModule */
  183. public $linkModule;
  184. /** @var TexyPhraseModule */
  185. public $phraseModule;
  186. /** @var TexyEmoticonModule */
  187. public $emoticonModule;
  188. /** @var TexyBlockModule */
  189. public $blockModule;
  190. /** @var TexyHeadingModule */
  191. public $headingModule;
  192. /** @var TexyHorizLineModule */
  193. public $horizLineModule;
  194. /** @var TexyBlockQuoteModule */
  195. public $blockQuoteModule;
  196. /** @var TexyListModule */
  197. public $listModule;
  198. /** @var TexyTableModule */
  199. public $tableModule;
  200. /** @var TexyFigureModule */
  201. public $figureModule;
  202. /** @var TexyTypographyModule */
  203. public $typographyModule;
  204. /** @var TexyLongWordsModule */
  205. public $longWordsModule;
  206. /** @var TexyHtmlOutputModule */
  207. public $htmlOutputModule;
  208. /**
  209. * Registered regexps and associated handlers for inline parsing.
  210. * @var array of ('handler' => callback
  211. * 'pattern' => regular expression)
  212. */
  213. private $linePatterns = array();
  214. private $_linePatterns;
  215. /**
  216. * Registered regexps and associated handlers for block parsing.
  217. * @var array of ('handler' => callback
  218. * 'pattern' => regular expression)
  219. */
  220. private $blockPatterns = array();
  221. private $_blockPatterns;
  222. /** @var array */
  223. private $postHandlers = array();
  224. /** @var TexyHtml DOM structure for parsed text */
  225. private $DOM;
  226. /** @var array Texy protect markup table */
  227. private $marks = array();
  228. /** @var array for internal usage */
  229. public $_classes, $_styles;
  230. /** @var bool */
  231. private $processing;
  232. /** @var array of events and registered handlers */
  233. private $handlers = array();
  234. /**
  235. * DTD descriptor.
  236. * $dtd[element][0] - allowed attributes (as array keys)
  237. * $dtd[element][1] - allowed content for an element (content model) (as array keys)
  238. * - array of allowed elements (as keys)
  239. * - FALSE - empty element
  240. * - 0 - special case for ins & del
  241. * @var array
  242. */
  243. public $dtd;
  244. /** @var array */
  245. private static $dtdCache;
  246. /** @var int HTML mode */
  247. private $mode;
  248. /** DEPRECATED */
  249. public static $strictDTD;
  250. public $cleaner;
  251. public $xhtml;
  /**
   * Builds the converter: loads the modules, selects the output mode and
   * registers three sample link references (texy, google, wikipedia).
   */
  public function __construct()
  {
      // load all modules
      $this->loadModules();

      // DEPRECATED: the static $strictDTD switch still selects the strict DTD
      $useStrict = self::$strictDTD !== NULL && self::$strictDTD;
      $this->setOutputMode($useStrict ? self::XHTML1_STRICT : self::XHTML1_TRANSITIONAL);

      // DEPRECATED: $cleaner is kept as a reference to the HTML output module
      $this->cleaner = & $this->htmlOutputModule;

      // examples of link references ;-)
      $texyLink = new TexyLink('http://texy.info/');
      $texyLink->modifier->title = 'The best text -> HTML converter and formatter';
      $texyLink->label = 'Texy!';
      $this->linkModule->addReference('texy', $texyLink);

      $googleLink = new TexyLink('http://www.google.com/search?q=%s');
      $this->linkModule->addReference('google', $googleLink);

      $wikipediaLink = new TexyLink('http://en.wikipedia.org/wiki/Special:Search?search=%s');
      $this->linkModule->addReference('wikipedia', $wikipediaLink);
  }
  /**
   * Set HTML/XHTML output mode (overwrites $this->allowedTags).
   *
   * @param  int  one of the Texy::HTML* / Texy::XHTML* mode constants
   * @return void
   * @throws InvalidArgumentException when $mode is not one of those constants
   */
  public function setOutputMode($mode)
  {
      $isKnownMode = in_array($mode, array(
          self::HTML4_TRANSITIONAL,
          self::HTML4_STRICT,
          self::HTML5,
          self::XHTML1_TRANSITIONAL,
          self::XHTML1_STRICT,
          self::XHTML5,
      ), TRUE);
      if (!$isKnownMode) {
          throw new InvalidArgumentException("Invalid mode.");
      }

      if (!isset(self::$dtdCache[$mode])) {
          // The required file runs in this method's scope and is expected to
          // leave the descriptor in the local variable $dtd.
          // NOTE(review): it presumably reads $mode as well - keep both names.
          require dirname(__FILE__) . '/libs/DTD.php';
          self::$dtdCache[$mode] = $dtd;
      }

      $this->mode = $mode;
      $this->dtd = self::$dtdCache[$mode];
      TexyHtml::$xhtml = (bool) ($mode & self::XML); // TODO: remove?

      // accept all valid HTML tags and attributes by default
      $this->allowedTags = array();
      foreach ($this->dtd as $tag => $unused) {
          $this->allowedTags[$tag] = self::ALL;
      }
  }
  /**
   * Returns the output mode last stored by setOutputMode().
   *
   * @return int
   */
  public function getOutputMode()
  {
      return $this->mode;
  }
  /**
   * Instantiates every module and stores it in its public *Module property.
   *
   * Subclasses may override this method to change the set of modules.
   * The instantiation order of the original implementation is preserved.
   *
   * @return void
   */
  protected function loadModules()
  {
      // line parsing
      $this->scriptModule     = new TexyScriptModule($this);
      $this->htmlModule       = new TexyHtmlModule($this);
      $this->imageModule      = new TexyImageModule($this);
      $this->phraseModule     = new TexyPhraseModule($this);
      $this->linkModule       = new TexyLinkModule($this);
      $this->emoticonModule   = new TexyEmoticonModule($this);

      // block parsing
      $this->paragraphModule  = new TexyParagraphModule($this);
      $this->blockModule      = new TexyBlockModule($this);
      $this->figureModule     = new TexyFigureModule($this);
      $this->horizLineModule  = new TexyHorizLineModule($this);
      $this->blockQuoteModule = new TexyBlockQuoteModule($this);
      $this->tableModule      = new TexyTableModule($this);
      $this->headingModule    = new TexyHeadingModule($this);
      $this->listModule       = new TexyListModule($this);

      // post process
      $this->typographyModule = new TexyTypographyModule($this);
      $this->longWordsModule  = new TexyLongWordsModule($this);
      $this->htmlOutputModule = new TexyHtmlOutputModule($this);
  }
  /**
   * Registers a handler and its regular expression for inline parsing.
   *
   * The syntax named $name becomes allowed unless $this->allowed already
   * holds a setting for it.
   *
   * @param  callback  handler
   * @param  string    regular expression
   * @param  string    syntax name
   * @return void
   * @throws InvalidArgumentException when $handler is not callable
   */
  final public function registerLinePattern($handler, $pattern, $name)
  {
      if (!is_callable($handler)) {
          // syntax-only second check; used to obtain a printable callback name
          $able = is_callable($handler, TRUE, $textual);
          $reason = $able ? 'callable.' : 'valid PHP callback.';
          throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
      }

      if (!isset($this->allowed[$name])) {
          $this->allowed[$name] = TRUE;
      }

      $entry = array();
      $entry['handler'] = $handler;
      $entry['pattern'] = $pattern;
      $this->linePatterns[$name] = $entry;
  }
  /**
   * Registers a handler and its regular expression for block parsing.
   *
   * The "m" modifier is appended to the pattern, and the syntax named $name
   * becomes allowed unless $this->allowed already holds a setting for it.
   *
   * @param  callback  handler
   * @param  string    regular expression (ending with its delimiter/modifiers)
   * @param  string    syntax name
   * @return void
   * @throws InvalidArgumentException when $handler is not callable
   */
  final public function registerBlockPattern($handler, $pattern, $name)
  {
      if (!is_callable($handler)) {
          // syntax-only second check; used to obtain a printable callback name
          $able = is_callable($handler, TRUE, $textual);
          $reason = $able ? 'callable.' : 'valid PHP callback.';
          throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
      }

      if (!isset($this->allowed[$name])) {
          $this->allowed[$name] = TRUE;
      }

      $entry = array();
      $entry['handler'] = $handler;
      $entry['pattern'] = $pattern . 'm'; // force multiline
      $this->blockPatterns[$name] = $entry;
  }
  /**
   * Registers a line post-processing handler (applied in stringToHtml()).
   *
   * The syntax named $name becomes allowed unless $this->allowed already
   * holds a setting for it.
   *
   * @param  callback  handler
   * @param  string    syntax name
   * @return void
   * @throws InvalidArgumentException when $handler is not callable
   */
  final public function registerPostLine($handler, $name)
  {
      if (!is_callable($handler)) {
          // syntax-only second check; used to obtain a printable callback name
          $able = is_callable($handler, TRUE, $textual);
          $reason = $able ? 'callable.' : 'valid PHP callback.';
          throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
      }

      if (!isset($this->allowed[$name])) {
          $this->allowed[$name] = TRUE;
      }

      $this->postHandlers[$name] = $handler;
  }
  /**
   * Converts document in Texy! to (X)HTML code.
   *
   * The instance is not reentrant: a nested call while a conversion is running
   * raises InvalidStateException. The "processing" flag is now released even
   * when the conversion fails, so the object stays usable after an exception.
   *
   * @param  string  input text
   * @param  bool    is single line?
   * @return string  output HTML code
   * @throws InvalidStateException when called while another process() call is running
   */
  public function process($text, $singleLine = FALSE)
  {
      if ($this->processing) {
          throw new InvalidStateException('Processing is in progress yet.');
      }

      // initialization
      $this->marks = array();
      $this->processing = TRUE;

      try {
          // speed-up: membership is later tested against array keys
          $this->_classes = is_array($this->allowedClasses) ? array_flip($this->allowedClasses) : $this->allowedClasses;
          $this->_styles = is_array($this->allowedStyles) ? array_flip($this->allowedStyles) : $this->allowedStyles;

          // convert to UTF-8 (and check source encoding)
          $text = TexyUtf::toUtf($text, $this->encoding);

          if ($this->removeSoftHyphens) {
              $text = str_replace("\xC2\xAD", '', $text);
          }

          // standardize line endings and spaces
          $text = self::normalize($text);

          // replace tabs with spaces; each pass expands the first tab of every line
          $this->tabWidth = max(1, (int) $this->tabWidth);
          while (strpos($text, "\t") !== FALSE) {
              $text = preg_replace_callback('#^(.*)\t#mU', array($this, 'tabCb'), $text);
          }

          // user before handler ($text is handed over by reference)
          $this->invokeHandlers('beforeParse', array($this, & $text, $singleLine));

          // select patterns: keep only those whose syntax is currently allowed
          $this->_linePatterns = $this->linePatterns;
          $this->_blockPatterns = $this->blockPatterns;
          foreach ($this->_linePatterns as $name => $foo) {
              if (empty($this->allowed[$name])) unset($this->_linePatterns[$name]);
          }
          foreach ($this->_blockPatterns as $name => $foo) {
              if (empty($this->allowed[$name])) unset($this->_blockPatterns[$name]);
          }

          // parse Texy! document into internal DOM structure
          $this->DOM = TexyHtml::el();
          if ($singleLine) {
              $this->DOM->parseLine($this, $text);
          } else {
              $this->DOM->parseBlock($this, $text);
          }

          // user after handler
          $this->invokeHandlers('afterParse', array($this, $this->DOM, $singleLine));

          // converts internal DOM structure to final HTML code
          $html = $this->DOM->toHtml($this);

          // this notice should remain
          if (self::$advertisingNotice) {
              $html .= "\n<!-- by Texy2! -->";
              if (self::$advertisingNotice === 'once') {
                  self::$advertisingNotice = FALSE;
              }
          }

          $output = TexyUtf::utf2html($html, $this->encoding);

      } catch (Exception $e) {
          // no "finally" in the PHP versions this file targets: release and rethrow
          $this->processing = FALSE;
          throw $e;

      } catch (Throwable $e) {
          // PHP 7+ engine errors; this clause never matches on PHP 5
          $this->processing = FALSE;
          throw $e;
      }

      $this->processing = FALSE;
      return $output;
  }
  /**
   * Converts single line in Texy! to (X)HTML code.
   *
   * Shortcut for process() with the single-line flag switched on.
   *
   * @param  string  input text
   * @return string  output HTML code
   */
  public function processLine($text)
  {
      $html = $this->process($text, TRUE);
      return $html;
  }
  /**
   * Makes only typographic corrections.
   *
   * Runs the typography module and, when the 'longwords' syntax is allowed,
   * the long-words module over the normalized text.
   *
   * @param  string  input text (in encoding defined by Texy::$encoding)
   * @return string  output text as produced by TexyUtf::utf2html() for
   *                 Texy::$encoding (NOTE(review): the former docblock said
   *                 "in UTF-8" - confirm against TexyUtf)
   */
  public function processTypo($text)
  {
      // convert to UTF-8 (and check source encoding), then
      // standardize line endings and spaces
      $text = self::normalize(TexyUtf::toUtf($text, $this->encoding));

      $this->typographyModule->beforeParse($this, $text);
      $text = $this->typographyModule->postLine($text);

      $longWordsAllowed = !empty($this->allowed['longwords']);
      if ($longWordsAllowed) {
          $text = $this->longWordsModule->postLine($text);
      }

      return TexyUtf::utf2html($text, $this->encoding);
  }
  /**
   * Converts DOM structure to pure text.
   *
   * @return string
   * @throws InvalidStateException when no DOM has been built by process() yet
   */
  public function toText()
  {
      if ($this->DOM) {
          $text = $this->DOM->toText($this);
          return TexyUtf::utfTo($text, $this->encoding);
      }

      throw new InvalidStateException('Call $texy->process() first.');
  }
  /**
   * Converts internal string representation to final HTML code in UTF-8.
   *
   * @param  string  internal string
   * @return string
   */
  final public function stringToHtml($s)
  {
      // decode HTML entities to UTF-8
      $s = self::unescapeHtml($s);

      // line-postprocessing: the string is cut at CONTENT_BLOCK marks and only
      // the even-numbered pieces are handed to the allowed post handlers
      $pieces = explode(self::CONTENT_BLOCK, $s);
      $total = count($pieces);
      foreach ($this->postHandlers as $name => $handler) {
          if (!empty($this->allowed[$name])) {
              for ($i = 0; $i < $total; $i += 2) {
                  if ($pieces[$i] !== '') {
                      $pieces[$i] = call_user_func($handler, $pieces[$i]);
                  }
              }
          }
      }
      $s = implode(self::CONTENT_BLOCK, $pieces);

      // encode < > &
      $s = self::escapeHtml($s);

      // replace protected marks
      $s = $this->unProtect($s);

      // wellform and reformat HTML ($s is handed over by reference)
      $this->invokeHandlers('postProcess', array($this, & $s));

      // unfreeze spaces
      return self::unfreezeSpaces($s);
  }
  /**
   * Converts internal string representation to plain text in UTF-8.
   *
   * The string is rendered through stringToHtml() with line wrapping of the
   * HTML output module switched off; markup is then removed and entities are
   * decoded. The previous lineWrap value is restored even when rendering
   * throws.
   *
   * @param  string  internal string
   * @return string
   */
  final public function stringToText($s)
  {
      $save = $this->htmlOutputModule->lineWrap;
      $this->htmlOutputModule->lineWrap = FALSE;
      try {
          $s = $this->stringToHtml($s);
      } catch (Exception $e) {
          // no "finally" in the PHP versions this file targets: restore and rethrow
          $this->htmlOutputModule->lineWrap = $save;
          throw $e;
      }
      $this->htmlOutputModule->lineWrap = $save;

      // remove tags (script/style elements go together with their content)
      $s = preg_replace('#<(script|style)(.*)</\\1>#Uis', '', $s);
      $s = strip_tags($s);

      // collapse long runs of blank lines into a single blank line
      $s = preg_replace('#\n\s*\n\s*\n[\n\s]*\n#', "\n\n", $s);

      // entities -> chars
      $s = self::unescapeHtml($s);

      // convert nbsp to normal space and remove shy
      $s = strtr($s, array(
          "\xC2\xAD" => '',  // shy
          "\xC2\xA0" => ' ', // nbsp
      ));

      return $s;
  }
  /**
   * Add new event handler.
   *
   * Handlers are appended, so several callbacks may listen to one event.
   *
   * @param  string    event name
   * @param  callback
   * @return void
   * @throws InvalidArgumentException when $callback is not callable
   */
  final public function addHandler($event, $callback)
  {
      if (is_callable($callback)) {
          $this->handlers[$event][] = $callback;
          return;
      }

      // syntax-only second check; used to obtain a printable callback name
      $able = is_callable($callback, TRUE, $textual);
      $reason = $able ? 'callable.' : 'valid PHP callback.';
      throw new InvalidArgumentException("Handler '$textual' is not " . $reason);
  }
  /**
   * Invoke registered around-handlers.
   *
   * @param  string      event name
   * @param  TexyParser  actual parser object
   * @param  array       arguments passed into handler
   * @return mixed       FALSE when no handler is registered for the event,
   *                     otherwise the result of the invocation's proceed()
   */
  final public function invokeAroundHandlers($event, $parser, $args)
  {
      if (isset($this->handlers[$event])) {
          $invocation = new TexyHandlerInvocation($this->handlers[$event], $parser, $args);
          $result = $invocation->proceed();
          $invocation->free();
          return $result;
      }

      return FALSE;
  }
  /**
   * Invoke registered after-handlers.
   *
   * Every handler of the event is called, in registration order, with the
   * given arguments; return values are ignored.
   *
   * @param  string  event name
   * @param  array   arguments passed into handler
   * @return void
   */
  final public function invokeHandlers($event, $args)
  {
      if (isset($this->handlers[$event])) {
          foreach ($this->handlers[$event] as $callback) {
              call_user_func_array($callback, $args);
          }
      }
  }
  /**
   * Translate all white spaces (\t \n \r space) to meta-spaces \x01-\x04.
   * which are ignored by TexyHtmlOutputModule routine
   *
   * Mapping: space => \x01, \t => \x02, \r => \x03, \n => \x04.
   *
   * @param  string
   * @return string
   */
  final public static function freezeSpaces($s)
  {
      $whitespace = " \t\r\n";
      $metaSpaces = "\x01\x02\x03\x04";
      return strtr($s, $whitespace, $metaSpaces);
  }
  /**
   * Reverts meta-spaces back to normal spaces.
   *
   * Mapping: \x01 => space, \x02 => \t, \x03 => \r, \x04 => \n.
   *
   * @param  string
   * @return string
   */
  final public static function unfreezeSpaces($s)
  {
      $metaSpaces = "\x01\x02\x03\x04";
      $whitespace = " \t\r\n";
      return strtr($s, $metaSpaces, $whitespace);
  }
  /**
   * Removes special controls characters and normalizes line endings and spaces.
   *
   * @param  string
   * @return string
   */
  final public static function normalize($s)
  {
      // standardize line endings to unix-like: DOS pairs first, then lone Mac CRs
      $s = str_replace(array("\r\n", "\r"), "\n", $s);

      // remove special chars; leave \t + \n
      $s = preg_replace('#[\x00-\x08\x0B-\x1F]+#', '', $s);

      // right trim every line
      $s = preg_replace("#[\t ]+$#m", '', $s);

      // drop newlines at both ends of the text
      return trim($s, "\n");
  }
  /**
   * Converts to web safe characters [a-z0-9-] text.
   *
   * The text is transliterated by TexyUtf::utf2ascii() and lower-cased; every
   * run of characters other than a-z, 0-9 and those listed in $charlist is
   * replaced with a single dash, and dashes are trimmed from both ends.
   *
   * @param  string  text to convert
   * @param  string  additional characters to keep (optional)
   * @return string
   */
  final public static function webalize($s, $charlist = NULL)
  {
      $s = TexyUtf::utf2ascii($s);
      $s = strtolower($s);

      // cast: handing NULL (the default) to preg_quote() is deprecated since PHP 8.1
      $extra = preg_quote((string) $charlist, '#');
      $s = preg_replace('#[^a-z0-9' . $extra . ']+#', '-', $s);

      return trim($s, '-');
  }
  /**
   * Texy! version of htmlSpecialChars (much faster than htmlSpecialChars!).
   * note: &quot; is not encoded!
   *
   * @param  string
   * @return string
   */
  final public static function escapeHtml($s)
  {
      // "&" comes first so the entities produced for < and > are not re-encoded
      $search = array('&', '<', '>');
      $replacement = array('&amp;', '&lt;', '&gt;');
      return str_replace($search, $replacement, $s);
  }
  /**
   * Texy! version of html_entity_decode (always UTF-8, much faster than original!).
   *
   * @param  string
   * @return string
   */
  final public static function unescapeHtml($s)
  {
      // fast path: without an ampersand there is nothing to decode
      $hasEntity = strpos($s, '&') !== FALSE;
      return $hasEntity ? html_entity_decode($s, ENT_QUOTES, 'UTF-8') : $s;
  }
  /**
   * Outdents text block.
   *
   * The number of leading spaces of the first line (after newlines were
   * trimmed from both ends) sets the limit: at most that many leading spaces
   * are removed from every line.
   *
   * @param  string
   * @return string
   */
  final public static function outdent($s)
  {
      $s = trim($s, "\n");
      $indent = strspn($s, ' ');
      if (!$indent) {
          return $s;
      }

      return preg_replace('#^ {1,' . $indent . '}#m', '', $s);
  }
  /**
   * Generate unique mark - useful for freezing (folding) some substrings.
   *
   * The mark is the number of marks stored so far, written in octal with the
   * digits 0-7 mapped to \x18-\x1F, wrapped in the content-type character on
   * both sides. The folded string is remembered for unProtect().
   *
   * @param  string  any string to freeze
   * @param  string  Texy::CONTENT_* constant
   * @return string  internal mark ('' for an empty input string)
   */
  final public function protect($child, $contentType)
  {
      if ($child === '') {
          return '';
      }

      $octal = base_convert(count($this->marks), 10, 8);
      $serial = strtr($octal, '01234567', "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F");
      $mark = $contentType . $serial . $contentType;

      $this->marks[$mark] = $child;
      return $mark;
  }
  /**
   * Replaces the marks generated by protect() with the strings they stand for.
   *
   * @param  string  text containing internal marks
   * @return string
   */
  final public function unProtect($html)
  {
      $table = $this->marks;
      return strtr($html, $table);
  }
  /**
   * Filters bad URLs.
   *
   * A URL is refused only when a filter is configured for $type, the URL
   * starts with a scheme and the filter does not match it.
   *
   * @param  string  user URL
   * @param  string  type: a-anchor, i-image, c-cite
   * @return bool    FALSE when the URL is refused, TRUE otherwise
   */
  final public function checkURL($URL, $type)
  {
      // no filter configured for this type: everything passes
      if (empty($this->urlSchemeFilters[$type])) {
          return TRUE;
      }

      // URL without a scheme: nothing to check
      if (!preg_match('#'.TEXY_URLSCHEME.'#A', $URL)) {
          return TRUE;
      }

      // absolute URL with scheme? check scheme!
      return (bool) preg_match($this->urlSchemeFilters[$type], $URL);
  }
  /**
   * Is given URL relative?
   *
   * @param  string  URL
   * @return bool    FALSE when the URL starts with a scheme or with one of
   *                 the characters "#", "/" or "?"
   */
  final public static function isRelative($URL)
  {
      // check for scheme, or absolute path, or absolute URL
      $anchoredMatch = preg_match('#'.TEXY_URLSCHEME.'|[\#/?]#A', $URL);
      return !$anchoredMatch;
  }
  /**
   * Prepends root to URL, if possible.
   *
   * The URL is returned untouched when $root loosely equals NULL or the URL
   * is not relative; otherwise trailing slashes and backslashes are removed
   * from $root and both parts are joined with a single "/".
   *
   * @param  string  URL
   * @param  string  root
   * @return string
   */
  final public static function prependRoot($URL, $root)
  {
      if ($root != NULL && self::isRelative($URL)) {
          return rtrim($root, '/\\') . '/' . $URL;
      }

      return $URL;
  }
  /**
   * Returns the inline patterns selected by the last process() call
   * (registered patterns minus those whose syntax was not allowed).
   *
   * @return array
   */
  final public function getLinePatterns()
  {
      return $this->_linePatterns;
  }
  /**
   * Returns the block patterns selected by the last process() call
   * (registered patterns minus those whose syntax was not allowed).
   *
   * @return array
   */
  final public function getBlockPatterns()
  {
      return $this->_blockPatterns;
  }
  /**
   * Returns the DOM structure built by the last process() call.
   *
   * @return TexyHtml
   */
  final public function getDOM()
  {
      return $this->DOM;
  }
  /**
   * Callback for the tab expansion in process(): replaces one tab with the
   * number of spaces needed to reach the next tab stop.
   *
   * The column is counted in UTF-8 characters instead of bytes (bytes
   * 0x80-0xBF are continuation bytes and do not start a character), so text
   * containing multibyte characters in front of a tab lines up correctly.
   *
   * @param  array  regexp matches; [1] is the text of the line before the tab
   * @return string
   */
  private function tabCb($m)
  {
      $column = strlen(preg_replace('#[\x80-\xBF]#', '', $m[1]));
      return $m[1] . str_repeat(' ', $this->tabWidth - $column % $this->tabWidth);
  }
  /**
   * PHP garbage collector helper.
   *
   * On PHP older than 5.3 every object property is set to NULL; on newer
   * versions the call does nothing.
   *
   * @return void
   */
  final public function free()
  {
      if (!version_compare(PHP_VERSION , '5.3', '<')) {
          return;
      }

      foreach (get_object_vars($this) as $property => $unused) {
          $this->$property = NULL;
      }
  }
  /**
   * Cloning is refused.
   *
   * @throws NotSupportedException always
   */
  final public function __clone()
  {
      $message = 'Clone is not supported.';
      throw new NotSupportedException($message);
  }
  736. }