PageRenderTime 48ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/apigen/libs/Texy/texy/texy.php

https://github.com/boxyman/woocommerce
PHP | 965 lines | 486 code | 218 blank | 261 comment | 51 complexity | 06bbac76390365aa96fff69903330f45 MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, BSD-3-Clause
  1. <?php
  2. /**
  3. * Texy! - human-readable text to HTML converter.
  4. *
  5. * @copyright Copyright (c) 2004, 2010 David Grudl
  6. * @license GNU GENERAL PUBLIC LICENSE version 2 or 3
  7. * @link http://texy.info
  8. * @package Texy
  9. */
  define('TEXY_VERSION', '2.1');

  /**
   * Check PHP configuration.
   *
   * Texy works on raw bytes of UTF-8 strings, so the str*() functions must not
   * be transparently replaced by their multibyte variants.
   */
  if (extension_loaded('mbstring')) {
      // Bit 2 of mbstring.func_overload is the "string functions" overload.
      // The setting is read through ini_get() because mb_get_info() rejects
      // the 'func_overload' type on PHP 8 (the feature was removed there) and
      // would abort the include with a ValueError; ini_get() just returns FALSE.
      if (((int) ini_get('mbstring.func_overload') & 2)
          && substr(mb_internal_encoding(), 0, 1) === 'U' // encoding name starts with "U"
      ) {
          mb_internal_encoding('pass');
          trigger_error("Texy: mb_internal_encoding changed to 'pass'", E_USER_WARNING);
      }
  }

  // The directive may hold a number or one of the textual switches on/true/yes.
  // The (int) cast keeps the modulo safe for non-numeric values.
  if ((int) ini_get('zend.ze1_compatibility_mode') % 256 ||
      preg_match('#on$|true$|yes$#iA', (string) ini_get('zend.ze1_compatibility_mode'))) {
      throw new RuntimeException("Texy cannot run with zend.ze1_compatibility_mode enabled.");
  }
  24. // Texy! libraries
  25. require_once dirname(__FILE__) . '/libs/RegExp.Patterns.php';
  26. require_once dirname(__FILE__) . '/libs/TexyObject.php';
  27. require_once dirname(__FILE__) . '/libs/TexyHtml.php';
  28. require_once dirname(__FILE__) . '/libs/TexyModifier.php';
  29. require_once dirname(__FILE__) . '/libs/TexyModule.php';
  30. require_once dirname(__FILE__) . '/libs/TexyParser.php';
  31. require_once dirname(__FILE__) . '/libs/TexyUtf.php';
  32. require_once dirname(__FILE__) . '/libs/TexyConfigurator.php';
  33. require_once dirname(__FILE__) . '/libs/TexyHandlerInvocation.php';
  34. require_once dirname(__FILE__) . '/modules/TexyParagraphModule.php';
  35. require_once dirname(__FILE__) . '/modules/TexyBlockModule.php';
  36. require_once dirname(__FILE__) . '/modules/TexyHeadingModule.php';
  37. require_once dirname(__FILE__) . '/modules/TexyHorizLineModule.php';
  38. require_once dirname(__FILE__) . '/modules/TexyHtmlModule.php';
  39. require_once dirname(__FILE__) . '/modules/TexyFigureModule.php';
  40. require_once dirname(__FILE__) . '/modules/TexyImageModule.php';
  41. require_once dirname(__FILE__) . '/modules/TexyLinkModule.php';
  42. require_once dirname(__FILE__) . '/modules/TexyListModule.php';
  43. require_once dirname(__FILE__) . '/modules/TexyLongWordsModule.php';
  44. require_once dirname(__FILE__) . '/modules/TexyPhraseModule.php';
  45. require_once dirname(__FILE__) . '/modules/TexyBlockQuoteModule.php';
  46. require_once dirname(__FILE__) . '/modules/TexyScriptModule.php';
  47. require_once dirname(__FILE__) . '/modules/TexyEmoticonModule.php';
  48. require_once dirname(__FILE__) . '/modules/TexyTableModule.php';
  49. require_once dirname(__FILE__) . '/modules/TexyTypographyModule.php';
  50. require_once dirname(__FILE__) . '/modules/TexyHtmlOutputModule.php';
  /**
   * Compatibility with PHP < 5.1.
   *
   * Declares the SPL exception classes used by Texy when the running PHP
   * does not provide them. Autoloading is deliberately not triggered.
   */
  if (class_exists('LogicException', FALSE) === FALSE) {
      class LogicException extends Exception
      {
      }
  }

  if (class_exists('InvalidArgumentException', FALSE) === FALSE) {
      class InvalidArgumentException extends LogicException
      {
      }
  }

  if (class_exists('RuntimeException', FALSE) === FALSE) {
      class RuntimeException extends Exception
      {
      }
  }

  if (class_exists('UnexpectedValueException', FALSE) === FALSE) {
      class UnexpectedValueException extends RuntimeException
      {
      }
  }
  /**
   * Compatibility with Nette
   *
   * These exception classes are declared only when nothing else has
   * declared them already. Autoloading is not triggered.
   */
  if (class_exists('NotSupportedException', FALSE) === FALSE) {
      class NotSupportedException extends LogicException
      {
      }
  }

  if (class_exists('MemberAccessException', FALSE) === FALSE) {
      class MemberAccessException extends LogicException
      {
      }
  }

  if (class_exists('InvalidStateException', FALSE) === FALSE) {
      class InvalidStateException extends RuntimeException
      {
      }
  }
  /**
   * For Texy 1 backward compatibility.
   *
   * Global aliases of the values exposed as Texy::ALL, Texy::NONE and
   * Texy::CONTENT_* class constants.
   */

  // configuration directives
  define('TEXY_ALL',  TRUE);
  define('TEXY_NONE', FALSE);

  // types of protection marks
  define('TEXY_CONTENT_MARKUP',   "\x17");
  define('TEXY_CONTENT_REPLACED', "\x16");
  define('TEXY_CONTENT_TEXTUAL',  "\x15");
  define('TEXY_CONTENT_BLOCK',    "\x14");
  /**
   * Texy! - Convert plain text to XHTML format using {@link process()}.
   *
   * <code>
   * $texy = new Texy();
   * $html = $texy->process($text);
   * </code>
   *
   * @copyright Copyright (c) 2004, 2010 David Grudl
   * @package Texy
   */
  class Texy extends TexyObject
  {
      // configuration directives
      const ALL = TRUE;
      const NONE = FALSE;

      // Texy version
      const VERSION = TEXY_VERSION;
      const REVISION = '$WCREV$ released on $WCDATE$';

      // types of protection marks
      const CONTENT_MARKUP = "\x17";
      const CONTENT_REPLACED = "\x16";
      const CONTENT_TEXTUAL = "\x15";
      const CONTENT_BLOCK = "\x14";

      // url filters
      const FILTER_ANCHOR = 'anchor';
      const FILTER_IMAGE = 'image';

      // HTML minor-modes
      const XML = 2;

      // HTML modes
      const HTML4_TRANSITIONAL = 0;
      const HTML4_STRICT = 1;
      const HTML5 = 4;
      const XHTML1_TRANSITIONAL = 2; // Texy::HTML4_TRANSITIONAL | Texy::XML;
      const XHTML1_STRICT = 3; // Texy::HTML4_STRICT | Texy::XML;
      const XHTML5 = 6; // Texy::HTML5 | Texy::XML;

      /** @var string input & output text encoding */
      public $encoding = 'utf-8';

      /** @var array Texy! syntax configuration (syntax name => enabled?) */
      public $allowed = array();

      /** @var TRUE|FALSE|array Allowed HTML tags */
      public $allowedTags;

      /** @var TRUE|FALSE|array Allowed classes */
      public $allowedClasses = Texy::ALL; // all classes and id are allowed

      /** @var TRUE|FALSE|array Allowed inline CSS style */
      public $allowedStyles = Texy::ALL; // all inline styles are allowed

      /** @var int TAB width (for converting tabs to spaces) */
      public $tabWidth = 8;

      /** @var boolean Do obfuscate e-mail addresses? */
      public $obfuscateEmail = TRUE;

      /** @var array regexps to check URL schemes, indexed by Texy::FILTER_* */
      public $urlSchemeFilters = NULL; // disable URL scheme filter

      /** @var bool Paragraph merging mode */
      public $mergeLines = TRUE;

      /** @var array Parsing summary */
      public $summary = array(
          'images' => array(),
          'links' => array(),
          'preload' => array(),
      );

      /** @var string Generated stylesheet */
      public $styleSheet = '';

      /** @var array CSS classes for align modifiers */
      public $alignClasses = array(
          'left' => NULL,
          'right' => NULL,
          'center' => NULL,
          'justify' => NULL,
          'top' => NULL,
          'middle' => NULL,
          'bottom' => NULL,
      );

      /** @var bool remove soft hyphens (SHY)? */
      public $removeSoftHyphens = TRUE;

      /** @var mixed TRUE = always, FALSE = never, 'once' = only after the first process() call */
      public static $advertisingNotice = 'once';

      /** @var string */
      public $nontextParagraph = 'div';

      /** @var TexyScriptModule */
      public $scriptModule;

      /** @var TexyParagraphModule */
      public $paragraphModule;

      /** @var TexyHtmlModule */
      public $htmlModule;

      /** @var TexyImageModule */
      public $imageModule;

      /** @var TexyLinkModule */
      public $linkModule;

      /** @var TexyPhraseModule */
      public $phraseModule;

      /** @var TexyEmoticonModule */
      public $emoticonModule;

      /** @var TexyBlockModule */
      public $blockModule;

      /** @var TexyHeadingModule */
      public $headingModule;

      /** @var TexyHorizLineModule */
      public $horizLineModule;

      /** @var TexyBlockQuoteModule */
      public $blockQuoteModule;

      /** @var TexyListModule */
      public $listModule;

      /** @var TexyTableModule */
      public $tableModule;

      /** @var TexyFigureModule */
      public $figureModule;

      /** @var TexyTypographyModule */
      public $typographyModule;

      /** @var TexyLongWordsModule */
      public $longWordsModule;

      /** @var TexyHtmlOutputModule */
      public $htmlOutputModule;

      /**
       * Registered regexps and associated handlers for inline parsing.
       * @var array of ('handler' => callback
       *                'pattern' => regular expression
       *                'again' => value passed to registerLinePattern())
       */
      private $linePatterns = array();

      /** @var array subset of $linePatterns enabled for the current process() run */
      private $_linePatterns;

      /**
       * Registered regexps and associated handlers for block parsing.
       * @var array of ('handler' => callback
       *                'pattern' => regular expression)
       */
      private $blockPatterns = array();

      /** @var array subset of $blockPatterns enabled for the current process() run */
      private $_blockPatterns;

      /** @var array post-line handlers (name => callback) */
      private $postHandlers = array();

      /** @var TexyHtml DOM structure for parsed text */
      private $DOM;

      /** @var array Texy protect markup table (mark => protected string) */
      private $marks = array();

      /** @var array for internal usage */
      public $_classes, $_styles;

      /** @var bool re-entrancy guard of process() */
      private $processing;

      /** @var array of events and registered handlers */
      private $handlers = array();

      /**
       * DTD descriptor.
       * $dtd[element][0] - allowed attributes (as array keys)
       * $dtd[element][1] - allowed content for an element (content model) (as array keys)
       *   - array of allowed elements (as keys)
       *   - FALSE - empty element
       *   - 0 - special case for ins & del
       * @var array
       */
      public $dtd;

      /** @var array DTD descriptors shared by all instances, indexed by output mode */
      private static $dtdCache;

      /** @var int HTML mode */
      private $mode;

      /** DEPRECATED */
      public static $strictDTD;
      public $cleaner;
      public $xhtml;


      /**
       * Loads the modules, selects the output mode and registers the
       * built-in link references 'texy', 'google' and 'wikipedia'.
       */
      public function __construct()
      {
          // load all modules
          $this->loadModules();

          // DEPRECATED: the static $strictDTD switch still selects the XHTML 1 flavour
          if (self::$strictDTD !== NULL) {
              $this->setOutputMode(self::$strictDTD ? self::XHTML1_STRICT : self::XHTML1_TRANSITIONAL);
          } else {
              $this->setOutputMode(self::XHTML1_TRANSITIONAL);
          }

          // DEPRECATED: $cleaner is an alias (by reference) of $htmlOutputModule
          $this->cleaner = & $this->htmlOutputModule;

          // examples of link references ;-)
          $link = new TexyLink('http://texy.info/');
          $link->modifier->title = 'The best text -> HTML converter and formatter';
          $link->label = 'Texy!';
          $this->linkModule->addReference('texy', $link);

          $link = new TexyLink('http://www.google.com/search?q=%s');
          $this->linkModule->addReference('google', $link);

          $link = new TexyLink('http://en.wikipedia.org/wiki/Special:Search?search=%s');
          $this->linkModule->addReference('wikipedia', $link);
      }


      /**
       * Set HTML/XHTML output mode (overwrites $this->allowedTags and $this->dtd)
       * @param int one of the Texy::HTML* / Texy::XHTML* constants
       * @return void
       * @throws InvalidArgumentException when $mode is not one of the mode constants
       */
      public function setOutputMode($mode)
      {
          if (!in_array($mode, array(self::HTML4_TRANSITIONAL, self::HTML4_STRICT,
              self::HTML5, self::XHTML1_TRANSITIONAL, self::XHTML1_STRICT, self::XHTML5), TRUE)) {
              throw new InvalidArgumentException("Invalid mode.");
          }

          if (!isset(self::$dtdCache[$mode])) {
              // the included script runs in this method's scope and is expected
              // to populate the local variable $dtd
              // NOTE(review): presumably it builds $dtd according to $mode - DTD.php is not visible here
              require dirname(__FILE__) . '/libs/DTD.php';
              self::$dtdCache[$mode] = $dtd;
          }

          $this->mode = $mode;
          $this->dtd = self::$dtdCache[$mode];
          TexyHtml::$xhtml = (bool) ($mode & self::XML); // TODO: remove?

          // accept all valid HTML tags and attributes by default
          $this->allowedTags = array();
          foreach ($this->dtd as $tag => $foo) {
              $this->allowedTags[$tag] = self::ALL;
          }
      }


      /**
       * Get HTML/XHTML output mode
       * @return int
       */
      public function getOutputMode()
      {
          return $this->mode;
      }


      /**
       * Instantiates all used modules and stores them in the $...Module properties.
       * The set of modules can be changed by overriding this method (by subclasses)
       * @return void
       */
      protected function loadModules()
      {
          // line parsing
          $this->scriptModule = new TexyScriptModule($this);
          $this->htmlModule = new TexyHtmlModule($this);
          $this->imageModule = new TexyImageModule($this);
          $this->phraseModule = new TexyPhraseModule($this);
          $this->linkModule = new TexyLinkModule($this);
          $this->emoticonModule = new TexyEmoticonModule($this);

          // block parsing
          $this->paragraphModule = new TexyParagraphModule($this);
          $this->blockModule = new TexyBlockModule($this);
          $this->figureModule = new TexyFigureModule($this);
          $this->horizLineModule = new TexyHorizLineModule($this);
          $this->blockQuoteModule = new TexyBlockQuoteModule($this);
          $this->tableModule = new TexyTableModule($this);
          $this->headingModule = new TexyHeadingModule($this);
          $this->listModule = new TexyListModule($this);

          // post process
          $this->typographyModule = new TexyTypographyModule($this);
          $this->longWordsModule = new TexyLongWordsModule($this);
          $this->htmlOutputModule = new TexyHtmlOutputModule($this);
      }


      /**
       * Registers a handler for inline (line) parsing under the given syntax name.
       * The syntax is enabled in $allowed unless it already has an entry there.
       *
       * @param callback handler
       * @param string regular expression
       * @param string syntax name
       * @param mixed stored as 'again' next to the pattern; its meaning is defined by the line parser (not visible here)
       * @return void
       * @throws InvalidArgumentException when the handler is not callable
       */
      final public function registerLinePattern($handler, $pattern, $name, $againTest = NULL)
      {
          self::checkCallback($handler);

          if (!isset($this->allowed[$name])) $this->allowed[$name] = TRUE;

          $this->linePatterns[$name] = array(
              'handler' => $handler,
              'pattern' => $pattern,
              'again' => $againTest,
          );
      }


      /**
       * Registers a handler for block parsing under the given syntax name.
       * The syntax is enabled in $allowed unless it already has an entry there.
       *
       * @param callback handler
       * @param string regular expression; the "m" modifier is appended to it
       * @param string syntax name
       * @return void
       * @throws InvalidArgumentException when the handler is not callable
       */
      final public function registerBlockPattern($handler, $pattern, $name)
      {
          self::checkCallback($handler);

          // if (!preg_match('#(.)\^.*\$\\1[a-z]*#is', $pattern)) die("Texy: Not a block pattern $name");
          if (!isset($this->allowed[$name])) $this->allowed[$name] = TRUE;

          $this->blockPatterns[$name] = array(
              'handler' => $handler,
              'pattern' => $pattern . 'm', // force multiline
          );
      }


      /**
       * Registers a post-line handler, applied by stringToHtml() to the
       * textual parts of the output.
       * The syntax is enabled in $allowed unless it already has an entry there.
       *
       * @param callback handler
       * @param string syntax name
       * @return void
       * @throws InvalidArgumentException when the handler is not callable
       */
      final public function registerPostLine($handler, $name)
      {
          self::checkCallback($handler);

          if (!isset($this->allowed[$name])) $this->allowed[$name] = TRUE;

          $this->postHandlers[$name] = $handler;
      }


      /**
       * Converts document in Texy! to (X)HTML code.
       *
       * @param string input text
       * @param bool is single line?
       * @return string output HTML code
       * @throws InvalidStateException when called while another process() call is running
       */
      public function process($text, $singleLine = FALSE)
      {
          if ($this->processing) {
              throw new InvalidStateException('Processing is in progress yet.');
          }

          // initialization
          $this->marks = array();
          $this->processing = TRUE;

          try {
              // speed-up
              if (is_array($this->allowedClasses)) $this->_classes = array_flip($this->allowedClasses);
              else $this->_classes = $this->allowedClasses;

              if (is_array($this->allowedStyles)) $this->_styles = array_flip($this->allowedStyles);
              else $this->_styles = $this->allowedStyles;

              // convert to UTF-8 (and check source encoding)
              $text = TexyUtf::toUtf($text, $this->encoding);

              if ($this->removeSoftHyphens) {
                  $text = str_replace("\xC2\xAD", '', $text);
              }

              // standardize line endings and spaces
              $text = self::normalize($text);

              // replace tabs with spaces; each pass expands the first tab of every line
              $this->tabWidth = max(1, (int) $this->tabWidth);
              while (strpos($text, "\t") !== FALSE) {
                  $text = preg_replace_callback('#^(.*)\t#mU', array($this, 'tabCb'), $text);
              }

              // user before handler ($text is passed by reference, handlers may modify it)
              $this->invokeHandlers('beforeParse', array($this, & $text, $singleLine));

              // select patterns: only syntaxes currently enabled in $allowed take part
              $this->_linePatterns = $this->linePatterns;
              $this->_blockPatterns = $this->blockPatterns;

              foreach ($this->_linePatterns as $name => $foo) {
                  if (empty($this->allowed[$name])) unset($this->_linePatterns[$name]);
              }

              foreach ($this->_blockPatterns as $name => $foo) {
                  if (empty($this->allowed[$name])) unset($this->_blockPatterns[$name]);
              }

              // parse Texy! document into internal DOM structure
              $this->DOM = TexyHtml::el();
              if ($singleLine) {
                  $this->DOM->parseLine($this, $text);
              } else {
                  $this->DOM->parseBlock($this, $text);
              }

              // user after handler
              $this->invokeHandlers('afterParse', array($this, $this->DOM, $singleLine));

              // converts internal DOM structure to final HTML code
              $html = $this->DOM->toHtml($this);

              // this notice should remain
              if (self::$advertisingNotice) {
                  $html .= "\n<!-- by Texy2! -->";
                  if (self::$advertisingNotice === 'once') {
                      self::$advertisingNotice = FALSE;
                  }
              }

              $html = TexyUtf::utf2html($html, $this->encoding);

          } catch (Exception $e) {
              // stand-in for "finally" (not available in the PHP versions this
              // file supports): release the guard, otherwise a single failure
              // would make every later process() call throw InvalidStateException
              $this->processing = FALSE;
              throw $e;
          }

          $this->processing = FALSE;
          return $html;
      }


      /**
       * Converts single line in Texy! to (X)HTML code.
       *
       * @param string input text
       * @return string output HTML code
       */
      public function processLine($text)
      {
          return $this->process($text, TRUE);
      }


      /**
       * Makes only typographic corrections.
       * @param string input text (in encoding defined by Texy::$encoding)
       * @return string output text as returned by TexyUtf::utf2html() for Texy::$encoding
       */
      public function processTypo($text)
      {
          // convert to UTF-8 (and check source encoding)
          $text = TexyUtf::toUtf($text, $this->encoding);

          // standardize line endings and spaces
          $text = self::normalize($text);

          $this->typographyModule->beforeParse($this, $text);
          $text = $this->typographyModule->postLine($text, TRUE);

          if (!empty($this->allowed['longwords'])) {
              $text = $this->longWordsModule->postLine($text);
          }

          return TexyUtf::utf2html($text, $this->encoding);
      }


      /**
       * Converts DOM structure to pure text.
       * @return string
       * @throws InvalidStateException when no document has been processed yet
       */
      public function toText()
      {
          if (!$this->DOM) {
              throw new InvalidStateException('Call $texy->process() first.');
          }

          return TexyUtf::utfTo($this->DOM->toText($this), $this->encoding);
      }


      /**
       * Converts internal string representation to final HTML code in UTF-8.
       * @param string
       * @return string
       */
      final public function stringToHtml($s)
      {
          // decode HTML entities to UTF-8
          $s = self::unescapeHtml($s);

          // line-postprocessing: only even-indexed parts, i.e. those outside of
          // CONTENT_BLOCK marks, are passed to the enabled post-line handlers
          $blocks = explode(self::CONTENT_BLOCK, $s);
          foreach ($this->postHandlers as $name => $handler) {
              if (empty($this->allowed[$name])) continue;
              foreach ($blocks as $n => $block) {
                  if ($n % 2 === 0 && $block !== '') {
                      $blocks[$n] = call_user_func($handler, $block);
                  }
              }
          }
          $s = implode(self::CONTENT_BLOCK, $blocks);

          // encode < > &
          $s = self::escapeHtml($s);

          // replace protected marks
          $s = $this->unProtect($s);

          // wellform and reformat HTML ($s is passed by reference)
          $this->invokeHandlers('postProcess', array($this, & $s));

          // unfreeze spaces
          $s = self::unfreezeSpaces($s);

          return $s;
      }


      /**
       * Converts internal string representation to plain text in UTF-8.
       * @param string
       * @return string
       */
      final public function stringToText($s)
      {
          // line wrapping is switched off for the conversion only
          $save = $this->htmlOutputModule->lineWrap;
          $this->htmlOutputModule->lineWrap = FALSE;
          try {
              $s = $this->stringToHtml( $s );
          } catch (Exception $e) {
              // do not leave the module reconfigured when the conversion fails
              $this->htmlOutputModule->lineWrap = $save;
              throw $e;
          }
          $this->htmlOutputModule->lineWrap = $save;

          // remove tags
          $s = preg_replace('#<(script|style)(.*)</\\1>#Uis', '', $s);
          $s = strip_tags($s);
          $s = preg_replace('#\n\s*\n\s*\n[\n\s]*\n#', "\n\n", $s);

          // entities -> chars
          $s = self::unescapeHtml($s);

          // convert nbsp to normal space and remove shy
          $s = strtr($s, array(
              "\xC2\xAD" => '', // shy
              "\xC2\xA0" => ' ', // nbsp
          ));

          return $s;
      }


      /**
       * Add new event handler.
       *
       * @param string event name
       * @param callback
       * @return void
       * @throws InvalidArgumentException when the callback is not callable
       */
      final public function addHandler($event, $callback)
      {
          self::checkCallback($callback);

          $this->handlers[$event][] = $callback;
      }


      /**
       * Invoke registered around-handlers.
       *
       * @param string event name
       * @param TexyParser actual parser object
       * @param array arguments passed into handler
       * @return mixed FALSE when no handler is registered for the event,
       *               otherwise the result of TexyHandlerInvocation::proceed()
       */
      final public function invokeAroundHandlers($event, $parser, $args)
      {
          if (!isset($this->handlers[$event])) return FALSE;

          $invocation = new TexyHandlerInvocation($this->handlers[$event], $parser, $args);
          $res = $invocation->proceed();
          $invocation->free();
          return $res;
      }


      /**
       * Invoke registered after-handlers.
       *
       * @param string event name
       * @param array arguments passed into handler
       * @return void
       */
      final public function invokeHandlers($event, $args)
      {
          if (!isset($this->handlers[$event])) return;

          foreach ($this->handlers[$event] as $handler) {
              call_user_func_array($handler, $args);
          }
      }


      /**
       * Translate all white spaces (\t \n \r space) to meta-spaces \x01-\x04.
       * which are ignored by TexyHtmlOutputModule routine
       * @param string
       * @return string
       */
      final public static function freezeSpaces($s)
      {
          return strtr($s, " \t\r\n", "\x01\x02\x03\x04");
      }


      /**
       * Reverts meta-spaces back to normal spaces.
       * @param string
       * @return string
       */
      final public static function unfreezeSpaces($s)
      {
          return strtr($s, "\x01\x02\x03\x04", " \t\r\n");
      }


      /**
       * Removes special controls characters and normalizes line endings and spaces.
       * @param string
       * @return string
       */
      final public static function normalize($s)
      {
          // standardize line endings to unix-like
          $s = str_replace("\r\n", "\n", $s); // DOS
          $s = strtr($s, "\r", "\n"); // Mac

          // remove special chars; leave \t + \n
          $s = preg_replace('#[\x00-\x08\x0B-\x1F]+#', '', $s);

          // right trim of every line
          $s = preg_replace("#[\t ]+$#m", '', $s);

          // leading and trailing blank lines
          $s = trim($s, "\n");

          return $s;
      }


      /**
       * Converts to web safe characters [a-z0-9-] text.
       * @param string
       * @param string additional characters that are kept as they are
       * @return string
       */
      final public static function webalize($s, $charlist = NULL)
      {
          $s = TexyUtf::utf2ascii($s);
          $s = strtolower($s);
          // the cast keeps the default NULL away from preg_quote(), which
          // expects a string
          $s = preg_replace('#[^a-z0-9'.preg_quote((string) $charlist, '#').']+#', '-', $s);
          $s = trim($s, '-');
          return $s;
      }


      /**
       * Texy! version of htmlSpecialChars (much faster than htmlSpecialChars!).
       * note: &quot; is not encoded!
       * @param string
       * @return string
       */
      final public static function escapeHtml($s)
      {
          return str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $s);
      }


      /**
       * Texy! version of html_entity_decode (always UTF-8, much faster than original!).
       * @param string
       * @return string
       */
      final public static function unescapeHtml($s)
      {
          // nothing to decode without an ampersand
          if (strpos($s, '&') === FALSE) return $s;
          return html_entity_decode($s, ENT_QUOTES, 'UTF-8');
      }


      /**
       * Outdents text block: removes from every line at most as many leading
       * spaces as the first line starts with.
       * @param string
       * @return string
       */
      final public static function outdent($s)
      {
          $s = trim($s, "\n");
          $spaces = strspn($s, ' ');
          if ($spaces) return preg_replace("#^ {1,$spaces}#m", '', $s);
          return $s;
      }


      /**
       * Generate unique mark - useful for freezing (folding) some substrings.
       * @param string any string to froze
       * @param string Texy::CONTENT_* constant
       * @return string internal mark (empty string for empty input)
       */
      final public function protect($child, $contentType)
      {
          if ($child==='') return '';

          // the mark is the ordinal number of the protected string written in
          // octal digits mapped to \x18-\x1F, wrapped in the content type char
          $key = $contentType
              . strtr(base_convert(count($this->marks), 10, 8), '01234567', "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F")
              . $contentType;

          $this->marks[$key] = $child;

          return $key;
      }


      /**
       * Replaces all marks generated by protect() with the strings they stand for.
       * @param string
       * @return string
       */
      final public function unProtect($html)
      {
          return strtr($html, $this->marks);
      }


      /**
       * Filters bad URLs.
       * @param string user URL
       * @param string type: key of $urlSchemeFilters (see Texy::FILTER_ANCHOR, Texy::FILTER_IMAGE)
       * @return bool FALSE when the URL has a scheme that the filter for the given type rejects
       */
      final public function checkURL($URL, $type)
      {
          // absolute URL with scheme? check scheme!
          if (!empty($this->urlSchemeFilters[$type])
              && preg_match('#'.TEXY_URLSCHEME.'#A', $URL)
              && !preg_match($this->urlSchemeFilters[$type], $URL))
              return FALSE;

          return TRUE;
      }


      /**
       * Is given URL relative?
       * @param string URL
       * @return bool
       */
      final public static function isRelative($URL)
      {
          // check for scheme, or absolute path, or absolute URL
          return !preg_match('#'.TEXY_URLSCHEME.'|[\#/?]#A', $URL);
      }


      /**
       * Prepends root to URL, if possible.
       * @param string URL
       * @param string root
       * @return string
       */
      final public static function prependRoot($URL, $root)
      {
          if ($root == NULL || !self::isRelative($URL)) return $URL;
          return rtrim($root, '/\\') . '/' . $URL;
      }


      /**
       * Returns the line patterns selected by the last process() call.
       * @return array
       */
      final public function getLinePatterns()
      {
          return $this->_linePatterns;
      }


      /**
       * Returns the block patterns selected by the last process() call.
       * @return array
       */
      final public function getBlockPatterns()
      {
          return $this->_blockPatterns;
      }


      /**
       * Returns the DOM structure built by the last process() call.
       * @return TexyHtml
       */
      final public function getDOM()
      {
          return $this->DOM;
      }


      /**
       * Callback of the tab expansion in process(): pads the text preceding
       * a tab with spaces up to the next tab stop.
       * @param array matches; [1] is the start of the line up to the tab
       * @return string
       */
      private function tabCb($m)
      {
          // The text is UTF-8 at this point, so the column is the number of
          // characters, not bytes: count every byte that is not a UTF-8
          // continuation byte (10xxxxxx).
          $column = preg_match_all('#[^\x80-\xBF]#', $m[1], $tmp);
          return $m[1] . str_repeat(' ', $this->tabWidth - $column % $this->tabWidth);
      }


      /**
       * Throws when the given value cannot be used as a callback.
       * @param mixed
       * @return void
       * @throws InvalidArgumentException
       */
      private static function checkCallback($handler)
      {
          if (!is_callable($handler)) {
              // syntax-only check: tells a well-formed but missing callback
              // from a value that is no callback at all
              $able = is_callable($handler, TRUE, $textual);
              throw new InvalidArgumentException("Handler '$textual' is not " . ($able ? 'callable.' : 'valid PHP callback.'));
          }
      }


      /**
       * PHP garbage collector helper: on PHP older than 5.3 sets every
       * property of the object to NULL.
       * NOTE(review): presumably to break reference cycles - confirm.
       * @return void
       */
      final public function free()
      {
          if (version_compare(PHP_VERSION , '5.3', '<')) {
              foreach (array_keys(get_object_vars($this)) as $key) {
                  $this->$key = NULL;
              }
          }
      }


      /**
       * Cloning is not supported.
       * @throws NotSupportedException
       */
      final public function __clone()
      {
          throw new NotSupportedException('Clone is not supported.');
      }

  }