PageRenderTime 52ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/library/Zend/Markup/Parser/Textile.php

https://bitbucket.org/fabiancarlos/feature_seguimentos
PHP | 570 lines | 376 code | 66 blank | 128 comment | 70 complexity | 60314734fdaa33f56d712e5a38807eba MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Markup
  17. * @subpackage Parser
  18. * @copyright Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: Textile.php 24266 2011-07-24 14:37:43Z ramon $
  21. */
  22. /**
  23. * @see Zend_Markup_TokenList
  24. */
  25. require_once 'Zend/Markup/TokenList.php';
  26. /**
  27. * @see Zend_Markup_Parser_ParserInterface
  28. */
  29. require_once 'Zend/Markup/Parser/ParserInterface.php';
  30. /**
  31. * @category Zend
  32. * @package Zend_Markup
  33. * @subpackage Parser
  34. * @copyright Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
  35. * @license http://framework.zend.com/license/new-bsd New BSD License
  36. */
  37. class Zend_Markup_Parser_Textile implements Zend_Markup_Parser_ParserInterface
  38. {
  /**
   * Tokenizer states used by {@link _tokenize()}:
   * scanning inline text, at the start of a paragraph, and just after a
   * single line break.
   */
  const STATE_SCAN          = 0;
  const STATE_NEW_PARAGRAPH = 1;
  const STATE_NEWLINE       = 2;

  /**
   * Regex fragments for the attribute modifiers that may follow a tag.
   * Each one captures into the named groups that
   * {@link _extractAttributes()} reads (attr_class, attr_id, attr_style,
   * attr_lang, attr_align).
   */
  const MATCH_ATTR_CLASSID = '\((?<attr_class>[a-zA-Z0-9_]+)?(?:\#(?<attr_id>[a-zA-Z0-9_]+))?\)';
  const MATCH_ATTR_STYLE   = "\{(?<attr_style>[^\}\n]+)\}";
  const MATCH_ATTR_LANG    = '\[(?<attr_lang>[a-zA-Z_]+)\]';
  const MATCH_ATTR_ALIGN   = '(?<attr_align>\<\>?|\>|=)';

  /**
   * Token tree
   *
   * @var Zend_Markup_TokenList
   */
  protected $_tree;

  /**
   * Current token
   *
   * @var Zend_Markup_Token
   */
  protected $_current;

  /**
   * Source to tokenize
   *
   * @var string
   */
  protected $_value = '';

  /**
   * Length of the value
   *
   * @var int
   */
  protected $_valueLen = 0;

  /**
   * Current pointer
   *
   * @var int
   */
  protected $_pointer = 0;

  /**
   * The buffer
   *
   * @var string
   */
  protected $_buffer = '';

  /**
   * Pending paragraph token.
   *
   * Filled by the tokenizer when a paragraph break is scanned and pushed
   * onto the token list once the start of the next paragraph has been
   * examined. Declared explicitly because parse() and _tokenize() assign
   * it; relying on a dynamic property is deprecated since PHP 8.2.
   *
   * @var array
   */
  protected $_temp = array();

  /**
   * Simple tag translation
   *
   * Maps a Textile inline marker to the tag name used in the token list.
   *
   * @var array
   */
  protected $_simpleTags = array(
      '*'  => 'strong',
      '**' => 'bold',
      '_'  => 'emphasized',
      '__' => 'italic',
      '??' => 'citation',
      '-'  => 'deleted',
      '+'  => 'insert',
      '^'  => 'superscript',
      '~'  => 'subscript',
      '%'  => 'span',
      // these are a little more complicated
      '@'  => 'code',
      '!'  => 'img',
  );

  /**
   * Token array
   *
   * @var array
   */
  protected $_tokens = array();
  /**
   * Prepare the parsing of a Textile string; the real work is done in
   * {@link _tokenize()} and {@link _createTree()}.
   *
   * @param string $value Textile source
   *
   * @throws Zend_Markup_Parser_Exception when $value is not a string or is
   *                                      an empty string
   *
   * @return Zend_Markup_TokenList
   */
  public function parse($value)
  {
      if (!is_string($value)) {
          /**
           * @see Zend_Markup_Parser_Exception
           */
          require_once 'Zend/Markup/Parser/Exception.php';
          throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
      }

      // Compare with '' explicitly: empty() would also reject the valid input "0".
      if ($value === '') {
          /**
           * @see Zend_Markup_Parser_Exception
           */
          require_once 'Zend/Markup/Parser/Exception.php';
          throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
      }

      // first make sure we only have LF newlines, also trim the value
      $this->_value = trim(str_replace(array("\r\n", "\r"), "\n", $value));

      // The tokenizer advances the pointer with strlen() and hands it to
      // preg_match() as a byte offset, so the length must be counted in
      // bytes as well. A character count (iconv_strlen) is smaller for
      // multibyte input and would stop the tokenizer before the end.
      $this->_valueLen = strlen($this->_value);

      // reset the tokenizer state and tokenize
      $this->_pointer = 0;
      $this->_buffer  = '';
      $this->_temp    = array();
      $this->_tokens  = array();

      $this->_tokenize();

      // create the tree
      $this->_tree    = new Zend_Markup_TokenList();
      $this->_current = new Zend_Markup_Token('', Zend_Markup_Token::TYPE_NONE, 'Zend_Markup_Root');
      $this->_tree->addChild($this->_current);

      $this->_createTree();

      return $this->_tree;
  }
  /**
   * Tokenize the Textile source in $_value into the flat $_tokens list.
   *
   * A small state machine walks the source with a byte pointer:
   *  - STATE_NEW_PARAGRAPH: emits the paragraph token(s) and looks for a
   *    block prefix (h1-h6, p, "#" or "*") at the start of the paragraph;
   *  - STATE_NEWLINE: looks for a block prefix (h1-h6, "#" or "*") right
   *    after a single line break;
   *  - STATE_SCAN: collects plain text up to the next inline tag, line
   *    break or paragraph break.
   *
   * @return void
   */
  protected function _tokenize()
  {
      $state = self::STATE_NEW_PARAGRAPH;

      $attrsMatch = implode('|', array(
          self::MATCH_ATTR_CLASSID,
          self::MATCH_ATTR_STYLE,
          self::MATCH_ATTR_LANG,
          self::MATCH_ATTR_ALIGN
      ));

      // The patterns do not depend on the scan position, so build them once.
      // NOTE(review): the "i" modifier on the scan pattern also applies to
      // [A-Z]{2,}, so lower-case words followed by "(...)" are tokenized as
      // acronyms too - confirm whether that is intended.
      $acronym   = '(?<acronym>[A-Z]{2,})\((?<title>[^\)]+)\)';
      $scanRegex = '#\G(?<text>.*?)(?:'
                 . "(?:(?<nl_paragraph>\n{2,})|(?<nl_break>\n))|"
                 . '(?<tag>'
                 . "(?<name>\*{1,2}|_{1,2}|\?{2}|\-|\+|\~|\^|%|@|!|$|{$acronym}"
                 . '|":(?<url>[^\s]+)|")'
                 . "(?:{$attrsMatch})*)"
                 . ')#si';
      $paragraphRegex = "#\G(?<name>(h[1-6]|p)|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#i";
      $newlineRegex   = "#\G(?<name>(h[1-6])|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#si";

      while ($this->_pointer < $this->_valueLen) {
          switch ($state) {
              case self::STATE_SCAN:
                  $matches = array();

                  // flags must be an int: passing null is deprecated since PHP 8.1
                  if (preg_match($scanRegex, $this->_value, $matches, 0, $this->_pointer)) {
                      $this->_pointer += strlen($matches[0]);
                      $this->_buffer  .= $matches['text'];
                  } else {
                      // No match or a PCRE error (e.g. backtrack limit): the
                      // pointer could not advance and the loop would never
                      // end. Emit the remainder as plain text and finish.
                      $this->_buffer .= substr($this->_value, $this->_pointer);
                      $this->_pointer = $this->_valueLen;
                      $matches        = array();
                  }

                  // first add the buffer; compare with '' so a text run of "0" is kept
                  if ($this->_buffer !== '') {
                      $this->_tokens[] = array(
                          'tag'  => $this->_buffer,
                          'type' => Zend_Markup_Token::TYPE_NONE
                      );
                      $this->_buffer = '';
                  }

                  if (!empty($matches['nl_paragraph'])) {
                      // remember the break; the paragraph state turns it into tokens
                      $this->_temp = array(
                          'tag'        => $matches['nl_paragraph'],
                          'name'       => 'p',
                          'type'       => Zend_Markup_Token::TYPE_TAG,
                          'attributes' => array()
                      );
                      $state = self::STATE_NEW_PARAGRAPH;
                  } elseif (!empty($matches['nl_break'])) {
                      $this->_tokens[] = array(
                          'tag'        => $matches['nl_break'],
                          'name'       => 'break',
                          'type'       => Zend_Markup_Token::TYPE_TAG,
                          'attributes' => array()
                      );
                      $state = self::STATE_NEWLINE;
                  } elseif (!empty($matches['tag'])) {
                      if (isset($this->_simpleTags[$matches['name']])) {
                          // now add the new token
                          $this->_tokens[] = array(
                              'tag'        => $matches['tag'],
                              'type'       => Zend_Markup_Token::TYPE_TAG,
                              'name'       => $this->_simpleTags[$matches['name']],
                              'attributes' => $this->_extractAttributes($matches)
                          );
                      } else {
                          $attributes = $this->_extractAttributes($matches);
                          if ($matches['tag'][0] == '"') {
                              // link delimiter: '"' opens, '":<url>' closes and carries the target
                              if (isset($matches['url'])) {
                                  $attributes['url'] = $matches['url'];
                              }
                              $this->_tokens[] = array(
                                  'tag'        => $matches['tag'],
                                  'type'       => Zend_Markup_Token::TYPE_TAG,
                                  'name'       => 'url',
                                  'attributes' => $attributes
                              );
                          } else {
                              // acronym: an opening tag, the letters as text,
                              // and the "(title)" part as the closing tag
                              $this->_tokens[] = array(
                                  'tag'        => '',
                                  'type'       => Zend_Markup_Token::TYPE_TAG,
                                  'name'       => 'acronym',
                                  'attributes' => array(
                                      'title' => $matches['title']
                                  )
                              );
                              $this->_tokens[] = array(
                                  'tag'  => $matches['acronym'],
                                  'type' => Zend_Markup_Token::TYPE_NONE
                              );
                              $this->_tokens[] = array(
                                  'tag'        => '(' . $matches['title'] . ')',
                                  'type'       => Zend_Markup_Token::TYPE_TAG,
                                  'name'       => 'acronym',
                                  'attributes' => array()
                              );
                          }
                      }
                      $state = self::STATE_SCAN;
                  }
                  break;

              case self::STATE_NEW_PARAGRAPH:
                  if (empty($this->_temp)) {
                      // very first paragraph: there is no preceding break
                      $this->_temp = array(
                          'tag'        => '',
                          'name'       => 'p',
                          'type'       => Zend_Markup_Token::TYPE_TAG,
                          'attributes' => array()
                      );
                  } else {
                      // the first newline of the break closes the previous
                      // paragraph, the rest of it opens the new one
                      $this->_tokens[] = array(
                          'tag'        => "\n",
                          'name'       => 'p',
                          'type'       => Zend_Markup_Token::TYPE_TAG,
                          'attributes' => array()
                      );
                      $this->_temp['tag'] = substr($this->_temp['tag'], 1);
                  }

                  $matches = array();
                  if (!preg_match($paragraphRegex, $this->_value, $matches, 0, $this->_pointer)) {
                      // no block prefix: a plain paragraph
                      $this->_tokens[] = $this->_temp;
                      $state = self::STATE_SCAN;
                      break;
                  }

                  $this->_pointer += strlen($matches[0]);

                  if ($matches['name'] == 'p') {
                      // explicit "p." prefix: it belongs to the paragraph token itself
                      $this->_temp['tag']       .= $matches[0];
                      $this->_temp['attributes'] = $this->_extractAttributes($matches);

                      $this->_tokens[] = $this->_temp;
                      $this->_temp     = array();
                  } else {
                      $this->_tokens[] = $this->_temp;
                      $this->_temp     = array();

                      $name       = $matches['name'];
                      $attributes = $this->_extractAttributes($matches);

                      if ($name == '#') {
                          $name               = 'list';
                          $attributes['list'] = 'decimal';
                      } elseif ($name == '*') {
                          $name = 'list';
                      }

                      $this->_tokens[] = array(
                          'tag'        => $matches[0],
                          'name'       => $name,
                          'type'       => Zend_Markup_Token::TYPE_TAG,
                          'attributes' => $attributes
                      );
                  }

                  $state = self::STATE_SCAN;
                  break;

              case self::STATE_NEWLINE:
                  $matches = array();
                  if (!preg_match($newlineRegex, $this->_value, $matches, 0, $this->_pointer)) {
                      $state = self::STATE_SCAN;
                      break;
                  }

                  $this->_pointer += strlen($matches[0]);

                  $name       = $matches['name'];
                  $attributes = $this->_extractAttributes($matches);

                  if ($name == '#') {
                      $name               = 'list';
                      $attributes['list'] = 'decimal';
                  } elseif ($name == '*') {
                      $name = 'list';
                  }

                  $this->_tokens[] = array(
                      'tag'        => $matches[0],
                      'name'       => $name,
                      'type'       => Zend_Markup_Token::TYPE_TAG,
                      'attributes' => $attributes
                  );
                  break;
          }
      }
  }
  /**
   * Build the token tree from the flat token list in $_tokens.
   *
   * Walks the tokens in order, keeping $_current pointed at the innermost
   * open tag: stopper tokens close the current tag (and its ancestors, as
   * long as they are stopped by the same token), tag tokens open a new
   * child, and everything else is attached as plain text.
   *
   * @return void
   */
  protected function _createTree()
  {
      // false while an img/url tag is open: tags inside it are kept as text
      $tagsAllowed = true;

      foreach ($this->_tokens as $index => $token) {
          if ($this->_isStopper($token, $this->_current)) {
              $stopper = $token['tag'];

              if ($this->_current->getName() == 'li') {
                  // list items are handled differently: the list itself only
                  // stays open when the next token starts another item
                  $nextIndex  = $index + 1;
                  $nextIsList = isset($this->_tokens[$nextIndex])
                      && ($this->_tokens[$nextIndex]['type'] == Zend_Markup_Token::TYPE_TAG)
                      && ($this->_tokens[$nextIndex]['name'] == 'list');

                  $this->_current->setStopper($stopper);
                  $list = $this->_current->getParent();

                  if ($nextIsList) {
                      $this->_current = $list;
                  } else {
                      // close the list as well, then keep going up while
                      // the ancestors are stopped by this token too
                      $this->_current = $list->getParent();
                      while ($this->_isStopper($token, $this->_current)) {
                          $this->_current->setStopper($stopper);
                          $this->_current = $this->_current->getParent();
                      }
                  }
              } else {
                  // attributes carried by the stopper are copied onto every tag it closes
                  $extra = empty($token['attributes']) ? array() : $token['attributes'];

                  while ($this->_isStopper($token, $this->_current)) {
                      $this->_current->setStopper($stopper);
                      foreach ($extra as $name => $value) {
                          $this->_current->addAttribute($name, $value);
                      }
                      $this->_current = $this->_current->getParent();
                  }
              }

              $tagsAllowed = true;
              continue;
          }

          $isTag = ($token['type'] == Zend_Markup_Token::TYPE_TAG) && $tagsAllowed;

          if (!$isTag || ($token['name'] == 'break')) {
              // plain text, a tag inside img/url, or a line break: add as text
              $this->_current->addChild(new Zend_Markup_Token(
                  $token['tag'],
                  Zend_Markup_Token::TYPE_NONE,
                  '',
                  array(),
                  $this->_current
              ));
              continue;
          }

          switch ($token['name']) {
              case 'list':
                  // the list type belongs to the list, not to its items
                  $listAttributes = array();
                  if (isset($token['attributes']['list'])) {
                      $listAttributes['list'] = $token['attributes']['list'];
                      unset($token['attributes']['list']);
                  }

                  if ($this->_current->getName() != 'list') {
                      // the list isn't started yet, create it
                      $list = new Zend_Markup_Token(
                          '',
                          Zend_Markup_Token::TYPE_TAG,
                          'list',
                          $listAttributes,
                          $this->_current
                      );
                      $this->_current->addChild($list);
                      $this->_current = $list;
                  }

                  $token['name'] = 'li';
                  break;

              case 'img':
              case 'url':
                  $tagsAllowed = false;
                  break;
          }

          // open the tag and descend into it
          $opened = new Zend_Markup_Token(
              $token['tag'],
              Zend_Markup_Token::TYPE_TAG,
              $token['name'],
              $token['attributes'],
              $this->_current
          );
          $this->_current->addChild($opened);
          $this->_current = $opened;
      }
  }
  /**
   * Tell whether a token closes the given open tag.
   *
   * Headings, lists and list items are closed by a line break or a
   * paragraph token; a "break" tag is never closed; any other tag is
   * closed by a tag token carrying its own name.
   *
   * @param array             $token   token from the tokenizer
   * @param Zend_Markup_Token $current currently open tag
   *
   * @return bool
   */
  protected function _isStopper(array $token, Zend_Markup_Token $current)
  {
      $currentName = $current->getName();

      // only tag tokens can close anything
      if ($token['type'] != Zend_Markup_Token::TYPE_TAG) {
          return false;
      }

      $lineBound = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'list', 'li');
      if (in_array($currentName, $lineBound)) {
          return ($token['name'] == 'break') || ($token['name'] == 'p');
      }

      if ($currentName == 'break') {
          return false;
      }

      return $token['name'] == $currentName;
  }
  /**
   * Turn the attribute groups of a regex match into a tag attribute array.
   *
   * Reads the named groups attr_class, attr_id, attr_style, attr_lang and
   * attr_align; groups that are missing or empty produce no attribute.
   *
   * @param array $matches match array from preg_match()
   *
   * @return array attributes keyed by class, id, style, lang and align
   */
  protected function _extractAttributes(array $matches)
  {
      $attributes = array();

      // groups that are copied verbatim: match group => attribute name
      $verbatim = array(
          'attr_class' => 'class',
          'attr_id'    => 'id',
          'attr_style' => 'style',
          'attr_lang'  => 'lang',
      );
      foreach ($verbatim as $group => $attribute) {
          if (!empty($matches[$group])) {
              $attributes[$attribute] = $matches[$group];
          }
      }

      if (!empty($matches['attr_align'])) {
          // anything that is not listed here (i.e. "<") aligns left
          $alignments = array(
              '='  => 'center',
              '>'  => 'right',
              '<>' => 'justify',
          );
          $symbol = $matches['attr_align'];

          $attributes['align'] = isset($alignments[$symbol]) ? $alignments[$symbol] : 'left';
      }

      return $attributes;
  }
  504. }