PageRenderTime 36ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/system/vendor/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php

https://github.com/Toushi/flow
PHP | 248 lines | 141 code | 27 blank | 80 comment | 49 complexity | 95b4a97bbdf61d7cc793fd709306161e MD5 | raw file
  1. <?php
  /**
   * Injector that auto paragraphs text in the root node based on
   * double-spacing.
   *
   * A "double newline" ("\n\n") inside a text node is treated as a paragraph
   * boundary. Handlers replace the token they are given (passed by
   * reference) with an array of tokens when paragraph tags must be added.
   */
  class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
  {

      public $name = 'AutoParagraph';
      public $needed = array('p');

      /**
       * Creates a fresh <p> start token, armored with the
       * 'MakeWellFormed_TagClosedError' flag.
       * @return HTMLPurifier_Token_Start
       */
      private function _pStart() {
          $par = new HTMLPurifier_Token_Start('p');
          $par->armor['MakeWellFormed_TagClosedError'] = true;
          return $par;
      }

      /**
       * Handles a text token, wrapping or splitting it into paragraphs
       * depending on the current nesting.
       * @param $token Reference to the text token; replaced by an array of
       *        tokens when paragraphs are introduced
       */
      public function handleText(&$token) {
          $text = $token->data;

          if (empty($this->currentNesting)) {
              if (!$this->allowsElement('p')) return;
              // case 1: we're in root node (and it allows paragraphs)
              $token = array($this->_pStart());
              $this->_splitText($text, $token);
              return;
          }

          if ($this->currentNesting[count($this->currentNesting) - 1]->name === 'p') {
              // case 2: we're in a paragraph
              $token = array();
              $this->_splitText($text, $token);
              return;
          }

          if (!$this->allowsElement('p')) return;

          // case 3: we're in an element that allows paragraphs
          if (strpos($text, "\n\n") !== false) {
              // case 3.1: this text node has a double-newline
              $token = array($this->_pStart());
              $this->_splitText($text, $token);
              return;
          }

          // test if up-coming tokens are either block or have
          // a double newline in them
          $ok = false;
          $nesting = 0;
          for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
              $next = $this->inputTokens[$i];
              if ($next instanceof HTMLPurifier_Token_Start) {
                  // we haven't found a double-newline, and
                  // we've hit a block element, so don't paragraph
                  if (!$this->_isInline($next)) break;
                  $nesting++;
              }
              if ($next instanceof HTMLPurifier_Token_End) {
                  // an end tag at depth zero closes the parent: stop looking
                  if ($nesting <= 0) break;
                  $nesting--;
              }
              if ($next instanceof HTMLPurifier_Token_Text) {
                  if (strpos($next->data, "\n\n") !== false) {
                      // found it!
                      $ok = true;
                      break;
                  }
              }
          }
          if ($ok) {
              // case 3.2: this text node is next to another node
              // that will start a paragraph
              $token = array($this->_pStart(), $token);
          }
      }

      /**
       * Handles an element token, prepending a paragraph start tag when an
       * inline element needs to live inside a paragraph.
       * @param $token Reference to the element token; replaced by an array
       *        (paragraph start, original token) when a paragraph is opened
       */
      public function handleElement(&$token) {
          if (empty($this->currentNesting)) {
              // root node: block elements are left alone, inline ones get
              // a paragraph tag placed before them
              if (!$this->_isInline($token)) return;
              $token = array($this->_pStart(), $token);
              return;
          }

          // we're inside a tag already; only act if it allows paragraphs
          if (!$this->allowsElement('p')) return;
          // this token is already paragraph, abort
          if ($token->name === 'p') return;
          // this token is a block level, abort
          if (!$this->_isInline($token)) return;

          // check if this token is adjacent to the parent token
          $prevIndex = $this->inputIndex - 1;
          $prev = isset($this->inputTokens[$prevIndex]) ? $this->inputTokens[$prevIndex] : null;
          if (!$prev instanceof HTMLPurifier_Token_Start) {
              // not adjacent, we can abort early; add a lead paragraph tag
              // if the previous tag was an end paragraph (our token is known
              // to be inline at this point). The instanceof test comes first
              // so ->name is only read from an end tag.
              if ($prev instanceof HTMLPurifier_Token_End && $prev->name === 'p') {
                  $token = array($this->_pStart(), $token);
              }
              return;
          }

          // this token is the first child of the element that allows
          // paragraph. We have to peek ahead and see whether or not
          // there is anything inside that suggests that a paragraph
          // will be needed
          $ok = false;
          // maintain a mini-nesting counter, this lets us bail out
          // early if possible
          $depth = 1; // current nesting, one is due to parent (we recalculate current token)
          for ($i = $this->inputIndex; isset($this->inputTokens[$i]); $i++) {
              $next = $this->inputTokens[$i];
              if ($next instanceof HTMLPurifier_Token_Start) $depth++;
              if ($next instanceof HTMLPurifier_Token_End) $depth--;
              if ($next instanceof HTMLPurifier_Token_Text) {
                  if (strpos($next->data, "\n\n") !== false) {
                      $ok = true;
                      break;
                  }
              }
              if ($depth <= 0) break;
          }
          if ($ok) {
              $token = array($this->_pStart(), $token);
          }
      }

      /**
       * Splits up a text in paragraph tokens and appends them
       * to the result stream that will replace the original
       * @param $data String text data that will be processed
       *        into paragraphs
       * @param $result Reference to array of tokens that the
       *        tags will be appended onto
       */
      private function _splitText($data, &$result) {
          $raw_paragraphs = explode("\n\n", $data);
          $c = count($raw_paragraphs);

          if ($c === 1) {
              // there were no double-newlines, abort quickly
              $result[] = new HTMLPurifier_Token_Text($data);
              return;
          }

          // collect the non-empty paragraphs, noting leading/trailing
          // double-newlines along the way
          $paragraphs  = array();
          $needs_start = false;
          $needs_end   = false;

          for ($i = 0; $i < $c; $i++) {
              $par = $raw_paragraphs[$i];
              if (trim($par) !== '') {
                  $paragraphs[] = $par;
                  continue;
              }
              if ($i === 0 && empty($result)) {
                  // The empty result indicates that the AutoParagraph
                  // injector did not add any start paragraph tokens.
                  // The fact that the first paragraph is empty indicates
                  // that there was a double-newline at the start of the
                  // data.
                  // Combined together, this means that we are in a paragraph,
                  // and the newline means we should start a new one.
                  $result[] = new HTMLPurifier_Token_End('p');
                  // However, the start token should only be added if
                  // there is more processing to be done (i.e. there are
                  // real paragraphs in here). If there are none, the
                  // next start paragraph tag will be handled by the
                  // next run-around the injector
                  $needs_start = true;
              } elseif ($i + 1 === $c) {
                  // a double-paragraph at the end indicates that
                  // there is an overriding need to start a new paragraph
                  // for the next section. This has no effect until
                  // we've processed all of the other paragraphs though
                  $needs_end = true;
              }
          }

          // check if there are no "real" paragraphs to be processed
          if (empty($paragraphs)) {
              return;
          }

          // add a start tag if an end tag was added while processing
          // the raw paragraphs (that happens if there's a leading double
          // newline)
          if ($needs_start) $result[] = $this->_pStart();

          // append the paragraphs onto the result
          foreach ($paragraphs as $par) {
              $result[] = new HTMLPurifier_Token_Text($par);
              $result[] = new HTMLPurifier_Token_End('p');
              $result[] = $this->_pStart();
          }

          // remove trailing start token, if one is needed, it will
          // be handled the next time this injector is called
          array_pop($result);

          // a trailing double-newline forces the end paragraph tag to stay
          if ($needs_end) {
              return;
          }

          // check the outside to determine whether or not the
          // end paragraph tag should be removed. It should be removed
          // unless the next non-whitespace token is a paragraph
          // or a block element.
          // NOTE(review): the scan does not stop at the first start/empty
          // tag, so a later tag can overwrite the decision before a
          // non-whitespace text or end tag is reached - confirm intended.
          $remove_paragraph_end = true;
          // Start of the checks one after the current token's index
          for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
              $next = $this->inputTokens[$i];
              if ($next instanceof HTMLPurifier_Token_Start || $next instanceof HTMLPurifier_Token_Empty) {
                  $remove_paragraph_end = $this->_isInline($next);
              }
              // check if we can abort early (whitespace means we carry-on!)
              if ($next instanceof HTMLPurifier_Token_Text && !$next->is_whitespace) break;
              // end tags will automatically be handled by MakeWellFormed,
              // so we don't have to worry about them
              if ($next instanceof HTMLPurifier_Token_End) break;
          }

          if ($remove_paragraph_end) {
              array_pop($result);
          }
      }

      /**
       * Returns true if passed token is inline (and, ergo, allowed in
       * paragraph tags), i.e. its name is registered among the child
       * elements of the 'p' element definition.
       * @param $token Token whose name is looked up
       * @return bool
       */
      private function _isInline($token) {
          return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
      }

  }