PageRenderTime 54ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/external_lib/HTMLPurifier/HTMLPurifier/Injector/AutoParagraph.php

https://github.com/OwlManAtt/kittokittokitto
PHP | 340 lines | 153 code | 40 blank | 147 comment | 50 complexity | 0d1dc975105ef1208e3228586335abaf MD5 | raw file
  1. <?php
  /**
   * Injector that automatically wraps text in paragraph tags, treating a
   * double newline ("\n\n") as the paragraph separator.
   *
   * @todo Ensure all states are unit tested, including variations as well.
   * @todo Make a graph of the flow control for this Injector.
   */
  class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
  {
      public $name = 'AutoParagraph';

      public $needed = array('p');

      /**
       * Builds an opening <p> token carrying the
       * 'MakeWellFormed_TagClosedError' armor flag.
       *
       * @return HTMLPurifier_Token_Start
       */
      private function _pStart() {
          $open = new HTMLPurifier_Token_Start('p');
          $open->armor['MakeWellFormed_TagClosedError'] = true;
          return $open;
      }

      /**
       * Processes a text token, possibly replacing it (by reference) with an
       * array of tokens that introduces and/or splits paragraphs.
       *
       * @param $token Text token being processed; overwritten in place with
       *               an array of replacement tokens when paragraphs apply
       */
      public function handleText(&$token) {
          $text = $token->data;

          if (!$this->allowsElement('p')) {
              // The parent does not accept <p>. The text is only re-split
              // when the innermost open element is itself a <p>:
              //   ...<p>PAR1          or   ...<p>PAR1\n\nPAR2
              // Anything else (e.g. ...<b>PAR1\n\nPAR2) is left untouched.
              $insideParagraph = !empty($this->currentNesting)
                  && $this->currentNesting[count($this->currentNesting) - 1]->name == 'p';
              if ($insideParagraph) {
                  $token = array();
                  $this->_splitText($text, $token);
              }
              return;
          }

          $atRoot = empty($this->currentNesting);
          if (!$atRoot && strpos($text, "\n\n") === false) {
              // <div>PAR1... : nested in an element that allows <p>, with no
              // double newline in this text. Whether a paragraph is needed
              // depends on what follows, so look ahead.
              if ($this->_pLookAhead()) {
                  // <div>PAR1<b>PAR1\n\nPAR2
                  $token = array($this->_pStart(), $token);
              }
              // Otherwise (<div>PAR1<div> or <div>PAR1<b>PAR1</b></div>)
              // the token is left as it is.
              return;
          }

          // Either we are in the anonymous root node, or the text contains a
          // double newline; both cases are treated the same way.
          $i = $nesting = null;
          $hasFollowing = $this->forwardUntilEndToken($i, $current, $nesting);
          if (!$hasFollowing && $token->is_whitespace) {
              // Degenerate case: whitespace followed by the end of the
              // document. Nothing to wrap.
              return;
          }

          // PAR1, PAR1\n\nPAR2, or <div>PAR1\n\nPAR2
          $token = array($this->_pStart());
          $this->_splitText($text, $token);
      }

      /**
       * Processes an element token, possibly replacing it (by reference)
       * with an array that prepends a paragraph start and/or a "\n\n" text
       * token.
       *
       * There is no check for already being inside a <p> for block tokens:
       * per the original implementation notes, such a tag would have been
       * autoclosed by MakeWellFormed.
       *
       * @param $token Element token being processed; may be overwritten in
       *               place with an array of replacement tokens
       */
      public function handleElement(&$token) {
          if (!$this->allowsElement('p')) {
              // e.g. <ul><li> or <p><b> : nothing to do.
              return;
          }

          if (empty($this->currentNesting)) {
              // Root node.
              if ($this->_isInline($token)) {
                  // <b> at the root: this is where the <p> is inserted (it is
                  // not yet reflected in inputTokens, however).
                  $token = array($this->_pStart(), $token);
              }

              $i = null;
              $hasPrevious = $this->backward($i, $prev);
              if ($hasPrevious && !($prev instanceof HTMLPurifier_Token_Text)) {
                  // ...</p>{p}<b>  or  ...</p><div> : no text separates this
                  // element from the previous token, so put a double newline
                  // in front of whatever is being emitted.
                  if (!is_array($token)) {
                      $token = array($token);
                  }
                  array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
              }
              // When the previous token is text (...</p>\n\n<b>), nothing is
              // added. PAR<ELEM> cannot occur because PAR would already have
              // been wrapped in <p> tags.
              return;
          }

          if (!$this->_isInline($token)) {
              // ...<div> nested inside another element: leave it alone.
              return;
          }

          // <div>...<b> : determine whether this inline element is adjacent
          // to the parent's start tag (the original notes describe backward()
          // as seeking back until the token isn't whitespace).
          $i = null;
          $this->backward($i, $prev);

          if ($prev instanceof HTMLPurifier_Token_Start) {
              // <div><b> : first child, so look ahead to see if <p> is needed.
              if ($this->_pLookAhead()) {
                  // <div><b>PAR1\n\nPAR2
                  $token = array($this->_pStart(), $token);
              }
              return;
          }

          // Not adjacent to the parent's start tag.
          if (
              $prev instanceof HTMLPurifier_Token_Text &&
              substr($prev->data, -2) === "\n\n"
          ) {
              // <div><p>PAR1</p>\n\n<b>
              $token = array($this->_pStart(), $token);
          }
          // Otherwise (<div><p>PAR1</p><b>, <div><br /><b>, <div>PAR<b>)
          // the token is left as it is.
      }

      /**
       * Splits text on double newlines and appends the resulting paragraph
       * tokens to the stream that will replace the original text token.
       *
       * @param $data   String text data that will be processed into paragraphs
       * @param $result Reference to array of tokens that the new tokens will
       *                be appended onto (a leading "\n\n" may be prepended)
       */
      private function _splitText($data, &$result) {
          $chunks = explode("\n\n", $data);
          $total = count($chunks);

          if ($total == 1) {
              // No double newline present; pass the text through unchanged.
              $result[] = new HTMLPurifier_Token_Text($data);
              return;
          }

          $lastIndex = $total - 1;
          $kept = array();       // chunks that are not pure whitespace
          $openFirst = false;    // emit <p> before the first kept chunk
          $closeLast = false;    // emit </p> after the last kept chunk

          foreach ($chunks as $index => $chunk) {
              if (trim($chunk) !== '') {
                  $kept[] = $chunk;
                  continue;
              }

              if ($index == 0) {
                  // Double newline at the very front of the text.
                  if (empty($result)) {
                      // The caller added no start token, so we have been
                      // inside a paragraph: close it. The matching start tag
                      // is only emitted further down, if real paragraphs
                      // follow in this text.
                      $result[] = new HTMLPurifier_Token_End('p');
                      $result[] = new HTMLPurifier_Token_Text("\n\n");
                      $openFirst = true;
                  } else {
                      // A paragraph was just started; put the double newline
                      // from the source back in front of it for presentation.
                      array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
                  }
              } elseif ($index == $lastIndex) {
                  // Double newline at the very end: a trailing </p> is needed.
                  $closeLast = true;
              }
          }

          if (empty($kept)) {
              // The text was nothing but whitespace.
              return;
          }

          if ($openFirst) {
              $result[] = $this->_pStart();
          }

          // Emit the paragraphs, separated by </p>, "\n\n", <p>. No start
          // token is emitted after the final paragraph.
          foreach ($kept as $position => $chunk) {
              if ($position > 0) {
                  $result[] = new HTMLPurifier_Token_End('p');
                  $result[] = new HTMLPurifier_Token_Text("\n\n");
                  $result[] = $this->_pStart();
              }
              $result[] = new HTMLPurifier_Token_Text($chunk);
          }

          // Without a trailing double newline the last paragraph is left
          // open, to be closed later by MakeWellFormed.
          if ($closeLast) {
              $result[] = new HTMLPurifier_Token_End('p');
              $result[] = new HTMLPurifier_Token_Text("\n\n");
          }
      }

      /**
       * Returns true if the passed token's element name is among the child
       * elements registered for <p> in the HTML definition (i.e. it is
       * inline and therefore allowed inside paragraph tags).
       */
      private function _isInline($token) {
          $tagName = $token->name;
          return isset($this->htmlDefinition->info['p']->child->elements[$tagName]);
      }

      /**
       * Looks ahead in the token list and determines whether or not a <p>
       * tag needs to be inserted.
       *
       * @return bool True if a later text token contains a double newline
       *              before a block-level start tag or the end is reached
       */
      private function _pLookAhead() {
          $this->current($i, $current);
          // Start counting nesting from 1 when positioned on a start tag.
          $nesting = ($current instanceof HTMLPurifier_Token_Start) ? 1 : 0;

          while ($this->forwardUntilEndToken($i, $current, $nesting)) {
              $verdict = $this->_checkNeedsP($current);
              if ($verdict !== null) {
                  return $verdict;
              }
          }

          return false;
      }

      /**
       * Determines if a particular token requires an earlier inline token
       * to get a paragraph. Intended for use with forwardUntilEndToken().
       *
       * @return bool|null False for a non-inline start tag, true for text
       *                   containing a double newline, null when undecided
       */
      private function _checkNeedsP($current) {
          if ($current instanceof HTMLPurifier_Token_Start) {
              // <div>PAR1<div> : a block element ends the search; an inline
              // start tag leaves the question open.
              return $this->_isInline($current) ? null : false;
          }

          if (
              $current instanceof HTMLPurifier_Token_Text &&
              strpos($current->data, "\n\n") !== false
          ) {
              // <div>PAR1<b>PAR1\n\nPAR2
              return true;
          }

          // <div>PAR1<b>PAR1... : nothing conclusive yet.
          return null;
      }
  }
  300. // vim: et sw=4 sts=4