PageRenderTime 56ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/Template/src/parsers/tst_to_tst/implementations/whitespace_removal.php

https://github.com/Yannix/zetacomponents
PHP | 496 lines | 207 code | 27 blank | 262 comment | 46 complexity | 5dcf1937e25b80733abc404d9b12b749 MD5 | raw file
  1. <?php
  2. /**
  3. * File containing the ezcTemplateWhitespaceRemoval class
  4. *
  5. * Licensed to the Apache Software Foundation (ASF) under one
  6. * or more contributor license agreements. See the NOTICE file
  7. * distributed with this work for additional information
  8. * regarding copyright ownership. The ASF licenses this file
  9. * to you under the Apache License, Version 2.0 (the
  10. * "License"); you may not use this file except in compliance
  11. * with the License. You may obtain a copy of the License at
  12. *
  13. * http://www.apache.org/licenses/LICENSE-2.0
  14. *
  15. * Unless required by applicable law or agreed to in writing,
  16. * software distributed under the License is distributed on an
  17. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18. * KIND, either express or implied. See the License for the
  19. * specific language governing permissions and limitations
  20. * under the License.
  21. *
  22. * @package Template
  23. * @version //autogen//
  24. * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
  25. * @access private
  26. */
  27. /**
  28. * Trims away whitespace from parser elements.
  29. *
  30. * This class can perform several types of whitespace removal on the parsed
  31. * result to ensure that the output given to the end user does not contain
  32. * unnecessary whitespace which can be important in some output contexts. It
  33. * is important to note that this will only remove whitespace from the parsed
  34. * result and is not applied at run-time, this means that whitespace in
  35. * outputted strings are kept.
  36. *
  37. * The various removal types are configurable in the constructor which allows
  38. * it to be tailored to what is set as the current output context, e.g.
  39. * whitespace removal for plain text might be different than XHTML output.
  40. * Controlling the removal types are done with boolean switches in member
  41. * variables, they are:
  42. *
  43. * - $trimTrailing - If enabled it will remove the trailing whitespace from
  44. * text blocks after the last block in the code, it checks each line to see if
  45. * it contains whitespace only and if it does the line is removed.
  46. * - $trimLeading - Same as $trimTrailing but the trimming is done for the
  47. * leading lines of the text block found before the first block in the code.
  48. * - $trimBlockEol - Trims away the whitespace and newline after all command
  49. * blocks, this essentially makes the block line disappear from the output.
  50. * - $trimIndent - Trims away whitespace for each line in each block level by
  51. * using the minimum column as the last trimming point. All lines in the same
  52. * block level will get the same amount of whitespace removed.
  53. *
  54. * Example of leading whitespace removal:
  55. * <code>
  56. * "\n" .
  57. * " " .
  58. * " some text\n" .
  59. * " "
  60. * becomes:
  61. * " some text\n" .
  62. * " "
  63. * </code>
  64. * here whitespace is kept after the first non-whitespace line.
  65. *
  66. * Example of trailing whitespace removal:
  67. * <code>
  68. * "\n" .
  69. * " " .
  70. * " some text\n" .
  71. * " "
  72. * becomes:
  73. * "\n" .
  74. * " " .
  75. * " some text"
  76. * </code>
  77. * here whitespace is kept before the first non-whitespace line.
  78. *
  79. * Example of block-line eol removal:
  80. * <code>
  81. * "{if}\n" .
  82. * " {$item}\n" .
  83. * "{/if} \n"
  84. * becomes
  85. * "{if}" .
  86. * " {$item}\n" .
  87. * "{/if}"
  88. * </code>
  89. *
  90. * here the whitespace with EOL marker is removed only at the end of the
  91. * block line, this ensures that critical newlines are kept for the {$item}
  92. * code and that the {if} block does not add extra newlines
  93. *
  94. * Example of indent removal:
  95. * <code>
  96. * "{if}\n" .
  97. * " {$item}\n" .
  98. * "{/if} \n"
  99. * becomes
  100. * "{if}" .
  101. * " {$item}\n" .
  102. * "{/if}"
  103. * </code>
  104. *
  105. * here the whitespace with EOL marker is removed only at the end of the
  106. * block line, this ensures that critical newlines are kept for the {$item}
  107. * code and that the {if} block does not add extra newlines
  108. *
  109. * @package Template
  110. * @version //autogen//
  111. * @access private
  112. */
  class ezcTemplateWhitespaceRemoval
  {
      /**
       * Remove trailing whitespace-only lines after the last block.
       *
       * Read by trimProgram().
       *
       * @var bool
       */
      public $trimTrailing;

      /**
       * Remove leading whitespace-only lines before the first block.
       *
       * Read by trimProgram().
       *
       * @var bool
       */
      public $trimLeading;

      /**
       * Remove whitespace found on the same line as the end of a block
       * definition.
       *
       * Not read by this class itself; presumably consulted by the code that
       * decides whether to call trimBlockLines() -- TODO confirm against callers.
       *
       * @var bool
       */
      public $trimBlockEol;

      /**
       * Remove indentation whitespace until the minimum column of the current
       * block level is reached.
       *
       * Not read by this class itself; presumably consulted by the code that
       * decides whether to call trimBlockLevelIndentation() -- TODO confirm
       * against callers.
       *
       * @var bool
       */
      public $trimIndent;

      /**
       * Distance in columns between tab stops, used by trimIndentationLine()
       * when a tab character (\t) is part of the indentation.
       *
       * @var int
       */
      public $tabSize;

      /**
       * Enables every removal type and sets the tab size to 8 columns.
       *
       * The properties are declared on the class (instead of being created
       * dynamically here) because dynamic property creation is deprecated as
       * of PHP 8.2.
       */
      public function __construct()
      {
          $this->trimTrailing = true;
          $this->trimLeading = true;
          $this->trimBlockEol = true;
          $this->trimIndent = true;
          $this->tabSize = 8;
      }

      /**
       * Trims away leading and trailing whitespace lines from the top of the
       * element tree.
       *
       * Only the first and the last child of $tree are examined, and only when
       * they are text blocks (ezcTemplateTextBlockTstNode). The leading pass is
       * controlled by $trimLeading and the trailing pass by $trimTrailing.
       *
       * @param ezcTemplateProgramTstNode $tree The program element for the tree.
       * @return void
       */
      public function trimProgram( ezcTemplateProgramTstNode $tree )
      {
          if ( !$tree->hasChildren() )
          {
              return;
          }

          if ( $this->trimLeading )
          {
              $child = $tree->getFirstChild();
              // We only touch text block elements, not literal text
              if ( $child instanceof ezcTemplateTextBlockTstNode )
              {
                  $lines = $this->trimLeading( $child->lines );
                  // false means nothing was modified, leave the node alone
                  if ( $lines !== false )
                  {
                      $child->setTextLines( $lines );
                  }
              }
          }

          if ( $this->trimTrailing )
          {
              $child = $tree->getLastChild();
              // We only touch text block elements, not literal text
              if ( $child instanceof ezcTemplateTextBlockTstNode )
              {
                  $lines = $this->trimTrailing( $child->lines );
                  // false means nothing was modified, leave the node alone
                  if ( $lines !== false )
                  {
                      $child->setTextLines( $lines );
                  }
              }
          }
      }

      /**
       * Trim away the minimum indentation level for all elements in $elements.
       *
       * Every text block in $elements gets the indentation reported by
       * $parentBlock->minimumWhitespaceColumn() removed from each of its lines.
       * In addition, the last line of a text block is emptied when it holds
       * only spaces/tabs and is followed by a block line, i.e. either the next
       * sibling or (for the last element) the parent is a non-output block.
       * Condition bodies (ezcTemplateConditionBodyTstNode) are processed
       * recursively with their own children.
       *
       * @param ezcTemplateTstNode $parentBlock The block which owns elements in $elements.
       * @param array(ezcTemplateTstNode) $elements List of elements to trim,
       *        indexed sequentially from 0.
       * @return void
       */
      public function trimBlockLevelIndentation( ezcTemplateTstNode $parentBlock, array $elements )
      {
          // First figure out the smallest amount of indentation that can be removed
          $indentation = $parentBlock->minimumWhitespaceColumn();
          $nrOfElements = count( $elements );

          for ( $el = 0; $el < $nrOfElements; ++$el )
          {
              $element = $elements[$el];

              if ( $element instanceof ezcTemplateTextBlockTstNode )
              {
                  $lines = $element->lines;
                  $count = count( $lines );

                  for ( $i = 0; $i < $count; ++$i )
                  {
                      // Skip the first line if it is placed after another element (column > 0).
                      // We can only modify lines with leading point at column 0.
                      if ( $i == 0 && $element->firstLineColumn() > 0 )
                      {
                          continue;
                      }

                      // Trim the line and leave EOL alone
                      $lines[$i][0] = $this->trimIndentationLine( $lines[$i][0], $indentation );

                      if ( $i != $count - 1 )
                      {
                          continue;
                      }

                      // The last line is followed either by the next sibling or,
                      // for the last element, by the end of the parent block.
                      $follower = ( $el < $nrOfElements - 1 ) ? $elements[$el + 1] : $parentBlock;

                      // Whitespace in front of a (non-output) block line is dropped.
                      if ( $follower instanceof ezcTemplateBlockTstNode
                           && !( $follower instanceof ezcTemplateOutputBlockTstNode )
                           && trim( $lines[$i][0], " \t" ) === '' )
                      {
                          $lines[$i][0] = '';
                      }
                  }

                  $element->setTextLines( $lines );
              }
              elseif ( $element instanceof ezcTemplateConditionBodyTstNode )
              {
                  $this->trimBlockLevelIndentation( $element, $element->children );
              }
          }
      }

      /**
       * Trim away the excess whitespace which makes up the block lines.
       *
       * Walks over $elements and calls trimBlockLine() for every text element
       * whose previous sibling is a block element other than an output block.
       *
       * @param ezcTemplateTstNode $parentBlock
       *        The block which owns the text elements.
       * @param array(ezcTemplateTstNode) $elements
       *        Element list to check for block objects.
       * @return void
       */
      public function trimBlockLines( ezcTemplateTstNode $parentBlock, array $elements )
      {
          $previousSibling = null;
          foreach ( $elements as $element )
          {
              $followsBlock = $previousSibling instanceof ezcTemplateBlockTstNode
                  && !( $previousSibling instanceof ezcTemplateOutputBlockTstNode );

              if ( $followsBlock && $element instanceof ezcTemplateTextTstNode )
              {
                  // This text element is placed directly after a block element
                  // so we need to trim it.
                  $this->trimBlockLine( $parentBlock, $element );
              }
              $previousSibling = $element;
          }
      }

      /**
       * Trim away the excess whitespace which makes up the block line after a
       * block element.
       *
       * The first line of the text element is examined; if it consists solely
       * of spaces, tabs (\t) and vertical tabs (\x0B) the line text is emptied
       * and its EOL marker is disabled (set to false). Otherwise the element is
       * left untouched.
       *
       * @see ezcTemplateTextTstNode::setTextLines for details of the line format
       *      of text blocks.
       * @param ezcTemplateTstNode $parentBlock
       *        The block which owns the text element (currently unused).
       * @param ezcTemplateTextTstNode $textElement
       *        Text element to trim.
       * @return void
       */
      public function trimBlockLine( ezcTemplateTstNode $parentBlock, ezcTemplateTextTstNode $textElement )
      {
          $lines = $textElement->lines;
          if ( count( $lines ) == 0 )
          {
              return;
          }

          // Any character that is not blank means the line carries content
          // and therefore cannot be trimmed.
          if ( preg_match( "#[^ \t\x0B]#", $lines[0][0] ) )
          {
              return;
          }

          // Clear line text and EOL marker
          $lines[0][0] = '';
          $lines[0][1] = false;
          $textElement->setTextLines( $lines );
      }

      /**
       * Trims characters in the text line $line until the required indentation
       * level is reached.
       *
       * Tab characters (\t) advance the column to the next multiple of
       * $tabSize. If the line is too short it will become an empty string.
       *
       * Note: If the indentation stops within a tab, the returned string will
       * start right after the tab character.
       *
       * Note: Characters are removed by column position only, the method does
       * not check that the removed characters are whitespace. Callers are
       * expected to pass an indentation that does not exceed the whitespace
       * prefix of non-blank lines.
       *
       * @param string $line
       *        A text string containing a line but without the EOL marker.
       * @param int $indentation
       *        The required indentation level.
       * @return string
       */
      public function trimIndentationLine( $line, $indentation )
      {
          $len = strlen( $line );
          $column = 0;

          for ( $i = 0; $i < $len; ++$i )
          {
              // Loose comparison is kept on purpose so that non-integer
              // indentation values behave as they always have.
              if ( $column == $indentation )
              {
                  return (string)substr( $line, $i );
              }

              if ( $line[$i] == "\t" )
              {
                  // Tabs jump to the next tab stop
                  $column += $this->tabSize - ( $column % $this->tabSize );
                  if ( $column >= $indentation )
                  {
                      // Return string after tab character
                      return (string)substr( $line, $i + 1 );
                  }
                  continue;
              }

              if ( $column >= $indentation )
              {
                  return (string)substr( $line, $i );
              }
              ++$column;
          }

          // Indentation is larger than string so we return an empty one.
          return '';
      }

      /**
       * Disables all lines (from the start) which are empty after trimming.
       *
       * A disabled line has its text set to an empty string and its EOL marker
       * set to false; the number of entries in the array does not change. As
       * soon as a non-empty line is found the process stops and the rest of the
       * lines are kept as they are.
       *
       * For instance the text:
       * <code>
       * " \n" .
       * "\n" .
       * " abc\n" .
       * " \n" .
       * "\n"
       * </code>
       * Will be turned into:
       * <code>
       * " abc\n" .
       * " \n" .
       * "\n"
       * </code>
       *
       * @param array(array) $lines The text lines to trim, each entry being
       *        array( 0 => text, 1 => EOL marker ).
       * @return array(array)/false The modified lines, or false if nothing
       *         was modified.
       */
      public function trimLeading( $lines )
      {
          $count = count( $lines );
          for ( $i = 0; $i < $count; ++$i )
          {
              if ( strlen( ltrim( $lines[$i][0] ) ) != 0 )
              {
                  break;
              }
              // The line is empty so we disable the line by setting empty line
              // text and removing the EOL marker.
              $lines[$i][0] = '';
              $lines[$i][1] = false;
          }

          // If $i is 0 it means no lines have been modified.
          if ( $i == 0 )
          {
              return false;
          }
          return $lines;
      }

      /**
       * Disables all lines (from the end) which are empty after trimming.
       *
       * A disabled line has its text set to an empty string and its EOL marker
       * set to false, except for the top-most line (index 0) which keeps its
       * EOL marker. As soon as a non-empty line is found the process stops;
       * that line and all lines before it are left completely untouched.
       *
       * For instance the text:
       * <code>
       * " \n" .
       * "\n" .
       * " abc\n" .
       * " \n" .
       * "\n"
       * </code>
       * Will be turned into:
       * <code>
       * " \n" .
       * "\n" .
       * " abc\n"
       * </code>
       *
       * and the text:
       * <code>
       * " \n" .
       * "\r\n" .
       * " \r" .
       * " \n" .
       * "\n"
       * </code>
       * Will be turned into:
       * <code>
       * "\n"
       * </code>
       *
       * @param array(array) $lines The text lines to trim, each entry being
       *        array( 0 => text, 1 => EOL marker ).
       * @return array(array)/false The modified lines, or false if nothing
       *         was modified.
       */
      public function trimTrailing( $lines )
      {
          $count = count( $lines );
          for ( $i = $count - 1; $i >= 0; --$i )
          {
              if ( strlen( rtrim( $lines[$i][0] ) ) != 0 )
              {
                  // First line with content: both its text and its EOL
                  // marker are kept exactly as they are.
                  break;
              }
              // The line is empty so we disable the line by setting empty line
              // text and removing the EOL marker.
              $lines[$i][0] = '';
              // Keep the EOL marker if this is the top-most line
              if ( $i > 0 )
              {
                  $lines[$i][1] = false;
              }
          }

          // If $i is the same as the starting iteration value
          // it means no lines have been modified.
          if ( $i == $count - 1 )
          {
              return false;
          }
          return $lines;
      }
  }
  469. ?>