PageRenderTime 29ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/src/PhpWord/Shared/Html.php

https://github.com/navnorth/PHPWord
PHP | 370 lines | 181 code | 38 blank | 151 comment | 21 complexity | 3e1c2a87702bf95279ee969839e793c8 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0
  1. <?php
  2. /**
  3. * This file is part of PHPWord - A pure PHP library for reading and writing
  4. * word processing documents.
  5. *
  6. * PHPWord is free software distributed under the terms of the GNU Lesser
  7. * General Public License version 3 as published by the Free Software Foundation.
  8. *
  9. * For the full copyright and license information, please read the LICENSE
  10. * file that was distributed with this source code. For the full list of
  11. * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
  12. *
  13. * @link https://github.com/PHPOffice/PHPWord
  14. * @copyright 2010-2014 PHPWord contributors
  15. * @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
  16. */
  17. namespace PhpOffice\PhpWord\Shared;
  18. use PhpOffice\PhpWord\Element\AbstractContainer;
  /**
   * Common Html functions
   *
   * Converts an XML-parsable HTML fragment or document into PHPWord elements by loading it into a
   * DOMDocument, walking the tree and dispatching every supported tag to a dedicated parse* method.
   * The parse* methods are invoked dynamically from parseNode(), hence the PHPMD suppression below.
   *
   * @SuppressWarnings(PHPMD.UnusedPrivateMethod) For readWPNode
   */
  class Html
  {
      /**
       * Add HTML parts
       *
       * Note: $stylesheet parameter is removed to avoid PHPMD error for unused parameter
       *
       * The markup is loaded with DOMDocument::loadXML(), so it has to be well-formed XML once the
       * entities have been normalised. When the markup cannot be parsed, or contains no 'body' element,
       * nothing is added to $element.
       *
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element Where the parts need to be added
       * @param string $html The code to parse
       * @param bool $fullHTML If it's a full HTML, no need to add 'body' tag
       */
      public static function addHtml($element, $html, $fullHTML = false)
      {
          /*
           * @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
           * which could be applied when such an element occurs in the parseNode function.
           */

          // Preprocess: remove all line ends, make entities XML-safe, and add body tag for HTML fragments
          $html = str_replace(array("\n", "\r"), '', $html);
          $html = self::normalizeEntities($html);
          if ($fullHTML === false) {
              $html = '<body>' . $html . '</body>';
          }
          if ($html === '') {
              // Only reachable for an empty full document; loadXML() rejects empty input.
              return;
          }

          // Load DOM (LIBXML_NONET: never touch the network while parsing caller-supplied markup)
          $dom = new \DOMDocument();
          $dom->preserveWhiteSpace = true;
          if ($dom->loadXML($html, LIBXML_NONET) === false) {
              return;
          }

          $body = $dom->getElementsByTagName('body')->item(0);
          if ($body === null) {
              return;
          }
          self::parseNode($body, $element);
      }

      /**
       * Make the entity references of an HTML string safe for an XML parser
       *
       * - the five predefined XML entities (amp, lt, gt, quot, apos) are left untouched, so escaped
       *   markup stays text instead of being turned into tags;
       * - every other named or numeric reference is decoded to UTF-8 and re-escaped when the decoded
       *   character is itself XML-special; unknown references end up as literal text;
       * - an ampersand that does not start a reference is escaped as '&amp;'.
       *
       * @param string $html
       * @return string
       */
      private static function normalizeEntities($html)
      {
          return preg_replace_callback(
              '/&(?:(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);)?/',
              function ($match) {
                  // No captured name: a bare ampersand such as the one in "AT&T"
                  if (!isset($match[1]) || $match[1] === '') {
                      return '&amp;';
                  }
                  // Already understood by the XML parser, keep as is
                  if (in_array($match[1], array('amp', 'lt', 'gt', 'quot', 'apos'), true)) {
                      return $match[0];
                  }
                  $decoded = html_entity_decode($match[0], ENT_QUOTES, 'UTF-8');

                  return htmlspecialchars($decoded, ENT_QUOTES, 'UTF-8');
              },
              $html
          );
      }

      /**
       * Parse inline style of a node
       *
       * Only element nodes are inspected and only their 'style' attribute is interpreted; any other
       * node type returns $styles unchanged.
       *
       * @param \DOMNode $node Node to check on attributes and to compile a style array
       * @param array $styles If supplied, the inline style attributes are added to the already existing style
       * @return array
       */
      protected static function parseInlineStyle($node, $styles = array())
      {
          if ($node->nodeType === XML_ELEMENT_NODE) {
              $attribute = $node->attributes->getNamedItem('style');
              if ($attribute !== null) {
                  $styles = self::parseStyle($attribute, $styles);
              }
          }

          return $styles;
      }

      /**
       * Parse a node and add a corresponding element to the parent element
       *
       * Looks the node name up in the mapping table, calls the matching parse* method and then
       * descends into the child nodes, using the element returned by that method (or $element when
       * it returned null) as the new parent.
       *
       * @param \DOMNode $node node to parse
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element object to add an element corresponding with the node
       * @param array $styles Array with all styles
       * @param array $data Array to transport data to a next level in the DOM tree, for example level of listitems
       */
      protected static function parseNode($node, $element, $styles = array(), $data = array())
      {
          // Populate styles array
          foreach (array('font', 'paragraph', 'list') as $styleType) {
              if (!isset($styles[$styleType])) {
                  $styles[$styleType] = array();
              }
          }

          // Node mapping table; a null slot means "this argument is not passed to the method"
          $nodes = array(
                                // $method        $node   $element    $styles     $data   $argument1      $argument2
              'p'         => array('Paragraph',   $node,  $element,   $styles,    null,   null,           null),
              'h1'        => array('Heading',     null,   $element,   $styles,    null,   'Heading1',     null),
              'h2'        => array('Heading',     null,   $element,   $styles,    null,   'Heading2',     null),
              'h3'        => array('Heading',     null,   $element,   $styles,    null,   'Heading3',     null),
              'h4'        => array('Heading',     null,   $element,   $styles,    null,   'Heading4',     null),
              'h5'        => array('Heading',     null,   $element,   $styles,    null,   'Heading5',     null),
              'h6'        => array('Heading',     null,   $element,   $styles,    null,   'Heading6',     null),
              '#text'     => array('Text',        $node,  $element,   $styles,    null,   null,           null),
              'strong'    => array('Property',    null,   null,       $styles,    null,   'bold',         true),
              'em'        => array('Property',    null,   null,       $styles,    null,   'italic',       true),
              'sup'       => array('Property',    null,   null,       $styles,    null,   'superScript',  true),
              'sub'       => array('Property',    null,   null,       $styles,    null,   'subScript',    true),
              'table'     => array('Table',       $node,  $element,   $styles,    null,   'addTable',     true),
              'tr'        => array('Table',       $node,  $element,   $styles,    null,   'addRow',       true),
              'td'        => array('Table',       $node,  $element,   $styles,    null,   'addCell',      true),
              'ul'        => array('List',        null,   null,       $styles,    $data,  3,              null),
              'ol'        => array('List',        null,   null,       $styles,    $data,  7,              null),
              'li'        => array('ListItem',    $node,  $element,   $styles,    $data,  null,           null),
          );

          $newElement = null;
          if (array_key_exists($node->nodeName, $nodes)) {
              // After shifting off the method name the slots are:
              // 0 node, 1 element, 2 styles, 3 data, 4 argument1, 5 argument2
              $args = $nodes[$node->nodeName];
              $method = 'parse' . array_shift($args);

              // Build a positional list of references to the non-null slots. It must not carry string
              // keys: PHP 8 would interpret them as named parameters and reject surplus ones.
              $arguments = array();
              for ($i = 0; $i <= 5; $i++) {
                  if ($args[$i] !== null) {
                      $arguments[] = &$args[$i];
                  }
              }
              $newElement = call_user_func_array(array(__CLASS__, $method), $arguments);

              // Take over what the method changed through its by-reference parameters
              $styles = $args[2];
              if ($args[3] !== null) {
                  $data = $args[3];
              }
          }

          if ($newElement === null) {
              $newElement = $element;
          }

          self::parseChildNodes($node, $newElement, $styles, $data);
      }

      /**
       * Parse child nodes
       *
       * Children of 'li' nodes are skipped because parseListItem() already consumed them, and nothing
       * is parsed when $element is not an AbstractContainer.
       *
       * @param \DOMNode $node
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
       * @param array $styles
       * @param array $data
       */
      private static function parseChildNodes($node, $element, $styles, $data)
      {
          if ($node->nodeName === 'li' || !$node->hasChildNodes()) {
              return;
          }
          if (!($element instanceof AbstractContainer)) {
              return;
          }

          foreach ($node->childNodes as $cNode) {
              self::parseNode($cNode, $element, $styles, $data);
          }
      }

      /**
       * Parse paragraph node
       *
       * Merges the node's inline style into the paragraph style and opens a text run with it.
       *
       * @param \DOMNode $node
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
       * @param array $styles Updated in place with the resulting paragraph style
       * @return \PhpOffice\PhpWord\Element\TextRun
       */
      private static function parseParagraph($node, $element, &$styles)
      {
          $styles['paragraph'] = self::parseInlineStyle($node, $styles['paragraph']);

          return $element->addTextRun($styles['paragraph']);
      }

      /**
       * Parse heading node
       *
       * Replaces the paragraph style by the heading style name and opens a text run with it.
       *
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
       * @param array $styles Updated in place: 'paragraph' becomes the heading style name
       * @param string $argument1 Name of heading style
       * @return \PhpOffice\PhpWord\Element\TextRun
       *
       * @todo Think of a clever way of defining header styles, now it is only based on the assumption, that
       * Heading1 - Heading6 are already defined somewhere
       */
      private static function parseHeading($element, &$styles, $argument1)
      {
          $styles['paragraph'] = $argument1;

          return $element->addTextRun($styles['paragraph']);
      }

      /**
       * Parse text node
       *
       * Adds the node value as text to $element, using the current font and paragraph styles.
       *
       * @param \DOMNode $node
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
       * @param array $styles
       * @return null
       */
      private static function parseText($node, $element, &$styles)
      {
          $styles['font'] = self::parseInlineStyle($node, $styles['font']);

          // A `method_exists($element, 'addText')` guard used to wrap this call; it was removed as the
          // source of bug #257 because `method_exists` did not seem to work properly in this case.
          // @todo Find better error checking for this one
          $element->addText($node->nodeValue, $styles['font'], $styles['paragraph']);

          return null;
      }

      /**
       * Parse property node
       *
       * Sets a single font style property for everything below the node.
       *
       * @param array $styles Updated in place
       * @param string $argument1 Style name
       * @param string $argument2 Style value
       * @return null
       */
      private static function parseProperty(&$styles, $argument1, $argument2)
      {
          $styles['font'][$argument1] = $argument2;

          return null;
      }

      /**
       * Parse table node
       *
       * Handles 'table', 'tr' and 'td' alike by calling the method named in $argument1 on $element
       * without arguments. The node's 'width' and 'height' attributes are currently not evaluated.
       *
       * @param \DOMNode $node
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
       * @param array $styles Updated in place with the node's inline paragraph style
       * @param string $argument1 Method name
       * @return \PhpOffice\PhpWord\Element\AbstractContainer $element
       *
       * @todo Apply width and height as soon as TableItem, RowItem and CellItem support relative width and height
       */
      private static function parseTable($node, $element, &$styles, $argument1)
      {
          $styles['paragraph'] = self::parseInlineStyle($node, $styles['paragraph']);

          return $element->$argument1();
      }

      /**
       * Parse list node
       *
       * Starts the list depth at 0 for the first list encountered, increments it for every list
       * nested below, and records the list type in the list style.
       *
       * @param array $styles Updated in place
       * @param array $data Updated in place ('listdepth')
       * @param string $argument1 List type
       * @return null
       */
      private static function parseList(&$styles, &$data, $argument1)
      {
          if (isset($data['listdepth'])) {
              $data['listdepth']++;
          } else {
              $data['listdepth'] = 0;
          }
          $styles['list']['listType'] = $argument1;

          return null;
      }

      /**
       * Parse list item node
       *
       * Adds one list item whose text is the concatenation of the direct text children of the node;
       * text inside nested elements is not included. An item without child nodes is not added. An
       * item found outside of any list is added at depth 0.
       *
       * @param \DOMNode $node
       * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
       * @param array $styles
       * @param array $data
       * @return null
       *
       * @todo This function is almost the same like `parseChildNodes`. Merged?
       * @todo As soon as ListItem inherits from AbstractContainer or TextRun delete parsing part of childNodes
       */
      private static function parseListItem($node, $element, &$styles, $data)
      {
          if (!$node->hasChildNodes()) {
              return null;
          }

          $text = '';
          foreach ($node->childNodes as $cNode) {
              if ($cNode->nodeName === '#text') {
                  $text .= $cNode->nodeValue;
              }
          }
          $depth = isset($data['listdepth']) ? $data['listdepth'] : 0;
          $element->addListItem($text, $depth, $styles['font'], $styles['list'], $styles['paragraph']);

          return null;
      }

      /**
       * Parse style
       *
       * Translates the declarations of a 'style' attribute into style array entries. Recognised are
       * 'text-decoration' (underline, line-through), 'text-align', 'color' and 'background-color';
       * property names are matched case-insensitively. Declarations without a colon and unknown
       * properties are ignored.
       *
       * @param \DOMAttr $attribute
       * @param array $styles
       * @return array
       */
      private static function parseStyle($attribute, $styles)
      {
          $properties = explode(';', trim($attribute->value, " \t\n\r\0\x0B;"));
          foreach ($properties as $property) {
              $parts = explode(':', $property, 2);
              if (count($parts) !== 2) {
                  // Empty or malformed declaration
                  continue;
              }
              $cKey = strtolower(trim($parts[0]));
              $cValue = trim($parts[1]);

              switch ($cKey) {
                  case 'text-decoration':
                      switch ($cValue) {
                          case 'underline':
                              $styles['underline'] = 'single';
                              break;
                          case 'line-through':
                              $styles['strikethrough'] = true;
                              break;
                      }
                      break;
                  case 'text-align':
                      $styles['align'] = $cValue;
                      break;
                  case 'color':
                      $styles['color'] = trim($cValue, "#");
                      break;
                  case 'background-color':
                      $styles['bgColor'] = trim($cValue, "#");
                      break;
              }
          }

          return $styles;
      }
  }