PageRenderTime 35ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/src/PHPagstract/Token/Tokens/Pagstract/PagstractAbstractToken.php

https://gitlab.com/php.bjoernbartels.earth/phpagstract
PHP | 535 lines | 323 code | 67 blank | 145 comment | 33 complexity | 5c4ee10864bd9c600f54204bbd91be33 MD5 | raw file
  1. <?php
  2. /**
  3. * Pagstract token abstract class
  4. */
  5. namespace PHPagstract\Token\Tokens;
  6. use PHPagstract\Token\Exception\TokenizerException;
  7. use PHPagstract\Token\PagstractTokenizer;
  8. /**
  9. * Pagstract token abstract class
  10. *
  11. * @package PHPagstract
  12. * @author Björn Bartels <coding@bjoernbartels.earth>
  13. * @link https://gitlab.bjoernbartels.earth/php/phpagstract
  14. * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
  15. * @copyright copyright (c) 2016 Björn Bartels <coding@bjoernbartels.earth>
  16. */
  17. class PagstractAbstractToken extends AbstractToken
  18. {
  19. /**
  20. * @var array the $matching
  21. */
  22. public static $matching = array(
  23. "start" => "/^\s*<pma|^\s*<object |^\s*<a |^\s*<area |^\s*<input |^\s*<select /i",
  24. "end" => ">"
  25. );
  26. /**
  27. * nesting tags allowed?
  28. *
  29. * @var boolean
  30. */
  31. public $nested = true;
  32. /**
  33. * tag attributes
  34. *
  35. * @var null|array
  36. */
  37. protected $attributes;
  38. /**
  39. * list of mandatory attributes
  40. *
  41. * @var array
  42. */
  43. protected $mandatoryAttributes = [];
  44. /**
  45. * list of child tokens if nesting tags is allowed
  46. *
  47. * @var null|array[Token]
  48. */
  49. protected $children;
  50. /**
  51. * tag name
  52. *
  53. * @var string
  54. */
  55. protected $name;
  56. /**
  57. * tag value/content
  58. *
  59. * @var mixed
  60. */
  61. protected $value;
  62. /**
  63. * is this a closing tag?
  64. *
  65. * @var boolean
  66. */
  67. protected $isClosing;
  68. /**
  69. * list of valid token types
  70. *
  71. * @var array
  72. */
  73. protected $validTypes = array(
  74. Token::CDATA,
  75. Token::COMMENT,
  76. Token::DOCTYPE,
  77. Token::ELEMENT,
  78. Token::PHP,
  79. Token::TEXT,
  80. Token::CONTENIDO,
  81. Token::PAGSTRACT,
  82. Token::PAGSTRACTMARKUP, // any other markup than pagstract markup
  83. Token::PAGSTRACTCOMMENT, // special '<!--- ... -->' handling
  84. Token::PAGSTRACTRESOURCE, // special 'resource(_ext)://...' handling
  85. Token::PAGSTRACTMESSAGE, // special 'msg://...' handling
  86. Token::PAGSTRACTSIMPLEVALUE,
  87. Token::PAGSTRACTRENDERED,
  88. Token::PAGSTRACTTILE,
  89. Token::PAGSTRACTTILEVARIABLE,
  90. Token::PAGSTRACTBEAN,
  91. Token::PAGSTRACTIFVISIBLE,
  92. Token::PAGSTRACTLIST,
  93. Token::PAGSTRACTLISTHEADER,
  94. Token::PAGSTRACTLISTFOOTER,
  95. Token::PAGSTRACTLISTCONTENT,
  96. Token::PAGSTRACTLISTNOCONTENT,
  97. Token::PAGSTRACTLISTSEPARATOR,
  98. Token::PAGSTRACTLISTEVEN,
  99. Token::PAGSTRACTLISTODD,
  100. Token::PAGSTRACTLISTFIRST,
  101. Token::PAGSTRACTLISTLAST,
  102. Token::PAGSTRACTMODLIST,
  103. Token::PAGSTRACTMODSEPARATOR,
  104. Token::PAGSTRACTMODCONTENT,
  105. Token::PAGSTRACTSWITCH,
  106. Token::PAGSTRACTOBJECT,
  107. Token::PAGSTRACTFORM,
  108. Token::PAGSTRACTTEXTIMG,
  109. Token::PAGSTRACTLINK,
  110. Token::PAGSTRACTAREA,
  111. Token::PAGSTRACTINPUT,
  112. Token::PAGSTRACTSELECT,
  113. Token::PAGSTRACTDEBUG,
  114. Token::PAGSTRACTPROPERTYREFERENCE, // special '${...}' handling
  115. Token::PAGSTRACTPROPERTYREFERENCETEXT, // every other text around a '${...}'
  116. );
  117. /**
  118. * Constructor
  119. *
  120. * @param string $type
  121. * @param null|Token $parent
  122. * @param boolean $throwOnError
  123. */
  124. public function __construct($type, Token $parent = null, $throwOnError = false)
  125. {
  126. parent::__construct($type, $parent, $throwOnError);
  127. $this->throwOnError = (boolean) $throwOnError;
  128. $this->name = null;
  129. $this->value = null;
  130. $this->attributes = null;
  131. $this->children = null;
  132. }
  133. /**
  134. * Does the parent have an implied closing tag?
  135. *
  136. * @param string $html
  137. *
  138. * @return boolean
  139. */
  140. public function isClosingElementImplied($html)
  141. {
  142. return false;
  143. }
  144. /**
  145. * Will parse this element.
  146. *
  147. * @param string $html
  148. *
  149. * @return string Remaining HTML.
  150. */
  151. public function parse($html)
  152. {
  153. $html = ltrim($html);
  154. // Get token position.
  155. $positionArray = PagstractTokenizer::getPosition($html);
  156. $this->setLine($positionArray['line']);
  157. $this->setPosition($positionArray['position']);
  158. // Parse name.
  159. $this->name = $this->parseElementName($html);
  160. // Parse attributes.
  161. $remainingHtml = mb_substr($html, mb_strlen($this->name) + ($this->isClosing ? 2 : 1));
  162. while ( (mb_strpos($remainingHtml, '>') !== false) &&
  163. (preg_match("/^\s*[\/]?>/", $remainingHtml) === 0) ) {
  164. $remainingHtml = $this->parseAttribute($remainingHtml);
  165. }
  166. // Find position of end of tag.
  167. $posOfClosingBracket = mb_strpos($remainingHtml, '>');
  168. if ($posOfClosingBracket === false) {
  169. if ($this->getThrowOnError()) {
  170. throw new TokenizerException(
  171. 'Invalid element: missing closing bracket '.
  172. 'in line: '.$this->getLine().', position: '.$this->getPosition().''
  173. );
  174. }
  175. return '';
  176. }
  177. // Is self-closing?
  178. $posOfSelfClosingBracket = mb_strpos($remainingHtml, '/>');
  179. $remainingHtml = mb_substr($remainingHtml, $posOfClosingBracket + 1);
  180. if (($posOfSelfClosingBracket !== false)
  181. && ($posOfSelfClosingBracket == $posOfClosingBracket - 1)
  182. ) {
  183. // Self-closing element.
  184. return $remainingHtml;
  185. }
  186. // Lets close those closed-only elements that are left open.
  187. $closedOnlyElements = array(
  188. 'area',
  189. 'base',
  190. 'br',
  191. 'col',
  192. 'embed',
  193. 'hr',
  194. 'img',
  195. 'input',
  196. 'link',
  197. 'meta',
  198. 'param',
  199. 'source',
  200. 'track',
  201. 'wbr'
  202. );
  203. if (array_search($this->name, $closedOnlyElements) !== false) {
  204. return $remainingHtml;
  205. }
  206. $nested = $this->nested();
  207. if (!$nested) {
  208. return $remainingHtml;
  209. }
  210. // Open element.
  211. return $this->parseContents($remainingHtml);
  212. }
  213. /**
  214. * Will parse attributes.
  215. *
  216. * @param string $html
  217. *
  218. * @return string Remaining HTML.
  219. */
  220. private function parseAttribute($html)
  221. {
  222. $remainingHtml = ltrim($html);
  223. // Will match the first entire name/value attribute pair.
  224. preg_match(
  225. "/((([a-z0-9\-_]+:)?[a-z0-9\-_]+)(\s*=\s*)?)/i",
  226. $remainingHtml,
  227. $attributeMatches
  228. );
  229. $name = $attributeMatches[2];
  230. $remainingHtml = mb_substr(mb_strstr($remainingHtml, $name), mb_strlen($name));
  231. if (preg_match("/^\s*=\s*/", $remainingHtml) === 0) {
  232. // Valueless attribute.
  233. $this->attributes[trim($name)] = true;
  234. } else {
  235. $remainingHtml = ltrim($remainingHtml, ' =');
  236. if ($remainingHtml[0] === "'" || $remainingHtml[0] === '"') {
  237. // Quote enclosed attribute value.
  238. $valueMatchSuccessful = preg_match(
  239. "/".$remainingHtml[0]."(.*?(?<!\\\))".$remainingHtml[0]."/s",
  240. $remainingHtml,
  241. $valueMatches
  242. );
  243. if ($valueMatchSuccessful !== 1) {
  244. if ($this->getThrowOnError()) {
  245. throw new TokenizerException(
  246. 'Invalid value encapsulation '.
  247. 'in line: '.$this->getLine().', position: '.$this->getPosition().'.'
  248. );
  249. }
  250. return '';
  251. }
  252. $value = $valueMatches[1];
  253. } else {
  254. // No quotes enclosing the attribute value.
  255. preg_match("/(\s*([^>\s]*(?<!\/)))/", $remainingHtml, $valueMatches);
  256. $value = $valueMatches[2];
  257. }
  258. $this->attributes[trim($name)] = $value;
  259. // Determine remaining html.
  260. if ($value == '') {
  261. $remainingHtml = ltrim(mb_substr(ltrim($html), mb_strlen($name) + 3));
  262. } else {
  263. $remainingHtml = ltrim($html);
  264. // Remove attribute name.
  265. $remainingHtml = mb_substr($remainingHtml, mb_strlen($name));
  266. $posOfAttributeValue = mb_strpos($remainingHtml, $value);
  267. $remainingHtml = ltrim(
  268. mb_substr(
  269. $remainingHtml,
  270. $posOfAttributeValue + mb_strlen($value)
  271. )
  272. );
  273. }
  274. $remainingHtml = ltrim($remainingHtml, '\'" ');
  275. }
  276. return $remainingHtml;
  277. }
  278. /**
  279. * Will parse the contents of this element.
  280. *
  281. * @param string $html
  282. *
  283. * @return string Remaining HTML.
  284. */
  285. private function parseContents($html)
  286. {
  287. if (ltrim($html) == '') {
  288. return '';
  289. }
  290. /* do we really have tags to omit parsing for?!?
  291. // Don't parse contents of "iframe" element.
  292. if ($this->name == 'iframe') {
  293. return $this->parseNoContents('iframe', $html);
  294. }
  295. // Only TEXT inside a "script" element.
  296. if ($this->name == 'script') {
  297. return $this->parseForeignContents('script', $html);
  298. }
  299. // Only TEXT inside a "style" element.
  300. if ($this->name == 'style') {
  301. return $this->parseForeignContents('style', $html);
  302. }
  303. */
  304. // Parse contents one token at a time.
  305. $remainingHtml = $html;
  306. while (preg_match("/^\s*<\/\s*".$this->name."\s*>/is", $remainingHtml) === 0) {
  307. $token = TokenFactory::buildFromHtml(
  308. $remainingHtml,
  309. $this,
  310. //false
  311. $this->getThrowOnError()
  312. );
  313. if ($token === false || $token->isClosingElementImplied($remainingHtml)) {
  314. return $remainingHtml;
  315. }
  316. if (!is_array($this->children)) {
  317. $this->children = array();
  318. }
  319. $remainingHtml = $token->parse($remainingHtml);
  320. $this->children[] = $token;
  321. }
  322. // Remove last token if contains only whitespace.
  323. if (!empty($this->children)) {
  324. $lastChildArray = array_slice($this->children, -1);
  325. $lastChild = array_pop($lastChildArray);
  326. if ($lastChild->isText() && trim($lastChild->getValue()) == '') {
  327. array_pop($this->children);
  328. }
  329. }
  330. // Remove remaining closing tag.
  331. $posOfClosingBracket = mb_strpos($remainingHtml, '>');
  332. return mb_substr($remainingHtml, $posOfClosingBracket + 1);
  333. }
  334. /**
  335. * Will get the element name from the html string.
  336. *
  337. * @param string $html
  338. *
  339. * @return null|string The element name.
  340. */
  341. private function parseElementName($html)
  342. {
  343. $elementMatchSuccessful = preg_match(
  344. "/(<([\/]?)(([a-z0-9\-]+:)?[a-z0-9\-]+))/i",
  345. //"/(<(([a-z0-9\-]+:)?[a-z0-9\-]+))/i",
  346. $html,
  347. $elementMatches
  348. );
  349. if ($elementMatchSuccessful !== 1) {
  350. if ($this->getThrowOnError()) {
  351. throw new TokenizerException('Invalid element name.');
  352. }
  353. return null;
  354. }
  355. if (!empty($elementMatches[2])) {
  356. $this->isClosing = true;
  357. return '';
  358. }
  359. return mb_strtolower($elementMatches[3]);
  360. }
  361. /**
  362. * Getter for 'attributes'.
  363. *
  364. * @return null|array
  365. */
  366. public function getAttributes()
  367. {
  368. return $this->attributes;
  369. }
  370. /**
  371. * @return boolean
  372. */
  373. public function hasAttributes()
  374. {
  375. return !empty($this->attributes);
  376. }
  377. /**
  378. * Getter for 'children'.
  379. *
  380. * @return null|array
  381. */
  382. public function getChildren()
  383. {
  384. return $this->children;
  385. }
  386. /**
  387. * @return boolean
  388. */
  389. public function hasChildren()
  390. {
  391. return !empty($this->children);
  392. }
  393. /**
  394. * Getter for 'name'.
  395. *
  396. * @return null|string
  397. */
  398. public function getName()
  399. {
  400. return $this->name;
  401. }
  402. /**
  403. * Getter for 'value'.
  404. *
  405. * @return null|mixed
  406. */
  407. public function getValue()
  408. {
  409. return $this->value;
  410. }
  411. /**
  412. * Getter/Setter for 'nested'.
  413. *
  414. * @return boolean
  415. * @return boolean
  416. */
  417. public function nested($nested = null)
  418. {
  419. if ($nested !== null) {
  420. $this->nested = !!$nested;
  421. }
  422. return $this->nested;
  423. }
  424. public function toArray()
  425. {
  426. $result = array(
  427. 'type' => $this->getType(),
  428. 'name' => $this->getName(),
  429. 'value' => $this->getValue(),
  430. 'line' => $this->getLine(),
  431. 'position' => $this->getPosition()
  432. );
  433. if (!empty($this->attributes)) {
  434. $result['attributes'] = array();
  435. foreach ($this->attributes as $name => $value) {
  436. $result['attributes'][$name] = $value;
  437. }
  438. }
  439. if (($this->children !== null)) {
  440. $result['children'] = array();
  441. if (!empty($this->children)) {
  442. foreach ($this->children as $child) {
  443. $result['children'][] = $child->toArray();
  444. }
  445. }
  446. }
  447. return $result;
  448. }
  449. /**
  450. * check for valid type
  451. * {@inheritDoc}
  452. *
  453. * @see \PHPagstract\Token\Tokens\AbstractToken::isValidType()
  454. */
  455. protected function isValidType($type)
  456. {
  457. return (in_array($type, $this->validTypes));
  458. }
  459. }