PageRenderTime 29ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/vendor/pelago/emogrifier/Classes/Emogrifier.php

https://gitlab.com/yousafsyed/easternglamor
PHP | 970 lines | 493 code | 99 blank | 378 comment | 46 complexity | c4b4bae62a6c69b2dd8ea63d347b9ca1 MD5 | raw file
  1. <?php
  2. namespace Pelago;
  3. /**
  4. * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
  5. *
  6. * For more information, please see the README.md file.
  7. *
  8. * @author Cameron Brooks
  9. * @author Jaime Prado
  10. * @author Roman Ožana <ozana@omdesign.cz>
  11. */
  12. class Emogrifier
  13. {
  14. /**
  15. * @var string
  16. */
  17. const ENCODING = 'UTF-8';
  18. /**
  19. * @var int
  20. */
  21. const CACHE_KEY_CSS = 0;
  22. /**
  23. * @var int
  24. */
  25. const CACHE_KEY_SELECTOR = 1;
  26. /**
  27. * @var int
  28. */
  29. const CACHE_KEY_XPATH = 2;
  30. /**
  31. * @var int
  32. */
  33. const CACHE_KEY_CSS_DECLARATION_BLOCK = 3;
  34. /**
  35. * for calculating nth-of-type and nth-child selectors
  36. *
  37. * @var int
  38. */
  39. const INDEX = 0;
  40. /**
  41. * for calculating nth-of-type and nth-child selectors
  42. *
  43. * @var int
  44. */
  45. const MULTIPLIER = 1;
  46. /**
  47. * @var string
  48. */
  49. const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';
  50. /**
  51. * @var string
  52. */
  53. const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';
  54. /**
  55. * @var string
  56. */
  57. private $html = '';
  58. /**
  59. * @var string
  60. */
  61. private $css = '';
  62. /**
  63. * @var string[]
  64. */
  65. private $unprocessableHtmlTags = array('wbr');
  66. /**
  67. * @var array[]
  68. */
  69. private $caches = array(
  70. self::CACHE_KEY_CSS => array(),
  71. self::CACHE_KEY_SELECTOR => array(),
  72. self::CACHE_KEY_XPATH => array(),
  73. self::CACHE_KEY_CSS_DECLARATION_BLOCK => array(),
  74. );
  75. /**
  76. * the visited nodes with the XPath paths as array keys
  77. *
  78. * @var \DOMNode[]
  79. */
  80. private $visitedNodes = array();
  81. /**
  82. * the styles to apply to the nodes with the XPath paths as array keys for the outer array
  83. * and the attribute names/values as key/value pairs for the inner array
  84. *
  85. * @var array[]
  86. */
  87. private $styleAttributesForNodes = array();
  88. /**
  89. * Determines whether the "style" attributes of tags in the the HTML passed to this class should be preserved.
  90. * If set to false, the value of the style attributes will be discarded.
  91. *
  92. * @var bool
  93. */
  94. private $isInlineStyleAttributesParsingEnabled = true;
  95. /**
  96. * Determines whether the <style> blocks in the HTML passed to this class should be parsed.
  97. *
  98. * If set to true, the <style> blocks will be removed from the HTML and their contents will be applied to the HTML
  99. * via inline styles.
  100. *
  101. * If set to false, the <style> blocks will be left as they are in the HTML.
  102. *
  103. * @var bool
  104. */
  105. private $isStyleBlocksParsingEnabled = true;
  106. /**
  107. * This attribute applies to the case where you want to preserve your original text encoding.
  108. *
  109. * By default, emogrifier translates your text into HTML entities for two reasons:
  110. *
  111. * 1. Because of client incompatibilities, it is better practice to send out HTML entities
  112. * rather than unicode over email.
  113. *
  114. * 2. It translates any illegal XML characters that DOMDocument cannot work with.
  115. *
  116. * If you would like to preserve your original encoding, set this attribute to true.
  117. *
  118. * @var bool
  119. */
  120. public $preserveEncoding = false;
  121. /**
  122. * The constructor.
  123. *
  124. * @param string $html the HTML to emogrify, must be UTF-8-encoded
  125. * @param string $css the CSS to merge, must be UTF-8-encoded
  126. */
  127. public function __construct($html = '', $css = '')
  128. {
  129. $this->setHtml($html);
  130. $this->setCss($css);
  131. }
  132. /**
  133. * The destructor.
  134. */
  135. public function __destruct()
  136. {
  137. $this->purgeVisitedNodes();
  138. }
  139. /**
  140. * Sets the HTML to emogrify.
  141. *
  142. * @param string $html the HTML to emogrify, must be UTF-8-encoded
  143. *
  144. * @return void
  145. */
  146. public function setHtml($html)
  147. {
  148. $this->html = $html;
  149. }
  150. /**
  151. * Sets the CSS to merge with the HTML.
  152. *
  153. * @param string $css the CSS to merge, must be UTF-8-encoded
  154. *
  155. * @return void
  156. */
  157. public function setCss($css)
  158. {
  159. $this->css = $css;
  160. }
  161. /**
  162. * Applies the CSS you submit to the HTML you submit.
  163. *
  164. * This method places the CSS inline.
  165. *
  166. * @return string
  167. *
  168. * @throws \BadMethodCallException
  169. */
  170. public function emogrify()
  171. {
  172. if ($this->html === '') {
  173. throw new \BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
  174. }
  175. $xmlDocument = $this->createXmlDocument();
  176. $xpath = new \DOMXPath($xmlDocument);
  177. $this->clearAllCaches();
  178. // Before be begin processing the CSS file, parse the document and normalize all existing CSS attributes.
  179. // This changes 'DISPLAY: none' to 'display: none'.
  180. // We wouldn't have to do this if DOMXPath supported XPath 2.0.
  181. // Also store a reference of nodes with existing inline styles so we don't overwrite them.
  182. $this->purgeVisitedNodes();
  183. $nodesWithStyleAttributes = $xpath->query('//*[@style]');
  184. if ($nodesWithStyleAttributes !== false) {
  185. /** @var \DOMElement $node */
  186. foreach ($nodesWithStyleAttributes as $node) {
  187. if ($this->isInlineStyleAttributesParsingEnabled) {
  188. $this->normalizeStyleAttributes($node);
  189. } else {
  190. $node->removeAttribute('style');
  191. }
  192. }
  193. }
  194. // grab any existing style blocks from the html and append them to the existing CSS
  195. // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
  196. $allCss = $this->css;
  197. if ($this->isStyleBlocksParsingEnabled) {
  198. $allCss .= $this->getCssFromAllStyleNodes($xpath);
  199. }
  200. $cssParts = $this->splitCssAndMediaQuery($allCss);
  201. $cssKey = md5($cssParts['css']);
  202. if (!isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
  203. // process the CSS file for selectors and definitions
  204. preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $cssParts['css'], $matches, PREG_SET_ORDER);
  205. $allSelectors = array();
  206. foreach ($matches as $key => $selectorString) {
  207. // if there is a blank definition, skip
  208. if (!strlen(trim($selectorString[2]))) {
  209. continue;
  210. }
  211. // else split by commas and duplicate attributes so we can sort by selector precedence
  212. $selectors = explode(',', $selectorString[1]);
  213. foreach ($selectors as $selector) {
  214. // don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
  215. // only allow structural pseudo-classes
  216. if (strpos($selector, ':') !== false && !preg_match('/:\\S+\\-(child|type)\\(/i', $selector)
  217. ) {
  218. continue;
  219. }
  220. $allSelectors[] = array('selector' => trim($selector),
  221. 'attributes' => trim($selectorString[2]),
  222. // keep track of where it appears in the file, since order is important
  223. 'line' => $key,
  224. );
  225. }
  226. }
  227. // now sort the selectors by precedence
  228. usort($allSelectors, array($this,'sortBySelectorPrecedence'));
  229. $this->caches[self::CACHE_KEY_CSS][$cssKey] = $allSelectors;
  230. }
  231. foreach ($this->caches[self::CACHE_KEY_CSS][$cssKey] as $value) {
  232. // query the body for the xpath selector
  233. $nodesMatchingCssSelectors = $xpath->query($this->translateCssToXpath($value['selector']));
  234. /** @var \DOMElement $node */
  235. foreach ($nodesMatchingCssSelectors as $node) {
  236. // if it has a style attribute, get it, process it, and append (overwrite) new stuff
  237. if ($node->hasAttribute('style')) {
  238. // break it up into an associative array
  239. $oldStyleDeclarations = $this->parseCssDeclarationBlock($node->getAttribute('style'));
  240. } else {
  241. $oldStyleDeclarations = array();
  242. }
  243. $newStyleDeclarations = $this->parseCssDeclarationBlock($value['attributes']);
  244. $node->setAttribute(
  245. 'style',
  246. $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations)
  247. );
  248. }
  249. }
  250. if ($this->isInlineStyleAttributesParsingEnabled) {
  251. $this->fillStyleAttributesWithMergedStyles();
  252. }
  253. // This removes styles from your email that contain display:none.
  254. // We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only
  255. // supports XPath 1.0, lower-case() isn't available to us. We've thus far only set attributes to lowercase,
  256. // not attribute values. Consequently, we need to translate() the letters that would be in 'NONE' ("NOE")
  257. // to lowercase.
  258. $nodesWithStyleDisplayNone = $xpath->query(
  259. '//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]'
  260. );
  261. // The checks on parentNode and is_callable below ensure that if we've deleted the parent node,
  262. // we don't try to call removeChild on a nonexistent child node
  263. if ($nodesWithStyleDisplayNone->length > 0) {
  264. /** @var \DOMNode $node */
  265. foreach ($nodesWithStyleDisplayNone as $node) {
  266. if ($node->parentNode && is_callable(array($node->parentNode,'removeChild'))) {
  267. $node->parentNode->removeChild($node);
  268. }
  269. }
  270. }
  271. $this->copyCssWithMediaToStyleNode($cssParts, $xmlDocument);
  272. if ($this->preserveEncoding) {
  273. return mb_convert_encoding($xmlDocument->saveHTML(), self::ENCODING, 'HTML-ENTITIES');
  274. } else {
  275. return $xmlDocument->saveHTML();
  276. }
  277. }
  278. /**
  279. * Disables the parsing of inline styles.
  280. *
  281. * @return void
  282. */
  283. public function disableInlineStyleAttributesParsing()
  284. {
  285. $this->isInlineStyleAttributesParsingEnabled = false;
  286. }
  287. /**
  288. * Disables the parsing of <style> blocks.
  289. *
  290. * @return void
  291. */
  292. public function disableStyleBlocksParsing()
  293. {
  294. $this->isStyleBlocksParsingEnabled = false;
  295. }
  296. /**
  297. * Clears all caches.
  298. *
  299. * @return void
  300. */
  301. private function clearAllCaches()
  302. {
  303. $this->clearCache(self::CACHE_KEY_CSS);
  304. $this->clearCache(self::CACHE_KEY_SELECTOR);
  305. $this->clearCache(self::CACHE_KEY_XPATH);
  306. $this->clearCache(self::CACHE_KEY_CSS_DECLARATION_BLOCK);
  307. }
  308. /**
  309. * Clears a single cache by key.
  310. *
  311. * @param int $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH
  312. * or CACHE_KEY_CSS_DECLARATION_BLOCK
  313. *
  314. * @return void
  315. *
  316. * @throws \InvalidArgumentException
  317. */
  318. private function clearCache($key)
  319. {
  320. $allowedCacheKeys = array(
  321. self::CACHE_KEY_CSS,
  322. self::CACHE_KEY_SELECTOR,
  323. self::CACHE_KEY_XPATH,
  324. self::CACHE_KEY_CSS_DECLARATION_BLOCK,
  325. );
  326. if (!in_array($key, $allowedCacheKeys, true)) {
  327. throw new \InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
  328. }
  329. $this->caches[$key] = array();
  330. }
  331. /**
  332. * Purges the visited nodes.
  333. *
  334. * @return void
  335. */
  336. private function purgeVisitedNodes()
  337. {
  338. $this->visitedNodes = array();
  339. $this->styleAttributesForNodes = array();
  340. }
  341. /**
  342. * Marks a tag for removal.
  343. *
  344. * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them.
  345. * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
  346. *
  347. * Note: The tags will not be removed if they have any content.
  348. *
  349. * @param string $tagName the tag name, e.g., "p"
  350. *
  351. * @return void
  352. */
  353. public function addUnprocessableHtmlTag($tagName)
  354. {
  355. $this->unprocessableHtmlTags[] = $tagName;
  356. }
  357. /**
  358. * Drops a tag from the removal list.
  359. *
  360. * @param string $tagName the tag name, e.g., "p"
  361. *
  362. * @return void
  363. */
  364. public function removeUnprocessableHtmlTag($tagName)
  365. {
  366. $key = array_search($tagName, $this->unprocessableHtmlTags, true);
  367. if ($key !== false) {
  368. unset($this->unprocessableHtmlTags[$key]);
  369. }
  370. }
  371. /**
  372. * Normalizes the value of the "style" attribute and saves it.
  373. *
  374. * @param \DOMElement $node
  375. *
  376. * @return void
  377. */
  378. private function normalizeStyleAttributes(\DOMElement $node)
  379. {
  380. $normalizedOriginalStyle = preg_replace_callback(
  381. '/[A-z\\-]+(?=\\:)/S',
  382. function (array $m) {
  383. return strtolower($m[0]);
  384. },
  385. $node->getAttribute('style')
  386. );
  387. // in order to not overwrite existing style attributes in the HTML, we
  388. // have to save the original HTML styles
  389. $nodePath = $node->getNodePath();
  390. if (!isset($this->styleAttributesForNodes[$nodePath])) {
  391. $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationBlock($normalizedOriginalStyle);
  392. $this->visitedNodes[$nodePath] = $node;
  393. }
  394. $node->setAttribute('style', $normalizedOriginalStyle);
  395. }
  396. /**
  397. * Merges styles from styles attributes and style nodes and applies them to the attribute nodes
  398. *
  399. * return @void
  400. */
  401. private function fillStyleAttributesWithMergedStyles()
  402. {
  403. foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
  404. $node = $this->visitedNodes[$nodePath];
  405. $currentStyleAttributes = $this->parseCssDeclarationBlock($node->getAttribute('style'));
  406. $node->setAttribute(
  407. 'style',
  408. $this->generateStyleStringFromDeclarationsArrays(
  409. $currentStyleAttributes,
  410. $styleAttributesForNode
  411. )
  412. );
  413. }
  414. }
  415. /**
  416. * This method merges old or existing name/value array with new name/value array
  417. * and then generates a string of the combined style suitable for placing inline.
  418. * This becomes the single point for CSS string generation allowing for consistent
  419. * CSS output no matter where the CSS originally came from.
  420. *
  421. * @param string[] $oldStyles
  422. * @param string[] $newStyles
  423. *
  424. * @return string
  425. */
  426. private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles)
  427. {
  428. $combinedStyles = array_merge($oldStyles, $newStyles);
  429. $style = '';
  430. foreach ($combinedStyles as $attributeName => $attributeValue) {
  431. $style .= (strtolower(trim($attributeName)) . ': ' . trim($attributeValue) . '; ');
  432. }
  433. return trim($style);
  434. }
  435. /**
  436. * Copies the media part from CSS array parts to $xmlDocument.
  437. *
  438. * @param string[] $cssParts
  439. * @param \DOMDocument $xmlDocument
  440. *
  441. * @return void
  442. */
  443. public function copyCssWithMediaToStyleNode(array $cssParts, \DOMDocument $xmlDocument)
  444. {
  445. if (isset($cssParts['media']) && $cssParts['media'] !== '') {
  446. $this->addStyleElementToDocument($xmlDocument, $cssParts['media']);
  447. }
  448. }
  449. /**
  450. * Returns CSS content.
  451. *
  452. * @param \DOMXPath $xpath
  453. *
  454. * @return string
  455. */
  456. private function getCssFromAllStyleNodes(\DOMXPath $xpath)
  457. {
  458. $styleNodes = $xpath->query('//style');
  459. if ($styleNodes === false) {
  460. return '';
  461. }
  462. $css = '';
  463. /** @var \DOMNode $styleNode */
  464. foreach ($styleNodes as $styleNode) {
  465. $css .= "\n\n" . $styleNode->nodeValue;
  466. $styleNode->parentNode->removeChild($styleNode);
  467. }
  468. return $css;
  469. }
  470. /**
  471. * Adds a style element with $css to $document.
  472. *
  473. * This method is protected to allow overriding.
  474. *
  475. * @see https://github.com/jjriv/emogrifier/issues/103
  476. *
  477. * @param \DOMDocument $document
  478. * @param string $css
  479. *
  480. * @return void
  481. */
  482. protected function addStyleElementToDocument(\DOMDocument $document, $css)
  483. {
  484. $styleElement = $document->createElement('style', $css);
  485. $styleAttribute = $document->createAttribute('type');
  486. $styleAttribute->value = 'text/css';
  487. $styleElement->appendChild($styleAttribute);
  488. $head = $this->getOrCreateHeadElement($document);
  489. $head->appendChild($styleElement);
  490. }
  491. /**
  492. * Returns the existing or creates a new head element in $document.
  493. *
  494. * @param \DOMDocument $document
  495. *
  496. * @return \DOMNode the head element
  497. */
  498. private function getOrCreateHeadElement(\DOMDocument $document)
  499. {
  500. $head = $document->getElementsByTagName('head')->item(0);
  501. if ($head === null) {
  502. $head = $document->createElement('head');
  503. $html = $document->getElementsByTagName('html')->item(0);
  504. $html->insertBefore($head, $document->getElementsByTagName('body')->item(0));
  505. }
  506. return $head;
  507. }
  508. /**
  509. * Splits input CSS code to an array where:
  510. *
  511. * - key "css" will be contains clean CSS code
  512. * - key "media" will be contains all valuable media queries
  513. *
  514. * Example:
  515. *
  516. * The CSS code
  517. *
  518. * "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
  519. *
  520. * will be parsed into the following array:
  521. *
  522. * "css" => "h1 { color:red; }"
  523. * "media" => "@media { h1 {}}"
  524. *
  525. * @param string $css
  526. *
  527. * @return string[]
  528. */
  529. private function splitCssAndMediaQuery($css)
  530. {
  531. $media = '';
  532. $css = preg_replace_callback(
  533. '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU',
  534. function ($matches) use (&$media) {
  535. $media .= $matches[0];
  536. },
  537. $css
  538. );
  539. // filter the CSS
  540. $search = array(
  541. // get rid of css comment code
  542. '/\\/\\*.*\\*\\//sU',
  543. // strip out any import directives
  544. '/^\\s*@import\\s[^;]+;/misU',
  545. // strip remains media enclosures
  546. '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
  547. );
  548. $replace = array(
  549. '',
  550. '',
  551. '',
  552. );
  553. // clean CSS before output
  554. $css = preg_replace($search, $replace, $css);
  555. return array('css' => $css, 'media' => $media);
  556. }
  557. /**
  558. * Creates a DOMDocument instance with the current HTML.
  559. *
  560. * @return \DOMDocument
  561. */
  562. private function createXmlDocument()
  563. {
  564. $xmlDocument = new \DOMDocument;
  565. $xmlDocument->encoding = self::ENCODING;
  566. $xmlDocument->strictErrorChecking = false;
  567. $xmlDocument->formatOutput = true;
  568. $libXmlState = libxml_use_internal_errors(true);
  569. $xmlDocument->loadHTML($this->getUnifiedHtml());
  570. libxml_clear_errors();
  571. libxml_use_internal_errors($libXmlState);
  572. $xmlDocument->normalizeDocument();
  573. return $xmlDocument;
  574. }
  575. /**
  576. * Returns the HTML with the non-ASCII characters converts into HTML entities and the unprocessable
  577. * HTML tags removed.
  578. *
  579. * @return string the unified HTML
  580. *
  581. * @throws \BadMethodCallException
  582. */
  583. private function getUnifiedHtml()
  584. {
  585. if (!empty($this->unprocessableHtmlTags)) {
  586. $unprocessableHtmlTags = implode('|', $this->unprocessableHtmlTags);
  587. $bodyWithoutUnprocessableTags = preg_replace(
  588. '/<\\/?(' . $unprocessableHtmlTags . ')[^>]*>/i',
  589. '',
  590. $this->html
  591. );
  592. } else {
  593. $bodyWithoutUnprocessableTags = $this->html;
  594. }
  595. return mb_convert_encoding($bodyWithoutUnprocessableTags, 'HTML-ENTITIES', self::ENCODING);
  596. }
  597. /**
  598. * @param string[] $a
  599. * @param string[] $b
  600. *
  601. * @return int
  602. */
  603. private function sortBySelectorPrecedence(array $a, array $b)
  604. {
  605. $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
  606. $precedenceB = $this->getCssSelectorPrecedence($b['selector']);
  607. // We want these sorted in ascending order so selectors with lesser precedence get processed first and
  608. // selectors with greater precedence get sorted last.
  609. $precedenceForEquals = ($a['line'] < $b['line'] ? -1 : 1);
  610. $precedenceForNotEquals = ($precedenceA < $precedenceB ? -1 : 1);
  611. return ($precedenceA === $precedenceB) ? $precedenceForEquals : $precedenceForNotEquals;
  612. }
  613. /**
  614. * @param string $selector
  615. *
  616. * @return int
  617. */
  618. private function getCssSelectorPrecedence($selector)
  619. {
  620. $selectorKey = md5($selector);
  621. if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
  622. $precedence = 0;
  623. $value = 100;
  624. // ids: worth 100, classes: worth 10, elements: worth 1
  625. $search = array('\\#','\\.','');
  626. foreach ($search as $s) {
  627. if (trim($selector) === '') {
  628. break;
  629. }
  630. $number = 0;
  631. $selector = preg_replace('/' . $s . '\\w+/', '', $selector, -1, $number);
  632. $precedence += ($value * $number);
  633. $value /= 10;
  634. }
  635. $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
  636. }
  637. return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
  638. }
  639. /**
  640. * Right now, we support all CSS 1 selectors and most CSS2/3 selectors.
  641. *
  642. * @see http://plasmasturm.org/log/444/
  643. *
  644. * @param string $paramCssSelector
  645. *
  646. * @return string
  647. */
  648. private function translateCssToXpath($paramCssSelector)
  649. {
  650. $cssSelector = ' ' . $paramCssSelector . ' ';
  651. $cssSelector = preg_replace_callback(
  652. '/\\s+\\w+\\s+/',
  653. function (array $matches) {
  654. return strtolower($matches[0]);
  655. },
  656. $cssSelector
  657. );
  658. $cssSelector = trim($cssSelector);
  659. $xpathKey = md5($cssSelector);
  660. if (!isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) {
  661. // returns an Xpath selector
  662. $search = array(
  663. // Matches any element that is a child of parent.
  664. '/\\s+>\\s+/',
  665. // Matches any element that is an adjacent sibling.
  666. '/\\s+\\+\\s+/',
  667. // Matches any element that is a descendant of an parent element element.
  668. '/\\s+/',
  669. // first-child pseudo-selector
  670. '/([^\\/]+):first-child/i',
  671. // last-child pseudo-selector
  672. '/([^\\/]+):last-child/i',
  673. // Matches attribute only selector
  674. '/^\\[(\\w+)\\]/',
  675. // Matches element with attribute
  676. '/(\\w)\\[(\\w+)\\]/',
  677. // Matches element with EXACT attribute
  678. '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
  679. );
  680. $replace = array(
  681. '/',
  682. '/following-sibling::*[1]/self::',
  683. '//',
  684. '*[1]/self::\\1',
  685. '*[last()]/self::\\1',
  686. '*[@\\1]',
  687. '\\1[@\\2]',
  688. '\\1[@\\2="\\3"]',
  689. );
  690. $cssSelector = '//' . preg_replace($search, $replace, $cssSelector);
  691. $cssSelector = preg_replace_callback(
  692. self::ID_ATTRIBUTE_MATCHER,
  693. array($this, 'matchIdAttributes'),
  694. $cssSelector
  695. );
  696. $cssSelector = preg_replace_callback(
  697. self::CLASS_ATTRIBUTE_MATCHER,
  698. array($this, 'matchClassAttributes'),
  699. $cssSelector
  700. );
  701. // Advanced selectors are going to require a bit more advanced emogrification.
  702. // When we required PHP 5.3, we could do this with closures.
  703. $cssSelector = preg_replace_callback(
  704. '/([^\\/]+):nth-child\\(\\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
  705. array($this, 'translateNthChild'),
  706. $cssSelector
  707. );
  708. $cssSelector = preg_replace_callback(
  709. '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
  710. array($this, 'translateNthOfType'),
  711. $cssSelector
  712. );
  713. $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey] = $cssSelector;
  714. }
  715. return $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey];
  716. }
  717. /**
  718. * @param string[] $match
  719. *
  720. * @return string
  721. */
  722. private function matchIdAttributes(array $match)
  723. {
  724. return (strlen($match[1]) ? $match[1] : '*') . '[@id="' . $match[2] . '"]';
  725. }
  726. /**
  727. * @param string[] $match
  728. *
  729. * @return string
  730. */
  731. private function matchClassAttributes(array $match)
  732. {
  733. return (strlen($match[1]) ? $match[1] : '*') . '[contains(concat(" ",@class," "),concat(" ","' .
  734. implode(
  735. '"," "))][contains(concat(" ",@class," "),concat(" ","',
  736. explode('.', substr($match[2], 1))
  737. ) . '"," "))]';
  738. }
  739. /**
  740. * @param string[] $match
  741. *
  742. * @return string
  743. */
  744. private function translateNthChild(array $match)
  745. {
  746. $result = $this->parseNth($match);
  747. if (isset($result[self::MULTIPLIER])) {
  748. if ($result[self::MULTIPLIER] < 0) {
  749. $result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]);
  750. return sprintf(
  751. '*[(last() - position()) mod %u = %u]/self::%s',
  752. $result[self::MULTIPLIER],
  753. $result[self::INDEX],
  754. $match[1]
  755. );
  756. } else {
  757. return sprintf(
  758. '*[position() mod %u = %u]/self::%s',
  759. $result[self::MULTIPLIER],
  760. $result[self::INDEX],
  761. $match[1]
  762. );
  763. }
  764. } else {
  765. return sprintf('*[%u]/self::%s', $result[self::INDEX], $match[1]);
  766. }
  767. }
  768. /**
  769. * @param string[] $match
  770. *
  771. * @return string
  772. */
  773. private function translateNthOfType(array $match)
  774. {
  775. $result = $this->parseNth($match);
  776. if (isset($result[self::MULTIPLIER])) {
  777. if ($result[self::MULTIPLIER] < 0) {
  778. $result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]);
  779. return sprintf(
  780. '%s[(last() - position()) mod %u = %u]',
  781. $match[1],
  782. $result[self::MULTIPLIER],
  783. $result[self::INDEX]
  784. );
  785. } else {
  786. return sprintf(
  787. '%s[position() mod %u = %u]',
  788. $match[1],
  789. $result[self::MULTIPLIER],
  790. $result[self::INDEX]
  791. );
  792. }
  793. } else {
  794. return sprintf('%s[%u]', $match[1], $result[self::INDEX]);
  795. }
  796. }
  797. /**
  798. * @param string[] $match
  799. *
  800. * @return int[]
  801. */
  802. private function parseNth(array $match)
  803. {
  804. if (in_array(strtolower($match[2]), array('even','odd'), true)) {
  805. $index = strtolower($match[2]) === 'even' ? 0 : 1;
  806. return array(self::MULTIPLIER => 2, self::INDEX => $index);
  807. } elseif (stripos($match[2], 'n') === false) {
  808. // if there is a multiplier
  809. $index = (int) str_replace(' ', '', $match[2]);
  810. return array(self::INDEX => $index);
  811. } else {
  812. if (isset($match[3])) {
  813. $multipleTerm = str_replace($match[3], '', $match[2]);
  814. $index = (int) str_replace(' ', '', $match[3]);
  815. } else {
  816. $multipleTerm = $match[2];
  817. $index = 0;
  818. }
  819. $multiplier = (int) str_ireplace('n', '', $multipleTerm);
  820. if (!strlen($multiplier)) {
  821. $multiplier = 1;
  822. } elseif ($multiplier === 0) {
  823. return array(self::INDEX => $index);
  824. } else {
  825. $multiplier = (int) $multiplier;
  826. }
  827. while ($index < 0) {
  828. $index += abs($multiplier);
  829. }
  830. return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
  831. }
  832. }
  833. /**
  834. * Parses a CSS declaration block into property name/value pairs.
  835. *
  836. * Example:
  837. *
  838. * The declaration block
  839. *
  840. * "color: #000; font-weight: bold;"
  841. *
  842. * will be parsed into the following array:
  843. *
  844. * "color" => "#000"
  845. * "font-weight" => "bold"
  846. *
  847. * @param string $cssDeclarationBlock the CSS declaration block without the curly braces, may be empty
  848. *
  849. * @return string[]
  850. * the CSS declarations with the property names as array keys and the property values as array values
  851. */
  852. private function parseCssDeclarationBlock($cssDeclarationBlock)
  853. {
  854. if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock])) {
  855. return $this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock];
  856. }
  857. $properties = array();
  858. $declarations = explode(';', $cssDeclarationBlock);
  859. foreach ($declarations as $declaration) {
  860. $matches = array();
  861. if (!preg_match('/ *([A-Za-z\\-]+) *: *([^;]+) */', $declaration, $matches)) {
  862. continue;
  863. }
  864. $propertyName = strtolower($matches[1]);
  865. $propertyValue = $matches[2];
  866. $properties[$propertyName] = $propertyValue;
  867. }
  868. $this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock] = $properties;
  869. return $properties;
  870. }
  871. }