PageRenderTime 39ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/core/modules/system/src/Tests/Mail/HtmlToTextTest.php

https://gitlab.com/geeta7/drupal
PHP | 391 lines | 263 code | 20 blank | 108 comment | 3 complexity | cc3b88feacda856a39918cd5cd9ab0c7 MD5 | raw file
  1. <?php
  2. /**
  3. * @file
  4. * Contains \Drupal\system\Tests\Mail\HtmlToTextTest.
  5. */
  6. namespace Drupal\system\Tests\Mail;
  7. use Drupal\Component\Utility\Html;
  8. use Drupal\Component\Utility\Unicode;
  9. use Drupal\Core\Mail\MailFormatHelper;
  10. use Drupal\Core\Site\Settings;
  11. use Drupal\simpletest\WebTestBase;
  12. /**
  13. * Tests for \Drupal\Core\Mail\MailFormatHelper::htmlToText().
  14. *
  15. * @group Mail
  16. */
  17. class HtmlToTextTest extends WebTestBase {
  /**
   * Renders a string as a quoted, PHP-source-like literal for test messages.
   *
   * The text is HTML-escaped first. Afterwards each linefeed is shown as the
   * two characters "\n" and each space is turned into a non-breaking space
   * entity, so whitespace differences remain visible in a browser.
   *
   * @param $text
   *   The text string to convert.
   *
   * @return
   *   The escaped, whitespace-annotated text wrapped in double quotes.
   */
  protected function stringToHtml($text) {
    $search = array("\n", ' ');
    $replace = array('\n', '&nbsp;');
    $escaped = Html::escape($text);
    $visible = str_replace($search, $replace, $escaped);
    return '"' . $visible . '"';
  }
  /**
   * Asserts that MailFormatHelper::htmlToText() yields the expected text.
   *
   * The names of all tags found in $html are appended to the assertion
   * message. The input, the actual result and the expected result are also
   * written to the verbose output.
   *
   * @param $html
   *   The source HTML string to be converted.
   * @param $text
   *   The expected result of converting $html to text.
   * @param $message
   *   A text message to display in the assertion message.
   * @param $allowed_tags
   *   (optional) An array of allowed tags, or NULL to default to the full
   *   set of tags supported by
   *   \Drupal\Core\Mail\MailFormatHelper::htmlToText().
   */
  protected function assertHtmlToText($html, $text, $message, $allowed_tags = NULL) {
    // Collect the distinct tag names (lowercase letters and the digits 0-6
    // following a "<") and add them to the message.
    $lowercase_html = Unicode::strtolower($html);
    preg_match_all('/<([a-z0-6]+)/', $lowercase_html, $tag_matches);
    $message = $message . ' (' . implode(', ', array_unique($tag_matches[1])) . ')';

    $result = MailFormatHelper::htmlToText($html, $allowed_tags);
    $pass = $this->assertEqual($result, $text, Html::escape($message));

    $verbose = 'html = <pre>' . $this->stringToHtml($html) . '</pre><br />';
    $verbose .= 'result = <pre>' . $this->stringToHtml($result) . '</pre><br />';
    $verbose .= 'expected = <pre>' . $this->stringToHtml($text) . '</pre>';
    $this->verbose($verbose);

    if ($pass) {
      return;
    }
    // On failure, repeat the verbose details as a test message so they show
    // up next to the failed assertion.
    $this->pass('Previous test verbose info:<br />' . $verbose);
  }
  /**
   * Checks the plain-text rendering of individual HTML tags.
   *
   * Every key of the table below is an HTML snippet and every value is the
   * text that \Drupal\Core\Mail\MailFormatHelper::htmlToText() is expected to
   * produce for it when called with the default set of allowed tags.
   */
  public function testTags() {
    global $base_path, $base_url;

    $expected_by_html = array(
      // @todo Trailing linefeeds should be trimmed.
      '<a href = "https://www.drupal.org">Drupal.org</a>' => "Drupal.org [1]\n\n[1] https://www.drupal.org\n",

      // @todo Footer URLs should be absolute.
      "<a href = \"$base_path\">Homepage</a>" => "Homepage [1]\n\n[1] $base_url/\n",

      '<address>Drupal</address>' => "Drupal\n",
      // @todo The <address> tag is currently not supported.
      '<address>Drupal</address><address>Drupal</address>' => "DrupalDrupal\n",

      '<b>Drupal</b>' => "*Drupal*\n",

      // @todo There should be a space between the '>' and the text.
      '<blockquote>Drupal</blockquote>' => ">Drupal\n",
      '<blockquote>Drupal</blockquote><blockquote>Drupal</blockquote>' => ">Drupal\n>Drupal\n",

      '<br />Drupal<br />Drupal<br /><br />Drupal' => "Drupal\nDrupal\nDrupal\n",
      '<br/>Drupal<br/>Drupal<br/><br/>Drupal' => "Drupal\nDrupal\nDrupal\n",
      // @todo There should be two line breaks before the paragraph.
      '<br/>Drupal<br/>Drupal<br/><br/>Drupal<p>Drupal</p>' => "Drupal\nDrupal\nDrupal\nDrupal\n\n",

      '<div>Drupal</div>' => "Drupal\n",
      // @todo The <div> tag is currently not supported.
      '<div>Drupal</div><div>Drupal</div>' => "DrupalDrupal\n",

      '<em>Drupal</em>' => "/Drupal/\n",

      // Headings.
      '<h1>Drupal</h1>' => "======== DRUPAL ==============================================================\n\n",
      '<h1>Drupal</h1><p>Drupal</p>' => "======== DRUPAL ==============================================================\n\nDrupal\n\n",
      '<h2>Drupal</h2>' => "-------- DRUPAL --------------------------------------------------------------\n\n",
      '<h2>Drupal</h2><p>Drupal</p>' => "-------- DRUPAL --------------------------------------------------------------\n\nDrupal\n\n",
      '<h3>Drupal</h3>' => ".... Drupal\n\n",
      '<h3>Drupal</h3><p>Drupal</p>' => ".... Drupal\n\nDrupal\n\n",
      '<h4>Drupal</h4>' => ".. Drupal\n\n",
      '<h4>Drupal</h4><p>Drupal</p>' => ".. Drupal\n\nDrupal\n\n",
      '<h5>Drupal</h5>' => "Drupal\n\n",
      '<h5>Drupal</h5><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
      '<h6>Drupal</h6>' => "Drupal\n\n",
      '<h6>Drupal</h6><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",

      // Horizontal rules.
      '<hr />Drupal<hr />' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
      '<hr/>Drupal<hr/>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
      '<hr/>Drupal<hr/><p>Drupal</p>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n\n",

      '<i>Drupal</i>' => "/Drupal/\n",

      '<p>Drupal</p>' => "Drupal\n\n",
      '<p>Drupal</p><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",

      '<strong>Drupal</strong>' => "*Drupal*\n",

      // @todo Tables are currently not supported.
      '<table><tr><td>Drupal</td><td>Drupal</td></tr><tr><td>Drupal</td><td>Drupal</td></tr></table>' => "DrupalDrupalDrupalDrupal\n",
      '<table><tr><td>Drupal</td></tr></table><p>Drupal</p>' => "Drupal\nDrupal\n\n",

      // @todo The <u> tag is currently not supported.
      '<u>Drupal</u>' => "Drupal\n",

      // Unordered and ordered lists.
      '<ul><li>Drupal</li></ul>' => " * Drupal\n\n",
      '<ul><li>Drupal <em>Drupal</em> Drupal</li></ul>' => " * Drupal /Drupal/ Drupal\n\n",
      // @todo Lines containing nothing but spaces should be trimmed.
      '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => " * Drupal\n * 1) Drupal\n 2) Drupal\n \n\n",
      '<ul><li>Drupal</li><li><ol><li>Drupal</li></ol></li><li>Drupal</li></ul>' => " * Drupal\n * 1) Drupal\n \n * Drupal\n\n",
      '<ul><li>Drupal</li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n\n",
      '<ul><li>Drupal</li></ul><p>Drupal</p>' => " * Drupal\n\nDrupal\n\n",
      '<ol><li>Drupal</li></ol>' => " 1) Drupal\n\n",
      '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => " 1) Drupal\n 2) * Drupal\n * Drupal\n \n\n",
      '<ol><li>Drupal</li><li>Drupal</li></ol>' => " 1) Drupal\n 2) Drupal\n\n",
      '<ol>Drupal</ol>' => "Drupal\n\n",
      '<ol><li>Drupal</li></ol><p>Drupal</p>' => " 1) Drupal\n\nDrupal\n\n",

      // Definition lists.
      '<dl><dt>Drupal</dt></dl>' => "Drupal\n\n",
      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\n\n",
      '<dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\nDrupal\n Drupal\n\n",
      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl><p>Drupal</p>' => "Drupal\n Drupal\n\nDrupal\n\n",
      '<dl><dt>Drupal<dd>Drupal</dl>' => "Drupal\n Drupal\n\n",
      '<dl><dt>Drupal</dt></dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
      // @todo Again, lines containing only spaces should be trimmed.
      '<ul><li>Drupal</li><li><dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl></li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n Drupal\n Drupal\n Drupal\n \n * Drupal\n\n",

      // Tests malformed HTML tags.
      '<br>Drupal<br>Drupal' => "Drupal\nDrupal\n",
      '<hr>Drupal<hr>Drupal' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n",
      '<ol><li>Drupal<li>Drupal</ol>' => " 1) Drupal\n 2) Drupal\n\n",
      '<ul><li>Drupal <em>Drupal</em> Drupal</ul></ul>' => " * Drupal /Drupal/ Drupal\n\n",
      '<ul><li>Drupal<li>Drupal</ol>' => " * Drupal\n * Drupal\n\n",
      '<ul><li>Drupal<li>Drupal</ul>' => " * Drupal\n * Drupal\n\n",
      '<ul>Drupal</ul>' => "Drupal\n\n",
      'Drupal</ul></ol></dl><li>Drupal' => "Drupal\n * Drupal\n",
      '<dl>Drupal</dl>' => "Drupal\n\n",
      '<dl>Drupal</dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
      '<dt>Drupal</dt>' => "Drupal\n",

      // Tests some unsupported HTML tags.
      '<html>Drupal</html>' => "Drupal\n",
      // @todo Perhaps the contents of <script> tags should be dropped.
      '<script type="text/javascript">Drupal</script>' => "Drupal\n",

      // A couple of tests for Unicode characters.
      '<q>I <em>will</em> be back…</q>' => "I /will/ be back…\n",
      'FrançAIS is ÜBER-åwesome' => "FrançAIS is ÜBER-åwesome\n",
    );

    foreach ($expected_by_html as $markup => $plain_text) {
      $this->assertHtmlToText($markup, $plain_text, 'Supported tags');
    }
  }
  /**
   * Tests the $allowed_tags argument of MailFormatHelper::htmlToText().
   *
   * Each case passes an explicit list of allowed tags as the second parameter
   * of \Drupal\Core\Mail\MailFormatHelper::htmlToText(), which overrules the
   * default set, and checks the text that is expected for that list.
   */
  public function testDrupalHtmlToTextArgs() {
    // Each case: source HTML, expected text, assertion message, allowed tags.
    $cases = array(
      array(
        'Drupal <b>Drupal</b> Drupal',
        "Drupal *Drupal* Drupal\n",
        'Allowed <b> tag found',
        array('b'),
      ),
      array(
        'Drupal <h1>Drupal</h1> Drupal',
        "Drupal Drupal Drupal\n",
        'Disallowed <h1> tag not found',
        array('b'),
      ),
      array(
        'Drupal <p><em><b>Drupal</b></em><p> Drupal',
        "Drupal Drupal Drupal\n",
        'Disallowed <p>, <em>, and <b> tags not found',
        array('a', 'br', 'h1'),
      ),
      array(
        '<html><body>Drupal</body></html>',
        "Drupal\n",
        'Unsupported <html> and <body> tags not found',
        array('html', 'body'),
      ),
    );

    foreach ($cases as $case) {
      list($html, $expected, $message, $allowed_tags) = $case;
      $this->assertHtmlToText($html, $expected, $message, $allowed_tags);
    }
  }
  /**
   * Tests whitespace handling inside and around a <pre> element.
   *
   * The same text fragment appears before, inside and after a <pre> element
   * nested in a paragraph; only the <p> tag is allowed for the conversion.
   */
  public function testDrupalHtmltoTextCollapsesWhitespace() {
    // The fragment that is repeated three times in the input.
    $fragment = "Drupal Drupal\n\nDrupal";

    $input = '<p>' . $fragment . '<pre>' . $fragment . '</pre>' . $fragment . '</p>';
    // @todo The whitespace should be collapsed.
    $collapsed = $fragment . $fragment . $fragment . "\n\n";

    $this->assertHtmlToText($input, $collapsed, 'Whitespace is collapsed', array('p'));
  }
  /**
   * Tests that block-level tags separate their text with newlines.
   *
   * Every piece of text in the input is wrapped in square brackets, so any
   * "]" followed by "[" on the same output line means two blocks were not
   * separated. The conversion is then repeated on the upper-cased input to
   * check that tag names are recognized regardless of case.
   */
  public function testDrupalHtmlToTextBlockTagToNewline() {
    $input = '[text]';
    $input .= '<blockquote>[blockquote]</blockquote>';
    $input .= '<br />[br]';
    $input .= '<dl><dt>[dl-dt]</dt>';
    $input .= '<dt>[dt]</dt>';
    $input .= '<dd>[dd]</dd>';
    $input .= '<dd>[dd-dl]</dd></dl>';
    $input .= '<h1>[h1]</h1>';
    $input .= '<h2>[h2]</h2>';
    $input .= '<h3>[h3]</h3>';
    $input .= '<h4>[h4]</h4>';
    $input .= '<h5>[h5]</h5>';
    $input .= '<h6>[h6]</h6>';
    $input .= '<hr />[hr]';
    $input .= '<ol><li>[ol-li]</li>';
    $input .= '<li>[li]</li>';
    $input .= '<li>[li-ol]</li></ol>';
    $input .= '<p>[p]</p>';
    $input .= '<ul><li>[ul-li]</li>';
    $input .= '<li>[li-ul]</li></ul>';
    $input .= '[text]';

    $output = MailFormatHelper::htmlToText($input);
    $same_line_blocks = preg_match('/\][^\n]*\[/s', $output);
    $newlines_ok = $this->assertFalse($same_line_blocks, 'Block-level HTML tags should force newlines');
    if (!$newlines_ok) {
      $this->verbose($this->stringToHtml($output));
    }

    // Upper-casing the converted output must give the same text as converting
    // the upper-cased input.
    $output_upper = Unicode::strtoupper($output);
    $upper_output = MailFormatHelper::htmlToText(Unicode::strtoupper($input));
    $case_ok = $this->assertEqual($upper_output, $output_upper, 'Tag recognition should be case-insensitive');
    if (!$case_ok) {
      $this->verbose($upper_output . '<br />should be equal to <br />' . $output_upper);
    }
  }
  /**
   * Tests how an <h1> header is separated from the text around it.
   *
   * The four cases cover plain text or a paragraph before the header, combined
   * with plain text or a paragraph after it.
   */
  public function testHeaderSeparation() {
    // The rendered <h1> line, including the blank line that follows it.
    $heading = "======== DRUPAL ==============================================================\n\n";

    // @todo There should be more space above the header than below it.
    $cases = array(
      array(
        'Drupal<h1>Drupal</h1>Drupal',
        "Drupal\n" . $heading . "Drupal\n",
        'Text before and after <h1> tag',
      ),
      array(
        '<p>Drupal</p><h1>Drupal</h1>Drupal',
        "Drupal\n\n" . $heading . "Drupal\n",
        'Paragraph before and text after <h1> tag',
      ),
      array(
        'Drupal<h1>Drupal</h1><p>Drupal</p>',
        "Drupal\n" . $heading . "Drupal\n\n",
        'Text before and paragraph after <h1> tag',
      ),
      array(
        '<p>Drupal</p><h1>Drupal</h1><p>Drupal</p>',
        "Drupal\n\n" . $heading . "Drupal\n\n",
        'Paragraph before and after <h1> tag',
      ),
    );

    foreach ($cases as $case) {
      list($html, $text, $message) = $case;
      $this->assertHtmlToText($html, $text, $message);
    }
  }
  /**
   * Tests the numbered footnotes generated for links.
   *
   * Four links are converted: one with host and path, one with a host only,
   * one with a path below the site's base path, and one relative path. Each
   * is expected to get its own "[n]" marker and a matching footnote line.
   */
  public function testFootnoteReferences() {
    global $base_path, $base_url;

    $source = '<a href="http://www.example.com/node/1">Host and path</a>';
    $source .= '<br /><a href="http://www.example.com">Host, no path</a>';
    $source .= '<br /><a href="' . $base_path . 'node/1">Path, no host</a>';
    $source .= '<br /><a href="node/1">Relative path</a>';

    // The link texts with their markers, one per line.
    // @todo The last two references should be combined.
    $body = "Host and path [1]\nHost, no path [2]\nPath, no host [3]\nRelative path [4]\n";

    // The footnote list, separated from the body by an empty line.
    // @todo Footnote URLs should be absolute.
    // @todo The last two references should be combined.
    $footnotes = "\n[1] http://www.example.com/node/1"
      . "\n[2] http://www.example.com"
      . "\n[3] " . $base_url . '/node/1'
      . "\n[4] node/1\n";

    $this->assertHtmlToText($source, $body . $footnotes, 'Footnotes');
  }
  /**
   * Tests mixtures of paragraphs, line breaks, linefeeds and spaces.
   *
   * The table maps each HTML input to the text expected from the conversion.
   */
  public function testDrupalHtmlToTextParagraphs() {
    // @todo Trailing line breaks should be trimmed.
    $expected_by_html = array(
      "<p>line 1<br />\nline 2<br />line 3\n<br />line 4</p><p>paragraph</p>" => "line 1\nline 2\nline 3\nline 4\n\nparagraph\n\n",
      "<p>line 1<br /> line 2</p> <p>line 4<br /> line 5</p> <p>0</p>" => "line 1\nline 2\n\nline 4\nline 5\n\n0\n\n",
    );

    foreach ($expected_by_html as $html => $text) {
      $this->assertHtmlToText($html, $text, 'Paragraph breaks');
    }
  }
  /**
   * Tests that htmlToText() keeps every output line within 1000 octets.
   *
   * RFC 3676 says, "The Text/Plain media type is the lowest common
   * denominator of Internet email, with lines of no more than 998 characters."
   *
   * RFC 2046 says, "SMTP [RFC-821] allows a maximum of 998 octets before the
   * next CRLF sequence."
   *
   * RFC 821 says, "The maximum total length of a text line including the
   * <CRLF> is 1000 characters."
   */
  public function testVeryLongLineWrap() {
    // A paragraph made of a single 2100-character "word".
    $long_word = str_repeat('x', 2100);
    $output = MailFormatHelper::htmlToText('Drupal<br /><p>' . $long_word . '</p><br />Drupal');

    $eol = Settings::get('mail_line_endings', PHP_EOL);
    $longest = 0;
    foreach (explode($eol, $output) as $line) {
      // strlen() rather than Unicode::strlen() is used on purpose: the limit
      // is expressed in octets, not characters. The line ending is counted
      // as part of the line.
      $octets = strlen($line . $eol);
      if ($octets > $longest) {
        $longest = $octets;
      }
    }

    $this->assertTrue(1000 >= $longest, 'Maximum line length found was ' . $longest . ' octets.');
  }
  /**
   * Tests that a space directly before a newline does not survive wrapping.
   *
   * @see \Drupal\Core\Mail\MailFormatHelper::wrapMail()
   */
  public function testRemoveTrailingWhitespace() {
    // The first line ends in a space right before the linefeed.
    $wrapped = MailFormatHelper::wrapMail("Hi there! \nHerp Derp");
    $lines = explode("\n", $wrapped);
    $last_character = substr($lines[0], -1);
    $this->assertNotEqual(" ", $last_character, 'Trailing whitespace removed.');
  }
  /**
   * Tests that trailing whitespace from Usenet style signatures is not removed.
   *
   * RFC 3676 says, "This is a special case; an (optionally quoted or quoted and
   * stuffed) line consisting of DASH DASH SP is neither fixed nor flowed."
   *
   * Two inputs are checked: a correct separator line ("-- ", one trailing
   * space), which is expected to be kept as is, and an incorrect one with two
   * trailing spaces, which is expected to be trimmed to "--".
   *
   * @see \Drupal\Core\Mail\MailFormatHelper::wrapMail()
   */
  public function testUsenetSignature() {
    // Correct signature separator: dash, dash, exactly one space.
    $text = "Hi there!\n-- \nHerp Derp";
    $mail_lines = explode("\n", MailFormatHelper::wrapMail($text));
    $this->assertEqual("-- ", $mail_lines[1], 'Trailing whitespace not removed for dash-dash-space signatures.');

    // Incorrect separator: two trailing spaces. This input has to differ from
    // the one above, otherwise the two assertions contradict each other.
    $text = "Hi there!\n--  \nHerp Derp";
    $mail_lines = explode("\n", MailFormatHelper::wrapMail($text));
    $this->assertEqual("--", $mail_lines[1], 'Trailing whitespace removed for incorrect dash-dash-space signatures.');
  }
  371. }