PageRenderTime 29ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/daisydiff-php/src/HTMLDiff.php

http://daisydiff.googlecode.com/
PHP | 1007 lines | 750 code | 182 blank | 75 comment | 189 complexity | 6d9bd9bb6f2c9ecd50f02d283921591e MD5 | raw file
Possible License(s): Apache-2.0
  1. <?php
  2. /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. * or see http://www.gnu.org/
  18. *
  19. * @ingroup DifferenceEngine
  20. */
  21. /**
  22. * When detecting the last common parent of two nodes, all results are stored as
  23. * a LastCommonParentResult.
  24. */
  25. include_once 'Diff.php';
  26. include_once 'Nodes.php';
  27. include_once 'Sanitizer.php';
  28. include_once 'Xml.php';
  /**
   * Result record for a "last common parent" lookup between two nodes.
   *
   * Every field is public and is filled in by whoever performs the lookup;
   * the defaults below are what a freshly created record contains.
   */
  class LastCommonParentResult {
      /** The common parent node; null until a lookup assigns it. */
      public $parent;

      /** Whether splitting is needed; false by default. */
      public $splittingNeeded = false;

      /** Depth of the last common parent; -1 until assigned. */
      public $lastCommonParentDepth = -1;

      /** Index inside the last common parent; -1 until assigned. */
      public $indexInLastCommonParent = -1;
  }
  /**
   * Marks how a single leaf node was affected by the diff.
   */
  class Modification {
      const NONE = 1;
      const REMOVED = 2;
      const ADDED = 4;
      const CHANGED = 8;

      /** One of the constants above, as passed to the constructor. */
      public $type;

      /** Group id shared by the nodes of one modification; -1 until assigned. */
      public $id = -1;

      /** True for the node that opens a group with a given id. */
      public $firstOfID = false;

      /** Description text attached to a CHANGED modification; null otherwise. */
      public $changes;

      /**
       * @param int $type One of the Modification constants.
       */
      public function __construct($type) {
          $this->type = $type;
      }

      /**
       * Maps a modification type to its lowercase name.
       *
       * @param int $type One of the Modification constants.
       * @return string|null The name, or null for an unrecognised type.
       */
      public static function typeToString($type) {
          // Loose comparison on purpose: it mirrors what a switch statement does.
          if ($type == self::NONE) {
              return 'none';
          }
          if ($type == self::REMOVED) {
              return 'removed';
          }
          if ($type == self::ADDED) {
              return 'added';
          }
          if ($type == self::CHANGED) {
              return 'changed';
          }
          return null;
      }
  }
  /**
   * Builds a node tree for one document out of XML-parser callbacks.
   *
   * startElement(), endElement() and characters() have the signatures that
   * xml_set_element_handler() and xml_set_character_data_handler() expect.
   * While the tree grows under $bodyNode, every leaf (word, delimiter, or
   * img/br/hr placeholder) is also appended to $textNodes in document order.
   */
  class DomTreeBuilder {
      /** Splits text on single whitespace/punctuation characters, keeping the separators. */
      const regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';
      /** Matches a token that is exactly one whitespace character. */
      const whitespace = '/^[\s]{1}$/';
      /** Matches a token that is exactly one separator character. */
      const delimiter = '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';

      /** Leaf nodes in the order they were encountered. */
      public $textNodes = array();
      /** Root of the tree (a BodyNode). */
      public $bodyNode;
      /** Node that currently receives new children. */
      private $currentParent;
      /** Characters of the word being accumulated; flushed by endWord(). */
      private $newWord = '';
      // The two flags below are not read or written anywhere in this class.
      protected $bodyStarted = false;
      protected $bodyEnded = false;
      /** True when whitespace was seen since the last leaf was created. */
      private $whiteSpaceBeforeThis = false;
      /** Most recent sibling; receives the whiteAfter flag when whitespace follows. */
      private $lastSibling;
      /** False while inside a <pre> element, where whitespace is kept as text. */
      private $notInPre = true;

      public function __construct() {
          $this->bodyNode = $this->currentParent = new BodyNode();
          $this->lastSibling = new DummyNode();
      }

      /**
       * Flushes the pending word and logs the number of leaves.
       * Must be called manually when no closing body tag is fed to endElement().
       */
      public function endDocument() {
          $this->endWord();
          HTMLDiffer::diffDebug( count($this->textNodes) . " text nodes in document.\n" );
      }

      /**
       * Element-start callback: opens a TagNode below the current parent.
       * The body element itself is skipped because the root already exists.
       *
       * @param resource $parser     The XML parser (unused).
       * @param string   $name       Element name.
       * @param array    $attributes Element attributes.
       */
      public function startElement($parser, $name, /*array*/ $attributes) {
          if (strcasecmp($name, 'body') != 0) {
              HTMLDiffer::diffDebug( "Starting $name node.\n" );
              $this->endWord();
              $newNode = new TagNode($this->currentParent, $name, $attributes);
              $this->currentParent->children[] = $newNode;
              $this->currentParent = $newNode;
              $this->lastSibling = new DummyNode();
              // Only tags outside TagNode::$blocks inherit the preceding whitespace.
              if ($this->whiteSpaceBeforeThis && !in_array(strtolower($this->currentParent->qName),TagNode::$blocks)) {
                  $this->currentParent->whiteBefore = true;
              }
              $this->whiteSpaceBeforeThis = false;
              if (strcasecmp($name, 'pre') == 0) {
                  $this->notInPre = false;
              }
          }
      }

      /**
       * Element-end callback: closes the current TagNode, or finishes the
       * document when the body element ends.
       *
       * @param resource $parser The XML parser (unused).
       * @param string   $name   Element name.
       */
      public function endElement($parser, $name) {
          if (strcasecmp($name, 'body') != 0) {
              HTMLDiffer::diffDebug( "Ending $name node.\n");
              if (0 == strcasecmp($name,'img') ||
                      0 == strcasecmp($name,'br') ||
                      0 == strcasecmp($name,'hr')) {
                  // Insert a dummy leaf for the element
                  $tag = new VisibleTagNode($this->currentParent, $name, $this->currentParent->attributes);
                  $this->currentParent->children[] = $tag;
                  // Fixed: this used to assign to the undefined variable $img,
                  // so the leaf never received its whitespace flag.
                  $tag->whiteBefore = $this->whiteSpaceBeforeThis;
                  $this->lastSibling = $tag;
                  $this->textNodes[] = $tag;
              }
              $this->endWord();
              if (!in_array(strtolower($this->currentParent->qName),TagNode::$blocks)) {
                  $this->lastSibling = $this->currentParent;
              } else {
                  $this->lastSibling = new DummyNode();
              }
              $this->currentParent = $this->currentParent->parent;
              $this->whiteSpaceBeforeThis = false;
              if (!$this->notInPre && strcasecmp($name, 'pre') == 0) {
                  $this->notInPre = true;
              }
          } else {
              $this->endDocument();
          }
      }

      /**
       * Character-data callback: splits the text into words and separators.
       *
       * Whitespace outside <pre> only sets flags on the neighbouring nodes;
       * any other separator becomes its own TextNode; everything else is
       * appended to the pending word.
       *
       * @param resource $parser The XML parser (unused).
       * @param string   $data   Raw character data.
       */
      public function characters($parser, $data) {
          $tokens = preg_split(self::regex, htmlentities($data,ENT_NOQUOTES,'UTF-8'), -1, PREG_SPLIT_DELIM_CAPTURE);
          foreach ($tokens as $word) {
              if (preg_match(self::whitespace, $word) && $this->notInPre) {
                  $this->endWord();
                  $this->lastSibling->whiteAfter = true;
                  $this->whiteSpaceBeforeThis = true;
              } else if (preg_match(self::delimiter, $word)) {
                  $this->endWord();
                  $textNode = new TextNode($this->currentParent, $word);
                  $this->currentParent->children[] = $textNode;
                  $textNode->whiteBefore = $this->whiteSpaceBeforeThis;
                  $this->whiteSpaceBeforeThis = false;
                  $this->lastSibling = $textNode;
                  $this->textNodes[] = $textNode;
              } else {
                  $this->newWord .= $word;
              }
          }
      }

      /**
       * Turns the pending word, if any, into a TextNode under the current parent.
       */
      private function endWord() {
          if ($this->newWord !== '') {
              $node = new TextNode($this->currentParent, $this->newWord);
              $this->currentParent->children[] = $node;
              $node->whiteBefore = $this->whiteSpaceBeforeThis;
              $this->whiteSpaceBeforeThis = false;
              $this->lastSibling = $node;
              $this->textNodes[] = $node;
              $this->newWord = "";
          }
      }

      /**
       * @return array One entry per leaf, produced by TextNode::toDiffLine.
       */
      public function getDiffLines() {
          return array_map(array('TextNode','toDiffLine'), $this->textNodes);
      }
  }
  /**
   * Annotates the leaves of the new tree with Modification objects and grafts
   * copies of deleted nodes from the old tree into the new one.
   */
  class TextNodeDiffer {
      /** Leaves of the new tree. */
      private $textNodes;
      /** Root of the new tree; deleted nodes are inserted below it. */
      public $bodyNode;
      /** Leaves of the old tree. */
      private $oldTextNodes;
      /** Root of the old tree. */
      private $oldBodyNode;
      private $newID = 0;
      private $changedID = 0;
      private $changedIDUsed = false;
      /**
       * Change description of the most recently marked CHANGED node.
       * Declared explicitly; it used to be created as a dynamic property.
       */
      private $changes;
      // used to remove the whitespace between a red and green block
      private $whiteAfterLastChangedPart = false;
      private $deletedID = 0;

      /**
       * @param DomTreeBuilder $tree    Builder holding the new document.
       * @param DomTreeBuilder $oldTree Builder holding the old document.
       */
      public function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
          $this->textNodes = $tree->textNodes;
          $this->bodyNode = $tree->bodyNode;
          $this->oldTextNodes = $oldTree->textNodes;
          $this->oldBodyNode = $oldTree->bodyNode;
      }

      /**
       * Marks the new leaves in [$start, $end) as ADDED under one shared id.
       *
       * @param int $start First index (inclusive).
       * @param int $end   Last index (exclusive).
       */
      public function markAsNew($start, $end) {
          if ($end <= $start) {
              return;
          }
          if ($this->whiteAfterLastChangedPart) {
              $this->textNodes[$start]->whiteBefore = false;
          }
          for ($i = $start; $i < $end; ++$i) {
              $mod = new Modification(Modification::ADDED);
              $mod->id = $this->newID;
              $this->textNodes[$i]->modification = $mod;
          }
          // The range is known to be non-empty here (see the guard above).
          $this->textNodes[$start]->modification->firstOfID = true;
          ++$this->newID;
      }

      /**
       * Walks a run of textually equal leaves and marks those whose ancestor
       * chain differs between the old and the new tree as CHANGED.
       *
       * @param int $leftstart  First index in the old leaves.
       * @param int $leftend    End index in the old leaves (not used by the loop).
       * @param int $rightstart First index in the new leaves.
       * @param int $rightend   End index (exclusive) in the new leaves.
       */
      public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
          $i = $rightstart;
          $j = $leftstart;
          if ($this->changedIDUsed) {
              ++$this->changedID;
              $this->changedIDUsed = false;
          }
          while ($i < $rightend) {
              $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
              $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
              $result = $acthis->getResult($acother);
              unset($acthis, $acother);
              if ( $result ) {
                  $mod = new Modification(Modification::CHANGED);
                  if (!$this->changedIDUsed) {
                      $mod->firstOfID = true;
                  } else if ($result !== $this->changes) {
                      // A different description starts a new group.
                      ++$this->changedID;
                      $mod->firstOfID = true;
                  }
                  $mod->changes = $result;
                  $mod->id = $this->changedID;
                  $this->textNodes[$i]->modification = $mod;
                  $this->changes = $result;
                  $this->changedIDUsed = true;
              } else if ($this->changedIDUsed) {
                  ++$this->changedID;
                  $this->changedIDUsed = false;
              }
              ++$i;
              ++$j;
          }
      }

      /**
       * Marks the old leaves in [$start, $end) as REMOVED and inserts copies
       * of the deleted nodes into the new tree around position $before.
       *
       * @param int $start  First index (inclusive) in the old leaves.
       * @param int $end    Last index (exclusive) in the old leaves.
       * @param int $before Index in the new leaves before which the deleted
       *                    content belongs.
       */
      public function markAsDeleted($start, $end, $before) {
          if ($end <= $start) {
              return;
          }
          $this->whiteAfterLastChangedPart =
              ($before > 0 && $this->textNodes[$before - 1]->whiteAfter);

          for ($i = $start; $i < $end; ++$i) {
              $mod = new Modification(Modification::REMOVED);
              $mod->id = $this->deletedID;
              // oldTextNodes is used here because we're going to move its deleted
              // elements to this tree!
              $this->oldTextNodes[$i]->modification = $mod;
          }
          $this->oldTextNodes[$start]->modification->firstOfID = true;

          $root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end-1])->parent;
          $junk1 = $junk2 = null;
          $deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2);
          HTMLDiffer::diffDebug( "Minimal set of deleted nodes of size " . count($deletedNodes) . "\n" );

          // Set prevLeaf to the leaf after which the old HTML needs to be
          // inserted
          $prevLeaf = null;
          if ($before > 0) {
              $prevLeaf = $this->textNodes[$before - 1];
          }
          // Set nextLeaf to the leaf before which the old HTML needs to be
          // inserted
          $nextLeaf = null;
          if ($before < count($this->textNodes)) {
              $nextLeaf = $this->textNodes[$before];
          }

          while (count($deletedNodes) > 0) {
              $firstDeleted = $deletedNodes[0];
              $lastDeleted = $deletedNodes[count($deletedNodes) - 1];

              if (isset($prevLeaf)) {
                  $prevResult = $prevLeaf->getLastCommonParent($firstDeleted);
              } else {
                  $prevResult = new LastCommonParentResult();
                  $prevResult->parent = $this->bodyNode;
                  $prevResult->indexInLastCommonParent = -1;
              }
              if (isset($nextLeaf)) {
                  $nextResult = $nextLeaf->getLastCommonParent($lastDeleted);
              } else {
                  $nextResult = new LastCommonParentResult();
                  $nextResult->parent = $this->bodyNode;
                  $nextResult->indexInLastCommonParent = $this->bodyNode->getNbChildren();
              }

              if ($prevResult->lastCommonParentDepth == $nextResult->lastCommonParentDepth) {
                  // We need some metric to choose which way to add-...
                  if ($firstDeleted->parent === $lastDeleted->parent
                          && $prevResult->parent === $nextResult->parent) {
                      // The difference is not in the parent
                      $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
                  } else {
                      // The difference is in the parent, so compare them
                      // now THIS is tricky
                      $distancePrev = $firstDeleted->parent->getMatchRatio($prevResult->parent);
                      $distanceNext = $lastDeleted->parent->getMatchRatio($nextResult->parent);
                      if ($distancePrev <= $distanceNext) {
                          $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
                      } else {
                          $nextResult->lastCommonParentDepth = $nextResult->lastCommonParentDepth + 1;
                      }
                  }
              }

              // After the tie-break above the two depths always differ, so
              // exactly one branch runs and removes one node from the list.
              if ($prevResult->lastCommonParentDepth > $nextResult->lastCommonParentDepth) {
                  // Inserting at the front
                  if ($prevResult->splittingNeeded) {
                      $prevLeaf->parent->splitUntil($prevResult->parent, $prevLeaf, true);
                  }
                  $prevLeaf = array_shift($deletedNodes)->copyTree();
                  $prevLeaf->setParent($prevResult->parent);
                  $prevResult->parent->addChildAbsolute($prevLeaf,$prevResult->indexInLastCommonParent + 1);
              } else if ($prevResult->lastCommonParentDepth < $nextResult->lastCommonParentDepth) {
                  // Inserting at the back
                  if ($nextResult->splittingNeeded) {
                      $splitOccured = $nextLeaf->parent->splitUntil($nextResult->parent, $nextLeaf, false);
                      if ($splitOccured) {
                          // The place where to insert is shifted one place to the
                          // right
                          $nextResult->indexInLastCommonParent = $nextResult->indexInLastCommonParent + 1;
                      }
                  }
                  $nextLeaf = array_pop($deletedNodes)->copyTree();
                  $nextLeaf->setParent($nextResult->parent);
                  $nextResult->parent->addChildAbsolute($nextLeaf,$nextResult->indexInLastCommonParent);
              }
          }
          ++$this->deletedID;
      }

      /** Delegates to the whitespace expansion of the new tree's root. */
      public function expandWhiteSpace() {
          $this->bodyNode->expandWhiteSpace();
      }

      /** @return int Number of leaves in the new document. */
      public function lengthNew(){
          return count($this->textNodes);
      }

      /** @return int Number of leaves in the old document. */
      public function lengthOld(){
          return count($this->oldTextNodes);
      }
  }
  /**
   * Entry point of the HTML diff: parses two documents, diffs their leaves
   * and renders the annotated result.
   */
  class HTMLDiffer {
      /** Accumulated debug log shared by all instances. */
      private static $debug = '';
      // Not used by any method of this class.
      private $output;

      /**
       * Computes the diff between two HTML fragments.
       *
       * Parse errors are only written to the debug log; the diff then
       * continues with whatever part of the tree was built.
       *
       * @param string $from Old HTML (body content).
       * @param string $to   New HTML (body content).
       * @return string HTML produced by HTMLOutput::parse for the new tree.
       */
      public function htmlDiff($from, $to) {
          $domfrom = $this->buildTree($from);
          unset($from);
          $domto = $this->buildTree($to);
          unset($to);

          $diffengine = new WikiDiff3();
          $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
          unset($diffengine);

          $textNodeDiffer = new TextNodeDiffer($domto, $domfrom);
          $currentIndexLeft = 0;
          $currentIndexRight = 0;
          foreach ($differences as $d) {
              // Unchanged text before this difference may still have changed ancestors.
              if ($d->leftstart > $currentIndexLeft) {
                  $textNodeDiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
                      $currentIndexRight, $d->rightstart);
              }
              if ($d->leftlength > 0) {
                  $textNodeDiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
              }
              $textNodeDiffer->markAsNew($d->rightstart, $d->rightend);
              $currentIndexLeft = $d->leftend;
              $currentIndexRight = $d->rightend;
          }
          $oldLength = $textNodeDiffer->lengthOld();
          if ($currentIndexLeft < $oldLength) {
              $textNodeDiffer->handlePossibleChangedPart($currentIndexLeft, $oldLength, $currentIndexRight, $textNodeDiffer->lengthNew());
          }
          $textNodeDiffer->expandWhiteSpace();
          $output = new HTMLOutput('htmldiff');
          return $output->parse($textNodeDiffer->bodyNode);
      }

      /**
       * Parses one HTML fragment, wrapped in a body element, into a tree.
       *
       * @param string $html Fragment to parse.
       * @return DomTreeBuilder The builder holding the resulting tree.
       */
      private function buildTree($html) {
          $parser = xml_parser_create('');
          $tree = new DomTreeBuilder();
          // Set the functions to handle opening and closing tags
          xml_set_element_handler($parser, array($tree, "startElement"), array($tree, "endElement"));
          // Set the function to handle blocks of character data
          xml_set_character_data_handler($parser, array($tree, "characters"));
          self::diffDebug( "Parsing " . strlen($html) . " characters worth of HTML\n" );
          if (!xml_parse($parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
                  || !xml_parse($parser, $html, false)
                  || !xml_parse($parser, '</body>', true)) {
              $error = xml_error_string(xml_get_error_code($parser));
              $line = xml_get_current_line_number($parser);
              $col = xml_get_current_column_number($parser);
              self::diffDebug( "XML error: $error at line $line and column $col\n" );
          }
          xml_parser_free($parser);
          return $tree;
      }

      /**
       * Merges neighbouring differences: the next difference is folded into
       * the current one while score() of their lengths exceeds the number of
       * unchanged old leaves between them.
       *
       * @param array $differences Range differences from the diff engine.
       * @return array Merged list of RangeDifference objects.
       */
      private function preProcess(/*array*/ $differences) {
          $newRanges = array();
          $nbDifferences = count($differences);
          for ($i = 0; $i < $nbDifferences; ++$i) {
              $leftStart = $differences[$i]->leftstart;
              $leftEnd = $differences[$i]->leftend;
              $rightStart = $differences[$i]->rightstart;
              $rightEnd = $differences[$i]->rightend;
              $leftLength = $leftEnd - $leftStart;
              $rightLength = $rightEnd - $rightStart;
              while ($i + 1 < $nbDifferences && self::score($leftLength,
                          $differences[$i + 1]->leftlength,
                          $rightLength,
                          $differences[$i + 1]->rightlength)
                      > ($differences[$i + 1]->leftstart - $leftEnd)) {
                  $leftEnd = $differences[$i + 1]->leftend;
                  $rightEnd = $differences[$i + 1]->rightend;
                  $leftLength = $leftEnd - $leftStart;
                  $rightLength = $rightEnd - $rightStart;
                  ++$i;
              }
              $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
          }
          return $newRanges;
      }

      /**
       * Heuristic to merge differences for readability.
       *
       * Returns 0 when both left lengths or both right lengths are zero.
       * Otherwise each length contributes itself while it is at most 3; above
       * that it contributes 3 and continues with half of the remainder. The
       * sum is divided by 1.5 times the number of lengths.
       *
       * @param int $ll  Left length of the current difference.
       * @param int $nll Left length of the next difference.
       * @param int $rl  Right length of the current difference.
       * @param int $nrl Right length of the next difference.
       * @return int|float The merge score.
       */
      public static function score($ll, $nll, $rl, $nrl) {
          if (($ll == 0 && $nll == 0)
                  || ($rl == 0 && $nrl == 0)) {
              return 0;
          }
          $numbers = array($ll, $nll, $rl, $nrl);
          $d = 0;
          foreach ($numbers as $number) {
              while ($number > 3) {
                  $d += 3;
                  $number -= 3;
                  $number *= 0.5;
              }
              $d += $number;
          }
          return $d / (1.5 * count($numbers));
      }

      /**
       * Add to debug output
       * @param string $str Debug output
       */
      public static function diffDebug( $str ) {
          self::$debug .= $str;
      }

      /**
       * Get debug output
       * @return string
       */
      public static function getDebugOutput() {
          return self::$debug;
      }
  }
  /**
   * Compares two subtrees by the text leaves they contain.
   */
  class TextOnlyComparator {
      /** Diff lines (TextNode::toDiffLine) of every TextNode below the tree. */
      public $leafs = array();

      /**
       * @param TagNode $tree Root of the subtree whose text is collected.
       */
      public function __construct(TagNode $tree) {
          $this->addRecursive($tree);
          $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
      }

      /**
       * Collects, depth first, every TextNode below $tree into $this->leafs.
       */
      private function addRecursive(TagNode $tree) {
          // Iterate by value: nothing is modified, and a by-reference loop
          // would leave reference slots behind in the tree's children array.
          foreach ($tree->children as $child) {
              if ($child instanceof TagNode) {
                  $this->addRecursive($child);
              } else if ($child instanceof TextNode) {
                  $this->leafs[] = $child;
              }
          }
      }

      /**
       * Distance between the two leaf sequences, based on the length of
       * their longest common subsequence as reported by WikiDiff3.
       *
       * @param TextOnlyComparator $other Comparator to measure against.
       * @return float 0.0 when the LCS covers both sequences, growing towards
       *               1.0 as less is shared; INF when either side has no leaves.
       */
      public function getMatchRatio(TextOnlyComparator $other) {
          $nbOthers = count($other->leafs);
          $nbThis = count($this->leafs);
          if ($nbOthers == 0 || $nbThis == 0) {
              // Same value the former expression -log(0) evaluated to.
              return INF;
          }
          $diffengine = new WikiDiff3(25000, 1.35);
          $diffengine->diff($this->leafs, $other->leafs);
          $lcsLength = $diffengine->getLcsLength();
          return (2.0 - $lcsLength/$nbOthers - $lcsLength/$nbThis) / 2.0;
      }
  }
  /**
   * Compares the ancestor chain of one node against that of another node.
   */
  class AncestorComparator {
      /** Ancestor nodes as handed to the constructor. */
      public $ancestors;

      /** The same ancestors mapped through TagNode::toDiffLine. */
      public $ancestorsText;

      // Not touched by any method of this class.
      public $compareTxt = "";

      /**
       * @param array $ancestors Ancestor nodes of the node being compared.
       */
      public function __construct(/*array*/ $ancestors) {
          $this->ancestors = $ancestors;
          $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
      }

      /**
       * Describes how this ancestor chain differs from another one.
       *
       * @param AncestorComparator $other Chain treated as the old side.
       * @return string|null HTML description of the differences, or null
       *                     when the diff engine reports none.
       */
      public function getResult(AncestorComparator $other) {
          $engine = new WikiDiff3(10000, 1.35);
          $ranges = $engine->diff_range($other->ancestorsText, $this->ancestorsText);
          if (0 == count($ranges)) {
              return null;
          }
          $generator = new ChangeTextGenerator($this, $other);
          $description = $generator->getChanged($ranges);
          return $description->toString();
      }
  }
  /**
   * Turns the differences between two ancestor chains into an HTML description.
   */
  class ChangeTextGenerator {
      /** Comparator of the new side. */
      private $ancestorComparator;
      /** Comparator of the old side. */
      private $other;
      /** Creates the describer object for each tag. */
      private $factory;

      public function __construct(AncestorComparator $ancestorComparator, AncestorComparator $other) {
          $this->ancestorComparator = $ancestorComparator;
          $this->other = $other;
          $this->factory = new TagToStringFactory();
      }

      /**
       * Builds the description for a list of range differences.
       *
       * More than one difference is wrapped in an outer changelist; a
       * difference touching more than one tag gets an inner changelist.
       *
       * @param array $differences Range differences between the two chains.
       * @return ChangeText The accumulated HTML.
       */
      public function getChanged(/*array*/ $differences) {
          $text = new ChangeText;
          $total = count($differences);
          $outerList = $total > 1;
          if ($outerList) {
              $text->addHtml('<ul class="changelist">');
          }
          for ($n = 0; $n < $total; ++$n) {
              $range = $differences[$n];
              if ($outerList) {
                  $text->addHtml('<li>');
              }
              $innerList = ($range->leftlength + $range->rightlength) > 1;
              if ($innerList) {
                  $text->addHtml('<ul class="changelist">');
              }
              // left are the old ones
              for ($k = $range->leftstart; $k < $range->leftend; ++$k) {
                  $this->emitTag($text, $this->other->ancestors[$k], true, $innerList);
              }
              // right are the new ones
              for ($k = $range->rightstart; $k < $range->rightend; ++$k) {
                  $this->emitTag($text, $this->ancestorComparator->ancestors[$k], false, $innerList);
              }
              if ($innerList) {
                  $text->addHtml('</ul>');
              }
              if ($outerList) {
                  $text->addHtml('</li>');
              }
          }
          if ($outerList) {
              $text->addHtml('</ul>');
          }
          return $text;
      }

      /**
       * Writes the description of one tag, optionally as a list item.
       *
       * @param ChangeText $txt      Output buffer.
       * @param TagNode    $ancestor Tag to describe.
       * @param bool       $removed  True for an old tag, false for a new tag.
       * @param bool       $asItem   Whether to wrap the description in <li>.
       */
      private function emitTag(ChangeText $txt, TagNode $ancestor, $removed, $asItem) {
          if ($asItem) {
              $txt->addHtml('<li>');
          }
          $describer = $this->factory->create($ancestor);
          if ($removed) {
              $describer->getRemovedDescription($txt);
          } else {
              $describer->getAddedDescription($txt);
          }
          if ($asItem) {
              $txt->addHtml('</li>');
          }
      }
  }
  /**
   * Append-only buffer for the HTML of a change description.
   */
  class ChangeText {
      /** Everything appended so far. */
      private $txt = "";

      /**
       * Appends a piece of HTML verbatim.
       *
       * @param string $s HTML to append.
       */
      public function addHtml($s) {
          $this->txt = $this->txt . $s;
      }

      /**
       * @return string The accumulated HTML.
       */
      public function toString() {
          return $this->txt;
      }
  }
  /**
   * Picks the describer class and the change semantic for a tag.
   */
  class TagToStringFactory {
      /** Tags whose change is reported as content being moved into/out of them. */
      private static $containerTags = array('html', 'body', 'p', 'blockquote',
          'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
          'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
          'dt', 'dd', 'input', 'form', 'img', 'span', 'a');

      /** Tags whose change is reported as a style being added/removed. */
      private static $styleTags = array('i', 'b', 'strong', 'em', 'font',
          'big', 'del', 'tt', 'sub', 'sup', 'strike');

      const MOVED = 1;
      const STYLE = 2;
      const UNKNOWN = 4;

      /**
       * Creates the describer for a tag: anchors and the content-less tags
       * img/br/hr get their own classes, everything else a plain TagToString.
       *
       * @param TagNode $node Tag to describe.
       * @return TagToString
       */
      public function create(TagNode $node) {
          $semantic = $this->getChangeSemantic($node->qName);
          if (0 == strcasecmp($node->qName, 'a')) {
              return new AnchorToString($node, $semantic);
          }
          foreach (array('img', 'br', 'hr') as $emptyTag) {
              if (0 == strcasecmp($node->qName, $emptyTag)) {
                  return new NoContentTagToString($node, $semantic);
              }
          }
          return new TagToString($node, $semantic);
      }

      /**
       * Classifies a tag name, ignoring case.
       *
       * @param string $qname Tag name.
       * @return int self::MOVED, self::STYLE or self::UNKNOWN.
       */
      protected function getChangeSemantic($qname) {
          $lowered = strtolower($qname);
          if (in_array($lowered, self::$containerTags)) {
              return self::MOVED;
          }
          return in_array($lowered, self::$styleTags) ? self::STYLE : self::UNKNOWN;
      }
  }
  /**
   * Writes a human-readable description of a tag that was added or removed
   * around some text.
   */
  class TagToString {
      /** Tag being described. */
      protected $node;
      /** Change semantic: one of the TagToStringFactory constants. */
      protected $sem;

      /**
       * @param TagNode $node Tag to describe.
       * @param int     $sem  TagToStringFactory::MOVED, STYLE or UNKNOWN.
       */
      public function __construct(TagNode $node, $sem) {
          $this->node = $node;
          $this->sem = $sem;
      }

      /**
       * Appends the description of this tag having been removed.
       *
       * @param ChangeText $txt Output buffer.
       */
      public function getRemovedDescription(ChangeText $txt) {
          $tagDescription = "&lt;" . $this->node->qName . "&gt;";
          if ($this->sem == TagToStringFactory::MOVED) {
              $txt->addHtml( 'Moved out of: ' . $tagDescription);
          } else if ($this->sem == TagToStringFactory::STYLE) {
              // The tag name is wrapped in the tag itself so the style is visible.
              $txt->addHtml( 'Style removed: <' . $this->node->qName . '>' . $tagDescription . '</' . $this->node->qName . '>');
          } else {
              $txt->addHtml( 'Removed: ' . $tagDescription);
          }
          $this->addAttributes($txt, $this->node->attributes);
          $txt->addHtml('.');
      }

      /**
       * Appends the description of this tag having been added.
       *
       * @param ChangeText $txt Output buffer.
       */
      public function getAddedDescription(ChangeText $txt) {
          $tagDescription = "&lt;" . $this->node->qName . "&gt;";
          if ($this->sem == TagToStringFactory::MOVED) {
              $txt->addHtml( 'Moved into: ' . $tagDescription);
          } else if ($this->sem == TagToStringFactory::STYLE) {
              $txt->addHtml( 'Style added: <' . $this->node->qName . '>' . $tagDescription . '</' . $this->node->qName . '>');
          } else {
              $txt->addHtml( 'Added: ' . $tagDescription);
          }
          $this->addAttributes($txt, $this->node->attributes);
          $txt->addHtml('.');
      }

      /**
       * Appends a listing of the attributes: the first one introduced by
       * ' with', middle ones by ', with' and the last one by ' and with'.
       *
       * NOTE(review): names and values only pass through
       * Sanitizer::normalizeCharReferences before being emitted as HTML;
       * confirm that this is sufficient escaping for untrusted documents.
       *
       * @param ChangeText $txt        Output buffer.
       * @param array      $attributes Attribute name => value.
       */
      protected function addAttributes(ChangeText $txt, array $attributes) {
          $count = count($attributes);
          if ($count < 1) {
              return;
          }
          $keys = array_keys($attributes);
          $txt->addHtml(Sanitizer::normalizeCharReferences( ' with "' . $keys[0] . '" attribute as "' . $attributes[$keys[0]] . '"'));
          $last = $count - 1;
          for ($i = 1; $i < $last; $i++) {
              $key = $keys[$i];
              $attr = $attributes[$key];
              $txt->addHtml(Sanitizer::normalizeCharReferences( ', with "' . $key . '" attribute as "' . $attr . '"'));
          }
          // Fixed: the former test ($last > 1) skipped the final attribute
          // whenever there were exactly two of them.
          if ($last > 0) {
              $txt->addHtml(Sanitizer::normalizeCharReferences( ' and with "' . $keys[$last] . '" attribute as "' . $attributes[$keys[$last]] . '"'));
          }
      }
  }
  /**
   * Describer for tags without content (created for img, br and hr), which
   * are reported as "changed from/to" instead of added or removed.
   */
  class NoContentTagToString extends TagToString {
      public function __construct(TagNode $node, $sem) {
          parent::__construct($node, $sem);
      }

      /**
       * Appends "Changed to <tag>" followed by the attribute listing.
       *
       * @param ChangeText $txt Output buffer.
       */
      public function getAddedDescription(ChangeText $txt) {
          $this->describeChange($txt, 'Changed to ');
      }

      /**
       * Appends "Changed from <tag>" followed by the attribute listing.
       *
       * @param ChangeText $txt Output buffer.
       */
      public function getRemovedDescription(ChangeText $txt) {
          $this->describeChange($txt, 'Changed from ');
      }

      /**
       * Shared body of both descriptions.
       *
       * @param ChangeText $txt  Output buffer.
       * @param string     $lead Text placed before the escaped tag name.
       */
      private function describeChange(ChangeText $txt, $lead) {
          $txt->addHtml($lead . "&lt;" . $this->node->qName . "&gt;");
          $this->addAttributes($txt, $this->node->attributes);
          $txt->addHtml('.');
      }
  }
  /**
   * Describer for anchors: the href attribute is reported as the link
   * destination instead of as an ordinary attribute.
   */
  class AnchorToString extends TagToString {
      public function __construct(TagNode $node, $sem) {
          parent::__construct($node, $sem);
      }

      /**
       * Writes the escaped href first (when present), then lets the parent
       * list the remaining attributes.
       *
       * @param ChangeText $txt        Output buffer.
       * @param array      $attributes Attribute name => value.
       */
      protected function addAttributes(ChangeText $txt, array $attributes) {
          if (!array_key_exists('href', $attributes)) {
              parent::addAttributes($txt, $attributes);
              return;
          }
          $destination = htmlspecialchars($attributes['href']);
          $txt->addHtml(' with destination ' . $destination);
          // $attributes is a local copy, so the node itself keeps its href.
          unset($attributes['href']);
          parent::addAttributes($txt, $attributes);
      }
  }
  /**
   * Takes a branch root and creates the HTML for it, wrapping added, changed
   * and removed leaves in marker spans.
   */
  class HTMLOutput{
      /** Prefix used in the id attribute of the marker spans. */
      private $prefix;
      /** Receives the generated markup. */
      private $handler;

      /**
       * @param string $prefix Prefix for generated span ids.
       */
      public function __construct($prefix) {
          $this->prefix = $prefix;
          $this->handler = new ContentHandler();
      }

      /**
       * Serialises $node and everything below it.
       *
       * Consecutive leaves with the same modification share one span; a span
       * is closed before any child tag and at the end of the node.
       *
       * @param TagNode $node Root of the branch to write.
       * @return string Everything written to the handler so far, which
       *                includes the output of earlier calls on this object.
       */
      public function parse(TagNode $node) {
          $handler = $this->handler;
          $hasOwnElement = self::hasOwnElement($node->qName);
          if ($hasOwnElement) {
              $handler->startElement($node->qName, $node->attributes);
          }
          $newStarted = false;
          $remStarted = false;
          $changeStarted = false;
          $changeTXT = '';
          // By value: the children are not modified here, and a by-reference
          // loop would leave reference slots behind in the children array.
          foreach ($node->children as $child) {
              if ($child instanceof TagNode) {
                  if ($newStarted) {
                      $handler->endElement('span');
                      $newStarted = false;
                  } else if ($changeStarted) {
                      $handler->endElement('span');
                      $changeStarted = false;
                  } else if ($remStarted) {
                      $handler->endElement('span');
                      $remStarted = false;
                  }
                  $this->parse($child);
              } else if ($child instanceof TextNode) {
                  $mod = $child->modification;
                  if ($newStarted && ($mod->type != Modification::ADDED || $mod->firstOfID)) {
                      $handler->endElement('span');
                      $newStarted = false;
                  } else if ($changeStarted && ($mod->type != Modification::CHANGED
                          || $mod->changes != $changeTXT || $mod->firstOfID)) {
                      $handler->endElement('span');
                      $changeStarted = false;
                  } else if ($remStarted && ($mod->type != Modification::REMOVED || $mod->firstOfID)) {
                      $handler->endElement('span');
                      $remStarted = false;
                  }
                  // no else because a removed part can just be closed and a new
                  // part can start
                  if (!$newStarted && $mod->type == Modification::ADDED) {
                      $attrs = array('class' => 'diff-html-added');
                      if ($mod->firstOfID) {
                          $attrs['id'] = "added-{$this->prefix}-{$mod->id}";
                      }
                      $handler->startElement('span', $attrs);
                      $newStarted = true;
                  } else if (!$changeStarted && $mod->type == Modification::CHANGED) {
                      $attrs = array('class' => 'diff-html-changed');
                      if ($mod->firstOfID) {
                          $attrs['id'] = "changed-{$this->prefix}-{$mod->id}";
                      }
                      $handler->startElement('span', $attrs);
                      //tooltip
                      $handler->startElement('span', array('class' => 'tip'));
                      $handler->html($mod->changes);
                      $handler->endElement('span');
                      $changeStarted = true;
                      $changeTXT = $mod->changes;
                  } else if (!$remStarted && $mod->type == Modification::REMOVED) {
                      $attrs = array('class'=>'diff-html-removed');
                      if ($mod->firstOfID) {
                          $attrs['id'] = "removed-{$this->prefix}-{$mod->id}";
                      }
                      $handler->startElement('span', $attrs);
                      $remStarted = true;
                  }
                  if ($child instanceof VisibleTagNode) {
                      // Put a non-breaking space before a <br> inside a marker span.
                      if (strcasecmp($child->qName, 'br') == 0 && ($newStarted || $changeStarted || $remStarted)) {
                          $handler->characters('&nbsp;');
                      }
                      $handler->element($child->qName, $child->attributes);
                  } else {
                      $handler->characters($child->text);
                  }
              }
          }
          if ($newStarted) {
              $handler->endElement('span');
          } else if ($changeStarted) {
              $handler->endElement('span');
          } else if ($remStarted) {
              $handler->endElement('span');
          }
          if ($hasOwnElement) {
              $handler->endElement($node->qName);
          }
          return $handler->getContent();
      }

      /**
       * Tells whether parse() writes start/end tags for a node itself.
       * img, br and hr are written through their leaf node; body is omitted.
       *
       * @param string $qName Tag name.
       * @return bool
       */
      private static function hasOwnElement($qName) {
          return strcasecmp($qName, 'img') != 0 &&
              strcasecmp($qName, 'br') != 0 &&
              strcasecmp($qName, 'hr') != 0 &&
              strcasecmp($qName, 'body') != 0;
      }
  }
  /**
   * String buffer that collects markup: tags are rendered through the Xml
   * helper, text and raw HTML are appended unchanged.
   */
  class ContentHandler {
      /** Markup collected so far. */
      private $string;

      public function __construct() {
          $this->string = '';
      }

      /**
       * Appends an opening tag.
       *
       * @param string $qname     Tag name.
       * @param array  $arguments Attributes of the tag.
       */
      public function startElement($qname, /*array*/ $arguments) {
          $opening = Xml::openElement($qname, $arguments);
          $this->string .= $opening;
      }

      /**
       * Appends a closing tag.
       *
       * @param string $qname Tag name.
       */
      public function endElement($qname){
          $closing = Xml::closeElement($qname);
          $this->string .= $closing;
      }

      /**
       * Appends a complete element as rendered by Xml::element.
       *
       * @param string $qname     Tag name.
       * @param array  $arguments Attributes of the tag.
       */
      public function element($qname, /*array*/ $arguments) {
          $rendered = Xml::element($qname, $arguments);
          $this->string .= $rendered;
      }

      /**
       * Appends text exactly as given; no escaping is applied here.
       *
       * @param string $chars Text to append.
       */
      public function characters($chars){
          $this->string = $this->string . $chars;
      }

      /**
       * Appends raw HTML exactly as given.
       *
       * @param string $html Markup to append.
       */
      public function html($html){
          $this->string = $this->string . $html;
      }

      /**
       * @return string Everything appended so far.
       */
      public function getContent() {
          return $this->string;
      }
  }