PageRenderTime 51ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/daisydiff-php/src/Nodes.php

http://daisydiff.googlecode.com/
PHP | 457 lines | 325 code | 79 blank | 53 comment | 56 complexity | 365413232f4841a4469000274f58f5cd MD5 | raw file
Possible License(s): Apache-2.0
  1. <?php
  2. /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. * or see http://www.gnu.org/
  18. *
  19. */
  20. /**
  21. * Any element in the DOM tree of an HTML document.
  22. * @ingroup DifferenceEngine
  23. */
  24. include_once 'Xml.php';
  /**
   * Base class of every node in the parsed HTML tree.
   *
   * Keeps the link to the parent node, a lazily built and cached list of
   * ancestors, and two flags telling whether white space surrounds the node.
   * @ingroup DifferenceEngine
   */
  class Node {

      /** Parent node, or null when the node has no parent. */
      public $parent;

      /** Cached result of getParentTree(); unset again by setParent(). */
      protected $parentTree;

      /** True when white space precedes this node. */
      public $whiteBefore = false;

      /** True when white space follows this node. */
      public $whiteAfter = false;

      /**
       * @param $parent Node|null The node this node hangs under.
       */
      public function __construct($parent) {
          $this->parent = $parent;
      }

      /**
       * Returns the ancestors of this node, outermost first and the direct
       * parent last. The list is computed once and cached.
       *
       * @return array Empty when the node has no parent.
       */
      public function getParentTree() {
          if (isset($this->parentTree)) {
              return $this->parentTree;
          }
          if (is_null($this->parent)) {
              $this->parentTree = array();
          } else {
              $ancestors = $this->parent->getParentTree();
              $ancestors[] = $this->parent;
              $this->parentTree = $ancestors;
          }
          return $this->parentTree;
      }

      /**
       * Walks both ancestor lists from the top and reports the deepest
       * ancestor on which this node and $other agree.
       *
       * Two ancestors at the same depth agree when their toDiffTag strings
       * are identical and they sit at the same index inside their parents.
       *
       * @param $other Node The node to compare ancestry with.
       * @return LastCommonParentResult With lastCommonParentDepth, parent,
       *         indexInLastCommonParent and, when the trees diverge or this
       *         node lies deeper than $other, splittingNeeded set to true.
       */
      public function getLastCommonParent(Node $other) {
          $outcome = new LastCommonParentResult();
          $mine = $this->getParentTree();
          $theirs = $other->getParentTree();
          $mineCount = count($mine);
          $theirsCount = count($theirs);

          // Depth 0 is taken to be shared; comparison starts one level below.
          $depth = 1;
          $diverged = false;
          while ($depth < $mineCount && $depth < $theirsCount) {
              $sameTag = $mine[$depth]->toDiffTag === $theirs[$depth]->toDiffTag;
              if (!$sameTag
                  || $mine[$depth - 1]->getIndexOf($mine[$depth])
                      !== $theirs[$depth - 1]->getIndexOf($theirs[$depth])) {
                  $diverged = true;
                  break;
              }
              // Still identical: depth - 1 keeps pointing at the last match.
              $depth++;
          }

          $outcome->lastCommonParentDepth = $depth - 1;
          $outcome->parent = $mine[$depth - 1];

          if ($diverged || $mineCount > $theirsCount) {
              // Either a tag differed, or every compared tag matched but this
              // node still has deeper ancestors of its own.
              $outcome->indexInLastCommonParent =
                  $mine[$depth - 1]->getIndexOf($mine[$depth]);
              $outcome->splittingNeeded = true;
          } else {
              $outcome->indexInLastCommonParent =
                  $mine[$depth - 1]->getIndexOf($this);
          }
          return $outcome;
      }

      /**
       * Re-attaches the node to another parent and drops the cached ancestor
       * list so that it is rebuilt on the next getParentTree() call.
       *
       * @param $parent Node|null The new parent.
       */
      public function setParent($parent) {
          unset($this->parentTree);
          $this->parent = $parent;
      }

      /**
       * Tells whether any ancestor reports isPre().
       *
       * @return bool
       */
      public function inPre() {
          foreach ($this->getParentTree() as $ancestor) {
              if ($ancestor->isPre()) {
                  return true;
              }
          }
          return false;
      }
  }
  /**
   * Node that can contain other nodes. Represents an HTML tag.
   * @ingroup DifferenceEngine
   */
  class TagNode extends Node {

      /** Child nodes as a zero-based list (re-indexed by removeChild()). */
      public $children = array();

      /** Lower-cased tag name. */
      public $qName;

      /** Attributes of the tag, keyed by lower-cased attribute name. */
      public $attributes = array();

      /** Opening tag built by Xml::openElement() from all attributes. */
      public $openingTag;

      /**
       * Opening tag built by Xml::openElement() without the attributes named
       * in $uncomparableAttributes; this is the string used for comparisons.
       */
      public $toDiffTag;

      /** Lower-case attribute names that are left out of $toDiffTag. */
      public static $uncomparableAttributes = array('style');

      /**
       * @param $parent Node|null Parent of the new tag.
       * @param $qName string Tag name; stored lower-cased.
       * @param $attributes array Attribute name => value; names are stored
       *        lower-cased.
       */
      function __construct($parent, $qName, /*array*/ $attributes) {
          parent::__construct($parent);
          $this->qName = strtolower($qName);

          // Has to start out as an array: a tag without any comparable
          // attribute (BodyNode passes none at all) would otherwise hand an
          // undefined variable to Xml::openElement().
          $toDiffAttributes = array();
          foreach ($attributes as $key => $value) {
              $name = strtolower($key);
              if (!in_array($name, self::$uncomparableAttributes, true)) {
                  $toDiffAttributes[$name] = $value;
              }
              $this->attributes[$name] = $value;
          }

          // Both tags are always built; a constructor has no return value, so
          // nothing may hinge on the truthiness of the first assignment.
          $this->openingTag = Xml::openElement($this->qName, $this->attributes);
          $this->toDiffTag = Xml::openElement($this->qName, $toDiffAttributes);
      }

      /**
       * Inserts $node at position $index, shifting later children back.
       * The node's parent link is not touched.
       *
       * @param $node Node
       * @param $index int
       */
      public function addChildAbsolute(Node $node, $index) {
          array_splice($this->children, $index, 0, array($node));
      }

      /**
       * Finds the position of $child by object identity.
       *
       * @param $child Node
       * @return int|null Index of the child, or null when it is not a child.
       */
      public function getIndexOf(Node $child) {
          // don't trust array_search with objects
          foreach ($this->children as $key => $value) {
              if ($value === $child) {
                  return $key;
              }
          }
          return null;
      }

      /**
       * @return int Number of direct children.
       */
      public function getNbChildren() {
          return count($this->children);
      }

      /**
       * Collects the smallest set of nodes covering everything the children
       * report as deleted for modification $id.
       *
       * When every child reports itself as fully deleted, the tag itself is
       * returned instead of its descendants.
       *
       * @param $id mixed Modification id handed on to the children.
       * @param $allDeleted bool Out: true when all children are fully deleted.
       * @param $somethingDeleted bool Out: true when any child reported a
       *        deletion.
       * @return array List of nodes; empty for a tag without children.
       */
      public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
          $nodes = array();
          $allDeleted = false;
          $somethingDeleted = false;
          $hasNonDeletedDescendant = false;
          if (empty($this->children)) {
              return $nodes;
          }
          foreach ($this->children as $child) {
              $allDeleted_local = false;
              $somethingDeleted_local = false;
              $childrenChildren = $child->getMinimalDeletedSet($id, $allDeleted_local, $somethingDeleted_local);
              if ($somethingDeleted_local) {
                  $nodes = array_merge($nodes, $childrenChildren);
                  $somethingDeleted = true;
              }
              if (!$allDeleted_local) {
                  $hasNonDeletedDescendant = true;
              }
          }
          if (!$hasNonDeletedDescendant) {
              // Everything below is gone, so this tag stands for all of it.
              $nodes = array($this);
              $allDeleted = true;
          }
          return $nodes;
      }

      /**
       * Splits this tag around the child $split and repeats the split on each
       * ancestor until $parent is reached ($parent itself is not split).
       *
       * The tag is replaced in its parent by up to two copies carrying the
       * same name and attributes: the first receives the children before
       * $split, the second those after it. $split itself goes to the first
       * copy when $includeLeft is true, otherwise to the second. A copy that
       * ends up without children is not inserted.
       *
       * @param $parent TagNode Ancestor at which splitting stops.
       * @param $split Node Child around which to split.
       * @param $includeLeft bool Whether $split stays with the left part.
       * @return bool True when this tag (not an ancestor) was really divided
       *         into two non-empty parts.
       */
      public function splitUntil(TagNode $parent, Node $split, $includeLeft) {
          $splitOccured = false;
          if ($parent !== $this) {
              $part1 = new TagNode(null, $this->qName, $this->attributes);
              $part2 = new TagNode(null, $this->qName, $this->attributes);
              $part1->setParent($this->parent);
              $part2->setParent($this->parent);

              $onSplit = false;
              $pastSplit = false;
              foreach ($this->children as $child) {
                  if ($child === $split) {
                      $onSplit = true;
                  }
                  if ((!$pastSplit && !$onSplit) || ($onSplit && $includeLeft)) {
                      $child->setParent($part1);
                      $part1->children[] = $child;
                  } else {
                      $child->setParent($part2);
                      $part2->children[] = $child;
                  }
                  if ($onSplit) {
                      $onSplit = false;
                      $pastSplit = true;
                  }
              }

              $myindexinparent = $this->parent->getIndexOf($this);
              $this->parent->removeChild($myindexinparent);

              // Insert the right part first so that the left part, inserted
              // at the same index afterwards, ends up in front of it.
              if (!empty($part2->children)) {
                  $this->parent->addChildAbsolute($part2, $myindexinparent);
              }
              if (!empty($part1->children)) {
                  $this->parent->addChildAbsolute($part1, $myindexinparent);
              }
              if (!empty($part1->children) && !empty($part2->children)) {
                  $splitOccured = true;
              }

              // Continue one level up; the result of that call is ignored.
              if ($includeLeft) {
                  $this->parent->splitUntil($parent, $part1, $includeLeft);
              } else {
                  $this->parent->splitUntil($parent, $part2, $includeLeft);
              }
          }
          return $splitOccured;
      }

      /**
       * Removes the child at $index and re-indexes the remaining children.
       *
       * @param $index int
       */
      private function removeChild($index) {
          unset($this->children[$index]);
          $this->children = array_values($this->children);
      }

      /**
       * Names of block-level tags. Not referenced inside this class;
       * presumably read by callers - TODO confirm.
       */
      public static $blocks = array('html', 'body','p','blockquote', 'h1',
          'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li', 'table',
          'tbody', 'tr', 'td', 'th', 'br');

      /**
       * Creates a copy of this tag and, recursively, of all its children.
       *
       * @return TagNode The copy; its parent is null.
       */
      public function copyTree() {
          $newThis = new TagNode(null, $this->qName, $this->attributes);
          $newThis->whiteBefore = $this->whiteBefore;
          $newThis->whiteAfter = $this->whiteAfter;
          foreach ($this->children as $child) {
              $newChild = $child->copyTree();
              $newChild->setParent($newThis);
              $newThis->children[] = $newChild;
          }
          return $newThis;
      }

      /**
       * Compares this tag with $other through two TextOnlyComparator objects.
       *
       * @param $other TagNode
       * @return mixed Whatever TextOnlyComparator::getMatchRatio() returns.
       */
      public function getMatchRatio(TagNode $other) {
          $txtComp = new TextOnlyComparator($other);
          return $txtComp->getMatchRatio(new TextOnlyComparator($this));
      }

      /**
       * Turns the whiteBefore / whiteAfter flags of the children into real
       * WhiteSpaceNode children, descending into every child tag that is not
       * a pre tag.
       */
      public function expandWhiteSpace() {
          // Number of nodes inserted so far; corrects the index of the
          // original children while the list grows.
          $shift = 0;
          // True when a space was just appended after the previous child, so
          // that no second space is put in front of the current one.
          $spaceAdded = false;
          $nbOriginalChildren = $this->getNbChildren();
          for ($i = 0; $i < $nbOriginalChildren; ++$i) {
              $child = $this->children[$i + $shift];
              if ($child instanceof TagNode) {
                  if (!$child->isPre()) {
                      $child->expandWhiteSpace();
                  }
              }
              if (!$spaceAdded && $child->whiteBefore) {
                  $ws = new WhiteSpaceNode(null, ' ', $child->getLeftMostChild());
                  $ws->setParent($this);
                  $this->addChildAbsolute($ws, $i + ($shift++));
              }
              if ($child->whiteAfter) {
                  $ws = new WhiteSpaceNode(null, ' ', $child->getRightMostChild());
                  $ws->setParent($this);
                  $this->addChildAbsolute($ws, $i + 1 + ($shift++));
                  $spaceAdded = true;
              } else {
                  $spaceAdded = false;
              }
          }
      }

      /**
       * @return Node The first leaf below this tag, or the tag itself when it
       *         has no children.
       */
      public function getLeftMostChild() {
          if (empty($this->children)) {
              return $this;
          }
          return $this->children[0]->getLeftMostChild();
      }

      /**
       * @return Node The last leaf below this tag, or the tag itself when it
       *         has no children.
       */
      public function getRightMostChild() {
          if (empty($this->children)) {
              return $this;
          }
          return $this->children[$this->getNbChildren() - 1]->getRightMostChild();
      }

      /**
       * @return bool True when this is a pre tag.
       */
      public function isPre() {
          return 0 == strcasecmp($this->qName, 'pre');
      }

      /**
       * @param $node TagNode
       * @return string The node's $toDiffTag.
       */
      public static function toDiffLine(TagNode $node) {
          return $node->toDiffTag;
      }
  }
  /**
   * Represents a piece of text in the HTML file.
   * @ingroup DifferenceEngine
   */
  class TextNode extends Node {

      /** The text carried by this node. */
      public $text;

      /** Modification object describing how this text changed. */
      public $modification;

      /**
       * @param $parent Node|null Parent of the new node.
       * @param $text string Text content; the modification starts out as
       *        Modification::NONE.
       */
      function __construct($parent, $text) {
          parent::__construct($parent);
          $this->modification = new Modification(Modification::NONE);
          $this->text = $text;
      }

      /**
       * Returns a parentless clone of this node.
       *
       * The clone is shallow: it refers to the same Modification object as
       * the original.
       *
       * @return TextNode
       */
      public function copyTree() {
          $clone = clone $this;
          $clone->setParent(null);
          return $clone;
      }

      /**
       * @return TextNode This node; a text node has no children.
       */
      public function getLeftMostChild() {
          return $this;
      }

      /**
       * @return TextNode This node; a text node has no children.
       */
      public function getRightMostChild() {
          return $this;
      }

      /**
       * Reports this node when it was removed by modification $id.
       *
       * The two flags are only ever set to true here; they keep the caller's
       * value when the node does not match.
       *
       * @param $id mixed Modification id to look for.
       * @param $allDeleted bool Out: set to true on a match.
       * @param $somethingDeleted bool Out: set to true on a match.
       * @return array array($this) on a match, otherwise an empty array.
       */
      public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
          if ($this->modification->type == Modification::REMOVED
              && $this->modification->id == $id) {
              $somethingDeleted = true;
              $allDeleted = true;
              return array($this);
          }
          return array();
      }

      /**
       * Tells whether $other is a text node with the same text, treating
       * every line-break character as a space.
       *
       * @param $other mixed Node to compare with; anything that is not a
       *        TextNode (including null) never matches.
       * @return bool
       */
      public function isSameText($other) {
          if (!($other instanceof TextNode)) {
              return false;
          }
          // Both sides go through the same normalisation; the previous code
          // read an undefined variable instead of the other node's text.
          return self::toDiffLine($this) === self::toDiffLine($other);
      }

      /**
       * @param $node TextNode
       * @return string The node's text with each \n and \r replaced by a
       *         space.
       */
      public static function toDiffLine(TextNode $node) {
          return preg_replace('/[\n\r]/', ' ', $node->text);
      }
  }
  /**
   * Text node that holds white space, such as the single spaces inserted by
   * TagNode::expandWhiteSpace().
   * @ingroup DifferenceEngine
   */
  class WhiteSpaceNode extends TextNode {

      /**
       * @param $parent Node|null Parent of the new node.
       * @param $s string The white space text.
       * @param $like Node|null When this is a TextNode, its modification is
       *        cloned for the new node with firstOfID reset to false; any
       *        other value leaves the default modification in place.
       */
      function __construct($parent, $s, Node $like = null) {
          parent::__construct($parent, $s);
          if (!($like instanceof TextNode)) {
              // Covers null as well: nothing to inherit from.
              return;
          }
          $inherited = clone $like->modification;
          $inherited->firstOfID = false;
          $this->modification = $inherited;
      }
  }
  /**
   * Represents the root of a HTML document.
   * @ingroup DifferenceEngine
   */
  class BodyNode extends TagNode {

      /**
       * Builds a parentless 'body' tag without attributes.
       */
      function __construct() {
          parent::__construct(null, 'body', array());
      }

      /**
       * Creates a new BodyNode holding recursive copies of all children.
       *
       * @return BodyNode
       */
      public function copyTree() {
          $copy = new BodyNode();
          foreach ($this->children as $original) {
              $duplicate = $original->copyTree();
              $duplicate->setParent($copy);
              $copy->children[] = $duplicate;
          }
          return $copy;
      }

      /**
       * Concatenates the minimal deleted sets of all children.
       *
       * Unlike TagNode, the body never returns itself. The two flags are
       * handed to every child in turn and are not reset here.
       *
       * @param $id mixed Modification id handed on to the children.
       * @param $allDeleted bool Out: as left by the children.
       * @param $somethingDeleted bool Out: as left by the children.
       * @return array
       */
      public function getMinimalDeletedSet($id, &$allDeleted, &$somethingDeleted) {
          $collected = array();
          foreach ($this->children as $member) {
              $collected = array_merge(
                  $collected,
                  $member->getMinimalDeletedSet($id, $allDeleted, $somethingDeleted)
              );
          }
          return $collected;
      }
  }
  /**
   * Represents a tag that represents a visible object, for example, an image
   * in HTML. Even though images do not contain any text they are independent
   * visible objects on the page. They are logically a TextNode.
   * @ingroup DifferenceEngine
   */
  class VisibleTagNode extends TextNode {

      /** Lower-cased tag name. */
      public $qName;

      /** Attributes of the tag exactly as passed to the constructor. */
      public $attributes;

      /**
       * @param $parent TagNode Parent of the new node.
       * @param $qName string Tag name; stored lower-cased. An img tag without
       *        a src attribute is reported through HTMLDiffer::diffDebug().
       * @param $attrs array Attribute name => value.
       */
      function __construct(TagNode $parent, $qName, /*array*/ $attrs) {
          if (strcasecmp($qName, 'img') == 0 && !array_key_exists('src', $attrs)) {
              HTMLDiffer::diffDebug( "Image without a source\n" );
          }
          // The text of a visible tag is always empty.
          parent::__construct($parent, '');
          $this->qName = strtolower($qName);
          $this->attributes = $attrs;
      }

      /**
       * Tells whether $other shows the same visible object.
       *
       * The text is empty for every visible tag, so it cannot tell two of
       * them apart; the tag name and the attributes (in any order) have to
       * match as well.
       *
       * @param $other mixed Node to compare with; anything that is not a
       *        VisibleTagNode (including null) never matches.
       * @return bool
       */
      public function isSameText($other) {
          // The check used to name VisibleTextNode, a class this file does
          // not define, which made the method return false for every input.
          if (!($other instanceof VisibleTagNode)) {
              return false;
          }
          if ($this->text !== $other->text || $this->qName !== $other->qName) {
              return false;
          }
          // Sort copies by key so that attribute order does not matter while
          // the values are still compared strictly.
          $mine = $this->attributes;
          $theirs = $other->attributes;
          ksort($mine);
          ksort($theirs);
          return $mine === $theirs;
      }
  }
  /**
   * No-op node: a Node that carries no data of its own.
   * @ingroup DifferenceEngine
   */
  class DummyNode extends Node {

      /**
       * Deliberately does nothing. Node::__construct() is not invoked, so
       * $parent keeps its initial null value.
       */
      public function __construct() {
      }
  }