PageRenderTime 28ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/core/lib/Drupal/Component/Diff/DiffEngine.php

https://bitbucket.org/aswinvk28/smartpan-stock-drupal
PHP | 1226 lines | 781 code | 136 blank | 309 comment | 177 complexity | fa17213b46bee437333493514b184416 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. /**
  3. * @file
  4. * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
  5. *
  6. * Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
  7. * You may copy this code freely under the conditions of the GPL.
  8. */
  9. use Drupal\Component\Utility\Settings;
  10. use Drupal\Component\Utility\String;
  11. use Drupal\Component\Utility\Unicode;
  12. define('USE_ASSERTS', FALSE);
  13. /**
  14. * @todo document
  15. * @private
  16. * @subpackage DifferenceEngine
  17. */
  18. class _DiffOp {
  19. var $type;
  20. var $orig;
  21. var $closing;
  22. function reverse() {
  23. trigger_error('pure virtual', E_USER_ERROR);
  24. }
  25. function norig() {
  26. return $this->orig ? sizeof($this->orig) : 0;
  27. }
  28. function nclosing() {
  29. return $this->closing ? sizeof($this->closing) : 0;
  30. }
  31. }
  32. /**
  33. * @todo document
  34. * @private
  35. * @subpackage DifferenceEngine
  36. */
  37. class _DiffOp_Copy extends _DiffOp {
  38. var $type = 'copy';
  39. function _DiffOp_Copy($orig, $closing = FALSE) {
  40. if (!is_array($closing)) {
  41. $closing = $orig;
  42. }
  43. $this->orig = $orig;
  44. $this->closing = $closing;
  45. }
  46. function reverse() {
  47. return new _DiffOp_Copy($this->closing, $this->orig);
  48. }
  49. }
  50. /**
  51. * @todo document
  52. * @private
  53. * @subpackage DifferenceEngine
  54. */
  55. class _DiffOp_Delete extends _DiffOp {
  56. var $type = 'delete';
  57. function _DiffOp_Delete($lines) {
  58. $this->orig = $lines;
  59. $this->closing = FALSE;
  60. }
  61. function reverse() {
  62. return new _DiffOp_Add($this->orig);
  63. }
  64. }
  65. /**
  66. * @todo document
  67. * @private
  68. * @subpackage DifferenceEngine
  69. */
  70. class _DiffOp_Add extends _DiffOp {
  71. var $type = 'add';
  72. function _DiffOp_Add($lines) {
  73. $this->closing = $lines;
  74. $this->orig = FALSE;
  75. }
  76. function reverse() {
  77. return new _DiffOp_Delete($this->closing);
  78. }
  79. }
  80. /**
  81. * @todo document
  82. * @private
  83. * @subpackage DifferenceEngine
  84. */
  85. class _DiffOp_Change extends _DiffOp {
  86. var $type = 'change';
  87. function _DiffOp_Change($orig, $closing) {
  88. $this->orig = $orig;
  89. $this->closing = $closing;
  90. }
  91. function reverse() {
  92. return new _DiffOp_Change($this->closing, $this->orig);
  93. }
  94. }
  95. /**
  96. * Class used internally by Diff to actually compute the diffs.
  97. *
  98. * The algorithm used here is mostly lifted from the perl module
  99. * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
  100. * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
  101. *
  102. * More ideas are taken from:
  103. * http://www.ics.uci.edu/~eppstein/161/960229.html
  104. *
  105. * Some ideas are (and a bit of code) are from from analyze.c, from GNU
  106. * diffutils-2.7, which can be found at:
  107. * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
  108. *
  109. * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
  110. * are my own.
  111. *
  112. * Line length limits for robustness added by Tim Starling, 2005-08-31
  113. *
  114. * @author Geoffrey T. Dairiki, Tim Starling
  115. * @private
  116. * @subpackage DifferenceEngine
  117. */
  118. class _DiffEngine {
  119. function MAX_XREF_LENGTH() {
  120. return 10000;
  121. }
  122. function diff($from_lines, $to_lines) {
  123. $n_from = sizeof($from_lines);
  124. $n_to = sizeof($to_lines);
  125. $this->xchanged = $this->ychanged = array();
  126. $this->xv = $this->yv = array();
  127. $this->xind = $this->yind = array();
  128. unset($this->seq);
  129. unset($this->in_seq);
  130. unset($this->lcs);
  131. // Skip leading common lines.
  132. for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
  133. if ($from_lines[$skip] !== $to_lines[$skip]) {
  134. break;
  135. }
  136. $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
  137. }
  138. // Skip trailing common lines.
  139. $xi = $n_from;
  140. $yi = $n_to;
  141. for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
  142. if ($from_lines[$xi] !== $to_lines[$yi]) {
  143. break;
  144. }
  145. $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
  146. }
  147. // Ignore lines which do not exist in both files.
  148. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  149. $xhash[$this->_line_hash($from_lines[$xi])] = 1;
  150. }
  151. for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
  152. $line = $to_lines[$yi];
  153. if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
  154. continue;
  155. }
  156. $yhash[$this->_line_hash($line)] = 1;
  157. $this->yv[] = $line;
  158. $this->yind[] = $yi;
  159. }
  160. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  161. $line = $from_lines[$xi];
  162. if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
  163. continue;
  164. }
  165. $this->xv[] = $line;
  166. $this->xind[] = $xi;
  167. }
  168. // Find the LCS.
  169. $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
  170. // Merge edits when possible
  171. $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
  172. $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
  173. // Compute the edit operations.
  174. $edits = array();
  175. $xi = $yi = 0;
  176. while ($xi < $n_from || $yi < $n_to) {
  177. USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
  178. USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
  179. // Skip matching "snake".
  180. $copy = array();
  181. while ( $xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
  182. $copy[] = $from_lines[$xi++];
  183. ++$yi;
  184. }
  185. if ($copy) {
  186. $edits[] = new _DiffOp_Copy($copy);
  187. }
  188. // Find deletes & adds.
  189. $delete = array();
  190. while ($xi < $n_from && $this->xchanged[$xi]) {
  191. $delete[] = $from_lines[$xi++];
  192. }
  193. $add = array();
  194. while ($yi < $n_to && $this->ychanged[$yi]) {
  195. $add[] = $to_lines[$yi++];
  196. }
  197. if ($delete && $add) {
  198. $edits[] = new _DiffOp_Change($delete, $add);
  199. }
  200. elseif ($delete) {
  201. $edits[] = new _DiffOp_Delete($delete);
  202. }
  203. elseif ($add) {
  204. $edits[] = new _DiffOp_Add($add);
  205. }
  206. }
  207. return $edits;
  208. }
  209. /**
  210. * Returns the whole line if it's small enough, or the MD5 hash otherwise.
  211. */
  212. function _line_hash($line) {
  213. if (Unicode::strlen($line) > $this->MAX_XREF_LENGTH()) {
  214. return md5($line);
  215. }
  216. else {
  217. return $line;
  218. }
  219. }
  220. /**
  221. * Divide the Largest Common Subsequence (LCS) of the sequences
  222. * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
  223. * sized segments.
  224. *
  225. * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
  226. * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
  227. * sub sequences. The first sub-sequence is contained in [X0, X1),
  228. * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
  229. * that (X0, Y0) == (XOFF, YOFF) and
  230. * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
  231. *
  232. * This function assumes that the first lines of the specified portions
  233. * of the two files do not match, and likewise that the last lines do not
  234. * match. The caller must trim matching lines from the beginning and end
  235. * of the portions it is going to specify.
  236. */
  237. function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
  238. $flip = FALSE;
  239. if ($xlim - $xoff > $ylim - $yoff) {
  240. // Things seems faster (I'm not sure I understand why)
  241. // when the shortest sequence in X.
  242. $flip = TRUE;
  243. list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
  244. }
  245. if ($flip) {
  246. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  247. $ymatches[$this->xv[$i]][] = $i;
  248. }
  249. }
  250. else {
  251. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  252. $ymatches[$this->yv[$i]][] = $i;
  253. }
  254. }
  255. $this->lcs = 0;
  256. $this->seq[0]= $yoff - 1;
  257. $this->in_seq = array();
  258. $ymids[0] = array();
  259. $numer = $xlim - $xoff + $nchunks - 1;
  260. $x = $xoff;
  261. for ($chunk = 0; $chunk < $nchunks; $chunk++) {
  262. if ($chunk > 0) {
  263. for ($i = 0; $i <= $this->lcs; $i++) {
  264. $ymids[$i][$chunk-1] = $this->seq[$i];
  265. }
  266. }
  267. $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
  268. for ( ; $x < $x1; $x++) {
  269. $line = $flip ? $this->yv[$x] : $this->xv[$x];
  270. if (empty($ymatches[$line])) {
  271. continue;
  272. }
  273. $matches = $ymatches[$line];
  274. reset($matches);
  275. while (list ($junk, $y) = each($matches)) {
  276. if (empty($this->in_seq[$y])) {
  277. $k = $this->_lcs_pos($y);
  278. USE_ASSERTS && assert($k > 0);
  279. $ymids[$k] = $ymids[$k-1];
  280. break;
  281. }
  282. }
  283. while (list ($junk, $y) = each($matches)) {
  284. if ($y > $this->seq[$k-1]) {
  285. USE_ASSERTS && assert($y < $this->seq[$k]);
  286. // Optimization: this is a common case:
  287. // next match is just replacing previous match.
  288. $this->in_seq[$this->seq[$k]] = FALSE;
  289. $this->seq[$k] = $y;
  290. $this->in_seq[$y] = 1;
  291. }
  292. elseif (empty($this->in_seq[$y])) {
  293. $k = $this->_lcs_pos($y);
  294. USE_ASSERTS && assert($k > 0);
  295. $ymids[$k] = $ymids[$k-1];
  296. }
  297. }
  298. }
  299. }
  300. $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
  301. $ymid = $ymids[$this->lcs];
  302. for ($n = 0; $n < $nchunks - 1; $n++) {
  303. $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
  304. $y1 = $ymid[$n] + 1;
  305. $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
  306. }
  307. $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
  308. return array($this->lcs, $seps);
  309. }
  310. function _lcs_pos($ypos) {
  311. $end = $this->lcs;
  312. if ($end == 0 || $ypos > $this->seq[$end]) {
  313. $this->seq[++$this->lcs] = $ypos;
  314. $this->in_seq[$ypos] = 1;
  315. return $this->lcs;
  316. }
  317. $beg = 1;
  318. while ($beg < $end) {
  319. $mid = (int)(($beg + $end) / 2);
  320. if ($ypos > $this->seq[$mid]) {
  321. $beg = $mid + 1;
  322. }
  323. else {
  324. $end = $mid;
  325. }
  326. }
  327. USE_ASSERTS && assert($ypos != $this->seq[$end]);
  328. $this->in_seq[$this->seq[$end]] = FALSE;
  329. $this->seq[$end] = $ypos;
  330. $this->in_seq[$ypos] = 1;
  331. return $end;
  332. }
  333. /**
  334. * Find LCS of two sequences.
  335. *
  336. * The results are recorded in the vectors $this->{x,y}changed[], by
  337. * storing a 1 in the element for each line that is an insertion
  338. * or deletion (ie. is not in the LCS).
  339. *
  340. * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
  341. *
  342. * Note that XLIM, YLIM are exclusive bounds.
  343. * All line numbers are origin-0 and discarded lines are not counted.
  344. */
  345. function _compareseq($xoff, $xlim, $yoff, $ylim) {
  346. // Slide down the bottom initial diagonal.
  347. while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
  348. ++$xoff;
  349. ++$yoff;
  350. }
  351. // Slide up the top initial diagonal.
  352. while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
  353. --$xlim;
  354. --$ylim;
  355. }
  356. if ($xoff == $xlim || $yoff == $ylim) {
  357. $lcs = 0;
  358. }
  359. else {
  360. // This is ad hoc but seems to work well.
  361. //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
  362. //$nchunks = max(2, min(8, (int)$nchunks));
  363. $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
  364. list($lcs, $seps)
  365. = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
  366. }
  367. if ($lcs == 0) {
  368. // X and Y sequences have no common subsequence:
  369. // mark all changed.
  370. while ($yoff < $ylim) {
  371. $this->ychanged[$this->yind[$yoff++]] = 1;
  372. }
  373. while ($xoff < $xlim) {
  374. $this->xchanged[$this->xind[$xoff++]] = 1;
  375. }
  376. }
  377. else {
  378. // Use the partitions to split this problem into subproblems.
  379. reset($seps);
  380. $pt1 = $seps[0];
  381. while ($pt2 = next($seps)) {
  382. $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
  383. $pt1 = $pt2;
  384. }
  385. }
  386. }
  387. /**
  388. * Adjust inserts/deletes of identical lines to join changes
  389. * as much as possible.
  390. *
  391. * We do something when a run of changed lines include a
  392. * line at one end and has an excluded, identical line at the other.
  393. * We are free to choose which identical line is included.
  394. * `compareseq' usually chooses the one at the beginning,
  395. * but usually it is cleaner to consider the following identical line
  396. * to be the "change".
  397. *
  398. * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
  399. */
  400. function _shift_boundaries($lines, &$changed, $other_changed) {
  401. $i = 0;
  402. $j = 0;
  403. USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
  404. $len = sizeof($lines);
  405. $other_len = sizeof($other_changed);
  406. while (1) {
  407. /*
  408. * Scan forwards to find beginning of another run of changes.
  409. * Also keep track of the corresponding point in the other file.
  410. *
  411. * Throughout this code, $i and $j are adjusted together so that
  412. * the first $i elements of $changed and the first $j elements
  413. * of $other_changed both contain the same number of zeros
  414. * (unchanged lines).
  415. * Furthermore, $j is always kept so that $j == $other_len or
  416. * $other_changed[$j] == FALSE.
  417. */
  418. while ($j < $other_len && $other_changed[$j]) {
  419. $j++;
  420. }
  421. while ($i < $len && ! $changed[$i]) {
  422. USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
  423. $i++;
  424. $j++;
  425. while ($j < $other_len && $other_changed[$j]) {
  426. $j++;
  427. }
  428. }
  429. if ($i == $len) {
  430. break;
  431. }
  432. $start = $i;
  433. // Find the end of this run of changes.
  434. while (++$i < $len && $changed[$i]) {
  435. continue;
  436. }
  437. do {
  438. /*
  439. * Record the length of this run of changes, so that
  440. * we can later determine whether the run has grown.
  441. */
  442. $runlength = $i - $start;
  443. /*
  444. * Move the changed region back, so long as the
  445. * previous unchanged line matches the last changed one.
  446. * This merges with previous changed regions.
  447. */
  448. while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
  449. $changed[--$start] = 1;
  450. $changed[--$i] = FALSE;
  451. while ($start > 0 && $changed[$start - 1]) {
  452. $start--;
  453. }
  454. USE_ASSERTS && assert('$j > 0');
  455. while ($other_changed[--$j]) {
  456. continue;
  457. }
  458. USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
  459. }
  460. /*
  461. * Set CORRESPONDING to the end of the changed run, at the last
  462. * point where it corresponds to a changed run in the other file.
  463. * CORRESPONDING == LEN means no such point has been found.
  464. */
  465. $corresponding = $j < $other_len ? $i : $len;
  466. /*
  467. * Move the changed region forward, so long as the
  468. * first changed line matches the following unchanged one.
  469. * This merges with following changed regions.
  470. * Do this second, so that if there are no merges,
  471. * the changed region is moved forward as far as possible.
  472. */
  473. while ($i < $len && $lines[$start] == $lines[$i]) {
  474. $changed[$start++] = FALSE;
  475. $changed[$i++] = 1;
  476. while ($i < $len && $changed[$i]) {
  477. $i++;
  478. }
  479. USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
  480. $j++;
  481. if ($j < $other_len && $other_changed[$j]) {
  482. $corresponding = $i;
  483. while ($j < $other_len && $other_changed[$j]) {
  484. $j++;
  485. }
  486. }
  487. }
  488. } while ($runlength != $i - $start);
  489. /*
  490. * If possible, move the fully-merged run of changes
  491. * back to a corresponding run in the other file.
  492. */
  493. while ($corresponding < $i) {
  494. $changed[--$start] = 1;
  495. $changed[--$i] = 0;
  496. USE_ASSERTS && assert('$j > 0');
  497. while ($other_changed[--$j]) {
  498. continue;
  499. }
  500. USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
  501. }
  502. }
  503. }
  504. }
  505. /**
  506. * Class representing a 'diff' between two sequences of strings.
  507. * @todo document
  508. * @private
  509. * @subpackage DifferenceEngine
  510. */
  511. class Diff {
  512. var $edits;
  513. /**
  514. * Constructor.
  515. * Computes diff between sequences of strings.
  516. *
  517. * @param $from_lines array An array of strings.
  518. * (Typically these are lines from a file.)
  519. * @param $to_lines array An array of strings.
  520. */
  521. function Diff($from_lines, $to_lines) {
  522. $eng = new _DiffEngine;
  523. $this->edits = $eng->diff($from_lines, $to_lines);
  524. //$this->_check($from_lines, $to_lines);
  525. }
  526. /**
  527. * Compute reversed Diff.
  528. *
  529. * SYNOPSIS:
  530. *
  531. * $diff = new Diff($lines1, $lines2);
  532. * $rev = $diff->reverse();
  533. * @return object A Diff object representing the inverse of the
  534. * original diff.
  535. */
  536. function reverse() {
  537. $rev = $this;
  538. $rev->edits = array();
  539. foreach ($this->edits as $edit) {
  540. $rev->edits[] = $edit->reverse();
  541. }
  542. return $rev;
  543. }
  544. /**
  545. * Check for empty diff.
  546. *
  547. * @return bool True iff two sequences were identical.
  548. */
  549. function isEmpty() {
  550. foreach ($this->edits as $edit) {
  551. if ($edit->type != 'copy') {
  552. return FALSE;
  553. }
  554. }
  555. return TRUE;
  556. }
  557. /**
  558. * Compute the length of the Longest Common Subsequence (LCS).
  559. *
  560. * This is mostly for diagnostic purposed.
  561. *
  562. * @return int The length of the LCS.
  563. */
  564. function lcs() {
  565. $lcs = 0;
  566. foreach ($this->edits as $edit) {
  567. if ($edit->type == 'copy') {
  568. $lcs += sizeof($edit->orig);
  569. }
  570. }
  571. return $lcs;
  572. }
  573. /**
  574. * Get the original set of lines.
  575. *
  576. * This reconstructs the $from_lines parameter passed to the
  577. * constructor.
  578. *
  579. * @return array The original sequence of strings.
  580. */
  581. function orig() {
  582. $lines = array();
  583. foreach ($this->edits as $edit) {
  584. if ($edit->orig) {
  585. array_splice($lines, sizeof($lines), 0, $edit->orig);
  586. }
  587. }
  588. return $lines;
  589. }
  590. /**
  591. * Get the closing set of lines.
  592. *
  593. * This reconstructs the $to_lines parameter passed to the
  594. * constructor.
  595. *
  596. * @return array The sequence of strings.
  597. */
  598. function closing() {
  599. $lines = array();
  600. foreach ($this->edits as $edit) {
  601. if ($edit->closing) {
  602. array_splice($lines, sizeof($lines), 0, $edit->closing);
  603. }
  604. }
  605. return $lines;
  606. }
  607. /**
  608. * Check a Diff for validity.
  609. *
  610. * This is here only for debugging purposes.
  611. */
  612. function _check($from_lines, $to_lines) {
  613. if (serialize($from_lines) != serialize($this->orig())) {
  614. trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
  615. }
  616. if (serialize($to_lines) != serialize($this->closing())) {
  617. trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
  618. }
  619. $rev = $this->reverse();
  620. if (serialize($to_lines) != serialize($rev->orig())) {
  621. trigger_error("Reversed original doesn't match", E_USER_ERROR);
  622. }
  623. if (serialize($from_lines) != serialize($rev->closing())) {
  624. trigger_error("Reversed closing doesn't match", E_USER_ERROR);
  625. }
  626. $prevtype = 'none';
  627. foreach ($this->edits as $edit) {
  628. if ( $prevtype == $edit->type ) {
  629. trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
  630. }
  631. $prevtype = $edit->type;
  632. }
  633. $lcs = $this->lcs();
  634. trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
  635. }
  636. }
  637. /**
  638. * FIXME: bad name.
  639. * @todo document
  640. * @private
  641. * @subpackage DifferenceEngine
  642. */
  643. class MappedDiff extends Diff {
  644. /**
  645. * Constructor.
  646. *
  647. * Computes diff between sequences of strings.
  648. *
  649. * This can be used to compute things like
  650. * case-insensitve diffs, or diffs which ignore
  651. * changes in white-space.
  652. *
  653. * @param $from_lines array An array of strings.
  654. * (Typically these are lines from a file.)
  655. *
  656. * @param $to_lines array An array of strings.
  657. *
  658. * @param $mapped_from_lines array This array should
  659. * have the same size number of elements as $from_lines.
  660. * The elements in $mapped_from_lines and
  661. * $mapped_to_lines are what is actually compared
  662. * when computing the diff.
  663. *
  664. * @param $mapped_to_lines array This array should
  665. * have the same number of elements as $to_lines.
  666. */
  667. function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
  668. assert(sizeof($from_lines) == sizeof($mapped_from_lines));
  669. assert(sizeof($to_lines) == sizeof($mapped_to_lines));
  670. $this->Diff($mapped_from_lines, $mapped_to_lines);
  671. $xi = $yi = 0;
  672. for ($i = 0; $i < sizeof($this->edits); $i++) {
  673. $orig = &$this->edits[$i]->orig;
  674. if (is_array($orig)) {
  675. $orig = array_slice($from_lines, $xi, sizeof($orig));
  676. $xi += sizeof($orig);
  677. }
  678. $closing = &$this->edits[$i]->closing;
  679. if (is_array($closing)) {
  680. $closing = array_slice($to_lines, $yi, sizeof($closing));
  681. $yi += sizeof($closing);
  682. }
  683. }
  684. }
  685. }
  686. /**
  687. * A class to format Diffs
  688. *
  689. * This class formats the diff in classic diff format.
  690. * It is intended that this class be customized via inheritance,
  691. * to obtain fancier outputs.
  692. * @todo document
  693. * @private
  694. * @subpackage DifferenceEngine
  695. */
  696. class DiffFormatter {
  697. /**
  698. * Should a block header be shown?
  699. */
  700. var $show_header = TRUE;
  701. /**
  702. * Number of leading context "lines" to preserve.
  703. *
  704. * This should be left at zero for this class, but subclasses
  705. * may want to set this to other values.
  706. */
  707. var $leading_context_lines = 0;
  708. /**
  709. * Number of trailing context "lines" to preserve.
  710. *
  711. * This should be left at zero for this class, but subclasses
  712. * may want to set this to other values.
  713. */
  714. var $trailing_context_lines = 0;
  715. /**
  716. * Format a diff.
  717. *
  718. * @param $diff object A Diff object.
  719. * @return string The formatted output.
  720. */
  721. function format($diff) {
  722. $xi = $yi = 1;
  723. $block = FALSE;
  724. $context = array();
  725. $nlead = $this->leading_context_lines;
  726. $ntrail = $this->trailing_context_lines;
  727. $this->_start_diff();
  728. foreach ($diff->edits as $edit) {
  729. if ($edit->type == 'copy') {
  730. if (is_array($block)) {
  731. if (sizeof($edit->orig) <= $nlead + $ntrail) {
  732. $block[] = $edit;
  733. }
  734. else {
  735. if ($ntrail) {
  736. $context = array_slice($edit->orig, 0, $ntrail);
  737. $block[] = new _DiffOp_Copy($context);
  738. }
  739. $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
  740. $block = FALSE;
  741. }
  742. }
  743. $context = $edit->orig;
  744. }
  745. else {
  746. if (! is_array($block)) {
  747. $context = array_slice($context, sizeof($context) - $nlead);
  748. $x0 = $xi - sizeof($context);
  749. $y0 = $yi - sizeof($context);
  750. $block = array();
  751. if ($context) {
  752. $block[] = new _DiffOp_Copy($context);
  753. }
  754. }
  755. $block[] = $edit;
  756. }
  757. if ($edit->orig) {
  758. $xi += sizeof($edit->orig);
  759. }
  760. if ($edit->closing) {
  761. $yi += sizeof($edit->closing);
  762. }
  763. }
  764. if (is_array($block)) {
  765. $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
  766. }
  767. $end = $this->_end_diff();
  768. if (!empty($xi)) {
  769. $this->line_stats['counter']['x'] += $xi;
  770. }
  771. if (!empty($yi)) {
  772. $this->line_stats['counter']['y'] += $yi;
  773. }
  774. return $end;
  775. }
  776. function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
  777. $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
  778. foreach ($edits as $edit) {
  779. if ($edit->type == 'copy') {
  780. $this->_context($edit->orig);
  781. }
  782. elseif ($edit->type == 'add') {
  783. $this->_added($edit->closing);
  784. }
  785. elseif ($edit->type == 'delete') {
  786. $this->_deleted($edit->orig);
  787. }
  788. elseif ($edit->type == 'change') {
  789. $this->_changed($edit->orig, $edit->closing);
  790. }
  791. else {
  792. trigger_error('Unknown edit type', E_USER_ERROR);
  793. }
  794. }
  795. $this->_end_block();
  796. }
  797. function _start_diff() {
  798. ob_start();
  799. }
  800. function _end_diff() {
  801. $val = ob_get_contents();
  802. ob_end_clean();
  803. return $val;
  804. }
  805. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  806. if ($xlen > 1) {
  807. $xbeg .= "," . ($xbeg + $xlen - 1);
  808. }
  809. if ($ylen > 1) {
  810. $ybeg .= "," . ($ybeg + $ylen - 1);
  811. }
  812. return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
  813. }
  814. function _start_block($header) {
  815. if ($this->show_header) {
  816. echo $header . "\n";
  817. }
  818. }
  819. function _end_block() {
  820. }
  821. function _lines($lines, $prefix = ' ') {
  822. foreach ($lines as $line) {
  823. echo "$prefix $line\n";
  824. }
  825. }
  826. function _context($lines) {
  827. $this->_lines($lines);
  828. }
  829. function _added($lines) {
  830. $this->_lines($lines, '>');
  831. }
  832. function _deleted($lines) {
  833. $this->_lines($lines, '<');
  834. }
  835. function _changed($orig, $closing) {
  836. $this->_deleted($orig);
  837. echo "---\n";
  838. $this->_added($closing);
  839. }
  840. }
  841. /**
  842. * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
  843. *
  844. */
  845. define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
  846. /**
  847. * @todo document
  848. * @private
  849. * @subpackage DifferenceEngine
  850. */
  851. class _HWLDF_WordAccumulator {
  852. function _HWLDF_WordAccumulator() {
  853. $this->_lines = array();
  854. $this->_line = '';
  855. $this->_group = '';
  856. $this->_tag = '';
  857. }
  858. function _flushGroup($new_tag) {
  859. if ($this->_group !== '') {
  860. if ($this->_tag == 'mark') {
  861. $this->_line .= '<span class="diffchange">' . String::checkPlain($this->_group) . '</span>';
  862. }
  863. else {
  864. $this->_line .= String::checkPlain($this->_group);
  865. }
  866. }
  867. $this->_group = '';
  868. $this->_tag = $new_tag;
  869. }
  870. function _flushLine($new_tag) {
  871. $this->_flushGroup($new_tag);
  872. if ($this->_line != '') {
  873. array_push($this->_lines, $this->_line);
  874. }
  875. else {
  876. // make empty lines visible by inserting an NBSP
  877. array_push($this->_lines, NBSP);
  878. }
  879. $this->_line = '';
  880. }
  881. function addWords($words, $tag = '') {
  882. if ($tag != $this->_tag) {
  883. $this->_flushGroup($tag);
  884. }
  885. foreach ($words as $word) {
  886. // new-line should only come as first char of word.
  887. if ($word == '') {
  888. continue;
  889. }
  890. if ($word[0] == "\n") {
  891. $this->_flushLine($tag);
  892. $word = Unicode::substr($word, 1);
  893. }
  894. assert(!strstr($word, "\n"));
  895. $this->_group .= $word;
  896. }
  897. }
  898. function getLines() {
  899. $this->_flushLine('~done');
  900. return $this->_lines;
  901. }
  902. }
  903. /**
  904. * @todo document
  905. * @private
  906. * @subpackage DifferenceEngine
  907. */
  908. class WordLevelDiff extends MappedDiff {
  909. function MAX_LINE_LENGTH() {
  910. return 10000;
  911. }
  912. function WordLevelDiff($orig_lines, $closing_lines) {
  913. list($orig_words, $orig_stripped) = $this->_split($orig_lines);
  914. list($closing_words, $closing_stripped) = $this->_split($closing_lines);
  915. $this->MappedDiff($orig_words, $closing_words, $orig_stripped, $closing_stripped);
  916. }
  917. function _split($lines) {
  918. $words = array();
  919. $stripped = array();
  920. $first = TRUE;
  921. foreach ($lines as $line) {
  922. // If the line is too long, just pretend the entire line is one big word
  923. // This prevents resource exhaustion problems
  924. if ( $first ) {
  925. $first = FALSE;
  926. }
  927. else {
  928. $words[] = "\n";
  929. $stripped[] = "\n";
  930. }
  931. if ( Unicode::strlen( $line ) > $this->MAX_LINE_LENGTH() ) {
  932. $words[] = $line;
  933. $stripped[] = $line;
  934. }
  935. else {
  936. if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m)) {
  937. $words = array_merge($words, $m[0]);
  938. $stripped = array_merge($stripped, $m[1]);
  939. }
  940. }
  941. }
  942. return array($words, $stripped);
  943. }
  944. function orig() {
  945. $orig = new _HWLDF_WordAccumulator;
  946. foreach ($this->edits as $edit) {
  947. if ($edit->type == 'copy') {
  948. $orig->addWords($edit->orig);
  949. }
  950. elseif ($edit->orig) {
  951. $orig->addWords($edit->orig, 'mark');
  952. }
  953. }
  954. $lines = $orig->getLines();
  955. return $lines;
  956. }
  957. function closing() {
  958. $closing = new _HWLDF_WordAccumulator;
  959. foreach ($this->edits as $edit) {
  960. if ($edit->type == 'copy') {
  961. $closing->addWords($edit->closing);
  962. }
  963. elseif ($edit->closing) {
  964. $closing->addWords($edit->closing, 'mark');
  965. }
  966. }
  967. $lines = $closing->getLines();
  968. return $lines;
  969. }
  970. }
  971. /**
  972. * Diff formatter which uses Drupal theme functions.
  973. * @private
  974. * @subpackage DifferenceEngine
  975. */
  976. class DrupalDiffFormatter extends DiffFormatter {
  977. var $rows;
  978. var $line_stats = array(
  979. 'counter' => array('x' => 0, 'y' => 0),
  980. 'offset' => array('x' => 0, 'y' => 0),
  981. );
  982. function DrupalDiffFormatter() {
  983. $this->leading_context_lines = Settings::getSingleton()->get('diff_context_lines_leading', 2);
  984. $this->trailing_context_lines = Settings::getSingleton()->get('diff_context_lines_trailing', 2);
  985. }
  986. function _start_diff() {
  987. $this->rows = array();
  988. }
  989. function _end_diff() {
  990. return $this->rows;
  991. }
  992. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  993. return array(
  994. array(
  995. 'data' => $xbeg + $this->line_stats['offset']['x'],
  996. 'colspan' => 2,
  997. ),
  998. array(
  999. 'data' => $ybeg + $this->line_stats['offset']['y'],
  1000. 'colspan' => 2,
  1001. )
  1002. );
  1003. }
  1004. function _start_block($header) {
  1005. if ($this->show_header) {
  1006. $this->rows[] = $header;
  1007. }
  1008. }
  1009. function _end_block() {
  1010. }
  1011. function _lines($lines, $prefix=' ', $color='white') {
  1012. }
  1013. /**
  1014. * Note: you should HTML-escape parameter before calling this.
  1015. */
  1016. function addedLine($line) {
  1017. return array(
  1018. array(
  1019. 'data' => '+',
  1020. 'class' => 'diff-marker',
  1021. ),
  1022. array(
  1023. 'data' => $line,
  1024. 'class' => 'diff-context diff-addedline',
  1025. )
  1026. );
  1027. }
  1028. /**
  1029. * Note: you should HTML-escape parameter before calling this.
  1030. */
  1031. function deletedLine($line) {
  1032. return array(
  1033. array(
  1034. 'data' => '-',
  1035. 'class' => 'diff-marker',
  1036. ),
  1037. array(
  1038. 'data' => $line,
  1039. 'class' => 'diff-context diff-deletedline',
  1040. )
  1041. );
  1042. }
  1043. /**
  1044. * Note: you should HTML-escape parameter before calling this.
  1045. */
  1046. function contextLine($line) {
  1047. return array(
  1048. '&nbsp;',
  1049. array(
  1050. 'data' => $line,
  1051. 'class' => 'diff-context',
  1052. )
  1053. );
  1054. }
  1055. function emptyLine() {
  1056. return array(
  1057. '&nbsp;',
  1058. '&nbsp;',
  1059. );
  1060. }
  1061. function _added($lines) {
  1062. foreach ($lines as $line) {
  1063. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine(String::checkPlain($line)));
  1064. }
  1065. }
  1066. function _deleted($lines) {
  1067. foreach ($lines as $line) {
  1068. $this->rows[] = array_merge($this->deletedLine(String::checkPlain($line)), $this->emptyLine());
  1069. }
  1070. }
  1071. function _context($lines) {
  1072. foreach ($lines as $line) {
  1073. $this->rows[] = array_merge($this->contextLine(String::checkPlain($line)), $this->contextLine(String::checkPlain($line)));
  1074. }
  1075. }
  1076. function _changed($orig, $closing) {
  1077. $diff = new WordLevelDiff($orig, $closing);
  1078. $del = $diff->orig();
  1079. $add = $diff->closing();
  1080. // Notice that WordLevelDiff returns HTML-escaped output.
  1081. // Hence, we will be calling addedLine/deletedLine without HTML-escaping.
  1082. while ($line = array_shift($del)) {
  1083. $aline = array_shift( $add );
  1084. $this->rows[] = array_merge($this->deletedLine($line), isset($aline) ? $this->addedLine($aline) : $this->emptyLine());
  1085. }
  1086. foreach ($add as $line) { // If any leftovers
  1087. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine($line));
  1088. }
  1089. }
  1090. }