/application/third_party/Diff.php

https://bitbucket.org/hoardingsinc/wikitabbook · PHP · 386 lines · 185 code · 74 blank · 127 comment · 35 complexity · cf91422682b644a8840bfa1bf09222a2 MD5 · raw file

  1. <?php
  2. /*
  3. class.Diff.php
  4. A class containing a diff implementation
  5. Created by Stephen Morley - http://stephenmorley.org/ - and released under the
  6. terms of the CC0 1.0 Universal legal code:
  7. http://creativecommons.org/publicdomain/zero/1.0/legalcode
  8. */
  9. // A class containing functions for computing diffs and formatting the output.
  /* Computes diffs between two strings or files using a longest common
   * subsequence (LCS) table, and formats the result as plain text, inline HTML
   * or a two-column HTML table.
   *
   * A "diff array" is a list whose entries are two-element arrays: the item (a
   * line or a character) followed by one of the constants Diff::UNMODIFIED,
   * Diff::DELETED or Diff::INSERTED.
   */
  class Diff{

    // the item is present in both inputs
    const UNMODIFIED = 0;
    // the item is only present in the first input
    const DELETED = 1;
    // the item is only present in the second input
    const INSERTED = 2;

    /* Returns the diff for two strings. The return value is an array, each of
     * whose values is an array containing two values: a line (or character, if
     * $compareCharacters is true), and one of the constants Diff::UNMODIFIED
     * (the line or character is in both strings), Diff::DELETED (the line or
     * character is only in the first string), and Diff::INSERTED (the line or
     * character is only in the second string).
     *
     * Items are compared with ===, so lines that merely look numerically equal
     * (for example "1" and "1.0") are reported as changed. The parameters are:
     *
     * $string1           - the first string
     * $string2           - the second string
     * $compareCharacters - true to compare characters, and false to compare
     *                      lines; this optional parameter defaults to false
     */
    public static function compare(
        $string1, $string2, $compareCharacters = false){

      // split both inputs into lists of items, so that the rest of the
      // algorithm is identical for lines and characters
      if ($compareCharacters){
        $sequence1 = self::splitCharacters($string1);
        $sequence2 = self::splitCharacters($string2);
      }else{
        $sequence1 = self::splitLines($string1);
        $sequence2 = self::splitLines($string2);
      }

      // initialise the comparison start and end positions
      $count1 = count($sequence1);
      $start  = 0;
      $end1   = $count1 - 1;
      $end2   = count($sequence2) - 1;

      // skip any common prefix
      while ($start <= $end1 && $start <= $end2
          && $sequence1[$start] === $sequence2[$start]){
        $start ++;
      }

      // skip any common suffix
      while ($end1 >= $start && $end2 >= $start
          && $sequence1[$end1] === $sequence2[$end2]){
        $end1 --;
        $end2 --;
      }

      // compute the table of longest common subsequence lengths for the part
      // of the sequences that lies between the common prefix and suffix
      $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

      // generate the partial diff (it is produced back to front)
      $partialDiff =
          self::generatePartialDiff($table, $sequence1, $sequence2, $start);

      // generate the full diff: common prefix, changed region, common suffix
      $diff = array();
      for ($index = 0; $index < $start; $index ++){
        $diff[] = array($sequence1[$index], self::UNMODIFIED);
      }
      foreach (array_reverse($partialDiff) as $entry){
        $diff[] = $entry;
      }
      for ($index = $end1 + 1; $index < $count1; $index ++){
        $diff[] = array($sequence1[$index], self::UNMODIFIED);
      }

      // return the diff
      return $diff;

    }

    /* Returns the diff for two files. Throws a RuntimeException if either file
     * cannot be read, rather than silently diffing against an empty string.
     * The parameters are:
     *
     * $file1             - the path to the first file
     * $file2             - the path to the second file
     * $compareCharacters - true to compare characters, and false to compare
     *                      lines; this optional parameter defaults to false
     */
    public static function compareFiles(
        $file1, $file2, $compareCharacters = false){

      // return the diff of the files
      return self::compare(
          self::readFile($file1),
          self::readFile($file2),
          $compareCharacters);

    }

    /* Returns the contents of the file at the specified path, throwing a
     * RuntimeException if it cannot be read. The parameter is:
     *
     * $path - the path to the file
     */
    private static function readFile($path){

      $contents = file_get_contents($path);

      // file_get_contents signals failure by returning false
      if ($contents === false){
        throw new RuntimeException('Unable to read file: ' . $path);
      }

      return $contents;

    }

    /* Splits a string into a list of lines. An empty string yields a list
     * containing one empty line. The parameter is:
     *
     * $string - the string to split
     */
    private static function splitLines($string){

      $string = (string) $string;

      // Prefer UTF-8 mode: outside it, \R can also match the single byte 0x85,
      // which occurs inside multi-byte UTF-8 letters and would cut them in two.
      // preg_split returns false when the subject is not valid UTF-8, in which
      // case the byte-oriented pattern is used instead.
      $lines = preg_split('/\R/u', $string);
      if ($lines === false){
        $lines = preg_split('/\R/', $string);
      }

      // treat the input as a single line if it could not be split at all
      return ($lines === false ? array($string) : $lines);

    }

    /* Splits a string into a list of characters. Valid UTF-8 is split into
     * whole characters; anything else is split into single bytes. An empty
     * string yields an empty list. The parameter is:
     *
     * $string - the string to split
     */
    private static function splitCharacters($string){

      $string = (string) $string;
      if ($string === ''){
        return array();
      }

      // every character is captured as a delimiter; the empty pieces between
      // the delimiters are discarded
      $characters =
          preg_split(
              '/(.)/us',
              $string,
              -1,
              PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

      // preg_split returns false when the subject is not valid UTF-8
      return ($characters === false ? str_split($string) : $characters);

    }

    /* Returns the table of longest common subsequence lengths for the specified
     * sequences. Entry [i][j] is the LCS length of the first i items of the
     * first range and the first j items of the second range. The parameters
     * are:
     *
     * $sequence1 - the first sequence
     * $sequence2 - the second sequence
     * $start     - the starting index
     * $end1      - the ending index for the first sequence
     * $end2      - the ending index for the second sequence
     */
    private static function computeTable(
        $sequence1, $sequence2, $start, $end1, $end2){

      // determine the lengths to be compared
      $length1 = $end1 - $start + 1;
      $length2 = $end2 - $start + 1;

      // initialise the table; row 0 and column 0 are all zeros
      $table = array(array_fill(0, $length2 + 1, 0));

      // loop over the rows
      for ($index1 = 1; $index1 <= $length1; $index1 ++){

        // create the new row
        $table[$index1] = array(0);

        // loop over the columns
        for ($index2 = 1; $index2 <= $length2; $index2 ++){

          // store the longest common subsequence length
          if ($sequence1[$index1 + $start - 1]
              === $sequence2[$index2 + $start - 1]){
            $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
          }else{
            $table[$index1][$index2] =
                max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
          }

        }

      }

      // return the table
      return $table;

    }

    /* Returns the partial diff for the specified sequences, in reverse order.
     * The items must be compared exactly as in computeTable, otherwise the
     * walk back through the table would not follow the lengths stored in it.
     * The parameters are:
     *
     * $table     - the table returned by the computeTable function
     * $sequence1 - the first sequence
     * $sequence2 - the second sequence
     * $start     - the starting index
     */
    private static function generatePartialDiff(
        $table, $sequence1, $sequence2, $start){

      // initialise the diff
      $diff = array();

      // initialise the indices to the bottom right corner of the table
      $index1 = count($table) - 1;
      $index2 = count($table[0]) - 1;

      // loop until there are no items remaining in either sequence
      while ($index1 > 0 || $index2 > 0){

        // check what has happened to the items at these indices
        if ($index1 > 0 && $index2 > 0
            && $sequence1[$index1 + $start - 1]
                === $sequence2[$index2 + $start - 1]){

          // the items match: move diagonally
          $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
          $index1 --;
          $index2 --;

        }elseif ($index2 > 0
            && $table[$index1][$index2] === $table[$index1][$index2 - 1]){

          // dropping the item from the second sequence keeps the length
          $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
          $index2 --;

        }else{

          // otherwise the item from the first sequence was removed
          $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
          $index1 --;

        }

      }

      // return the diff
      return $diff;

    }

    /* Returns a diff as a string, where unmodified lines are prefixed by ' ',
     * deletions are prefixed by '- ', and insertions are prefixed by '+ '. An
     * entry whose type is none of the three constants contributes only the
     * separator. The parameters are:
     *
     * $diff      - the diff array
     * $separator - the separator between lines; this optional parameter
     *              defaults to "\n"
     */
    public static function toString($diff, $separator = "\n"){

      // initialise the string
      $string = '';

      // loop over the lines in the diff
      foreach ($diff as $line){

        // extend the string with the line
        switch ($line[1]){
          case self::UNMODIFIED : $string .= ' ' . $line[0]; break;
          case self::DELETED    : $string .= '- ' . $line[0]; break;
          case self::INSERTED   : $string .= '+ ' . $line[0]; break;
        }

        // extend the string with the separator
        $string .= $separator;

      }

      // return the string
      return $string;

    }

    /* Returns a diff as an HTML string, where unmodified lines are contained
     * within 'span' elements, deletions are contained within 'del' elements,
     * and insertions are contained within 'ins' elements. The items are escaped
     * with htmlspecialchars; the separator is output as given. Throws an
     * InvalidArgumentException if an entry has an unrecognised type. The
     * parameters are:
     *
     * $diff      - the diff array
     * $separator - the separator between lines; this optional parameter
     *              defaults to '<br>'
     */
    public static function toHTML($diff, $separator = '<br>'){

      // initialise the HTML
      $html = '';

      // loop over the lines in the diff
      foreach ($diff as $position => $line){

        // determine the element for the line
        switch ($line[1]){
          case self::UNMODIFIED : $element = 'span'; break;
          case self::DELETED    : $element = 'del';  break;
          case self::INSERTED   : $element = 'ins';  break;
          default :
            // never fall through to the element of a previous line
            throw new InvalidArgumentException(
                'Unrecognised diff entry type at position ' . $position);
        }

        // extend the HTML with the line and the separator
        $html .=
            '<' . $element . '>'
            . htmlspecialchars($line[0])
            . '</' . $element . '>'
            . $separator;

      }

      // return the HTML
      return $html;

    }

    /* Returns a diff as an HTML table with one row per run of entries: runs of
     * unmodified lines appear in both columns, deleted lines on the left and
     * inserted lines on the right. Each cell has the class diffUnmodified,
     * diffDeleted, diffInserted or diffBlank. Throws an
     * InvalidArgumentException if an entry has an unrecognised type. The
     * parameters are:
     *
     * $diff        - the diff array
     * $indentation - indentation to add to every line of the generated HTML;
     *                this optional parameter defaults to ''
     * $separator   - the separator between lines; this optional parameter
     *                defaults to '<br>'
     */
    public static function toTable($diff, $indentation = '', $separator = '<br>'){

      // initialise the HTML
      $html = $indentation . "<table class=\"diff\">\n";

      // loop over the lines in the diff; getCellContent advances $index past
      // every line that it consumes
      $index = 0;
      $total = count($diff);
      while ($index < $total){

        // determine the line type
        switch ($diff[$index][1]){

          // display the content on the left and right
          case self::UNMODIFIED:
            $leftCell =
                self::getCellContent(
                    $diff, $indentation, $separator, $index, self::UNMODIFIED);
            $rightCell  = $leftCell;
            $leftClass  = 'Unmodified';
            $rightClass = 'Unmodified';
            break;

          // display the deleted on the left and inserted content on the right
          case self::DELETED:
            $leftCell =
                self::getCellContent(
                    $diff, $indentation, $separator, $index, self::DELETED);
            $rightCell =
                self::getCellContent(
                    $diff, $indentation, $separator, $index, self::INSERTED);
            $leftClass  = 'Deleted';
            // a deletion need not be followed by an insertion
            $rightClass = ($rightCell === '' ? 'Blank' : 'Inserted');
            break;

          // display the inserted content on the right
          case self::INSERTED:
            $leftCell = '';
            $rightCell =
                self::getCellContent(
                    $diff, $indentation, $separator, $index, self::INSERTED);
            $leftClass  = 'Blank';
            $rightClass = 'Inserted';
            break;

          // nothing would advance $index, so the loop would never end
          default:
            throw new InvalidArgumentException(
                'Unrecognised diff entry type at position ' . $index);

        }

        // extend the HTML with the new row; the classes come from the entry
        // type, so a deleted and an inserted cell that happen to render to the
        // same HTML are still labelled as a change
        $html .=
            $indentation
            . " <tr>\n"
            . $indentation
            . ' <td class="diff'
            . $leftClass
            . '">'
            . $leftCell
            . "</td>\n"
            . $indentation
            . ' <td class="diff'
            . $rightClass
            . '">'
            . $rightCell
            . "</td>\n"
            . $indentation
            . " </tr>\n";

      }

      // return the HTML
      return $html . $indentation . "</table>\n";

    }

    /* Returns the content of the cell, for use in the toTable function: the
     * escaped lines of the run of entries of the given type that starts at
     * $index, each wrapped in a 'span' element and followed by the separator.
     * Returns '' if the entry at $index is not of the given type. The
     * parameters are:
     *
     * $diff        - the diff array
     * $indentation - indentation to add to every line of the generated HTML
     *                (currently unused)
     * $separator   - the separator between lines
     * $index       - the current index, passed by reference; on return it
     *                points to the first entry after the run
     * $type        - the type of line
     */
    private static function getCellContent(
        $diff, $indentation, $separator, &$index, $type){

      // initialise the HTML
      $html = '';

      // loop over the matching lines, adding them to the HTML; the type is
      // compared loosely on purpose, to agree with the switch in toTable
      $total = count($diff);
      while ($index < $total && $diff[$index][1] == $type){
        $html .=
            '<span>'
            . htmlspecialchars($diff[$index][0])
            . '</span>'
            . $separator;
        $index ++;
      }

      // return the HTML
      return $html;

    }

  }
  307. ?>