api.diff.php 12 KB

  1. <?php
  2. /**
  3. * A class containing a diff implementation
  4. *
  5. * Created by Kate Morley - http://iamkate.com/ - and released under the terms of
  6. * the CC0 1.0 Universal legal code:
  7. *
  8. * http://creativecommons.org/publicdomain/zero/1.0/legalcode
  9. *
  10. */
  class Diff {

      // the possible states of a line (or character) in a diff
      const UNMODIFIED = 0;
      const DELETED = 1;
      const INSERTED = 2;

      /* Returns the diff for two strings. The return value is an array, each of
       * whose values is an array containing two values: a line (or character, if
       * $compareCharacters is true), and one of the constants Diff::UNMODIFIED (the
       * line or character is in both strings), Diff::DELETED (the line or character
       * is only in the first string), and Diff::INSERTED (the line or character is
       * only in the second string). The parameters are:
       *
       * $string1           - the first string
       * $string2           - the second string
       * $compareCharacters - true to compare characters, and false to compare
       *                      lines; this optional parameter defaults to false
       *
       * Items are compared with strict equality, so lines that merely look like
       * equal numbers (for example '1e3' and '1000', or '1.0' and '1') are
       * reported as changed rather than unmodified.
       */
      public static function compare(
          $string1, $string2, $compareCharacters = false) {

          // split both inputs into arrays of characters or lines
          if ($compareCharacters) {
              $sequence1 = self::splitCharacters($string1);
              $sequence2 = self::splitCharacters($string2);
          } else {
              $sequence1 = self::splitLines($string1);
              $sequence2 = self::splitLines($string2);
          }

          // initialise the comparison start and end positions
          $count1 = count($sequence1);
          $start = 0;
          $end1 = $count1 - 1;
          $end2 = count($sequence2) - 1;

          // skip any common prefix
          while ($start <= $end1 && $start <= $end2
              && $sequence1[$start] === $sequence2[$start]) {
              $start ++;
          }

          // skip any common suffix
          while ($end1 >= $start && $end2 >= $start
              && $sequence1[$end1] === $sequence2[$end2]) {
              $end1 --;
              $end2 --;
          }

          // compute the table of longest common subsequence lengths for the
          // part of the sequences that lies between the prefix and the suffix
          $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);

          // generate the partial diff (it is returned in reverse order)
          $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);

          // generate the full diff: common prefix, then the differing middle
          // in forward order, then the common suffix
          $diff = array();
          for ($index = 0; $index < $start; $index ++) {
              $diff[] = array($sequence1[$index], self::UNMODIFIED);
          }
          foreach (array_reverse($partialDiff) as $item) {
              $diff[] = $item;
          }
          for ($index = $end1 + 1; $index < $count1; $index ++) {
              $diff[] = array($sequence1[$index], self::UNMODIFIED);
          }

          // return the diff
          return $diff;
      }

      /* Returns the diff for two files. The parameters are:
       *
       * $file1             - the path to the first file
       * $file2             - the path to the second file
       * $compareCharacters - true to compare characters, and false to compare
       *                      lines; this optional parameter defaults to false
       *
       * The result of file_get_contents is passed straight to compare(), which
       * casts it to a string; a file that cannot be read (file_get_contents
       * returns false) is therefore compared as if it were empty.
       */
      public static function compareFiles(
          $file1, $file2, $compareCharacters = false) {

          // return the diff of the files
          return self::compare(
              file_get_contents($file1),
              file_get_contents($file2),
              $compareCharacters);
      }

      /* Splits a string into an array of lines. The parameter is:
       *
       * $string - the string to split
       *
       * The string is first split in UTF-8 mode, so that the byte 0x85 inside a
       * multibyte character is not mistaken for a line break. If the string is
       * not valid UTF-8 the split falls back to byte mode.
       */
      private static function splitLines($string) {

          $string = (string) $string;

          // preg_split returns false when the subject is not valid UTF-8
          $lines = preg_split('/\R/u', $string);
          if ($lines === false) {
              $lines = preg_split('/\R/', $string);
          }

          // as a last resort treat the whole string as a single line
          return ($lines === false ? array($string) : $lines);
      }

      /* Splits a string into an array of characters. The parameter is:
       *
       * $string - the string to split
       *
       * Valid UTF-8 is split into whole characters; any other input is split
       * into single bytes. An empty string gives an empty array.
       */
      private static function splitCharacters($string) {

          $string = (string) $string;
          if ($string === '') {
              return array();
          }

          // preg_split returns false when the subject is not valid UTF-8
          $characters = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
          if ($characters === false) {
              $characters = str_split($string);
          }

          return $characters;
      }

      /* Returns the table of longest common subsequence lengths for the specified
       * sequences. Row and column 0 are filled with zeros; the entry at row i and
       * column j is the length of the longest common subsequence of the first i
       * compared items of $sequence1 and the first j compared items of
       * $sequence2. The parameters are:
       *
       * $sequence1 - the first sequence
       * $sequence2 - the second sequence
       * $start     - the starting index
       * $end1      - the ending index for the first sequence
       * $end2      - the ending index for the second sequence
       */
      private static function computeTable(
          $sequence1, $sequence2, $start, $end1, $end2) {

          // determine the lengths to be compared
          $length1 = $end1 - $start + 1;
          $length2 = $end2 - $start + 1;

          // initialise the table with a first row of zeros
          $table = array(array_fill(0, $length2 + 1, 0));

          // loop over the rows
          for ($index1 = 1; $index1 <= $length1; $index1 ++) {

              // create the new row
              $table[$index1] = array(0);

              // loop over the columns
              for ($index2 = 1; $index2 <= $length2; $index2 ++) {

                  // store the longest common subsequence length
                  if ($sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]) {
                      $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
                  } else {
                      $table[$index1][$index2] = max(
                          $table[$index1 - 1][$index2],
                          $table[$index1][$index2 - 1]);
                  }
              }
          }

          // return the table
          return $table;
      }

      /* Returns the partial diff for the specified sequences, in reverse order.
       * The parameters are:
       *
       * $table     - the table returned by the computeTable function
       * $sequence1 - the first sequence
       * $sequence2 - the second sequence
       * $start     - the starting index
       */
      private static function generatePartialDiff(
          $table, $sequence1, $sequence2, $start) {

          // initialise the diff
          $diff = array();

          // initialise the indices to the bottom right corner of the table
          $index1 = count($table) - 1;
          $index2 = count($table[0]) - 1;

          // loop until there are no items remaining in either sequence
          while ($index1 > 0 || $index2 > 0) {

              // check what has happened to the items at these indices
              if ($index1 > 0 && $index2 > 0
                  && $sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]) {

                  // the item is in both sequences: step back in both
                  $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
                  $index1 --;
                  $index2 --;

              } elseif ($index2 > 0
                  && $table[$index1][$index2] === $table[$index1][$index2 - 1]) {

                  // dropping the item of the second sequence does not shorten
                  // the common subsequence, so it is an insertion
                  $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
                  $index2 --;

              } else {

                  // otherwise the item of the first sequence was deleted
                  $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
                  $index1 --;
              }
          }

          // return the diff
          return $diff;
      }

      /* Returns a diff as a string, where unmodified lines are prefixed by ' ',
       * deletions are prefixed by '- ', and insertions are prefixed by '+ '. The
       * separator is appended after every entry, including the last one. The
       * parameters are:
       *
       * $diff      - the diff array
       * $separator - the separator between lines; this optional parameter defaults
       *              to "\n"
       */
      public static function toString($diff, $separator = "\n") {

          // initialise the string
          $string = '';

          // loop over the lines in the diff
          foreach ($diff as $line) {

              // extend the string with the line
              switch ($line[1]) {
                  case self::UNMODIFIED:
                      $string .= ' ' . $line[0];
                      break;
                  case self::DELETED:
                      $string .= '- ' . $line[0];
                      break;
                  case self::INSERTED:
                      $string .= '+ ' . $line[0];
                      break;
              }

              // extend the string with the separator
              $string .= $separator;
          }

          // return the string
          return $string;
      }

      /* Returns a diff as an HTML string, where unmodified lines are contained
       * within 'span' elements, deletions are contained within 'del' elements, and
       * insertions are contained within 'ins' elements. The text of each line is
       * escaped with htmlspecialchars; the separator is output as given. The
       * parameters are:
       *
       * $diff      - the diff array
       * $separator - the separator between lines; this optional parameter defaults
       *              to '<br>'
       */
      public static function toHTML($diff, $separator = '<br>') {

          // initialise the HTML
          $html = '';

          // loop over the lines in the diff
          foreach ($diff as $line) {

              // choose the element for the line
              switch ($line[1]) {
                  case self::UNMODIFIED:
                      $element = 'span';
                      break;
                  case self::DELETED:
                      $element = 'del';
                      break;
                  case self::INSERTED:
                      $element = 'ins';
                      break;
              }

              // extend the HTML with the line
              $html .= '<' . $element . '>'
                  . htmlspecialchars($line[0])
                  . '</' . $element . '>';

              // extend the HTML with the separator
              $html .= $separator;
          }

          // return the HTML
          return $html;
      }

      /* Returns a diff as an HTML table. Each row has a left cell (the first
       * string) and a right cell (the second string); consecutive lines of the
       * same type share a cell, and a run of deletions is paired with the run of
       * insertions that immediately follows it. The parameters are:
       *
       * $diff        - the diff array
       * $indentation - indentation to add to every line of the generated HTML; this
       *                optional parameter defaults to ''
       * $separator   - the separator between lines; this optional parameter
       *                defaults to '<br>'
       *
       * An InvalidArgumentException is thrown if an entry of $diff has a type
       * other than the three Diff constants.
       */
      public static function toTable($diff, $indentation = '', $separator = '<br>') {

          // initialise the HTML
          $html = $indentation . "<table class=\"diff\">\n";

          // loop over the lines in the diff; getCellContent advances $index
          $index = 0;
          $total = count($diff);
          while ($index < $total) {

              // determine the line type
              switch ($diff[$index][1]) {

                  // display the content on the left and right
                  case self::UNMODIFIED:
                      $leftCell = self::getCellContent(
                          $diff, $indentation, $separator, $index, self::UNMODIFIED);
                      $rightCell = $leftCell;
                      break;

                  // display the deleted on the left and inserted content on the right
                  case self::DELETED:
                      $leftCell = self::getCellContent(
                          $diff, $indentation, $separator, $index, self::DELETED);
                      $rightCell = self::getCellContent(
                          $diff, $indentation, $separator, $index, self::INSERTED);
                      break;

                  // display the inserted content on the right
                  case self::INSERTED:
                      $leftCell = '';
                      $rightCell = self::getCellContent(
                          $diff, $indentation, $separator, $index, self::INSERTED);
                      break;

                  // an unknown type would never advance $index, so fail loudly
                  // instead of looping forever
                  default:
                      throw new InvalidArgumentException(
                          'Unknown diff line type at index ' . $index);
              }

              // choose the cell classes: equal cells are an unmodified run, an
              // empty cell is the blank side of a pure insertion or deletion
              if ($leftCell === $rightCell) {
                  $leftClass = 'Unmodified';
                  $rightClass = 'Unmodified';
              } else {
                  $leftClass = ($leftCell === '' ? 'Blank' : 'Deleted');
                  $rightClass = ($rightCell === '' ? 'Blank' : 'Inserted');
              }

              // extend the HTML with the new row
              $html .= $indentation
                  . " <tr>\n"
                  . $indentation
                  . ' <td class="diff'
                  . $leftClass
                  . '">'
                  . $leftCell
                  . "</td>\n"
                  . $indentation
                  . ' <td class="diff'
                  . $rightClass
                  . '">'
                  . $rightCell
                  . "</td>\n"
                  . $indentation
                  . " </tr>\n";
          }

          // return the HTML
          return $html . $indentation . "</table>\n";
      }

      /* Returns the content of the cell, for use in the toTable function. The
       * parameters are:
       *
       * $diff        - the diff array
       * $indentation - indentation to add to every line of the generated HTML
       *                (accepted for symmetry with toTable; not used here)
       * $separator   - the separator between lines
       * $index       - the current index, passed by reference; on return it
       *                points at the first entry that is not of type $type
       * $type        - the type of line
       */
      private static function getCellContent(
          $diff, $indentation, $separator, &$index, $type) {

          // initialise the HTML
          $html = '';

          // loop over the matching lines, adding them to the HTML; the type is
          // compared loosely on purpose, to match entries exactly as the switch
          // statement in toTable does
          $total = count($diff);
          while ($index < $total && $diff[$index][1] == $type) {
              $html .= '<span>'
                  . htmlspecialchars($diff[$index][0])
                  . '</span>'
                  . $separator;
              $index ++;
          }

          // return the HTML
          return $html;
      }
  }
  292. ?>