<?php
/**
 * A class containing a diff implementation
 *
 * Created by Kate Morley - http://iamkate.com/ - and released under the terms of
 * the CC0 1.0 Universal legal code:
 *
 * http://creativecommons.org/publicdomain/zero/1.0/legalcode
 *
 */
/**
 * Computes the difference between two strings (line by line or character by
 * character) from a table of longest-common-subsequence lengths, and renders
 * the result as plain text, inline HTML or an HTML table.
 *
 * A "diff" is a list of two-element arrays: the item (a line or a character)
 * followed by one of Diff::UNMODIFIED, Diff::DELETED or Diff::INSERTED.
 */
class Diff {

    // the item is present in both strings
    const UNMODIFIED = 0;
    // the item is present only in the first string
    const DELETED = 1;
    // the item is present only in the second string
    const INSERTED = 2;

    /**
     * Returns the diff for two strings.
     *
     * In line mode the strings are split on any newline sequence (\R). In
     * character mode they are split into UTF-8 characters; a string that is
     * not valid UTF-8 is split into bytes instead.
     *
     * @param string $string1           the first string
     * @param string $string2           the second string
     * @param bool   $compareCharacters true to compare characters, false to
     *                                  compare lines; defaults to false
     *
     * @return array list of array(item, type) pairs, in order
     */
    public static function compare(
        $string1, $string2, $compareCharacters = false) {

        // turn both inputs into lists so that lines and characters can be
        // handled by exactly the same code below
        if ($compareCharacters) {
            $sequence1 = self::splitCharacters($string1);
            $sequence2 = self::splitCharacters($string2);
        } else {
            $sequence1 = preg_split('/\R/', $string1);
            $sequence2 = preg_split('/\R/', $string2);
        }

        $length1 = count($sequence1);
        $start   = 0;
        $end1    = $length1 - 1;
        $end2    = count($sequence2) - 1;

        // Items are compared with === throughout: with == PHP compares
        // numeric strings numerically, so differing lines such as "1e1" and
        // "10" (or " 1" and "1") would be reported as unmodified.

        // skip any common prefix
        while ($start <= $end1 && $start <= $end2
            && $sequence1[$start] === $sequence2[$start]) {
            $start ++;
        }

        // skip any common suffix
        while ($end1 >= $start && $end2 >= $start
            && $sequence1[$end1] === $sequence2[$end2]) {
            $end1 --;
            $end2 --;
        }

        // only the differing middle section needs the (quadratic) table
        $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);
        $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);

        // assemble: common prefix, middle section, common suffix
        $diff = array();
        for ($index = 0; $index < $start; $index ++) {
            $diff[] = array($sequence1[$index], self::UNMODIFIED);
        }
        // the partial diff is produced back to front
        foreach (array_reverse($partialDiff) as $entry) {
            $diff[] = $entry;
        }
        for ($index = $end1 + 1; $index < $length1; $index ++) {
            $diff[] = array($sequence1[$index], self::UNMODIFIED);
        }

        return $diff;
    }

    /**
     * Returns the diff for two files.
     *
     * @param string $file1             the path to the first file
     * @param string $file2             the path to the second file
     * @param bool   $compareCharacters true to compare characters, false to
     *                                  compare lines; defaults to false
     *
     * @return array the diff, in the format returned by compare()
     *
     * @throws RuntimeException if either file cannot be read
     */
    public static function compareFiles(
        $file1, $file2, $compareCharacters = false) {
        return self::compare(
            self::readContents($file1),
            self::readContents($file2),
            $compareCharacters);
    }

    /**
     * Reads a whole file, failing loudly instead of letting a false return
     * value be diffed as if it were an empty file.
     *
     * @param string $file the path to the file
     *
     * @return string the file contents
     *
     * @throws RuntimeException if the file cannot be read
     */
    private static function readContents($file) {
        $contents = file_get_contents($file);
        if ($contents === false) {
            throw new RuntimeException('Unable to read file: ' . $file);
        }
        return $contents;
    }

    /**
     * Splits a string into a list of characters.
     *
     * @param string $string the string to split
     *
     * @return array list of UTF-8 characters, or of single bytes when the
     *               string is not valid UTF-8
     */
    private static function splitCharacters($string) {
        $characters = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
        if ($characters === false) {
            // the /u modifier rejects invalid UTF-8; fall back to bytes
            $characters = str_split($string);
        }
        return $characters;
    }

    /**
     * Returns the table of longest common subsequence lengths for the
     * specified sequences. Entry [i][j] is the length of the longest common
     * subsequence of the first i items of sequence 1 and the first j items of
     * sequence 2, both counted from $start.
     *
     * @param array $sequence1 the first sequence
     * @param array $sequence2 the second sequence
     * @param int   $start     the starting index
     * @param int   $end1      the ending index for the first sequence
     * @param int   $end2      the ending index for the second sequence
     *
     * @return array table with ($end1 - $start + 2) rows and
     *               ($end2 - $start + 2) columns
     */
    private static function computeTable(
        $sequence1, $sequence2, $start, $end1, $end2) {

        // determine the lengths to be compared
        $length1 = $end1 - $start + 1;
        $length2 = $end2 - $start + 1;

        // row 0 and column 0 stand for "no items" and are all zero
        $table = array(array_fill(0, $length2 + 1, 0));

        for ($index1 = 1; $index1 <= $length1; $index1 ++) {
            $table[$index1] = array(0);
            for ($index2 = 1; $index2 <= $length2; $index2 ++) {
                if ($sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]) {
                    // matching items extend the subsequence found diagonally
                    $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
                } else {
                    // otherwise carry over the better of dropping either item
                    $table[$index1][$index2] = max(
                        $table[$index1 - 1][$index2],
                        $table[$index1][$index2 - 1]);
                }
            }
        }

        return $table;
    }

    /**
     * Returns the partial diff for the specified sequences, in reverse order,
     * by walking the table from its bottom-right corner back to the origin.
     *
     * @param array $table     the table returned by computeTable()
     * @param array $sequence1 the first sequence
     * @param array $sequence2 the second sequence
     * @param int   $start     the starting index
     *
     * @return array list of array(item, type) pairs, last item first
     */
    private static function generatePartialDiff(
        $table, $sequence1, $sequence2, $start) {

        $diff = array();

        // begin at the last row and column of the table
        $index1 = count($table) - 1;
        $index2 = count($table[0]) - 1;

        // loop until there are no items remaining in either sequence
        while ($index1 > 0 || $index2 > 0) {
            if ($index1 > 0 && $index2 > 0
                && $sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]) {
                // same item in both sequences: step diagonally
                $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
                $index1 --;
                $index2 --;
            } elseif ($index2 > 0
                && $table[$index1][$index2] === $table[$index1][$index2 - 1]) {
                // dropping the item of sequence 2 costs nothing: an insertion
                $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
                $index2 --;
            } else {
                // otherwise the item of sequence 1 was deleted
                $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
                $index1 --;
            }
        }

        return $diff;
    }

    /**
     * Returns a diff as a string, where unmodified lines are prefixed by ' ',
     * deletions are prefixed by '- ', and insertions are prefixed by '+ '.
     * An entry with any other type contributes only the separator.
     *
     * @param array  $diff      the diff array
     * @param string $separator the separator appended after every line;
     *                          defaults to "\n"
     *
     * @return string the rendered diff
     */
    public static function toString($diff, $separator = "\n") {
        $string = '';
        foreach ($diff as $line) {
            switch ($line[1]) {
                case self::UNMODIFIED:
                    $string .= ' ' . $line[0];
                    break;
                case self::DELETED:
                    $string .= '- ' . $line[0];
                    break;
                case self::INSERTED:
                    $string .= '+ ' . $line[0];
                    break;
            }
            $string .= $separator;
        }
        return $string;
    }

    /**
     * Returns a diff as an HTML string, where unmodified lines are contained
     * within 'span' elements, deletions are contained within 'del' elements,
     * and insertions are contained within 'ins' elements. Line content is
     * escaped with htmlspecialchars(); the separator is emitted verbatim.
     *
     * @param array  $diff      the diff array
     * @param string $separator the separator appended after every line;
     *                          defaults to '<br>'
     *
     * @return string the rendered HTML
     *
     * @throws InvalidArgumentException if an entry has an unknown type
     */
    public static function toHTML($diff, $separator = '<br>') {
        $html = '';
        foreach ($diff as $line) {
            switch ($line[1]) {
                case self::UNMODIFIED:
                    $element = 'span';
                    break;
                case self::DELETED:
                    $element = 'del';
                    break;
                case self::INSERTED:
                    $element = 'ins';
                    break;
                default:
                    // without this the element of the previous line (or an
                    // undefined variable) would silently be used
                    throw new InvalidArgumentException('Unknown diff line type');
            }
            $html .= '<' . $element . '>'
                . htmlspecialchars($line[0])
                . '</' . $element . '>'
                . $separator;
        }
        return $html;
    }

    /**
     * Returns a diff as an HTML table with one row per run of lines: the
     * left cell holds unmodified or deleted lines, the right cell holds
     * unmodified or inserted lines. A run of deletions directly followed by a
     * run of insertions shares a single row.
     *
     * @param array  $diff        the diff array
     * @param string $indentation indentation to add to every line of the
     *                            generated HTML; defaults to ''
     * @param string $separator   the separator between lines; defaults to
     *                            '<br>'
     *
     * @return string the rendered HTML
     *
     * @throws InvalidArgumentException if an entry has an unknown type
     */
    public static function toTable($diff, $indentation = '', $separator = '<br>') {
        $html = $indentation . "<table class=\"diff\">\n";

        // getCellContent() advances $index past every line it consumes
        $index = 0;
        $total = count($diff);
        while ($index < $total) {
            switch ($diff[$index][1]) {
                // display the content on the left and right
                case self::UNMODIFIED:
                    $leftCell = self::getCellContent(
                        $diff, $indentation, $separator, $index, self::UNMODIFIED);
                    $rightCell = $leftCell;
                    $leftClass = 'Unmodified';
                    $rightClass = 'Unmodified';
                    break;
                // display the deleted content on the left and any directly
                // following inserted content on the right
                case self::DELETED:
                    $leftCell = self::getCellContent(
                        $diff, $indentation, $separator, $index, self::DELETED);
                    $rightCell = self::getCellContent(
                        $diff, $indentation, $separator, $index, self::INSERTED);
                    $leftClass = 'Deleted';
                    $rightClass = ($rightCell === '' ? 'Blank' : 'Inserted');
                    break;
                // display the inserted content on the right
                case self::INSERTED:
                    $leftCell = '';
                    $rightCell = self::getCellContent(
                        $diff, $indentation, $separator, $index, self::INSERTED);
                    $leftClass = 'Blank';
                    $rightClass = 'Inserted';
                    break;
                default:
                    // nothing would advance $index, so fail rather than loop
                    // forever
                    throw new InvalidArgumentException('Unknown diff line type');
            }

            // extend the HTML with the new row
            $html .= $indentation
                . " <tr>\n"
                . $indentation
                . ' <td class="diff'
                . $leftClass
                . '">'
                . $leftCell
                . "</td>\n"
                . $indentation
                . ' <td class="diff'
                . $rightClass
                . '">'
                . $rightCell
                . "</td>\n"
                . $indentation
                . " </tr>\n";
        }

        return $html . $indentation . "</table>\n";
    }

    /**
     * Returns the content of a cell, for use in the toTable function:
     * consecutive lines of the given type starting at $index, each escaped,
     * wrapped in a 'span' element and followed by the separator.
     *
     * @param array  $diff        the diff array
     * @param string $indentation indentation of the generated HTML (not used
     *                            by this function)
     * @param string $separator   the separator between lines
     * @param int    $index       the current index, passed by reference and
     *                            advanced past the consumed lines
     * @param int    $type        the type of line to collect
     *
     * @return string the cell HTML; '' if no line of that type is at $index
     */
    private static function getCellContent(
        $diff, $indentation, $separator, &$index, $type) {
        $html = '';
        $total = count($diff);
        // == is deliberate: it matches the loose comparison of the switch in
        // toTable(), so every line that selects a case there is consumed here
        while ($index < $total && $diff[$index][1] == $type) {
            $html .= '<span>'
                . htmlspecialchars($diff[$index][0])
                . '</span>'
                . $separator;
            $index ++;
        }
        return $html;
    }
}
?>