diff.rb 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. module HTMLDiff
  2. Match = Struct.new(:start_in_old, :start_in_new, :size)
  3. class Match
  4. def end_in_old
  5. self.start_in_old + self.size
  6. end
  7. def end_in_new
  8. self.start_in_new + self.size
  9. end
  10. end
  11. Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new)
  12. class DiffBuilder
  13. def initialize(old_version, new_version)
  14. @old_version, @new_version = old_version, new_version
  15. split_inputs_to_words
  16. index_new_words
  17. end
  18. def split_inputs_to_words
  19. @old_words = explode(@old_version)
  20. @new_words = explode(@new_version)
  21. end
  22. def index_new_words
  23. @word_indices = Hash.new { |h, word| h[word] = [] }
  24. @new_words.each_with_index { |word, i| @word_indices[word] << i }
  25. end
  26. def operations
  27. position_in_old = position_in_new = 0
  28. operations = []
  29. matches = matching_blocks
  30. # an empty match at the end forces the loop below to handle the unmatched tails
  31. # I'm sure it can be done more gracefully, but not at 23:52
  32. matches << Match.new(@old_words.length, @new_words.length, 0)
  33. matches.each_with_index do |match, i|
  34. match_starts_at_current_position_in_old = (position_in_old == match.start_in_old)
  35. match_starts_at_current_position_in_new = (position_in_new == match.start_in_new)
  36. action_upto_match_positions =
  37. case [match_starts_at_current_position_in_old, match_starts_at_current_position_in_new]
  38. when [false, false]
  39. :replace
  40. when [true, false]
  41. :insert
  42. when [false, true]
  43. :delete
  44. else
  45. # this happens if the first few words are same in both versions
  46. :none
  47. end
  48. if action_upto_match_positions != :none
  49. operation_upto_match_positions =
  50. Operation.new(action_upto_match_positions,
  51. position_in_old, match.start_in_old,
  52. position_in_new, match.start_in_new)
  53. operations << operation_upto_match_positions
  54. end
  55. if match.size != 0
  56. match_operation = Operation.new(:equal,
  57. match.start_in_old, match.end_in_old,
  58. match.start_in_new, match.end_in_new)
  59. operations << match_operation
  60. end
  61. position_in_old = match.end_in_old
  62. position_in_new = match.end_in_new
  63. end
  64. operations
  65. end
  66. def matching_blocks
  67. matching_blocks = []
  68. recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks)
  69. matching_blocks
  70. end
  71. def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
  72. match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  73. if match
  74. if start_in_old < match.start_in_old and start_in_new < match.start_in_new
  75. recursively_find_matching_blocks(
  76. start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks)
  77. end
  78. matching_blocks << match
  79. if match.end_in_old < end_in_old and match.end_in_new < end_in_new
  80. recursively_find_matching_blocks(
  81. match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks)
  82. end
  83. end
  84. end
  85. def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  86. best_match_in_old = start_in_old
  87. best_match_in_new = start_in_new
  88. best_match_size = 0
  89. match_length_at = Hash.new { |h, index| h[index] = 0 }
  90. start_in_old.upto(end_in_old - 1) do |index_in_old|
  91. new_match_length_at = Hash.new { |h, index| h[index] = 0 }
  92. @word_indices[@old_words[index_in_old]].each do |index_in_new|
  93. next if index_in_new < start_in_new
  94. break if index_in_new >= end_in_new
  95. new_match_length = match_length_at[index_in_new - 1] + 1
  96. new_match_length_at[index_in_new] = new_match_length
  97. if new_match_length > best_match_size
  98. best_match_in_old = index_in_old - new_match_length + 1
  99. best_match_in_new = index_in_new - new_match_length + 1
  100. best_match_size = new_match_length
  101. end
  102. end
  103. match_length_at = new_match_length_at
  104. end
  105. # best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left(
  106. # best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new)
  107. # best_match_in_old, best_match_in_new, match_size = add_matching_words_right(
  108. # best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new)
  109. return (best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil)
  110. end
  111. def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
  112. while match_in_old > start_in_old and
  113. match_in_new > start_in_new and
  114. @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
  115. match_in_old -= 1
  116. match_in_new -= 1
  117. match_size += 1
  118. end
  119. [match_in_old, match_in_new, match_size]
  120. end
  121. def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
  122. while match_in_old + match_size < end_in_old and
  123. match_in_new + match_size < end_in_new and
  124. @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
  125. match_size += 1
  126. end
  127. [match_in_old, match_in_new, match_size]
  128. end
  129. def explode(sequence)
  130. sequence.is_a?(String) ? sequence.split(//) : sequence
  131. end
  132. end # of class Diff Builder
  133. end