diff.cpp 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. #include <cstdio>
  2. #include <algorithm>
  3. #include <vector>
  4. #include <string>
  5. #include <iterator>
  6. #include <deque>
  7. #include "simple/file.hpp"
  8. #include "simple/geom/vector.hpp"
  9. #include "simple/geom/bool_algebra.hpp"
  10. #include "simple/geom/segment.hpp"
  11. #include "simple/support/arithmetic.hpp"
  12. #include "simple/support/enum.hpp"
  13. #include "simple/support/misc.hpp"
  14. #include "simple/support/algorithm/split.hpp"
  15. #include "simple/support/iterator/match.hpp"
  16. #include "simple/support/tuple_utils/transform.hpp"
  17. using namespace simple::file;
  18. using index_type = simple::geom::vector<size_t, 2>;
  19. using double_buffer = std::pair<std::vector<char>, std::vector<char>>;
  20. enum class Options
  21. {
  22. Wordwise,
  23. Distance,
  24. Invalid
  25. };
  26. using Option = simple::support::mapped_enum<Options, Options::Invalid, 2>;
  27. template <> Option::guts::map_type Option::guts::map
  28. {{
  29. { "-w"s, "--words"s },
  30. { "-d"s, "--distance"s },
  31. }};
  32. template <typename Buffers>
  33. index_type get_size(const Buffers& buffers)
  34. {
  35. return index_type(buffers.first.size(), buffers.second.size());
  36. }
  37. template <typename Buffers>
  38. bool diff_at(const Buffers& buffers, index_type position)
  39. {
  40. return buffers.first[position.x()] != buffers.second[position.y()];
  41. }
  42. template <typename Buffers>
  43. index_type find_change(const Buffers& buffers, index_type start = index_type::zero()) // NOTE: this is almost std::find_if, except the != bound comparison is not sufficient in this case... such a shame...
  44. {
  45. auto size = get_size(buffers);
  46. while(start < size)
  47. {
  48. if(diff_at(buffers, start))
  49. break;
  50. ++start;
  51. }
  52. return start;
  53. }
  54. template <typename Buffers>
  55. std::pair<index_type, index_type> measure_change(const Buffers& buffers, index_type start, size_t min_distance)
  56. {
  57. auto remaining = get_size(buffers) - start;
  58. auto minmax = std::minmax_element(remaining.begin(), remaining.end());
  59. auto min_index = index_type::unit(minmax.first - remaining.begin());
  60. auto max_index = index_type::unit(minmax.second - remaining.begin());
  61. size_t change_size = 1;
  62. auto change = max_index * change_size;
  63. auto step = - max_index + min_index;
  64. while(change < remaining)
  65. {
  66. do
  67. {
  68. if(not diff_at(buffers, start + change))
  69. {
  70. auto next_change = find_change(buffers, start + change);
  71. auto distance = next_change - (start + change);
  72. if(not (distance < index_type::one(min_distance)))
  73. return {change, next_change};
  74. }
  75. change += step;
  76. }
  77. while(change < remaining);
  78. ++change_size;
  79. size_t excess; // NOTE: don't you hate it when edge cases are not cleanly handled by the main loop?... must figure this out...
  80. if(simple::support::sub_overflow(excess, change_size, *minmax.second - 1))
  81. excess = 0;
  82. change = max_index * (change_size - excess) + min_index * excess;
  83. }
  84. return {remaining, get_size(buffers)};
  85. }
  86. void showChange(index_type position, index_type change)
  87. {
  88. using simple::support::to_string;
  89. for(size_t i = 0; i < index_type::dimensions; ++i)
  90. std::puts( to_string<index_type::value_type>(simple::geom::segment<size_t>{change[i], position[i]}, ':').c_str());
  91. }
  92. auto split(std::vector<char> in)
  93. {
  94. std::vector<std::string> ret;
  95. simple::support::split(in, simple::support::match_iterator(simple::support::is_space), std::back_inserter(ret));
  96. return ret;
  97. }
  98. void diff(std::array<std::string,2> filenames, bool wordwise, size_t distance)
  99. {
  100. const auto buffers = std::make_pair( dump(bropex(filenames[0])), dump(bropex(filenames[1])) );
  101. auto do_diff = [distance](const auto& buffers)
  102. {
  103. auto size = get_size(buffers);
  104. auto it = find_change(buffers, index_type::zero());
  105. while(it < size)
  106. {
  107. auto [change, next] = measure_change(buffers, it, distance);
  108. showChange(it, change);
  109. it = next;
  110. }
  111. auto change = size - it;
  112. if(index_type::zero() != change)
  113. showChange(it, change);
  114. };
  115. if(wordwise) do_diff(std::pair{ split(buffers.first), split(buffers.second) });
  116. else do_diff(buffers);
  117. }
  118. void process_arguments(std::deque<string> args)
  119. {
  120. std::array<std::string,2> filenames;
  121. size_t file_count = 0;
  122. size_t distance = 0;
  123. bool wordwise = false;
  124. args.pop_front();
  125. bool diffed = false;
  126. while(!args.empty())
  127. {
  128. switch(Option(args.front()))
  129. {
  130. case Options::Distance:
  131. args.pop_front();
  132. distance = simple::support::ston<size_t>(args.at(0));
  133. break;
  134. case Options::Wordwise:
  135. args.pop_front();
  136. wordwise = simple::support::ston<bool>(args.at(0));
  137. break;
  138. default:
  139. filenames[file_count++] = args.at(0);
  140. if(file_count == 2)
  141. {
  142. if(diffed)
  143. std::puts("");
  144. else
  145. diffed = true;
  146. diff(filenames, wordwise, distance);
  147. file_count = 0;
  148. }
  149. break;
  150. }
  151. args.pop_front();
  152. }
  153. if(not diffed)
  154. std::fputs("Specify 2 files to diff!\n", stderr);
  155. }
  156. int main(int argc, char const* argv[]) try
  157. {
  158. process_arguments({argv, argv + argc});
  159. return 0;
  160. }
  161. catch(...)
  162. {
  163. if(errno) std::perror("Oh nooo!");
  164. throw;
  165. }