123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
- #include <cstdio>
- #include <algorithm>
- #include <vector>
- #include <string>
- #include <iterator>
- #include <deque>
- #include "simple/file.hpp"
- #include "simple/geom/vector.hpp"
- #include "simple/geom/bool_algebra.hpp"
- #include "simple/geom/segment.hpp"
- #include "simple/support/arithmetic.hpp"
- #include "simple/support/enum.hpp"
- #include "simple/support/misc.hpp"
- #include "simple/support/algorithm/split.hpp"
- #include "simple/support/iterator/match.hpp"
- #include "simple/support/tuple_utils/transform.hpp"
- using namespace simple::file;
- using index_type = simple::geom::vector<size_t, 2>;
- using double_buffer = std::pair<std::vector<char>, std::vector<char>>;
- enum class Options
- {
- Wordwise,
- Distance,
- Invalid
- };
- using Option = simple::support::mapped_enum<Options, Options::Invalid, 2>;
- template <> Option::guts::map_type Option::guts::map
- {{
- { "-w"s, "--words"s },
- { "-d"s, "--distance"s },
- }};
- template <typename Buffers>
- index_type get_size(const Buffers& buffers)
- {
- return index_type(buffers.first.size(), buffers.second.size());
- }
- template <typename Buffers>
- bool diff_at(const Buffers& buffers, index_type position)
- {
- return buffers.first[position.x()] != buffers.second[position.y()];
- }
- template <typename Buffers>
- index_type find_change(const Buffers& buffers, index_type start = index_type::zero()) // NOTE: this is almost std::find_if, except the != bound comparison is not sufficient in this case... such a shame...
- {
- auto size = get_size(buffers);
- while(start < size)
- {
- if(diff_at(buffers, start))
- break;
- ++start;
- }
- return start;
- }
- template <typename Buffers>
- std::pair<index_type, index_type> measure_change(const Buffers& buffers, index_type start, size_t min_distance)
- {
- auto remaining = get_size(buffers) - start;
- auto minmax = std::minmax_element(remaining.begin(), remaining.end());
- auto min_index = index_type::unit(minmax.first - remaining.begin());
- auto max_index = index_type::unit(minmax.second - remaining.begin());
- size_t change_size = 1;
- auto change = max_index * change_size;
- auto step = - max_index + min_index;
- while(change < remaining)
- {
- do
- {
- if(not diff_at(buffers, start + change))
- {
- auto next_change = find_change(buffers, start + change);
- auto distance = next_change - (start + change);
- if(not (distance < index_type::one(min_distance)))
- return {change, next_change};
- }
- change += step;
- }
- while(change < remaining);
- ++change_size;
- size_t excess; // NOTE: don't you hate it when edge cases are not cleanly handled by the main loop?... must figure this out...
- if(simple::support::sub_overflow(excess, change_size, *minmax.second - 1))
- excess = 0;
- change = max_index * (change_size - excess) + min_index * excess;
- }
- return {remaining, get_size(buffers)};
- }
- void showChange(index_type position, index_type change)
- {
- using simple::support::to_string;
- for(size_t i = 0; i < index_type::dimensions; ++i)
- std::puts( to_string<index_type::value_type>(simple::geom::segment<size_t>{change[i], position[i]}, ':').c_str());
- }
- auto split(std::vector<char> in)
- {
- std::vector<std::string> ret;
- simple::support::split(in, simple::support::match_iterator(simple::support::is_space), std::back_inserter(ret));
- return ret;
- }
- void diff(std::array<std::string,2> filenames, bool wordwise, size_t distance)
- {
- const auto buffers = std::make_pair( dump(bropex(filenames[0])), dump(bropex(filenames[1])) );
- auto do_diff = [distance](const auto& buffers)
- {
- auto size = get_size(buffers);
- auto it = find_change(buffers, index_type::zero());
- while(it < size)
- {
- auto [change, next] = measure_change(buffers, it, distance);
- showChange(it, change);
- it = next;
- }
- auto change = size - it;
- if(index_type::zero() != change)
- showChange(it, change);
- };
- if(wordwise) do_diff(std::pair{ split(buffers.first), split(buffers.second) });
- else do_diff(buffers);
- }
- void process_arguments(std::deque<string> args)
- {
- std::array<std::string,2> filenames;
- size_t file_count = 0;
- size_t distance = 0;
- bool wordwise = false;
- args.pop_front();
- bool diffed = false;
- while(!args.empty())
- {
- switch(Option(args.front()))
- {
- case Options::Distance:
- args.pop_front();
- distance = simple::support::ston<size_t>(args.at(0));
- break;
- case Options::Wordwise:
- args.pop_front();
- wordwise = simple::support::ston<bool>(args.at(0));
- break;
- default:
- filenames[file_count++] = args.at(0);
- if(file_count == 2)
- {
- if(diffed)
- std::puts("");
- else
- diffed = true;
- diff(filenames, wordwise, distance);
- file_count = 0;
- }
- break;
- }
- args.pop_front();
- }
- if(not diffed)
- std::fputs("Specify 2 files to diff!\n", stderr);
- }
- int main(int argc, char const* argv[]) try
- {
- process_arguments({argv, argv + argc});
- return 0;
- }
- catch(...)
- {
- if(errno) std::perror("Oh nooo!");
- throw;
- }
|