123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105 |
- #!/usr/bin/ruby
- # Author: Trizen
- # Date: 20 June 2023
- # https://github.com/trizen
- # Implementation of the Delta encoding scheme, optimized for large deltas, using Elias coding.
- # Reference:
- # Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
- # https://youtube.com/watch?v=-3H_eDbWNEU
- func read_bit (FileHandle fh, Ref bitstring) {
- if ((*bitstring \\ '').is_empty) {
- *bitstring = unpack('b*', fh.getc \\ die "error")
- }
- var bit = (*bitstring).substr(-1)
- *bitstring = (*bitstring).substr(0, -1)
- return bit
- }
- func delta_encode (Array integers, Bool double = false) {
- var deltas = [0, integers.len, integers...].diffs
- var bitstring = FileHandle.new_buf(:raw)
- for d in (deltas) {
- if (d == 0) {
- bitstring << '0'
- }
- elsif (double) {
- var t = d.abs.inc.as_bin
- var l = t.len.as_bin
- bitstring << join('', '1', ((d < 0) ? '0' : '1'), ('1' * (l.len-1)), '0', l.substr(1), t.substr(1))
- }
- else {
- var t = d.abs.as_bin
- bitstring << join('', '1', ((d < 0) ? '0' : '1'), ('1' * (t.len-1)), '0', t.substr(1))
- }
- }
- pack('B*', bitstring.parent)
- }
- func delta_decode (FileHandle fh, Bool double = false) {
- var deltas = []
- var buffer = ''
- var len = 0
- for (var k = 0 ; k <= len ; ++k) {
- var bit = read_bit(fh, \buffer)
- if (bit == '0') {
- deltas << 0
- }
- elsif (double) {
- var bit = read_bit(fh, \buffer)
- var bl = (^Inf -> first { read_bit(fh, \buffer) != '1' })
- var bl2 = Num('1' + bl.of { read_bit(fh, \buffer) }.join, 2)
- var int = Num('1' + (bl2-1).of { read_bit(fh, \buffer) }.join, 2)-1
- deltas << (bit == '1' ? int : -int)
- }
- else {
- var bit = read_bit(fh, \buffer)
- var n = (^Inf -> first { read_bit(fh, \buffer) != '1' })
- var d = Num('1' + n.of { read_bit(fh, \buffer) }.join, 2)
- deltas << (bit == '1' ? d : -d)
- }
- if (k == 0) {
- len = deltas.pop
- }
- }
- var acc = [len, deltas...].acc
- acc.shift
- return acc
- }
- var str = "TOBEORNOTTOBEORTOBEORNOT"
- var enc = delta_encode(str.bytes, true)
- var dec = delta_decode(enc.open_r(:raw), true)
- say "Encoded: #{unpack('B*', enc)}"
- say "Decoded: #{dec.pack('C*')}"
- assert_eq(dec.pack('C*'), str)
- do {
- var str = File(__FILE__).read(:raw)
- var encoded = delta_encode(str.bytes)
- var decoded = delta_decode(encoded.open_r(:raw))
- assert_eq(str, decoded.pack('C*'))
- }
- __END__
- Encoded: 11110101000111101111100101100001101100110111101111110010101110111011000001110011110000101011000011011001101111011111100101011101111101010110000110110011011110111111001010111011101100000111001111000010
- Decoded: TOBEORNOTTOBEORTOBEORNOT
|