123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- #!/usr/bin/ruby
- # Author: Trizen
- # Date: 13 June 2023
- # https://github.com/trizen
- # Implementation of the Delta encoding scheme, optimized for large deltas, using Elias coding.
- # Reference:
- # Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
- # https://youtube.com/watch?v=-3H_eDbWNEU
# Delta-encode a list of integers (byte values) into a string of '0'/'1' characters.
#
# Every value is replaced by its difference from the previous value; the first
# value is diffed against an implicit leading 0. Each difference is written as:
#
#   zero     -> '0'
#   non-zero -> '1'                   (non-zero marker)
#               sign                  ('1' = positive, '0' = negative)
#               '1' * (k-1) then '0'  (unary prefix, k = bit-length of |difference|)
#               low k-1 bits          (binary digits of |difference| minus the leading 1)
#
# Returns the string accumulated in the in-memory buffer.
func delta_encode (bytes) {

    var out    = FileHandle.new_buf(:raw)
    var padded = ([0] + bytes)

    for delta in (padded.diffs) {

        # Unchanged value: a single '0' bit is enough.
        if (delta == 0) {
            out << '0'
            next
        }

        var binary   = delta.abs.as_bin
        var sign_bit = ((delta < 0) ? '0' : '1')
        var unary    = ('1' * (binary.len - 1))
        var payload  = binary.substr(1)     # the leading '1' is implied by the unary prefix

        out << ('1' + sign_bit + unary + '0' + payload)
    }

    return out.parent
}
# Decode a string of '0'/'1' characters produced by delta_encode().
#
# The string is consumed one character at a time:
#   '0'                         -> difference 0
#   '1', sign, unary, '0', bits -> signed non-zero difference
# The recovered differences are then turned back into the original values
# with a running (cumulative) sum.
func delta_decode (bitstring) {

    var queue = bitstring.chars
    var diffs = []

    while (queue) {

        # A lone '0' marks a zero difference.
        if (queue.shift == '0') {
            diffs << 0
            next
        }

        # Sign flag: '1' means positive, anything else negative.
        var sign = queue.shift

        # Count the '1's of the unary prefix; the character that ends it is consumed as well.
        var extra = 0
        loop {
            if (queue.shift != '1') { break }
            extra += 1
        }

        # Re-attach the implied leading '1' to the `extra` payload bits.
        var tail      = extra.of { queue.shift }.join
        var magnitude = Num('1' + tail, 2)

        diffs << (sign == '1' ? magnitude : -magnitude)
    }

    return diffs.acc
}
# Demo: round-trip a short sample string and print the encoded bitstring.
var str = "TOBEORNOTTOBEORTOBEORNOT"
var enc = delta_encode(str.bytes)
var dec = delta_decode(enc)

say "Encoded: #{enc}"
say "Decoded: #{dec.pack('C*')}"

assert_eq(dec.pack('C*'), str)

# Self-test: the raw bytes of this script must survive an encode/decode round-trip.
do {
    var source   = File(__FILE__).read(:raw)
    var restored = delta_decode(delta_encode(source.bytes))
    assert_eq(source, restored.pack('C*'))
}
- __END__
- Encoded: 1111111100101001011001101110101111011111100101110110110001101111001010110011011101011110111111001011101111001011001101110101111011111100101110110110001101111001
- Decoded: TOBEORNOTTOBEORTOBEORNOT
|