adaptive_huffman_coding_with_escape_symbols_2.sf 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. #!/usr/bin/ruby
  2. # Implementation of the Adaptive Huffman Coding with escape symbols.
  3. # See also:
  4. # https://rosettacode.org/wiki/huffman_coding
  5. # Reference:
  6. # Data Compression (Summer 2023) - Lecture 16 - Adaptive Methods
  7. # https://youtube.com/watch?v=YKv-w8bXi9c
  8. define(
  9. ESCAPE_SYMBOL => 256, # escape symbol
  10. )
  11. func walk(Hash n, String s, Hash h) {
  12. if (n.has(:a)) {
  13. h{n{:a}} = s
  14. return nil
  15. }
  16. walk(n{:0}, s+'0', h) if n.has(:0)
  17. walk(n{:1}, s+'1', h) if n.has(:1)
  18. }
  19. func mktree_from_freq(Hash freqs) {
  20. var nodes = freqs.sort.map_2d { |b,f|
  21. Hash(a => b, freq => f)
  22. }
  23. loop {
  24. nodes.sort_by!{|h| h{:freq} }
  25. var(x, y) = (nodes.shift, nodes.shift)
  26. if (defined(x)) {
  27. if (defined(y)) {
  28. nodes << Hash(:0 => x, :1 => y, :freq => x{:freq}+y{:freq})
  29. }
  30. else {
  31. nodes << Hash(:0 => x, :freq => x{:freq})
  32. }
  33. }
  34. nodes.len > 1 || break
  35. }
  36. var n = nodes.first
  37. walk(n, '', n{:tree} = Hash())
  38. return n
  39. }
  40. func encode(Array bytes, Array alphabet) {
  41. var contexts = [
  42. Hash(
  43. freq => alphabet.freq,
  44. ),
  45. Hash(
  46. freq => [ESCAPE_SYMBOL].freq,
  47. )
  48. ]
  49. for c in contexts {
  50. c{:tree} = mktree_from_freq(c{:freq}){:tree}
  51. }
  52. var enc = []
  53. bytes.each {|b|
  54. for i in (1..contexts.end) {
  55. contexts[i]{:tree} = mktree_from_freq(contexts[i]{:freq}){:tree}
  56. }
  57. for c in (contexts.flip) {
  58. if (c{:freq}.has(b)) {
  59. enc << c{:tree}{b}
  60. c{:freq}{b} += 1
  61. break
  62. }
  63. enc << c{:tree}{ESCAPE_SYMBOL}
  64. c{:freq}{b} = 1
  65. }
  66. }
  67. return enc.join
  68. }
  69. func decode(String enc, Array alphabet) {
  70. var out = []
  71. var prefix = ''
  72. var contexts = [
  73. Hash(
  74. freq => alphabet.freq,
  75. ),
  76. Hash(
  77. freq => [ESCAPE_SYMBOL].freq,
  78. )
  79. ]
  80. for c in contexts {
  81. c{:tree} = mktree_from_freq(c{:freq}){:tree}.flip
  82. }
  83. var escape = false
  84. var k = contexts.end
  85. enc.each {|bit|
  86. prefix += bit
  87. loop {
  88. if (contexts[k]{:tree}.has(prefix)) {
  89. var b = Num(contexts[k]{:tree}{prefix})
  90. if (b == ESCAPE_SYMBOL) {
  91. k -= 1
  92. escape = true
  93. }
  94. else {
  95. out << b
  96. for i in (max(k, 1) .. contexts.end) {
  97. contexts[i]{:freq}{b} := 0 ++
  98. contexts[i]{:tree} = mktree_from_freq(contexts[i]{:freq}){:tree}.flip
  99. }
  100. k = contexts.end
  101. escape = false
  102. }
  103. prefix = ''
  104. }
  105. escape || break
  106. k == 0 && break
  107. }
  108. }
  109. return out
  110. }
  111. var text = "this is an example for huffman encoding"
  112. var bytes = text.bytes
  113. say var enc = encode(bytes, bytes.uniq)
  114. say var dec = decode(enc, bytes.uniq).pack('C*')
  115. assert_eq(dec, text)