lzss_encoding_hash_table_fast.sf 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #!/usr/bin/ruby
  2. # Author: Trizen
  3. # Date: 02 May 2024
  4. # Translated: 02 June 2024
  5. # https://github.com/trizen
  6. # Implementation of LZSS encoding, using an hash table.
  7. # A non-optimal, but very fast approach.
  8. func lzss_encode (str) {
  9. var la = 0
  10. var chars = str.chars
  11. var end = chars.end
  12. var min_len = 4 # minimum match length
  13. var max_len = 255 # maximum match length
  14. var max_dist = ((1 << 16) - 1) # maximum match distance
  15. var (*literals, *distances, *lengths, :table)
  16. while (la <= end) {
  17. var best_n = 1
  18. var best_p = la
  19. var lookahead = str.substr(la, min_len)
  20. if (table.has(lookahead) && (la - table{lookahead} <= max_dist)) {
  21. var p = table{lookahead}
  22. var n = min_len
  23. while ((n <= max_len) && (la+n <= end) && (chars[la + n - 1] == chars[p + n - 1])) {
  24. ++n
  25. }
  26. best_p = p
  27. best_n = n
  28. }
  29. table{lookahead} = la
  30. if (best_n > min_len) {
  31. lengths << (best_n - 1)
  32. distances << (la - best_p)
  33. literals << nil
  34. la += (best_n - 1)
  35. }
  36. else {
  37. lengths << best_n.of(0)...
  38. distances << best_n.of(0)...
  39. literals << chars[best_p .. (best_p + best_n - 1)]
  40. la += best_n
  41. }
  42. }
  43. return (literals, distances, lengths)
  44. }
  45. func lzss_decode (literals, distances, lengths) {
  46. var data = []
  47. var data_len = 0
  48. for i in (^lengths) {
  49. if (lengths[i] == 0) {
  50. data << literals[i]
  51. data_len += 1
  52. next
  53. }
  54. var length = lengths[i];
  55. var dist = distances[i];
  56. for j in (1 .. length) {
  57. data << data[data_len + j - dist - 1]
  58. }
  59. data_len += length
  60. }
  61. data.join
  62. }
  63. var string = "abbaabbaabaabaaaa"
  64. var (literals, distances, lengths) = lzss_encode(string)
  65. var decoded = lzss_decode(literals, distances, lengths)
  66. assert_eq(string, decoded)
  67. for i in (^literals) {
  68. if (lengths[i] == 0) {
  69. say literals[i]
  70. }
  71. else {
  72. say "[#{distances[i]}, #{lengths[i]}]"
  73. }
  74. }
  75. for file in ([__FILE__, $^PERL]) { # several tests
  76. var string = File(file).read(:raw)
  77. var (literals, distances, lengths) = lzss_encode(string)
  78. var decoded = lzss_decode(literals, distances, lengths)
  79. say("Ratio: ", literals.len / literals.grep{defined(_)}.len)
  80. assert_eq(string, decoded)
  81. }
  82. __END__
  83. a
  84. b
  85. b
  86. a
  87. [4, 6]
  88. a
  89. a
  90. b
  91. a
  92. a
  93. a
  94. a
  95. Ratio: 1.34062927496580027359781121751025991792065663475
  96. Ratio: 1.46043165467625899280575539568345323741007194245