lzw_file_compression_utf8.sf 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #!/usr/bin/ruby
  2. # Encode and decode small files using LZW compression and UTF-8 encoding.
  3. # See also:
  4. # https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
  # LZW-compress a string.
  #
  # Takes the text to encode and returns a String built by joining one
  # output symbol per emitted dictionary entry: single-character entries
  # (code points 0..255) are emitted as themselves, and every entry learned
  # during the scan is emitted as the character whose code point equals its
  # numeric code (256, 257, ...).
  #
  # NOTE(review): codes grow by one per learned entry with no upper bound;
  # confirm that the intended input sizes never push a code past what the
  # output encoding can represent.
  func compress(String uncompressed) -> String {

      # Seed the table: every one-character string maps to itself.
      var next_code = 256
      var table = Hash()

      for i in (^next_code) {
          var ch = i.chr
          table{ch} = ch
      }

      var pieces = []

      # Append the output symbol stored for `key`. Seed entries hold a
      # character already; learned entries hold a number that is turned
      # into the character with that code point.
      func emit(key) {
          var sym = table{key}
          pieces.push(sym.kind_of(Num) ? sym.chr : sym)
      }

      # Longest sequence seen so far that is still present in the table.
      var prefix = ''

      uncompressed.each { |c|
          var candidate = prefix+c

          if (!table.has_key(candidate)) {
              # `candidate` is new: flush the known prefix, learn the
              # extended sequence, and restart from the current character.
              emit(prefix)
              table{candidate} = next_code
              next_code += 1
              prefix = c
          }
          else {
              prefix = candidate
          }
      }

      # Flush whatever is still pending (nothing for an empty input).
      if (prefix != '') {
          emit(prefix)
      }

      return pieces.join
  }
  # Decompress a string produced by compress() back into the original text.
  #
  # Each character of `compressed` is one LZW code: its code point is the
  # dictionary index (0..255 are the literal single-character entries,
  # 256 and up are entries learned while decoding).
  #
  # Returns the decoded String; an empty input decodes to an empty string.
  # Dies with "Bad compressed k: ..." when a code is neither present in the
  # dictionary nor the single next code about to be defined.
  func decompress(String compressed) -> String {

      # Nothing to decode. Without this guard the first shift below has no
      # element to return and the function cannot return a String.
      if (compressed == '') {
          return ''
      }

      var dict_size = 256
      var dictionary = Hash()
      ^dict_size -> each { |i|
          dictionary{i.chr} = i.chr
      }

      # Work on a separate list of codes instead of overwriting the
      # String parameter with an Array.
      var codes = compressed.chars

      # The first code must be a literal: nothing has been learned yet.
      var w = codes.shift
      if (!dictionary.has_key(w)) {
          die "Bad compressed k: #{w}"
      }
      var result = w

      codes.each { |k|
          var entry
          if (dictionary.has_key(k)) {
              entry = dictionary{k}
          }
          elsif (k.ord == dict_size) {
              # The code refers to the entry that is being defined right
              # now; it must be the previous sequence plus its own first
              # character.
              entry = w+w.substr(0,1)
          }
          else {
              die "Bad compressed k: #{k}"
          }

          result += entry

          # Learn the same entry the compressor added at this step.
          dictionary{dict_size.chr} = w+entry.substr(0,1)
          dict_size++

          w = entry
      }

      return result
  }
  # Command-line driver.
  #
  # `-d` forces decoding; otherwise decoding is chosen when the input file
  # name ends in ".lzw.enc". Results are always written to fixed file names
  # in the current directory.
  ARGV.getopt!('d', \var decode)

  var file = File(ARGV.shift) || do {
      say "usage: #{File(__MAIN__).basename} [-d] [input file]"
      Sys.exit(2)
  }

  if (decode || file.match(/\.lzw\.enc\z/)) {
      # Encoded files are read as UTF-8 text (one character per code);
      # the decoded data is written back out raw.
      File("output.lzw.dec").write(decompress(file.read(:utf8)), :raw)
  }
  else {
      # The input is read raw and the code string is stored as UTF-8.
      File("output.lzw.enc").write(compress(file.read(:raw)), :utf8)
  }