lz4_decompressor.sf 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. #!/usr/bin/ruby
  2. # Author: Trizen
  3. # Date: 08 July 2024
  4. # https://github.com/trizen
  5. # A simple LZ4 decompressor.
  6. # References:
  7. # https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
  8. # https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
  9. var file = ARGV[0] \\ die "usage: #{__MAIN__} [file.lz4]\n";
  10. var fh = File(file).open('<:raw') || die "Can't open file <<#{file}>> for reading: #{$!}";
  11. func bytes2int_lsb(fh, n) {
  12. Num(unpack('b*', n.of{fh.getc}.join).flip, 2)
  13. }
  14. while (!fh.eof) {
  15. bytes2int_lsb(fh, 4) == 0x184D2204 || die "Not an LZ4 file\n";
  16. var FLG = fh.getc.ord
  17. var BD = fh.getc.ord
  18. var version = (FLG & 0b11_00_00_00)
  19. var B_indep = (FLG & 0b00_10_00_00)
  20. var B_checksum = (FLG & 0b00_01_00_00)
  21. var C_size = (FLG & 0b00_00_10_00)
  22. var C_checksum = (FLG & 0b00_00_01_00)
  23. var DictID = (FLG & 0b00_00_00_01)
  24. var Block_MaxSize = (BD & 0b0_111_0000)
  25. STDERR.say("Block max size: ", Block_MaxSize)
  26. if (version != 0b01_00_00_00) {
  27. die "Error: Invalid version number";
  28. }
  29. if (C_size) {
  30. var content_size = bytes2int_lsb(fh, 8)
  31. STDERR.say("Content size: ", content_size)
  32. }
  33. if (DictID) {
  34. var dict_id = bytes2int_lsb(fh, 4);
  35. STDERR.say("Dictionary ID: ", dict_id)
  36. }
  37. var header_checksum = fh.getc.ord
  38. STDERR.say("Header checksum: ", header_checksum)
  39. var decoded = FileHandle.new_buf(:raw)
  40. STDOUT.binmode(:raw)
  41. while (!fh.eof) {
  42. var block_size = bytes2int_lsb(fh, 4)
  43. if (block_size == 0x00000000) { # signifies an EndMark
  44. STDERR.say("Block size == 0")
  45. break
  46. }
  47. STDERR.say("Block size: #{block_size}")
  48. if (block_size >> 31) {
  49. STDERR.say("Highest bit set: ", block_size)
  50. block_size &= ((1 << 31) - 1)
  51. STDERR.say("Block size: ", block_size)
  52. var uncompressed = ''
  53. fh.read(\uncompressed, block_size)
  54. decoded << uncompressed
  55. }
  56. else {
  57. var compressed = ''
  58. fh.read(\compressed, block_size)
  59. var block_fh = compressed.open_r(:raw)
  60. while (!block_fh.eof) {
  61. var len_byte = block_fh.getc.ord
  62. var literals_length = (len_byte >> 4)
  63. var match_len = (len_byte & 0b1111)
  64. if (literals_length == 15) {
  65. loop {
  66. var byte_len = block_fh.getc.ord
  67. literals_length += byte_len
  68. break if (byte_len != 255)
  69. }
  70. }
  71. var literals = ''
  72. if (literals_length > 0) {
  73. block_fh.read(\literals, literals_length)
  74. }
  75. if (block_fh.eof) { # end of block
  76. decoded << literals
  77. break
  78. }
  79. var offset = bytes2int_lsb(block_fh, 2)
  80. if (offset == 0) {
  81. die "Corrupted block";
  82. }
  83. if (match_len == 15) {
  84. loop {
  85. var byte_len = block_fh.getc.ord
  86. match_len += byte_len
  87. break if (byte_len != 255)
  88. }
  89. }
  90. decoded << literals
  91. match_len += 4
  92. if (offset >= match_len) { # non-overlapping matches
  93. decoded << decoded.parent.substr(decoded.parent.length - offset, match_len)
  94. }
  95. elsif (offset == 1) {
  96. decoded << (decoded.parent.last * match_len)
  97. }
  98. else { # overlapping matches
  99. for i in (1 .. match_len) {
  100. decoded << decoded.parent.substr(decoded.parent.length - offset, 1)
  101. }
  102. }
  103. }
  104. }
  105. if (B_checksum) {
  106. var content_checksum = bytes2int_lsb(fh, 4)
  107. STDERR.say("Block checksum: #{content_checksum}")
  108. }
  109. if (B_indep) { # blocks are independent of each other
  110. STDOUT.print(decoded.parent)
  111. decoded = FileHandle.new_buf(:raw)
  112. }
  113. elsif (decoded.parent.length > 2**16) { # blocks are dependent
  114. STDOUT.print(decoded.parent.substr(0, -(2**16)))
  115. decoded = FileHandle.new_buf(:raw, decoded.parent.substr(-(2**16)))
  116. }
  117. }
  118. if (C_checksum) {
  119. var content_checksum = bytes2int_lsb(fh, 4)
  120. STDERR.say("Content checksum: #{content_checksum}")
  121. }
  122. STDOUT.print(decoded.parent)
  123. }