lz_decoder.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file lz_decoder.h
  4. /// \brief LZ out window
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #ifndef LZMA_LZ_DECODER_H
  14. #define LZMA_LZ_DECODER_H
  15. #include "common.h"
  16. typedef struct {
  17. /// Pointer to the dictionary buffer. It can be an allocated buffer
  18. /// internal to liblzma, or it can a be a buffer given by the
  19. /// application when in single-call mode (not implemented yet).
  20. uint8_t *buf;
  21. /// Write position in dictionary. The next byte will be written to
  22. /// buf[pos].
  23. size_t pos;
  24. /// Indicates how full the dictionary is. This is used by
  25. /// dict_is_distance_valid() to detect corrupt files that would
  26. /// read beyond the beginning of the dictionary.
  27. size_t full;
  28. /// Write limit
  29. size_t limit;
  30. /// Size of the dictionary
  31. size_t size;
  32. /// True when dictionary should be reset before decoding more data.
  33. bool need_reset;
  34. } lzma_dict;
  35. typedef struct {
  36. size_t dict_size;
  37. const uint8_t *preset_dict;
  38. size_t preset_dict_size;
  39. } lzma_lz_options;
  40. typedef struct {
  41. /// Data specific to the LZ-based decoder
  42. void *coder;
  43. /// Function to decode from in[] to *dict
  44. lzma_ret (*code)(void *coder,
  45. lzma_dict *restrict dict, const uint8_t *restrict in,
  46. size_t *restrict in_pos, size_t in_size);
  47. void (*reset)(void *coder, const void *options);
  48. /// Set the uncompressed size
  49. void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size);
  50. /// Free allocated resources
  51. void (*end)(void *coder, const lzma_allocator *allocator);
  52. } lzma_lz_decoder;
  53. #define LZMA_LZ_DECODER_INIT \
  54. (lzma_lz_decoder){ \
  55. .coder = NULL, \
  56. .code = NULL, \
  57. .reset = NULL, \
  58. .set_uncompressed = NULL, \
  59. .end = NULL, \
  60. }
  61. extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
  62. const lzma_allocator *allocator,
  63. const lzma_filter_info *filters,
  64. lzma_ret (*lz_init)(lzma_lz_decoder *lz,
  65. const lzma_allocator *allocator, const void *options,
  66. lzma_lz_options *lz_options));
  67. extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
  68. extern void lzma_lz_decoder_uncompressed(
  69. void *coder, lzma_vli uncompressed_size);
  70. //////////////////////
  71. // Inline functions //
  72. //////////////////////
  73. /// Get a byte from the history buffer.
  74. static inline uint8_t
  75. dict_get(const lzma_dict *const dict, const uint32_t distance)
  76. {
  77. return dict->buf[dict->pos - distance - 1
  78. + (distance < dict->pos ? 0 : dict->size)];
  79. }
  80. /// Test if dictionary is empty.
  81. static inline bool
  82. dict_is_empty(const lzma_dict *const dict)
  83. {
  84. return dict->full == 0;
  85. }
  86. /// Validate the match distance
  87. static inline bool
  88. dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
  89. {
  90. return dict->full > distance;
  91. }
  92. /// Repeat *len bytes at distance.
  93. static inline bool
  94. dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
  95. {
  96. // Don't write past the end of the dictionary.
  97. const size_t dict_avail = dict->limit - dict->pos;
  98. uint32_t left = my_min(dict_avail, *len);
  99. *len -= left;
  100. // Repeat a block of data from the history. Because memcpy() is faster
  101. // than copying byte by byte in a loop, the copying process gets split
  102. // into three cases.
  103. if (distance < left) {
  104. // Source and target areas overlap, thus we can't use
  105. // memcpy() nor even memmove() safely.
  106. do {
  107. dict->buf[dict->pos] = dict_get(dict, distance);
  108. ++dict->pos;
  109. } while (--left > 0);
  110. } else if (distance < dict->pos) {
  111. // The easiest and fastest case
  112. memcpy(dict->buf + dict->pos,
  113. dict->buf + dict->pos - distance - 1,
  114. left);
  115. dict->pos += left;
  116. } else {
  117. // The bigger the dictionary, the more rare this
  118. // case occurs. We need to "wrap" the dict, thus
  119. // we might need two memcpy() to copy all the data.
  120. assert(dict->full == dict->size);
  121. const uint32_t copy_pos
  122. = dict->pos - distance - 1 + dict->size;
  123. uint32_t copy_size = dict->size - copy_pos;
  124. if (copy_size < left) {
  125. memmove(dict->buf + dict->pos, dict->buf + copy_pos,
  126. copy_size);
  127. dict->pos += copy_size;
  128. copy_size = left - copy_size;
  129. memcpy(dict->buf + dict->pos, dict->buf, copy_size);
  130. dict->pos += copy_size;
  131. } else {
  132. memmove(dict->buf + dict->pos, dict->buf + copy_pos,
  133. left);
  134. dict->pos += left;
  135. }
  136. }
  137. // Update how full the dictionary is.
  138. if (dict->full < dict->pos)
  139. dict->full = dict->pos;
  140. return unlikely(*len != 0);
  141. }
  142. /// Puts one byte into the dictionary. Returns true if the dictionary was
  143. /// already full and the byte couldn't be added.
  144. static inline bool
  145. dict_put(lzma_dict *dict, uint8_t byte)
  146. {
  147. if (unlikely(dict->pos == dict->limit))
  148. return true;
  149. dict->buf[dict->pos++] = byte;
  150. if (dict->pos > dict->full)
  151. dict->full = dict->pos;
  152. return false;
  153. }
  154. /// Copies arbitrary amount of data into the dictionary.
  155. static inline void
  156. dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
  157. size_t *restrict in_pos, size_t in_size,
  158. size_t *restrict left)
  159. {
  160. // NOTE: If we are being given more data than the size of the
  161. // dictionary, it could be possible to optimize the LZ decoder
  162. // so that not everything needs to go through the dictionary.
  163. // This shouldn't be very common thing in practice though, and
  164. // the slowdown of one extra memcpy() isn't bad compared to how
  165. // much time it would have taken if the data were compressed.
  166. if (in_size - *in_pos > *left)
  167. in_size = *in_pos + *left;
  168. *left -= lzma_bufcpy(in, in_pos, in_size,
  169. dict->buf, &dict->pos, dict->limit);
  170. if (dict->pos > dict->full)
  171. dict->full = dict->pos;
  172. return;
  173. }
  174. static inline void
  175. dict_reset(lzma_dict *dict)
  176. {
  177. dict->need_reset = true;
  178. return;
  179. }
  180. #endif