block_buffer_encoder.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file block_buffer_encoder.c
  4. /// \brief Single-call .xz Block encoder
  5. //
  6. // Author: Lasse Collin
  7. //
  8. // This file has been put into the public domain.
  9. // You can do whatever you want with this file.
  10. //
  11. ///////////////////////////////////////////////////////////////////////////////
  12. #include "block_buffer_encoder.h"
  13. #include "block_encoder.h"
  14. #include "filter_encoder.h"
  15. #include "lzma2_encoder.h"
  16. #include "check.h"
  17. /// Estimate the maximum size of the Block Header and Check fields for
  18. /// a Block that uses LZMA2 uncompressed chunks. We could use
  19. /// lzma_block_header_size() but this is simpler.
  20. ///
  21. /// Block Header Size + Block Flags + Compressed Size
  22. /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check
  23. /// and round up to the next multiple of four to take Header Padding
  24. /// into account.
  25. #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \
  26. + LZMA_CHECK_SIZE_MAX + 3) & ~3)
  27. static uint64_t
  28. lzma2_bound(uint64_t uncompressed_size)
  29. {
  30. // Prevent integer overflow in overhead calculation.
  31. if (uncompressed_size > COMPRESSED_SIZE_MAX)
  32. return 0;
  33. // Calculate the exact overhead of the LZMA2 headers: Round
  34. // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX,
  35. // multiply by the size of per-chunk header, and add one byte for
  36. // the end marker.
  37. const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
  38. / LZMA2_CHUNK_MAX)
  39. * LZMA2_HEADER_UNCOMPRESSED + 1;
  40. // Catch the possible integer overflow.
  41. if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size)
  42. return 0;
  43. return uncompressed_size + overhead;
  44. }
  45. extern uint64_t
  46. lzma_block_buffer_bound64(uint64_t uncompressed_size)
  47. {
  48. // If the data doesn't compress, we always use uncompressed
  49. // LZMA2 chunks.
  50. uint64_t lzma2_size = lzma2_bound(uncompressed_size);
  51. if (lzma2_size == 0)
  52. return 0;
  53. // Take Block Padding into account.
  54. lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
  55. // No risk of integer overflow because lzma2_bound() already takes
  56. // into account the size of the headers in the Block.
  57. return HEADERS_BOUND + lzma2_size;
  58. }
  59. extern LZMA_API(size_t)
  60. lzma_block_buffer_bound(size_t uncompressed_size)
  61. {
  62. uint64_t ret = lzma_block_buffer_bound64(uncompressed_size);
  63. #if SIZE_MAX < UINT64_MAX
  64. // Catch the possible integer overflow on 32-bit systems.
  65. if (ret > SIZE_MAX)
  66. return 0;
  67. #endif
  68. return ret;
  69. }
  70. static lzma_ret
  71. block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
  72. uint8_t *out, size_t *out_pos, size_t out_size)
  73. {
  74. // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
  75. // all, but LZMA2 always requires a dictionary, so use the minimum
  76. // value to minimize memory usage of the decoder.
  77. lzma_options_lzma lzma2 = {
  78. .dict_size = LZMA_DICT_SIZE_MIN,
  79. };
  80. lzma_filter filters[2];
  81. filters[0].id = LZMA_FILTER_LZMA2;
  82. filters[0].options = &lzma2;
  83. filters[1].id = LZMA_VLI_UNKNOWN;
  84. // Set the above filter options to *block temporarily so that we can
  85. // encode the Block Header.
  86. lzma_filter *filters_orig = block->filters;
  87. block->filters = filters;
  88. if (lzma_block_header_size(block) != LZMA_OK) {
  89. block->filters = filters_orig;
  90. return LZMA_PROG_ERROR;
  91. }
  92. // Check that there's enough output space. The caller has already
  93. // set block->compressed_size to what lzma2_bound() has returned,
  94. // so we can reuse that value. We know that compressed_size is a
  95. // known valid VLI and header_size is a small value so their sum
  96. // will never overflow.
  97. assert(block->compressed_size == lzma2_bound(in_size));
  98. if (out_size - *out_pos
  99. < block->header_size + block->compressed_size) {
  100. block->filters = filters_orig;
  101. return LZMA_BUF_ERROR;
  102. }
  103. if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) {
  104. block->filters = filters_orig;
  105. return LZMA_PROG_ERROR;
  106. }
  107. block->filters = filters_orig;
  108. *out_pos += block->header_size;
  109. // Encode the data using LZMA2 uncompressed chunks.
  110. size_t in_pos = 0;
  111. uint8_t control = 0x01; // Dictionary reset
  112. while (in_pos < in_size) {
  113. // Control byte: Indicate uncompressed chunk, of which
  114. // the first resets the dictionary.
  115. out[(*out_pos)++] = control;
  116. control = 0x02; // No dictionary reset
  117. // Size of the uncompressed chunk
  118. const size_t copy_size
  119. = my_min(in_size - in_pos, LZMA2_CHUNK_MAX);
  120. out[(*out_pos)++] = (copy_size - 1) >> 8;
  121. out[(*out_pos)++] = (copy_size - 1) & 0xFF;
  122. // The actual data
  123. assert(*out_pos + copy_size <= out_size);
  124. memcpy(out + *out_pos, in + in_pos, copy_size);
  125. in_pos += copy_size;
  126. *out_pos += copy_size;
  127. }
  128. // End marker
  129. out[(*out_pos)++] = 0x00;
  130. assert(*out_pos <= out_size);
  131. return LZMA_OK;
  132. }
  133. static lzma_ret
  134. block_encode_normal(lzma_block *block, const lzma_allocator *allocator,
  135. const uint8_t *in, size_t in_size,
  136. uint8_t *out, size_t *out_pos, size_t out_size)
  137. {
  138. // Find out the size of the Block Header.
  139. return_if_error(lzma_block_header_size(block));
  140. // Reserve space for the Block Header and skip it for now.
  141. if (out_size - *out_pos <= block->header_size)
  142. return LZMA_BUF_ERROR;
  143. const size_t out_start = *out_pos;
  144. *out_pos += block->header_size;
  145. // Limit out_size so that we stop encoding if the output would grow
  146. // bigger than what uncompressed Block would be.
  147. if (out_size - *out_pos > block->compressed_size)
  148. out_size = *out_pos + block->compressed_size;
  149. // TODO: In many common cases this could be optimized to use
  150. // significantly less memory.
  151. lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT;
  152. lzma_ret ret = lzma_raw_encoder_init(
  153. &raw_encoder, allocator, block->filters);
  154. if (ret == LZMA_OK) {
  155. size_t in_pos = 0;
  156. ret = raw_encoder.code(raw_encoder.coder, allocator,
  157. in, &in_pos, in_size, out, out_pos, out_size,
  158. LZMA_FINISH);
  159. }
  160. // NOTE: This needs to be run even if lzma_raw_encoder_init() failed.
  161. lzma_next_end(&raw_encoder, allocator);
  162. if (ret == LZMA_STREAM_END) {
  163. // Compression was successful. Write the Block Header.
  164. block->compressed_size
  165. = *out_pos - (out_start + block->header_size);
  166. ret = lzma_block_header_encode(block, out + out_start);
  167. if (ret != LZMA_OK)
  168. ret = LZMA_PROG_ERROR;
  169. } else if (ret == LZMA_OK) {
  170. // Output buffer became full.
  171. ret = LZMA_BUF_ERROR;
  172. }
  173. // Reset *out_pos if something went wrong.
  174. if (ret != LZMA_OK)
  175. *out_pos = out_start;
  176. return ret;
  177. }
  178. static lzma_ret
  179. block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
  180. const uint8_t *in, size_t in_size,
  181. uint8_t *out, size_t *out_pos, size_t out_size,
  182. bool try_to_compress)
  183. {
  184. // Validate the arguments.
  185. if (block == NULL || (in == NULL && in_size != 0) || out == NULL
  186. || out_pos == NULL || *out_pos > out_size)
  187. return LZMA_PROG_ERROR;
  188. // The contents of the structure may depend on the version so
  189. // check the version before validating the contents of *block.
  190. if (block->version > 1)
  191. return LZMA_OPTIONS_ERROR;
  192. if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
  193. || (try_to_compress && block->filters == NULL))
  194. return LZMA_PROG_ERROR;
  195. if (!lzma_check_is_supported(block->check))
  196. return LZMA_UNSUPPORTED_CHECK;
  197. // Size of a Block has to be a multiple of four, so limit the size
  198. // here already. This way we don't need to check it again when adding
  199. // Block Padding.
  200. out_size -= (out_size - *out_pos) & 3;
  201. // Get the size of the Check field.
  202. const size_t check_size = lzma_check_size(block->check);
  203. assert(check_size != UINT32_MAX);
  204. // Reserve space for the Check field.
  205. if (out_size - *out_pos <= check_size)
  206. return LZMA_BUF_ERROR;
  207. out_size -= check_size;
  208. // Initialize block->uncompressed_size and calculate the worst-case
  209. // value for block->compressed_size.
  210. block->uncompressed_size = in_size;
  211. block->compressed_size = lzma2_bound(in_size);
  212. if (block->compressed_size == 0)
  213. return LZMA_DATA_ERROR;
  214. // Do the actual compression.
  215. lzma_ret ret = LZMA_BUF_ERROR;
  216. if (try_to_compress)
  217. ret = block_encode_normal(block, allocator,
  218. in, in_size, out, out_pos, out_size);
  219. if (ret != LZMA_OK) {
  220. // If the error was something else than output buffer
  221. // becoming full, return the error now.
  222. if (ret != LZMA_BUF_ERROR)
  223. return ret;
  224. // The data was uncompressible (at least with the options
  225. // given to us) or the output buffer was too small. Use the
  226. // uncompressed chunks of LZMA2 to wrap the data into a valid
  227. // Block. If we haven't been given enough output space, even
  228. // this may fail.
  229. return_if_error(block_encode_uncompressed(block, in, in_size,
  230. out, out_pos, out_size));
  231. }
  232. assert(*out_pos <= out_size);
  233. // Block Padding. No buffer overflow here, because we already adjusted
  234. // out_size so that (out_size - out_start) is a multiple of four.
  235. // Thus, if the buffer is full, the loop body can never run.
  236. for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) {
  237. assert(*out_pos < out_size);
  238. out[(*out_pos)++] = 0x00;
  239. }
  240. // If there's no Check field, we are done now.
  241. if (check_size > 0) {
  242. // Calculate the integrity check. We reserved space for
  243. // the Check field earlier so we don't need to check for
  244. // available output space here.
  245. lzma_check_state check;
  246. lzma_check_init(&check, block->check);
  247. lzma_check_update(&check, block->check, in, in_size);
  248. lzma_check_finish(&check, block->check);
  249. memcpy(block->raw_check, check.buffer.u8, check_size);
  250. memcpy(out + *out_pos, check.buffer.u8, check_size);
  251. *out_pos += check_size;
  252. }
  253. return LZMA_OK;
  254. }
  255. extern LZMA_API(lzma_ret)
  256. lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
  257. const uint8_t *in, size_t in_size,
  258. uint8_t *out, size_t *out_pos, size_t out_size)
  259. {
  260. return block_buffer_encode(block, allocator,
  261. in, in_size, out, out_pos, out_size, true);
  262. }
  263. extern LZMA_API(lzma_ret)
  264. lzma_block_uncomp_encode(lzma_block *block,
  265. const uint8_t *in, size_t in_size,
  266. uint8_t *out, size_t *out_pos, size_t out_size)
  267. {
  268. // It won't allocate any memory from heap so no need
  269. // for lzma_allocator.
  270. return block_buffer_encode(block, NULL,
  271. in, in_size, out, out_pos, out_size, false);
  272. }