index_decoder.c 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file index_decoder.c
  4. /// \brief Decodes the Index field
  5. //
  6. // Author: Lasse Collin
  7. //
  8. // This file has been put into the public domain.
  9. // You can do whatever you want with this file.
  10. //
  11. ///////////////////////////////////////////////////////////////////////////////
  12. #include "index.h"
  13. #include "check.h"
  14. typedef struct {
  15. enum {
  16. SEQ_INDICATOR,
  17. SEQ_COUNT,
  18. SEQ_MEMUSAGE,
  19. SEQ_UNPADDED,
  20. SEQ_UNCOMPRESSED,
  21. SEQ_PADDING_INIT,
  22. SEQ_PADDING,
  23. SEQ_CRC32,
  24. } sequence;
  25. /// Memory usage limit
  26. uint64_t memlimit;
  27. /// Target Index
  28. lzma_index *index;
  29. /// Pointer give by the application, which is set after
  30. /// successful decoding.
  31. lzma_index **index_ptr;
  32. /// Number of Records left to decode.
  33. lzma_vli count;
  34. /// The most recent Unpadded Size field
  35. lzma_vli unpadded_size;
  36. /// The most recent Uncompressed Size field
  37. lzma_vli uncompressed_size;
  38. /// Position in integers
  39. size_t pos;
  40. /// CRC32 of the List of Records field
  41. uint32_t crc32;
  42. } lzma_index_coder;
  43. static lzma_ret
  44. index_decode(void *coder_ptr, const lzma_allocator *allocator,
  45. const uint8_t *restrict in, size_t *restrict in_pos,
  46. size_t in_size,
  47. uint8_t *restrict out lzma_attribute((__unused__)),
  48. size_t *restrict out_pos lzma_attribute((__unused__)),
  49. size_t out_size lzma_attribute((__unused__)),
  50. lzma_action action lzma_attribute((__unused__)))
  51. {
  52. lzma_index_coder *coder = coder_ptr;
  53. // Similar optimization as in index_encoder.c
  54. const size_t in_start = *in_pos;
  55. lzma_ret ret = LZMA_OK;
  56. while (*in_pos < in_size)
  57. switch (coder->sequence) {
  58. case SEQ_INDICATOR:
  59. // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
  60. // LZMA_FORMAT_ERROR, because a typical usage case for Index
  61. // decoder is when parsing the Stream backwards. If seeking
  62. // backward from the Stream Footer gives us something that
  63. // doesn't begin with Index Indicator, the file is considered
  64. // corrupt, not "programming error" or "unrecognized file
  65. // format". One could argue that the application should
  66. // verify the Index Indicator before trying to decode the
  67. // Index, but well, I suppose it is simpler this way.
  68. if (in[(*in_pos)++] != 0x00)
  69. return LZMA_DATA_ERROR;
  70. coder->sequence = SEQ_COUNT;
  71. break;
  72. case SEQ_COUNT:
  73. ret = lzma_vli_decode(&coder->count, &coder->pos,
  74. in, in_pos, in_size);
  75. if (ret != LZMA_STREAM_END)
  76. goto out;
  77. coder->pos = 0;
  78. coder->sequence = SEQ_MEMUSAGE;
  79. // Fall through
  80. case SEQ_MEMUSAGE:
  81. if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
  82. ret = LZMA_MEMLIMIT_ERROR;
  83. goto out;
  84. }
  85. // Tell the Index handling code how many Records this
  86. // Index has to allow it to allocate memory more efficiently.
  87. lzma_index_prealloc(coder->index, coder->count);
  88. ret = LZMA_OK;
  89. coder->sequence = coder->count == 0
  90. ? SEQ_PADDING_INIT : SEQ_UNPADDED;
  91. break;
  92. case SEQ_UNPADDED:
  93. case SEQ_UNCOMPRESSED: {
  94. lzma_vli *size = coder->sequence == SEQ_UNPADDED
  95. ? &coder->unpadded_size
  96. : &coder->uncompressed_size;
  97. ret = lzma_vli_decode(size, &coder->pos,
  98. in, in_pos, in_size);
  99. if (ret != LZMA_STREAM_END)
  100. goto out;
  101. ret = LZMA_OK;
  102. coder->pos = 0;
  103. if (coder->sequence == SEQ_UNPADDED) {
  104. // Validate that encoded Unpadded Size isn't too small
  105. // or too big.
  106. if (coder->unpadded_size < UNPADDED_SIZE_MIN
  107. || coder->unpadded_size
  108. > UNPADDED_SIZE_MAX)
  109. return LZMA_DATA_ERROR;
  110. coder->sequence = SEQ_UNCOMPRESSED;
  111. } else {
  112. // Add the decoded Record to the Index.
  113. return_if_error(lzma_index_append(
  114. coder->index, allocator,
  115. coder->unpadded_size,
  116. coder->uncompressed_size));
  117. // Check if this was the last Record.
  118. coder->sequence = --coder->count == 0
  119. ? SEQ_PADDING_INIT
  120. : SEQ_UNPADDED;
  121. }
  122. break;
  123. }
  124. case SEQ_PADDING_INIT:
  125. coder->pos = lzma_index_padding_size(coder->index);
  126. coder->sequence = SEQ_PADDING;
  127. // Fall through
  128. case SEQ_PADDING:
  129. if (coder->pos > 0) {
  130. --coder->pos;
  131. if (in[(*in_pos)++] != 0x00)
  132. return LZMA_DATA_ERROR;
  133. break;
  134. }
  135. // Finish the CRC32 calculation.
  136. coder->crc32 = lzma_crc32(in + in_start,
  137. *in_pos - in_start, coder->crc32);
  138. coder->sequence = SEQ_CRC32;
  139. // Fall through
  140. case SEQ_CRC32:
  141. do {
  142. if (*in_pos == in_size)
  143. return LZMA_OK;
  144. if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
  145. != in[(*in_pos)++])
  146. return LZMA_DATA_ERROR;
  147. } while (++coder->pos < 4);
  148. // Decoding was successful, now we can let the application
  149. // see the decoded Index.
  150. *coder->index_ptr = coder->index;
  151. // Make index NULL so we don't free it unintentionally.
  152. coder->index = NULL;
  153. return LZMA_STREAM_END;
  154. default:
  155. assert(0);
  156. return LZMA_PROG_ERROR;
  157. }
  158. out:
  159. // Update the CRC32,
  160. coder->crc32 = lzma_crc32(in + in_start,
  161. *in_pos - in_start, coder->crc32);
  162. return ret;
  163. }
  164. static void
  165. index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
  166. {
  167. lzma_index_coder *coder = coder_ptr;
  168. lzma_index_end(coder->index, allocator);
  169. lzma_free(coder, allocator);
  170. return;
  171. }
  172. static lzma_ret
  173. index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
  174. uint64_t *old_memlimit, uint64_t new_memlimit)
  175. {
  176. lzma_index_coder *coder = coder_ptr;
  177. *memusage = lzma_index_memusage(1, coder->count);
  178. *old_memlimit = coder->memlimit;
  179. if (new_memlimit != 0) {
  180. if (new_memlimit < *memusage)
  181. return LZMA_MEMLIMIT_ERROR;
  182. coder->memlimit = new_memlimit;
  183. }
  184. return LZMA_OK;
  185. }
  186. static lzma_ret
  187. index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
  188. lzma_index **i, uint64_t memlimit)
  189. {
  190. // Remember the pointer given by the application. We will set it
  191. // to point to the decoded Index only if decoding is successful.
  192. // Before that, keep it NULL so that applications can always safely
  193. // pass it to lzma_index_end() no matter did decoding succeed or not.
  194. coder->index_ptr = i;
  195. *i = NULL;
  196. // We always allocate a new lzma_index.
  197. coder->index = lzma_index_init(allocator);
  198. if (coder->index == NULL)
  199. return LZMA_MEM_ERROR;
  200. // Initialize the rest.
  201. coder->sequence = SEQ_INDICATOR;
  202. coder->memlimit = my_max(1, memlimit);
  203. coder->count = 0; // Needs to be initialized due to _memconfig().
  204. coder->pos = 0;
  205. coder->crc32 = 0;
  206. return LZMA_OK;
  207. }
  208. static lzma_ret
  209. index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
  210. lzma_index **i, uint64_t memlimit)
  211. {
  212. lzma_next_coder_init(&index_decoder_init, next, allocator);
  213. if (i == NULL)
  214. return LZMA_PROG_ERROR;
  215. lzma_index_coder *coder = next->coder;
  216. if (coder == NULL) {
  217. coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
  218. if (coder == NULL)
  219. return LZMA_MEM_ERROR;
  220. next->coder = coder;
  221. next->code = &index_decode;
  222. next->end = &index_decoder_end;
  223. next->memconfig = &index_decoder_memconfig;
  224. coder->index = NULL;
  225. } else {
  226. lzma_index_end(coder->index, allocator);
  227. }
  228. return index_decoder_reset(coder, allocator, i, memlimit);
  229. }
  230. extern LZMA_API(lzma_ret)
  231. lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
  232. {
  233. lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
  234. strm->internal->supported_actions[LZMA_RUN] = true;
  235. strm->internal->supported_actions[LZMA_FINISH] = true;
  236. return LZMA_OK;
  237. }
  238. extern LZMA_API(lzma_ret)
  239. lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
  240. const lzma_allocator *allocator,
  241. const uint8_t *in, size_t *in_pos, size_t in_size)
  242. {
  243. // Sanity checks
  244. if (i == NULL || memlimit == NULL
  245. || in == NULL || in_pos == NULL || *in_pos > in_size)
  246. return LZMA_PROG_ERROR;
  247. // Initialize the decoder.
  248. lzma_index_coder coder;
  249. return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
  250. // Store the input start position so that we can restore it in case
  251. // of an error.
  252. const size_t in_start = *in_pos;
  253. // Do the actual decoding.
  254. lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
  255. NULL, NULL, 0, LZMA_RUN);
  256. if (ret == LZMA_STREAM_END) {
  257. ret = LZMA_OK;
  258. } else {
  259. // Something went wrong, free the Index structure and restore
  260. // the input position.
  261. lzma_index_end(coder.index, allocator);
  262. *in_pos = in_start;
  263. if (ret == LZMA_OK) {
  264. // The input is truncated or otherwise corrupt.
  265. // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
  266. // like lzma_vli_decode() does in single-call mode.
  267. ret = LZMA_DATA_ERROR;
  268. } else if (ret == LZMA_MEMLIMIT_ERROR) {
  269. // Tell the caller how much memory would have
  270. // been needed.
  271. *memlimit = lzma_index_memusage(1, coder.count);
  272. }
  273. }
  274. return ret;
  275. }