123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522 |
- /* Copyright 2017 Google Inc. All Rights Reserved.
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
- */
- /* Shared Dictionary definition and utilities. */
- #include <brotli/shared_dictionary.h>
- #include <memory.h>
- #include <stdlib.h> /* malloc, free */
- #include <stdio.h>
- #include "dictionary.h"
- #include "platform.h"
- #include "shared_dictionary_internal.h"
- #if defined(__cplusplus) || defined(c_plusplus)
- extern "C" {
- #endif
- #if defined(BROTLI_EXPERIMENTAL)
- #define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
- - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
- /* Max allowed by spec */
- #define BROTLI_MAX_SIZE_BITS 15u
- /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
- static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
- BROTLI_BOOL* result) {
- uint8_t value;
- size_t position = *pos;
- if (position >= size) return BROTLI_FALSE; /* past file end */
- value = encoded[position++];
- if (value > 1) return BROTLI_FALSE; /* invalid bool */
- *result = TO_BROTLI_BOOL(value);
- *pos = position;
- return BROTLI_TRUE; /* success */
- }
- /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
- static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
- uint8_t* result) {
- size_t position = *pos;
- if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
- *result = encoded[position++];
- *pos = position;
- return BROTLI_TRUE;
- }
- /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
- static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
- uint16_t* result) {
- size_t position = *pos;
- if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
- *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
- position += 2;
- *pos = position;
- return BROTLI_TRUE;
- }
- /* Reads a varint into a uint32_t, and returns error if it's too large */
- /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
- static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
- size_t* pos, uint32_t* result) {
- int num = 0;
- uint8_t byte;
- *result = 0;
- for (;;) {
- if (*pos >= size) return BROTLI_FALSE;
- byte = encoded[(*pos)++];
- if (num == 4 && byte > 15) return BROTLI_FALSE;
- *result |= (uint32_t)(byte & 127) << (num * 7);
- if (byte < 128) return BROTLI_TRUE;
- num++;
- }
- }
- /* Returns the total length of word list. */
- static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
- uint32_t* offsets_by_length) {
- uint32_t pos = 0;
- uint32_t i;
- for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
- offsets_by_length[i] = pos;
- if (size_bits_by_length[i] != 0) {
- pos += i << size_bits_by_length[i];
- }
- }
- return pos;
- }
- static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
- size_t* pos, BrotliDictionary* out) {
- size_t offset;
- size_t i;
- size_t position = *pos;
- if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
- return BROTLI_FALSE;
- }
- memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
- memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
- &encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
- for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
- i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
- if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
- return BROTLI_FALSE;
- }
- }
- position += BROTLI_NUM_ENCODED_LENGTHS;
- offset = BrotliSizeBitsToOffsets(
- out->size_bits_by_length, out->offsets_by_length);
- out->data = &encoded[position];
- out->data_size = offset;
- position += offset;
- if (position > size) return BROTLI_FALSE;
- *pos = position;
- return BROTLI_TRUE;
- }
- /* Computes the cutOffTransforms of a BrotliTransforms which already has the
- transforms data correctly filled in. */
- static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
- uint32_t i;
- for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
- transforms->cutOffTransforms[i] = -1;
- }
- for (i = 0; i < transforms->num_transforms; i++) {
- const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
- uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
- const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
- if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
- transforms->cutOffTransforms[type] == -1) {
- transforms->cutOffTransforms[type] = (int16_t)i;
- }
- }
- }
- static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
- size_t* pos, BrotliTransforms* out, uint16_t* out_table,
- size_t* out_table_size) {
- size_t position = *pos;
- size_t offset = 0;
- size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */
- size_t data_length = 0;
- /* PREFIX_SUFFIX_LENGTH */
- if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
- return BROTLI_FALSE;
- }
- data_length = out->prefix_suffix_size;
- /* Must at least have space for null terminator. */
- if (data_length < 1) return BROTLI_FALSE;
- out->prefix_suffix = &encoded[position];
- if (position + data_length >= size) return BROTLI_FALSE;
- while (BROTLI_TRUE) {
- /* STRING_LENGTH */
- size_t stringlet_len = encoded[position + offset];
- out_table[stringlet_count] = (uint16_t)offset;
- stringlet_count++;
- offset++;
- if (stringlet_len == 0) {
- if (offset == data_length) {
- break;
- } else {
- return BROTLI_FALSE;
- }
- }
- if (stringlet_count > 255) return BROTLI_FALSE;
- offset += stringlet_len;
- if (offset >= data_length) return BROTLI_FALSE;
- }
- position += data_length;
- *pos = position;
- *out_table_size = (uint16_t)stringlet_count;
- return BROTLI_TRUE;
- }
- static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
- size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
- size_t* prefix_suffix_count) {
- uint32_t i;
- BROTLI_BOOL has_params = BROTLI_FALSE;
- BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
- size_t position = *pos;
- size_t stringlet_cnt = 0;
- if (position >= size) return BROTLI_FALSE;
- prefix_suffix_ok = ParsePrefixSuffixTable(
- size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
- if (!prefix_suffix_ok) return BROTLI_FALSE;
- out->prefix_suffix_map = prefix_suffix_table;
- *prefix_suffix_count = stringlet_cnt;
- out->num_transforms = encoded[position++];
- out->transforms = &encoded[position];
- position += (size_t)out->num_transforms * 3;
- if (position > size) return BROTLI_FALSE;
- /* Check for errors and read extra parameters. */
- for (i = 0; i < out->num_transforms; i++) {
- uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
- uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
- uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
- if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
- if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
- if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
- if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
- type == BROTLI_TRANSFORM_SHIFT_ALL) {
- has_params = BROTLI_TRUE;
- }
- }
- if (has_params) {
- out->params = &encoded[position];
- position += (size_t)out->num_transforms * 2;
- if (position > size) return BROTLI_FALSE;
- for (i = 0; i < out->num_transforms; i++) {
- uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
- if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
- type != BROTLI_TRANSFORM_SHIFT_ALL) {
- if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
- return BROTLI_FALSE;
- }
- }
- }
- } else {
- out->params = NULL;
- }
- ComputeCutoffTransforms(out);
- *pos = position;
- return BROTLI_TRUE;
- }
- static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
- size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
- size_t pos = 0;
- uint32_t chunk_size = 0;
- uint8_t num_word_lists;
- uint8_t num_transform_lists;
- *is_custom_static_dict = BROTLI_FALSE;
- *num_prefix = 0;
- /* Skip magic header bytes. */
- pos += 2;
- /* LZ77_DICTIONARY_LENGTH */
- if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
- if (chunk_size != 0) {
- /* This limitation is not specified but the 32-bit Brotli decoder for now */
- if (chunk_size > 1073741823) return BROTLI_FALSE;
- *num_prefix = 1;
- if (pos + chunk_size > size) return BROTLI_FALSE;
- pos += chunk_size;
- }
- if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
- return BROTLI_FALSE;
- }
- if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
- return BROTLI_FALSE;
- }
- if (num_word_lists > 0 || num_transform_lists > 0) {
- *is_custom_static_dict = BROTLI_TRUE;
- }
- return BROTLI_TRUE;
- }
- static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
- BrotliSharedDictionary* dict) {
- uint32_t i;
- size_t pos = 0;
- uint32_t chunk_size = 0;
- size_t total_prefix_suffix_count = 0;
- size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
- uint16_t temporary_prefix_suffix_table[256];
- /* Skip magic header bytes. */
- pos += 2;
- /* LZ77_DICTIONARY_LENGTH */
- if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
- if (chunk_size != 0) {
- if (pos + chunk_size > size) return BROTLI_FALSE;
- dict->prefix_size[dict->num_prefix] = chunk_size;
- dict->prefix[dict->num_prefix] = &encoded[pos];
- dict->num_prefix++;
- /* LZ77_DICTIONARY_LENGTH bytes. */
- pos += chunk_size;
- }
- /* NUM_WORD_LISTS */
- if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
- return BROTLI_FALSE;
- }
- if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
- return BROTLI_FALSE;
- }
- if (dict->num_word_lists != 0) {
- dict->words_instances = (BrotliDictionary*)dict->alloc_func(
- dict->memory_manager_opaque,
- dict->num_word_lists * sizeof(*dict->words_instances));
- if (!dict->words_instances) return BROTLI_FALSE; /* OOM */
- }
- for (i = 0; i < dict->num_word_lists; i++) {
- if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
- return BROTLI_FALSE;
- }
- }
- /* NUM_TRANSFORM_LISTS */
- if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
- return BROTLI_FALSE;
- }
- if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
- return BROTLI_FALSE;
- }
- if (dict->num_transform_lists != 0) {
- dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
- dict->memory_manager_opaque,
- dict->num_transform_lists * sizeof(*dict->transforms_instances));
- if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */
- }
- for (i = 0; i < dict->num_transform_lists; i++) {
- BROTLI_BOOL ok = BROTLI_FALSE;
- size_t prefix_suffix_count = 0;
- trasform_list_start[i] = pos;
- dict->transforms_instances[i].prefix_suffix_map =
- temporary_prefix_suffix_table;
- ok = ParseTransformsList(
- size, encoded, &pos, &dict->transforms_instances[i],
- temporary_prefix_suffix_table, &prefix_suffix_count);
- if (!ok) return BROTLI_FALSE;
- total_prefix_suffix_count += prefix_suffix_count;
- }
- if (total_prefix_suffix_count != 0) {
- dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
- dict->memory_manager_opaque,
- total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
- if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */
- }
- total_prefix_suffix_count = 0;
- for (i = 0; i < dict->num_transform_lists; i++) {
- size_t prefix_suffix_count = 0;
- size_t position = trasform_list_start[i];
- uint16_t* prefix_suffix_map =
- &dict->prefix_suffix_maps[total_prefix_suffix_count];
- BROTLI_BOOL ok = ParsePrefixSuffixTable(
- size, encoded, &position, &dict->transforms_instances[i],
- prefix_suffix_map, &prefix_suffix_count);
- if (!ok) return BROTLI_FALSE;
- dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
- total_prefix_suffix_count += prefix_suffix_count;
- }
- if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
- if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
- return BROTLI_FALSE;
- }
- if (dict->num_dictionaries == 0 ||
- dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
- return BROTLI_FALSE;
- }
- for (i = 0; i < dict->num_dictionaries; i++) {
- uint8_t words_index;
- uint8_t transforms_index;
- if (!ReadUint8(encoded, size, &pos, &words_index)) {
- return BROTLI_FALSE;
- }
- if (words_index > dict->num_word_lists) return BROTLI_FALSE;
- if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
- return BROTLI_FALSE;
- }
- if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
- dict->words[i] = words_index == dict->num_word_lists ?
- BrotliGetDictionary() : &dict->words_instances[words_index];
- dict->transforms[i] = transforms_index == dict->num_transform_lists ?
- BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
- }
- /* CONTEXT_ENABLED */
- if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
- return BROTLI_FALSE;
- }
- /* CONTEXT_MAP */
- if (dict->context_based) {
- for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
- if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
- return BROTLI_FALSE;
- }
- if (dict->context_map[i] >= dict->num_dictionaries) {
- return BROTLI_FALSE;
- }
- }
- }
- } else {
- dict->context_based = BROTLI_FALSE;
- dict->num_dictionaries = 1;
- dict->words[0] = BrotliGetDictionary();
- dict->transforms[0] = BrotliGetTransforms();
- }
- return BROTLI_TRUE;
- }
- /* Decodes shared dictionary and verifies correctness.
- Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
- The BrotliSharedDictionary must already have been initialized. If the
- BrotliSharedDictionary already contains data, compound dictionaries
- will be appended, but an error will be returned if it already has
- custom words or transforms.
- TODO(lode): link to RFC for shared brotli once published. */
- static BROTLI_BOOL DecodeSharedDictionary(
- const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
- uint32_t num_prefix = 0;
- BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
- BROTLI_BOOL has_custom_static_dict =
- dict->num_word_lists > 0 || dict->num_transform_lists > 0;
- /* Check magic header bytes. */
- if (size < 2) return BROTLI_FALSE;
- if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
- if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
- return BROTLI_FALSE;
- }
- if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
- return BROTLI_FALSE;
- }
- /* Cannot combine different static dictionaries, only prefix dictionaries */
- if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
- return ParseDictionary(encoded, size, dict);
- }
- #endif /* BROTLI_EXPERIMENTAL */
- void BrotliSharedDictionaryDestroyInstance(
- BrotliSharedDictionary* dict) {
- if (!dict) {
- return;
- } else {
- brotli_free_func free_func = dict->free_func;
- void* opaque = dict->memory_manager_opaque;
- /* Cleanup. */
- free_func(opaque, dict->words_instances);
- free_func(opaque, dict->transforms_instances);
- free_func(opaque, dict->prefix_suffix_maps);
- /* Self-destruction. */
- free_func(opaque, dict);
- }
- }
- BROTLI_BOOL BrotliSharedDictionaryAttach(
- BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
- size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
- if (!dict) {
- return BROTLI_FALSE;
- }
- #if defined(BROTLI_EXPERIMENTAL)
- if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
- return DecodeSharedDictionary(data, data_size, dict);
- }
- #endif /* BROTLI_EXPERIMENTAL */
- if (type == BROTLI_SHARED_DICTIONARY_RAW) {
- if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
- return BROTLI_FALSE;
- }
- dict->prefix_size[dict->num_prefix] = data_size;
- dict->prefix[dict->num_prefix] = data;
- dict->num_prefix++;
- return BROTLI_TRUE;
- }
- return BROTLI_FALSE;
- }
- BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
- brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
- BrotliSharedDictionary* dict = 0;
- if (!alloc_func && !free_func) {
- dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
- } else if (alloc_func && free_func) {
- dict = (BrotliSharedDictionary*)alloc_func(
- opaque, sizeof(BrotliSharedDictionary));
- }
- if (dict == 0) {
- return 0;
- }
- /* TODO(eustas): explicitly initialize all the fields? */
- memset(dict, 0, sizeof(BrotliSharedDictionary));
- dict->context_based = BROTLI_FALSE;
- dict->num_dictionaries = 1;
- dict->num_word_lists = 0;
- dict->num_transform_lists = 0;
- dict->words[0] = BrotliGetDictionary();
- dict->transforms[0] = BrotliGetTransforms();
- dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
- dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
- dict->memory_manager_opaque = opaque;
- return dict;
- }
- #if defined(__cplusplus) || defined(c_plusplus)
- } /* extern "C" */
- #endif
|