123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516 |
- /********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
- * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
- * *
- ********************************************************************
- function:
- last mod: $Id$
- ********************************************************************/
- #include <stdlib.h>
- #include <string.h>
- #include <ogg/ogg.h>
- #include "huffdec.h"
- #include "decint.h"
- /*Instead of storing every branching in the tree, subtrees can be collapsed
- into one node, with a table of size 1<<nbits pointing directly to its
- descedents nbits levels down.
- This allows more than one bit to be read at a time, and avoids following all
- the intermediate branches with next to no increased code complexity once
- the collapsed tree has been built.
- We do _not_ require that a subtree be complete to be collapsed, but instead
- store duplicate pointers in the table, and record the actual depth of the
- node below its parent.
- This tells us the number of bits to advance the stream after reaching it.
- This turns out to be equivalent to the method described in \cite{Hash95},
- without the requirement that codewords be sorted by length.
- If the codewords were sorted by length (so-called ``canonical-codes''), they
- could be decoded much faster via either Lindell and Moffat's approach or
- Hashemian's Condensed Huffman Code approach, the latter of which has an
- extremely small memory footprint.
- We can't use Choueka et al.'s finite state machine approach, which is
- extremely fast, because we can't allow multiple symbols to be output at a
- time; the codebook can and does change between symbols.
- It also has very large memory requirements, which impairs cache coherency.
- We store the tree packed in an array of 16-bit integers (words).
- Each node consists of a single word, followed consecutively by two or more
- indices of its children.
- Let n be the value of this first word.
- This is the number of bits that need to be read to traverse the node, and
- must be positive.
- 1<<n entries follow in the array, each an index to a child node.
- If the child is positive, then it is the index of another internal node in
- the table.
- If the child is negative or zero, then it is a leaf node.
- These are stored directly in the child pointer to save space, since they only
- require a single word.
- If a leaf node would have been encountered before reading n bits, then it is
- duplicated the necessary number of times in this table.
- Leaf nodes pack both a token value and their actual depth in the tree.
- The token in the leaf node is (-leaf&255).
- The number of bits that need to be consumed to reach the leaf, starting from
- the current node, is (-leaf>>8).
- @ARTICLE{Hash95,
- author="Reza Hashemian",
- title="Memory Efficient and High-Speed Search {Huffman} Coding",
- journal="{IEEE} Transactions on Communications",
- volume=43,
- number=10,
- pages="2576--2581",
- month=Oct,
- year=1995
- }*/
- /*The map from external spec-defined tokens to internal tokens.
- This is constructed so that any extra bits read with the original token value
- can be masked off the least significant bits of its internal token index.
- In addition, all of the tokens which require additional extra bits are placed
- at the start of the list, and grouped by type.
- OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
- giving it index 0 may simplify comparisons on some architectures.
- These requirements require some substantial reordering.*/
- static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
- /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
- 15,
- /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
- 16,
- /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
- 17,
- /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
- 88,
- /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
- 80,
- /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
- 1,
- /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
- 0,
- /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
- 48,
- /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
- 14,
- /*OC_ONE_TOKEN (0 extra bits)*/
- 56,
- /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
- 57,
- /*OC_TWO_TOKEN (0 extra bits)*/
- 58,
- /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
- 59,
- /*OC_DCT_VAL_CAT2 (1 extra bit)*/
- 60,
- 62,
- 64,
- 66,
- /*OC_DCT_VAL_CAT3 (2 extra bits)*/
- 68,
- /*OC_DCT_VAL_CAT4 (3 extra bits)*/
- 72,
- /*OC_DCT_VAL_CAT5 (4 extra bits)*/
- 2,
- /*OC_DCT_VAL_CAT6 (5 extra bits)*/
- 4,
- /*OC_DCT_VAL_CAT7 (6 extra bits)*/
- 6,
- /*OC_DCT_VAL_CAT8 (10 extra bits)*/
- 8,
- /*OC_DCT_RUN_CAT1A (1 extra bit)*/
- 18,
- 20,
- 22,
- 24,
- 26,
- /*OC_DCT_RUN_CAT1B (3 extra bits)*/
- 32,
- /*OC_DCT_RUN_CAT1C (4 extra bits)*/
- 12,
- /*OC_DCT_RUN_CAT2A (2 extra bits)*/
- 28,
- /*OC_DCT_RUN_CAT2B (3 extra bits)*/
- 40
- };
- /*The log base 2 of number of internal tokens associated with each of the spec
- tokens (i.e., how many of the extra bits are folded into the token value).
- Increasing the maximum value beyond 3 will enlarge the amount of stack
- required for tree construction.*/
- static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
- 0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
- };
- /*The size a lookup table is allowed to grow to relative to the number of
- unique nodes it contains.
- E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
- wasted (1/4 of the space must be used).
- Larger numbers can decode tokens with fewer read operations, while smaller
- numbers may save more space.
- With a sample file:
- 32233473 read calls are required when no tree collapsing is done (100.0%).
- 19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
- 11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
- 10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
- 10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
- Since a value of 2 gets us the vast majority of the speed-up with only a
- small amount of wasted memory, this is what we use.
- This value must be less than 128, or you could create a tree with more than
- 32767 entries, which would overflow the 16-bit words used to index it.*/
- #define OC_HUFF_SLUSH (2)
- /*The root of the tree is on the fast path, and a larger value here is more
- beneficial than elsewhere in the tree.
- 7 appears to give the best performance, trading off between increased use of
- the single-read fast path and cache footprint for the tables, though
- obviously this will depend on your cache size.
- Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
- #define OC_ROOT_HUFF_SLUSH (7)
- /*Unpacks a Huffman codebook.
- _opb: The buffer to unpack from.
- _tokens: Stores a list of internal tokens, in the order they were found in
- the codebook, and the lengths of their corresponding codewords.
- This is enough to completely define the codebook, while minimizing
- stack usage and avoiding temporary allocations (for platforms
- where free() is a no-op).
- Return: The number of internal tokens in the codebook, or a negative value
- on error.*/
- int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
- ogg_uint32_t code;
- int len;
- int ntokens;
- int nleaves;
- code=0;
- len=ntokens=nleaves=0;
- for(;;){
- long bits;
- bits=oc_pack_read1(_opb);
- /*Only process nodes so long as there's more bits in the buffer.*/
- if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
- /*Read an internal node:*/
- if(!bits){
- len++;
- /*Don't allow codewords longer than 32 bits.*/
- if(len>32)return TH_EBADHEADER;
- }
- /*Read a leaf node:*/
- else{
- ogg_uint32_t code_bit;
- int neb;
- int nentries;
- int token;
- /*Don't allow more than 32 spec-tokens per codebook.*/
- if(++nleaves>32)return TH_EBADHEADER;
- bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
- neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
- token=OC_DCT_TOKEN_MAP[bits];
- nentries=1<<neb;
- while(nentries-->0){
- _tokens[ntokens][0]=(unsigned char)token++;
- _tokens[ntokens][1]=(unsigned char)(len+neb);
- ntokens++;
- }
- code_bit=0x80000000U>>len-1;
- while(len>0&&(code&code_bit)){
- code^=code_bit;
- code_bit<<=1;
- len--;
- }
- if(len<=0)break;
- code|=code_bit;
- }
- }
- return ntokens;
- }
- /*Count how many tokens would be required to fill a subtree at depth _depth.
- _tokens: A list of internal tokens, in the order they are found in the
- codebook, and the lengths of their corresponding codewords.
- _depth: The depth of the desired node in the corresponding tree structure.
- Return: The number of tokens that belong to that subtree.*/
- static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
- ogg_uint32_t code;
- int ti;
- code=0;
- ti=0;
- do{
- if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
- else{
- /*Because of the expanded internal tokens, we can have codewords as long
- as 35 bits.
- A single recursion here is enough to advance past them.*/
- code++;
- ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
- }
- }
- while(code<0x80000000U);
- return ti;
- }
- /*Compute the number of bits to use for a collapsed tree node at the given
- depth.
- _tokens: A list of internal tokens, in the order they are found in the
- codebook, and the lengths of their corresponding codewords.
- _ntokens: The number of tokens corresponding to this tree node.
- _depth: The depth of this tree node.
- Return: The number of bits to use for a collapsed tree node rooted here.
- This is always at least one, even if this was a leaf node.*/
- static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
- int _ntokens,int _depth){
- int got_leaves;
- int loccupancy;
- int occupancy;
- int slush;
- int nbits;
- int best_nbits;
- slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH;
- /*It's legal to have a tree with just a single node, which requires no bits
- to decode and always returns the same token.
- However, no encoder actually does this (yet).
- To avoid a special case in oc_huff_token_decode(), we force the number of
- lookahead bits to be at least one.
- This will produce a tree that looks ahead one bit and then advances the
- stream zero bits.*/
- nbits=1;
- occupancy=2;
- got_leaves=1;
- do{
- int ti;
- if(got_leaves)best_nbits=nbits;
- nbits++;
- got_leaves=0;
- loccupancy=occupancy;
- for(occupancy=ti=0;ti<_ntokens;occupancy++){
- if(_tokens[ti][1]<_depth+nbits)ti++;
- else if(_tokens[ti][1]==_depth+nbits){
- got_leaves=1;
- ti++;
- }
- else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits);
- }
- }
- while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
- return best_nbits;
- }
- /*Determines the size in words of a Huffman tree node that represents a
- subtree of depth _nbits.
- _nbits: The depth of the subtree.
- This must be greater than zero.
- Return: The number of words required to store the node.*/
- static size_t oc_huff_node_size(int _nbits){
- return 1+(1<<_nbits);
- }
- /*Produces a collapsed-tree representation of the given token list.
- _tree: The storage for the collapsed Huffman tree.
- This may be NULL to compute the required storage size instead of
- constructing the tree.
- _tokens: A list of internal tokens, in the order they are found in the
- codebook, and the lengths of their corresponding codewords.
- _ntokens: The number of tokens corresponding to this tree node.
- Return: The number of words required to store the tree.*/
- static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
- unsigned char _tokens[][2],int _ntokens){
- ogg_int16_t node[34];
- unsigned char depth[34];
- unsigned char last[34];
- size_t ntree;
- int ti;
- int l;
- depth[0]=0;
- last[0]=(unsigned char)(_ntokens-1);
- ntree=0;
- ti=0;
- l=0;
- do{
- int nbits;
- nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
- node[l]=(ogg_int16_t)ntree;
- ntree+=oc_huff_node_size(nbits);
- if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits;
- do{
- while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
- if(_tree!=NULL){
- ogg_int16_t leaf;
- int nentries;
- nentries=1<<depth[l]+nbits-_tokens[ti][1];
- leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
- while(nentries-->0)_tree[node[l]++]=leaf;
- }
- ti++;
- }
- if(ti<=last[l]){
- /*We need to recurse*/
- depth[l+1]=(unsigned char)(depth[l]+nbits);
- if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree;
- l++;
- last[l]=
- (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
- break;
- }
- /*Pop back up a level of recursion.*/
- else if(l-->0)nbits=depth[l+1]-depth[l];
- }
- while(l>=0);
- }
- while(l>=0);
- return ntree;
- }
- /*Unpacks a set of Huffman trees, and reduces them to a collapsed
- representation.
- _opb: The buffer to unpack the trees from.
- _nodes: The table to fill with the Huffman trees.
- Return: 0 on success, or a negative value on error.
- The caller is responsible for cleaning up any partially initialized
- _nodes on failure.*/
- int oc_huff_trees_unpack(oc_pack_buf *_opb,
- ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
- int i;
- for(i=0;i<TH_NHUFFMAN_TABLES;i++){
- unsigned char tokens[256][2];
- int ntokens;
- ogg_int16_t *tree;
- size_t size;
- /*Unpack the full tree into a temporary buffer.*/
- ntokens=oc_huff_tree_unpack(_opb,tokens);
- if(ntokens<0)return ntokens;
- /*Figure out how big the collapsed tree will be and allocate space for it.*/
- size=oc_huff_tree_collapse(NULL,tokens,ntokens);
- /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
- OC_ROOT_HUFF_SLUSH too large.*/
- if(size>32767)return TH_EIMPL;
- tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree));
- if(tree==NULL)return TH_EFAULT;
- /*Construct the collapsed the tree.*/
- oc_huff_tree_collapse(tree,tokens,ntokens);
- _nodes[i]=tree;
- }
- return 0;
- }
- /*Determines the size in words of a Huffman subtree.
- _tree: The complete Huffman tree.
- _node: The index of the root of the desired subtree.
- Return: The number of words required to store the tree.*/
- static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
- size_t size;
- int nchildren;
- int n;
- int i;
- n=_tree[_node];
- size=oc_huff_node_size(n);
- nchildren=1<<n;
- i=0;
- do{
- int child;
- child=_tree[_node+i+1];
- if(child<=0)i+=1<<n-(-child>>8);
- else{
- size+=oc_huff_tree_size(_tree,child);
- i++;
- }
- }
- while(i<nchildren);
- return size;
- }
- /*Makes a copy of the given set of Huffman trees.
- _dst: The array to store the copy in.
- _src: The array of trees to copy.*/
- int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
- const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
- int total;
- int i;
- total=0;
- for(i=0;i<TH_NHUFFMAN_TABLES;i++){
- size_t size;
- size=oc_huff_tree_size(_src[i],0);
- total+=size;
- _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
- if(_dst[i]==NULL){
- while(i-->0)_ogg_free(_dst[i]);
- return TH_EFAULT;
- }
- memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
- }
- return 0;
- }
- /*Frees the memory used by a set of Huffman trees.
- _nodes: The array of trees to free.*/
- void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
- int i;
- for(i=0;i<TH_NHUFFMAN_TABLES;i++)_ogg_free(_nodes[i]);
- }
- /*Unpacks a single token using the given Huffman tree.
- _opb: The buffer to unpack the token from.
- _node: The tree to unpack the token with.
- Return: The token value.*/
- int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
- const unsigned char *ptr;
- const unsigned char *stop;
- oc_pb_window window;
- int available;
- long bits;
- int node;
- int n;
- ptr=_opb->ptr;
- window=_opb->window;
- stop=_opb->stop;
- available=_opb->bits;
- node=0;
- for(;;){
- n=_tree[node];
- if(n>available){
- unsigned shift;
- shift=OC_PB_WINDOW_SIZE-available;
- do{
- /*We don't bother setting eof because we won't check for it after we've
- started decoding DCT tokens.*/
- if(ptr>=stop){
- shift=(unsigned)-OC_LOTS_OF_BITS;
- break;
- }
- shift-=8;
- window|=(oc_pb_window)*ptr++<<shift;
- }
- while(shift>=8);
- /*Note: We never request more than 24 bits, so there's no need to fill in
- the last partial byte here.*/
- available=OC_PB_WINDOW_SIZE-shift;
- }
- bits=window>>OC_PB_WINDOW_SIZE-n;
- node=_tree[node+1+bits];
- if(node<=0)break;
- window<<=n;
- available-=n;
- }
- node=-node;
- n=node>>8;
- window<<=n;
- available-=n;
- _opb->ptr=ptr;
- _opb->window=window;
- _opb->bits=available;
- return node&255;
- }
|