12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710 |
- /********************************************************************
- * *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
- * *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
- * by the Xiph.Org Foundation http://www.xiph.org/ *
- * *
- ********************************************************************
- function: mode selection code
- last mod: $Id$
- ********************************************************************/
- #include <limits.h>
- #include <string.h>
- #include "encint.h"
- #include "modedec.h"
/*Forward typedefs so that the analysis structures can be referred to by
   their short names.*/
typedef struct oc_fr_state           oc_fr_state;
typedef struct oc_qii_state          oc_qii_state;
typedef struct oc_enc_pipeline_state oc_enc_pipeline_state;
typedef struct oc_rd_metric          oc_rd_metric;
typedef struct oc_mode_choice        oc_mode_choice;
- /*There are 8 possible schemes used to encode macro block modes.
- Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes.
- The same set of Huffman codes is used for each of these 7 schemes, but the
- mode assigned to each codeword varies.
- Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream,
- while schemes 1-6 have a fixed mapping.
- Scheme 7 just encodes each mode directly in 3 bits.*/
- /*The mode orderings for the various mode coding schemes.
- Scheme 0 uses a custom alphabet, which is not stored in this table.
- This is the inverse of the equivalent table OC_MODE_ALPHABETS in the
- decoder.*/
- static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={
- /*Last MV dominates.*/
- /*L P M N I G GM 4*/
- {3,4,2,0,1,5,6,7},
- /*L P N M I G GM 4*/
- {2,4,3,0,1,5,6,7},
- /*L M P N I G GM 4*/
- {3,4,1,0,2,5,6,7},
- /*L M N P I G GM 4*/
- {2,4,1,0,3,5,6,7},
- /*No MV dominates.*/
- /*N L P M I G GM 4*/
- {0,4,3,1,2,5,6,7},
- /*N G L P M I GM 4*/
- {0,5,4,2,3,1,6,7},
- /*Default ordering.*/
- /*N I M L P G GM 4*/
- {0,1,2,3,4,5,6,7}
- };
- /*Initialize the mode scheme chooser.
- This need only be called once per encoder.*/
- void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
- int si;
- _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
- for(si=1;si<8;si++)_chooser->mode_ranks[si]=OC_MODE_RANKS[si-1];
- }
- /*Reset the mode scheme chooser.
- This needs to be called once for each frame, including the first.*/
- static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
- int si;
- memset(_chooser->mode_counts,0,OC_NMODES*sizeof(*_chooser->mode_counts));
- /*Scheme 0 starts with 24 bits to store the mode list in.*/
- _chooser->scheme_bits[0]=24;
- memset(_chooser->scheme_bits+1,0,7*sizeof(*_chooser->scheme_bits));
- for(si=0;si<8;si++){
- /*Scheme 7 should always start first, and scheme 0 should always start
- last.*/
- _chooser->scheme_list[si]=7-si;
- _chooser->scheme0_list[si]=_chooser->scheme0_ranks[si]=si;
- }
- }
- /*This is the real purpose of this data structure: not actually selecting a
- mode scheme, but estimating the cost of coding a given mode given all the
- modes selected so far.
- This is done via opportunity cost: the cost is defined as the number of bits
- required to encode all the modes selected so far including the current one
- using the best possible scheme, minus the number of bits required to encode
- all the modes selected so far not including the current one using the best
- possible scheme.
- The computational expense of doing this probably makes it overkill.
- Just be happy we take a greedy approach instead of trying to solve the
- global mode-selection problem (which is NP-hard).
- _mb_mode: The mode to determine the cost of.
- Return: The number of bits required to code this mode.*/
- static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
- int _mb_mode){
- int scheme0;
- int scheme1;
- int best_bits;
- int mode_bits;
- int si;
- int scheme_bits;
- scheme0=_chooser->scheme_list[0];
- scheme1=_chooser->scheme_list[1];
- best_bits=_chooser->scheme_bits[scheme0];
- mode_bits=OC_MODE_BITS[scheme0+1>>3][_chooser->mode_ranks[scheme0][_mb_mode]];
- /*Typical case: If the difference between the best scheme and the next best
- is greater than 6 bits, then adding just one mode cannot change which
- scheme we use.*/
- if(_chooser->scheme_bits[scheme1]-best_bits>6)return mode_bits;
- /*Otherwise, check to see if adding this mode selects a different scheme as
- the best.*/
- si=1;
- best_bits+=mode_bits;
- do{
- /*For any scheme except 0, we can just use the bit cost of the mode's rank
- in that scheme.*/
- if(scheme1!=0){
- scheme_bits=_chooser->scheme_bits[scheme1]+
- OC_MODE_BITS[scheme1+1>>3][_chooser->mode_ranks[scheme1][_mb_mode]];
- }
- else{
- int ri;
- /*For scheme 0, incrementing the mode count could potentially change the
- mode's rank.
- Find the index where the mode would be moved to in the optimal list,
- and use its bit cost instead of the one for the mode's current
- position in the list.*/
- /*We don't recompute scheme bits; this is computing opportunity cost, not
- an update.*/
- for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0&&
- _chooser->mode_counts[_mb_mode]>=
- _chooser->mode_counts[_chooser->scheme0_list[ri-1]];ri--);
- scheme_bits=_chooser->scheme_bits[0]+OC_MODE_BITS[0][ri];
- }
- if(scheme_bits<best_bits)best_bits=scheme_bits;
- if(++si>=8)break;
- scheme1=_chooser->scheme_list[si];
- }
- while(_chooser->scheme_bits[scheme1]-_chooser->scheme_bits[scheme0]<=6);
- return best_bits-_chooser->scheme_bits[scheme0];
- }
- /*Incrementally update the mode counts and per-scheme bit counts and re-order
- the scheme lists once a mode has been selected.
- _mb_mode: The mode that was chosen.*/
- static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
- int _mb_mode){
- int ri;
- int si;
- _chooser->mode_counts[_mb_mode]++;
- /*Re-order the scheme0 mode list if necessary.*/
- for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0;ri--){
- int pmode;
- pmode=_chooser->scheme0_list[ri-1];
- if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mb_mode])break;
- /*Reorder the mode ranking.*/
- _chooser->scheme0_ranks[pmode]++;
- _chooser->scheme0_list[ri]=pmode;
- }
- _chooser->scheme0_ranks[_mb_mode]=ri;
- _chooser->scheme0_list[ri]=_mb_mode;
- /*Now add the bit cost for the mode to each scheme.*/
- for(si=0;si<8;si++){
- _chooser->scheme_bits[si]+=
- OC_MODE_BITS[si+1>>3][_chooser->mode_ranks[si][_mb_mode]];
- }
- /*Finally, re-order the list of schemes.*/
- for(si=1;si<8;si++){
- int sj;
- int scheme0;
- int bits0;
- sj=si;
- scheme0=_chooser->scheme_list[si];
- bits0=_chooser->scheme_bits[scheme0];
- do{
- int scheme1;
- scheme1=_chooser->scheme_list[sj-1];
- if(bits0>=_chooser->scheme_bits[scheme1])break;
- _chooser->scheme_list[sj]=scheme1;
- }
- while(--sj>0);
- _chooser->scheme_list[sj]=scheme0;
- }
- }
- /*The number of bits required to encode a super block run.
- _run_count: The desired run count; must be positive and less than 4130.*/
- static int oc_sb_run_bits(int _run_count){
- int i;
- for(i=0;_run_count>=OC_SB_RUN_VAL_MIN[i+1];i++);
- return OC_SB_RUN_CODE_NBITS[i];
- }
- /*The number of bits required to encode a block run.
- _run_count: The desired run count; must be positive and less than 30.*/
- static int oc_block_run_bits(int _run_count){
- return OC_BLOCK_RUN_CODE_NBITS[_run_count-1];
- }
/*State to track coded block flags and their bit cost.
  A flag value of -1 means that no flag of that kind has been seen yet.*/
struct oc_fr_state{
  /*The running estimate of the number of flag bits.*/
  ptrdiff_t  bits;
  /*The length of the current run of super block "partial" flags.*/
  unsigned   sb_partial_count:16;
  /*The length of the current run of super block "full" flags.*/
  unsigned   sb_full_count:16;
  /*The block run length saved at the last super block boundary.*/
  unsigned   b_coded_count_prev:8;
  /*The length of the current run of block coded flags.*/
  unsigned   b_coded_count:8;
  /*The number of blocks seen so far in the current super block.*/
  unsigned   b_count:8;
  /*The value of the current super block "partial" flag run.*/
  signed int sb_partial:2;
  /*The value of the current super block "full" flag run.*/
  signed int sb_full:2;
  /*The block flag value saved at the last super block boundary.*/
  signed int b_coded_prev:2;
  /*The value of the current block coded flag run.*/
  signed int b_coded:2;
};
- static void oc_fr_state_init(oc_fr_state *_fr){
- _fr->bits=0;
- _fr->sb_partial_count=0;
- _fr->sb_full_count=0;
- _fr->b_coded_count_prev=0;
- _fr->b_coded_count=0;
- _fr->b_count=0;
- _fr->sb_partial=-1;
- _fr->sb_full=-1;
- _fr->b_coded_prev=-1;
- _fr->b_coded=-1;
- }
- static void oc_fr_state_advance_sb(oc_fr_state *_fr,
- int _sb_partial,int _sb_full){
- ptrdiff_t bits;
- int sb_partial_count;
- int sb_full_count;
- bits=_fr->bits;
- /*Extend the sb_partial run, or start a new one.*/
- sb_partial_count=_fr->sb_partial;
- if(_fr->sb_partial==_sb_partial){
- if(sb_partial_count>=4129){
- bits++;
- sb_partial_count=0;
- }
- else bits-=oc_sb_run_bits(sb_partial_count);
- }
- else sb_partial_count=0;
- sb_partial_count++;
- bits+=oc_sb_run_bits(sb_partial_count);
- if(!_sb_partial){
- /*Extend the sb_full run, or start a new one.*/
- sb_full_count=_fr->sb_full_count;
- if(_fr->sb_full==_sb_full){
- if(sb_full_count>=4129){
- bits++;
- sb_full_count=0;
- }
- else bits-=oc_sb_run_bits(sb_full_count);
- }
- else sb_full_count=0;
- sb_full_count++;
- bits+=oc_sb_run_bits(sb_full_count);
- _fr->sb_full=_sb_full;
- _fr->sb_full_count=sb_full_count;
- }
- _fr->bits=bits;
- _fr->sb_partial=_sb_partial;
- _fr->sb_partial_count=sb_partial_count;
- }
- /*Flush any outstanding block flags for a SB (e.g., one with fewer than 16
- blocks).*/
- static void oc_fr_state_flush_sb(oc_fr_state *_fr){
- ptrdiff_t bits;
- int sb_partial;
- int sb_full=sb_full;
- int b_coded_count;
- int b_coded;
- int b_count;
- b_count=_fr->b_count;
- if(b_count>0){
- bits=_fr->bits;
- b_coded=_fr->b_coded;
- b_coded_count=_fr->b_coded_count;
- if(b_coded_count>=b_count){
- /*This SB was fully coded/uncoded; roll back the partial block flags.*/
- bits-=oc_block_run_bits(b_coded_count);
- if(b_coded_count>b_count)bits+=oc_block_run_bits(b_coded_count-b_count);
- sb_partial=0;
- sb_full=b_coded;
- b_coded=_fr->b_coded_prev;
- b_coded_count=_fr->b_coded_count_prev;
- }
- else{
- /*It was partially coded.*/
- sb_partial=1;
- /*sb_full is unused.*/
- }
- _fr->bits=bits;
- _fr->b_coded_count=b_coded_count;
- _fr->b_coded_count_prev=b_coded_count;
- _fr->b_count=0;
- _fr->b_coded=b_coded;
- _fr->b_coded_prev=b_coded;
- oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
- }
- }
- static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){
- ptrdiff_t bits;
- int b_coded_count;
- int b_count;
- int sb_partial;
- int sb_full=sb_full;
- bits=_fr->bits;
- /*Extend the b_coded run, or start a new one.*/
- b_coded_count=_fr->b_coded_count;
- if(_fr->b_coded==_b_coded)bits-=oc_block_run_bits(b_coded_count);
- else b_coded_count=0;
- b_coded_count++;
- b_count=_fr->b_count+1;
- if(b_count>=16){
- /*We finished a superblock.*/
- if(b_coded_count>=16){
- /*It was fully coded/uncoded; roll back the partial block flags.*/
- if(b_coded_count>16)bits+=oc_block_run_bits(b_coded_count-16);
- sb_partial=0;
- sb_full=_b_coded;
- _b_coded=_fr->b_coded_prev;
- b_coded_count=_fr->b_coded_count_prev;
- }
- else{
- bits+=oc_block_run_bits(b_coded_count);
- /*It was partially coded.*/
- sb_partial=1;
- /*sb_full is unused.*/
- }
- _fr->bits=bits;
- _fr->b_coded_count=b_coded_count;
- _fr->b_coded_count_prev=b_coded_count;
- _fr->b_count=0;
- _fr->b_coded=_b_coded;
- _fr->b_coded_prev=_b_coded;
- oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
- }
- else{
- bits+=oc_block_run_bits(b_coded_count);
- _fr->bits=bits;
- _fr->b_coded_count=b_coded_count;
- _fr->b_count=b_count;
- _fr->b_coded=_b_coded;
- }
- }
- static void oc_fr_skip_block(oc_fr_state *_fr){
- oc_fr_state_advance_block(_fr,0);
- }
- static void oc_fr_code_block(oc_fr_state *_fr){
- oc_fr_state_advance_block(_fr,1);
- }
- static int oc_fr_cost1(const oc_fr_state *_fr){
- oc_fr_state tmp;
- ptrdiff_t bits;
- *&tmp=*_fr;
- oc_fr_skip_block(&tmp);
- bits=tmp.bits;
- *&tmp=*_fr;
- oc_fr_code_block(&tmp);
- return (int)(tmp.bits-bits);
- }
- static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){
- oc_fr_state tmp;
- *&tmp=*_pre;
- oc_fr_skip_block(&tmp);
- oc_fr_skip_block(&tmp);
- oc_fr_skip_block(&tmp);
- oc_fr_skip_block(&tmp);
- return (int)(_post->bits-tmp.bits);
- }
/*State to track the bit cost of the per-block quantizer index (qii) flags.
  A flag value of -1 means that no flag of that kind has been seen yet.*/
struct oc_qii_state{
  /*The running estimate of the number of flag bits.*/
  ptrdiff_t  bits;
  /*The length of the current run of the first flag, (qii+1)>>1.*/
  unsigned   qi01_count:14;
  /*The value of the current run of the first flag.*/
  signed int qi01:2;
  /*The length of the current run of the second flag, qii>>1.*/
  unsigned   qi12_count:14;
  /*The value of the current run of the second flag.*/
  signed int qi12:2;
};
- static void oc_qii_state_init(oc_qii_state *_qs){
- _qs->bits=0;
- _qs->qi01_count=0;
- _qs->qi01=-1;
- _qs->qi12_count=0;
- _qs->qi12=-1;
- }
- static void oc_qii_state_advance(oc_qii_state *_qd,
- const oc_qii_state *_qs,int _qii){
- ptrdiff_t bits;
- int qi01;
- int qi01_count;
- int qi12;
- int qi12_count;
- bits=_qs->bits;
- qi01=_qii+1>>1;
- qi01_count=_qs->qi01_count;
- if(qi01==_qs->qi01){
- if(qi01_count>=4129){
- bits++;
- qi01_count=0;
- }
- else bits-=oc_sb_run_bits(qi01_count);
- }
- else qi01_count=0;
- qi01_count++;
- bits+=oc_sb_run_bits(qi01_count);
- qi12_count=_qs->qi12_count;
- if(_qii){
- qi12=_qii>>1;
- if(qi12==_qs->qi12){
- if(qi12_count>=4129){
- bits++;
- qi12_count=0;
- }
- else bits-=oc_sb_run_bits(qi12_count);
- }
- else qi12_count=0;
- qi12_count++;
- bits+=oc_sb_run_bits(qi12_count);
- }
- else qi12=_qs->qi12;
- _qd->bits=bits;
- _qd->qi01=qi01;
- _qd->qi01_count=qi01_count;
- _qd->qi12=qi12;
- _qd->qi12_count=qi12_count;
- }
- /*Temporary encoder state for the analysis pipeline.*/
- struct oc_enc_pipeline_state{
- int bounding_values[256];
- oc_fr_state fr[3];
- oc_qii_state qs[3];
- /*Condensed dequantization tables.*/
- const ogg_uint16_t *dequant[3][3][2];
- /*Condensed quantization tables.*/
- const oc_iquant *enquant[3][3][2];
- /*Skip SSD storage for the current MCU in each plane.*/
- unsigned *skip_ssd[3];
- /*Coded/uncoded fragment lists for each plane for the current MCU.*/
- ptrdiff_t *coded_fragis[3];
- ptrdiff_t *uncoded_fragis[3];
- ptrdiff_t ncoded_fragis[3];
- ptrdiff_t nuncoded_fragis[3];
- /*The starting fragment for the current MCU in each plane.*/
- ptrdiff_t froffset[3];
- /*The starting row for the current MCU in each plane.*/
- int fragy0[3];
- /*The ending row for the current MCU in each plane.*/
- int fragy_end[3];
- /*The starting superblock for the current MCU in each plane.*/
- unsigned sbi0[3];
- /*The ending superblock for the current MCU in each plane.*/
- unsigned sbi_end[3];
- /*The number of tokens for zzi=1 for each color plane.*/
- int ndct_tokens1[3];
- /*The outstanding eob_run count for zzi=1 for each color plane.*/
- int eob_run1[3];
- /*Whether or not the loop filter is enabled.*/
- int loop_filter;
- };
- static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){
- ptrdiff_t *coded_fragis;
- unsigned mcu_nvsbs;
- ptrdiff_t mcu_nfrags;
- int hdec;
- int vdec;
- int pli;
- int qii;
- int qti;
- /*Initialize the per-plane coded block flag trackers.
- These are used for bit-estimation purposes only; the real flag bits span
- all three planes, so we can't compute them in parallel.*/
- for(pli=0;pli<3;pli++)oc_fr_state_init(_pipe->fr+pli);
- for(pli=0;pli<3;pli++)oc_qii_state_init(_pipe->qs+pli);
- /*Set up the per-plane skip SSD storage pointers.*/
- mcu_nvsbs=_enc->mcu_nvsbs;
- mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16;
- hdec=!(_enc->state.info.pixel_fmt&1);
- vdec=!(_enc->state.info.pixel_fmt&2);
- _pipe->skip_ssd[0]=_enc->mcu_skip_ssd;
- _pipe->skip_ssd[1]=_pipe->skip_ssd[0]+mcu_nfrags;
- _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>hdec+vdec);
- /*Set up per-plane pointers to the coded and uncoded fragments lists.
- Unlike the decoder, each planes' coded and uncoded fragment list is kept
- separate during the analysis stage; we only make the coded list for all
- three planes contiguous right before the final packet is output
- (destroying the uncoded lists, which are no longer needed).*/
- coded_fragis=_enc->state.coded_fragis;
- for(pli=0;pli<3;pli++){
- _pipe->coded_fragis[pli]=coded_fragis;
- coded_fragis+=_enc->state.fplanes[pli].nfrags;
- _pipe->uncoded_fragis[pli]=coded_fragis;
- }
- memset(_pipe->ncoded_fragis,0,sizeof(_pipe->ncoded_fragis));
- memset(_pipe->nuncoded_fragis,0,sizeof(_pipe->nuncoded_fragis));
- /*Set up condensed quantizer tables.*/
- for(pli=0;pli<3;pli++){
- for(qii=0;qii<_enc->state.nqis;qii++){
- int qi;
- qi=_enc->state.qis[qii];
- for(qti=0;qti<2;qti++){
- _pipe->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti];
- _pipe->enquant[pli][qii][qti]=_enc->enquant_tables[qi][pli][qti];
- }
- }
- }
- /*Initialize the tokenization state.*/
- for(pli=0;pli<3;pli++){
- _pipe->ndct_tokens1[pli]=0;
- _pipe->eob_run1[pli]=0;
- }
- /*Initialize the bounding value array for the loop filter.*/
- _pipe->loop_filter=!oc_state_loop_filter_init(&_enc->state,
- _pipe->bounding_values);
- }
- /*Sets the current MCU stripe to super block row _sby.
- Return: A non-zero value if this was the last MCU.*/
- static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc,
- oc_enc_pipeline_state *_pipe,int _sby){
- const oc_fragment_plane *fplane;
- unsigned mcu_nvsbs;
- int sby_end;
- int notdone;
- int vdec;
- int pli;
- mcu_nvsbs=_enc->mcu_nvsbs;
- sby_end=_enc->state.fplanes[0].nvsbs;
- notdone=_sby+mcu_nvsbs<sby_end;
- if(notdone)sby_end=_sby+mcu_nvsbs;
- vdec=0;
- for(pli=0;pli<3;pli++){
- fplane=_enc->state.fplanes+pli;
- _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs;
- _pipe->fragy0[pli]=_sby<<2-vdec;
- _pipe->froffset[pli]=fplane->froffset
- +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags;
- if(notdone){
- _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs;
- _pipe->fragy_end[pli]=sby_end<<2-vdec;
- }
- else{
- _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs;
- _pipe->fragy_end[pli]=fplane->nvfrags;
- }
- vdec=!(_enc->state.info.pixel_fmt&2);
- }
- return notdone;
- }
- static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc,
- oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){
- int refi;
- /*Copy over all the uncoded fragments from this plane and advance the uncoded
- fragment list.*/
- _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
- oc_state_frag_copy_list(&_enc->state,_pipe->uncoded_fragis[_pli],
- _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
- _pipe->nuncoded_fragis[_pli]=0;
- /*Perform DC prediction.*/
- oc_enc_pred_dc_frag_rows(_enc,_pli,
- _pipe->fragy0[_pli],_pipe->fragy_end[_pli]);
- /*Finish DC tokenization.*/
- oc_enc_tokenize_dc_frag_list(_enc,_pli,
- _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli],
- _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]);
- _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1];
- _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1];
- /*And advance the coded fragment list.*/
- _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
- _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
- _pipe->ncoded_fragis[_pli]=0;
- /*Apply the loop filter if necessary.*/
- refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
- if(_pipe->loop_filter){
- oc_state_loop_filter_frag_rows(&_enc->state,_pipe->bounding_values,
- refi,_pli,_pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay);
- }
- else _sdelay=_edelay=0;
- /*To fill borders, we have an additional two pixel delay, since a fragment
- in the next row could filter its top edge, using two pixels from a
- fragment in this row.
- But there's no reason to delay a full fragment between the two.*/
- oc_state_borders_fill_rows(&_enc->state,refi,_pli,
- (_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1),
- (_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1));
- }
/*Cost information about the coded blocks in a MB.*/
struct oc_rd_metric{
  /*NOTE(review): presumably the AC SSD if the blocks were left uncoded;
     confirm against the code that fills this in.*/
  int uncoded_ac_ssd;
  /*NOTE(review): presumably the AC SSD after coding the blocks; confirm.*/
  int coded_ac_ssd;
  /*NOTE(review): presumably the bits spent on AC tokens; confirm.*/
  int ac_bits;
  /*NOTE(review): meaning not visible from here; confirm.*/
  int dc_flag;
};
- static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
- oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,int _overhead_bits,
- oc_rd_metric *_mo,oc_token_checkpoint **_stack){
- OC_ALIGN16(ogg_int16_t dct[64]);
- OC_ALIGN16(ogg_int16_t data[64]);
- ogg_uint16_t dc_dequant;
- const ogg_uint16_t *dequant;
- const oc_iquant *enquant;
- ptrdiff_t frag_offs;
- int ystride;
- const unsigned char *src;
- const unsigned char *ref;
- unsigned char *dst;
- int frame_type;
- int nonzero;
- unsigned uncoded_ssd;
- unsigned coded_ssd;
- int coded_dc;
- oc_token_checkpoint *checkpoint;
- oc_fragment *frags;
- int mb_mode;
- int mv_offs[2];
- int nmv_offs;
- int ac_bits;
- int borderi;
- int qti;
- int qii;
- int pi;
- int zzi;
- int v;
- int val;
- int d;
- int s;
- int dc;
- frags=_enc->state.frags;
- frag_offs=_enc->state.frag_buf_offs[_fragi];
- ystride=_enc->state.ref_ystride[_pli];
- src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs;
- borderi=frags[_fragi].borderi;
- qii=frags[_fragi].qii;
- if(qii&~3){
- #if !defined(OC_COLLECT_METRICS)
- if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){
- /*Enable early skip detection.*/
- frags[_fragi].coded=0;
- return 0;
- }
- #endif
- /*Try and code this block anyway.*/
- qii&=3;
- frags[_fragi].qii=qii;
- }
- mb_mode=frags[_fragi].mb_mode;
- ref=_enc->state.ref_frame_data[
- _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs;
- dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]]
- +frag_offs;
- /*Motion compensation:*/
- switch(mb_mode){
- case OC_MODE_INTRA:{
- nmv_offs=0;
- oc_enc_frag_sub_128(_enc,data,src,ystride);
- }break;
- case OC_MODE_GOLDEN_NOMV:
- case OC_MODE_INTER_NOMV:{
- nmv_offs=1;
- mv_offs[0]=0;
- oc_enc_frag_sub(_enc,data,src,ref,ystride);
- }break;
- default:{
- const oc_mv *frag_mvs;
- frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
- nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,_pli,
- frag_mvs[_fragi][0],frag_mvs[_fragi][1]);
- if(nmv_offs>1){
- oc_enc_frag_copy2(_enc,dst,
- ref+mv_offs[0],ref+mv_offs[1],ystride);
- oc_enc_frag_sub(_enc,data,src,dst,ystride);
- }
- else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride);
- }break;
- }
- #if defined(OC_COLLECT_METRICS)
- {
- unsigned satd;
- switch(nmv_offs){
- case 0:satd=oc_enc_frag_intra_satd(_enc,src,ystride);break;
- case 1:{
- satd=oc_enc_frag_satd_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX);
- }break;
- default:{
- satd=oc_enc_frag_satd_thresh(_enc,src,dst,ystride,UINT_MAX);
- }
- }
- _enc->frag_satd[_fragi]=satd;
- }
- #endif
- /*Transform:*/
- oc_enc_fdct8x8(_enc,dct,data);
- /*Quantize the DC coefficient:*/
- qti=mb_mode!=OC_MODE_INTRA;
- enquant=_pipe->enquant[_pli][0][qti];
- dc_dequant=_pipe->dequant[_pli][0][qti][0];
- v=dct[0];
- val=v<<1;
- s=OC_SIGNMASK(val);
- val+=dc_dequant+s^s;
- val=((enquant[0].m*(ogg_int32_t)val>>16)+val>>enquant[0].l)-s;
- dc=OC_CLAMPI(-580,val,580);
- nonzero=0;
- /*Quantize the AC coefficients:*/
- dequant=_pipe->dequant[_pli][qii][qti];
- enquant=_pipe->enquant[_pli][qii][qti];
- for(zzi=1;zzi<64;zzi++){
- v=dct[OC_FZIG_ZAG[zzi]];
- d=dequant[zzi];
- val=v<<1;
- v=abs(val);
- if(v>=d){
- s=OC_SIGNMASK(val);
- /*The bias added here rounds ties away from zero, since token
- optimization can only decrease the magnitude of the quantized
- value.*/
- val+=d+s^s;
- /*Note the arithmetic right shift is not guaranteed by ANSI C.
- Hopefully no one still uses ones-complement architectures.*/
- val=((enquant[zzi].m*(ogg_int32_t)val>>16)+val>>enquant[zzi].l)-s;
- data[zzi]=OC_CLAMPI(-580,val,580);
- nonzero=zzi;
- }
- else data[zzi]=0;
- }
- /*Tokenize.*/
- checkpoint=*_stack;
- ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,data,dequant,dct,nonzero+1,
- _stack,qti?0:3);
- /*Reconstruct.
- TODO: nonzero may need to be adjusted after tokenization.*/
- if(nonzero==0){
- ogg_int16_t p;
- int ci;
- /*We round this dequant product (and not any of the others) because there's
- no iDCT rounding.*/
- p=(ogg_int16_t)(dc*(ogg_int32_t)dc_dequant+15>>5);
- /*LOOP VECTORIZES.*/
- for(ci=0;ci<64;ci++)data[ci]=p;
- }
- else{
- data[0]=dc*dc_dequant;
- oc_idct8x8(&_enc->state,data,nonzero+1);
- }
- if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data);
- else{
- oc_enc_frag_recon_inter(_enc,dst,
- nmv_offs==1?ref+mv_offs[0]:dst,ystride,data);
- }
- frame_type=_enc->state.frame_type;
- #if !defined(OC_COLLECT_METRICS)
- if(frame_type!=OC_INTRA_FRAME)
- #endif
- {
- /*In retrospect, should we have skipped this block?*/
- oc_enc_frag_sub(_enc,data,src,dst,ystride);
- coded_ssd=coded_dc=0;
- if(borderi<0){
- for(pi=0;pi<64;pi++){
- coded_ssd+=data[pi]*data[pi];
- coded_dc+=data[pi];
- }
- }
- else{
- ogg_int64_t mask;
- mask=_enc->state.borders[borderi].mask;
- for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
- coded_ssd+=data[pi]*data[pi];
- coded_dc+=data[pi];
- }
- }
- /*Scale to match DCT domain.*/
- coded_ssd<<=4;
- /*We actually only want the AC contribution to the SSD.*/
- coded_ssd-=coded_dc*coded_dc>>2;
- #if defined(OC_COLLECT_METRICS)
- _enc->frag_ssd[_fragi]=coded_ssd;
- }
- if(frame_type!=OC_INTRA_FRAME){
- #endif
- uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]];
- if(uncoded_ssd<UINT_MAX){
- /*Although the fragment coding overhead determination is accurate, it is
- greedy, using very coarse-grained local information.
- Allowing it to mildly discourage coding turns out to be beneficial, but
- it's not clear that allowing it to encourage coding through negative
- coding overhead deltas is useful.
- For that reason, we disallow negative coding_overheads.*/
- if(_overhead_bits<0)_overhead_bits=0;
- if(uncoded_ssd<=coded_ssd+(_overhead_bits+ac_bits)*_enc->lambda&&
- /*Don't allow luma blocks to be skipped in 4MV mode when VP3
- compatibility is enabled.*/
- (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){
- /*Hm, not worth it; roll back.*/
- oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint);
- *_stack=checkpoint;
- frags[_fragi].coded=0;
- return 0;
- }
- }
- else _mo->dc_flag=1;
- _mo->uncoded_ac_ssd+=uncoded_ssd;
- _mo->coded_ac_ssd+=coded_ssd;
- _mo->ac_bits+=ac_bits;
- }
- oc_qii_state_advance(_pipe->qs+_pli,_pipe->qs+_pli,qii);
- frags[_fragi].dc=dc;
- frags[_fragi].coded=1;
- return 1;
- }
- static int oc_enc_mb_transform_quantize_luma(oc_enc_ctx *_enc,
- oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead){
- /*Worst case token stack usage for 4 fragments.*/
- oc_token_checkpoint stack[64*4];
- oc_token_checkpoint *stackptr;
- const oc_sb_map *sb_maps;
- signed char *mb_modes;
- oc_fragment *frags;
- ptrdiff_t *coded_fragis;
- ptrdiff_t ncoded_fragis;
- ptrdiff_t *uncoded_fragis;
- ptrdiff_t nuncoded_fragis;
- oc_rd_metric mo;
- oc_fr_state fr_checkpoint;
- oc_qii_state qs_checkpoint;
- int mb_mode;
- int ncoded;
- ptrdiff_t fragi;
- int bi;
- *&fr_checkpoint=*(_pipe->fr+0);
- *&qs_checkpoint=*(_pipe->qs+0);
- sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
- mb_modes=_enc->state.mb_modes;
- frags=_enc->state.frags;
- coded_fragis=_pipe->coded_fragis[0];
- ncoded_fragis=_pipe->ncoded_fragis[0];
- uncoded_fragis=_pipe->uncoded_fragis[0];
- nuncoded_fragis=_pipe->nuncoded_fragis[0];
- mb_mode=mb_modes[_mbi];
- ncoded=0;
- stackptr=stack;
- memset(&mo,0,sizeof(mo));
- for(bi=0;bi<4;bi++){
- fragi=sb_maps[_mbi>>2][_mbi&3][bi];
- frags[fragi].mb_mode=mb_mode;
- if(oc_enc_block_transform_quantize(_enc,
- _pipe,0,fragi,oc_fr_cost1(_pipe->fr+0),&mo,&stackptr)){
- oc_fr_code_block(_pipe->fr+0);
- coded_fragis[ncoded_fragis++]=fragi;
- ncoded++;
- }
- else{
- *(uncoded_fragis-++nuncoded_fragis)=fragi;
- oc_fr_skip_block(_pipe->fr+0);
- }
- }
- if(_enc->state.frame_type!=OC_INTRA_FRAME){
- if(ncoded>0&&!mo.dc_flag){
- int cost;
- /*Some individual blocks were worth coding.
- See if that's still true when accounting for mode and MV overhead.*/
- cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits
- +oc_fr_cost4(&fr_checkpoint,_pipe->fr+0)+_mode_overhead);
- if(mo.uncoded_ac_ssd<=cost){
- /*Taking macroblock overhead into account, it is not worth coding this
- MB.*/
- oc_enc_tokenlog_rollback(_enc,stack,stackptr-stack);
- *(_pipe->fr+0)=*&fr_checkpoint;
- *(_pipe->qs+0)=*&qs_checkpoint;
- for(bi=0;bi<4;bi++){
- fragi=sb_maps[_mbi>>2][_mbi&3][bi];
- if(frags[fragi].coded){
- *(uncoded_fragis-++nuncoded_fragis)=fragi;
- frags[fragi].coded=0;
- }
- oc_fr_skip_block(_pipe->fr+0);
- }
- ncoded_fragis-=ncoded;
- ncoded=0;
- }
- }
- /*If no luma blocks coded, the mode is forced.*/
- if(ncoded==0)mb_modes[_mbi]=OC_MODE_INTER_NOMV;
- /*Assume that a 1MV with a single coded block is always cheaper than a 4MV
- with a single coded block.
- This may not be strictly true: a 4MV computes chroma MVs using (0,0) for
- skipped blocks, while a 1MV does not.*/
- else if(ncoded==1&&mb_mode==OC_MODE_INTER_MV_FOUR){
- mb_modes[_mbi]=OC_MODE_INTER_MV;
- }
- }
- _pipe->ncoded_fragis[0]=ncoded_fragis;
- _pipe->nuncoded_fragis[0]=nuncoded_fragis;
- return ncoded;
- }
- static void oc_enc_sb_transform_quantize_chroma(oc_enc_ctx *_enc,
- oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
- const oc_sb_map *sb_maps;
- oc_sb_flags *sb_flags;
- ptrdiff_t *coded_fragis;
- ptrdiff_t ncoded_fragis;
- ptrdiff_t *uncoded_fragis;
- ptrdiff_t nuncoded_fragis;
- int sbi;
- sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
- sb_flags=_enc->state.sb_flags;
- coded_fragis=_pipe->coded_fragis[_pli];
- ncoded_fragis=_pipe->ncoded_fragis[_pli];
- uncoded_fragis=_pipe->uncoded_fragis[_pli];
- nuncoded_fragis=_pipe->nuncoded_fragis[_pli];
- for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
- /*Worst case token stack usage for 1 fragment.*/
- oc_token_checkpoint stack[64];
- oc_rd_metric mo;
- int quadi;
- int bi;
- memset(&mo,0,sizeof(mo));
- for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){
- ptrdiff_t fragi;
- fragi=sb_maps[sbi][quadi][bi];
- if(fragi>=0){
- oc_token_checkpoint *stackptr;
- stackptr=stack;
- if(oc_enc_block_transform_quantize(_enc,
- _pipe,_pli,fragi,oc_fr_cost1(_pipe->fr+_pli),&mo,&stackptr)){
- coded_fragis[ncoded_fragis++]=fragi;
- oc_fr_code_block(_pipe->fr+_pli);
- }
- else{
- *(uncoded_fragis-++nuncoded_fragis)=fragi;
- oc_fr_skip_block(_pipe->fr+_pli);
- }
- }
- }
- oc_fr_state_flush_sb(_pipe->fr+_pli);
- sb_flags[sbi].coded_fully=_pipe->fr[_pli].sb_full;
- sb_flags[sbi].coded_partially=_pipe->fr[_pli].sb_partial;
- }
- _pipe->ncoded_fragis[_pli]=ncoded_fragis;
- _pipe->nuncoded_fragis[_pli]=nuncoded_fragis;
- }
- /*Mode decision is done by exhaustively examining all potential choices.
- Obviously, doing the motion compensation, fDCT, tokenization, and then
- counting the bits each token uses is computationally expensive.
- Theora's EOB runs can also split the cost of these tokens across multiple
- fragments, and naturally we don't know what the optimal choice of Huffman
- codes will be until we know all the tokens we're going to encode in all the
- fragments.
- So we use a simple approach to estimating the bit cost and distortion of each
- mode based upon the SATD value of the residual before coding.
- The mathematics behind the technique are outlined by Kim \cite{Kim03}, but
- the process (modified somewhat from that of the paper) is very simple.
- We build a non-linear regression of the mappings from
- (pre-transform+quantization) SATD to (post-transform+quantization) bits and
- SSD for each qi.
- A separate set of mappings is kept for each quantization type and color
- plane.
- The mappings are constructed by partitioning the SATD values into a small
- number of bins (currently 24) and using a linear regression in each bin
- (as opposed to the 0th-order regression used by Kim).
- The bit counts and SSD measurements are obtained by examining actual encoded
- frames, with appropriate lambda values and optimal Huffman codes selected.
- EOB bits are assigned to the fragment that started the EOB run (as opposed to
- dividing them among all the blocks in the run; though the latter approach
- seems more theoretically correct, Monty's testing showed a small improvement
- with the former, though that may have been merely statistical noise).
- @ARTICLE{Kim03,
- author="Hyun Mun Kim",
- title="Adaptive Rate Control Using Nonlinear Regression",
- journal="IEEE Transactions on Circuits and Systems for Video Technology",
- volume=13,
- number=5,
- pages="432--439",
- month=May,
- year=2003
- }*/
/*Computes (_ssd+_lambda*_rate)/(1<<OC_BIT_SCALE) with rounding, avoiding
   overflow for large lambda values.
  The whole and fractional parts (relative to OC_BIT_SCALE) of _ssd and _rate
   are handled separately, so the full product _lambda*_rate is never formed.
  The expansion is fully parenthesized so that it can be embedded in a larger
   expression (e.g., scaled or compared) without precedence surprises.
  Note that _ssd and _rate are each evaluated twice; do not pass arguments with
   side effects.*/
#define OC_MODE_RD_COST(_ssd,_rate,_lambda) \
 ((((_ssd)>>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda)) \
 +((((_ssd)&((1<<OC_BIT_SCALE)-1)) \
 +((_rate)&((1<<OC_BIT_SCALE)-1))*(_lambda) \
 +((1<<OC_BIT_SCALE)>>1))>>OC_BIT_SCALE))
- /*Estimate the R-D cost of the DCT coefficients given the SATD of a block after
- prediction.*/
- static unsigned oc_dct_cost2(unsigned *_ssd,
- int _qi,int _pli,int _qti,int _satd){
- unsigned rmse;
- int bin;
- int dx;
- int y0;
- int z0;
- int dy;
- int dz;
- /*SATD metrics for chroma planes vary much less than luma, so we scale them
- by 4 to distribute them into the mode decision bins more evenly.*/
- _satd<<=_pli+1&2;
- bin=OC_MINI(_satd>>OC_SAD_SHIFT,OC_SAD_BINS-2);
- dx=_satd-(bin<<OC_SAD_SHIFT);
- y0=OC_MODE_RD[_qi][_pli][_qti][bin].rate;
- z0=OC_MODE_RD[_qi][_pli][_qti][bin].rmse;
- dy=OC_MODE_RD[_qi][_pli][_qti][bin+1].rate-y0;
- dz=OC_MODE_RD[_qi][_pli][_qti][bin+1].rmse-z0;
- rmse=OC_MAXI(z0+(dz*dx>>OC_SAD_SHIFT),0);
- *_ssd=rmse*rmse>>2*OC_RMSE_SCALE-OC_BIT_SCALE;
- return OC_MAXI(y0+(dy*dx>>OC_SAD_SHIFT),0);
- }
- /*Select luma block-level quantizers for a MB in an INTRA frame.*/
- static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc,
- const oc_qii_state *_qs,unsigned _mbi){
- const unsigned char *src;
- const ptrdiff_t *frag_buf_offs;
- const oc_sb_map *sb_maps;
- oc_fragment *frags;
- ptrdiff_t frag_offs;
- ptrdiff_t fragi;
- oc_qii_state qs[4][3];
- unsigned cost[4][3];
- unsigned ssd[4][3];
- unsigned rate[4][3];
- int prev[3][3];
- unsigned satd;
- unsigned best_cost;
- unsigned best_ssd;
- unsigned best_rate;
- int best_qii;
- int qii;
- int lambda;
- int ystride;
- int nqis;
- int bi;
- frag_buf_offs=_enc->state.frag_buf_offs;
- sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
- src=_enc->state.ref_frame_data[OC_FRAME_IO];
- ystride=_enc->state.ref_ystride[0];
- fragi=sb_maps[_mbi>>2][_mbi&3][0];
- frag_offs=frag_buf_offs[fragi];
- satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
- nqis=_enc->state.nqis;
- lambda=_enc->lambda;
- for(qii=0;qii<nqis;qii++){
- oc_qii_state_advance(qs[0]+qii,_qs,qii);
- rate[0][qii]=oc_dct_cost2(ssd[0]+qii,_enc->state.qis[qii],0,0,satd)
- +(qs[0][qii].bits-_qs->bits<<OC_BIT_SCALE);
- cost[0][qii]=OC_MODE_RD_COST(ssd[0][qii],rate[0][qii],lambda);
- }
- for(bi=1;bi<4;bi++){
- fragi=sb_maps[_mbi>>2][_mbi&3][bi];
- frag_offs=frag_buf_offs[fragi];
- satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
- for(qii=0;qii<nqis;qii++){
- oc_qii_state qt[3];
- unsigned cur_ssd;
- unsigned cur_rate;
- int best_qij;
- int qij;
- oc_qii_state_advance(qt+0,qs[bi-1]+0,qii);
- cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],0,0,satd);
- best_ssd=ssd[bi-1][0]+cur_ssd;
- best_rate=rate[bi-1][0]+cur_rate
- +(qt[0].bits-qs[bi-1][0].bits<<OC_BIT_SCALE);
- best_cost=OC_MODE_RD_COST(best_ssd,best_rate,lambda);
- best_qij=0;
- for(qij=1;qij<nqis;qij++){
- unsigned chain_ssd;
- unsigned chain_rate;
- unsigned chain_cost;
- oc_qii_state_advance(qt+qij,qs[bi-1]+qij,qii);
- chain_ssd=ssd[bi-1][qij]+cur_ssd;
- chain_rate=rate[bi-1][qij]+cur_rate
- +(qt[qij].bits-qs[bi-1][qij].bits<<OC_BIT_SCALE);
- chain_cost=OC_MODE_RD_COST(chain_ssd,chain_rate,lambda);
- if(chain_cost<best_cost){
- best_cost=chain_cost;
- best_ssd=chain_ssd;
- best_rate=chain_rate;
- best_qij=qij;
- }
- }
- *(qs[bi]+qii)=*(qt+best_qij);
- cost[bi][qii]=best_cost;
- ssd[bi][qii]=best_ssd;
- rate[bi][qii]=best_rate;
- prev[bi-1][qii]=best_qij;
- }
- }
- best_qii=0;
- best_cost=cost[3][0];
- for(qii=1;qii<nqis;qii++){
- if(cost[3][qii]<best_cost){
- best_cost=cost[3][qii];
- best_qii=qii;
- }
- }
- frags=_enc->state.frags;
- for(bi=3;;){
- fragi=sb_maps[_mbi>>2][_mbi&3][bi];
- frags[fragi].qii=best_qii;
- if(bi--<=0)break;
- best_qii=prev[bi][best_qii];
- }
- return best_cost;
- }
- /*Select a block-level quantizer for a single chroma block in an INTRA frame.*/
- static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc,
- const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi){
- const unsigned char *src;
- oc_fragment *frags;
- ptrdiff_t frag_offs;
- oc_qii_state qt[3];
- unsigned cost[3];
- unsigned satd;
- unsigned best_cost;
- int best_qii;
- int qii;
- int lambda;
- int ystride;
- int nqis;
- src=_enc->state.ref_frame_data[OC_FRAME_IO];
- ystride=_enc->state.ref_ystride[_pli];
- frag_offs=_enc->state.frag_buf_offs[_fragi];
- satd=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
- nqis=_enc->state.nqis;
- lambda=_enc->lambda;
- best_qii=0;
- for(qii=0;qii<nqis;qii++){
- unsigned cur_rate;
- unsigned cur_ssd;
- oc_qii_state_advance(qt+qii,_qs,qii);
- cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],_pli,0,satd)
- +(qt[qii].bits-_qs->bits<<OC_BIT_SCALE);
- cost[qii]=OC_MODE_RD_COST(cur_ssd,cur_rate,lambda);
- }
- best_cost=cost[0];
- for(qii=1;qii<nqis;qii++){
- if(cost[qii]<best_cost){
- best_cost=cost[qii];
- best_qii=qii;
- }
- }
- frags=_enc->state.frags;
- frags[_fragi].qii=best_qii;
- return best_cost;
- }
- static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc,
- oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
- const oc_sb_map *sb_maps;
- oc_sb_flags *sb_flags;
- ptrdiff_t *coded_fragis;
- ptrdiff_t ncoded_fragis;
- int sbi;
- sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
- sb_flags=_enc->state.sb_flags;
- coded_fragis=_pipe->coded_fragis[_pli];
- ncoded_fragis=_pipe->ncoded_fragis[_pli];
- for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
- /*Worst case token stack usage for 1 fragment.*/
- oc_token_checkpoint stack[64];
- int quadi;
- int bi;
- for(quadi=0;quadi<4;quadi++)for(bi=0;bi<4;bi++){
- ptrdiff_t fragi;
- fragi=sb_maps[sbi][quadi][bi];
- if(fragi>=0){
- oc_token_checkpoint *stackptr;
- oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi);
- stackptr=stack;
- oc_enc_block_transform_quantize(_enc,
- _pipe,_pli,fragi,0,NULL,&stackptr);
- coded_fragis[ncoded_fragis++]=fragi;
- }
- }
- }
- _pipe->ncoded_fragis[_pli]=ncoded_fragis;
- }
- /*Analysis stage for an INTRA frame.*/
- void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){
- oc_enc_pipeline_state pipe;
- const unsigned char *map_idxs;
- int nmap_idxs;
- oc_sb_flags *sb_flags;
- signed char *mb_modes;
- const oc_mb_map *mb_maps;
- oc_mb_enc_info *embs;
- oc_fragment *frags;
- unsigned stripe_sby;
- unsigned mcu_nvsbs;
- int notstart;
- int notdone;
- int refi;
- int pli;
- _enc->state.frame_type=OC_INTRA_FRAME;
- oc_enc_tokenize_start(_enc);
- oc_enc_pipeline_init(_enc,&pipe);
- /*Choose MVs and MB modes and quantize and code luma.
- Must be done in Hilbert order.*/
- map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
- nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- _enc->state.ncoded_fragis[0]=0;
- _enc->state.ncoded_fragis[1]=0;
- _enc->state.ncoded_fragis[2]=0;
- sb_flags=_enc->state.sb_flags;
- mb_modes=_enc->state.mb_modes;
- mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
- embs=_enc->mb_info;
- frags=_enc->state.frags;
- notstart=0;
- notdone=1;
- mcu_nvsbs=_enc->mcu_nvsbs;
- for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
- unsigned sbi;
- unsigned sbi_end;
- notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
- sbi_end=pipe.sbi_end[0];
- for(sbi=pipe.sbi0[0];sbi<sbi_end;sbi++){
- int quadi;
- /*Mode addressing is through Y plane, always 4 MB per SB.*/
- for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
- unsigned mbi;
- int mapii;
- int mapi;
- int bi;
- ptrdiff_t fragi;
- mbi=sbi<<2|quadi;
- /*Motion estimation:
- We always do a basic 1MV search for all macroblocks, coded or not,
- keyframe or not.*/
- if(!_recode&&_enc->state.curframe_num>0)oc_mcenc_search(_enc,mbi);
- oc_analyze_intra_mb_luma(_enc,pipe.qs+0,mbi);
- mb_modes[mbi]=OC_MODE_INTRA;
- oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,0);
- /*Propagate final MB mode and MVs to the chroma blocks.*/
- for(mapii=4;mapii<nmap_idxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_maps[mbi][pli][bi];
- frags[fragi].mb_mode=OC_MODE_INTRA;
- }
- }
- }
- oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
- /*Code chroma planes.*/
- for(pli=1;pli<3;pli++){
- oc_enc_sb_transform_quantize_intra_chroma(_enc,&pipe,
- pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
- oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
- }
- notstart=1;
- }
- /*Finish filling in the reference frame borders.*/
- refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
- for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
- _enc->state.ntotal_coded_fragis=_enc->state.nfrags;
- }
/*Cost information about a MB mode.*/
struct oc_mode_choice{
  /*The combined R-D cost, as computed by oc_mode_set_cost().*/
  unsigned      cost;
  /*The estimated distortion.*/
  unsigned      ssd;
  /*The estimated rate of the block data.*/
  unsigned      rate;
  /*The estimated rate of the overhead; added to rate when computing cost.*/
  unsigned      overhead;
  /*The quantizer index chosen for each block of the MB.
    The mode analysis adds 4 to the entry of a block it expects to be
     skipped.*/
  unsigned char qii[12];
};
- static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){
- _modec->cost=OC_MODE_RD_COST(_modec->ssd,
- _modec->rate+_modec->overhead,_lambda);
- }
/*A set of skip SSD's to use to disable early skipping.
  A skip SSD of UINT_MAX makes the mode analysis ignore the skip option for
   that block.*/
static const unsigned OC_NOSKIP[12]={
  /*The four luma blocks.*/
  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,
  /*The (up to eight) chroma blocks.*/
  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX
};
/*The estimated number of bits used by a coded chroma block to specify the AC
   quantizer, in units of 1/(1<<OC_BIT_SCALE) bits.
  TODO: Currently this is just 0.5*log2(3) (estimating about 50% compression);
   measurements suggest this is in the right ballpark, but it varies somewhat
   with lambda.
  0xCAE00D1D is log2(3) with 31 fractional bits; it is reduced to
   OC_BIT_SCALE+1 fractional bits and then halved with rounding.*/
#define OC_CHROMA_QII_RATE ((((0xCAE00D1DU)>>(31-OC_BIT_SCALE))+1)>>1)
- static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc,
- oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
- const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
- oc_fr_state fr;
- oc_qii_state qs;
- unsigned ssd;
- unsigned rate;
- int overhead;
- unsigned satd;
- unsigned best_ssd;
- unsigned best_rate;
- int best_overhead;
- int best_fri;
- int best_qii;
- unsigned cur_cost;
- unsigned cur_ssd;
- unsigned cur_rate;
- int cur_overhead;
- int lambda;
- int nqis;
- int nskipped;
- int bi;
- int qii;
- lambda=_enc->lambda;
- nqis=_enc->state.nqis;
- /*We could do a trellis optimization here, but we don't make final skip
- decisions until after transform+quantization, so the result wouldn't be
- optimal anyway.
- Instead we just use a greedy approach; for most SATD values, the
- differences between the qiis are large enough to drown out the cost to
- code the flags, anyway.*/
- *&fr=*_fr;
- *&qs=*_qs;
- ssd=rate=overhead=nskipped=0;
- for(bi=0;bi<4;bi++){
- oc_fr_state ft[2];
- oc_qii_state qt[3];
- unsigned best_cost;
- satd=_frag_satd[bi];
- *(ft+0)=*&fr;
- oc_fr_code_block(ft+0);
- oc_qii_state_advance(qt+0,&qs,0);
- best_overhead=(ft[0].bits-fr.bits<<OC_BIT_SCALE);
- best_rate=oc_dct_cost2(&best_ssd,_enc->state.qis[0],0,_qti,satd)
- +(qt[0].bits-qs.bits<<OC_BIT_SCALE);
- best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate+best_overhead,lambda);
- best_fri=0;
- best_qii=0;
- for(qii=1;qii<nqis;qii++){
- oc_qii_state_advance(qt+qii,&qs,qii);
- cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],0,_qti,satd)
- +(qt[qii].bits-qs.bits<<OC_BIT_SCALE);
- cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate+best_overhead,lambda);
- if(cur_cost<best_cost){
- best_cost=cur_cost;
- best_ssd=cur_ssd;
- best_rate=cur_rate;
- best_qii=qii;
- }
- }
- if(_skip_ssd[bi]<UINT_MAX&&nskipped<3){
- *(ft+1)=*&fr;
- oc_fr_skip_block(ft+1);
- cur_overhead=ft[1].bits-fr.bits<<OC_BIT_SCALE;
- cur_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
- cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_overhead,lambda);
- if(cur_cost<=best_cost){
- best_ssd=cur_ssd;
- best_rate=0;
- best_overhead=cur_overhead;
- best_fri=1;
- best_qii+=4;
- }
- }
- rate+=best_rate;
- ssd+=best_ssd;
- overhead+=best_overhead;
- *&fr=*(ft+best_fri);
- if(best_fri==0)*&qs=*(qt+best_qii);
- else nskipped++;
- _modec->qii[bi]=best_qii;
- }
- _modec->ssd=ssd;
- _modec->rate=rate;
- _modec->overhead=OC_MAXI(overhead,0);
- }
- static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc,
- oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
- const unsigned _frag_satd[12],const unsigned _skip_ssd[12],int _qti){
- unsigned ssd;
- unsigned rate;
- unsigned satd;
- unsigned best_ssd;
- unsigned best_rate;
- int best_qii;
- unsigned cur_cost;
- unsigned cur_ssd;
- unsigned cur_rate;
- int lambda;
- int nblocks;
- int nqis;
- int pli;
- int bi;
- int qii;
- lambda=_enc->lambda;
- nqis=_enc->state.nqis;
- ssd=_modec->ssd;
- rate=_modec->rate;
- /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded
- order, we assume a constant overhead for coded block and qii flags.*/
- nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- nblocks=(nblocks-4>>1)+4;
- bi=4;
- for(pli=1;pli<3;pli++){
- for(;bi<nblocks;bi++){
- unsigned best_cost;
- satd=_frag_satd[bi];
- best_rate=oc_dct_cost2(&best_ssd,_enc->state.qis[0],pli,_qti,satd)
- +OC_CHROMA_QII_RATE;
- best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda);
- best_qii=0;
- for(qii=1;qii<nqis;qii++){
- cur_rate=oc_dct_cost2(&cur_ssd,_enc->state.qis[qii],0,_qti,satd)
- +OC_CHROMA_QII_RATE;
- cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda);
- if(cur_cost<best_cost){
- best_cost=cur_cost;
- best_ssd=cur_ssd;
- best_rate=cur_rate;
- best_qii=qii;
- }
- }
- if(_skip_ssd[bi]<UINT_MAX){
- cur_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
- cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate,lambda);
- if(cur_cost<=best_cost){
- best_ssd=cur_ssd;
- best_rate=0;
- best_qii+=4;
- }
- }
- rate+=best_rate;
- ssd+=best_ssd;
- _modec->qii[bi]=best_qii;
- }
- nblocks=(nblocks-4<<1)+4;
- }
- _modec->ssd=ssd;
- _modec->rate=rate;
- }
- static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe,
- unsigned _mbi,unsigned _ssd[12]){
- OC_ALIGN16(ogg_int16_t buffer[64]);
- const unsigned char *src;
- const unsigned char *ref;
- int ystride;
- const oc_fragment *frags;
- const ptrdiff_t *frag_buf_offs;
- const ptrdiff_t *sb_map;
- const oc_mb_map_plane *mb_map;
- const unsigned char *map_idxs;
- int map_nidxs;
- ogg_int64_t mask;
- unsigned uncoded_ssd;
- int uncoded_dc;
- unsigned dc_dequant;
- int dc_flag;
- int mapii;
- int mapi;
- int pli;
- int bi;
- ptrdiff_t fragi;
- ptrdiff_t frag_offs;
- int borderi;
- int pi;
- src=_enc->state.ref_frame_data[OC_FRAME_IO];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
- ystride=_enc->state.ref_ystride[0];
- frags=_enc->state.frags;
- frag_buf_offs=_enc->state.frag_buf_offs;
- sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
- dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][0][1][0];
- for(bi=0;bi<4;bi++){
- fragi=sb_map[bi];
- frag_offs=frag_buf_offs[fragi];
- oc_enc_frag_sub(_enc,buffer,src+frag_offs,ref+frag_offs,ystride);
- borderi=frags[fragi].borderi;
- uncoded_ssd=uncoded_dc=0;
- if(borderi<0){
- for(pi=0;pi<64;pi++){
- uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
- }
- }
- else{
- ogg_int64_t mask;
- mask=_enc->state.borders[borderi].mask;
- for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
- uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
- }
- }
- /*Scale to match DCT domain.*/
- uncoded_ssd<<=4;
- /*We actually only want the AC contribution to the SSD.*/
- uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
- /*DC is a special case; if there's more than a full-quantizer improvement
- in the effective DC component, always force-code the block.*/
- dc_flag=abs(uncoded_dc)>dc_dequant<<1;
- uncoded_ssd|=-dc_flag;
- _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd;
- }
- mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
- map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
- map_nidxs=(map_nidxs-4>>1)+4;
- mapii=4;
- for(pli=1;pli<3;pli++){
- ystride=_enc->state.ref_ystride[pli];
- dc_dequant=_enc->state.dequant_tables[_enc->state.qis[0]][pli][1][0];
- for(;mapii<map_nidxs;mapii++){
- mapi=map_idxs[mapii];
- bi=mapi&3;
- fragi=mb_map[pli][bi];
- frag_offs=frag_buf_offs[fragi];
- oc_enc_frag_sub(_enc,buffer,src+frag_offs,ref+frag_offs,ystride);
- borderi=frags[fragi].borderi;
- uncoded_ssd=uncoded_dc=0;
- if(borderi<0){
- for(pi=0;pi<64;pi++){
- uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
- }
- }
- else{
- mask=_enc->state.borders[borderi].mask;
- for(pi=0;pi<64;pi++,mask>>=1)if(mask&1){
- uncoded_ssd+=buffer[pi]*buffer[pi];
- uncoded_dc+=buffer[pi];
- }
- }
- /*Scale to match DCT domain.*/
- uncoded_ssd<<=4;
- /*We actually only want the AC contribution to the SSD.*/
- uncoded_ssd-=uncoded_dc*uncoded_dc>>2;
- /*DC is a special case; if there's more than a full-quantizer improvement
- in the effective DC component, always force-code the block.*/
- dc_flag=abs(uncoded_dc)>dc_dequant<<1;
- uncoded_ssd|=-dc_flag;
- _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd;
- }
- map_nidxs=(map_nidxs-4<<1)+4;
- }
- }
- static void oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi,
- unsigned _frag_satd[12]){
- const unsigned char *src;
- const ptrdiff_t *frag_buf_offs;
- const ptrdiff_t *sb_map;
- const oc_mb_map_plane *mb_map;
- const unsigned char *map_idxs;
- int map_nidxs;
- int mapii;
- int mapi;
- int ystride;
- int pli;
- int bi;
- ptrdiff_t fragi;
- ptrdiff_t frag_offs;
- frag_buf_offs=_enc->state.frag_buf_offs;
- sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
- src=_enc->state.ref_frame_data[OC_FRAME_IO];
- ystride=_enc->state.ref_ystride[0];
- for(bi=0;bi<4;bi++){
- fragi=sb_map[bi];
- frag_offs=frag_buf_offs[fragi];
- _frag_satd[bi]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
- }
- mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
- map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
- map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
- ystride=_enc->state.ref_ystride[1];
- for(mapii=4;mapii<map_nidxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_map[pli][bi];
- frag_offs=frag_buf_offs[fragi];
- _frag_satd[mapii]=oc_enc_frag_intra_satd(_enc,src+frag_offs,ystride);
- }
- }
- static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec,
- unsigned _mbi,const oc_fr_state *_fr,const oc_qii_state *_qs,
- const unsigned _frag_satd[12],const unsigned _skip_ssd[12]){
- oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
- oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,0);
- _modec->overhead+=
- oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<<OC_BIT_SCALE;
- oc_mode_set_cost(_modec,_enc->lambda);
- }
- static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec,
- unsigned _mbi,int _mb_mode,const signed char *_mv,
- const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
- unsigned frag_satd[12];
- const unsigned char *src;
- const unsigned char *ref;
- int ystride;
- const ptrdiff_t *frag_buf_offs;
- const ptrdiff_t *sb_map;
- const oc_mb_map_plane *mb_map;
- const unsigned char *map_idxs;
- int map_nidxs;
- int mapii;
- int mapi;
- int mv_offs[2];
- int dx;
- int dy;
- int pli;
- int bi;
- ptrdiff_t fragi;
- ptrdiff_t frag_offs;
- src=_enc->state.ref_frame_data[OC_FRAME_IO];
- ref=_enc->state.ref_frame_data[
- _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(_mb_mode)]];
- ystride=_enc->state.ref_ystride[0];
- frag_buf_offs=_enc->state.frag_buf_offs;
- sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
- dx=_mv[0];
- dy=_mv[1];
- _modec->rate=_modec->ssd=0;
- if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
- for(bi=0;bi<4;bi++){
- fragi=sb_map[bi];
- frag_offs=frag_buf_offs[fragi];
- frag_satd[bi]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
- }
- }
- else{
- for(bi=0;bi<4;bi++){
- fragi=sb_map[bi];
- frag_offs=frag_buf_offs[fragi];
- frag_satd[bi]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
- }
- }
- mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
- map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
- map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
- ystride=_enc->state.ref_ystride[1];
- if(oc_state_get_mv_offsets(&_enc->state,mv_offs,1,dx,dy)>1){
- for(mapii=4;mapii<map_nidxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_map[pli][bi];
- frag_offs=frag_buf_offs[fragi];
- frag_satd[mapii]=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
- }
- }
- else{
- for(mapii=4;mapii<map_nidxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_map[pli][bi];
- frag_offs=frag_buf_offs[fragi];
- frag_satd[mapii]=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
- }
- }
- oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
- oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
- _modec->overhead+=
- oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<<OC_BIT_SCALE;
- oc_mode_set_cost(_modec,_enc->lambda);
- }
- static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
- unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs,
- const unsigned _skip_ssd[12]){
- static const oc_mv OC_MV_ZERO;
- oc_cost_inter(_enc,_modec,_mbi,_mb_mode,OC_MV_ZERO,_fr,_qs,_skip_ssd);
- }
- static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
- unsigned _mbi,int _mb_mode,const signed char *_mv,
- const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12]){
- int bits0;
- oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd);
- bits0=OC_MV_BITS[0][_mv[0]+31]+OC_MV_BITS[0][_mv[1]+31];
- _modec->overhead+=OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+12)
- -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
- oc_mode_set_cost(_modec,_enc->lambda);
- return bits0;
- }
/*A mapping from oc_mb_map (raster) ordering to oc_sb_map (Hilbert) ordering.
  The first index is the position of the MB within its superblock (_mbi&3); the
   second is the raster index of the block within the MB.*/
static const unsigned char OC_MB_PHASE[4][4]={
  {0,1,3,2},
  {0,3,1,2},
  {0,3,1,2},
  {2,3,1,0}
};
- static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
- unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs,
- const unsigned _skip_ssd[12]){
- unsigned frag_satd[12];
- oc_mv lbmvs[4];
- oc_mv cbmvs[4];
- const unsigned char *src;
- const unsigned char *ref;
- int ystride;
- const ptrdiff_t *frag_buf_offs;
- oc_mv *frag_mvs;
- const oc_mb_map_plane *mb_map;
- const unsigned char *map_idxs;
- int map_nidxs;
- int nqis;
- int mapii;
- int mapi;
- int mv_offs[2];
- int dx;
- int dy;
- int pli;
- int bi;
- ptrdiff_t fragi;
- ptrdiff_t frag_offs;
- int bits0;
- int bits1;
- unsigned satd;
- src=_enc->state.ref_frame_data[OC_FRAME_IO];
- ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_PREV]];
- ystride=_enc->state.ref_ystride[0];
- frag_buf_offs=_enc->state.frag_buf_offs;
- frag_mvs=_enc->state.frag_mvs;
- mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
- _modec->rate=_modec->ssd=0;
- for(bi=0;bi<4;bi++){
- fragi=mb_map[0][bi];
- dx=_mv[bi][0];
- dy=_mv[bi][1];
- /*Save the block MVs as the current ones while we're here; we'll replace
- them if we don't ultimately choose 4MV mode.*/
- frag_mvs[fragi][0]=(signed char)dx;
- frag_mvs[fragi][1]=(signed char)dy;
- frag_offs=frag_buf_offs[fragi];
- if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,dx,dy)>1){
- satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
- }
- else{
- satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
- }
- frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd;
- }
- oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,
- _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,1);
- /*Figure out which blocks are being skipped and give them (0,0) MVs.*/
- bits0=0;
- bits1=0;
- nqis=_enc->state.nqis;
- for(bi=0;bi<4;bi++){
- if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis){
- memset(lbmvs+bi,0,sizeof(*lbmvs));
- }
- else{
- memcpy(lbmvs+bi,_mv+bi,sizeof(*lbmvs));
- bits0+=OC_MV_BITS[0][_mv[bi][0]+31]+OC_MV_BITS[0][_mv[bi][1]+31];
- bits1+=12;
- }
- }
- (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs,
- (const oc_mv *)lbmvs);
- map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
- map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
- ystride=_enc->state.ref_ystride[1];
- for(mapii=4;mapii<map_nidxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_map[pli][bi];
- dx=cbmvs[bi][0];
- dy=cbmvs[bi][1];
- frag_offs=frag_buf_offs[fragi];
- /*TODO: We could save half these calls by re-using the results for the Cb
- and Cr planes; is it worth it?*/
- if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,dx,dy)>1){
- satd=oc_enc_frag_satd2_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
- }
- else{
- satd=oc_enc_frag_satd_thresh(_enc,src+frag_offs,
- ref+frag_offs+mv_offs[0],ystride,UINT_MAX);
- }
- frag_satd[mapii]=satd;
- }
- oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,1);
- _modec->overhead+=
- oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR)
- +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1)
- -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
- oc_mode_set_cost(_modec,_enc->lambda);
- }
- int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){
- oc_set_chroma_mvs_func set_chroma_mvs;
- oc_enc_pipeline_state pipe;
- oc_qii_state intra_luma_qs;
- oc_mv last_mv;
- oc_mv prior_mv;
- ogg_int64_t interbits;
- ogg_int64_t intrabits;
- const unsigned char *map_idxs;
- int nmap_idxs;
- unsigned *coded_mbis;
- unsigned *uncoded_mbis;
- size_t ncoded_mbis;
- size_t nuncoded_mbis;
- oc_sb_flags *sb_flags;
- signed char *mb_modes;
- const oc_sb_map *sb_maps;
- const oc_mb_map *mb_maps;
- oc_mb_enc_info *embs;
- oc_fragment *frags;
- oc_mv *frag_mvs;
- int qi;
- unsigned stripe_sby;
- unsigned mcu_nvsbs;
- int notstart;
- int notdone;
- int vdec;
- unsigned sbi;
- unsigned sbi_end;
- int refi;
- int pli;
- set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
- _enc->state.frame_type=OC_INTER_FRAME;
- oc_mode_scheme_chooser_reset(&_enc->chooser);
- oc_enc_tokenize_start(_enc);
- oc_enc_pipeline_init(_enc,&pipe);
- if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs);
- _enc->mv_bits[0]=_enc->mv_bits[1]=0;
- interbits=intrabits=0;
- last_mv[0]=last_mv[1]=prior_mv[0]=prior_mv[1]=0;
- /*Choose MVs and MB modes and quantize and code luma.
- Must be done in Hilbert order.*/
- map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
- nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
- qi=_enc->state.qis[0];
- coded_mbis=_enc->coded_mbis;
- uncoded_mbis=coded_mbis+_enc->state.nmbs;
- ncoded_mbis=0;
- nuncoded_mbis=0;
- _enc->state.ncoded_fragis[0]=0;
- _enc->state.ncoded_fragis[1]=0;
- _enc->state.ncoded_fragis[2]=0;
- sb_flags=_enc->state.sb_flags;
- mb_modes=_enc->state.mb_modes;
- sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
- mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
- embs=_enc->mb_info;
- frags=_enc->state.frags;
- frag_mvs=_enc->state.frag_mvs;
- vdec=!(_enc->state.info.pixel_fmt&2);
- notstart=0;
- notdone=1;
- mcu_nvsbs=_enc->mcu_nvsbs;
- for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
- notdone=oc_enc_pipeline_set_stripe(_enc,&pipe,stripe_sby);
- sbi_end=pipe.sbi_end[0];
- for(sbi=pipe.sbi0[0];sbi<sbi_end;sbi++){
- int quadi;
- /*Mode addressing is through Y plane, always 4 MB per SB.*/
- for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
- oc_mode_choice modes[8];
- unsigned skip_ssd[12];
- unsigned intra_satd[12];
- int mb_mv_bits_0;
- int mb_gmv_bits_0;
- int inter_mv_pref;
- int mb_mode;
- int dx;
- int dy;
- unsigned mbi;
- int mapii;
- int mapi;
- int bi;
- ptrdiff_t fragi;
- mbi=sbi<<2|quadi;
- /*Motion estimation:
- We always do a basic 1MV search for all macroblocks, coded or not,
- keyframe or not.*/
- if(!_recode&&_enc->sp_level<OC_SP_LEVEL_NOMC)oc_mcenc_search(_enc,mbi);
- dx=dy=0;
- /*Find the block choice with the lowest estimated coding cost.
- If a Cb or Cr block is coded but no Y' block from a macro block then
- the mode MUST be OC_MODE_INTER_NOMV.
- This is the default state to which the mode data structure is
- initialised in encoder and decoder at the start of each frame.*/
- /*Block coding cost is estimated from correlated SATD metrics.*/
- /*At this point, all blocks that are in frame are still marked coded.*/
- if(!_recode){
- memcpy(embs[mbi].unref_mv,
- embs[mbi].analysis_mv[0],sizeof(embs[mbi].unref_mv));
- embs[mbi].refined=0;
- }
- oc_mb_intra_satd(_enc,mbi,intra_satd);
- /*Estimate the cost of coding this MB in a keyframe.*/
- if(_allow_keyframe){
- oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
- pipe.fr+0,&intra_luma_qs,intra_satd,OC_NOSKIP);
- intrabits+=modes[OC_MODE_INTRA].rate;
- for(bi=0;bi<4;bi++){
- oc_qii_state_advance(&intra_luma_qs,&intra_luma_qs,
- modes[OC_MODE_INTRA].qii[bi]);
- }
- }
- /*Estimate the cost in a delta frame for various modes.*/
- oc_skip_cost(_enc,&pipe,mbi,skip_ssd);
- oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
- OC_MODE_INTER_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
- if(_enc->sp_level<OC_SP_LEVEL_NOMC){
- oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
- pipe.fr+0,pipe.qs+0,intra_satd,skip_ssd);
- mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
- OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV],
- pipe.fr+0,pipe.qs+0,skip_ssd);
- oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST,mbi,
- OC_MODE_INTER_MV_LAST,last_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
- oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST2,mbi,
- OC_MODE_INTER_MV_LAST2,prior_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
- oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
- embs[mbi].block_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
- oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
- OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
- mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
- OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD],
- pipe.fr+0,pipe.qs+0,skip_ssd);
- /*The explicit MV modes (2,6,7) have not yet gone through halfpel
- refinement.
- We choose the explicit MV mode that's already furthest ahead on
- R-D cost and refine only that one.
- We have to be careful to remember which ones we've refined so that
- we don't refine it again if we re-encode this frame.*/
- inter_mv_pref=_enc->lambda*3;
- if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_INTER_MV].cost&&
- modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_GOLDEN_MV].cost){
- if(!(embs[mbi].refined&0x80)){
- oc_mcenc_refine4mv(_enc,mbi);
- embs[mbi].refined|=0x80;
- }
- oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
- embs[mbi].ref_mv,pipe.fr+0,pipe.qs+0,skip_ssd);
- }
- else if(modes[OC_MODE_GOLDEN_MV].cost+inter_mv_pref<
- modes[OC_MODE_INTER_MV].cost){
- if(!(embs[mbi].refined&0x40)){
- oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_GOLD);
- embs[mbi].refined|=0x40;
- }
- mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
- OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD],
- pipe.fr+0,pipe.qs+0,skip_ssd);
- }
- if(!(embs[mbi].refined&0x04)){
- oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_PREV);
- embs[mbi].refined|=0x04;
- }
- mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
- OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV],
- pipe.fr+0,pipe.qs+0,skip_ssd);
- /*Finally, pick the mode with the cheapest estimated R-D cost.*/
- mb_mode=OC_MODE_INTER_NOMV;
- if(modes[OC_MODE_INTRA].cost<modes[OC_MODE_INTER_NOMV].cost){
- mb_mode=OC_MODE_INTRA;
- }
- if(modes[OC_MODE_INTER_MV_LAST].cost<modes[mb_mode].cost){
- mb_mode=OC_MODE_INTER_MV_LAST;
- }
- if(modes[OC_MODE_INTER_MV_LAST2].cost<modes[mb_mode].cost){
- mb_mode=OC_MODE_INTER_MV_LAST2;
- }
- if(modes[OC_MODE_GOLDEN_NOMV].cost<modes[mb_mode].cost){
- mb_mode=OC_MODE_GOLDEN_NOMV;
- }
- if(modes[OC_MODE_GOLDEN_MV].cost<modes[mb_mode].cost){
- mb_mode=OC_MODE_GOLDEN_MV;
- }
- if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[mb_mode].cost){
- mb_mode=OC_MODE_INTER_MV_FOUR;
- }
- /*We prefer OC_MODE_INTER_MV, but not over LAST and LAST2.*/
- if(mb_mode==OC_MODE_INTER_MV_LAST||mb_mode==OC_MODE_INTER_MV_LAST2){
- inter_mv_pref=0;
- }
- if(modes[OC_MODE_INTER_MV].cost<modes[mb_mode].cost+inter_mv_pref){
- mb_mode=OC_MODE_INTER_MV;
- }
- }
- else{
- oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
- OC_MODE_GOLDEN_NOMV,pipe.fr+0,pipe.qs+0,skip_ssd);
- mb_mode=OC_MODE_INTER_NOMV;
- if(modes[OC_MODE_INTRA].cost<modes[OC_MODE_INTER_NOMV].cost){
- mb_mode=OC_MODE_INTRA;
- }
- if(modes[OC_MODE_GOLDEN_NOMV].cost<modes[mb_mode].cost){
- mb_mode=OC_MODE_GOLDEN_NOMV;
- }
- mb_mv_bits_0=mb_gmv_bits_0=0;
- }
- mb_modes[mbi]=mb_mode;
- /*Propagate the MVs to the luma blocks.*/
- if(mb_mode!=OC_MODE_INTER_MV_FOUR){
- switch(mb_mode){
- case OC_MODE_INTER_MV:{
- dx=embs[mbi].analysis_mv[0][OC_FRAME_PREV][0];
- dy=embs[mbi].analysis_mv[0][OC_FRAME_PREV][1];
- }break;
- case OC_MODE_INTER_MV_LAST:{
- dx=last_mv[0];
- dy=last_mv[1];
- }break;
- case OC_MODE_INTER_MV_LAST2:{
- dx=prior_mv[0];
- dy=prior_mv[1];
- }break;
- case OC_MODE_GOLDEN_MV:{
- dx=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][0];
- dy=embs[mbi].analysis_mv[0][OC_FRAME_GOLD][1];
- }break;
- }
- for(bi=0;bi<4;bi++){
- fragi=mb_maps[mbi][0][bi];
- frag_mvs[fragi][0]=(signed char)dx;
- frag_mvs[fragi][1]=(signed char)dy;
- }
- }
- for(bi=0;bi<4;bi++){
- fragi=sb_maps[mbi>>2][mbi&3][bi];
- frags[fragi].qii=modes[mb_mode].qii[bi];
- }
- if(oc_enc_mb_transform_quantize_luma(_enc,&pipe,mbi,
- modes[mb_mode].overhead>>OC_BIT_SCALE)>0){
- int orig_mb_mode;
- orig_mb_mode=mb_mode;
- mb_mode=mb_modes[mbi];
- switch(mb_mode){
- case OC_MODE_INTER_MV:{
- memcpy(prior_mv,last_mv,sizeof(prior_mv));
- /*If we're backing out from 4MV, find the MV we're actually
- using.*/
- if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){
- for(bi=0;;bi++){
- fragi=mb_maps[mbi][0][bi];
- if(frags[fragi].coded){
- memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
- dx=frag_mvs[fragi][0];
- dy=frag_mvs[fragi][1];
- break;
- }
- }
- mb_mv_bits_0=OC_MV_BITS[0][dx+31]+OC_MV_BITS[0][dy+31];
- }
- /*Otherwise we used the original analysis MV.*/
- else{
- memcpy(last_mv,
- embs[mbi].analysis_mv[0][OC_FRAME_PREV],sizeof(last_mv));
- }
- _enc->mv_bits[0]+=mb_mv_bits_0;
- _enc->mv_bits[1]+=12;
- }break;
- case OC_MODE_INTER_MV_LAST2:{
- oc_mv tmp_mv;
- memcpy(tmp_mv,prior_mv,sizeof(tmp_mv));
- memcpy(prior_mv,last_mv,sizeof(prior_mv));
- memcpy(last_mv,tmp_mv,sizeof(last_mv));
- }break;
- case OC_MODE_GOLDEN_MV:{
- _enc->mv_bits[0]+=mb_gmv_bits_0;
- _enc->mv_bits[1]+=12;
- }break;
- case OC_MODE_INTER_MV_FOUR:{
- oc_mv lbmvs[4];
- oc_mv cbmvs[4];
- memcpy(prior_mv,last_mv,sizeof(prior_mv));
- for(bi=0;bi<4;bi++){
- fragi=mb_maps[mbi][0][bi];
- if(frags[fragi].coded){
- memcpy(last_mv,frag_mvs[fragi],sizeof(last_mv));
- memcpy(lbmvs[bi],frag_mvs[fragi],sizeof(lbmvs[bi]));
- _enc->mv_bits[0]+=OC_MV_BITS[0][frag_mvs[fragi][0]+31]
- +OC_MV_BITS[0][frag_mvs[fragi][1]+31];
- _enc->mv_bits[1]+=12;
- }
- /*Replace the block MVs for not-coded blocks with (0,0).*/
- else memset(lbmvs[bi],0,sizeof(lbmvs[bi]));
- }
- (*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
- for(mapii=4;mapii<nmap_idxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_maps[mbi][pli][bi];
- frags[fragi].mb_mode=mb_mode;
- frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
- memcpy(frag_mvs[fragi],cbmvs[bi],sizeof(frag_mvs[fragi]));
- }
- }break;
- }
- coded_mbis[ncoded_mbis++]=mbi;
- oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode);
- interbits+=modes[mb_mode].rate+modes[mb_mode].overhead;
- }
- else{
- *(uncoded_mbis-++nuncoded_mbis)=mbi;
- mb_mode=OC_MODE_INTER_NOMV;
- dx=dy=0;
- }
- /*Propagate final MB mode and MVs to the chroma blocks.
- This has already been done for 4MV mode, since it requires individual
- block motion vectors.*/
- if(mb_mode!=OC_MODE_INTER_MV_FOUR){
- for(mapii=4;mapii<nmap_idxs;mapii++){
- mapi=map_idxs[mapii];
- pli=mapi>>2;
- bi=mapi&3;
- fragi=mb_maps[mbi][pli][bi];
- frags[fragi].mb_mode=mb_mode;
- /*If we switched from 4MV mode to INTER_MV mode, then the qii
- values won't have been chosen with the right MV, but it's
- probaby not worth re-estimating them.*/
- frags[fragi].qii=modes[mb_mode].qii[mapii];
- frag_mvs[fragi][0]=(signed char)dx;
- frag_mvs[fragi][1]=(signed char)dy;
- }
- }
- }
- oc_fr_state_flush_sb(pipe.fr+0);
- sb_flags[sbi].coded_fully=pipe.fr[0].sb_full;
- sb_flags[sbi].coded_partially=pipe.fr[0].sb_partial;
- }
- oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,0,notstart,notdone);
- /*Code chroma planes.*/
- for(pli=1;pli<3;pli++){
- oc_enc_sb_transform_quantize_chroma(_enc,&pipe,
- pli,pipe.sbi0[pli],pipe.sbi_end[pli]);
- oc_enc_pipeline_finish_mcu_plane(_enc,&pipe,pli,notstart,notdone);
- }
- notstart=1;
- }
- /*Finish filling in the reference frame borders.*/
- refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
- for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
- /*Finish adding flagging overhead costs to inter bit counts to determine if
- we should have coded a key frame instead.*/
- if(_allow_keyframe){
- if(interbits>intrabits)return 1;
- /*Technically the chroma plane counts are over-estimations, because they
- don't account for continuing runs from the luma planes, but the
- inaccuracy is small.*/
- for(pli=0;pli<3;pli++)interbits+=pipe.fr[pli].bits<<OC_BIT_SCALE;
- interbits+=OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
- interbits+=
- _enc->chooser.scheme_bits[_enc->chooser.scheme_list[0]]<<OC_BIT_SCALE;
- if(interbits>intrabits)return 1;
- }
- _enc->ncoded_mbis=ncoded_mbis;
- /*Compact the coded fragment list.*/
- {
- ptrdiff_t ncoded_fragis;
- ncoded_fragis=_enc->state.ncoded_fragis[0];
- for(pli=1;pli<3;pli++){
- memmove(_enc->state.coded_fragis+ncoded_fragis,
- _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset,
- _enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis));
- ncoded_fragis+=_enc->state.ncoded_fragis[pli];
- }
- _enc->state.ntotal_coded_fragis=ncoded_fragis;
- }
- return 0;
- }
- #if defined(OC_COLLECT_METRICS)
- # include <stdio.h>
- # include <math.h>
- /*TODO: It may be helpful (for block-level quantizers especially) to separate
- out the contributions from AC and DC into separate tables.*/
- # define OC_ZWEIGHT (0.25)
- static void oc_mode_metrics_add(oc_mode_metrics *_metrics,
- double _w,int _satd,int _rate,double _rmse){
- double rate;
- /*Accumulate statistics without the scaling; this lets us change the scale
- factor yet still use old data.*/
- rate=ldexp(_rate,-OC_BIT_SCALE);
- if(_metrics->fragw>0){
- double dsatd;
- double drate;
- double drmse;
- double w;
- dsatd=_satd-_metrics->satd/_metrics->fragw;
- drate=rate-_metrics->rate/_metrics->fragw;
- drmse=_rmse-_metrics->rmse/_metrics->fragw;
- w=_metrics->fragw*_w/(_metrics->fragw+_w);
- _metrics->satd2+=dsatd*dsatd*w;
- _metrics->satdrate+=dsatd*drate*w;
- _metrics->rate2+=drate*drate*w;
- _metrics->satdrmse+=dsatd*drmse*w;
- _metrics->rmse2+=drmse*drmse*w;
- }
- _metrics->fragw+=_w;
- _metrics->satd+=_satd*_w;
- _metrics->rate+=rate*_w;
- _metrics->rmse+=_rmse*_w;
- }
- static void oc_mode_metrics_merge(oc_mode_metrics *_dst,
- const oc_mode_metrics *_src,int _n){
- int i;
- /*Find a non-empty set of metrics.*/
- for(i=0;i<_n&&_src[i].fragw<=0;i++);
- if(i>=_n){
- memset(_dst,0,sizeof(*_dst));
- return;
- }
- memcpy(_dst,_src+i,sizeof(*_dst));
- /*And iterate over the remaining non-empty sets of metrics.*/
- for(i++;i<_n;i++)if(_src[i].fragw>0){
- double wa;
- double wb;
- double dsatd;
- double drate;
- double drmse;
- double w;
- wa=_dst->fragw;
- wb=_src[i].fragw;
- dsatd=_src[i].satd/wb-_dst->satd/wa;
- drate=_src[i].rate/wb-_dst->rate/wa;
- drmse=_src[i].rmse/wb-_dst->rmse/wa;
- w=wa*wb/(wa+wb);
- _dst->fragw+=_src[i].fragw;
- _dst->satd+=_src[i].satd;
- _dst->rate+=_src[i].rate;
- _dst->rmse+=_src[i].rmse;
- _dst->satd2+=_src[i].satd2+dsatd*dsatd*w;
- _dst->satdrate+=_src[i].satdrate+dsatd*drate*w;
- _dst->rate2+=_src[i].rate2+drate*drate*w;
- _dst->satdrmse+=_src[i].satdrmse+dsatd*drmse*w;
- _dst->rmse2+=_src[i].rmse2+drmse*drmse*w;
- }
- }
- /*Compile collected SATD/rate/RMSE metrics into a form that's immediately
- useful for mode decision.*/
- static void oc_enc_mode_metrics_update(oc_enc_ctx *_enc,int _qi){
- int pli;
- int qti;
- oc_restore_fpu(&_enc->state);
- /*Convert raw collected data into cleaned up sample points.*/
- for(pli=0;pli<3;pli++){
- for(qti=0;qti<2;qti++){
- double fragw;
- int bin0;
- int bin1;
- int bin;
- fragw=0;
- bin0=bin1=0;
- for(bin=0;bin<OC_SAD_BINS;bin++){
- oc_mode_metrics metrics;
- OC_MODE_RD[_qi][pli][qti][bin].rate=0;
- OC_MODE_RD[_qi][pli][qti][bin].rmse=0;
- /*Find some points on either side of the current bin.*/
- while((bin1<bin+1||fragw<OC_ZWEIGHT)&&bin1<OC_SAD_BINS-1){
- fragw+=OC_MODE_METRICS[_qi][pli][qti][bin1++].fragw;
- }
- while(bin0+1<bin&&bin0+1<bin1&&
- fragw-OC_MODE_METRICS[_qi][pli][qti][bin0].fragw>=OC_ZWEIGHT){
- fragw-=OC_MODE_METRICS[_qi][pli][qti][bin0++].fragw;
- }
- /*Merge statistics and fit lines.*/
- oc_mode_metrics_merge(&metrics,
- OC_MODE_METRICS[_qi][pli][qti]+bin0,bin1-bin0);
- if(metrics.fragw>0&&metrics.satd2>0){
- double a;
- double b;
- double msatd;
- double mrate;
- double mrmse;
- double rate;
- double rmse;
- msatd=metrics.satd/metrics.fragw;
- mrate=metrics.rate/metrics.fragw;
- mrmse=metrics.rmse/metrics.fragw;
- /*Compute the points on these lines corresponding to the actual bin
- value.*/
- b=metrics.satdrate/metrics.satd2;
- a=mrate-b*msatd;
- rate=ldexp(a+b*(bin<<OC_SAD_SHIFT),OC_BIT_SCALE);
- OC_MODE_RD[_qi][pli][qti][bin].rate=
- (ogg_int16_t)OC_CLAMPI(-32768,(int)(rate+0.5),32767);
- b=metrics.satdrmse/metrics.satd2;
- a=mrmse-b*msatd;
- rmse=ldexp(a+b*(bin<<OC_SAD_SHIFT),OC_RMSE_SCALE);
- OC_MODE_RD[_qi][pli][qti][bin].rmse=
- (ogg_int16_t)OC_CLAMPI(-32768,(int)(rmse+0.5),32767);
- }
- }
- }
- }
- }
/*This token skipping code was once shared with the decoder (and, at one
   point, with other parts of the encoder).
  Other optimizations made it obsolete there, so it now lives here, where it
   is only compiled when it is actually needed.*/


/*The signature of a handler that reports how much a token skips.
  _token:      The token value to skip.
  _extra_bits: The extra bits attached to this token.
  Return: A positive value is the number of coefficients to skip in the
           current block.
          Otherwise, the negative of the return value is the number of blocks
           to be ended.*/
typedef ptrdiff_t (*oc_token_skip_func)(int _token,int _extra_bits);
/*Handles the simple end of block tokens.
  Return: The negative of the number of blocks ended: the extra bits plus a
           per-token base looked up from a packed table.*/
static ptrdiff_t oc_token_skip_eob(int _token,int _extra_bits){
  /*Kept in an int so the negation below is done in signed arithmetic.*/
  int run_base;
  run_base=OC_UNIBBLE_TABLE32(0,1,2,3,7,15,0,0,_token)+1;
  return -_extra_bits-run_base;
}
/*Handles the last EOB token, which has a special case: an EOB run of size
   zero ends all the remaining blocks in the frame.
  _token:      The token value (unused).
  _extra_bits: The run length attached to the token.
  Return: The negative of the number of blocks ended.
          For a zero run length this is the most negative count we can
           represent, -(~(size_t)0>>1).*/
static ptrdiff_t oc_token_skip_eob6(int _token,int _extra_bits){
  (void)_token;
  /*Note: We want to return -PTRDIFF_MAX, but that requires C99, which is not
     yet available everywhere; this should be equivalent.
    The cast comes before the negation so that the negation is done in the
     signed type; negating the size_t value would wrap, and converting the
     out-of-range result back to ptrdiff_t is implementation-defined.*/
  if(!_extra_bits)return -(ptrdiff_t)(~(size_t)0>>1);
  return -_extra_bits;
}
/*Handles the pure zero run tokens.
  _token:      The token value (not consulted).
  _extra_bits: The extra bits attached to the token.
  Return: The number of coefficients to skip, one more than the extra bits.*/
static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
  int ncoeffs;
  ncoeffs=_extra_bits+1;
  return ncoeffs;
}
/*Handles a normal coefficient value token.
  The parameters are unused, but are declared so that the function has
   exactly the oc_token_skip_func signature it is called through; calling a
   function through a pointer to an incompatible function type is undefined
   behavior.
  _token:      The token value (unused).
  _extra_bits: The extra bits attached to the token (unused).
  Return: Always 1: a value token covers a single coefficient.*/
static ptrdiff_t oc_token_skip_val(int _token,int _extra_bits){
  (void)_token;
  (void)_extra_bits;
  return 1;
}
- /*Handles a category 1A zero run/coefficient value combo token.*/
- static ptrdiff_t oc_token_skip_run_cat1a(int _token){
- return _token-OC_DCT_RUN_CAT1A+2;
- }
- /*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.*/
- static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
- int run_cati;
- int ncoeffs_mask;
- int ncoeffs_adjust;
- run_cati=_token-OC_DCT_RUN_CAT1B;
- ncoeffs_mask=OC_BYTE_TABLE32(3,7,0,1,run_cati);
- ncoeffs_adjust=OC_BYTE_TABLE32(7,11,2,3,run_cati);
- return (_extra_bits&ncoeffs_mask)+ncoeffs_adjust;
- }
- /*A jump table for computing the number of coefficients or blocks to skip for
- a given token value.
- This reduces all the conditional branches, etc., needed to parse these token
- values down to one indirect jump.*/
- static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
- oc_token_skip_eob,
- oc_token_skip_eob,
- oc_token_skip_eob,
- oc_token_skip_eob,
- oc_token_skip_eob,
- oc_token_skip_eob,
- oc_token_skip_eob6,
- oc_token_skip_zrl,
- oc_token_skip_zrl,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_val,
- (oc_token_skip_func)oc_token_skip_run_cat1a,
- (oc_token_skip_func)oc_token_skip_run_cat1a,
- (oc_token_skip_func)oc_token_skip_run_cat1a,
- (oc_token_skip_func)oc_token_skip_run_cat1a,
- (oc_token_skip_func)oc_token_skip_run_cat1a,
- oc_token_skip_run,
- oc_token_skip_run,
- oc_token_skip_run,
- oc_token_skip_run
- };
- /*Determines the number of blocks or coefficients to be skipped for a given
- token value.
- _token: The token value to skip.
- _extra_bits: The extra bits attached to this token.
- Return: A positive value indicates that number of coefficients are to be
- skipped in the current block.
- Otherwise, the negative of the return value indicates that number of
- blocks are to be ended.
- 0 will never be returned, so that at least one coefficient in one
- block will always be decoded for every token.*/
- static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
- return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits);
- }
- void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){
- static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
- 0,16,16,16,16,16,32,32,
- 32,32,32,32,32,32,32,48,
- 48,48,48,48,48,48,48,48,
- 48,48,48,48,64,64,64,64,
- 64,64,64,64,64,64,64,64,
- 64,64,64,64,64,64,64,64,
- 64,64,64,64,64,64,64,64
- };
- const oc_fragment *frags;
- const unsigned *frag_satd;
- const unsigned *frag_ssd;
- const ptrdiff_t *coded_fragis;
- ptrdiff_t ncoded_fragis;
- ptrdiff_t fragii;
- double fragw;
- int qti;
- int qii;
- int qi;
- int pli;
- int zzi;
- int token;
- int eb;
- oc_restore_fpu(&_enc->state);
- /*Load any existing mode metrics if we haven't already.*/
- if(!oc_has_mode_metrics){
- FILE *fmetrics;
- memset(OC_MODE_METRICS,0,sizeof(OC_MODE_METRICS));
- fmetrics=fopen("modedec.stats","rb");
- if(fmetrics!=NULL){
- fread(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics);
- fclose(fmetrics);
- }
- for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
- oc_has_mode_metrics=1;
- }
- qti=_enc->state.frame_type;
- frags=_enc->state.frags;
- frag_satd=_enc->frag_satd;
- frag_ssd=_enc->frag_ssd;
- coded_fragis=_enc->state.coded_fragis;
- ncoded_fragis=fragii=0;
- /*Weight the fragments by the inverse frame size; this prevents HD content
- from dominating the statistics.*/
- fragw=1.0/_enc->state.nfrags;
- for(pli=0;pli<3;pli++){
- ptrdiff_t ti[64];
- int eob_token[64];
- int eob_run[64];
- /*Set up token indices and eob run counts.
- We don't bother trying to figure out the real cost of the runs that span
- coefficients; instead we use the costs that were available when R-D
- token optimization was done.*/
- for(zzi=0;zzi<64;zzi++){
- ti[zzi]=_enc->dct_token_offs[pli][zzi];
- if(ti[zzi]>0){
- token=_enc->dct_tokens[pli][zzi][0];
- eb=_enc->extra_bits[pli][zzi][0];
- eob_token[zzi]=token;
- eob_run[zzi]=-oc_dct_token_skip(token,eb);
- }
- else{
- eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
- eob_run[zzi]=0;
- }
- }
- /*Scan the list of coded fragments for this plane.*/
- ncoded_fragis+=_enc->state.ncoded_fragis[pli];
- for(;fragii<ncoded_fragis;fragii++){
- ptrdiff_t fragi;
- ogg_uint32_t frag_bits;
- int huffi;
- int skip;
- int mb_mode;
- unsigned satd;
- int bin;
- fragi=coded_fragis[fragii];
- frag_bits=0;
- for(zzi=0;zzi<64;){
- if(eob_run[zzi]>0){
- /*We've reached the end of the block.*/
- eob_run[zzi]--;
- break;
- }
- huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1]
- +OC_ZZI_HUFF_OFFSET[zzi];
- if(eob_token[zzi]<OC_NDCT_EOB_TOKEN_MAX){
- /*This token caused an EOB run to be flushed.
- Therefore it gets the bits associated with it.*/
- frag_bits+=_enc->huff_codes[huffi][eob_token[zzi]].nbits
- +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]];
- eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
- }
- token=_enc->dct_tokens[pli][zzi][ti[zzi]];
- eb=_enc->extra_bits[pli][zzi][ti[zzi]];
- ti[zzi]++;
- skip=oc_dct_token_skip(token,eb);
- if(skip<0){
- eob_token[zzi]=token;
- eob_run[zzi]=-skip;
- }
- else{
- /*A regular DCT value token; accumulate the bits for it.*/
- frag_bits+=_enc->huff_codes[huffi][token].nbits
- +OC_DCT_TOKEN_EXTRA_BITS[token];
- zzi+=skip;
- }
- }
- mb_mode=frags[fragi].mb_mode;
- qi=_enc->state.qis[frags[fragi].qii];
- satd=frag_satd[fragi]<<(pli+1&2);
- bin=OC_MINI(satd>>OC_SAD_SHIFT,OC_SAD_BINS-1);
- oc_mode_metrics_add(OC_MODE_METRICS[qi][pli][mb_mode!=OC_MODE_INTRA]+bin,
- fragw,satd,frag_bits<<OC_BIT_SCALE,sqrt(frag_ssd[fragi]));
- }
- }
- /*Update global SATD/rate/RMSE estimation matrix.*/
- for(qii=0;qii<_enc->state.nqis;qii++){
- oc_enc_mode_metrics_update(_enc,_enc->state.qis[qii]);
- }
- }
- void oc_enc_mode_metrics_dump(oc_enc_ctx *_enc){
- FILE *fmetrics;
- int qi;
- /*Generate sample points for complete list of QI values.*/
- for(qi=0;qi<64;qi++)oc_enc_mode_metrics_update(_enc,qi);
- fmetrics=fopen("modedec.stats","wb");
- if(fmetrics!=NULL){
- fwrite(OC_MODE_METRICS,sizeof(OC_MODE_METRICS),1,fmetrics);
- fclose(fmetrics);
- }
- fprintf(stdout,
- "/*File generated by libtheora with OC_COLLECT_METRICS"
- " defined at compile time.*/\n"
- "#if !defined(_modedec_H)\n"
- "# define _modedec_H (1)\n"
- "\n"
- "\n"
- "\n"
- "# if defined(OC_COLLECT_METRICS)\n"
- "typedef struct oc_mode_metrics oc_mode_metrics;\n"
- "# endif\n"
- "typedef struct oc_mode_rd oc_mode_rd;\n"
- "\n"
- "\n"
- "\n"
- "/*The number of extra bits of precision at which to store rate"
- " metrics.*/\n"
- "# define OC_BIT_SCALE (%i)\n"
- "/*The number of extra bits of precision at which to store RMSE metrics.\n"
- " This must be at least half OC_BIT_SCALE (rounded up).*/\n"
- "# define OC_RMSE_SCALE (%i)\n"
- "/*The number of bins to partition statistics into.*/\n"
- "# define OC_SAD_BINS (%i)\n"
- "/*The number of bits of precision to drop"
- " from SAD scores to assign them to a\n"
- " bin.*/\n"
- "# define OC_SAD_SHIFT (%i)\n"
- "\n"
- "\n"
- "\n"
- "# if defined(OC_COLLECT_METRICS)\n"
- "struct oc_mode_metrics{\n"
- " double fragw;\n"
- " double satd;\n"
- " double rate;\n"
- " double rmse;\n"
- " double satd2;\n"
- " double satdrate;\n"
- " double rate2;\n"
- " double satdrmse;\n"
- " double rmse2;\n"
- "};\n"
- "\n"
- "\n"
- "int oc_has_mode_metrics;\n"
- "oc_mode_metrics OC_MODE_METRICS[64][3][2][OC_SAD_BINS];\n"
- "# endif\n"
- "\n"
- "\n"
- "\n"
- "struct oc_mode_rd{\n"
- " ogg_int16_t rate;\n"
- " ogg_int16_t rmse;\n"
- "};\n"
- "\n"
- "\n"
- "# if !defined(OC_COLLECT_METRICS)\n"
- "static const\n"
- "# endif\n"
- "oc_mode_rd OC_MODE_RD[64][3][2][OC_SAD_BINS]={\n",
- OC_BIT_SCALE,OC_RMSE_SCALE,OC_SAD_BINS,OC_SAD_SHIFT);
- for(qi=0;qi<64;qi++){
- int pli;
- fprintf(stdout," {\n");
- for(pli=0;pli<3;pli++){
- int qti;
- fprintf(stdout," {\n");
- for(qti=0;qti<2;qti++){
- int bin;
- static const char *pl_names[3]={"Y'","Cb","Cr"};
- static const char *qti_names[2]={"INTRA","INTER"};
- fprintf(stdout," /*%s qi=%i %s*/\n",
- pl_names[pli],qi,qti_names[qti]);
- fprintf(stdout," {\n");
- fprintf(stdout," ");
- for(bin=0;bin<OC_SAD_BINS;bin++){
- if(bin&&!(bin&0x3))fprintf(stdout,"\n ");
- fprintf(stdout,"{%5i,%5i}",
- OC_MODE_RD[qi][pli][qti][bin].rate,
- OC_MODE_RD[qi][pli][qti][bin].rmse);
- if(bin+1<OC_SAD_BINS)fprintf(stdout,",");
- }
- fprintf(stdout,"\n }");
- if(qti<1)fprintf(stdout,",");
- fprintf(stdout,"\n");
- }
- fprintf(stdout," }");
- if(pli<2)fprintf(stdout,",");
- fprintf(stdout,"\n");
- }
- fprintf(stdout," }");
- if(qi<63)fprintf(stdout,",");
- fprintf(stdout,"\n");
- }
- fprintf(stdout,
- "};\n"
- "\n"
- "#endif\n");
- }
- #endif
|