vp9_mcomp.c 85 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <limits.h>
  11. #include <math.h>
  12. #include <stdio.h>
  13. #include "./vpx_config.h"
  14. #include "./vpx_dsp_rtcd.h"
  15. #include "vpx_mem/vpx_mem.h"
  16. #include "vpx_ports/mem.h"
  17. #include "vp9/common/vp9_common.h"
  18. #include "vp9/common/vp9_reconinter.h"
  19. #include "vp9/encoder/vp9_encoder.h"
  20. #include "vp9/encoder/vp9_mcomp.h"
  21. // #define NEW_DIAMOND_SEARCH
  22. static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
  23. const MV *mv) {
  24. return &buf->buf[mv->row * buf->stride + mv->col];
  25. }
  26. void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
  27. int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  28. int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  29. int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  30. int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
  31. col_min = MAX(col_min, (MV_LOW >> 3) + 1);
  32. row_min = MAX(row_min, (MV_LOW >> 3) + 1);
  33. col_max = MIN(col_max, (MV_UPP >> 3) - 1);
  34. row_max = MIN(row_max, (MV_UPP >> 3) - 1);
  35. // Get intersection of UMV window and valid MV window to reduce # of checks
  36. // in diamond search.
  37. if (x->mv_col_min < col_min)
  38. x->mv_col_min = col_min;
  39. if (x->mv_col_max > col_max)
  40. x->mv_col_max = col_max;
  41. if (x->mv_row_min < row_min)
  42. x->mv_row_min = row_min;
  43. if (x->mv_row_max > row_max)
  44. x->mv_row_max = row_max;
  45. }
  46. int vp9_init_search_range(int size) {
  47. int sr = 0;
  48. // Minimum search size no matter what the passed in value.
  49. size = MAX(16, size);
  50. while ((size << sr) < MAX_FULL_PEL_VAL)
  51. sr++;
  52. sr = MIN(sr, MAX_MVSEARCH_STEPS - 2);
  53. return sr;
  54. }
  55. static INLINE int mv_cost(const MV *mv,
  56. const int *joint_cost, int *const comp_cost[2]) {
  57. return joint_cost[vp9_get_mv_joint(mv)] +
  58. comp_cost[0][mv->row] + comp_cost[1][mv->col];
  59. }
  60. int vp9_mv_bit_cost(const MV *mv, const MV *ref,
  61. const int *mvjcost, int *mvcost[2], int weight) {
  62. const MV diff = { mv->row - ref->row,
  63. mv->col - ref->col };
  64. return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
  65. }
  66. static int mv_err_cost(const MV *mv, const MV *ref,
  67. const int *mvjcost, int *mvcost[2],
  68. int error_per_bit) {
  69. if (mvcost) {
  70. const MV diff = { mv->row - ref->row,
  71. mv->col - ref->col };
  72. return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
  73. error_per_bit, 13);
  74. }
  75. return 0;
  76. }
  77. static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
  78. int error_per_bit) {
  79. const MV diff = { mv->row - ref->row,
  80. mv->col - ref->col };
  81. return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
  82. x->nmvsadcost) * error_per_bit, 8);
  83. }
  84. void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  85. int len, ss_count = 1;
  86. cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  87. cfg->ss[0].offset = 0;
  88. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  89. // Generate offsets for 4 search sites per step.
  90. const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
  91. int i;
  92. for (i = 0; i < 4; ++i) {
  93. search_site *const ss = &cfg->ss[ss_count++];
  94. ss->mv = ss_mvs[i];
  95. ss->offset = ss->mv.row * stride + ss->mv.col;
  96. }
  97. }
  98. cfg->ss_count = ss_count;
  99. cfg->searches_per_step = 4;
  100. }
  101. void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
  102. int len, ss_count = 1;
  103. cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  104. cfg->ss[0].offset = 0;
  105. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  106. // Generate offsets for 8 search sites per step.
  107. const MV ss_mvs[8] = {
  108. {-len, 0 }, {len, 0 }, { 0, -len}, {0, len},
  109. {-len, -len}, {-len, len}, {len, -len}, {len, len}
  110. };
  111. int i;
  112. for (i = 0; i < 8; ++i) {
  113. search_site *const ss = &cfg->ss[ss_count++];
  114. ss->mv = ss_mvs[i];
  115. ss->offset = ss->mv.row * stride + ss->mv.col;
  116. }
  117. }
  118. cfg->ss_count = ss_count;
  119. cfg->searches_per_step = 8;
  120. }
  121. /*
  122. * To avoid the penalty for crossing cache-line read, preload the reference
  123. * area in a small buffer, which is aligned to make sure there won't be crossing
  124. * cache-line read while reading from this buffer. This reduced the cpu
  125. * cycles spent on reading ref data in sub-pixel filter functions.
  126. * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
  127. * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
  128. * could reduce the area.
  129. */
  130. /* estimated cost of a motion vector (r,c) */
  131. #define MVC(r, c) \
  132. (mvcost ? \
  133. ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
  134. mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
  135. error_per_bit + 4096) >> 13 : 0)
  136. // convert motion vector component to offset for sv[a]f calc
  137. static INLINE int sp(int x) {
  138. return x & 7;
  139. }
  140. static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  141. return &buf[(r >> 3) * stride + (c >> 3)];
  142. }
  143. /* checks if (r, c) has better score than previous best */
  144. #define CHECK_BETTER(v, r, c) \
  145. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  146. if (second_pred == NULL) \
  147. thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  148. src_stride, &sse); \
  149. else \
  150. thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
  151. z, src_stride, &sse, second_pred); \
  152. if ((v = MVC(r, c) + thismse) < besterr) { \
  153. besterr = v; \
  154. br = r; \
  155. bc = c; \
  156. *distortion = thismse; \
  157. *sse1 = sse; \
  158. } \
  159. } else { \
  160. v = INT_MAX; \
  161. }
  162. #define FIRST_LEVEL_CHECKS \
  163. { \
  164. unsigned int left, right, up, down, diag; \
  165. CHECK_BETTER(left, tr, tc - hstep); \
  166. CHECK_BETTER(right, tr, tc + hstep); \
  167. CHECK_BETTER(up, tr - hstep, tc); \
  168. CHECK_BETTER(down, tr + hstep, tc); \
  169. whichdir = (left < right ? 0 : 1) + \
  170. (up < down ? 0 : 2); \
  171. switch (whichdir) { \
  172. case 0: \
  173. CHECK_BETTER(diag, tr - hstep, tc - hstep); \
  174. break; \
  175. case 1: \
  176. CHECK_BETTER(diag, tr - hstep, tc + hstep); \
  177. break; \
  178. case 2: \
  179. CHECK_BETTER(diag, tr + hstep, tc - hstep); \
  180. break; \
  181. case 3: \
  182. CHECK_BETTER(diag, tr + hstep, tc + hstep); \
  183. break; \
  184. } \
  185. }
  186. #define SECOND_LEVEL_CHECKS \
  187. { \
  188. int kr, kc; \
  189. unsigned int second; \
  190. if (tr != br && tc != bc) { \
  191. kr = br - tr; \
  192. kc = bc - tc; \
  193. CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
  194. CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
  195. } else if (tr == br && tc != bc) { \
  196. kc = bc - tc; \
  197. CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
  198. CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
  199. switch (whichdir) { \
  200. case 0: \
  201. case 1: \
  202. CHECK_BETTER(second, tr + hstep, tc + kc); \
  203. break; \
  204. case 2: \
  205. case 3: \
  206. CHECK_BETTER(second, tr - hstep, tc + kc); \
  207. break; \
  208. } \
  209. } else if (tr != br && tc == bc) { \
  210. kr = br - tr; \
  211. CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
  212. CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
  213. switch (whichdir) { \
  214. case 0: \
  215. case 2: \
  216. CHECK_BETTER(second, tr + kr, tc + hstep); \
  217. break; \
  218. case 1: \
  219. case 3: \
  220. CHECK_BETTER(second, tr + kr, tc - hstep); \
  221. break; \
  222. } \
  223. } \
  224. }
  225. #define SETUP_SUBPEL_SEARCH \
  226. const uint8_t *const z = x->plane[0].src.buf; \
  227. const int src_stride = x->plane[0].src.stride; \
  228. const MACROBLOCKD *xd = &x->e_mbd; \
  229. unsigned int besterr = INT_MAX; \
  230. unsigned int sse; \
  231. unsigned int whichdir; \
  232. int thismse; \
  233. const unsigned int halfiters = iters_per_step; \
  234. const unsigned int quarteriters = iters_per_step; \
  235. const unsigned int eighthiters = iters_per_step; \
  236. const int y_stride = xd->plane[0].pre[0].stride; \
  237. const int offset = bestmv->row * y_stride + bestmv->col; \
  238. const uint8_t *const y = xd->plane[0].pre[0].buf; \
  239. \
  240. int rr = ref_mv->row; \
  241. int rc = ref_mv->col; \
  242. int br = bestmv->row * 8; \
  243. int bc = bestmv->col * 8; \
  244. int hstep = 4; \
  245. const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \
  246. const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \
  247. const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \
  248. const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \
  249. int tr = br; \
  250. int tc = bc; \
  251. \
  252. bestmv->row *= 8; \
  253. bestmv->col *= 8;
  254. static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd,
  255. const MV *bestmv,
  256. const MV *ref_mv,
  257. int error_per_bit,
  258. const vp9_variance_fn_ptr_t *vfp,
  259. const uint8_t *const src,
  260. const int src_stride,
  261. const uint8_t *const y,
  262. int y_stride,
  263. const uint8_t *second_pred,
  264. int w, int h, int offset,
  265. int *mvjcost, int *mvcost[2],
  266. unsigned int *sse1,
  267. int *distortion) {
  268. unsigned int besterr;
  269. #if CONFIG_VP9_HIGHBITDEPTH
  270. if (second_pred != NULL) {
  271. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  272. DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
  273. vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
  274. y_stride);
  275. besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride,
  276. sse1);
  277. } else {
  278. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  279. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  280. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  281. }
  282. } else {
  283. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  284. }
  285. *distortion = besterr;
  286. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  287. #else
  288. (void) xd;
  289. if (second_pred != NULL) {
  290. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  291. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  292. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  293. } else {
  294. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  295. }
  296. *distortion = besterr;
  297. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  298. #endif // CONFIG_VP9_HIGHBITDEPTH
  299. return besterr;
  300. }
  301. static INLINE int divide_and_round(const int n, const int d) {
  302. return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
  303. }
  304. static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  305. return cost_list[0] < cost_list[1] &&
  306. cost_list[0] < cost_list[2] &&
  307. cost_list[0] < cost_list[3] &&
  308. cost_list[0] < cost_list[4];
  309. }
  310. // Returns surface minima estimate at given precision in 1/2^n bits.
  311. // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
  312. // For a given set of costs S0, S1, S2, S3, S4 at points
  313. // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
  314. // the solution for the location of the minima (x0, y0) is given by:
  315. // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
  316. // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
  317. // The code below is an integerized version of that.
  318. static void get_cost_surf_min(int *cost_list, int *ir, int *ic,
  319. int bits) {
  320. *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
  321. (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
  322. *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
  323. (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
  324. }
  325. int vp9_find_best_sub_pixel_tree_pruned_evenmore(
  326. const MACROBLOCK *x,
  327. MV *bestmv, const MV *ref_mv,
  328. int allow_hp,
  329. int error_per_bit,
  330. const vp9_variance_fn_ptr_t *vfp,
  331. int forced_stop,
  332. int iters_per_step,
  333. int *cost_list,
  334. int *mvjcost, int *mvcost[2],
  335. int *distortion,
  336. unsigned int *sse1,
  337. const uint8_t *second_pred,
  338. int w, int h) {
  339. SETUP_SUBPEL_SEARCH;
  340. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
  341. z, src_stride, y, y_stride, second_pred,
  342. w, h, offset, mvjcost, mvcost,
  343. sse1, distortion);
  344. (void) halfiters;
  345. (void) quarteriters;
  346. (void) eighthiters;
  347. (void) whichdir;
  348. (void) allow_hp;
  349. (void) forced_stop;
  350. (void) hstep;
  351. if (cost_list &&
  352. cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  353. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  354. cost_list[4] != INT_MAX &&
  355. is_cost_list_wellbehaved(cost_list)) {
  356. int ir, ic;
  357. unsigned int minpt;
  358. get_cost_surf_min(cost_list, &ir, &ic, 2);
  359. if (ir != 0 || ic != 0) {
  360. CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
  361. }
  362. } else {
  363. FIRST_LEVEL_CHECKS;
  364. if (halfiters > 1) {
  365. SECOND_LEVEL_CHECKS;
  366. }
  367. tr = br;
  368. tc = bc;
  369. // Each subsequent iteration checks at least one point in common with
  370. // the last iteration could be 2 ( if diag selected) 1/4 pel
  371. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  372. if (forced_stop != 2) {
  373. hstep >>= 1;
  374. FIRST_LEVEL_CHECKS;
  375. if (quarteriters > 1) {
  376. SECOND_LEVEL_CHECKS;
  377. }
  378. }
  379. }
  380. tr = br;
  381. tc = bc;
  382. if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
  383. hstep >>= 1;
  384. FIRST_LEVEL_CHECKS;
  385. if (eighthiters > 1) {
  386. SECOND_LEVEL_CHECKS;
  387. }
  388. }
  389. bestmv->row = br;
  390. bestmv->col = bc;
  391. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  392. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  393. return INT_MAX;
  394. return besterr;
  395. }
  396. int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
  397. MV *bestmv, const MV *ref_mv,
  398. int allow_hp,
  399. int error_per_bit,
  400. const vp9_variance_fn_ptr_t *vfp,
  401. int forced_stop,
  402. int iters_per_step,
  403. int *cost_list,
  404. int *mvjcost, int *mvcost[2],
  405. int *distortion,
  406. unsigned int *sse1,
  407. const uint8_t *second_pred,
  408. int w, int h) {
  409. SETUP_SUBPEL_SEARCH;
  410. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
  411. z, src_stride, y, y_stride, second_pred,
  412. w, h, offset, mvjcost, mvcost,
  413. sse1, distortion);
  414. if (cost_list &&
  415. cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  416. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  417. cost_list[4] != INT_MAX &&
  418. is_cost_list_wellbehaved(cost_list)) {
  419. unsigned int minpt;
  420. int ir, ic;
  421. get_cost_surf_min(cost_list, &ir, &ic, 1);
  422. if (ir != 0 || ic != 0) {
  423. CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
  424. }
  425. } else {
  426. FIRST_LEVEL_CHECKS;
  427. if (halfiters > 1) {
  428. SECOND_LEVEL_CHECKS;
  429. }
  430. }
  431. // Each subsequent iteration checks at least one point in common with
  432. // the last iteration could be 2 ( if diag selected) 1/4 pel
  433. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  434. if (forced_stop != 2) {
  435. tr = br;
  436. tc = bc;
  437. hstep >>= 1;
  438. FIRST_LEVEL_CHECKS;
  439. if (quarteriters > 1) {
  440. SECOND_LEVEL_CHECKS;
  441. }
  442. }
  443. if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
  444. tr = br;
  445. tc = bc;
  446. hstep >>= 1;
  447. FIRST_LEVEL_CHECKS;
  448. if (eighthiters > 1) {
  449. SECOND_LEVEL_CHECKS;
  450. }
  451. }
  452. // These lines insure static analysis doesn't warn that
  453. // tr and tc aren't used after the above point.
  454. (void) tr;
  455. (void) tc;
  456. bestmv->row = br;
  457. bestmv->col = bc;
  458. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  459. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  460. return INT_MAX;
  461. return besterr;
  462. }
  463. int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
  464. MV *bestmv, const MV *ref_mv,
  465. int allow_hp,
  466. int error_per_bit,
  467. const vp9_variance_fn_ptr_t *vfp,
  468. int forced_stop,
  469. int iters_per_step,
  470. int *cost_list,
  471. int *mvjcost, int *mvcost[2],
  472. int *distortion,
  473. unsigned int *sse1,
  474. const uint8_t *second_pred,
  475. int w, int h) {
  476. SETUP_SUBPEL_SEARCH;
  477. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
  478. z, src_stride, y, y_stride, second_pred,
  479. w, h, offset, mvjcost, mvcost,
  480. sse1, distortion);
  481. if (cost_list &&
  482. cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  483. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  484. cost_list[4] != INT_MAX) {
  485. unsigned int left, right, up, down, diag;
  486. whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
  487. (cost_list[2] < cost_list[4] ? 0 : 2);
  488. switch (whichdir) {
  489. case 0:
  490. CHECK_BETTER(left, tr, tc - hstep);
  491. CHECK_BETTER(down, tr + hstep, tc);
  492. CHECK_BETTER(diag, tr + hstep, tc - hstep);
  493. break;
  494. case 1:
  495. CHECK_BETTER(right, tr, tc + hstep);
  496. CHECK_BETTER(down, tr + hstep, tc);
  497. CHECK_BETTER(diag, tr + hstep, tc + hstep);
  498. break;
  499. case 2:
  500. CHECK_BETTER(left, tr, tc - hstep);
  501. CHECK_BETTER(up, tr - hstep, tc);
  502. CHECK_BETTER(diag, tr - hstep, tc - hstep);
  503. break;
  504. case 3:
  505. CHECK_BETTER(right, tr, tc + hstep);
  506. CHECK_BETTER(up, tr - hstep, tc);
  507. CHECK_BETTER(diag, tr - hstep, tc + hstep);
  508. break;
  509. }
  510. } else {
  511. FIRST_LEVEL_CHECKS;
  512. if (halfiters > 1) {
  513. SECOND_LEVEL_CHECKS;
  514. }
  515. }
  516. tr = br;
  517. tc = bc;
  518. // Each subsequent iteration checks at least one point in common with
  519. // the last iteration could be 2 ( if diag selected) 1/4 pel
  520. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  521. if (forced_stop != 2) {
  522. hstep >>= 1;
  523. FIRST_LEVEL_CHECKS;
  524. if (quarteriters > 1) {
  525. SECOND_LEVEL_CHECKS;
  526. }
  527. tr = br;
  528. tc = bc;
  529. }
  530. if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
  531. hstep >>= 1;
  532. FIRST_LEVEL_CHECKS;
  533. if (eighthiters > 1) {
  534. SECOND_LEVEL_CHECKS;
  535. }
  536. tr = br;
  537. tc = bc;
  538. }
  539. // These lines insure static analysis doesn't warn that
  540. // tr and tc aren't used after the above point.
  541. (void) tr;
  542. (void) tc;
  543. bestmv->row = br;
  544. bestmv->col = bc;
  545. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  546. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  547. return INT_MAX;
  548. return besterr;
  549. }
  550. const MV search_step_table[12] = {
  551. // left, right, up, down
  552. {0, -4}, {0, 4}, {-4, 0}, {4, 0},
  553. {0, -2}, {0, 2}, {-2, 0}, {2, 0},
  554. {0, -1}, {0, 1}, {-1, 0}, {1, 0}
  555. };
  556. int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
  557. MV *bestmv, const MV *ref_mv,
  558. int allow_hp,
  559. int error_per_bit,
  560. const vp9_variance_fn_ptr_t *vfp,
  561. int forced_stop,
  562. int iters_per_step,
  563. int *cost_list,
  564. int *mvjcost, int *mvcost[2],
  565. int *distortion,
  566. unsigned int *sse1,
  567. const uint8_t *second_pred,
  568. int w, int h) {
  569. const uint8_t *const z = x->plane[0].src.buf;
  570. const uint8_t *const src_address = z;
  571. const int src_stride = x->plane[0].src.stride;
  572. const MACROBLOCKD *xd = &x->e_mbd;
  573. unsigned int besterr = INT_MAX;
  574. unsigned int sse;
  575. unsigned int whichdir = 0;
  576. int thismse;
  577. const int y_stride = xd->plane[0].pre[0].stride;
  578. const int offset = bestmv->row * y_stride + bestmv->col;
  579. const uint8_t *const y = xd->plane[0].pre[0].buf;
  580. int rr = ref_mv->row;
  581. int rc = ref_mv->col;
  582. int br = bestmv->row * 8;
  583. int bc = bestmv->col * 8;
  584. int hstep = 4;
  585. int iter, round = 3 - forced_stop;
  586. const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  587. const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  588. const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  589. const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
  590. int tr = br;
  591. int tc = bc;
  592. const MV *search_step = search_step_table;
  593. int idx, best_idx = -1;
  594. unsigned int cost_array[5];
  595. if (!(allow_hp && vp9_use_mv_hp(ref_mv)))
  596. if (round == 3)
  597. round = 2;
  598. bestmv->row *= 8;
  599. bestmv->col *= 8;
  600. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
  601. z, src_stride, y, y_stride, second_pred,
  602. w, h, offset, mvjcost, mvcost,
  603. sse1, distortion);
  604. (void) cost_list; // to silence compiler warning
  605. for (iter = 0; iter < round; ++iter) {
  606. // Check vertical and horizontal sub-pixel positions.
  607. for (idx = 0; idx < 4; ++idx) {
  608. tr = br + search_step[idx].row;
  609. tc = bc + search_step[idx].col;
  610. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  611. const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
  612. MV this_mv;
  613. this_mv.row = tr;
  614. this_mv.col = tc;
  615. if (second_pred == NULL)
  616. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
  617. src_address, src_stride, &sse);
  618. else
  619. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
  620. src_address, src_stride, &sse, second_pred);
  621. cost_array[idx] = thismse +
  622. mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
  623. if (cost_array[idx] < besterr) {
  624. best_idx = idx;
  625. besterr = cost_array[idx];
  626. *distortion = thismse;
  627. *sse1 = sse;
  628. }
  629. } else {
  630. cost_array[idx] = INT_MAX;
  631. }
  632. }
  633. // Check diagonal sub-pixel position
  634. tc = bc + (cost_array[0] < cost_array[1] ? -hstep : hstep);
  635. tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
  636. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  637. const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
  638. MV this_mv = {tr, tc};
  639. if (second_pred == NULL)
  640. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
  641. src_address, src_stride, &sse);
  642. else
  643. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
  644. src_address, src_stride, &sse, second_pred);
  645. cost_array[4] = thismse +
  646. mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
  647. if (cost_array[4] < besterr) {
  648. best_idx = 4;
  649. besterr = cost_array[4];
  650. *distortion = thismse;
  651. *sse1 = sse;
  652. }
  653. } else {
  654. cost_array[idx] = INT_MAX;
  655. }
  656. if (best_idx < 4 && best_idx >= 0) {
  657. br += search_step[best_idx].row;
  658. bc += search_step[best_idx].col;
  659. } else if (best_idx == 4) {
  660. br = tr;
  661. bc = tc;
  662. }
  663. if (iters_per_step > 1)
  664. SECOND_LEVEL_CHECKS;
  665. tr = br;
  666. tc = bc;
  667. search_step += 4;
  668. hstep >>= 1;
  669. best_idx = -1;
  670. }
  671. // Each subsequent iteration checks at least one point in common with
  672. // the last iteration could be 2 ( if diag selected) 1/4 pel
  673. // These lines insure static analysis doesn't warn that
  674. // tr and tc aren't used after the above point.
  675. (void) tr;
  676. (void) tc;
  677. bestmv->row = br;
  678. bestmv->col = bc;
  679. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  680. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  681. return INT_MAX;
  682. return besterr;
  683. }
  684. #undef MVC
  685. #undef PRE
  686. #undef CHECK_BETTER
  687. static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
  688. int range) {
  689. return ((row - range) >= x->mv_row_min) &
  690. ((row + range) <= x->mv_row_max) &
  691. ((col - range) >= x->mv_col_min) &
  692. ((col + range) <= x->mv_col_max);
  693. }
  694. static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
  695. return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
  696. (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
  697. }
  /* Candidate test shared by the pattern searches below.
   *
   * Expects these names in scope at the point of use: thissad, bestsad,
   * best_site, i, use_mvcost, x, this_mv, fcenter_mv and sad_per_bit.
   *
   * A candidate whose raw SAD is already no better than bestsad is rejected
   * without computing its motion-vector cost. Otherwise the cost is added
   * (only when use_mvcost is non-zero) and the candidate replaces the current
   * best if it is still smaller. The macro expands to a braced block and is
   * used without a trailing semicolon. */
  #define CHECK_BETTER \
    {\
      if (thissad < bestsad) {\
        if (use_mvcost) {\
          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\
        }\
        if (thissad < bestsad) {\
          best_site = i;\
          bestsad = thissad;\
        }\
      }\
    }
  // Sizes of the per-scale candidate tables taken by the pattern searches:
  // the tables are indexed [scale][candidate].
  709. #define MAX_PATTERN_SCALES 11
  710. #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
  711. #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
  712. // Calculate and return a sad+mvcost list around an integer best pel.
  713. static INLINE void calc_int_cost_list(const MACROBLOCK *x,
  714. const MV *ref_mv,
  715. int sadpb,
  716. const vp9_variance_fn_ptr_t *fn_ptr,
  717. const MV *best_mv,
  718. int *cost_list) {
  719. static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
  720. const struct buf_2d *const what = &x->plane[0].src;
  721. const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  722. const MV fcenter_mv = {ref_mv->row >> 3, ref_mv->col >> 3};
  723. int br = best_mv->row;
  724. int bc = best_mv->col;
  725. MV this_mv;
  726. int i;
  727. unsigned int sse;
  728. this_mv.row = br;
  729. this_mv.col = bc;
  730. cost_list[0] = fn_ptr->vf(what->buf, what->stride,
  731. get_buf_from_mv(in_what, &this_mv),
  732. in_what->stride, &sse) +
  733. mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  734. if (check_bounds(x, br, bc, 1)) {
  735. for (i = 0; i < 4; i++) {
  736. const MV this_mv = {br + neighbors[i].row,
  737. bc + neighbors[i].col};
  738. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  739. get_buf_from_mv(in_what, &this_mv),
  740. in_what->stride, &sse) +
  741. // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  742. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
  743. x->errorperbit);
  744. }
  745. } else {
  746. for (i = 0; i < 4; i++) {
  747. const MV this_mv = {br + neighbors[i].row,
  748. bc + neighbors[i].col};
  749. if (!is_mv_in(x, &this_mv))
  750. cost_list[i + 1] = INT_MAX;
  751. else
  752. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  753. get_buf_from_mv(in_what, &this_mv),
  754. in_what->stride, &sse) +
  755. // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  756. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
  757. x->errorperbit);
  758. }
  759. }
  760. }
  761. // Generic pattern search function that searches over multiple scales.
  762. // Each scale can have a different number of candidates and shape of
  763. // candidates as indicated in the num_candidates and candidates arrays
  764. // passed into this function
  765. //
  766. static int vp9_pattern_search(const MACROBLOCK *x,
  767. MV *ref_mv,
  768. int search_param,
  769. int sad_per_bit,
  770. int do_init_search,
  771. int *cost_list,
  772. const vp9_variance_fn_ptr_t *vfp,
  773. int use_mvcost,
  774. const MV *center_mv,
  775. MV *best_mv,
  776. const int num_candidates[MAX_PATTERN_SCALES],
  777. const MV candidates[MAX_PATTERN_SCALES]
  778. [MAX_PATTERN_CANDIDATES]) {
  779. const MACROBLOCKD *const xd = &x->e_mbd;
  780. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  781. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  782. };
  783. int i, s, t;
  784. const struct buf_2d *const what = &x->plane[0].src;
  785. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  786. int br, bc;
  787. int bestsad = INT_MAX;
  788. int thissad;
  789. int k = -1;
  790. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  791. int best_init_s = search_param_to_steps[search_param];
  792. // adjust ref_mv to make sure it is within MV range
  793. clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  794. br = ref_mv->row;
  795. bc = ref_mv->col;
  796. // Work out the start point for the search
  797. bestsad = vfp->sdf(what->buf, what->stride,
  798. get_buf_from_mv(in_what, ref_mv), in_what->stride) +
  799. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  800. // Search all possible scales upto the search param around the center point
  801. // pick the scale of the point that is best as the starting scale of
  802. // further steps around it.
  803. if (do_init_search) {
  804. s = best_init_s;
  805. best_init_s = -1;
  806. for (t = 0; t <= s; ++t) {
  807. int best_site = -1;
  808. if (check_bounds(x, br, bc, 1 << t)) {
  809. for (i = 0; i < num_candidates[t]; i++) {
  810. const MV this_mv = {br + candidates[t][i].row,
  811. bc + candidates[t][i].col};
  812. thissad = vfp->sdf(what->buf, what->stride,
  813. get_buf_from_mv(in_what, &this_mv),
  814. in_what->stride);
  815. CHECK_BETTER
  816. }
  817. } else {
  818. for (i = 0; i < num_candidates[t]; i++) {
  819. const MV this_mv = {br + candidates[t][i].row,
  820. bc + candidates[t][i].col};
  821. if (!is_mv_in(x, &this_mv))
  822. continue;
  823. thissad = vfp->sdf(what->buf, what->stride,
  824. get_buf_from_mv(in_what, &this_mv),
  825. in_what->stride);
  826. CHECK_BETTER
  827. }
  828. }
  829. if (best_site == -1) {
  830. continue;
  831. } else {
  832. best_init_s = t;
  833. k = best_site;
  834. }
  835. }
  836. if (best_init_s != -1) {
  837. br += candidates[best_init_s][k].row;
  838. bc += candidates[best_init_s][k].col;
  839. }
  840. }
  841. // If the center point is still the best, just skip this and move to
  842. // the refinement step.
  843. if (best_init_s != -1) {
  844. int best_site = -1;
  845. s = best_init_s;
  846. do {
  847. // No need to search all 6 points the 1st time if initial search was used
  848. if (!do_init_search || s != best_init_s) {
  849. if (check_bounds(x, br, bc, 1 << s)) {
  850. for (i = 0; i < num_candidates[s]; i++) {
  851. const MV this_mv = {br + candidates[s][i].row,
  852. bc + candidates[s][i].col};
  853. thissad = vfp->sdf(what->buf, what->stride,
  854. get_buf_from_mv(in_what, &this_mv),
  855. in_what->stride);
  856. CHECK_BETTER
  857. }
  858. } else {
  859. for (i = 0; i < num_candidates[s]; i++) {
  860. const MV this_mv = {br + candidates[s][i].row,
  861. bc + candidates[s][i].col};
  862. if (!is_mv_in(x, &this_mv))
  863. continue;
  864. thissad = vfp->sdf(what->buf, what->stride,
  865. get_buf_from_mv(in_what, &this_mv),
  866. in_what->stride);
  867. CHECK_BETTER
  868. }
  869. }
  870. if (best_site == -1) {
  871. continue;
  872. } else {
  873. br += candidates[s][best_site].row;
  874. bc += candidates[s][best_site].col;
  875. k = best_site;
  876. }
  877. }
  878. do {
  879. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  880. best_site = -1;
  881. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  882. next_chkpts_indices[1] = k;
  883. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  884. if (check_bounds(x, br, bc, 1 << s)) {
  885. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  886. const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
  887. bc + candidates[s][next_chkpts_indices[i]].col};
  888. thissad = vfp->sdf(what->buf, what->stride,
  889. get_buf_from_mv(in_what, &this_mv),
  890. in_what->stride);
  891. CHECK_BETTER
  892. }
  893. } else {
  894. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  895. const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
  896. bc + candidates[s][next_chkpts_indices[i]].col};
  897. if (!is_mv_in(x, &this_mv))
  898. continue;
  899. thissad = vfp->sdf(what->buf, what->stride,
  900. get_buf_from_mv(in_what, &this_mv),
  901. in_what->stride);
  902. CHECK_BETTER
  903. }
  904. }
  905. if (best_site != -1) {
  906. k = next_chkpts_indices[best_site];
  907. br += candidates[s][k].row;
  908. bc += candidates[s][k].col;
  909. }
  910. } while (best_site != -1);
  911. } while (s--);
  912. }
  913. // Returns the one-away integer pel sad values around the best as follows:
  914. // cost_list[0]: cost at the best integer pel
  915. // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel
  916. // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
  917. // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel
  918. // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel
  919. if (cost_list) {
  920. const MV best_mv = { br, bc };
  921. calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
  922. }
  923. best_mv->row = br;
  924. best_mv->col = bc;
  925. return bestsad;
  926. }
  // Multi-scale pattern search that also gathers, while searching at scale 0,
  // the raw SADs of the one-away neighbours of the best position into
  // cost_list. ref_mv is clamped in place and used as the start point; the
  // best position found is written to best_mv and its SAD(+cost) is returned.
  927. // A specialized function where the smallest scale search candidates
  928. // are 4 1-away neighbors, and cost_list is non-null
  929. // TODO(debargha): Merge this function with the one above. Also remove
  930. // use_mvcost option since it is always 1, to save unnecessary branches.
  931. static int vp9_pattern_search_sad(const MACROBLOCK *x,
  932. MV *ref_mv,
  933. int search_param,
  934. int sad_per_bit,
  935. int do_init_search,
  936. int *cost_list,
  937. const vp9_variance_fn_ptr_t *vfp,
  938. int use_mvcost,
  939. const MV *center_mv,
  940. MV *best_mv,
  941. const int num_candidates[MAX_PATTERN_SCALES],
  942. const MV candidates[MAX_PATTERN_SCALES]
  943. [MAX_PATTERN_CANDIDATES]) {
  944. const MACROBLOCKD *const xd = &x->e_mbd;
  945. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  946. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  947. };
  948. int i, s, t;
  949. const struct buf_2d *const what = &x->plane[0].src;
  950. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  951. int br, bc;
  952. int bestsad = INT_MAX;
  953. int thissad;
  954. int k = -1;
  955. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  956. int best_init_s = search_param_to_steps[search_param];
  957. // adjust ref_mv to make sure it is within MV range
  958. clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  959. br = ref_mv->row;
  960. bc = ref_mv->col;
  // INT_MAX in cost_list[0] doubles as the "nothing gathered during the
  // search" marker that is tested after the search.
  961. if (cost_list != NULL) {
  962. cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
  963. INT_MAX;
  964. }
  965. // Work out the start point for the search
  966. bestsad = vfp->sdf(what->buf, what->stride,
  967. get_buf_from_mv(in_what, ref_mv), in_what->stride) +
  968. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  969. // Search all possible scales upto the search param around the center point
  970. // pick the scale of the point that is best as the starting scale of
  971. // further steps around it.
  972. if (do_init_search) {
  973. s = best_init_s;
  974. best_init_s = -1;
  975. for (t = 0; t <= s; ++t) {
  976. int best_site = -1;
  977. if (check_bounds(x, br, bc, 1 << t)) {
  978. for (i = 0; i < num_candidates[t]; i++) {
  979. const MV this_mv = {br + candidates[t][i].row,
  980. bc + candidates[t][i].col};
  981. thissad = vfp->sdf(what->buf, what->stride,
  982. get_buf_from_mv(in_what, &this_mv),
  983. in_what->stride);
  984. CHECK_BETTER
  985. }
  986. } else {
  987. for (i = 0; i < num_candidates[t]; i++) {
  988. const MV this_mv = {br + candidates[t][i].row,
  989. bc + candidates[t][i].col};
  990. if (!is_mv_in(x, &this_mv))
  991. continue;
  992. thissad = vfp->sdf(what->buf, what->stride,
  993. get_buf_from_mv(in_what, &this_mv),
  994. in_what->stride);
  995. CHECK_BETTER
  996. }
  997. }
  998. if (best_site == -1) {
  999. continue;
  1000. } else {
  1001. best_init_s = t;
  1002. k = best_site;
  1003. }
  1004. }
  1005. if (best_init_s != -1) {
  1006. br += candidates[best_init_s][k].row;
  1007. bc += candidates[best_init_s][k].col;
  1008. }
  1009. }
  1010. // If the center point is still the best, just skip this and move to
  1011. // the refinement step.
  1012. if (best_init_s != -1) {
  // do_sad is 1 only when scale 0 has exactly 4 candidates and a cost list
  // was requested. Scale 0 is then excluded from the loop below and handled
  // by the "s == 0" block that follows it, which also records the SADs.
  1013. int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
  1014. int best_site = -1;
  1015. s = best_init_s;
  1016. for (; s >= do_sad; s--) {
  // The full candidate set is skipped on the first pass when the initial
  // search already evaluated it at this scale.
  1017. if (!do_init_search || s != best_init_s) {
  1018. if (check_bounds(x, br, bc, 1 << s)) {
  1019. for (i = 0; i < num_candidates[s]; i++) {
  1020. const MV this_mv = {br + candidates[s][i].row,
  1021. bc + candidates[s][i].col};
  1022. thissad = vfp->sdf(what->buf, what->stride,
  1023. get_buf_from_mv(in_what, &this_mv),
  1024. in_what->stride);
  1025. CHECK_BETTER
  1026. }
  1027. } else {
  1028. for (i = 0; i < num_candidates[s]; i++) {
  1029. const MV this_mv = {br + candidates[s][i].row,
  1030. bc + candidates[s][i].col};
  1031. if (!is_mv_in(x, &this_mv))
  1032. continue;
  1033. thissad = vfp->sdf(what->buf, what->stride,
  1034. get_buf_from_mv(in_what, &this_mv),
  1035. in_what->stride);
  1036. CHECK_BETTER
  1037. }
  1038. }
  1039. if (best_site == -1) {
  1040. continue;
  1041. } else {
  1042. br += candidates[s][best_site].row;
  1043. bc += candidates[s][best_site].col;
  1044. k = best_site;
  1045. }
  1046. }
  // Keep stepping at this scale, testing only the last winning direction k
  // and its two neighbours in the candidate ring, until nothing improves.
  1047. do {
  1048. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1049. best_site = -1;
  1050. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1051. next_chkpts_indices[1] = k;
  1052. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1053. if (check_bounds(x, br, bc, 1 << s)) {
  1054. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1055. const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
  1056. bc + candidates[s][next_chkpts_indices[i]].col};
  1057. thissad = vfp->sdf(what->buf, what->stride,
  1058. get_buf_from_mv(in_what, &this_mv),
  1059. in_what->stride);
  1060. CHECK_BETTER
  1061. }
  1062. } else {
  1063. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1064. const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
  1065. bc + candidates[s][next_chkpts_indices[i]].col};
  1066. if (!is_mv_in(x, &this_mv))
  1067. continue;
  1068. thissad = vfp->sdf(what->buf, what->stride,
  1069. get_buf_from_mv(in_what, &this_mv),
  1070. in_what->stride);
  1071. CHECK_BETTER
  1072. }
  1073. }
  1074. if (best_site != -1) {
  1075. k = next_chkpts_indices[best_site];
  1076. br += candidates[s][k].row;
  1077. bc += candidates[s][k].col;
  1078. }
  1079. } while (best_site != -1);
  1080. }
  // The loop above leaves s == do_sad - 1 when it ran to completion, so
  // s == 0 here implies do_sad was 1 (with do_sad == 0 it ends at s == -1).
  1081. // Note: If we enter the if below, then cost_list must be non-NULL.
  1082. if (s == 0) {
  1083. cost_list[0] = bestsad;
  1084. if (!do_init_search || s != best_init_s) {
  1085. if (check_bounds(x, br, bc, 1 << s)) {
  1086. for (i = 0; i < num_candidates[s]; i++) {
  1087. const MV this_mv = {br + candidates[s][i].row,
  1088. bc + candidates[s][i].col};
  // The raw SAD is stored before CHECK_BETTER may add the MV cost to thissad.
  1089. cost_list[i + 1] =
  1090. thissad = vfp->sdf(what->buf, what->stride,
  1091. get_buf_from_mv(in_what, &this_mv),
  1092. in_what->stride);
  1093. CHECK_BETTER
  1094. }
  1095. } else {
  1096. for (i = 0; i < num_candidates[s]; i++) {
  1097. const MV this_mv = {br + candidates[s][i].row,
  1098. bc + candidates[s][i].col};
  1099. if (!is_mv_in(x, &this_mv))
  1100. continue;
  1101. cost_list[i + 1] =
  1102. thissad = vfp->sdf(what->buf, what->stride,
  1103. get_buf_from_mv(in_what, &this_mv),
  1104. in_what->stride);
  1105. CHECK_BETTER
  1106. }
  1107. }
  1108. if (best_site != -1) {
  1109. br += candidates[s][best_site].row;
  1110. bc += candidates[s][best_site].col;
  1111. k = best_site;
  1112. }
  1113. }
  1114. while (best_site != -1) {
  1115. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1116. best_site = -1;
  1117. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1118. next_chkpts_indices[1] = k;
  1119. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  // The centre has just moved to candidate k. The neighbour slots are reset,
  // the SAD of the previous centre (old cost_list[0]) is stored in slot
  // ((k + 2) % 4) + 1, and cost_list[0] takes the new best.
  // NOTE(review): this presumes the 4 scale-0 candidates are ordered so that
  // k and (k + 2) % 4 are opposite directions - confirm against the caller's
  // candidate table.
  1120. cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
  1121. cost_list[((k + 2) % 4) + 1] = cost_list[0];
  1122. cost_list[0] = bestsad;
  1123. if (check_bounds(x, br, bc, 1 << s)) {
  1124. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1125. const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
  1126. bc + candidates[s][next_chkpts_indices[i]].col};
  1127. cost_list[next_chkpts_indices[i] + 1] =
  1128. thissad = vfp->sdf(what->buf, what->stride,
  1129. get_buf_from_mv(in_what, &this_mv),
  1130. in_what->stride);
  1131. CHECK_BETTER
  1132. }
  1133. } else {
  1134. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1135. const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
  1136. bc + candidates[s][next_chkpts_indices[i]].col};
  1137. if (!is_mv_in(x, &this_mv)) {
  1138. cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
  1139. continue;
  1140. }
  1141. cost_list[next_chkpts_indices[i] + 1] =
  1142. thissad = vfp->sdf(what->buf, what->stride,
  1143. get_buf_from_mv(in_what, &this_mv),
  1144. in_what->stride);
  1145. CHECK_BETTER
  1146. }
  1147. }
  1148. if (best_site != -1) {
  1149. k = next_chkpts_indices[best_site];
  1150. br += candidates[s][k].row;
  1151. bc += candidates[s][k].col;
  1152. }
  1153. }
  1154. }
  1155. }
  1156. // Returns the one-away integer pel sad values around the best as follows:
  1157. // cost_list[0]: sad at the best integer pel
  1158. // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel
  1159. // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
  1160. // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel
  1161. // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel
  1162. if (cost_list) {
  1163. static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
  // cost_list[0] is still INT_MAX when the scale-0 block above never ran; the
  // four neighbour SADs are then computed here from scratch, and no MV cost
  // is added to them on this path.
  1164. if (cost_list[0] == INT_MAX) {
  1165. cost_list[0] = bestsad;
  1166. if (check_bounds(x, br, bc, 1)) {
  1167. for (i = 0; i < 4; i++) {
  1168. const MV this_mv = { br + neighbors[i].row,
  1169. bc + neighbors[i].col };
  1170. cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
  1171. get_buf_from_mv(in_what, &this_mv),
  1172. in_what->stride);
  1173. }
  1174. } else {
  1175. for (i = 0; i < 4; i++) {
  1176. const MV this_mv = {br + neighbors[i].row,
  1177. bc + neighbors[i].col};
  1178. if (!is_mv_in(x, &this_mv))
  1179. cost_list[i + 1] = INT_MAX;
  1180. else
  1181. cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
  1182. get_buf_from_mv(in_what, &this_mv),
  1183. in_what->stride);
  1184. }
  1185. }
  1186. } else {
  // The neighbour SADs were gathered during the search; add the MV cost to
  // every entry that was actually computed.
  1187. if (use_mvcost) {
  1188. for (i = 0; i < 4; i++) {
  1189. const MV this_mv = {br + neighbors[i].row,
  1190. bc + neighbors[i].col};
  1191. if (cost_list[i + 1] != INT_MAX) {
  1192. cost_list[i + 1] +=
  1193. mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1194. }
  1195. }
  1196. }
  1197. }
  1198. }
  1199. best_mv->row = br;
  1200. best_mv->col = bc;
  1201. return bestsad;
  1202. }
  1203. int vp9_get_mvpred_var(const MACROBLOCK *x,
  1204. const MV *best_mv, const MV *center_mv,
  1205. const vp9_variance_fn_ptr_t *vfp,
  1206. int use_mvcost) {
  1207. const MACROBLOCKD *const xd = &x->e_mbd;
  1208. const struct buf_2d *const what = &x->plane[0].src;
  1209. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1210. const MV mv = {best_mv->row * 8, best_mv->col * 8};
  1211. unsigned int unused;
  1212. return vfp->vf(what->buf, what->stride,
  1213. get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
  1214. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
  1215. x->mvcost, x->errorperbit) : 0);
  1216. }
  1217. int vp9_get_mvpred_av_var(const MACROBLOCK *x,
  1218. const MV *best_mv, const MV *center_mv,
  1219. const uint8_t *second_pred,
  1220. const vp9_variance_fn_ptr_t *vfp,
  1221. int use_mvcost) {
  1222. const MACROBLOCKD *const xd = &x->e_mbd;
  1223. const struct buf_2d *const what = &x->plane[0].src;
  1224. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1225. const MV mv = {best_mv->row * 8, best_mv->col * 8};
  1226. unsigned int unused;
  1227. return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
  1228. what->buf, what->stride, &unused, second_pred) +
  1229. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
  1230. x->mvcost, x->errorperbit) : 0);
  1231. }
  1232. int vp9_hex_search(const MACROBLOCK *x,
  1233. MV *ref_mv,
  1234. int search_param,
  1235. int sad_per_bit,
  1236. int do_init_search,
  1237. int *cost_list,
  1238. const vp9_variance_fn_ptr_t *vfp,
  1239. int use_mvcost,
  1240. const MV *center_mv, MV *best_mv) {
  1241. // First scale has 8-closest points, the rest have 6 points in hex shape
  1242. // at increasing scales
  1243. static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
  1244. 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  1245. };
  1246. // Note that the largest candidate step at each scale is 2^scale
  1247. static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1248. {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
  1249. {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
  1250. {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
  1251. {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
  1252. {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
  1253. {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
  1254. {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
  1255. {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
  1256. {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
  1257. {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
  1258. {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
  1259. { -1024, 0}},
  1260. };
  1261. return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
  1262. do_init_search, cost_list, vfp, use_mvcost,
  1263. center_mv, best_mv,
  1264. hex_num_candidates, hex_candidates);
  1265. }
  1266. int vp9_bigdia_search(const MACROBLOCK *x,
  1267. MV *ref_mv,
  1268. int search_param,
  1269. int sad_per_bit,
  1270. int do_init_search,
  1271. int *cost_list,
  1272. const vp9_variance_fn_ptr_t *vfp,
  1273. int use_mvcost,
  1274. const MV *center_mv,
  1275. MV *best_mv) {
  1276. // First scale has 4-closest points, the rest have 8 points in diamond
  1277. // shape at increasing scales
  1278. static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
  1279. 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1280. };
  1281. // Note that the largest candidate step at each scale is 2^scale
  1282. static const MV bigdia_candidates[MAX_PATTERN_SCALES]
  1283. [MAX_PATTERN_CANDIDATES] = {
  1284. {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
  1285. {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
  1286. {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
  1287. {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
  1288. {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
  1289. {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
  1290. {-16, 16}, {-32, 0}},
  1291. {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
  1292. {-32, 32}, {-64, 0}},
  1293. {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
  1294. {-64, 64}, {-128, 0}},
  1295. {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
  1296. {-128, 128}, {-256, 0}},
  1297. {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
  1298. {-256, 256}, {-512, 0}},
  1299. {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
  1300. {-512, 512}, {-1024, 0}},
  1301. };
  1302. return vp9_pattern_search_sad(x, ref_mv, search_param, sad_per_bit,
  1303. do_init_search, cost_list, vfp, use_mvcost,
  1304. center_mv, best_mv,
  1305. bigdia_num_candidates, bigdia_candidates);
  1306. }
  1307. int vp9_square_search(const MACROBLOCK *x,
  1308. MV *ref_mv,
  1309. int search_param,
  1310. int sad_per_bit,
  1311. int do_init_search,
  1312. int *cost_list,
  1313. const vp9_variance_fn_ptr_t *vfp,
  1314. int use_mvcost,
  1315. const MV *center_mv,
  1316. MV *best_mv) {
  1317. // All scales have 8 closest points in square shape
  1318. static const int square_num_candidates[MAX_PATTERN_SCALES] = {
  1319. 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1320. };
  1321. // Note that the largest candidate step at each scale is 2^scale
  1322. static const MV square_candidates[MAX_PATTERN_SCALES]
  1323. [MAX_PATTERN_CANDIDATES] = {
  1324. {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
  1325. {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
  1326. {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
  1327. {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
  1328. {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
  1329. {-16, 16}, {-16, 0}},
  1330. {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
  1331. {-32, 32}, {-32, 0}},
  1332. {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
  1333. {-64, 64}, {-64, 0}},
  1334. {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
  1335. {-128, 128}, {-128, 0}},
  1336. {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
  1337. {-256, 256}, {-256, 0}},
  1338. {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
  1339. {-512, 512}, {-512, 0}},
  1340. {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
  1341. {0, 1024}, {-1024, 1024}, {-1024, 0}},
  1342. };
  1343. return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
  1344. do_init_search, cost_list, vfp, use_mvcost,
  1345. center_mv, best_mv,
  1346. square_num_candidates, square_candidates);
  1347. }
  1348. int vp9_fast_hex_search(const MACROBLOCK *x,
  1349. MV *ref_mv,
  1350. int search_param,
  1351. int sad_per_bit,
  1352. int do_init_search, // must be zero for fast_hex
  1353. int *cost_list,
  1354. const vp9_variance_fn_ptr_t *vfp,
  1355. int use_mvcost,
  1356. const MV *center_mv,
  1357. MV *best_mv) {
  1358. return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1359. sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
  1360. center_mv, best_mv);
  1361. }
  1362. int vp9_fast_dia_search(const MACROBLOCK *x,
  1363. MV *ref_mv,
  1364. int search_param,
  1365. int sad_per_bit,
  1366. int do_init_search,
  1367. int *cost_list,
  1368. const vp9_variance_fn_ptr_t *vfp,
  1369. int use_mvcost,
  1370. const MV *center_mv,
  1371. MV *best_mv) {
  1372. return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1373. sad_per_bit, do_init_search, cost_list, vfp,
  1374. use_mvcost, center_mv, best_mv);
  1375. }
  1376. #undef CHECK_BETTER
  1377. int vp9_full_range_search_c(const MACROBLOCK *x,
  1378. const search_site_config *cfg,
  1379. MV *ref_mv, MV *best_mv,
  1380. int search_param, int sad_per_bit, int *num00,
  1381. const vp9_variance_fn_ptr_t *fn_ptr,
  1382. const MV *center_mv) {
  1383. const MACROBLOCKD *const xd = &x->e_mbd;
  1384. const struct buf_2d *const what = &x->plane[0].src;
  1385. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1386. const int range = 64;
  1387. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  1388. unsigned int best_sad = INT_MAX;
  1389. int r, c, i;
  1390. int start_col, end_col, start_row, end_row;
  1391. // The cfg and search_param parameters are not used in this search variant
  1392. (void)cfg;
  1393. (void)search_param;
  1394. clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  1395. *best_mv = *ref_mv;
  1396. *num00 = 11;
  1397. best_sad = fn_ptr->sdf(what->buf, what->stride,
  1398. get_buf_from_mv(in_what, ref_mv), in_what->stride) +
  1399. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1400. start_row = MAX(-range, x->mv_row_min - ref_mv->row);
  1401. start_col = MAX(-range, x->mv_col_min - ref_mv->col);
  1402. end_row = MIN(range, x->mv_row_max - ref_mv->row);
  1403. end_col = MIN(range, x->mv_col_max - ref_mv->col);
  1404. for (r = start_row; r <= end_row; ++r) {
  1405. for (c = start_col; c <= end_col; c += 4) {
  1406. if (c + 3 <= end_col) {
  1407. unsigned int sads[4];
  1408. const uint8_t *addrs[4];
  1409. for (i = 0; i < 4; ++i) {
  1410. const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
  1411. addrs[i] = get_buf_from_mv(in_what, &mv);
  1412. }
  1413. fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
  1414. for (i = 0; i < 4; ++i) {
  1415. if (sads[i] < best_sad) {
  1416. const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
  1417. const unsigned int sad = sads[i] +
  1418. mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1419. if (sad < best_sad) {
  1420. best_sad = sad;
  1421. *best_mv = mv;
  1422. }
  1423. }
  1424. }
  1425. } else {
  1426. for (i = 0; i < end_col - c; ++i) {
  1427. const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
  1428. unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
  1429. get_buf_from_mv(in_what, &mv), in_what->stride);
  1430. if (sad < best_sad) {
  1431. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1432. if (sad < best_sad) {
  1433. best_sad = sad;
  1434. *best_mv = mv;
  1435. }
  1436. }
  1437. }
  1438. }
  1439. }
  1440. }
  1441. return best_sad;
  1442. }
  1443. int vp9_diamond_search_sad_c(const MACROBLOCK *x,
  1444. const search_site_config *cfg,
  1445. MV *ref_mv, MV *best_mv, int search_param,
  1446. int sad_per_bit, int *num00,
  1447. const vp9_variance_fn_ptr_t *fn_ptr,
  1448. const MV *center_mv) {
  1449. int i, j, step;
  1450. const MACROBLOCKD *const xd = &x->e_mbd;
  1451. uint8_t *what = x->plane[0].src.buf;
  1452. const int what_stride = x->plane[0].src.stride;
  1453. const uint8_t *in_what;
  1454. const int in_what_stride = xd->plane[0].pre[0].stride;
  1455. const uint8_t *best_address;
  1456. unsigned int bestsad = INT_MAX;
  1457. int best_site = 0;
  1458. int last_site = 0;
  1459. int ref_row;
  1460. int ref_col;
  1461. // search_param determines the length of the initial step and hence the number
  1462. // of iterations.
  1463. // 0 = initial step (MAX_FIRST_STEP) pel
  1464. // 1 = (MAX_FIRST_STEP/2) pel,
  1465. // 2 = (MAX_FIRST_STEP/4) pel...
  1466. const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
  1467. const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
  1468. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  1469. clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  1470. ref_row = ref_mv->row;
  1471. ref_col = ref_mv->col;
  1472. *num00 = 0;
  1473. best_mv->row = ref_row;
  1474. best_mv->col = ref_col;
  1475. // Work out the start point for the search
  1476. in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
  1477. best_address = in_what;
  1478. // Check the starting position
  1479. bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
  1480. + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
  1481. i = 1;
  1482. for (step = 0; step < tot_steps; step++) {
  1483. int all_in = 1, t;
  1484. // All_in is true if every one of the points we are checking are within
  1485. // the bounds of the image.
  1486. all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
  1487. all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
  1488. all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
  1489. all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
  1490. // If all the pixels are within the bounds we don't check whether the
  1491. // search point is valid in this loop, otherwise we check each point
  1492. // for validity..
  1493. if (all_in) {
  1494. unsigned int sad_array[4];
  1495. for (j = 0; j < cfg->searches_per_step; j += 4) {
  1496. unsigned char const *block_offset[4];
  1497. for (t = 0; t < 4; t++)
  1498. block_offset[t] = ss[i + t].offset + best_address;
  1499. fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1500. sad_array);
  1501. for (t = 0; t < 4; t++, i++) {
  1502. if (sad_array[t] < bestsad) {
  1503. const MV this_mv = {best_mv->row + ss[i].mv.row,
  1504. best_mv->col + ss[i].mv.col};
  1505. sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
  1506. sad_per_bit);
  1507. if (sad_array[t] < bestsad) {
  1508. bestsad = sad_array[t];
  1509. best_site = i;
  1510. }
  1511. }
  1512. }
  1513. }
  1514. } else {
  1515. for (j = 0; j < cfg->searches_per_step; j++) {
  1516. // Trap illegal vectors
  1517. const MV this_mv = {best_mv->row + ss[i].mv.row,
  1518. best_mv->col + ss[i].mv.col};
  1519. if (is_mv_in(x, &this_mv)) {
  1520. const uint8_t *const check_here = ss[i].offset + best_address;
  1521. unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
  1522. in_what_stride);
  1523. if (thissad < bestsad) {
  1524. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1525. if (thissad < bestsad) {
  1526. bestsad = thissad;
  1527. best_site = i;
  1528. }
  1529. }
  1530. }
  1531. i++;
  1532. }
  1533. }
  1534. if (best_site != last_site) {
  1535. best_mv->row += ss[best_site].mv.row;
  1536. best_mv->col += ss[best_site].mv.col;
  1537. best_address += ss[best_site].offset;
  1538. last_site = best_site;
  1539. #if defined(NEW_DIAMOND_SEARCH)
  1540. while (1) {
  1541. const MV this_mv = {best_mv->row + ss[best_site].mv.row,
  1542. best_mv->col + ss[best_site].mv.col};
  1543. if (is_mv_in(x, &this_mv)) {
  1544. const uint8_t *const check_here = ss[best_site].offset + best_address;
  1545. unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
  1546. in_what_stride);
  1547. if (thissad < bestsad) {
  1548. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1549. if (thissad < bestsad) {
  1550. bestsad = thissad;
  1551. best_mv->row += ss[best_site].mv.row;
  1552. best_mv->col += ss[best_site].mv.col;
  1553. best_address += ss[best_site].offset;
  1554. continue;
  1555. }
  1556. }
  1557. }
  1558. break;
  1559. };
  1560. #endif
  1561. } else if (best_address == in_what) {
  1562. (*num00)++;
  1563. }
  1564. }
  1565. return bestsad;
  1566. }
  // Finds the offset into `ref` at which `src` matches best, using
  // vp9_vector_var() as the mismatch score (lower is better).
  //
  // With bw = 4 << bwl, offsets 0..bw are considered: first a coarse scan in
  // steps of 16, then four refinement passes that test the two neighbours at
  // distance 8, 4, 2 and 1 around the best offset found so far.
  //
  // Returns the winning offset minus bw / 2.
  static int vector_match(int16_t *ref, int16_t *src, int bwl) {
    const int bw = 4 << bwl;
    int best_sad = INT_MAX;
    int best_pos = 0;
    int pos, step;

    // Coarse scan over every 16th offset.
    for (pos = 0; pos <= bw; pos += 16) {
      const int sad = vp9_vector_var(&ref[pos], src, bwl);
      if (sad < best_sad) {
        best_sad = sad;
        best_pos = pos;
      }
    }

    // Successive refinement: halve the distance after every pass.
    for (step = 8; step >= 1; step >>= 1) {
      // Both neighbours are measured from the best offset at the start of the
      // pass, even when the first neighbour already improved on it.
      const int anchor = best_pos;
      int side;

      for (side = -1; side <= 1; side += 2) {
        const int this_pos = anchor + side * step;
        int sad;

        // Stay inside the searched range.
        if (this_pos < 0 || this_pos > bw)
          continue;

        sad = vp9_vector_var(&ref[this_pos], src, bwl);
        if (sad < best_sad) {
          best_sad = sad;
          best_pos = this_pos;
        }
      }
    }

    return best_pos - (bw >> 1);
  }
  1630. static const MV search_pos[4] = {
  1631. {-1, 0}, {0, -1}, {0, 1}, {1, 0},
  1632. };
  1633. unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
  1634. BLOCK_SIZE bsize,
  1635. int mi_row, int mi_col) {
  1636. MACROBLOCKD *xd = &x->e_mbd;
  1637. MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  1638. struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  1639. DECLARE_ALIGNED(16, int16_t, hbuf[128]);
  1640. DECLARE_ALIGNED(16, int16_t, vbuf[128]);
  1641. DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
  1642. DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
  1643. int idx;
  1644. const int bw = 4 << b_width_log2_lookup[bsize];
  1645. const int bh = 4 << b_height_log2_lookup[bsize];
  1646. const int search_width = bw << 1;
  1647. const int search_height = bh << 1;
  1648. const int src_stride = x->plane[0].src.stride;
  1649. const int ref_stride = xd->plane[0].pre[0].stride;
  1650. uint8_t const *ref_buf, *src_buf;
  1651. MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
  1652. unsigned int best_sad, tmp_sad, this_sad[4];
  1653. MV this_mv;
  1654. const int norm_factor = 3 + (bw >> 5);
  1655. const YV12_BUFFER_CONFIG *scaled_ref_frame =
  1656. vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
  1657. if (scaled_ref_frame) {
  1658. int i;
  1659. // Swap out the reference frame for a version that's been scaled to
  1660. // match the resolution of the current frame, allowing the existing
  1661. // motion search code to be used without additional modifications.
  1662. for (i = 0; i < MAX_MB_PLANE; i++)
  1663. backup_yv12[i] = xd->plane[i].pre[0];
  1664. vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  1665. }
  1666. #if CONFIG_VP9_HIGHBITDEPTH
  1667. {
  1668. unsigned int this_sad;
  1669. tmp_mv->row = 0;
  1670. tmp_mv->col = 0;
  1671. this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
  1672. xd->plane[0].pre[0].buf, ref_stride);
  1673. if (scaled_ref_frame) {
  1674. int i;
  1675. for (i = 0; i < MAX_MB_PLANE; i++)
  1676. xd->plane[i].pre[0] = backup_yv12[i];
  1677. }
  1678. return this_sad;
  1679. }
  1680. #endif
  1681. // Set up prediction 1-D reference set
  1682. ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
  1683. for (idx = 0; idx < search_width; idx += 16) {
  1684. vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
  1685. ref_buf += 16;
  1686. }
  1687. ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
  1688. for (idx = 0; idx < search_height; ++idx) {
  1689. vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor;
  1690. ref_buf += ref_stride;
  1691. }
  1692. // Set up src 1-D reference set
  1693. for (idx = 0; idx < bw; idx += 16) {
  1694. src_buf = x->plane[0].src.buf + idx;
  1695. vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
  1696. }
  1697. src_buf = x->plane[0].src.buf;
  1698. for (idx = 0; idx < bh; ++idx) {
  1699. src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor;
  1700. src_buf += src_stride;
  1701. }
  1702. // Find the best match per 1-D search
  1703. tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
  1704. tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
  1705. this_mv = *tmp_mv;
  1706. src_buf = x->plane[0].src.buf;
  1707. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  1708. best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
  1709. {
  1710. const uint8_t * const pos[4] = {
  1711. ref_buf - ref_stride,
  1712. ref_buf - 1,
  1713. ref_buf + 1,
  1714. ref_buf + ref_stride,
  1715. };
  1716. cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
  1717. }
  1718. for (idx = 0; idx < 4; ++idx) {
  1719. if (this_sad[idx] < best_sad) {
  1720. best_sad = this_sad[idx];
  1721. tmp_mv->row = search_pos[idx].row + this_mv.row;
  1722. tmp_mv->col = search_pos[idx].col + this_mv.col;
  1723. }
  1724. }
  1725. if (this_sad[0] < this_sad[3])
  1726. this_mv.row -= 1;
  1727. else
  1728. this_mv.row += 1;
  1729. if (this_sad[1] < this_sad[2])
  1730. this_mv.col -= 1;
  1731. else
  1732. this_mv.col += 1;
  1733. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  1734. tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
  1735. ref_buf, ref_stride);
  1736. if (best_sad > tmp_sad) {
  1737. *tmp_mv = this_mv;
  1738. best_sad = tmp_sad;
  1739. }
  1740. tmp_mv->row *= 8;
  1741. tmp_mv->col *= 8;
  1742. if (scaled_ref_frame) {
  1743. int i;
  1744. for (i = 0; i < MAX_MB_PLANE; i++)
  1745. xd->plane[i].pre[0] = backup_yv12[i];
  1746. }
  1747. return best_sad;
  1748. }
  1749. /* do_refine: If last step (1-away) of n-step search doesn't pick the center
  1750. point as the best match, we will do a final 1-away diamond
  1751. refining search */
  1752. int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
  1753. MV *mvp_full, int step_param,
  1754. int sadpb, int further_steps, int do_refine,
  1755. int *cost_list,
  1756. const vp9_variance_fn_ptr_t *fn_ptr,
  1757. const MV *ref_mv, MV *dst_mv) {
  1758. MV temp_mv;
  1759. int thissme, n, num00 = 0;
  1760. int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
  1761. step_param, sadpb, &n,
  1762. fn_ptr, ref_mv);
  1763. if (bestsme < INT_MAX)
  1764. bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1765. *dst_mv = temp_mv;
  1766. // If there won't be more n-step search, check to see if refining search is
  1767. // needed.
  1768. if (n > further_steps)
  1769. do_refine = 0;
  1770. while (n < further_steps) {
  1771. ++n;
  1772. if (num00) {
  1773. num00--;
  1774. } else {
  1775. thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
  1776. step_param + n, sadpb, &num00,
  1777. fn_ptr, ref_mv);
  1778. if (thissme < INT_MAX)
  1779. thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1780. // check to see if refining search is needed.
  1781. if (num00 > further_steps - n)
  1782. do_refine = 0;
  1783. if (thissme < bestsme) {
  1784. bestsme = thissme;
  1785. *dst_mv = temp_mv;
  1786. }
  1787. }
  1788. }
  1789. // final 1-away diamond refining search
  1790. if (do_refine) {
  1791. const int search_range = 8;
  1792. MV best_mv = *dst_mv;
  1793. thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range,
  1794. fn_ptr, ref_mv);
  1795. if (thissme < INT_MAX)
  1796. thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
  1797. if (thissme < bestsme) {
  1798. bestsme = thissme;
  1799. *dst_mv = best_mv;
  1800. }
  1801. }
  1802. // Return cost list.
  1803. if (cost_list) {
  1804. calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  1805. }
  1806. return bestsme;
  1807. }
  1808. int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
  1809. int sad_per_bit, int distance,
  1810. const vp9_variance_fn_ptr_t *fn_ptr,
  1811. const MV *center_mv, MV *best_mv) {
  1812. int r, c;
  1813. const MACROBLOCKD *const xd = &x->e_mbd;
  1814. const struct buf_2d *const what = &x->plane[0].src;
  1815. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1816. const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
  1817. const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
  1818. const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
  1819. const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
  1820. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  1821. int best_sad = fn_ptr->sdf(what->buf, what->stride,
  1822. get_buf_from_mv(in_what, ref_mv), in_what->stride) +
  1823. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1824. *best_mv = *ref_mv;
  1825. for (r = row_min; r < row_max; ++r) {
  1826. for (c = col_min; c < col_max; ++c) {
  1827. const MV mv = {r, c};
  1828. const int sad = fn_ptr->sdf(what->buf, what->stride,
  1829. get_buf_from_mv(in_what, &mv), in_what->stride) +
  1830. mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1831. if (sad < best_sad) {
  1832. best_sad = sad;
  1833. *best_mv = mv;
  1834. }
  1835. }
  1836. }
  1837. return best_sad;
  1838. }
  1839. int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
  1840. int sad_per_bit, int distance,
  1841. const vp9_variance_fn_ptr_t *fn_ptr,
  1842. const MV *center_mv, MV *best_mv) {
  1843. int r;
  1844. const MACROBLOCKD *const xd = &x->e_mbd;
  1845. const struct buf_2d *const what = &x->plane[0].src;
  1846. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1847. const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
  1848. const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
  1849. const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
  1850. const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
  1851. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  1852. unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
  1853. get_buf_from_mv(in_what, ref_mv), in_what->stride) +
  1854. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1855. *best_mv = *ref_mv;
  1856. for (r = row_min; r < row_max; ++r) {
  1857. int c = col_min;
  1858. const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
  1859. if (fn_ptr->sdx3f != NULL) {
  1860. while ((c + 2) < col_max) {
  1861. int i;
  1862. DECLARE_ALIGNED(16, uint32_t, sads[3]);
  1863. fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
  1864. sads);
  1865. for (i = 0; i < 3; ++i) {
  1866. unsigned int sad = sads[i];
  1867. if (sad < best_sad) {
  1868. const MV mv = {r, c};
  1869. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1870. if (sad < best_sad) {
  1871. best_sad = sad;
  1872. *best_mv = mv;
  1873. }
  1874. }
  1875. ++check_here;
  1876. ++c;
  1877. }
  1878. }
  1879. }
  1880. while (c < col_max) {
  1881. unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
  1882. check_here, in_what->stride);
  1883. if (sad < best_sad) {
  1884. const MV mv = {r, c};
  1885. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1886. if (sad < best_sad) {
  1887. best_sad = sad;
  1888. *best_mv = mv;
  1889. }
  1890. }
  1891. ++check_here;
  1892. ++c;
  1893. }
  1894. }
  1895. return best_sad;
  1896. }
  1897. int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
  1898. int sad_per_bit, int distance,
  1899. const vp9_variance_fn_ptr_t *fn_ptr,
  1900. const MV *center_mv, MV *best_mv) {
  1901. int r;
  1902. const MACROBLOCKD *const xd = &x->e_mbd;
  1903. const struct buf_2d *const what = &x->plane[0].src;
  1904. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1905. const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
  1906. const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
  1907. const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
  1908. const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
  1909. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  1910. unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
  1911. get_buf_from_mv(in_what, ref_mv), in_what->stride) +
  1912. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1913. *best_mv = *ref_mv;
  1914. for (r = row_min; r < row_max; ++r) {
  1915. int c = col_min;
  1916. const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
  1917. if (fn_ptr->sdx8f != NULL) {
  1918. while ((c + 7) < col_max) {
  1919. int i;
  1920. DECLARE_ALIGNED(16, uint32_t, sads[8]);
  1921. fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
  1922. sads);
  1923. for (i = 0; i < 8; ++i) {
  1924. unsigned int sad = sads[i];
  1925. if (sad < best_sad) {
  1926. const MV mv = {r, c};
  1927. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1928. if (sad < best_sad) {
  1929. best_sad = sad;
  1930. *best_mv = mv;
  1931. }
  1932. }
  1933. ++check_here;
  1934. ++c;
  1935. }
  1936. }
  1937. }
  1938. if (fn_ptr->sdx3f != NULL) {
  1939. while ((c + 2) < col_max) {
  1940. int i;
  1941. DECLARE_ALIGNED(16, uint32_t, sads[3]);
  1942. fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
  1943. sads);
  1944. for (i = 0; i < 3; ++i) {
  1945. unsigned int sad = sads[i];
  1946. if (sad < best_sad) {
  1947. const MV mv = {r, c};
  1948. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1949. if (sad < best_sad) {
  1950. best_sad = sad;
  1951. *best_mv = mv;
  1952. }
  1953. }
  1954. ++check_here;
  1955. ++c;
  1956. }
  1957. }
  1958. }
  1959. while (c < col_max) {
  1960. unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
  1961. check_here, in_what->stride);
  1962. if (sad < best_sad) {
  1963. const MV mv = {r, c};
  1964. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1965. if (sad < best_sad) {
  1966. best_sad = sad;
  1967. *best_mv = mv;
  1968. }
  1969. }
  1970. ++check_here;
  1971. ++c;
  1972. }
  1973. }
  1974. return best_sad;
  1975. }
  1976. int vp9_refining_search_sad(const MACROBLOCK *x,
  1977. MV *ref_mv, int error_per_bit,
  1978. int search_range,
  1979. const vp9_variance_fn_ptr_t *fn_ptr,
  1980. const MV *center_mv) {
  1981. const MACROBLOCKD *const xd = &x->e_mbd;
  1982. const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
  1983. const struct buf_2d *const what = &x->plane[0].src;
  1984. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1985. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  1986. const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
  1987. unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
  1988. in_what->stride) +
  1989. mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  1990. int i, j;
  1991. for (i = 0; i < search_range; i++) {
  1992. int best_site = -1;
  1993. const int all_in = ((ref_mv->row - 1) > x->mv_row_min) &
  1994. ((ref_mv->row + 1) < x->mv_row_max) &
  1995. ((ref_mv->col - 1) > x->mv_col_min) &
  1996. ((ref_mv->col + 1) < x->mv_col_max);
  1997. if (all_in) {
  1998. unsigned int sads[4];
  1999. const uint8_t *const positions[4] = {
  2000. best_address - in_what->stride,
  2001. best_address - 1,
  2002. best_address + 1,
  2003. best_address + in_what->stride
  2004. };
  2005. fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
  2006. for (j = 0; j < 4; ++j) {
  2007. if (sads[j] < best_sad) {
  2008. const MV mv = {ref_mv->row + neighbors[j].row,
  2009. ref_mv->col + neighbors[j].col};
  2010. sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  2011. if (sads[j] < best_sad) {
  2012. best_sad = sads[j];
  2013. best_site = j;
  2014. }
  2015. }
  2016. }
  2017. } else {
  2018. for (j = 0; j < 4; ++j) {
  2019. const MV mv = {ref_mv->row + neighbors[j].row,
  2020. ref_mv->col + neighbors[j].col};
  2021. if (is_mv_in(x, &mv)) {
  2022. unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
  2023. get_buf_from_mv(in_what, &mv),
  2024. in_what->stride);
  2025. if (sad < best_sad) {
  2026. sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  2027. if (sad < best_sad) {
  2028. best_sad = sad;
  2029. best_site = j;
  2030. }
  2031. }
  2032. }
  2033. }
  2034. }
  2035. if (best_site == -1) {
  2036. break;
  2037. } else {
  2038. ref_mv->row += neighbors[best_site].row;
  2039. ref_mv->col += neighbors[best_site].col;
  2040. best_address = get_buf_from_mv(in_what, ref_mv);
  2041. }
  2042. }
  2043. return best_sad;
  2044. }
  2045. // This function is called when we do joint motion search in comp_inter_inter
  2046. // mode.
  2047. int vp9_refining_search_8p_c(const MACROBLOCK *x,
  2048. MV *ref_mv, int error_per_bit,
  2049. int search_range,
  2050. const vp9_variance_fn_ptr_t *fn_ptr,
  2051. const MV *center_mv,
  2052. const uint8_t *second_pred) {
  2053. const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
  2054. {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
  2055. const MACROBLOCKD *const xd = &x->e_mbd;
  2056. const struct buf_2d *const what = &x->plane[0].src;
  2057. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  2058. const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  2059. unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
  2060. get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) +
  2061. mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  2062. int i, j;
  2063. for (i = 0; i < search_range; ++i) {
  2064. int best_site = -1;
  2065. for (j = 0; j < 8; ++j) {
  2066. const MV mv = {ref_mv->row + neighbors[j].row,
  2067. ref_mv->col + neighbors[j].col};
  2068. if (is_mv_in(x, &mv)) {
  2069. unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
  2070. get_buf_from_mv(in_what, &mv), in_what->stride, second_pred);
  2071. if (sad < best_sad) {
  2072. sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  2073. if (sad < best_sad) {
  2074. best_sad = sad;
  2075. best_site = j;
  2076. }
  2077. }
  2078. }
  2079. }
  2080. if (best_site == -1) {
  2081. break;
  2082. } else {
  2083. ref_mv->row += neighbors[best_site].row;
  2084. ref_mv->col += neighbors[best_site].col;
  2085. }
  2086. }
  2087. return best_sad;
  2088. }
  2089. int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
  2090. BLOCK_SIZE bsize, MV *mvp_full,
  2091. int step_param, int error_per_bit,
  2092. int *cost_list,
  2093. const MV *ref_mv, MV *tmp_mv,
  2094. int var_max, int rd) {
  2095. const SPEED_FEATURES *const sf = &cpi->sf;
  2096. const SEARCH_METHODS method = sf->mv.search_method;
  2097. vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
  2098. int var = 0;
  2099. if (cost_list) {
  2100. cost_list[0] = INT_MAX;
  2101. cost_list[1] = INT_MAX;
  2102. cost_list[2] = INT_MAX;
  2103. cost_list[3] = INT_MAX;
  2104. cost_list[4] = INT_MAX;
  2105. }
  2106. switch (method) {
  2107. case FAST_DIAMOND:
  2108. var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
  2109. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  2110. break;
  2111. case FAST_HEX:
  2112. var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
  2113. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  2114. break;
  2115. case HEX:
  2116. var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
  2117. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  2118. break;
  2119. case SQUARE:
  2120. var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
  2121. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  2122. break;
  2123. case BIGDIA:
  2124. var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
  2125. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  2126. break;
  2127. case NSTEP:
  2128. var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
  2129. MAX_MVSEARCH_STEPS - 1 - step_param,
  2130. 1, cost_list, fn_ptr, ref_mv, tmp_mv);
  2131. break;
  2132. default:
  2133. assert(0 && "Invalid search method.");
  2134. }
  2135. if (method != NSTEP && rd && var < var_max)
  2136. var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
  2137. return var;
  2138. }