vp9_mbgraph.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <limits.h>
  11. #include "./vp9_rtcd.h"
  12. #include "./vpx_dsp_rtcd.h"
  13. #include "vpx_mem/vpx_mem.h"
  14. #include "vp9/encoder/vp9_segmentation.h"
  15. #include "vp9/encoder/vp9_mcomp.h"
  16. #include "vp9/common/vp9_blockd.h"
  17. #include "vp9/common/vp9_reconinter.h"
  18. #include "vp9/common/vp9_reconintra.h"
  19. #include "vp9/common/vp9_systemdependent.h"
  20. static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
  21. const MV *ref_mv,
  22. MV *dst_mv,
  23. int mb_row,
  24. int mb_col) {
  25. MACROBLOCK *const x = &cpi->td.mb;
  26. MACROBLOCKD *const xd = &x->e_mbd;
  27. const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  28. const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
  29. const int tmp_col_min = x->mv_col_min;
  30. const int tmp_col_max = x->mv_col_max;
  31. const int tmp_row_min = x->mv_row_min;
  32. const int tmp_row_max = x->mv_row_max;
  33. MV ref_full;
  34. int cost_list[5];
  35. // Further step/diamond searches as necessary
  36. int step_param = mv_sf->reduce_first_step_size;
  37. step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2);
  38. vp9_set_mv_search_range(x, ref_mv);
  39. ref_full.col = ref_mv->col >> 3;
  40. ref_full.row = ref_mv->row >> 3;
  41. /*cpi->sf.search_method == HEX*/
  42. vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0,
  43. cond_cost_list(cpi, cost_list),
  44. &v_fn_ptr, 0, ref_mv, dst_mv);
  45. // Try sub-pixel MC
  46. // if (bestsme > error_thresh && bestsme < INT_MAX)
  47. {
  48. int distortion;
  49. unsigned int sse;
  50. cpi->find_fractional_mv_step(
  51. x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
  52. &v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
  53. cond_cost_list(cpi, cost_list),
  54. NULL, NULL,
  55. &distortion, &sse, NULL, 0, 0);
  56. }
  57. xd->mi[0]->mbmi.mode = NEWMV;
  58. xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv;
  59. vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
  60. /* restore UMV window */
  61. x->mv_col_min = tmp_col_min;
  62. x->mv_col_max = tmp_col_max;
  63. x->mv_row_min = tmp_row_min;
  64. x->mv_row_max = tmp_row_max;
  65. return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
  66. xd->plane[0].dst.buf, xd->plane[0].dst.stride);
  67. }
  68. static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
  69. int_mv *dst_mv, int mb_row, int mb_col) {
  70. MACROBLOCK *const x = &cpi->td.mb;
  71. MACROBLOCKD *const xd = &x->e_mbd;
  72. unsigned int err, tmp_err;
  73. MV tmp_mv;
  74. // Try zero MV first
  75. // FIXME should really use something like near/nearest MV and/or MV prediction
  76. err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
  77. xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
  78. dst_mv->as_int = 0;
  79. // Test last reference frame using the previous best mv as the
  80. // starting point (best reference) for the search
  81. tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
  82. if (tmp_err < err) {
  83. err = tmp_err;
  84. dst_mv->as_mv = tmp_mv;
  85. }
  86. // If the current best reference mv is not centered on 0,0 then do a 0,0
  87. // based search as well.
  88. if (ref_mv->row != 0 || ref_mv->col != 0) {
  89. unsigned int tmp_err;
  90. MV zero_ref_mv = {0, 0}, tmp_mv;
  91. tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
  92. mb_row, mb_col);
  93. if (tmp_err < err) {
  94. dst_mv->as_mv = tmp_mv;
  95. err = tmp_err;
  96. }
  97. }
  98. return err;
  99. }
  100. static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) {
  101. MACROBLOCK *const x = &cpi->td.mb;
  102. MACROBLOCKD *const xd = &x->e_mbd;
  103. unsigned int err;
  104. // Try zero MV first
  105. // FIXME should really use something like near/nearest MV and/or MV prediction
  106. err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
  107. xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
  108. dst_mv->as_int = 0;
  109. return err;
  110. }
  111. static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) {
  112. MACROBLOCK *const x = &cpi->td.mb;
  113. MACROBLOCKD *const xd = &x->e_mbd;
  114. PREDICTION_MODE best_mode = -1, mode;
  115. unsigned int best_err = INT_MAX;
  116. // calculate SATD for each intra prediction mode;
  117. // we're intentionally not doing 4x4, we just want a rough estimate
  118. for (mode = DC_PRED; mode <= TM_PRED; mode++) {
  119. unsigned int err;
  120. xd->mi[0]->mbmi.mode = mode;
  121. vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode,
  122. x->plane[0].src.buf, x->plane[0].src.stride,
  123. xd->plane[0].dst.buf, xd->plane[0].dst.stride,
  124. 0, 0, 0);
  125. err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
  126. xd->plane[0].dst.buf, xd->plane[0].dst.stride);
  127. // find best
  128. if (err < best_err) {
  129. best_err = err;
  130. best_mode = mode;
  131. }
  132. }
  133. if (pbest_mode)
  134. *pbest_mode = best_mode;
  135. return best_err;
  136. }
  137. static void update_mbgraph_mb_stats
  138. (
  139. VP9_COMP *cpi,
  140. MBGRAPH_MB_STATS *stats,
  141. YV12_BUFFER_CONFIG *buf,
  142. int mb_y_offset,
  143. YV12_BUFFER_CONFIG *golden_ref,
  144. const MV *prev_golden_ref_mv,
  145. YV12_BUFFER_CONFIG *alt_ref,
  146. int mb_row,
  147. int mb_col
  148. ) {
  149. MACROBLOCK *const x = &cpi->td.mb;
  150. MACROBLOCKD *const xd = &x->e_mbd;
  151. int intra_error;
  152. VP9_COMMON *cm = &cpi->common;
  153. // FIXME in practice we're completely ignoring chroma here
  154. x->plane[0].src.buf = buf->y_buffer + mb_y_offset;
  155. x->plane[0].src.stride = buf->y_stride;
  156. xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset;
  157. xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride;
  158. // do intra 16x16 prediction
  159. intra_error = find_best_16x16_intra(cpi,
  160. &stats->ref[INTRA_FRAME].m.mode);
  161. if (intra_error <= 0)
  162. intra_error = 1;
  163. stats->ref[INTRA_FRAME].err = intra_error;
  164. // Golden frame MV search, if it exists and is different than last frame
  165. if (golden_ref) {
  166. int g_motion_error;
  167. xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
  168. xd->plane[0].pre[0].stride = golden_ref->y_stride;
  169. g_motion_error = do_16x16_motion_search(cpi,
  170. prev_golden_ref_mv,
  171. &stats->ref[GOLDEN_FRAME].m.mv,
  172. mb_row, mb_col);
  173. stats->ref[GOLDEN_FRAME].err = g_motion_error;
  174. } else {
  175. stats->ref[GOLDEN_FRAME].err = INT_MAX;
  176. stats->ref[GOLDEN_FRAME].m.mv.as_int = 0;
  177. }
  178. // Do an Alt-ref frame MV search, if it exists and is different than
  179. // last/golden frame.
  180. if (alt_ref) {
  181. int a_motion_error;
  182. xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
  183. xd->plane[0].pre[0].stride = alt_ref->y_stride;
  184. a_motion_error = do_16x16_zerozero_search(cpi,
  185. &stats->ref[ALTREF_FRAME].m.mv);
  186. stats->ref[ALTREF_FRAME].err = a_motion_error;
  187. } else {
  188. stats->ref[ALTREF_FRAME].err = INT_MAX;
  189. stats->ref[ALTREF_FRAME].m.mv.as_int = 0;
  190. }
  191. }
  192. static void update_mbgraph_frame_stats(VP9_COMP *cpi,
  193. MBGRAPH_FRAME_STATS *stats,
  194. YV12_BUFFER_CONFIG *buf,
  195. YV12_BUFFER_CONFIG *golden_ref,
  196. YV12_BUFFER_CONFIG *alt_ref) {
  197. MACROBLOCK *const x = &cpi->td.mb;
  198. MACROBLOCKD *const xd = &x->e_mbd;
  199. VP9_COMMON *const cm = &cpi->common;
  200. int mb_col, mb_row, offset = 0;
  201. int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
  202. MV gld_top_mv = {0, 0};
  203. MODE_INFO mi_local;
  204. vp9_zero(mi_local);
  205. // Set up limit values for motion vectors to prevent them extending outside
  206. // the UMV borders.
  207. x->mv_row_min = -BORDER_MV_PIXELS_B16;
  208. x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
  209. xd->up_available = 0;
  210. xd->plane[0].dst.stride = buf->y_stride;
  211. xd->plane[0].pre[0].stride = buf->y_stride;
  212. xd->plane[1].dst.stride = buf->uv_stride;
  213. xd->mi[0] = &mi_local;
  214. mi_local.mbmi.sb_type = BLOCK_16X16;
  215. mi_local.mbmi.ref_frame[0] = LAST_FRAME;
  216. mi_local.mbmi.ref_frame[1] = NONE;
  217. for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
  218. MV gld_left_mv = gld_top_mv;
  219. int mb_y_in_offset = mb_y_offset;
  220. int arf_y_in_offset = arf_y_offset;
  221. int gld_y_in_offset = gld_y_offset;
  222. // Set up limit values for motion vectors to prevent them extending outside
  223. // the UMV borders.
  224. x->mv_col_min = -BORDER_MV_PIXELS_B16;
  225. x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
  226. xd->left_available = 0;
  227. for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
  228. MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
  229. update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
  230. golden_ref, &gld_left_mv, alt_ref,
  231. mb_row, mb_col);
  232. gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
  233. if (mb_col == 0) {
  234. gld_top_mv = gld_left_mv;
  235. }
  236. xd->left_available = 1;
  237. mb_y_in_offset += 16;
  238. gld_y_in_offset += 16;
  239. arf_y_in_offset += 16;
  240. x->mv_col_min -= 16;
  241. x->mv_col_max -= 16;
  242. }
  243. xd->up_available = 1;
  244. mb_y_offset += buf->y_stride * 16;
  245. gld_y_offset += golden_ref->y_stride * 16;
  246. if (alt_ref)
  247. arf_y_offset += alt_ref->y_stride * 16;
  248. x->mv_row_min -= 16;
  249. x->mv_row_max -= 16;
  250. offset += cm->mb_cols;
  251. }
  252. }
  253. // void separate_arf_mbs_byzz
  254. static void separate_arf_mbs(VP9_COMP *cpi) {
  255. VP9_COMMON *const cm = &cpi->common;
  256. int mb_col, mb_row, offset, i;
  257. int mi_row, mi_col;
  258. int ncnt[4] = { 0 };
  259. int n_frames = cpi->mbgraph_n_frames;
  260. int *arf_not_zz;
  261. CHECK_MEM_ERROR(cm, arf_not_zz,
  262. vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz),
  263. 1));
  264. // We are not interested in results beyond the alt ref itself.
  265. if (n_frames > cpi->rc.frames_till_gf_update_due)
  266. n_frames = cpi->rc.frames_till_gf_update_due;
  267. // defer cost to reference frames
  268. for (i = n_frames - 1; i >= 0; i--) {
  269. MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
  270. for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
  271. offset += cm->mb_cols, mb_row++) {
  272. for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
  273. MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
  274. int altref_err = mb_stats->ref[ALTREF_FRAME].err;
  275. int intra_err = mb_stats->ref[INTRA_FRAME ].err;
  276. int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
  277. // Test for altref vs intra and gf and that its mv was 0,0.
  278. if (altref_err > 1000 ||
  279. altref_err > intra_err ||
  280. altref_err > golden_err) {
  281. arf_not_zz[offset + mb_col]++;
  282. }
  283. }
  284. }
  285. }
  286. // arf_not_zz is indexed by MB, but this loop is indexed by MI to avoid out
  287. // of bound access in segmentation_map
  288. for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
  289. for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
  290. // If any of the blocks in the sequence failed then the MB
  291. // goes in segment 0
  292. if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) {
  293. ncnt[0]++;
  294. cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0;
  295. } else {
  296. cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 1;
  297. ncnt[1]++;
  298. }
  299. }
  300. }
  301. // Only bother with segmentation if over 10% of the MBs in static segment
  302. // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) )
  303. if (1) {
  304. // Note % of blocks that are marked as static
  305. if (cm->MBs)
  306. cpi->static_mb_pct = (ncnt[1] * 100) / (cm->mi_rows * cm->mi_cols);
  307. // This error case should not be reachable as this function should
  308. // never be called with the common data structure uninitialized.
  309. else
  310. cpi->static_mb_pct = 0;
  311. vp9_enable_segmentation(&cm->seg);
  312. } else {
  313. cpi->static_mb_pct = 0;
  314. vp9_disable_segmentation(&cm->seg);
  315. }
  316. // Free localy allocated storage
  317. vpx_free(arf_not_zz);
  318. }
  319. void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
  320. VP9_COMMON *const cm = &cpi->common;
  321. int i, n_frames = vp9_lookahead_depth(cpi->lookahead);
  322. YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  323. assert(golden_ref != NULL);
  324. // we need to look ahead beyond where the ARF transitions into
  325. // being a GF - so exit if we don't look ahead beyond that
  326. if (n_frames <= cpi->rc.frames_till_gf_update_due)
  327. return;
  328. if (n_frames > MAX_LAG_BUFFERS)
  329. n_frames = MAX_LAG_BUFFERS;
  330. cpi->mbgraph_n_frames = n_frames;
  331. for (i = 0; i < n_frames; i++) {
  332. MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
  333. memset(frame_stats->mb_stats, 0,
  334. cm->mb_rows * cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats));
  335. }
  336. // do motion search to find contribution of each reference to data
  337. // later on in this GF group
  338. // FIXME really, the GF/last MC search should be done forward, and
  339. // the ARF MC search backwards, to get optimal results for MV caching
  340. for (i = 0; i < n_frames; i++) {
  341. MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
  342. struct lookahead_entry *q_cur = vp9_lookahead_peek(cpi->lookahead, i);
  343. assert(q_cur != NULL);
  344. update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img,
  345. golden_ref, cpi->Source);
  346. }
  347. vp9_clear_system_state();
  348. separate_arf_mbs(cpi);
  349. }