vp9_encodemb.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "./vp9_rtcd.h"
#include "./vpx_config.h"

#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_systemdependent.h"

#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"
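
// The above (ta) and left (tl) entropy contexts consumed by the trellis
// coefficient optimizer below. The 16 entries per plane cover the 4x4-block
// columns (or rows) of a 64x64 superblock: 64 / 4 = 16.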
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};

void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff, ptrdiff_t diff_stride,
                          const uint8_t *src, ptrdiff_t src_stride,
                          const uint8_t *pred, ptrdiff_t pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++)
      diff[c] = src[c] - pred[c];

    diff += diff_stride;
    pred += pred_stride;
    src += src_stride;
  }
}

#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_subtract_block_c(int rows, int cols,
                                 int16_t *diff, ptrdiff_t diff_stride,
                                 const uint8_t *src8, ptrdiff_t src_stride,
                                 const uint8_t *pred8, ptrdiff_t pred_stride,
                                 int bd) {
  int r, c;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  (void) bd;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) {
      diff[c] = src[c] - pred[c];
    }

    diff += diff_stride;
    pred += pred_stride;
    src += src_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
                              p->src.stride, pd->dst.buf, pd->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
}

#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)

typedef struct vp9_token_state {
  int           rate;
  int           error;
  int           next;
  int16_t       token;
  short         qc;
} vp9_token_state;

// TODO(jimbankoski): experiment to find optimal RD numbers.
static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };

#define UPDATE_RD_COST()\
{\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
}
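
// Note: RDCOST (vp9_rd.h) weighs the rate term by rdmult and the distortion
// term by rddiv. When the two candidate costs tie, RDTRUNC re-compares using
// only the low byte of the rounded rate * rdmult product as a cheap,
// deterministic tie-breaker; its DM and D arguments exist only for signature
// symmetry with RDCOST and are intentionally unused.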

// This function is a placeholder for now but may ultimately need
// to scan previous tokens to work out the correct context.
static int trellis_get_coeff_context(const int16_t *scan,
                                     const int16_t *nb,
                                     int idx, int token,
                                     uint8_t *token_cache) {
  int bak = token_cache[scan[idx]], pt;
  token_cache[scan[idx]] = vp9_pt_energy_class[token];
  pt = get_coef_context(nb, token_cache, idx + 1);
  token_cache[scan[idx]] = bak;
  return pt;
}
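
// optimize_b() runs a Viterbi-style trellis over the quantized coefficients
// of one transform block, walking backwards from the EOB. Each scan position
// carries two candidate states: [0] keeps the coefficient as quantized, and
// [1] reduces its magnitude by one (possibly to zero, which may let the EOB
// move earlier). The cheapest RD path is then written back into
// qcoeff/dqcoeff.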
static int optimize_b(MACROBLOCK *mb, int plane, int block,
                      TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE type = pd->plane_type;
  const int default_eob = 16 << (tx_size << 1);
  const int mul = 1 + (tx_size == TX_32X32);
  const int16_t *dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  int next = eob, sz = 0;
  int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1;
  int16_t t0, t1;
  EXTRABIT e0;
  int best, band, pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  assert((!type && !plane) || (type && plane));
  assert(eob <= default_eob);

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  if (!ref)
    rdmult = (rdmult * 9) >> 4;

  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][1] = tokens[eob][0];

  for (i = 0; i < eob; i++)
    token_cache[scan[i]] =
        vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];

  for (i = eob; i-- > 0;) {
    int base_bits, d2, dx;
    const int rc = scan[i];
    int x = qcoeff[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      vp9_get_token_extra(x, &t0, &e0);
      /* Consider both possible successor states. */
      if (next < default_eob) {
        band = band_translate[i + 1];
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
        rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][0].token];
        rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][1].token];
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
      dx = mul * (dqcoeff[rc] - coeff[rc]);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_index[i][0] = best;

      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
          (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
                                               dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         * we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        e0 = 0;
      } else {
        vp9_get_token_extra(x, &t0, &e0);
        t1 = t0;
      }
      if (next < default_eob) {
        band = band_translate[i + 1];
        if (t0 != EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][0].token];
        }
        if (t1 != EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][1].token];
        }
      }

      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = vp9_get_cost(t0, e0, cat6_high_cost);

      if (shortcut) {
#if CONFIG_VP9_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
        } else {
          dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        }
#else
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_index[i][1] = best;
      /* Finally, make this the new head of the trellis. */
      next = i;
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       * add a new trellis node, but we do need to update the costs.
       */
      band = band_translate[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != EOB_TOKEN) {
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != EOB_TOKEN) {
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      best_index[i][0] = best_index[i][1] = 0;
      /* Don't update next, because we didn't add a new node. */
    }
  }

  /* Now pick the best path through the whole trellis. */
  band = band_translate[i + 1];
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
  final_eob = -1;
  memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
  memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
  for (i = next; i < eob; i = next) {
    const int x = tokens[i][best].qc;
    const int rc = scan[i];
    if (x) {
      final_eob = i;
    }
    qcoeff[rc] = x;
    dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;

    next = tokens[i][best].next;
    best = best_index[i][best];
  }
  final_eob++;

  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}

static INLINE void fdct32x32(int rd_transform,
                             const int16_t *src, tran_low_t *dst,
                             int src_stride) {
  if (rd_transform)
    vp9_fdct32x32_rd(src, dst, src_stride);
  else
    vp9_fdct32x32(src, dst, src_stride);
}

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (rd_transform)
    vp9_highbd_fdct32x32_rd(src, dst, src_stride);
  else
    vp9_highbd_fdct32x32(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
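
// Note: vp9_fdct32x32_rd() is the lower-precision 32x32 forward DCT variant
// (hence x->use_lp32x32fdct); it sacrifices a little accuracy in the
// intermediate rounding for encoder speed.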

void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  int i, j;
  const int16_t *src_diff;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                     p->round_fp, p->quant_fp, p->quant_shift,
                                     qcoeff, dqcoeff, pd->dequant,
                                     eob, scan_order->scan,
                                     scan_order->iscan);
        break;
      case TX_16X16:
        vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                               pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      case TX_8X8:
        vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                               pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                               p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                               pd->dequant, eob,
                               scan_order->scan, scan_order->iscan);
        break;
      default:
        assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob, scan_order->scan,
                            scan_order->iscan);
      break;
    case TX_16X16:
      vp9_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, eob,
                      scan_order->scan, scan_order->iscan);
      break;
    case TX_8X8:
      vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
                        x->skip_block, p->zbin, p->round_fp,
                        p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob,
                        scan_order->scan, scan_order->iscan);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, eob,
                      scan_order->scan, scan_order->iscan);
      break;
    default:
      assert(0);
      break;
  }
}
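
// Note: the vp9_xform_quant* entry points differ mainly in the quantizer
// they apply. vp9_xform_quant_fp() above uses the fast-path quantizer
// (round_fp/quant_fp); vp9_xform_quant_dc() below transforms and quantizes
// only the DC coefficient; and vp9_xform_quant() runs the full
// zbin/round/quant/quant_shift pipeline.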
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  int i, j;
  const int16_t *src_diff;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        vp9_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                     p->quant_fp[0], qcoeff, dqcoeff,
                                     pd->dequant[0], eob);
        break;
      case TX_16X16:
        vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      case TX_8X8:
        vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      default:
        assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vp9_fdct32x32_1(src_diff, coeff, diff_stride);
      vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
                            p->quant_fp[0], qcoeff, dqcoeff,
                            pd->dequant[0], eob);
      break;
    case TX_16X16:
      vp9_fdct16x16_1(src_diff, coeff, diff_stride);
      vp9_quantize_dc(coeff, 256, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_8X8:
      vp9_fdct8x8_1(src_diff, coeff, diff_stride);
      vp9_quantize_dc(coeff, 64, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_dc(coeff, 16, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    default:
      assert(0);
      break;
  }
}

void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  int i, j;
  const int16_t *src_diff;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                    p->round, p->quant, p->quant_shift, qcoeff,
                                    dqcoeff, pd->dequant, eob,
                                    scan_order->scan, scan_order->iscan);
        break;
      case TX_16X16:
        vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob,
                              scan_order->scan, scan_order->iscan);
        break;
      case TX_8X8:
        vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob,
                              scan_order->scan, scan_order->iscan);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob,
                              scan_order->scan, scan_order->iscan);
        break;
      default:
        assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, eob, scan_order->scan,
                           scan_order->iscan);
      break;
    case TX_16X16:
      vp9_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    case TX_8X8:
      vp9_fdct8x8(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    default:
      assert(0);
      break;
  }
}

static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
                         TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx *const ctx = args->ctx;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int i, j;
  uint8_t *dst;
  ENTROPY_CONTEXT *a, *l;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
  a = &ctx->ta[plane][i];
  l = &ctx->tl[plane][j];

  // TODO(jingning): Per-transform-block zero forcing is only enabled for the
  // luma component; chroma components will be integrated as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    p->eobs[block] = 0;
    *a = *l = 0;
    return;
  }

  if (!x->skip_recode) {
    if (x->quant_fp) {
      // Encoding process for rtc mode.
      if (x->skip_txfm[0] == 1 && plane == 0) {
        // Skip the forward transform.
        p->eobs[block] = 0;
        *a = *l = 0;
        return;
      } else {
        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
      }
    } else {
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
        if (x->skip_txfm[txfm_blk_index] == 0) {
          // Full forward transform and quantization.
          vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
        } else if (x->skip_txfm[txfm_blk_index] == 2) {
          // Fast-path forward transform and quantization.
          vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
        } else {
          // Skip the forward transform.
          p->eobs[block] = 0;
          *a = *l = 0;
          return;
        }
      } else {
        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      }
    }
  }

  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    const int ctx = combine_entropy_contexts(*a, *l);
    *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
    *a = *l = p->eobs[block] > 0;
  }

  if (p->eobs[block])
    *(args->skip) = 0;

  if (x->skip_encode || p->eobs[block] == 0)
    return;
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride,
                                 p->eobs[block], xd->bd);
        break;
      case TX_16X16:
        vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride,
                                 p->eobs[block], xd->bd);
        break;
      case TX_8X8:
        vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride,
                               p->eobs[block], xd->bd);
        break;
      case TX_4X4:
        // This is like vp9_short_idct4x4 but has a special case around
        // eob <= 1, which is significant (not just an optimization) for
        // the lossless case.
        x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride,
                           p->eobs[block], xd->bd);
        break;
      default:
        assert(0 && "Invalid transform size");
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_16X16:
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_8X8:
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_4X4:
      // This is like vp9_short_idct4x4 but has a special case around
      // eob <= 1, which is significant (not just an optimization) for
      // the lossless case.
      x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    default:
      assert(0 && "Invalid transform size");
      break;
  }
}
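
// Note on the skip_txfm values tested in encode_block() above: 0 means run
// the full transform/quantization, 2 means take the DC-only fast path, and
// any other value (1) means skip the forward transform entirely and treat
// the block as all-zero.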
static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
                               TX_SIZE tx_size, void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int i, j;
  uint8_t *dst;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];

  vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  if (p->eobs[block] > 0) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
      return;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
  }
}

void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  vp9_subtract_plane(x, bsize, 0);
  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
                                         encode_block_pass1, x);
}

void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
  int plane;

  mbmi->skip = 1;

  if (x->skip)
    return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    if (!x->skip_recode)
      vp9_subtract_plane(x, bsize, plane);

    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd,
                               ctx.ta[plane], ctx.tl[plane]);
    }

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
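
// vp9_encode_sb() is the inter-coding driver: for each plane it computes the
// prediction residual (vp9_subtract_plane), optionally seeds the above/left
// entropy contexts for the trellis optimizer, and then runs encode_block()
// over every transform block in the plane. mbmi->skip starts at 1 and is
// cleared by any block that produces a non-zero eob.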

void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                            TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const scan_order *scan_order;
  TX_TYPE tx_type;
  PREDICTION_MODE mode;
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int diff_stride = 4 * (1 << bwl);
  uint8_t *src, *dst;
  int16_t *src_diff;
  uint16_t *eob = &p->eobs[block];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  int i, j;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  dst = &pd->dst.buf[4 * (j * dst_stride + i)];
  src = &p->src.buf[4 * (j * src_stride + i)];
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        scan_order = &vp9_default_scan_orders[TX_32X32];
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);
        if (!x->skip_recode) {
          vp9_highbd_subtract_block(32, 32, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                      p->round, p->quant, p->quant_shift,
                                      qcoeff, dqcoeff, pd->dequant, eob,
                                      scan_order->scan, scan_order->iscan);
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
        }
        break;
      case TX_16X16:
        tx_type = get_tx_type(pd->plane_type, xd);
        scan_order = &vp9_scan_orders[TX_16X16][tx_type];
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);
        if (!x->skip_recode) {
          vp9_highbd_subtract_block(16, 16, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
          vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob,
                                scan_order->scan, scan_order->iscan);
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
                                  *eob, xd->bd);
        }
        break;
      case TX_8X8:
        tx_type = get_tx_type(pd->plane_type, xd);
        scan_order = &vp9_scan_orders[TX_8X8][tx_type];
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);
        if (!x->skip_recode) {
          vp9_highbd_subtract_block(8, 8, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
          vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob,
                                scan_order->scan, scan_order->iscan);
        }
        if (!x->skip_encode && *eob) {
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
                                xd->bd);
        }
        break;
      case TX_4X4:
        tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
        scan_order = &vp9_scan_orders[TX_4X4][tx_type];
        mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);
        if (!x->skip_recode) {
          vp9_highbd_subtract_block(4, 4, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          if (tx_type != DCT_DCT)
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
          else
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
          vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob,
                                scan_order->scan, scan_order->iscan);
        }
        if (!x->skip_encode && *eob) {
          if (tx_type == DCT_DCT) {
            // This is like vp9_short_idct4x4 but has a special case around
            // eob <= 1, which is significant (not just an optimization) for
            // the lossless case.
            x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
          } else {
            vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
          }
        }
        break;
      default:
        assert(0);
        return;
    }
    if (*eob)
      *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      scan_order = &vp9_default_scan_orders[TX_32X32];
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? src_stride : dst_stride,
                              dst, dst_stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(32, 32, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, eob, scan_order->scan,
                             scan_order->iscan);
      }
      if (!x->skip_encode && *eob)
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_16X16:
      tx_type = get_tx_type(pd->plane_type, xd);
      scan_order = &vp9_scan_orders[TX_16X16][tx_type];
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? src_stride : dst_stride,
                              dst, dst_stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(16, 16, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                       p->quant, p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, eob, scan_order->scan,
                       scan_order->iscan);
      }
      if (!x->skip_encode && *eob)
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_8X8:
      tx_type = get_tx_type(pd->plane_type, xd);
      scan_order = &vp9_scan_orders[TX_8X8][tx_type];
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? src_stride : dst_stride,
                              dst, dst_stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(8, 8, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
        vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, eob, scan_order->scan,
                       scan_order->iscan);
      }
      if (!x->skip_encode && *eob)
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_4X4:
      tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
      scan_order = &vp9_scan_orders[TX_4X4][tx_type];
      mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
      vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? src_stride : dst_stride,
                              dst, dst_stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(4, 4, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        if (tx_type != DCT_DCT)
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
        else
          x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, eob, scan_order->scan,
                       scan_order->iscan);
      }
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          // This is like vp9_short_idct4x4 but has a special case around
          // eob <= 1, which is significant (not just an optimization) for
          // the lossless case.
          x->itxm_add(dqcoeff, dst, dst_stride, *eob);
        else
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    default:
      assert(0);
      break;
  }
  if (*eob)
    *(args->skip) = 0;
}

void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}