vp9_variance.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "./vp9_rtcd.h"
  11. #include "./vpx_dsp_rtcd.h"
  12. #include "vpx_ports/mem.h"
  13. #include "vpx/vpx_integer.h"
  14. #include "vp9/common/vp9_common.h"
  15. #include "vp9/common/vp9_filter.h"
  16. #include "vp9/encoder/vp9_variance.h"
  17. static uint8_t bilinear_filters[8][2] = {
  18. { 128, 0, },
  19. { 112, 16, },
  20. { 96, 32, },
  21. { 80, 48, },
  22. { 64, 64, },
  23. { 48, 80, },
  24. { 32, 96, },
  25. { 16, 112, },
  26. };
  27. // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
  28. // or vertical direction to produce the filtered output block. Used to implement
  29. // first-pass of 2-D separable filter.
  30. //
  31. // Produces int32_t output to retain precision for next pass. Two filter taps
  32. // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
  33. // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
  34. // defines the offset required to move from one input to the next.
  35. static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
  36. uint16_t *output_ptr,
  37. unsigned int src_pixels_per_line,
  38. int pixel_step,
  39. unsigned int output_height,
  40. unsigned int output_width,
  41. const uint8_t *vp9_filter) {
  42. unsigned int i, j;
  43. for (i = 0; i < output_height; i++) {
  44. for (j = 0; j < output_width; j++) {
  45. output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  46. (int)src_ptr[pixel_step] * vp9_filter[1],
  47. FILTER_BITS);
  48. src_ptr++;
  49. }
  50. // Next row...
  51. src_ptr += src_pixels_per_line - output_width;
  52. output_ptr += output_width;
  53. }
  54. }
  55. // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
  56. // or vertical direction to produce the filtered output block. Used to implement
  57. // second-pass of 2-D separable filter.
  58. //
  59. // Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two
  60. // filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
  61. // filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
  62. // stride). It defines the offset required to move from one input to the next.
  63. static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
  64. uint8_t *output_ptr,
  65. unsigned int src_pixels_per_line,
  66. unsigned int pixel_step,
  67. unsigned int output_height,
  68. unsigned int output_width,
  69. const uint8_t *vp9_filter) {
  70. unsigned int i, j;
  71. for (i = 0; i < output_height; i++) {
  72. for (j = 0; j < output_width; j++) {
  73. output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  74. (int)src_ptr[pixel_step] * vp9_filter[1],
  75. FILTER_BITS);
  76. src_ptr++;
  77. }
  78. src_ptr += src_pixels_per_line - output_width;
  79. output_ptr += output_width;
  80. }
  81. }
  82. #define SUBPIX_VAR(W, H) \
  83. unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
  84. const uint8_t *src, int src_stride, \
  85. int xoffset, int yoffset, \
  86. const uint8_t *dst, int dst_stride, \
  87. unsigned int *sse) { \
  88. uint16_t fdata3[(H + 1) * W]; \
  89. uint8_t temp2[H * W]; \
  90. \
  91. var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
  92. bilinear_filters[xoffset]); \
  93. var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  94. bilinear_filters[yoffset]); \
  95. \
  96. return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
  97. }
  98. #define SUBPIX_AVG_VAR(W, H) \
  99. unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
  100. const uint8_t *src, int src_stride, \
  101. int xoffset, int yoffset, \
  102. const uint8_t *dst, int dst_stride, \
  103. unsigned int *sse, \
  104. const uint8_t *second_pred) { \
  105. uint16_t fdata3[(H + 1) * W]; \
  106. uint8_t temp2[H * W]; \
  107. DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
  108. \
  109. var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
  110. bilinear_filters[xoffset]); \
  111. var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  112. bilinear_filters[yoffset]); \
  113. \
  114. vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
  115. \
  116. return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
  117. }
  118. SUBPIX_VAR(4, 4)
  119. SUBPIX_AVG_VAR(4, 4)
  120. SUBPIX_VAR(4, 8)
  121. SUBPIX_AVG_VAR(4, 8)
  122. SUBPIX_VAR(8, 4)
  123. SUBPIX_AVG_VAR(8, 4)
  124. SUBPIX_VAR(8, 8)
  125. SUBPIX_AVG_VAR(8, 8)
  126. SUBPIX_VAR(8, 16)
  127. SUBPIX_AVG_VAR(8, 16)
  128. SUBPIX_VAR(16, 8)
  129. SUBPIX_AVG_VAR(16, 8)
  130. SUBPIX_VAR(16, 16)
  131. SUBPIX_AVG_VAR(16, 16)
  132. SUBPIX_VAR(16, 32)
  133. SUBPIX_AVG_VAR(16, 32)
  134. SUBPIX_VAR(32, 16)
  135. SUBPIX_AVG_VAR(32, 16)
  136. SUBPIX_VAR(32, 32)
  137. SUBPIX_AVG_VAR(32, 32)
  138. SUBPIX_VAR(32, 64)
  139. SUBPIX_AVG_VAR(32, 64)
  140. SUBPIX_VAR(64, 32)
  141. SUBPIX_AVG_VAR(64, 32)
  142. SUBPIX_VAR(64, 64)
  143. SUBPIX_AVG_VAR(64, 64)
  144. #if CONFIG_VP9_HIGHBITDEPTH
  145. static void highbd_var_filter_block2d_bil_first_pass(
  146. const uint8_t *src_ptr8,
  147. uint16_t *output_ptr,
  148. unsigned int src_pixels_per_line,
  149. int pixel_step,
  150. unsigned int output_height,
  151. unsigned int output_width,
  152. const uint8_t *vp9_filter) {
  153. unsigned int i, j;
  154. uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  155. for (i = 0; i < output_height; i++) {
  156. for (j = 0; j < output_width; j++) {
  157. output_ptr[j] =
  158. ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  159. (int)src_ptr[pixel_step] * vp9_filter[1],
  160. FILTER_BITS);
  161. src_ptr++;
  162. }
  163. // Next row...
  164. src_ptr += src_pixels_per_line - output_width;
  165. output_ptr += output_width;
  166. }
  167. }
  168. static void highbd_var_filter_block2d_bil_second_pass(
  169. const uint16_t *src_ptr,
  170. uint16_t *output_ptr,
  171. unsigned int src_pixels_per_line,
  172. unsigned int pixel_step,
  173. unsigned int output_height,
  174. unsigned int output_width,
  175. const uint8_t *vp9_filter) {
  176. unsigned int i, j;
  177. for (i = 0; i < output_height; i++) {
  178. for (j = 0; j < output_width; j++) {
  179. output_ptr[j] =
  180. ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
  181. (int)src_ptr[pixel_step] * vp9_filter[1],
  182. FILTER_BITS);
  183. src_ptr++;
  184. }
  185. src_ptr += src_pixels_per_line - output_width;
  186. output_ptr += output_width;
  187. }
  188. }
  189. #define HIGHBD_SUBPIX_VAR(W, H) \
  190. unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
  191. const uint8_t *src, int src_stride, \
  192. int xoffset, int yoffset, \
  193. const uint8_t *dst, int dst_stride, \
  194. unsigned int *sse) { \
  195. uint16_t fdata3[(H + 1) * W]; \
  196. uint16_t temp2[H * W]; \
  197. \
  198. highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
  199. W, bilinear_filters[xoffset]); \
  200. highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  201. bilinear_filters[yoffset]); \
  202. \
  203. return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
  204. dst_stride, sse); \
  205. } \
  206. \
  207. unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
  208. const uint8_t *src, int src_stride, \
  209. int xoffset, int yoffset, \
  210. const uint8_t *dst, int dst_stride, \
  211. unsigned int *sse) { \
  212. uint16_t fdata3[(H + 1) * W]; \
  213. uint16_t temp2[H * W]; \
  214. \
  215. highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
  216. W, bilinear_filters[xoffset]); \
  217. highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  218. bilinear_filters[yoffset]); \
  219. \
  220. return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
  221. W, dst, dst_stride, sse); \
  222. } \
  223. \
  224. unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
  225. const uint8_t *src, int src_stride, \
  226. int xoffset, int yoffset, \
  227. const uint8_t *dst, int dst_stride, \
  228. unsigned int *sse) { \
  229. uint16_t fdata3[(H + 1) * W]; \
  230. uint16_t temp2[H * W]; \
  231. \
  232. highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
  233. W, bilinear_filters[xoffset]); \
  234. highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  235. bilinear_filters[yoffset]); \
  236. \
  237. return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
  238. W, dst, dst_stride, sse); \
  239. }
  240. #define HIGHBD_SUBPIX_AVG_VAR(W, H) \
  241. unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
  242. const uint8_t *src, int src_stride, \
  243. int xoffset, int yoffset, \
  244. const uint8_t *dst, int dst_stride, \
  245. unsigned int *sse, \
  246. const uint8_t *second_pred) { \
  247. uint16_t fdata3[(H + 1) * W]; \
  248. uint16_t temp2[H * W]; \
  249. DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
  250. \
  251. highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
  252. W, bilinear_filters[xoffset]); \
  253. highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  254. bilinear_filters[yoffset]); \
  255. \
  256. vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
  257. CONVERT_TO_BYTEPTR(temp2), W); \
  258. \
  259. return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
  260. dst_stride, sse); \
  261. } \
  262. \
  263. unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
  264. const uint8_t *src, int src_stride, \
  265. int xoffset, int yoffset, \
  266. const uint8_t *dst, int dst_stride, \
  267. unsigned int *sse, \
  268. const uint8_t *second_pred) { \
  269. uint16_t fdata3[(H + 1) * W]; \
  270. uint16_t temp2[H * W]; \
  271. DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
  272. \
  273. highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
  274. W, bilinear_filters[xoffset]); \
  275. highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  276. bilinear_filters[yoffset]); \
  277. \
  278. vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
  279. CONVERT_TO_BYTEPTR(temp2), W); \
  280. \
  281. return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
  282. W, dst, dst_stride, sse); \
  283. } \
  284. \
  285. unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
  286. const uint8_t *src, int src_stride, \
  287. int xoffset, int yoffset, \
  288. const uint8_t *dst, int dst_stride, \
  289. unsigned int *sse, \
  290. const uint8_t *second_pred) { \
  291. uint16_t fdata3[(H + 1) * W]; \
  292. uint16_t temp2[H * W]; \
  293. DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
  294. \
  295. highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
  296. W, bilinear_filters[xoffset]); \
  297. highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
  298. bilinear_filters[yoffset]); \
  299. \
  300. vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
  301. CONVERT_TO_BYTEPTR(temp2), W); \
  302. \
  303. return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
  304. W, dst, dst_stride, sse); \
  305. }
  306. HIGHBD_SUBPIX_VAR(4, 4)
  307. HIGHBD_SUBPIX_AVG_VAR(4, 4)
  308. HIGHBD_SUBPIX_VAR(4, 8)
  309. HIGHBD_SUBPIX_AVG_VAR(4, 8)
  310. HIGHBD_SUBPIX_VAR(8, 4)
  311. HIGHBD_SUBPIX_AVG_VAR(8, 4)
  312. HIGHBD_SUBPIX_VAR(8, 8)
  313. HIGHBD_SUBPIX_AVG_VAR(8, 8)
  314. HIGHBD_SUBPIX_VAR(8, 16)
  315. HIGHBD_SUBPIX_AVG_VAR(8, 16)
  316. HIGHBD_SUBPIX_VAR(16, 8)
  317. HIGHBD_SUBPIX_AVG_VAR(16, 8)
  318. HIGHBD_SUBPIX_VAR(16, 16)
  319. HIGHBD_SUBPIX_AVG_VAR(16, 16)
  320. HIGHBD_SUBPIX_VAR(16, 32)
  321. HIGHBD_SUBPIX_AVG_VAR(16, 32)
  322. HIGHBD_SUBPIX_VAR(32, 16)
  323. HIGHBD_SUBPIX_AVG_VAR(32, 16)
  324. HIGHBD_SUBPIX_VAR(32, 32)
  325. HIGHBD_SUBPIX_AVG_VAR(32, 32)
  326. HIGHBD_SUBPIX_VAR(32, 64)
  327. HIGHBD_SUBPIX_AVG_VAR(32, 64)
  328. HIGHBD_SUBPIX_VAR(64, 32)
  329. HIGHBD_SUBPIX_AVG_VAR(64, 32)
  330. HIGHBD_SUBPIX_VAR(64, 64)
  331. HIGHBD_SUBPIX_AVG_VAR(64, 64)
  332. #endif // CONFIG_VP9_HIGHBITDEPTH