/* vp9_convolve.c */
  1. /*
  2. * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <assert.h>
  11. #include "./vpx_config.h"
  12. #include "./vp9_rtcd.h"
  13. #include "vp9/common/vp9_common.h"
  14. #include "vp9/common/vp9_convolve.h"
  15. #include "vp9/common/vp9_filter.h"
  16. #include "vpx/vpx_integer.h"
  17. #include "vpx_ports/mem.h"
  18. static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
  19. uint8_t *dst, ptrdiff_t dst_stride,
  20. const InterpKernel *x_filters,
  21. int x0_q4, int x_step_q4, int w, int h) {
  22. int x, y;
  23. src -= SUBPEL_TAPS / 2 - 1;
  24. for (y = 0; y < h; ++y) {
  25. int x_q4 = x0_q4;
  26. for (x = 0; x < w; ++x) {
  27. const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
  28. const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
  29. int k, sum = 0;
  30. for (k = 0; k < SUBPEL_TAPS; ++k)
  31. sum += src_x[k] * x_filter[k];
  32. dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
  33. x_q4 += x_step_q4;
  34. }
  35. src += src_stride;
  36. dst += dst_stride;
  37. }
  38. }
  39. static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
  40. uint8_t *dst, ptrdiff_t dst_stride,
  41. const InterpKernel *x_filters,
  42. int x0_q4, int x_step_q4, int w, int h) {
  43. int x, y;
  44. src -= SUBPEL_TAPS / 2 - 1;
  45. for (y = 0; y < h; ++y) {
  46. int x_q4 = x0_q4;
  47. for (x = 0; x < w; ++x) {
  48. const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
  49. const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
  50. int k, sum = 0;
  51. for (k = 0; k < SUBPEL_TAPS; ++k)
  52. sum += src_x[k] * x_filter[k];
  53. dst[x] = ROUND_POWER_OF_TWO(dst[x] +
  54. clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
  55. x_q4 += x_step_q4;
  56. }
  57. src += src_stride;
  58. dst += dst_stride;
  59. }
  60. }
  61. static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
  62. uint8_t *dst, ptrdiff_t dst_stride,
  63. const InterpKernel *y_filters,
  64. int y0_q4, int y_step_q4, int w, int h) {
  65. int x, y;
  66. src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  67. for (x = 0; x < w; ++x) {
  68. int y_q4 = y0_q4;
  69. for (y = 0; y < h; ++y) {
  70. const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
  71. const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
  72. int k, sum = 0;
  73. for (k = 0; k < SUBPEL_TAPS; ++k)
  74. sum += src_y[k * src_stride] * y_filter[k];
  75. dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
  76. y_q4 += y_step_q4;
  77. }
  78. ++src;
  79. ++dst;
  80. }
  81. }
  82. static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
  83. uint8_t *dst, ptrdiff_t dst_stride,
  84. const InterpKernel *y_filters,
  85. int y0_q4, int y_step_q4, int w, int h) {
  86. int x, y;
  87. src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  88. for (x = 0; x < w; ++x) {
  89. int y_q4 = y0_q4;
  90. for (y = 0; y < h; ++y) {
  91. const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
  92. const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
  93. int k, sum = 0;
  94. for (k = 0; k < SUBPEL_TAPS; ++k)
  95. sum += src_y[k * src_stride] * y_filter[k];
  96. dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
  97. clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
  98. y_q4 += y_step_q4;
  99. }
  100. ++src;
  101. ++dst;
  102. }
  103. }
// Separable 2-D filtering: horizontal pass into a scratch buffer, then a
// vertical pass from the scratch buffer into dst.
static void convolve(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const InterpKernel *const x_filters,
                     int x0_q4, int x_step_q4,
                     const InterpKernel *const y_filters,
                     int y0_q4, int y_step_q4,
                     int w, int h) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint8_t temp[135 * 64];
  // Rows the horizontal pass must produce so the vertical filter has all
  // SUBPEL_TAPS taps available for its last output row.
  int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);
  // Start SUBPEL_TAPS / 2 - 1 rows above the block so the vertical filter's
  // upper taps are in temp; temp uses a fixed row stride of 64.
  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
                 x_filters, x0_q4, x_step_q4, w, intermediate_height);
  // Skip the extra leading rows again when reading temp for the vertical pass.
  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
                y_filters, y0_q4, y_step_q4, w, h);
}
// Recover the base of the kernel table containing `filter` by masking off the
// low 8 address bits.
static const InterpKernel *get_filter_base(const int16_t *filter) {
  // NOTE: This assumes that the filter table is 256-byte aligned.
  // TODO(agrange) Modify to make independent of table alignment.
  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
}
// Index of kernel `f` within the table starting at `base`, i.e. the sub-pixel
// phase encoded in the filter pointer (pointer difference in whole-kernel
// units). NOTE(review): the (intptr_t) round-trip on `f` appears intended to
// sidestep a pointer-cast warning — confirm; the arithmetic itself is a plain
// pointer subtraction.
static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
  return (int)((const InterpKernel *)(intptr_t)f - base);
}
  143. void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
  144. uint8_t *dst, ptrdiff_t dst_stride,
  145. const int16_t *filter_x, int x_step_q4,
  146. const int16_t *filter_y, int y_step_q4,
  147. int w, int h) {
  148. const InterpKernel *const filters_x = get_filter_base(filter_x);
  149. const int x0_q4 = get_filter_offset(filter_x, filters_x);
  150. (void)filter_y;
  151. (void)y_step_q4;
  152. convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
  153. x0_q4, x_step_q4, w, h);
  154. }
  155. void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
  156. uint8_t *dst, ptrdiff_t dst_stride,
  157. const int16_t *filter_x, int x_step_q4,
  158. const int16_t *filter_y, int y_step_q4,
  159. int w, int h) {
  160. const InterpKernel *const filters_x = get_filter_base(filter_x);
  161. const int x0_q4 = get_filter_offset(filter_x, filters_x);
  162. (void)filter_y;
  163. (void)y_step_q4;
  164. convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
  165. x0_q4, x_step_q4, w, h);
  166. }
  167. void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
  168. uint8_t *dst, ptrdiff_t dst_stride,
  169. const int16_t *filter_x, int x_step_q4,
  170. const int16_t *filter_y, int y_step_q4,
  171. int w, int h) {
  172. const InterpKernel *const filters_y = get_filter_base(filter_y);
  173. const int y0_q4 = get_filter_offset(filter_y, filters_y);
  174. (void)filter_x;
  175. (void)x_step_q4;
  176. convolve_vert(src, src_stride, dst, dst_stride, filters_y,
  177. y0_q4, y_step_q4, w, h);
  178. }
  179. void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
  180. uint8_t *dst, ptrdiff_t dst_stride,
  181. const int16_t *filter_x, int x_step_q4,
  182. const int16_t *filter_y, int y_step_q4,
  183. int w, int h) {
  184. const InterpKernel *const filters_y = get_filter_base(filter_y);
  185. const int y0_q4 = get_filter_offset(filter_y, filters_y);
  186. (void)filter_x;
  187. (void)x_step_q4;
  188. convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
  189. y0_q4, y_step_q4, w, h);
  190. }
  191. void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
  192. uint8_t *dst, ptrdiff_t dst_stride,
  193. const int16_t *filter_x, int x_step_q4,
  194. const int16_t *filter_y, int y_step_q4,
  195. int w, int h) {
  196. const InterpKernel *const filters_x = get_filter_base(filter_x);
  197. const int x0_q4 = get_filter_offset(filter_x, filters_x);
  198. const InterpKernel *const filters_y = get_filter_base(filter_y);
  199. const int y0_q4 = get_filter_offset(filter_y, filters_y);
  200. convolve(src, src_stride, dst, dst_stride,
  201. filters_x, x0_q4, x_step_q4,
  202. filters_y, y0_q4, y_step_q4, w, h);
  203. }
  204. void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
  205. uint8_t *dst, ptrdiff_t dst_stride,
  206. const int16_t *filter_x, int x_step_q4,
  207. const int16_t *filter_y, int y_step_q4,
  208. int w, int h) {
  209. /* Fixed size intermediate buffer places limits on parameters. */
  210. DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
  211. assert(w <= 64);
  212. assert(h <= 64);
  213. vp9_convolve8_c(src, src_stride, temp, 64,
  214. filter_x, x_step_q4, filter_y, y_step_q4, w, h);
  215. vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
  216. }
  217. void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
  218. uint8_t *dst, ptrdiff_t dst_stride,
  219. const int16_t *filter_x, int filter_x_stride,
  220. const int16_t *filter_y, int filter_y_stride,
  221. int w, int h) {
  222. int r;
  223. (void)filter_x; (void)filter_x_stride;
  224. (void)filter_y; (void)filter_y_stride;
  225. for (r = h; r > 0; --r) {
  226. memcpy(dst, src, w);
  227. src += src_stride;
  228. dst += dst_stride;
  229. }
  230. }
  231. void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
  232. uint8_t *dst, ptrdiff_t dst_stride,
  233. const int16_t *filter_x, int filter_x_stride,
  234. const int16_t *filter_y, int filter_y_stride,
  235. int w, int h) {
  236. int x, y;
  237. (void)filter_x; (void)filter_x_stride;
  238. (void)filter_y; (void)filter_y_stride;
  239. for (y = 0; y < h; ++y) {
  240. for (x = 0; x < w; ++x)
  241. dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
  242. src += src_stride;
  243. dst += dst_stride;
  244. }
  245. }
  246. #if CONFIG_VP9_HIGHBITDEPTH
  247. static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
  248. uint8_t *dst8, ptrdiff_t dst_stride,
  249. const InterpKernel *x_filters,
  250. int x0_q4, int x_step_q4,
  251. int w, int h, int bd) {
  252. int x, y;
  253. uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  254. uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  255. src -= SUBPEL_TAPS / 2 - 1;
  256. for (y = 0; y < h; ++y) {
  257. int x_q4 = x0_q4;
  258. for (x = 0; x < w; ++x) {
  259. const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
  260. const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
  261. int k, sum = 0;
  262. for (k = 0; k < SUBPEL_TAPS; ++k)
  263. sum += src_x[k] * x_filter[k];
  264. dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
  265. x_q4 += x_step_q4;
  266. }
  267. src += src_stride;
  268. dst += dst_stride;
  269. }
  270. }
  271. static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
  272. uint8_t *dst8, ptrdiff_t dst_stride,
  273. const InterpKernel *x_filters,
  274. int x0_q4, int x_step_q4,
  275. int w, int h, int bd) {
  276. int x, y;
  277. uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  278. uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  279. src -= SUBPEL_TAPS / 2 - 1;
  280. for (y = 0; y < h; ++y) {
  281. int x_q4 = x0_q4;
  282. for (x = 0; x < w; ++x) {
  283. const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
  284. const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
  285. int k, sum = 0;
  286. for (k = 0; k < SUBPEL_TAPS; ++k)
  287. sum += src_x[k] * x_filter[k];
  288. dst[x] = ROUND_POWER_OF_TWO(dst[x] +
  289. clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
  290. x_q4 += x_step_q4;
  291. }
  292. src += src_stride;
  293. dst += dst_stride;
  294. }
  295. }
  296. static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
  297. uint8_t *dst8, ptrdiff_t dst_stride,
  298. const InterpKernel *y_filters,
  299. int y0_q4, int y_step_q4, int w, int h,
  300. int bd) {
  301. int x, y;
  302. uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  303. uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  304. src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  305. for (x = 0; x < w; ++x) {
  306. int y_q4 = y0_q4;
  307. for (y = 0; y < h; ++y) {
  308. const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
  309. const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
  310. int k, sum = 0;
  311. for (k = 0; k < SUBPEL_TAPS; ++k)
  312. sum += src_y[k * src_stride] * y_filter[k];
  313. dst[y * dst_stride] = clip_pixel_highbd(
  314. ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
  315. y_q4 += y_step_q4;
  316. }
  317. ++src;
  318. ++dst;
  319. }
  320. }
  321. static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
  322. uint8_t *dst8, ptrdiff_t dst_stride,
  323. const InterpKernel *y_filters,
  324. int y0_q4, int y_step_q4, int w, int h,
  325. int bd) {
  326. int x, y;
  327. uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  328. uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  329. src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  330. for (x = 0; x < w; ++x) {
  331. int y_q4 = y0_q4;
  332. for (y = 0; y < h; ++y) {
  333. const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
  334. const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
  335. int k, sum = 0;
  336. for (k = 0; k < SUBPEL_TAPS; ++k)
  337. sum += src_y[k * src_stride] * y_filter[k];
  338. dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
  339. clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
  340. y_q4 += y_step_q4;
  341. }
  342. ++src;
  343. ++dst;
  344. }
  345. }
// High bit-depth separable 2-D filtering: horizontal pass into `temp`, then a
// vertical pass from `temp` into dst. Same structure and limits as the 8-bit
// convolve() above, with uint16_t samples behind CONVERT_TO_BYTEPTR pointers.
static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *const x_filters,
                            int x0_q4, int x_step_q4,
                            const InterpKernel *const y_filters,
                            int y0_q4, int y_step_q4,
                            int w, int h, int bd) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[64 * 135];
  // Rows the horizontal pass must produce so the vertical filter has all
  // SUBPEL_TAPS taps available for its last output row.
  int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);
  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
                        src_stride, CONVERT_TO_BYTEPTR(temp), 64,
                        x_filters, x0_q4, x_step_q4, w,
                        intermediate_height, bd);
  // NOTE(review): the row offset is added to the byte pointer before the
  // callee converts back to uint16_t*; this relies on CONVERT_TO_BYTEPTR /
  // CONVERT_TO_SHORTPTR scaling so that +64*(SUBPEL_TAPS/2-1) in byte-pointer
  // units lands on the same element offset in uint16 units — confirm against
  // the macro definitions in vpx_ports/mem.h.
  highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
                       64, dst, dst_stride, y_filters, y0_q4, y_step_q4,
                       w, h, bd);
}
  380. void vp9_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
  381. uint8_t *dst, ptrdiff_t dst_stride,
  382. const int16_t *filter_x, int x_step_q4,
  383. const int16_t *filter_y, int y_step_q4,
  384. int w, int h, int bd) {
  385. const InterpKernel *const filters_x = get_filter_base(filter_x);
  386. const int x0_q4 = get_filter_offset(filter_x, filters_x);
  387. (void)filter_y;
  388. (void)y_step_q4;
  389. highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
  390. x0_q4, x_step_q4, w, h, bd);
  391. }
  392. void vp9_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
  393. uint8_t *dst, ptrdiff_t dst_stride,
  394. const int16_t *filter_x, int x_step_q4,
  395. const int16_t *filter_y, int y_step_q4,
  396. int w, int h, int bd) {
  397. const InterpKernel *const filters_x = get_filter_base(filter_x);
  398. const int x0_q4 = get_filter_offset(filter_x, filters_x);
  399. (void)filter_y;
  400. (void)y_step_q4;
  401. highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
  402. x0_q4, x_step_q4, w, h, bd);
  403. }
  404. void vp9_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
  405. uint8_t *dst, ptrdiff_t dst_stride,
  406. const int16_t *filter_x, int x_step_q4,
  407. const int16_t *filter_y, int y_step_q4,
  408. int w, int h, int bd) {
  409. const InterpKernel *const filters_y = get_filter_base(filter_y);
  410. const int y0_q4 = get_filter_offset(filter_y, filters_y);
  411. (void)filter_x;
  412. (void)x_step_q4;
  413. highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y,
  414. y0_q4, y_step_q4, w, h, bd);
  415. }
  416. void vp9_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
  417. uint8_t *dst, ptrdiff_t dst_stride,
  418. const int16_t *filter_x, int x_step_q4,
  419. const int16_t *filter_y, int y_step_q4,
  420. int w, int h, int bd) {
  421. const InterpKernel *const filters_y = get_filter_base(filter_y);
  422. const int y0_q4 = get_filter_offset(filter_y, filters_y);
  423. (void)filter_x;
  424. (void)x_step_q4;
  425. highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
  426. y0_q4, y_step_q4, w, h, bd);
  427. }
  428. void vp9_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
  429. uint8_t *dst, ptrdiff_t dst_stride,
  430. const int16_t *filter_x, int x_step_q4,
  431. const int16_t *filter_y, int y_step_q4,
  432. int w, int h, int bd) {
  433. const InterpKernel *const filters_x = get_filter_base(filter_x);
  434. const int x0_q4 = get_filter_offset(filter_x, filters_x);
  435. const InterpKernel *const filters_y = get_filter_base(filter_y);
  436. const int y0_q4 = get_filter_offset(filter_y, filters_y);
  437. highbd_convolve(src, src_stride, dst, dst_stride,
  438. filters_x, x0_q4, x_step_q4,
  439. filters_y, y0_q4, y_step_q4, w, h, bd);
  440. }
  441. void vp9_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
  442. uint8_t *dst, ptrdiff_t dst_stride,
  443. const int16_t *filter_x, int x_step_q4,
  444. const int16_t *filter_y, int y_step_q4,
  445. int w, int h, int bd) {
  446. // Fixed size intermediate buffer places limits on parameters.
  447. DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
  448. assert(w <= 64);
  449. assert(h <= 64);
  450. vp9_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
  451. filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
  452. vp9_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride,
  453. NULL, 0, NULL, 0, w, h, bd);
  454. }
  455. void vp9_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
  456. uint8_t *dst8, ptrdiff_t dst_stride,
  457. const int16_t *filter_x, int filter_x_stride,
  458. const int16_t *filter_y, int filter_y_stride,
  459. int w, int h, int bd) {
  460. int r;
  461. uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  462. uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  463. (void)filter_x;
  464. (void)filter_y;
  465. (void)filter_x_stride;
  466. (void)filter_y_stride;
  467. (void)bd;
  468. for (r = h; r > 0; --r) {
  469. memcpy(dst, src, w * sizeof(uint16_t));
  470. src += src_stride;
  471. dst += dst_stride;
  472. }
  473. }
  474. void vp9_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
  475. uint8_t *dst8, ptrdiff_t dst_stride,
  476. const int16_t *filter_x, int filter_x_stride,
  477. const int16_t *filter_y, int filter_y_stride,
  478. int w, int h, int bd) {
  479. int x, y;
  480. uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  481. uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  482. (void)filter_x;
  483. (void)filter_y;
  484. (void)filter_x_stride;
  485. (void)filter_y_stride;
  486. (void)bd;
  487. for (y = 0; y < h; ++y) {
  488. for (x = 0; x < w; ++x) {
  489. dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
  490. }
  491. src += src_stride;
  492. dst += dst_stride;
  493. }
  494. }
  495. #endif