vp9_fastssim.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. *
  10. * This code was originally written by: Nathan E. Egge, at the Daala
  11. * project.
  12. */
  13. #include <math.h>
  14. #include <string.h>
  15. #include "./vpx_config.h"
  16. #include "./vp9_rtcd.h"
  17. #include "vp9/encoder/vp9_ssim.h"
  18. /* TODO(jbb): High bit depth version of this code needed */
  19. typedef struct fs_level fs_level;
  20. typedef struct fs_ctx fs_ctx;
  21. #define SSIM_C1 (255 * 255 * 0.01 * 0.01)
  22. #define SSIM_C2 (255 * 255 * 0.03 * 0.03)
  23. #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))
  24. #define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))
  25. struct fs_level {
  26. uint16_t *im1;
  27. uint16_t *im2;
  28. double *ssim;
  29. int w;
  30. int h;
  31. };
  32. struct fs_ctx {
  33. fs_level *level;
  34. int nlevels;
  35. unsigned *col_buf;
  36. };
  37. static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
  38. unsigned char *data;
  39. size_t data_size;
  40. int lw;
  41. int lh;
  42. int l;
  43. lw = (_w + 1) >> 1;
  44. lh = (_h + 1) >> 1;
  45. data_size = _nlevels * sizeof(fs_level)
  46. + 2 * (lw + 8) * 8 * sizeof(*_ctx->col_buf);
  47. for (l = 0; l < _nlevels; l++) {
  48. size_t im_size;
  49. size_t level_size;
  50. im_size = lw * (size_t) lh;
  51. level_size = 2 * im_size * sizeof(*_ctx->level[l].im1);
  52. level_size += sizeof(*_ctx->level[l].ssim) - 1;
  53. level_size /= sizeof(*_ctx->level[l].ssim);
  54. level_size += im_size;
  55. level_size *= sizeof(*_ctx->level[l].ssim);
  56. data_size += level_size;
  57. lw = (lw + 1) >> 1;
  58. lh = (lh + 1) >> 1;
  59. }
  60. data = (unsigned char *) malloc(data_size);
  61. _ctx->level = (fs_level *) data;
  62. _ctx->nlevels = _nlevels;
  63. data += _nlevels * sizeof(*_ctx->level);
  64. lw = (_w + 1) >> 1;
  65. lh = (_h + 1) >> 1;
  66. for (l = 0; l < _nlevels; l++) {
  67. size_t im_size;
  68. size_t level_size;
  69. _ctx->level[l].w = lw;
  70. _ctx->level[l].h = lh;
  71. im_size = lw * (size_t) lh;
  72. level_size = 2 * im_size * sizeof(*_ctx->level[l].im1);
  73. level_size += sizeof(*_ctx->level[l].ssim) - 1;
  74. level_size /= sizeof(*_ctx->level[l].ssim);
  75. level_size *= sizeof(*_ctx->level[l].ssim);
  76. _ctx->level[l].im1 = (uint16_t *) data;
  77. _ctx->level[l].im2 = _ctx->level[l].im1 + im_size;
  78. data += level_size;
  79. _ctx->level[l].ssim = (double *) data;
  80. data += im_size * sizeof(*_ctx->level[l].ssim);
  81. lw = (lw + 1) >> 1;
  82. lh = (lh + 1) >> 1;
  83. }
  84. _ctx->col_buf = (unsigned *) data;
  85. }
  86. static void fs_ctx_clear(fs_ctx *_ctx) {
  87. free(_ctx->level);
  88. }
  89. static void fs_downsample_level(fs_ctx *_ctx, int _l) {
  90. const uint16_t *src1;
  91. const uint16_t *src2;
  92. uint16_t *dst1;
  93. uint16_t *dst2;
  94. int w2;
  95. int h2;
  96. int w;
  97. int h;
  98. int i;
  99. int j;
  100. w = _ctx->level[_l].w;
  101. h = _ctx->level[_l].h;
  102. dst1 = _ctx->level[_l].im1;
  103. dst2 = _ctx->level[_l].im2;
  104. w2 = _ctx->level[_l - 1].w;
  105. h2 = _ctx->level[_l - 1].h;
  106. src1 = _ctx->level[_l - 1].im1;
  107. src2 = _ctx->level[_l - 1].im2;
  108. for (j = 0; j < h; j++) {
  109. int j0offs;
  110. int j1offs;
  111. j0offs = 2 * j * w2;
  112. j1offs = FS_MINI(2 * j + 1, h2) * w2;
  113. for (i = 0; i < w; i++) {
  114. int i0;
  115. int i1;
  116. i0 = 2 * i;
  117. i1 = FS_MINI(i0 + 1, w2);
  118. dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1]
  119. + src1[j1offs + i0] + src1[j1offs + i1];
  120. dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1]
  121. + src2[j1offs + i0] + src2[j1offs + i1];
  122. }
  123. }
  124. }
  125. static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1,
  126. int _s1ystride, const unsigned char *_src2,
  127. int _s2ystride, int _w, int _h) {
  128. uint16_t *dst1;
  129. uint16_t *dst2;
  130. int w;
  131. int h;
  132. int i;
  133. int j;
  134. w = _ctx->level[0].w;
  135. h = _ctx->level[0].h;
  136. dst1 = _ctx->level[0].im1;
  137. dst2 = _ctx->level[0].im2;
  138. for (j = 0; j < h; j++) {
  139. int j0;
  140. int j1;
  141. j0 = 2 * j;
  142. j1 = FS_MINI(j0 + 1, _h);
  143. for (i = 0; i < w; i++) {
  144. int i0;
  145. int i1;
  146. i0 = 2 * i;
  147. i1 = FS_MINI(i0 + 1, _w);
  148. dst1[j * w + i] = _src1[j0 * _s1ystride + i0]
  149. + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0]
  150. + _src1[j1 * _s1ystride + i1];
  151. dst2[j * w + i] = _src2[j0 * _s2ystride + i0]
  152. + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0]
  153. + _src2[j1 * _s2ystride + i1];
  154. }
  155. }
  156. }
  157. static void fs_apply_luminance(fs_ctx *_ctx, int _l) {
  158. unsigned *col_sums_x;
  159. unsigned *col_sums_y;
  160. uint16_t *im1;
  161. uint16_t *im2;
  162. double *ssim;
  163. double c1;
  164. int w;
  165. int h;
  166. int j0offs;
  167. int j1offs;
  168. int i;
  169. int j;
  170. w = _ctx->level[_l].w;
  171. h = _ctx->level[_l].h;
  172. col_sums_x = _ctx->col_buf;
  173. col_sums_y = col_sums_x + w;
  174. im1 = _ctx->level[_l].im1;
  175. im2 = _ctx->level[_l].im2;
  176. for (i = 0; i < w; i++)
  177. col_sums_x[i] = 5 * im1[i];
  178. for (i = 0; i < w; i++)
  179. col_sums_y[i] = 5 * im2[i];
  180. for (j = 1; j < 4; j++) {
  181. j1offs = FS_MINI(j, h - 1) * w;
  182. for (i = 0; i < w; i++)
  183. col_sums_x[i] += im1[j1offs + i];
  184. for (i = 0; i < w; i++)
  185. col_sums_y[i] += im2[j1offs + i];
  186. }
  187. ssim = _ctx->level[_l].ssim;
  188. c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l));
  189. for (j = 0; j < h; j++) {
  190. unsigned mux;
  191. unsigned muy;
  192. int i0;
  193. int i1;
  194. mux = 5 * col_sums_x[0];
  195. muy = 5 * col_sums_y[0];
  196. for (i = 1; i < 4; i++) {
  197. i1 = FS_MINI(i, w - 1);
  198. mux += col_sums_x[i1];
  199. muy += col_sums_y[i1];
  200. }
  201. for (i = 0; i < w; i++) {
  202. ssim[j * w + i] *= (2 * mux * (double) muy + c1)
  203. / (mux * (double) mux + muy * (double) muy + c1);
  204. if (i + 1 < w) {
  205. i0 = FS_MAXI(0, i - 4);
  206. i1 = FS_MINI(i + 4, w - 1);
  207. mux += col_sums_x[i1] - col_sums_x[i0];
  208. muy += col_sums_x[i1] - col_sums_x[i0];
  209. }
  210. }
  211. if (j + 1 < h) {
  212. j0offs = FS_MAXI(0, j - 4) * w;
  213. for (i = 0; i < w; i++)
  214. col_sums_x[i] -= im1[j0offs + i];
  215. for (i = 0; i < w; i++)
  216. col_sums_y[i] -= im2[j0offs + i];
  217. j1offs = FS_MINI(j + 4, h - 1) * w;
  218. for (i = 0; i < w; i++)
  219. col_sums_x[i] += im1[j1offs + i];
  220. for (i = 0; i < w; i++)
  221. col_sums_y[i] += im2[j1offs + i];
  222. }
  223. }
  224. }
  225. #define FS_COL_SET(_col, _joffs, _ioffs) \
  226. do { \
  227. unsigned gx; \
  228. unsigned gy; \
  229. gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
  230. gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
  231. col_sums_gx2[(_col)] = gx * (double)gx; \
  232. col_sums_gy2[(_col)] = gy * (double)gy; \
  233. col_sums_gxgy[(_col)] = gx * (double)gy; \
  234. } \
  235. while (0)
  236. #define FS_COL_ADD(_col, _joffs, _ioffs) \
  237. do { \
  238. unsigned gx; \
  239. unsigned gy; \
  240. gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
  241. gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
  242. col_sums_gx2[(_col)] += gx * (double)gx; \
  243. col_sums_gy2[(_col)] += gy * (double)gy; \
  244. col_sums_gxgy[(_col)] += gx * (double)gy; \
  245. } \
  246. while (0)
  247. #define FS_COL_SUB(_col, _joffs, _ioffs) \
  248. do { \
  249. unsigned gx; \
  250. unsigned gy; \
  251. gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
  252. gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \
  253. col_sums_gx2[(_col)] -= gx * (double)gx; \
  254. col_sums_gy2[(_col)] -= gy * (double)gy; \
  255. col_sums_gxgy[(_col)] -= gx * (double)gy; \
  256. } \
  257. while (0)
  258. #define FS_COL_COPY(_col1, _col2) \
  259. do { \
  260. col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)]; \
  261. col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)]; \
  262. col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)]; \
  263. } \
  264. while (0)
  265. #define FS_COL_HALVE(_col1, _col2) \
  266. do { \
  267. col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 0.5; \
  268. col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 0.5; \
  269. col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 0.5; \
  270. } \
  271. while (0)
  272. #define FS_COL_DOUBLE(_col1, _col2) \
  273. do { \
  274. col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 2; \
  275. col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 2; \
  276. col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 2; \
  277. } \
  278. while (0)
  279. static void fs_calc_structure(fs_ctx *_ctx, int _l) {
  280. uint16_t *im1;
  281. uint16_t *im2;
  282. unsigned *gx_buf;
  283. unsigned *gy_buf;
  284. double *ssim;
  285. double col_sums_gx2[8];
  286. double col_sums_gy2[8];
  287. double col_sums_gxgy[8];
  288. double c2;
  289. int stride;
  290. int w;
  291. int h;
  292. int i;
  293. int j;
  294. w = _ctx->level[_l].w;
  295. h = _ctx->level[_l].h;
  296. im1 = _ctx->level[_l].im1;
  297. im2 = _ctx->level[_l].im2;
  298. ssim = _ctx->level[_l].ssim;
  299. gx_buf = _ctx->col_buf;
  300. stride = w + 8;
  301. gy_buf = gx_buf + 8 * stride;
  302. memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf));
  303. c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104;
  304. for (j = 0; j < h + 4; j++) {
  305. if (j < h - 1) {
  306. for (i = 0; i < w - 1; i++) {
  307. unsigned g1;
  308. unsigned g2;
  309. unsigned gx;
  310. unsigned gy;
  311. g1 = abs(im1[(j + 1) * w + i + 1] - im1[j * w + i]);
  312. g2 = abs(im1[(j + 1) * w + i] - im1[j * w + i + 1]);
  313. gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
  314. g1 = abs(im2[(j + 1) * w + i + 1] - im2[j * w + i]);
  315. g2 = abs(im2[(j + 1) * w + i] - im2[j * w + i + 1]);
  316. gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);
  317. gx_buf[(j & 7) * stride + i + 4] = gx;
  318. gy_buf[(j & 7) * stride + i + 4] = gy;
  319. }
  320. } else {
  321. memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf));
  322. memset(gy_buf + (j & 7) * stride, 0, stride * sizeof(*gy_buf));
  323. }
  324. if (j >= 4) {
  325. int k;
  326. col_sums_gx2[3] = col_sums_gx2[2] = col_sums_gx2[1] = col_sums_gx2[0] = 0;
  327. col_sums_gy2[3] = col_sums_gy2[2] = col_sums_gy2[1] = col_sums_gy2[0] = 0;
  328. col_sums_gxgy[3] = col_sums_gxgy[2] = col_sums_gxgy[1] =
  329. col_sums_gxgy[0] = 0;
  330. for (i = 4; i < 8; i++) {
  331. FS_COL_SET(i, -1, 0);
  332. FS_COL_ADD(i, 0, 0);
  333. for (k = 1; k < 8 - i; k++) {
  334. FS_COL_DOUBLE(i, i);
  335. FS_COL_ADD(i, -k - 1, 0);
  336. FS_COL_ADD(i, k, 0);
  337. }
  338. }
  339. for (i = 0; i < w; i++) {
  340. double mugx2;
  341. double mugy2;
  342. double mugxgy;
  343. mugx2 = col_sums_gx2[0];
  344. for (k = 1; k < 8; k++)
  345. mugx2 += col_sums_gx2[k];
  346. mugy2 = col_sums_gy2[0];
  347. for (k = 1; k < 8; k++)
  348. mugy2 += col_sums_gy2[k];
  349. mugxgy = col_sums_gxgy[0];
  350. for (k = 1; k < 8; k++)
  351. mugxgy += col_sums_gxgy[k];
  352. ssim[(j - 4) * w + i] = (2 * mugxgy + c2) / (mugx2 + mugy2 + c2);
  353. if (i + 1 < w) {
  354. FS_COL_SET(0, -1, 1);
  355. FS_COL_ADD(0, 0, 1);
  356. FS_COL_SUB(2, -3, 2);
  357. FS_COL_SUB(2, 2, 2);
  358. FS_COL_HALVE(1, 2);
  359. FS_COL_SUB(3, -4, 3);
  360. FS_COL_SUB(3, 3, 3);
  361. FS_COL_HALVE(2, 3);
  362. FS_COL_COPY(3, 4);
  363. FS_COL_DOUBLE(4, 5);
  364. FS_COL_ADD(4, -4, 5);
  365. FS_COL_ADD(4, 3, 5);
  366. FS_COL_DOUBLE(5, 6);
  367. FS_COL_ADD(5, -3, 6);
  368. FS_COL_ADD(5, 2, 6);
  369. FS_COL_DOUBLE(6, 7);
  370. FS_COL_ADD(6, -2, 7);
  371. FS_COL_ADD(6, 1, 7);
  372. FS_COL_SET(7, -1, 8);
  373. FS_COL_ADD(7, 0, 8);
  374. }
  375. }
  376. }
  377. }
  378. }
  379. #define FS_NLEVELS (4)
  380. /*These weights were derived from the default weights found in Wang's original
  381. Matlab implementation: {0.0448, 0.2856, 0.2363, 0.1333}.
  382. We drop the finest scale and renormalize the rest to sum to 1.*/
  383. static const double FS_WEIGHTS[FS_NLEVELS] = {0.2989654541015625,
  384. 0.3141326904296875, 0.2473602294921875, 0.1395416259765625};
  385. static double fs_average(fs_ctx *_ctx, int _l) {
  386. double *ssim;
  387. double ret;
  388. int w;
  389. int h;
  390. int i;
  391. int j;
  392. w = _ctx->level[_l].w;
  393. h = _ctx->level[_l].h;
  394. ssim = _ctx->level[_l].ssim;
  395. ret = 0;
  396. for (j = 0; j < h; j++)
  397. for (i = 0; i < w; i++)
  398. ret += ssim[j * w + i];
  399. return pow(ret / (w * h), FS_WEIGHTS[_l]);
  400. }
  401. static double calc_ssim(const unsigned char *_src, int _systride,
  402. const unsigned char *_dst, int _dystride, int _w, int _h) {
  403. fs_ctx ctx;
  404. double ret;
  405. int l;
  406. ret = 1;
  407. fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);
  408. fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h);
  409. for (l = 0; l < FS_NLEVELS - 1; l++) {
  410. fs_calc_structure(&ctx, l);
  411. ret *= fs_average(&ctx, l);
  412. fs_downsample_level(&ctx, l + 1);
  413. }
  414. fs_calc_structure(&ctx, l);
  415. fs_apply_luminance(&ctx, l);
  416. ret *= fs_average(&ctx, l);
  417. fs_ctx_clear(&ctx);
  418. return ret;
  419. }
  420. static double convert_ssim_db(double _ssim, double _weight) {
  421. return 10 * (log10(_weight) - log10(_weight - _ssim));
  422. }
  423. double vp9_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
  424. double *ssim_y, double *ssim_u, double *ssim_v) {
  425. double ssimv;
  426. vp9_clear_system_state();
  427. *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
  428. dest->y_stride, source->y_crop_width,
  429. source->y_crop_height);
  430. *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,
  431. dest->uv_stride, source->uv_crop_width,
  432. source->uv_crop_height);
  433. *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,
  434. dest->uv_stride, source->uv_crop_width,
  435. source->uv_crop_height);
  436. ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));
  437. return convert_ssim_db(ssimv, 1.0);
  438. }