scale_argb.cc 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860
  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include <assert.h>
  12. #include <string.h>
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/planar_functions.h" // For CopyARGB
  15. #include "libyuv/row.h"
  16. #include "libyuv/scale_row.h"
  17. #ifdef __cplusplus
  18. namespace libyuv {
  19. extern "C" {
  20. #endif
  21. static __inline int Abs(int v) {
  22. return v >= 0 ? v : -v;
  23. }
// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down a ARGB to 1/2 of
// its original size.
static void ScaleARGBDown2(int src_width, int src_height,
                           int dst_width, int dst_height,
                           int src_stride, int dst_stride,
                           const uint8* src_argb, uint8* dst_argb,
                           int x, int dx, int y, int dy,
                           enum FilterMode filtering) {
  int j;
  // Vertical step in bytes; dy is 16.16 fixed point, so dy >> 16 is the
  // whole number of source rows advanced per destination row.
  int row_stride = src_stride * (dy >> 16);
  // Choose the portable C row scaler for the filter mode; SIMD variants
  // may override it below.
  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
                            uint8* dst_argb, int dst_width) =
      filtering == kFilterNone ? ScaleARGBRowDown2_C :
      (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
      ScaleARGBRowDown2Box_C);
  assert(dx == 65536 * 2);  // Test scale factor of 2.
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
  // Advance to odd row, even column.
  if (filtering == kFilterBilinear) {
    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  } else {
    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
  }
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    // The "Any" variants handle arbitrary widths; when dst_width is a
    // multiple of the SIMD lane count, use the full-width kernel.
    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
        ScaleARGBRowDown2Box_Any_SSE2);
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
          (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
          ScaleARGBRowDown2Box_SSE2);
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
        ScaleARGBRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
          (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
          ScaleARGBRowDown2Box_NEON);
    }
  }
#endif

  if (filtering == kFilterLinear) {
    // Linear filtering reads only one source row per output row.
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
    src_argb += row_stride;
    dst_argb += dst_stride;
  }
}
// ScaleARGB ARGB, 1/4
// This is an optimized version for scaling down a ARGB to 1/4 of
// its original size.
static void ScaleARGBDown4Box(int src_width, int src_height,
                              int dst_width, int dst_height,
                              int src_stride, int dst_stride,
                              const uint8* src_argb, uint8* dst_argb,
                              int x, int dx, int y, int dy) {
  int j;
  // Allocate 2 rows of ARGB.
  // Each intermediate row is dst_width * 2 pixels wide (half of source),
  // rounded up to a 32-byte boundary for aligned SIMD access.
  const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
  align_buffer_64(row, kRowSize * 2);
  // Whole source rows advanced per destination row (dy is 16.16 fixed point).
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
    uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
  // Advance to odd row, even column.
  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  assert(dx == 65536 * 4);  // Test scale factor of 4.
  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    // First pass: 2x2 box-average two pairs of source rows into two
    // half-width intermediate rows.
    ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
    ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
                      row + kRowSize, dst_width * 2);
    // Second pass: box-average the two intermediate rows into the final
    // 1/4-size destination row.
    ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
    src_argb += row_stride;
    dst_argb += dst_stride;
  }
  free_aligned_buffer_64(row);
}
// ScaleARGB ARGB Even
// This is an optimized version for scaling down a ARGB to even
// multiple of its original size.
static void ScaleARGBDownEven(int src_width, int src_height,
                              int dst_width, int dst_height,
                              int src_stride, int dst_stride,
                              const uint8* src_argb, uint8* dst_argb,
                              int x, int dx, int y, int dy,
                              enum FilterMode filtering) {
  int j;
  // Horizontal step in whole source pixels (dx is 16.16 fixed point).
  int col_step = dx >> 16;
  // Vertical step in bytes per destination row.
  int row_stride = (dy >> 16) * src_stride;
  // Box-filtered variant averages 2x2 pixels; otherwise point sample.
  void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
                               int src_step, uint8* dst_argb, int dst_width) =
      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
  // Advance to the initial sample position.
  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
        ScaleARGBRowDownEven_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
          ScaleARGBRowDownEven_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
        ScaleARGBRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
          ScaleARGBRowDownEven_NEON;
    }
  }
#endif
  if (filtering == kFilterLinear) {
    // Linear filtering reads only one source row per output row.
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
    src_argb += row_stride;
    dst_argb += dst_stride;
  }
}
// Scale ARGB down with bilinear interpolation.
static void ScaleARGBBilinearDown(int src_width, int src_height,
                                  int dst_width, int dst_height,
                                  int src_stride, int dst_stride,
                                  const uint8* src_argb, uint8* dst_argb,
                                  int x, int dx, int y, int dy,
                                  enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // 64-bit column scaler is needed once x * dx can exceed 32 bits.
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
  // Compute the horizontal span of source pixels actually read, so the
  // row interpolation below only touches that clipped region.
  int64 xlast = x + (int64)(dst_width - 1) * dx;
  int64 xl = (dx >= 0) ? x : xlast;
  int64 xr = (dx >= 0) ? xlast : x;
  int clip_src_width;
  xl = (xl >> 16) & ~3;  // Left edge aligned.
  xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
  if (xr > src_width) {
    xr = src_width;
  }
  clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
  src_argb += xl * 4;
  // Rebase x so it is relative to the clipped left edge.
  x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) &&
      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
    InterpolateRow = InterpolateRow_Any_DSPR2;
    if (IS_ALIGNED(clip_src_width, 4)) {
      InterpolateRow = InterpolateRow_DSPR2;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row of ARGB.
  {
    align_buffer_64(row, clip_src_width * 4);
    // Clamp y so sampling never reads past the last source row.
    const int max_y = (src_height - 1) << 16;
    if (y > max_y) {
      y = max_y;
    }
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
      const uint8* src = src_argb + yi * src_stride;
      if (filtering == kFilterLinear) {
        // Horizontal-only filtering: sample directly from the source row.
        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
      } else {
        // Vertically interpolate two source rows into 'row', then filter
        // horizontally from the interpolated row.
        int yf = (y >> 8) & 255;
        InterpolateRow(row, src, src_stride, clip_src_width, yf);
        ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
      }
      dst_argb += dst_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
    }
    free_aligned_buffer_64(row);
  }
}
// Scale ARGB up with bilinear interpolation.
static void ScaleARGBBilinearUp(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint8* src_argb, uint8* dst_argb,
                                int x, int dx, int y, int dy,
                                enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // With filtering, columns are bilinearly sampled; without, point sampled.
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
    InterpolateRow = InterpolateRow_DSPR2;
  }
#endif
  // Wide sources need 64-bit fixed-point column math.
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBCols_NEON;
    }
  }
#endif
  // Exact 2x horizontal upsample with zero phase has a dedicated kernel.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_argb + yi * src_stride;
    // Allocate 2 rows of ARGB.
    // Two column-scaled source rows are kept and vertically interpolated;
    // rows are swapped by negating rowstride as y advances.
    const int kRowSize = (dst_width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;
    // Prime the two-row window with the first two source rows.
    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;
    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Crossed into a new source row: clamp, then scale it into the
        // older of the two buffered rows.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_argb + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: emit the buffered row unchanged.
        InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
      }
      dst_argb += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
  402. #ifdef YUVSCALEUP
  403. // Scale YUV to ARGB up with bilinear interpolation.
  404. static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
  405. int dst_width, int dst_height,
  406. int src_stride_y,
  407. int src_stride_u,
  408. int src_stride_v,
  409. int dst_stride_argb,
  410. const uint8* src_y,
  411. const uint8* src_u,
  412. const uint8* src_v,
  413. uint8* dst_argb,
  414. int x, int dx, int y, int dy,
  415. enum FilterMode filtering) {
  416. int j;
  417. void (*I422ToARGBRow)(const uint8* y_buf,
  418. const uint8* u_buf,
  419. const uint8* v_buf,
  420. uint8* rgb_buf,
  421. int width) = I422ToARGBRow_C;
  422. #if defined(HAS_I422TOARGBROW_SSSE3)
  423. if (TestCpuFlag(kCpuHasSSSE3)) {
  424. I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
  425. if (IS_ALIGNED(src_width, 8)) {
  426. I422ToARGBRow = I422ToARGBRow_SSSE3;
  427. }
  428. }
  429. #endif
  430. #if defined(HAS_I422TOARGBROW_AVX2)
  431. if (TestCpuFlag(kCpuHasAVX2)) {
  432. I422ToARGBRow = I422ToARGBRow_Any_AVX2;
  433. if (IS_ALIGNED(src_width, 16)) {
  434. I422ToARGBRow = I422ToARGBRow_AVX2;
  435. }
  436. }
  437. #endif
  438. #if defined(HAS_I422TOARGBROW_NEON)
  439. if (TestCpuFlag(kCpuHasNEON)) {
  440. I422ToARGBRow = I422ToARGBRow_Any_NEON;
  441. if (IS_ALIGNED(src_width, 8)) {
  442. I422ToARGBRow = I422ToARGBRow_NEON;
  443. }
  444. }
  445. #endif
  446. #if defined(HAS_I422TOARGBROW_DSPR2)
  447. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
  448. IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
  449. IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
  450. IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
  451. IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
  452. I422ToARGBRow = I422ToARGBRow_DSPR2;
  453. }
  454. #endif
  455. void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
  456. ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
  457. InterpolateRow_C;
  458. #if defined(HAS_INTERPOLATEROW_SSSE3)
  459. if (TestCpuFlag(kCpuHasSSSE3)) {
  460. InterpolateRow = InterpolateRow_Any_SSSE3;
  461. if (IS_ALIGNED(dst_width, 4)) {
  462. InterpolateRow = InterpolateRow_SSSE3;
  463. }
  464. }
  465. #endif
  466. #if defined(HAS_INTERPOLATEROW_AVX2)
  467. if (TestCpuFlag(kCpuHasAVX2)) {
  468. InterpolateRow = InterpolateRow_Any_AVX2;
  469. if (IS_ALIGNED(dst_width, 8)) {
  470. InterpolateRow = InterpolateRow_AVX2;
  471. }
  472. }
  473. #endif
  474. #if defined(HAS_INTERPOLATEROW_NEON)
  475. if (TestCpuFlag(kCpuHasNEON)) {
  476. InterpolateRow = InterpolateRow_Any_NEON;
  477. if (IS_ALIGNED(dst_width, 4)) {
  478. InterpolateRow = InterpolateRow_NEON;
  479. }
  480. }
  481. #endif
  482. #if defined(HAS_INTERPOLATEROW_DSPR2)
  483. if (TestCpuFlag(kCpuHasDSPR2) &&
  484. IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
  485. InterpolateRow = InterpolateRow_DSPR2;
  486. }
  487. #endif
  488. void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
  489. int dst_width, int x, int dx) =
  490. filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  491. if (src_width >= 32768) {
  492. ScaleARGBFilterCols = filtering ?
  493. ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  494. }
  495. #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  496. if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
  497. ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  498. }
  499. #endif
  500. #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  501. if (filtering && TestCpuFlag(kCpuHasNEON)) {
  502. ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
  503. if (IS_ALIGNED(dst_width, 4)) {
  504. ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
  505. }
  506. }
  507. #endif
  508. #if defined(HAS_SCALEARGBCOLS_SSE2)
  509. if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
  510. ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  511. }
  512. #endif
  513. #if defined(HAS_SCALEARGBCOLS_NEON)
  514. if (!filtering && TestCpuFlag(kCpuHasNEON)) {
  515. ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
  516. if (IS_ALIGNED(dst_width, 8)) {
  517. ScaleARGBFilterCols = ScaleARGBCols_NEON;
  518. }
  519. }
  520. #endif
  521. if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
  522. ScaleARGBFilterCols = ScaleARGBColsUp2_C;
  523. #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
  524. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  525. ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
  526. }
  527. #endif
  528. }
  529. const int max_y = (src_height - 1) << 16;
  530. if (y > max_y) {
  531. y = max_y;
  532. }
  533. const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
  534. int yi = y >> 16;
  535. int uv_yi = yi >> kYShift;
  536. const uint8* src_row_y = src_y + yi * src_stride_y;
  537. const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  538. const uint8* src_row_v = src_v + uv_yi * src_stride_v;
  539. // Allocate 2 rows of ARGB.
  540. const int kRowSize = (dst_width * 4 + 31) & ~31;
  541. align_buffer_64(row, kRowSize * 2);
  542. // Allocate 1 row of ARGB for source conversion.
  543. align_buffer_64(argb_row, src_width * 4);
  544. uint8* rowptr = row;
  545. int rowstride = kRowSize;
  546. int lasty = yi;
  547. // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
  548. ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  549. if (src_height > 1) {
  550. src_row_y += src_stride_y;
  551. if (yi & 1) {
  552. src_row_u += src_stride_u;
  553. src_row_v += src_stride_v;
  554. }
  555. }
  556. ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  557. if (src_height > 2) {
  558. src_row_y += src_stride_y;
  559. if (!(yi & 1)) {
  560. src_row_u += src_stride_u;
  561. src_row_v += src_stride_v;
  562. }
  563. }
  564. for (j = 0; j < dst_height; ++j) {
  565. yi = y >> 16;
  566. if (yi != lasty) {
  567. if (y > max_y) {
  568. y = max_y;
  569. yi = y >> 16;
  570. uv_yi = yi >> kYShift;
  571. src_row_y = src_y + yi * src_stride_y;
  572. src_row_u = src_u + uv_yi * src_stride_u;
  573. src_row_v = src_v + uv_yi * src_stride_v;
  574. }
  575. if (yi != lasty) {
  576. // TODO(fbarchard): Convert the clipped region of row.
  577. I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  578. ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  579. rowptr += rowstride;
  580. rowstride = -rowstride;
  581. lasty = yi;
  582. src_row_y += src_stride_y;
  583. if (yi & 1) {
  584. src_row_u += src_stride_u;
  585. src_row_v += src_stride_v;
  586. }
  587. }
  588. }
  589. if (filtering == kFilterLinear) {
  590. InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
  591. } else {
  592. int yf = (y >> 8) & 255;
  593. InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
  594. }
  595. dst_argb += dst_stride_argb;
  596. y += dy;
  597. }
  598. free_aligned_buffer_64(row);
  599. free_aligned_buffer_64(row_argb);
  600. }
  601. #endif
  602. // Scale ARGB to/from any dimensions, without interpolation.
  603. // Fixed point math is used for performance: The upper 16 bits
  604. // of x and dx is the integer part of the source position and
  605. // the lower 16 bits are the fixed decimal part.
  606. static void ScaleARGBSimple(int src_width, int src_height,
  607. int dst_width, int dst_height,
  608. int src_stride, int dst_stride,
  609. const uint8* src_argb, uint8* dst_argb,
  610. int x, int dx, int y, int dy) {
  611. int j;
  612. void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
  613. int dst_width, int x, int dx) =
  614. (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
  615. #if defined(HAS_SCALEARGBCOLS_SSE2)
  616. if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
  617. ScaleARGBCols = ScaleARGBCols_SSE2;
  618. }
  619. #endif
  620. #if defined(HAS_SCALEARGBCOLS_NEON)
  621. if (TestCpuFlag(kCpuHasNEON)) {
  622. ScaleARGBCols = ScaleARGBCols_Any_NEON;
  623. if (IS_ALIGNED(dst_width, 8)) {
  624. ScaleARGBCols = ScaleARGBCols_NEON;
  625. }
  626. }
  627. #endif
  628. if (src_width * 2 == dst_width && x < 0x8000) {
  629. ScaleARGBCols = ScaleARGBColsUp2_C;
  630. #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
  631. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  632. ScaleARGBCols = ScaleARGBColsUp2_SSE2;
  633. }
  634. #endif
  635. }
  636. for (j = 0; j < dst_height; ++j) {
  637. ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
  638. dst_width, x, dx);
  639. dst_argb += dst_stride;
  640. y += dy;
  641. }
  642. }
// ScaleARGB a ARGB.
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
static void ScaleARGB(const uint8* src, int src_stride,
                      int src_width, int src_height,
                      uint8* dst, int dst_stride,
                      int dst_width, int dst_height,
                      int clip_x, int clip_y, int clip_width, int clip_height,
                      enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // ARGB does not support box filter yet, but allow the user to pass it.
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  // Compute start coordinates and per-pixel steps for the chosen filter.
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // Offset source/destination pointers for the clip rectangle.  The
  // fractional part of clip * step is folded back into x/y.
  if (clip_x) {
    int64 clipf = (int64)(clip_x) * dx;
    x += (clipf & 0xffff);
    src += (clipf >> 16) * 4;
    dst += clip_x * 4;
  }
  if (clip_y) {
    int64 clipf = (int64)(clip_y) * dy;
    y += (clipf & 0xffff);
    src += (clipf >> 16) * src_stride;
    dst += clip_y * dst_stride;
  }

  // Special case for integer step values.
  if (((dx | dy) & 0xffff) == 0) {
    if (!dx || !dy) {  // 1 pixel wide and/or tall.
      filtering = kFilterNone;
    } else {
      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
      if (!(dx & 0x10000) && !(dy & 0x10000)) {
        if (dx == 0x20000) {
          // Optimized 1/2 downsample.
          ScaleARGBDown2(src_width, src_height,
                         clip_width, clip_height,
                         src_stride, dst_stride, src, dst,
                         x, dx, y, dy, filtering);
          return;
        }
        if (dx == 0x40000 && filtering == kFilterBox) {
          // Optimized 1/4 box downsample.
          ScaleARGBDown4Box(src_width, src_height,
                            clip_width, clip_height,
                            src_stride, dst_stride, src, dst,
                            x, dx, y, dy);
          return;
        }
        ScaleARGBDownEven(src_width, src_height,
                          clip_width, clip_height,
                          src_stride, dst_stride, src, dst,
                          x, dx, y, dy, filtering);
        return;
      }
      // Optimized odd scale down. ie 3, 5, 7, 9x.
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
                   dst, dst_stride, clip_width, clip_height);
          return;
        }
      }
    }
  }
  if (dx == 0x10000 && (x & 0xffff) == 0) {
    // Arbitrary scale vertically, but unscaled vertically.
    ScalePlaneVertical(src_height,
                       clip_width, clip_height,
                       src_stride, dst_stride, src, dst,
                       x, y, dy, 4, filtering);
    return;
  }
  // dy < 65536 means the image is being enlarged vertically.
  if (filtering && dy < 65536) {
    ScaleARGBBilinearUp(src_width, src_height,
                        clip_width, clip_height,
                        src_stride, dst_stride, src, dst,
                        x, dx, y, dy, filtering);
    return;
  }
  if (filtering) {
    ScaleARGBBilinearDown(src_width, src_height,
                          clip_width, clip_height,
                          src_stride, dst_stride, src, dst,
                          x, dx, y, dy, filtering);
    return;
  }
  // Fallback: point sampling at any ratio.
  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
                  src_stride, dst_stride, src, dst,
                  x, dx, y, dy);
}
  750. LIBYUV_API
  751. int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
  752. int src_width, int src_height,
  753. uint8* dst_argb, int dst_stride_argb,
  754. int dst_width, int dst_height,
  755. int clip_x, int clip_y, int clip_width, int clip_height,
  756. enum FilterMode filtering) {
  757. if (!src_argb || src_width == 0 || src_height == 0 ||
  758. !dst_argb || dst_width <= 0 || dst_height <= 0 ||
  759. clip_x < 0 || clip_y < 0 ||
  760. clip_width > 32768 || clip_height > 32768 ||
  761. (clip_x + clip_width) > dst_width ||
  762. (clip_y + clip_height) > dst_height) {
  763. return -1;
  764. }
  765. ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
  766. dst_argb, dst_stride_argb, dst_width, dst_height,
  767. clip_x, clip_y, clip_width, clip_height, filtering);
  768. return 0;
  769. }
  770. // Scale an ARGB image.
  771. LIBYUV_API
  772. int ARGBScale(const uint8* src_argb, int src_stride_argb,
  773. int src_width, int src_height,
  774. uint8* dst_argb, int dst_stride_argb,
  775. int dst_width, int dst_height,
  776. enum FilterMode filtering) {
  777. if (!src_argb || src_width == 0 || src_height == 0 ||
  778. src_width > 32768 || src_height > 32768 ||
  779. !dst_argb || dst_width <= 0 || dst_height <= 0) {
  780. return -1;
  781. }
  782. ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
  783. dst_argb, dst_stride_argb, dst_width, dst_height,
  784. 0, 0, dst_width, dst_height, filtering);
  785. return 0;
  786. }
  787. // Scale with YUV conversion to ARGB and clipping.
  788. LIBYUV_API
  789. int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
  790. const uint8* src_u, int src_stride_u,
  791. const uint8* src_v, int src_stride_v,
  792. uint32 src_fourcc,
  793. int src_width, int src_height,
  794. uint8* dst_argb, int dst_stride_argb,
  795. uint32 dst_fourcc,
  796. int dst_width, int dst_height,
  797. int clip_x, int clip_y, int clip_width, int clip_height,
  798. enum FilterMode filtering) {
  799. uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
  800. int r;
  801. I420ToARGB(src_y, src_stride_y,
  802. src_u, src_stride_u,
  803. src_v, src_stride_v,
  804. argb_buffer, src_width * 4,
  805. src_width, src_height);
  806. r = ARGBScaleClip(argb_buffer, src_width * 4,
  807. src_width, src_height,
  808. dst_argb, dst_stride_argb,
  809. dst_width, dst_height,
  810. clip_x, clip_y, clip_width, clip_height,
  811. filtering);
  812. free(argb_buffer);
  813. return r;
  814. }
  815. #ifdef __cplusplus
  816. } // extern "C"
  817. } // namespace libyuv
  818. #endif