ssse3-scaler.c

/*
 * Copyright © 2013 Soren Sandmann Pedersen
 * Copyright © 2013 Red Hat, Inc.
 * Copyright © 2016 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Author: Soren Sandmann (soren.sandmann@gmail.com)
 *         Jeff Muizelaar (jmuizelaar@mozilla.com)
 */

/* This has been adapted from the ssse3 code from pixman. It's currently
 * a mess as I want to try it out in practice before finalizing the details.
 */
#include <stdlib.h>
#include <stdio.h>    /* for the printf calls in the HACKY_PADDING debug path */
#include <stdbool.h>
#include <mmintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include <stdint.h>
#include <assert.h>

#include "ssse3-scaler.h"
typedef int32_t              pixman_fixed_16_16_t;
typedef pixman_fixed_16_16_t pixman_fixed_t;

#define pixman_fixed_1            (pixman_int_to_fixed(1))
#define pixman_fixed_to_int(f)    ((int) ((f) >> 16))
#define pixman_int_to_fixed(i)    ((pixman_fixed_t) ((i) << 16))
#define pixman_double_to_fixed(d) ((pixman_fixed_t) ((d) * 65536.0))
#define PIXMAN_FIXED_INT_MAX      32767
#define PIXMAN_FIXED_INT_MIN      -32768
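
/* Worked examples of the 16.16 fixed-point representation used throughout
 * this file (illustrative values, not part of the original source):
 *
 *   pixman_int_to_fixed (3)          == 0x00030000   (3.0)
 *   pixman_fixed_1                   == 0x00010000   (1.0)
 *   pixman_double_to_fixed (1.5)     == 0x00018000   (1.5)
 *   pixman_fixed_to_int (0x00018000) == 1            (truncates the fraction
 *                                                     for non-negative values)
 */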
typedef struct pixman_vector    pixman_vector_t;
typedef int                     pixman_bool_t;
typedef int64_t                 pixman_fixed_32_32_t;
typedef pixman_fixed_32_32_t    pixman_fixed_48_16_t;

typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;

struct pixman_vector
{
    pixman_fixed_t vector[3];
};

typedef struct pixman_transform pixman_transform_t;

struct pixman_transform
{
    pixman_fixed_t matrix[3][3];
};
#ifdef _MSC_VER
#define force_inline __forceinline
#else
#define force_inline __inline__ __attribute__ ((always_inline))
#endif

#define BILINEAR_INTERPOLATION_BITS 6

static force_inline int
pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
{
    return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
           ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
}
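
/* Example (added for clarity, not in the original source): for
 * x == pixman_double_to_fixed (1.5) == 0x00018000 the fractional part is
 * 0x8000, so pixman_fixed_to_bilinear_weight (x) == 0x8000 >> 10 == 32,
 * i.e. a weight of 32/64 = 0.5 between the two neighbouring source pixels. */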
static void
pixman_transform_point_31_16_3d (const pixman_transform_t    *t,
                                 const pixman_vector_48_16_t *v,
                                 pixman_vector_48_16_t       *result)
{
    int i;
    int64_t tmp[3][2];

    /* input vector values must have no more than 31 bits (including sign)
     * in the integer part */
    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));

    for (i = 0; i < 3; i++)
    {
        tmp[i][0]  = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
        tmp[i][1]  = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
    }

    result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
    result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
    result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
}
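
/* Sketch of what the loop above evaluates for each row i (added for clarity,
 * not in the original source):
 *
 *   result->v[i] = sum_j M[i][j] * (v->v[j] >> 16)
 *                + ((sum_j M[i][j] * (v->v[j] & 0xFFFF) + 0x8000) >> 16)
 *
 * i.e. each 48.16 input coordinate is split into its integer and fractional
 * halves so that every partial product fits in 64 bits, and the fractional
 * contribution is rounded to the nearest unit before being added back in. */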
static pixman_bool_t
pixman_transform_point_3d (const struct pixman_transform *transform,
                           struct pixman_vector          *vector)
{
    pixman_vector_48_16_t tmp;

    tmp.v[0] = vector->vector[0];
    tmp.v[1] = vector->vector[1];
    tmp.v[2] = vector->vector[2];

    pixman_transform_point_31_16_3d (transform, &tmp, &tmp);

    vector->vector[0] = tmp.v[0];
    vector->vector[1] = tmp.v[1];
    vector->vector[2] = tmp.v[2];

    /* The assignments above truncate the 48.16 results to 16.16; report
     * failure if anything was lost in the truncation. */
    return vector->vector[0] == tmp.v[0] &&
           vector->vector[1] == tmp.v[1] &&
           vector->vector[2] == tmp.v[2];
}
struct bits_image_t
{
    uint32_t *           bits;
    int                  rowstride;  /* in 4-byte (pixel) units */
    pixman_transform_t * transform;
};

typedef struct bits_image_t bits_image_t;

typedef struct {
    int unused;
} pixman_iter_info_t;

typedef struct pixman_iter_t pixman_iter_t;
typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter);

struct pixman_iter_t
{
    int x, y;
    pixman_iter_fini_t fini;
    bits_image_t *image;
    uint32_t *buffer;
    int width;
    int height;
    void *data;
};

typedef struct
{
    int       y;
    uint64_t *buffer;
} line_t;

typedef struct
{
    /* Two-entry cache of horizontally interpolated source rows, indexed by
     * the row's y coordinate & 1; each buffer holds one pixel per uint64_t
     * (16 bits per channel). */
    line_t lines[2];
    pixman_fixed_t y;
    pixman_fixed_t x;
    uint64_t data[1];
} bilinear_info_t;
/* Fetch source row y and horizontally interpolate it at the fixed-point
 * positions x, x + ux, x + 2*ux, ..., producing n pixels with 16 bits per
 * channel into line->buffer. */
static void
ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
                        int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
{
    uint32_t *bits = image->bits + y * image->rowstride;
    __m128i vx = _mm_set_epi16 (
        - (x + 1), x, - (x + 1), x,
        - (x + ux + 1), x + ux, - (x + ux + 1), x + ux);
    __m128i vux = _mm_set_epi16 (
        - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
        - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
    __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
    __m128i *b = (__m128i *)line->buffer;
    __m128i vrl0, vrl1;

    while ((n -= 2) >= 0)
    {
        __m128i vw, vr, s;
#ifdef HACKY_PADDING
        if (pixman_fixed_to_int (x + ux) >= image->rowstride)
        {
            vrl1 = _mm_setzero_si128 ();
            printf ("overread 2loop\n");
        }
        else
        {
            if (pixman_fixed_to_int (x + ux) < 0)
                printf ("underflow\n");
            vrl1 = _mm_loadl_epi64 (
                (__m128i *)(bits + (pixman_fixed_to_int (x + ux) < 0 ? 0 : pixman_fixed_to_int (x + ux))));
        }
#else
        vrl1 = _mm_loadl_epi64 (
            (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
#endif
        /* vrl1: R1, L1 */

    final_pixel:
#ifdef HACKY_PADDING
        vrl0 = _mm_loadl_epi64 (
            (__m128i *)(bits + (pixman_fixed_to_int (x) < 0 ? 0 : pixman_fixed_to_int (x))));
#else
        vrl0 = _mm_loadl_epi64 (
            (__m128i *)(bits + pixman_fixed_to_int (x)));
#endif
        /* vrl0: R0, L0 */

        /* The weights are based on vx which is a vector of
         *
         *    - (x + 1), x, - (x + 1), x,
         *    - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
         *
         * so the 16 bit weights end up like this:
         *
         *    iw0, w0, iw0, w0, iw1, w1, iw1, w1
         *
         * and after shifting and packing, we get these bytes:
         *
         *    iw0, w0, iw0, w0, iw1, w1, iw1, w1,
         *    iw0, w0, iw0, w0, iw1, w1, iw1, w1,
         *
         * which means the first and the second input pixel
         * have to be interleaved like this:
         *
         *    la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
         *    lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
         *
         * before maddubsw can be used.
         */

        vw = _mm_add_epi16 (
            vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
        /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 */

        vw = _mm_packus_epi16 (vw, vw);
        /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
         *     iw0, w0, iw0, w0, iw1, w1, iw1, w1
         */
        vx = _mm_add_epi16 (vx, vux);

        x += 2 * ux;

        vr = _mm_unpacklo_epi16 (vrl1, vrl0);
        /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */

        s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
        /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */

        vr = _mm_unpackhi_epi8 (vr, s);
        /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
         *     lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
         */

        vr = _mm_maddubs_epi16 (vr, vw);

        /* When the weight is 0, the inverse weight is
         * 128 which can't be represented in a signed byte.
         * As a result maddubsw computes the following:
         *
         *     r = l * -128 + r * 0
         *
         * rather than the desired
         *
         *     r = l * 128 + r * 0
         *
         * We fix this by taking the absolute value of the
         * result.
         */
        // we can drop this if we use lower precision

        vr = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (2, 0, 3, 1));
        /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */

        _mm_store_si128 (b++, vr);
    }

    if (n == -1)
    {
        vrl1 = _mm_setzero_si128 ();

        goto final_pixel;
    }

    line->y = y;
}
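
/* For reference, a rough scalar sketch (added here; not part of the original
 * source, and guarded out of the build) of what ssse3_fetch_horizontal
 * computes per channel. The real SIMD code stores the channels of each pixel
 * pair in a shuffled order, which this sketch ignores. */
#if 0
static void
scalar_fetch_horizontal_sketch (bits_image_t *image, uint16_t *out,
                                int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
{
    uint32_t *bits = image->bits + y * image->rowstride;
    int i, c;

    for (i = 0; i < n; i++)
    {
        uint32_t left  = bits[pixman_fixed_to_int (x)];
        uint32_t right = bits[pixman_fixed_to_int (x) + 1];
        int w = pixman_fixed_to_bilinear_weight (x);   /* 0 .. 63 */

        for (c = 0; c < 4; c++)
        {
            uint32_t l = (left  >> (8 * c)) & 0xff;
            uint32_t r = (right >> (8 * c)) & 0xff;

            /* Each output channel is an 8-bit source channel scaled by 64;
             * the vertical pass in ssse3_fetch_bilinear_cover divides that
             * factor back out. */
            out[4 * i + c] =
                (uint16_t)(l * ((1 << BILINEAR_INTERPOLATION_BITS) - w) + r * w);
        }

        x += ux;
    }
}
#endif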
// scale a line of destination pixels
static uint32_t *
ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
{
    pixman_fixed_t fx, ux;
    bilinear_info_t *info = iter->data;
    line_t *line0, *line1;
    int y0, y1;
    int32_t dist_y;
    __m128i vw, uvw;
    int i;

    fx = info->x;
    ux = iter->image->transform->matrix[0][0];

    y0 = pixman_fixed_to_int (info->y);
    if (y0 < 0)
        *(volatile char*)0 = 9; /* deliberately crash: the source y must never go negative */
    y1 = y0 + 1;

    // clamping in y direction
    if (y1 >= iter->height) {
        y1 = iter->height - 1;
    }

    line0 = &info->lines[y0 & 0x01];
    line1 = &info->lines[y1 & 0x01];

    if (line0->y != y0)
    {
        ssse3_fetch_horizontal (
            iter->image, line0, y0, fx, ux, iter->width);
    }

    if (line1->y != y1)
    {
        ssse3_fetch_horizontal (
            iter->image, line1, y1, fx, ux, iter->width);
    }

#ifdef PIXMAN_STYLE_INTERPOLATION
    dist_y = pixman_fixed_to_bilinear_weight (info->y);
    dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);

    vw = _mm_set_epi16 (
        dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
#else
    // setup the weights for the top (uvw) and bottom (vw) lines
    dist_y = pixman_fixed_to_bilinear_weight (info->y);
    // we use 15 instead of 16 because we need an extra bit to handle when the weights are 0 and 1
    dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS);
    vw = _mm_set_epi16 (
        dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);

    dist_y = (1 << BILINEAR_INTERPOLATION_BITS) - pixman_fixed_to_bilinear_weight (info->y);
    dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS);
    uvw = _mm_set_epi16 (
        dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
#endif

    for (i = 0; i + 3 < iter->width; i += 4)
    {
        __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
        __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
        __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
        __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
#ifdef PIXMAN_STYLE_INTERPOLATION
        __m128i r0, r1, tmp, p;

        r0 = _mm_mulhi_epu16 (
            _mm_sub_epi16 (bot0, top0), vw);
        tmp = _mm_cmplt_epi16 (bot0, top0);
        tmp = _mm_and_si128 (tmp, vw);
        r0 = _mm_sub_epi16 (r0, tmp);
        r0 = _mm_add_epi16 (r0, top0);
        r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
        /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
        //r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
        /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */

        // tmp = bot1 < top1 ? vw : 0;
        // r1 = (bot1 - top1)*vw + top1 - tmp
        // r1 = bot1*vw - vw*top1 + top1 - tmp
        // r1 = bot1*vw + top1 - vw*top1 - tmp
        // r1 = bot1*vw + top1*(1 - vw) - tmp
        r1 = _mm_mulhi_epu16 (
            _mm_sub_epi16 (bot1, top1), vw);
        tmp = _mm_cmplt_epi16 (bot1, top1);
        tmp = _mm_and_si128 (tmp, vw);
        r1 = _mm_sub_epi16 (r1, tmp);
        r1 = _mm_add_epi16 (r1, top1);
        r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
        //r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
        /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
#else
        __m128i r0, r1, p;
        top0 = _mm_mulhi_epu16 (top0, uvw);
        bot0 = _mm_mulhi_epu16 (bot0, vw);
        r0 = _mm_add_epi16 (top0, bot0);
        r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS - 1);

        top1 = _mm_mulhi_epu16 (top1, uvw);
        bot1 = _mm_mulhi_epu16 (bot1, vw);
        r1 = _mm_add_epi16 (top1, bot1);
        r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS - 1);
#endif

        p = _mm_packus_epi16 (r0, r1);
        _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
    }

    while (i < iter->width)
    {
        __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
        __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
#ifdef PIXMAN_STYLE_INTERPOLATION
        __m128i r0, tmp, p;
        r0 = _mm_mulhi_epu16 (
            _mm_sub_epi16 (bot0, top0), vw);
        tmp = _mm_cmplt_epi16 (bot0, top0);
        tmp = _mm_and_si128 (tmp, vw);
        r0 = _mm_sub_epi16 (r0, tmp);
        r0 = _mm_add_epi16 (r0, top0);
        r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
        /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */
        r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
        /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */
#else
        __m128i r0, p;
        top0 = _mm_mulhi_epu16 (top0, uvw);
        bot0 = _mm_mulhi_epu16 (bot0, vw);
        r0 = _mm_add_epi16 (top0, bot0);
        r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS - 1);
#endif
        p = _mm_packus_epi16 (r0, r0);

        if (iter->width - i == 1)
        {
            *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
            i++;
        }
        else
        {
            _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
            i += 2;
        }
    }

    info->y += iter->image->transform->matrix[1][1];

    return iter->buffer;
}
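
/* Sketch of the vertical blend performed above in the default
 * (non-PIXMAN_STYLE_INTERPOLATION) path, added for clarity. With
 * wy = pixman_fixed_to_bilinear_weight (info->y) in [0, 63], each 16-bit
 * channel t from the top line and b from the bottom line (both equal to
 * 64 times an interpolated 8-bit value, see ssse3_fetch_horizontal) is
 * combined roughly as
 *
 *   out = (((t * ((64 - wy) << 9)) >> 16) + ((b * (wy << 9)) >> 16)) >> 5
 *       ~= (T * (64 - wy) + B * wy) / 64
 *
 * where T and B are the underlying 8-bit values; the shifts keep every
 * intermediate within range for _mm_mulhi_epu16. */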
static void
ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter)
{
    free (iter->data);
}

static void
ssse3_bilinear_cover_iter_init (pixman_iter_t *iter)
{
    int width = iter->width;
    bilinear_info_t *info;
    pixman_vector_t v;

    if (iter->x > PIXMAN_FIXED_INT_MAX ||
        iter->x < PIXMAN_FIXED_INT_MIN ||
        iter->y > PIXMAN_FIXED_INT_MAX ||
        iter->y < PIXMAN_FIXED_INT_MIN)
        goto fail;

    /* Reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (iter->image->transform, &v))
        goto fail;

    info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
    if (!info)
        goto fail;

    info->x = v.vector[0] - pixman_fixed_1 / 2;
    info->y = v.vector[1] - pixman_fixed_1 / 2;

#define ALIGN(addr) \
    ((void *)((((uintptr_t)(addr)) + 15) & (~15)))

    /* It is safe to set the y coordinates to -1 initially
     * because COVER_CLIP_BILINEAR ensures that we will only
     * be asked to fetch lines in the [0, height) interval
     */
    info->lines[0].y = -1;
    info->lines[0].buffer = ALIGN (&(info->data[0]));
    info->lines[1].y = -1;
    info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);

    iter->fini = ssse3_bilinear_cover_iter_fini;

    iter->data = info;
    return;

fail:
    /* Something went wrong, either a bad matrix or OOM; in such cases,
     * we don't guarantee any particular rendering.
     */
    iter->fini = NULL;
}
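
/* Layout note (added): the malloc above reserves room for info->data[1] plus
 * (2 * width - 1) further uint64_t entries plus 64 spare bytes, so after
 * rounding info->data up to a 16-byte boundary with ALIGN there is space for
 * two width-entry line buffers, one per cached row, each 16-byte aligned for
 * the _mm_load_si128/_mm_store_si128 accesses in the fetch routines. */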
/* Scale the src surface from src_width/height to dest_width/height, drawing
 * into the destination rectangle (x, y, width, height).
 * src_stride and dest_stride are in 4-byte (pixel) units. */
bool ssse3_scale_data (uint32_t *src, int src_width, int src_height,
                       int src_stride,
                       uint32_t *dest, int dest_width, int dest_height,
                       int dest_stride,
                       int x, int y,
                       int width, int height)
{
    //XXX: assert(src_width > 1)
    pixman_transform_t transform = {
        { { pixman_fixed_1, 0, 0 },
          { 0, pixman_fixed_1, 0 },
          { 0, 0, pixman_fixed_1 } }
    };
    double width_scale = ((double)src_width) / dest_width;
    double height_scale = ((double)src_height) / dest_height;
#define AVOID_PADDING
#ifdef AVOID_PADDING
    // When scaling up, tighten the scale factor and offset the transform so
    // that we never sample outside the bounds of the source surface;
    // currently this is required to avoid reading out of bounds.
    if (width_scale < 1) {
        width_scale = (double)(src_width - 1) / dest_width;
        transform.matrix[0][2] = pixman_fixed_1 / 2;
    }
    if (height_scale < 1) {
        height_scale = (double)(src_height - 1) / dest_height;
        transform.matrix[1][2] = pixman_fixed_1 / 2;
    }
#endif
    transform.matrix[0][0] = pixman_double_to_fixed (width_scale);
    transform.matrix[1][1] = pixman_double_to_fixed (height_scale);
    transform.matrix[2][2] = pixman_fixed_1;

    bits_image_t image;
    image.bits = src;
    image.transform = &transform;
    image.rowstride = src_stride;

    pixman_iter_t iter;
    iter.image = &image;
    iter.x = x;
    iter.y = y;
    iter.width = width;
    iter.height = src_height;
    iter.buffer = dest;
    iter.data = NULL;

    ssse3_bilinear_cover_iter_init (&iter);

    if (!iter.fini)
        return false;

    if (iter.data) {
        for (int iy = 0; iy < height; iy++) {
            ssse3_fetch_bilinear_cover (&iter, NULL);
            iter.buffer += dest_stride;
        }
        ssse3_bilinear_cover_iter_fini (&iter);
    }

    return true;
}
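
/* Example usage (illustrative sketch added here, not part of the original
 * source and guarded out of the build): scale a 100x100 ARGB32 surface up to
 * 200x200. The caller is assumed to have already verified SSSE3 support
 * before calling ssse3_scale_data. */
#if 0
static uint32_t *
example_scale_up (uint32_t *src /* 100x100, stride 100 pixels */)
{
    uint32_t *dest = malloc (200 * 200 * sizeof (uint32_t));
    if (!dest)
        return NULL;

    /* Strides are in 4-byte (pixel) units; (x, y, width, height) select the
     * destination rectangle to fill, here the whole 200x200 surface. */
    if (!ssse3_scale_data (src, 100, 100, 100,
                           dest, 200, 200, 200,
                           0, 0, 200, 200))
    {
        free (dest);
        return NULL;
    }

    return dest;
}
#endif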