jsimd_x86_64.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888
  1. /*
  2. * jsimd_x86_64.c
  3. *
  4. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  5. * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
  6. * Copyright (C) 2015, Matthieu Darbois.
  7. *
  8. * Based on the x86 SIMD extension for IJG JPEG library,
  9. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  10. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  11. *
  12. * This file contains the interface between the "normal" portions
  13. * of the library and the SIMD implementations when running on a
  14. * 64-bit x86 architecture.
  15. */
  16. #define JPEG_INTERNALS
  17. #include "../jinclude.h"
  18. #include "../jpeglib.h"
  19. #include "../jsimd.h"
  20. #include "../jdct.h"
  21. #include "../jsimddct.h"
  22. #include "jsimd.h"
  /*
   * In the PIC cases, we have no guarantee that constants will keep
   * their alignment.  These macros allow us to verify it at runtime.
   *
   * IS_ALIGNED(ptr, order) is nonzero when the address of ptr is a multiple
   * of 2^order bytes.  Both arguments are parenthesized so that an expression
   * argument such as (base + n) is converted to an integer as a whole, and
   * the mask is built in size_t so the shift is not carried out in int.
   */
  #define IS_ALIGNED(ptr, order) \
    ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)
  #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4))  /* 16 byte alignment */
  29. static unsigned int simd_support = ~0;
  30. static unsigned int simd_huffman = 1;
  31. /*
  32. * Check what SIMD accelerations are supported.
  33. *
  34. * FIXME: This code is racy under a multi-threaded environment.
  35. */
  36. LOCAL(void)
  37. init_simd (void)
  38. {
  39. char *env = NULL;
  40. if (simd_support != ~0U)
  41. return;
  42. simd_support = JSIMD_SSE2 | JSIMD_SSE;
  43. /* Force different settings through environment variables */
  44. env = getenv("JSIMD_FORCENONE");
  45. if ((env != NULL) && (strcmp(env, "1") == 0))
  46. simd_support = 0;
  47. env = getenv("JSIMD_NOHUFFENC");
  48. if ((env != NULL) && (strcmp(env, "1") == 0))
  49. simd_huffman = 0;
  50. }
  51. GLOBAL(int)
  52. jsimd_can_rgb_ycc (void)
  53. {
  54. init_simd();
  55. /* The code is optimised for these values only */
  56. if (BITS_IN_JSAMPLE != 8)
  57. return 0;
  58. if (sizeof(JDIMENSION) != 4)
  59. return 0;
  60. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  61. return 0;
  62. if ((simd_support & JSIMD_SSE2) &&
  63. IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
  64. return 1;
  65. return 0;
  66. }
  67. GLOBAL(int)
  68. jsimd_can_rgb_gray (void)
  69. {
  70. init_simd();
  71. /* The code is optimised for these values only */
  72. if (BITS_IN_JSAMPLE != 8)
  73. return 0;
  74. if (sizeof(JDIMENSION) != 4)
  75. return 0;
  76. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  77. return 0;
  78. if ((simd_support & JSIMD_SSE2) &&
  79. IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
  80. return 1;
  81. return 0;
  82. }
  83. GLOBAL(int)
  84. jsimd_can_ycc_rgb (void)
  85. {
  86. init_simd();
  87. /* The code is optimised for these values only */
  88. if (BITS_IN_JSAMPLE != 8)
  89. return 0;
  90. if (sizeof(JDIMENSION) != 4)
  91. return 0;
  92. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  93. return 0;
  94. if ((simd_support & JSIMD_SSE2) &&
  95. IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
  96. return 1;
  97. return 0;
  98. }
  99. GLOBAL(int)
  100. jsimd_can_ycc_rgb565 (void)
  101. {
  102. return 0;
  103. }
  104. GLOBAL(void)
  105. jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
  106. JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
  107. JDIMENSION output_row, int num_rows)
  108. {
  109. void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  110. switch(cinfo->in_color_space) {
  111. case JCS_EXT_RGB:
  112. sse2fct=jsimd_extrgb_ycc_convert_sse2;
  113. break;
  114. case JCS_EXT_RGBX:
  115. case JCS_EXT_RGBA:
  116. sse2fct=jsimd_extrgbx_ycc_convert_sse2;
  117. break;
  118. case JCS_EXT_BGR:
  119. sse2fct=jsimd_extbgr_ycc_convert_sse2;
  120. break;
  121. case JCS_EXT_BGRX:
  122. case JCS_EXT_BGRA:
  123. sse2fct=jsimd_extbgrx_ycc_convert_sse2;
  124. break;
  125. case JCS_EXT_XBGR:
  126. case JCS_EXT_ABGR:
  127. sse2fct=jsimd_extxbgr_ycc_convert_sse2;
  128. break;
  129. case JCS_EXT_XRGB:
  130. case JCS_EXT_ARGB:
  131. sse2fct=jsimd_extxrgb_ycc_convert_sse2;
  132. break;
  133. default:
  134. sse2fct=jsimd_rgb_ycc_convert_sse2;
  135. break;
  136. }
  137. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  138. }
  139. GLOBAL(void)
  140. jsimd_rgb_gray_convert (j_compress_ptr cinfo,
  141. JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
  142. JDIMENSION output_row, int num_rows)
  143. {
  144. void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  145. switch(cinfo->in_color_space) {
  146. case JCS_EXT_RGB:
  147. sse2fct=jsimd_extrgb_gray_convert_sse2;
  148. break;
  149. case JCS_EXT_RGBX:
  150. case JCS_EXT_RGBA:
  151. sse2fct=jsimd_extrgbx_gray_convert_sse2;
  152. break;
  153. case JCS_EXT_BGR:
  154. sse2fct=jsimd_extbgr_gray_convert_sse2;
  155. break;
  156. case JCS_EXT_BGRX:
  157. case JCS_EXT_BGRA:
  158. sse2fct=jsimd_extbgrx_gray_convert_sse2;
  159. break;
  160. case JCS_EXT_XBGR:
  161. case JCS_EXT_ABGR:
  162. sse2fct=jsimd_extxbgr_gray_convert_sse2;
  163. break;
  164. case JCS_EXT_XRGB:
  165. case JCS_EXT_ARGB:
  166. sse2fct=jsimd_extxrgb_gray_convert_sse2;
  167. break;
  168. default:
  169. sse2fct=jsimd_rgb_gray_convert_sse2;
  170. break;
  171. }
  172. sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  173. }
  174. GLOBAL(void)
  175. jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
  176. JSAMPIMAGE input_buf, JDIMENSION input_row,
  177. JSAMPARRAY output_buf, int num_rows)
  178. {
  179. void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  180. switch(cinfo->out_color_space) {
  181. case JCS_EXT_RGB:
  182. sse2fct=jsimd_ycc_extrgb_convert_sse2;
  183. break;
  184. case JCS_EXT_RGBX:
  185. case JCS_EXT_RGBA:
  186. sse2fct=jsimd_ycc_extrgbx_convert_sse2;
  187. break;
  188. case JCS_EXT_BGR:
  189. sse2fct=jsimd_ycc_extbgr_convert_sse2;
  190. break;
  191. case JCS_EXT_BGRX:
  192. case JCS_EXT_BGRA:
  193. sse2fct=jsimd_ycc_extbgrx_convert_sse2;
  194. break;
  195. case JCS_EXT_XBGR:
  196. case JCS_EXT_ABGR:
  197. sse2fct=jsimd_ycc_extxbgr_convert_sse2;
  198. break;
  199. case JCS_EXT_XRGB:
  200. case JCS_EXT_ARGB:
  201. sse2fct=jsimd_ycc_extxrgb_convert_sse2;
  202. break;
  203. default:
  204. sse2fct=jsimd_ycc_rgb_convert_sse2;
  205. break;
  206. }
  207. sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  208. }
  209. GLOBAL(void)
  210. jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
  211. JSAMPIMAGE input_buf, JDIMENSION input_row,
  212. JSAMPARRAY output_buf, int num_rows)
  213. {
  214. }
  215. GLOBAL(int)
  216. jsimd_can_h2v2_downsample (void)
  217. {
  218. init_simd();
  219. /* The code is optimised for these values only */
  220. if (BITS_IN_JSAMPLE != 8)
  221. return 0;
  222. if (sizeof(JDIMENSION) != 4)
  223. return 0;
  224. if (simd_support & JSIMD_SSE2)
  225. return 1;
  226. return 0;
  227. }
  228. GLOBAL(int)
  229. jsimd_can_h2v1_downsample (void)
  230. {
  231. init_simd();
  232. /* The code is optimised for these values only */
  233. if (BITS_IN_JSAMPLE != 8)
  234. return 0;
  235. if (sizeof(JDIMENSION) != 4)
  236. return 0;
  237. if (simd_support & JSIMD_SSE2)
  238. return 1;
  239. return 0;
  240. }
  241. GLOBAL(void)
  242. jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
  243. JSAMPARRAY input_data, JSAMPARRAY output_data)
  244. {
  245. jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  246. compptr->v_samp_factor, compptr->width_in_blocks,
  247. input_data, output_data);
  248. }
  249. GLOBAL(void)
  250. jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
  251. JSAMPARRAY input_data, JSAMPARRAY output_data)
  252. {
  253. jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
  254. compptr->v_samp_factor, compptr->width_in_blocks,
  255. input_data, output_data);
  256. }
  257. GLOBAL(int)
  258. jsimd_can_h2v2_upsample (void)
  259. {
  260. init_simd();
  261. /* The code is optimised for these values only */
  262. if (BITS_IN_JSAMPLE != 8)
  263. return 0;
  264. if (sizeof(JDIMENSION) != 4)
  265. return 0;
  266. if (simd_support & JSIMD_SSE2)
  267. return 1;
  268. return 0;
  269. }
  270. GLOBAL(int)
  271. jsimd_can_h2v1_upsample (void)
  272. {
  273. init_simd();
  274. /* The code is optimised for these values only */
  275. if (BITS_IN_JSAMPLE != 8)
  276. return 0;
  277. if (sizeof(JDIMENSION) != 4)
  278. return 0;
  279. if (simd_support & JSIMD_SSE2)
  280. return 1;
  281. return 0;
  282. }
  283. GLOBAL(void)
  284. jsimd_h2v2_upsample (j_decompress_ptr cinfo,
  285. jpeg_component_info *compptr,
  286. JSAMPARRAY input_data,
  287. JSAMPARRAY *output_data_ptr)
  288. {
  289. jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  290. input_data, output_data_ptr);
  291. }
  292. GLOBAL(void)
  293. jsimd_h2v1_upsample (j_decompress_ptr cinfo,
  294. jpeg_component_info *compptr,
  295. JSAMPARRAY input_data,
  296. JSAMPARRAY *output_data_ptr)
  297. {
  298. jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
  299. input_data, output_data_ptr);
  300. }
  301. GLOBAL(int)
  302. jsimd_can_h2v2_fancy_upsample (void)
  303. {
  304. init_simd();
  305. /* The code is optimised for these values only */
  306. if (BITS_IN_JSAMPLE != 8)
  307. return 0;
  308. if (sizeof(JDIMENSION) != 4)
  309. return 0;
  310. if ((simd_support & JSIMD_SSE2) &&
  311. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  312. return 1;
  313. return 0;
  314. }
  315. GLOBAL(int)
  316. jsimd_can_h2v1_fancy_upsample (void)
  317. {
  318. init_simd();
  319. /* The code is optimised for these values only */
  320. if (BITS_IN_JSAMPLE != 8)
  321. return 0;
  322. if (sizeof(JDIMENSION) != 4)
  323. return 0;
  324. if ((simd_support & JSIMD_SSE2) &&
  325. IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
  326. return 1;
  327. return 0;
  328. }
  329. GLOBAL(void)
  330. jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
  331. jpeg_component_info *compptr,
  332. JSAMPARRAY input_data,
  333. JSAMPARRAY *output_data_ptr)
  334. {
  335. jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  336. compptr->downsampled_width, input_data,
  337. output_data_ptr);
  338. }
  339. GLOBAL(void)
  340. jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
  341. jpeg_component_info *compptr,
  342. JSAMPARRAY input_data,
  343. JSAMPARRAY *output_data_ptr)
  344. {
  345. jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
  346. compptr->downsampled_width, input_data,
  347. output_data_ptr);
  348. }
  349. GLOBAL(int)
  350. jsimd_can_h2v2_merged_upsample (void)
  351. {
  352. init_simd();
  353. /* The code is optimised for these values only */
  354. if (BITS_IN_JSAMPLE != 8)
  355. return 0;
  356. if (sizeof(JDIMENSION) != 4)
  357. return 0;
  358. if ((simd_support & JSIMD_SSE2) &&
  359. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  360. return 1;
  361. return 0;
  362. }
  363. GLOBAL(int)
  364. jsimd_can_h2v1_merged_upsample (void)
  365. {
  366. init_simd();
  367. /* The code is optimised for these values only */
  368. if (BITS_IN_JSAMPLE != 8)
  369. return 0;
  370. if (sizeof(JDIMENSION) != 4)
  371. return 0;
  372. if ((simd_support & JSIMD_SSE2) &&
  373. IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
  374. return 1;
  375. return 0;
  376. }
  377. GLOBAL(void)
  378. jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
  379. JSAMPIMAGE input_buf,
  380. JDIMENSION in_row_group_ctr,
  381. JSAMPARRAY output_buf)
  382. {
  383. void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  384. switch(cinfo->out_color_space) {
  385. case JCS_EXT_RGB:
  386. sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
  387. break;
  388. case JCS_EXT_RGBX:
  389. case JCS_EXT_RGBA:
  390. sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
  391. break;
  392. case JCS_EXT_BGR:
  393. sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
  394. break;
  395. case JCS_EXT_BGRX:
  396. case JCS_EXT_BGRA:
  397. sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
  398. break;
  399. case JCS_EXT_XBGR:
  400. case JCS_EXT_ABGR:
  401. sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
  402. break;
  403. case JCS_EXT_XRGB:
  404. case JCS_EXT_ARGB:
  405. sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
  406. break;
  407. default:
  408. sse2fct=jsimd_h2v2_merged_upsample_sse2;
  409. break;
  410. }
  411. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  412. }
  413. GLOBAL(void)
  414. jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
  415. JSAMPIMAGE input_buf,
  416. JDIMENSION in_row_group_ctr,
  417. JSAMPARRAY output_buf)
  418. {
  419. void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
  420. switch(cinfo->out_color_space) {
  421. case JCS_EXT_RGB:
  422. sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
  423. break;
  424. case JCS_EXT_RGBX:
  425. case JCS_EXT_RGBA:
  426. sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
  427. break;
  428. case JCS_EXT_BGR:
  429. sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
  430. break;
  431. case JCS_EXT_BGRX:
  432. case JCS_EXT_BGRA:
  433. sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
  434. break;
  435. case JCS_EXT_XBGR:
  436. case JCS_EXT_ABGR:
  437. sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
  438. break;
  439. case JCS_EXT_XRGB:
  440. case JCS_EXT_ARGB:
  441. sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
  442. break;
  443. default:
  444. sse2fct=jsimd_h2v1_merged_upsample_sse2;
  445. break;
  446. }
  447. sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
  448. }
  449. GLOBAL(int)
  450. jsimd_can_convsamp (void)
  451. {
  452. init_simd();
  453. /* The code is optimised for these values only */
  454. if (DCTSIZE != 8)
  455. return 0;
  456. if (BITS_IN_JSAMPLE != 8)
  457. return 0;
  458. if (sizeof(JDIMENSION) != 4)
  459. return 0;
  460. if (sizeof(DCTELEM) != 2)
  461. return 0;
  462. if (simd_support & JSIMD_SSE2)
  463. return 1;
  464. return 0;
  465. }
  466. GLOBAL(int)
  467. jsimd_can_convsamp_float (void)
  468. {
  469. init_simd();
  470. /* The code is optimised for these values only */
  471. if (DCTSIZE != 8)
  472. return 0;
  473. if (BITS_IN_JSAMPLE != 8)
  474. return 0;
  475. if (sizeof(JDIMENSION) != 4)
  476. return 0;
  477. if (sizeof(FAST_FLOAT) != 4)
  478. return 0;
  479. if (simd_support & JSIMD_SSE2)
  480. return 1;
  481. return 0;
  482. }
  483. GLOBAL(void)
  484. jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
  485. DCTELEM *workspace)
  486. {
  487. jsimd_convsamp_sse2(sample_data, start_col, workspace);
  488. }
  489. GLOBAL(void)
  490. jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
  491. FAST_FLOAT *workspace)
  492. {
  493. jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
  494. }
  495. GLOBAL(int)
  496. jsimd_can_fdct_islow (void)
  497. {
  498. init_simd();
  499. /* The code is optimised for these values only */
  500. if (DCTSIZE != 8)
  501. return 0;
  502. if (sizeof(DCTELEM) != 2)
  503. return 0;
  504. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
  505. return 1;
  506. return 0;
  507. }
  508. GLOBAL(int)
  509. jsimd_can_fdct_ifast (void)
  510. {
  511. init_simd();
  512. /* The code is optimised for these values only */
  513. if (DCTSIZE != 8)
  514. return 0;
  515. if (sizeof(DCTELEM) != 2)
  516. return 0;
  517. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
  518. return 1;
  519. return 0;
  520. }
  521. GLOBAL(int)
  522. jsimd_can_fdct_float (void)
  523. {
  524. init_simd();
  525. /* The code is optimised for these values only */
  526. if (DCTSIZE != 8)
  527. return 0;
  528. if (sizeof(FAST_FLOAT) != 4)
  529. return 0;
  530. if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
  531. return 1;
  532. return 0;
  533. }
  534. GLOBAL(void)
  535. jsimd_fdct_islow (DCTELEM *data)
  536. {
  537. jsimd_fdct_islow_sse2(data);
  538. }
  539. GLOBAL(void)
  540. jsimd_fdct_ifast (DCTELEM *data)
  541. {
  542. jsimd_fdct_ifast_sse2(data);
  543. }
  544. GLOBAL(void)
  545. jsimd_fdct_float (FAST_FLOAT *data)
  546. {
  547. jsimd_fdct_float_sse(data);
  548. }
  549. GLOBAL(int)
  550. jsimd_can_quantize (void)
  551. {
  552. init_simd();
  553. /* The code is optimised for these values only */
  554. if (DCTSIZE != 8)
  555. return 0;
  556. if (sizeof(JCOEF) != 2)
  557. return 0;
  558. if (sizeof(DCTELEM) != 2)
  559. return 0;
  560. if (simd_support & JSIMD_SSE2)
  561. return 1;
  562. return 0;
  563. }
  564. GLOBAL(int)
  565. jsimd_can_quantize_float (void)
  566. {
  567. init_simd();
  568. /* The code is optimised for these values only */
  569. if (DCTSIZE != 8)
  570. return 0;
  571. if (sizeof(JCOEF) != 2)
  572. return 0;
  573. if (sizeof(FAST_FLOAT) != 4)
  574. return 0;
  575. if (simd_support & JSIMD_SSE2)
  576. return 1;
  577. return 0;
  578. }
  579. GLOBAL(void)
  580. jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
  581. DCTELEM *workspace)
  582. {
  583. jsimd_quantize_sse2(coef_block, divisors, workspace);
  584. }
  585. GLOBAL(void)
  586. jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
  587. FAST_FLOAT *workspace)
  588. {
  589. jsimd_quantize_float_sse2(coef_block, divisors, workspace);
  590. }
  591. GLOBAL(int)
  592. jsimd_can_idct_2x2 (void)
  593. {
  594. init_simd();
  595. /* The code is optimised for these values only */
  596. if (DCTSIZE != 8)
  597. return 0;
  598. if (sizeof(JCOEF) != 2)
  599. return 0;
  600. if (BITS_IN_JSAMPLE != 8)
  601. return 0;
  602. if (sizeof(JDIMENSION) != 4)
  603. return 0;
  604. if (sizeof(ISLOW_MULT_TYPE) != 2)
  605. return 0;
  606. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  607. return 1;
  608. return 0;
  609. }
  610. GLOBAL(int)
  611. jsimd_can_idct_4x4 (void)
  612. {
  613. init_simd();
  614. /* The code is optimised for these values only */
  615. if (DCTSIZE != 8)
  616. return 0;
  617. if (sizeof(JCOEF) != 2)
  618. return 0;
  619. if (BITS_IN_JSAMPLE != 8)
  620. return 0;
  621. if (sizeof(JDIMENSION) != 4)
  622. return 0;
  623. if (sizeof(ISLOW_MULT_TYPE) != 2)
  624. return 0;
  625. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
  626. return 1;
  627. return 0;
  628. }
  629. GLOBAL(void)
  630. jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  631. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  632. JDIMENSION output_col)
  633. {
  634. jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  635. }
  636. GLOBAL(void)
  637. jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  638. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  639. JDIMENSION output_col)
  640. {
  641. jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
  642. }
  643. GLOBAL(int)
  644. jsimd_can_idct_islow (void)
  645. {
  646. init_simd();
  647. /* The code is optimised for these values only */
  648. if (DCTSIZE != 8)
  649. return 0;
  650. if (sizeof(JCOEF) != 2)
  651. return 0;
  652. if (BITS_IN_JSAMPLE != 8)
  653. return 0;
  654. if (sizeof(JDIMENSION) != 4)
  655. return 0;
  656. if (sizeof(ISLOW_MULT_TYPE) != 2)
  657. return 0;
  658. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
  659. return 1;
  660. return 0;
  661. }
  662. GLOBAL(int)
  663. jsimd_can_idct_ifast (void)
  664. {
  665. init_simd();
  666. /* The code is optimised for these values only */
  667. if (DCTSIZE != 8)
  668. return 0;
  669. if (sizeof(JCOEF) != 2)
  670. return 0;
  671. if (BITS_IN_JSAMPLE != 8)
  672. return 0;
  673. if (sizeof(JDIMENSION) != 4)
  674. return 0;
  675. if (sizeof(IFAST_MULT_TYPE) != 2)
  676. return 0;
  677. if (IFAST_SCALE_BITS != 2)
  678. return 0;
  679. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
  680. return 1;
  681. return 0;
  682. }
  683. GLOBAL(int)
  684. jsimd_can_idct_float (void)
  685. {
  686. init_simd();
  687. if (DCTSIZE != 8)
  688. return 0;
  689. if (sizeof(JCOEF) != 2)
  690. return 0;
  691. if (BITS_IN_JSAMPLE != 8)
  692. return 0;
  693. if (sizeof(JDIMENSION) != 4)
  694. return 0;
  695. if (sizeof(FAST_FLOAT) != 4)
  696. return 0;
  697. if (sizeof(FLOAT_MULT_TYPE) != 4)
  698. return 0;
  699. if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
  700. return 1;
  701. return 0;
  702. }
  703. GLOBAL(void)
  704. jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  705. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  706. JDIMENSION output_col)
  707. {
  708. jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
  709. output_col);
  710. }
  711. GLOBAL(void)
  712. jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  713. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  714. JDIMENSION output_col)
  715. {
  716. jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
  717. output_col);
  718. }
  719. GLOBAL(void)
  720. jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  721. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  722. JDIMENSION output_col)
  723. {
  724. jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
  725. output_col);
  726. }
  727. GLOBAL(int)
  728. jsimd_can_huff_encode_one_block (void)
  729. {
  730. init_simd();
  731. if (DCTSIZE != 8)
  732. return 0;
  733. if (sizeof(JCOEF) != 2)
  734. return 0;
  735. if ((simd_support & JSIMD_SSE2) && simd_huffman &&
  736. IS_ALIGNED_SSE(jconst_huff_encode_one_block))
  737. return 1;
  738. return 0;
  739. }
  740. GLOBAL(JOCTET*)
  741. jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
  742. int last_dc_val, c_derived_tbl *dctbl,
  743. c_derived_tbl *actbl)
  744. {
  745. return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
  746. dctbl, actbl);
  747. }