jsimd_arm.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728
  1. /*
  2. * jsimd_arm.c
  3. *
  4. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  5. * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander.
  6. * Copyright (C) 2015-2016, Matthieu Darbois.
  7. *
  8. * Based on the x86 SIMD extension for IJG JPEG library,
  9. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  10. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  11. *
  12. * This file contains the interface between the "normal" portions
  13. * of the library and the SIMD implementations when running on a
  14. * 32-bit ARM architecture.
  15. */
  16. #define JPEG_INTERNALS
  17. #include "../jinclude.h"
  18. #include "../jpeglib.h"
  19. #include "../jsimd.h"
  20. #include "../jdct.h"
  21. #include "../jsimddct.h"
  22. #include "jsimd.h"
  23. #include <stdio.h>
  24. #include <string.h>
  25. #include <ctype.h>
  26. static unsigned int simd_support = ~0;
  27. static unsigned int simd_huffman = 1;
  28. #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  29. #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
  30. LOCAL(int)
  31. check_feature (char *buffer, char *feature)
  32. {
  33. char *p;
  34. if (*feature == 0)
  35. return 0;
  36. if (strncmp(buffer, "Features", 8) != 0)
  37. return 0;
  38. buffer += 8;
  39. while (isspace(*buffer))
  40. buffer++;
  41. /* Check if 'feature' is present in the buffer as a separate word */
  42. while ((p = strstr(buffer, feature))) {
  43. if (p > buffer && !isspace(*(p - 1))) {
  44. buffer++;
  45. continue;
  46. }
  47. p += strlen(feature);
  48. if (*p != 0 && !isspace(*p)) {
  49. buffer++;
  50. continue;
  51. }
  52. return 1;
  53. }
  54. return 0;
  55. }
  56. LOCAL(int)
  57. parse_proc_cpuinfo (int bufsize)
  58. {
  59. char *buffer = (char *)malloc(bufsize);
  60. FILE *fd;
  61. simd_support = 0;
  62. if (!buffer)
  63. return 0;
  64. fd = fopen("/proc/cpuinfo", "r");
  65. if (fd) {
  66. while (fgets(buffer, bufsize, fd)) {
  67. if (!strchr(buffer, '\n') && !feof(fd)) {
  68. /* "impossible" happened - insufficient size of the buffer! */
  69. fclose(fd);
  70. free(buffer);
  71. return 0;
  72. }
  73. if (check_feature(buffer, "neon"))
  74. simd_support |= JSIMD_ARM_NEON;
  75. }
  76. fclose(fd);
  77. }
  78. free(buffer);
  79. return 1;
  80. }
  81. #endif
  82. /*
  83. * Check what SIMD accelerations are supported.
  84. *
  85. * FIXME: This code is racy under a multi-threaded environment.
  86. */
  87. LOCAL(void)
  88. init_simd (void)
  89. {
  90. char *env = NULL;
  91. #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  92. int bufsize = 1024; /* an initial guess for the line buffer size limit */
  93. #endif
  94. if (simd_support != ~0U)
  95. return;
  96. simd_support = 0;
  97. #if defined(__ARM_NEON__)
  98. simd_support |= JSIMD_ARM_NEON;
  99. #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  100. /* We still have a chance to use NEON regardless of globally used
  101. * -mcpu/-mfpu options passed to gcc by performing runtime detection via
  102. * /proc/cpuinfo parsing on linux/android */
  103. while (!parse_proc_cpuinfo(bufsize)) {
  104. bufsize *= 2;
  105. if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
  106. break;
  107. }
  108. #endif
  109. /* Force different settings through environment variables */
  110. env = getenv("JSIMD_FORCENEON");
  111. if ((env != NULL) && (strcmp(env, "1") == 0))
  112. simd_support = JSIMD_ARM_NEON;
  113. env = getenv("JSIMD_FORCENONE");
  114. if ((env != NULL) && (strcmp(env, "1") == 0))
  115. simd_support = 0;
  116. env = getenv("JSIMD_NOHUFFENC");
  117. if ((env != NULL) && (strcmp(env, "1") == 0))
  118. simd_huffman = 0;
  119. }
  120. GLOBAL(int)
  121. jsimd_can_rgb_ycc (void)
  122. {
  123. init_simd();
  124. /* The code is optimised for these values only */
  125. if (BITS_IN_JSAMPLE != 8)
  126. return 0;
  127. if (sizeof(JDIMENSION) != 4)
  128. return 0;
  129. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  130. return 0;
  131. if (simd_support & JSIMD_ARM_NEON)
  132. return 1;
  133. return 0;
  134. }
  135. GLOBAL(int)
  136. jsimd_can_rgb_gray (void)
  137. {
  138. init_simd();
  139. return 0;
  140. }
  141. GLOBAL(int)
  142. jsimd_can_ycc_rgb (void)
  143. {
  144. init_simd();
  145. /* The code is optimised for these values only */
  146. if (BITS_IN_JSAMPLE != 8)
  147. return 0;
  148. if (sizeof(JDIMENSION) != 4)
  149. return 0;
  150. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  151. return 0;
  152. if (simd_support & JSIMD_ARM_NEON)
  153. return 1;
  154. return 0;
  155. }
  156. GLOBAL(int)
  157. jsimd_can_ycc_rgb565 (void)
  158. {
  159. init_simd();
  160. /* The code is optimised for these values only */
  161. if (BITS_IN_JSAMPLE != 8)
  162. return 0;
  163. if (sizeof(JDIMENSION) != 4)
  164. return 0;
  165. if (simd_support & JSIMD_ARM_NEON)
  166. return 1;
  167. return 0;
  168. }
  169. GLOBAL(void)
  170. jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
  171. JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
  172. JDIMENSION output_row, int num_rows)
  173. {
  174. void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  175. switch(cinfo->in_color_space) {
  176. case JCS_EXT_RGB:
  177. neonfct=jsimd_extrgb_ycc_convert_neon;
  178. break;
  179. case JCS_EXT_RGBX:
  180. case JCS_EXT_RGBA:
  181. neonfct=jsimd_extrgbx_ycc_convert_neon;
  182. break;
  183. case JCS_EXT_BGR:
  184. neonfct=jsimd_extbgr_ycc_convert_neon;
  185. break;
  186. case JCS_EXT_BGRX:
  187. case JCS_EXT_BGRA:
  188. neonfct=jsimd_extbgrx_ycc_convert_neon;
  189. break;
  190. case JCS_EXT_XBGR:
  191. case JCS_EXT_ABGR:
  192. neonfct=jsimd_extxbgr_ycc_convert_neon;
  193. break;
  194. case JCS_EXT_XRGB:
  195. case JCS_EXT_ARGB:
  196. neonfct=jsimd_extxrgb_ycc_convert_neon;
  197. break;
  198. default:
  199. neonfct=jsimd_extrgb_ycc_convert_neon;
  200. break;
  201. }
  202. neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  203. }
  204. GLOBAL(void)
  205. jsimd_rgb_gray_convert (j_compress_ptr cinfo,
  206. JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
  207. JDIMENSION output_row, int num_rows)
  208. {
  209. }
  210. GLOBAL(void)
  211. jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
  212. JSAMPIMAGE input_buf, JDIMENSION input_row,
  213. JSAMPARRAY output_buf, int num_rows)
  214. {
  215. void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  216. switch(cinfo->out_color_space) {
  217. case JCS_EXT_RGB:
  218. neonfct=jsimd_ycc_extrgb_convert_neon;
  219. break;
  220. case JCS_EXT_RGBX:
  221. case JCS_EXT_RGBA:
  222. neonfct=jsimd_ycc_extrgbx_convert_neon;
  223. break;
  224. case JCS_EXT_BGR:
  225. neonfct=jsimd_ycc_extbgr_convert_neon;
  226. break;
  227. case JCS_EXT_BGRX:
  228. case JCS_EXT_BGRA:
  229. neonfct=jsimd_ycc_extbgrx_convert_neon;
  230. break;
  231. case JCS_EXT_XBGR:
  232. case JCS_EXT_ABGR:
  233. neonfct=jsimd_ycc_extxbgr_convert_neon;
  234. break;
  235. case JCS_EXT_XRGB:
  236. case JCS_EXT_ARGB:
  237. neonfct=jsimd_ycc_extxrgb_convert_neon;
  238. break;
  239. default:
  240. neonfct=jsimd_ycc_extrgb_convert_neon;
  241. break;
  242. }
  243. neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  244. }
  245. GLOBAL(void)
  246. jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
  247. JSAMPIMAGE input_buf, JDIMENSION input_row,
  248. JSAMPARRAY output_buf, int num_rows)
  249. {
  250. jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
  251. output_buf, num_rows);
  252. }
  253. GLOBAL(int)
  254. jsimd_can_h2v2_downsample (void)
  255. {
  256. init_simd();
  257. return 0;
  258. }
  259. GLOBAL(int)
  260. jsimd_can_h2v1_downsample (void)
  261. {
  262. init_simd();
  263. return 0;
  264. }
  265. GLOBAL(void)
  266. jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
  267. JSAMPARRAY input_data, JSAMPARRAY output_data)
  268. {
  269. }
  270. GLOBAL(void)
  271. jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
  272. JSAMPARRAY input_data, JSAMPARRAY output_data)
  273. {
  274. }
  275. GLOBAL(int)
  276. jsimd_can_h2v2_upsample (void)
  277. {
  278. init_simd();
  279. return 0;
  280. }
  281. GLOBAL(int)
  282. jsimd_can_h2v1_upsample (void)
  283. {
  284. init_simd();
  285. return 0;
  286. }
  287. GLOBAL(void)
  288. jsimd_h2v2_upsample (j_decompress_ptr cinfo,
  289. jpeg_component_info *compptr,
  290. JSAMPARRAY input_data,
  291. JSAMPARRAY *output_data_ptr)
  292. {
  293. }
  294. GLOBAL(void)
  295. jsimd_h2v1_upsample (j_decompress_ptr cinfo,
  296. jpeg_component_info *compptr,
  297. JSAMPARRAY input_data,
  298. JSAMPARRAY *output_data_ptr)
  299. {
  300. }
  301. GLOBAL(int)
  302. jsimd_can_h2v2_fancy_upsample (void)
  303. {
  304. init_simd();
  305. return 0;
  306. }
  307. GLOBAL(int)
  308. jsimd_can_h2v1_fancy_upsample (void)
  309. {
  310. init_simd();
  311. /* The code is optimised for these values only */
  312. if (BITS_IN_JSAMPLE != 8)
  313. return 0;
  314. if (sizeof(JDIMENSION) != 4)
  315. return 0;
  316. if (simd_support & JSIMD_ARM_NEON)
  317. return 1;
  318. return 0;
  319. }
  320. GLOBAL(void)
  321. jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
  322. jpeg_component_info *compptr,
  323. JSAMPARRAY input_data,
  324. JSAMPARRAY *output_data_ptr)
  325. {
  326. }
  327. GLOBAL(void)
  328. jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
  329. jpeg_component_info *compptr,
  330. JSAMPARRAY input_data,
  331. JSAMPARRAY *output_data_ptr)
  332. {
  333. jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
  334. compptr->downsampled_width, input_data,
  335. output_data_ptr);
  336. }
  337. GLOBAL(int)
  338. jsimd_can_h2v2_merged_upsample (void)
  339. {
  340. init_simd();
  341. return 0;
  342. }
  343. GLOBAL(int)
  344. jsimd_can_h2v1_merged_upsample (void)
  345. {
  346. init_simd();
  347. return 0;
  348. }
  349. GLOBAL(void)
  350. jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
  351. JSAMPIMAGE input_buf,
  352. JDIMENSION in_row_group_ctr,
  353. JSAMPARRAY output_buf)
  354. {
  355. }
  356. GLOBAL(void)
  357. jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
  358. JSAMPIMAGE input_buf,
  359. JDIMENSION in_row_group_ctr,
  360. JSAMPARRAY output_buf)
  361. {
  362. }
  363. GLOBAL(int)
  364. jsimd_can_convsamp (void)
  365. {
  366. init_simd();
  367. /* The code is optimised for these values only */
  368. if (DCTSIZE != 8)
  369. return 0;
  370. if (BITS_IN_JSAMPLE != 8)
  371. return 0;
  372. if (sizeof(JDIMENSION) != 4)
  373. return 0;
  374. if (sizeof(DCTELEM) != 2)
  375. return 0;
  376. if (simd_support & JSIMD_ARM_NEON)
  377. return 1;
  378. return 0;
  379. }
  380. GLOBAL(int)
  381. jsimd_can_convsamp_float (void)
  382. {
  383. init_simd();
  384. return 0;
  385. }
  386. GLOBAL(void)
  387. jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
  388. DCTELEM *workspace)
  389. {
  390. jsimd_convsamp_neon(sample_data, start_col, workspace);
  391. }
  392. GLOBAL(void)
  393. jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
  394. FAST_FLOAT *workspace)
  395. {
  396. }
  397. GLOBAL(int)
  398. jsimd_can_fdct_islow (void)
  399. {
  400. init_simd();
  401. return 0;
  402. }
  403. GLOBAL(int)
  404. jsimd_can_fdct_ifast (void)
  405. {
  406. init_simd();
  407. /* The code is optimised for these values only */
  408. if (DCTSIZE != 8)
  409. return 0;
  410. if (sizeof(DCTELEM) != 2)
  411. return 0;
  412. if (simd_support & JSIMD_ARM_NEON)
  413. return 1;
  414. return 0;
  415. }
  416. GLOBAL(int)
  417. jsimd_can_fdct_float (void)
  418. {
  419. init_simd();
  420. return 0;
  421. }
  422. GLOBAL(void)
  423. jsimd_fdct_islow (DCTELEM *data)
  424. {
  425. }
  426. GLOBAL(void)
  427. jsimd_fdct_ifast (DCTELEM *data)
  428. {
  429. jsimd_fdct_ifast_neon(data);
  430. }
  431. GLOBAL(void)
  432. jsimd_fdct_float (FAST_FLOAT *data)
  433. {
  434. }
  435. GLOBAL(int)
  436. jsimd_can_quantize (void)
  437. {
  438. init_simd();
  439. /* The code is optimised for these values only */
  440. if (DCTSIZE != 8)
  441. return 0;
  442. if (sizeof(JCOEF) != 2)
  443. return 0;
  444. if (sizeof(DCTELEM) != 2)
  445. return 0;
  446. if (simd_support & JSIMD_ARM_NEON)
  447. return 1;
  448. return 0;
  449. }
  450. GLOBAL(int)
  451. jsimd_can_quantize_float (void)
  452. {
  453. init_simd();
  454. return 0;
  455. }
  456. GLOBAL(void)
  457. jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
  458. DCTELEM *workspace)
  459. {
  460. jsimd_quantize_neon(coef_block, divisors, workspace);
  461. }
  462. GLOBAL(void)
  463. jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
  464. FAST_FLOAT *workspace)
  465. {
  466. }
  467. GLOBAL(int)
  468. jsimd_can_idct_2x2 (void)
  469. {
  470. init_simd();
  471. /* The code is optimised for these values only */
  472. if (DCTSIZE != 8)
  473. return 0;
  474. if (sizeof(JCOEF) != 2)
  475. return 0;
  476. if (BITS_IN_JSAMPLE != 8)
  477. return 0;
  478. if (sizeof(JDIMENSION) != 4)
  479. return 0;
  480. if (sizeof(ISLOW_MULT_TYPE) != 2)
  481. return 0;
  482. if (simd_support & JSIMD_ARM_NEON)
  483. return 1;
  484. return 0;
  485. }
  486. GLOBAL(int)
  487. jsimd_can_idct_4x4 (void)
  488. {
  489. init_simd();
  490. /* The code is optimised for these values only */
  491. if (DCTSIZE != 8)
  492. return 0;
  493. if (sizeof(JCOEF) != 2)
  494. return 0;
  495. if (BITS_IN_JSAMPLE != 8)
  496. return 0;
  497. if (sizeof(JDIMENSION) != 4)
  498. return 0;
  499. if (sizeof(ISLOW_MULT_TYPE) != 2)
  500. return 0;
  501. if (simd_support & JSIMD_ARM_NEON)
  502. return 1;
  503. return 0;
  504. }
  505. GLOBAL(void)
  506. jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  507. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  508. JDIMENSION output_col)
  509. {
  510. jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
  511. output_col);
  512. }
  513. GLOBAL(void)
  514. jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  515. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  516. JDIMENSION output_col)
  517. {
  518. jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
  519. output_col);
  520. }
  521. GLOBAL(int)
  522. jsimd_can_idct_islow (void)
  523. {
  524. init_simd();
  525. /* The code is optimised for these values only */
  526. if (DCTSIZE != 8)
  527. return 0;
  528. if (sizeof(JCOEF) != 2)
  529. return 0;
  530. if (BITS_IN_JSAMPLE != 8)
  531. return 0;
  532. if (sizeof(JDIMENSION) != 4)
  533. return 0;
  534. if (sizeof(ISLOW_MULT_TYPE) != 2)
  535. return 0;
  536. if (simd_support & JSIMD_ARM_NEON)
  537. return 1;
  538. return 0;
  539. }
  540. GLOBAL(int)
  541. jsimd_can_idct_ifast (void)
  542. {
  543. init_simd();
  544. /* The code is optimised for these values only */
  545. if (DCTSIZE != 8)
  546. return 0;
  547. if (sizeof(JCOEF) != 2)
  548. return 0;
  549. if (BITS_IN_JSAMPLE != 8)
  550. return 0;
  551. if (sizeof(JDIMENSION) != 4)
  552. return 0;
  553. if (sizeof(IFAST_MULT_TYPE) != 2)
  554. return 0;
  555. if (IFAST_SCALE_BITS != 2)
  556. return 0;
  557. if (simd_support & JSIMD_ARM_NEON)
  558. return 1;
  559. return 0;
  560. }
  561. GLOBAL(int)
  562. jsimd_can_idct_float (void)
  563. {
  564. init_simd();
  565. return 0;
  566. }
  567. GLOBAL(void)
  568. jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  569. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  570. JDIMENSION output_col)
  571. {
  572. jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
  573. output_col);
  574. }
  575. GLOBAL(void)
  576. jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  577. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  578. JDIMENSION output_col)
  579. {
  580. jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
  581. output_col);
  582. }
  583. GLOBAL(void)
  584. jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  585. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  586. JDIMENSION output_col)
  587. {
  588. }
  589. GLOBAL(int)
  590. jsimd_can_huff_encode_one_block (void)
  591. {
  592. init_simd();
  593. if (DCTSIZE != 8)
  594. return 0;
  595. if (sizeof(JCOEF) != 2)
  596. return 0;
  597. if (simd_support & JSIMD_ARM_NEON && simd_huffman)
  598. return 1;
  599. return 0;
  600. }
  601. GLOBAL(JOCTET*)
  602. jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
  603. int last_dc_val, c_derived_tbl *dctbl,
  604. c_derived_tbl *actbl)
  605. {
  606. return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
  607. dctbl, actbl);
  608. }