// dsp.h
  // Copyright 2011 Google Inc. All Rights Reserved.
  //
  // Use of this source code is governed by a BSD-style license
  // that can be found in the COPYING file in the root of the source
  // tree. An additional intellectual property rights grant can be found
  // in the file PATENTS. All contributing project authors may
  // be found in the AUTHORS file in the root of the source tree.
  // -----------------------------------------------------------------------------
  //
  // Speed-critical functions.
  //
  // Author: Skal (pascal.massimino@gmail.com)

  13. #ifndef WEBP_DSP_DSP_H_
  14. #define WEBP_DSP_DSP_H_
  15. #ifdef HAVE_CONFIG_H
  16. #include "../webp/config.h"
  17. #endif
  18. #include "../webp/types.h"
  19. #ifdef __cplusplus
  20. extern "C" {
  21. #endif
  #define BPS 32    // this is the common stride for enc/dec

  //------------------------------------------------------------------------------
  // CPU detection

  // Compiler version helpers.
  // LOCAL_GCC_VERSION / LOCAL_CLANG_VERSION pack the compiler version as
  // (major << 8) | minor, and are 0 when __GNUC__ / __clang__ is not defined.
  // LOCAL_GCC_PREREQ(maj, min) / LOCAL_CLANG_PREREQ(maj, min) are non-zero when
  // the packed version is at least maj.min, and always 0 for other compilers.
  #if defined(__GNUC__)
  # define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
  # define LOCAL_GCC_PREREQ(maj, min) \
      (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
  #else
  # define LOCAL_GCC_VERSION 0
  # define LOCAL_GCC_PREREQ(maj, min) 0
  #endif

  #if defined(__clang__)
  # define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
  # define LOCAL_CLANG_PREREQ(maj, min) \
      (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
  #else
  # define LOCAL_CLANG_VERSION 0
  # define LOCAL_CLANG_PREREQ(maj, min) 0
  #endif

  // Fallback so that '#if __has_builtin(x)' is usable with compilers that do
  // not provide it: every query then evaluates to 0.
  #ifndef __has_builtin
  # define __has_builtin(x) 0
  #endif

  // for now, none of the optimizations below are available in emscripten
  #if !defined(EMSCRIPTEN)

  #if defined(_MSC_VER) && _MSC_VER > 1310 && \
      (defined(_M_X64) || defined(_M_IX86))
  #define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
  #endif

  #if defined(_MSC_VER) && _MSC_VER >= 1500 && \
      (defined(_M_X64) || defined(_M_IX86))
  #define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
  #endif

  // WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
  // files without intrinsics, allowing the corresponding Init() to be called.
  // Files containing intrinsics will need to be built targeting the instruction
  // set so should succeed on one of the earlier tests.
  #if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
  #define WEBP_USE_SSE2
  #endif

  #if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
  #define WEBP_USE_SSE41
  #endif

  // The intrinsics currently cause compiler errors with arm-nacl-gcc and the
  // inline assembly would need to be modified for use with Native Client.
  #if (defined(__ARM_NEON__) || \
       defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
      !defined(__native_client__)
  #define WEBP_USE_NEON
  #endif

  #if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
      defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
  #define WEBP_ANDROID_NEON  // Android targets that may have NEON
  #define WEBP_USE_NEON
  #endif

  #if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
  #define WEBP_USE_NEON
  #define WEBP_USE_INTRINSICS
  #endif

  // MIPS32 (ISA revisions 1 to 5, 32-bit only); R2 and DSP R2 are refinements
  // that are only considered once WEBP_USE_MIPS32 is selected.
  #if defined(__mips__) && !defined(__mips64) && \
      defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
  #define WEBP_USE_MIPS32
  #if (__mips_isa_rev >= 2)
  #define WEBP_USE_MIPS32_R2
  #if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
  #define WEBP_USE_MIPS_DSP_R2
  #endif
  #endif
  #endif

  #if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
  #define WEBP_USE_MSA
  #endif

  #endif  /* EMSCRIPTEN */

  // WEBP_DSP_OMIT_C_CODE defaults to 1 unless the build already defined it.
  #ifndef WEBP_DSP_OMIT_C_CODE
  #define WEBP_DSP_OMIT_C_CODE 1
  #endif

  // WEBP_NEON_OMIT_C_CODE is 1 only when compiling for aarch64 or with
  // __ARM_NEON__ defined, and WEBP_DSP_OMIT_C_CODE is non-zero; 0 otherwise.
  #if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
  #define WEBP_NEON_OMIT_C_CODE 1
  #else
  #define WEBP_NEON_OMIT_C_CODE 0
  #endif

  102. #if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
  103. #define WEBP_NEON_WORK_AROUND_GCC 1
  104. #else
  105. #define WEBP_NEON_WORK_AROUND_GCC 0
  106. #endif
  // This macro prevents thread_sanitizer from reporting known concurrent writes.
  // It expands to nothing unless the compiler reports the thread_sanitizer
  // feature through __has_feature.
  #define WEBP_TSAN_IGNORE_FUNCTION
  #if defined(__has_feature)
  #if __has_feature(thread_sanitizer)
  #undef WEBP_TSAN_IGNORE_FUNCTION
  #define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
  #endif
  #endif

  // WEBP_DSP_INIT(func): calls func() unless it has already been called, from
  // this expansion site, for the current value of VP8GetCPUInfo.
  // A function-local static remembers the VP8GetCPUInfo value seen at the last
  // call. It is initialized with its own address, a value VP8GetCPUInfo cannot
  // hold, so the first pass always runs func().
  // With WEBP_USE_THREAD on non-Windows targets the check and the call are
  // serialized by a function-local static mutex; if the lock cannot be acquired
  // the macro does nothing.
  #if defined(WEBP_USE_THREAD) && !defined(_WIN32)
  #include <pthread.h>  // NOLINT

  #define WEBP_DSP_INIT(func) do { \
    static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
        (VP8CPUInfo)&func ## _last_cpuinfo_used; \
    static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
    if (pthread_mutex_lock(&func ## _lock)) break; \
    if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \
    func ## _last_cpuinfo_used = VP8GetCPUInfo; \
    (void)pthread_mutex_unlock(&func ## _lock); \
  } while (0)
  #else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
  #define WEBP_DSP_INIT(func) do { \
    static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
        (VP8CPUInfo)&func ## _last_cpuinfo_used; \
    if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \
    func(); \
    func ## _last_cpuinfo_used = VP8GetCPUInfo; \
  } while (0)
  #endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)

  // Defines an Init + helper function that control multiple initialization of
  // function pointers / tables.
  // 'name' becomes a public void(void) function that forwards to the static
  // function 'name_body' through WEBP_DSP_INIT; the braces following the macro
  // invocation supply the body of 'name_body'.
  /* Usage:
     WEBP_DSP_INIT_FUNC(InitFunc) {
       ...function body
     }
  */
  #define WEBP_DSP_INIT_FUNC(name) \
    static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
    WEBP_TSAN_IGNORE_FUNCTION void name(void) { \
      WEBP_DSP_INIT(name ## _body); \
    } \
    static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)

  // Both macros expand to nothing unless the compiler is clang and supports the
  // 'no_sanitize' attribute.
  #define WEBP_UBSAN_IGNORE_UNDEF
  #define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
  #if defined(__clang__) && defined(__has_attribute)
  #if __has_attribute(no_sanitize)
  // This macro prevents the undefined behavior sanitizer from reporting
  // failures. This is only meant to silence unaligned loads on platforms that
  // are known to support them.
  #undef WEBP_UBSAN_IGNORE_UNDEF
  #define WEBP_UBSAN_IGNORE_UNDEF \
    __attribute__((no_sanitize("undefined")))

  // This macro prevents the undefined behavior sanitizer from reporting
  // failures related to unsigned integer overflows. This is only meant to
  // silence cases where this well defined behavior is expected.
  #undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
  #define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
    __attribute__((no_sanitize("unsigned-integer-overflow")))
  #endif
  #endif

  // Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
  #if !defined(WEBP_SWAP_16BIT_CSP)
  #define WEBP_SWAP_16BIT_CSP 0
  #endif

  // some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
  #if !defined(WORDS_BIGENDIAN) && \
      (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
       (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
  #define WORDS_BIGENDIAN
  #endif

  // CPU features that can be queried through a VP8CPUInfo callback.
  typedef enum {
    kSSE2,
    kSSE3,
    kSlowSSSE3,  // special feature for slow SSSE3 architectures
    kSSE4_1,
    kAVX,
    kAVX2,
    kNEON,
    kMIPS32,
    kMIPSdspR2,
    kMSA
  } CPUFeature;
  188. // returns true if the CPU supports the feature.
  189. typedef int (*VP8CPUInfo)(CPUFeature feature);
  190. WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
  //------------------------------------------------------------------------------
  // Init stub generator

  // Defines an init function stub to ensure each module exposes a symbol,
  // avoiding a compiler warning.
  // Expands to a declaration followed by an empty definition of
  // 'void func(void)'.
  #define WEBP_DSP_INIT_STUB(func) \
    extern void func(void); \
    void func(void) {}

  //------------------------------------------------------------------------------
  // Encoding

  // Transforms
  // VP8Idct: Does one or two inverse transforms. If do_two is set, the
  // transforms will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
  typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                          int do_two);
  typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
  typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
  extern VP8Idct VP8ITransform;
  extern VP8Fdct VP8FTransform;
  extern VP8Fdct VP8FTransform2;   // performs two transforms at a time
  extern VP8WHT VP8FTransformWHT;
  // Predictions
  // *dst is the destination block. *top and *left can be NULL.
  typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
                                const uint8_t* top);
  typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
  extern VP8Intra4Preds VP8EncPredLuma4;
  extern VP8IntraPreds VP8EncPredLuma16;
  extern VP8IntraPreds VP8EncPredChroma8;

  // Metric between the 'pix' and 'ref' blocks; the block size is given by the
  // suffix of each function pointer name.
  typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
  extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
  typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
                            const uint16_t* const weights);
  // The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major
  // 4 by 4 symmetric matrix.
  extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;

  // Compute the average (DC) of four 4x4 blocks.
  // Each sub-4x4 block #i sum is stored in dc[i].
  typedef void (*VP8MeanMetric)(const uint8_t* ref, uint32_t dc[4]);
  extern VP8MeanMetric VP8Mean16x4;

  typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
  extern VP8BlockCopy VP8Copy4x4;
  extern VP8BlockCopy VP8Copy16x8;
  // Quantization
  struct VP8Matrix;   // forward declaration
  typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
                                  const struct VP8Matrix* const mtx);
  // Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
  typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
                                    const struct VP8Matrix* const mtx);

  extern VP8QuantizeBlock VP8EncQuantizeBlock;
  extern VP8Quantize2Blocks VP8EncQuantize2Blocks;

  // specific to 2nd transform:
  typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
                                     const struct VP8Matrix* const mtx);
  extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;

  extern const int VP8DspScan[16 + 4 + 4];

  // Collect histogram for susceptibility calculation.
  #define MAX_COEFF_THRESH   31   // size of histogram used by CollectHistogram.
  typedef struct {
    // We only need to store max_value and last_non_zero, not the distribution.
    int max_value;
    int last_non_zero;
  } VP8Histogram;
  typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
                            int start_block, int end_block,
                            VP8Histogram* const histo);
  extern VP8CHisto VP8CollectHistogram;
  // General-purpose util function to help VP8CollectHistogram().
  void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
                           VP8Histogram* const histo);

  // must be called before using any of the above
  void VP8EncDspInit(void);

  //------------------------------------------------------------------------------
  // cost functions (encoding)

  extern const uint16_t VP8EntropyCost[256];        // 8bit fixed-point log(p)
  // approximate cost per level:
  extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
  extern const uint8_t VP8EncBands[16 + 1];

  struct VP8Residual;   // forward declaration
  typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
                                           struct VP8Residual* const res);
  extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;

  // Cost calculation function.
  typedef int (*VP8GetResidualCostFunc)(int ctx0,
                                        const struct VP8Residual* const res);
  extern VP8GetResidualCostFunc VP8GetResidualCost;

  // must be called before anything using the above
  void VP8EncDspCostInit(void);

  //------------------------------------------------------------------------------
  // SSIM / PSNR utils

  // struct for accumulating statistical moments
  typedef struct {
    uint32_t w;              // sum(w_i) : sum of weights
    uint32_t xm, ym;         // sum(w_i * x_i), sum(w_i * y_i)
    uint32_t xxm, xym, yym;  // sum(w_i * x_i * x_i), etc.
  } VP8DistoStats;

  // Compute the final SSIM value
  // The non-clipped version assumes stats->w = (2 * VP8_SSIM_KERNEL + 1)^2.
  double VP8SSIMFromStats(const VP8DistoStats* const stats);
  double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats);

  #define VP8_SSIM_KERNEL 3   // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1
  typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
                                          const uint8_t* src2, int stride2,
                                          int xo, int yo,  // center position
                                          int W, int H);   // plane dimension

  #if !defined(WEBP_REDUCE_SIZE)
  // This version is called with the guarantee that you can load 8 bytes and
  // 8 rows at offset src1 and src2
  typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
                                   const uint8_t* src2, int stride2);

  extern VP8SSIMGetFunc VP8SSIMGet;         // unclipped / unchecked
  extern VP8SSIMGetClippedFunc VP8SSIMGetClipped;   // with clipping
  #endif

  #if !defined(WEBP_DISABLE_STATS)
  typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
                                           const uint8_t* src2, int len);
  extern VP8AccumulateSSEFunc VP8AccumulateSSE;
  #endif

  // must be called before using any of the above directly
  void VP8SSIMDspInit(void);

  311. //------------------------------------------------------------------------------
  312. // Decoding
  313. typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
  314. // when doing two transforms, coeffs is actually int16_t[2][16].
  315. typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
  316. extern VP8DecIdct2 VP8Transform;
  317. extern VP8DecIdct VP8TransformAC3;
  318. extern VP8DecIdct VP8TransformUV;
  319. extern VP8DecIdct VP8TransformDC;
  320. extern VP8DecIdct VP8TransformDCUV;
  321. extern VP8WHT VP8TransformWHT;
  // *dst is the destination block, with stride BPS. Boundary samples are
  // assumed accessible when needed.
  typedef void (*VP8PredFunc)(uint8_t* dst);
  extern VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
  extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
  extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];

  // clipping tables (for filtering)
  extern const int8_t* const VP8ksclip1;  // clips [-1020, 1020] to [-128, 127]
  extern const int8_t* const VP8ksclip2;  // clips [-112, 112] to [-16, 15]
  extern const uint8_t* const VP8kclip1;  // clips [-255,511] to [0,255]
  extern const uint8_t* const VP8kabs0;   // abs(x) for x in [-255,255]
  // must be called first
  void VP8InitClipTables(void);

  // simple filter (only for luma)
  typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
  extern VP8SimpleFilterFunc VP8SimpleVFilter16;
  extern VP8SimpleFilterFunc VP8SimpleHFilter16;
  extern VP8SimpleFilterFunc VP8SimpleVFilter16i;  // filter 3 inner edges
  extern VP8SimpleFilterFunc VP8SimpleHFilter16i;

  // regular filter (on both macroblock edges and inner edges)
  typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
                                    int thresh, int ithresh, int hev_t);
  typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
                                      int thresh, int ithresh, int hev_t);
  // on outer edge
  extern VP8LumaFilterFunc VP8VFilter16;
  extern VP8LumaFilterFunc VP8HFilter16;
  extern VP8ChromaFilterFunc VP8VFilter8;
  extern VP8ChromaFilterFunc VP8HFilter8;

  // on inner edge
  extern VP8LumaFilterFunc VP8VFilter16i;   // filtering 3 inner edges altogether
  extern VP8LumaFilterFunc VP8HFilter16i;
  extern VP8ChromaFilterFunc VP8VFilter8i;  // filtering u and v altogether
  extern VP8ChromaFilterFunc VP8HFilter8i;

  // Dithering. Combines dithering values (centered around 128) with dst[],
  // according to: dst[] = clip(dst[] + (((dither[]-128) + 8) >> 4))
  #define VP8_DITHER_DESCALE 4
  #define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1))
  #define VP8_DITHER_AMP_BITS 7
  #define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS)
  extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
                                     int dst_stride);

  // must be called before anything using the above
  void VP8DspInit(void);

  //------------------------------------------------------------------------------
  // WebP I/O

  #define FANCY_UPSAMPLING   // undefined to remove fancy upsampling support

  // Convert a pair of y/u/v lines together to the output rgb/a colorspace.
  // bottom_y can be NULL if only one line of output is needed (at top/bottom).
  typedef void (*WebPUpsampleLinePairFunc)(
      const uint8_t* top_y, const uint8_t* bottom_y,
      const uint8_t* top_u, const uint8_t* top_v,
      const uint8_t* cur_u, const uint8_t* cur_v,
      uint8_t* top_dst, uint8_t* bottom_dst, int len);

  #ifdef FANCY_UPSAMPLING

  // Fancy upsampling functions to convert YUV to RGB(A) modes
  extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];

  #endif    // FANCY_UPSAMPLING

  // Per-row point-sampling methods.
  typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
                                     const uint8_t* u, const uint8_t* v,
                                     uint8_t* dst, int len);
  // Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
  void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
                               const uint8_t* u, const uint8_t* v, int uv_stride,
                               uint8_t* dst, int dst_stride,
                               int width, int height, WebPSamplerRowFunc func);

  // Sampling functions to convert rows of YUV to RGB(A)
  extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];

  // General function for converting two lines of ARGB or RGBA.
  // 'alpha_is_last' should be true if 0xff000000 is stored in memory as
  // 0x00, 0x00, 0x00, 0xff (little endian).
  WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);

  // YUV444->RGB converters
  typedef void (*WebPYUV444Converter)(const uint8_t* y,
                                      const uint8_t* u, const uint8_t* v,
                                      uint8_t* dst, int len);

  extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];

  // Must be called before using the WebPUpsamplers[] (and for premultiplied
  // colorspaces like rgbA, rgbA4444, etc)
  void WebPInitUpsamplers(void);
  // Must be called before using WebPSamplers[]
  void WebPInitSamplers(void);
  // Must be called before using WebPYUV444Converters[]
  void WebPInitYUV444Converters(void);

  //------------------------------------------------------------------------------
  // ARGB -> YUV converters

  // Convert ARGB samples to luma Y.
  extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
  // Convert ARGB samples to U/V with downsampling. do_store should be '1' for
  // even lines and '0' for odd ones. 'src_width' is the original width, not
  // the U/V one.
  extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
                                     int src_width, int do_store);

  // Convert a row of accumulated (four-values) rgba32 samples to U/V.
  extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
                                       uint8_t* u, uint8_t* v, int width);

  // Convert RGB or BGR to Y
  extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
  extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);

  // used for plain-C fallback.
  extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
                                    int src_width, int do_store);
  extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
                                      uint8_t* u, uint8_t* v, int width);

  // utilities for accurate RGB->YUV conversion
  extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
                                         uint16_t* dst, int len);
  extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
                                       int16_t* dst, int len);
  extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
                                       int len,
                                       const uint16_t* best_y, uint16_t* out);

  // Must be called before using the above.
  void WebPInitConvertARGBToYUV(void);

  //------------------------------------------------------------------------------
  // Rescaler

  struct WebPRescaler;   // forward declaration

  // Import a row of data and save its contribution in the rescaler.
  // 'channel' denotes the channel number to be imported. 'Expand' corresponds to
  // the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
  typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
                                            const uint8_t* src);

  extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
  extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;

  // Export one row (starting at x_out position) from rescaler.
  // 'Expand' corresponds to the wrk->y_expand case.
  // Otherwise 'Shrink' is to be used
  typedef void (*WebPRescalerExportRowFunc)(struct WebPRescaler* const wrk);
  extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
  extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;

  // Plain-C implementation, as fall-back.
  extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
                                            const uint8_t* src);
  extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
                                            const uint8_t* src);
  extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
  extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);

  // Main entry calls:
  extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
                                    const uint8_t* src);
  // Export one row (starting at x_out position) from rescaler.
  extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);

  // Must be called first before using the above.
  void WebPRescalerDspInit(void);

  //------------------------------------------------------------------------------
  // Utilities for processing transparent channel.

  // Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
  // alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
  // NOTE(review): the 0/1 mapping above reads inverted with respect to the
  // parameter name ('alpha_first' being 0 for argb, where alpha comes first);
  // confirm against the implementation before relying on it.
  extern void (*WebPApplyAlphaMultiply)(
      uint8_t* rgba, int alpha_first, int w, int h, int stride);

  // Same, but specifically for RGBA4444 format
  extern void (*WebPApplyAlphaMultiply4444)(
      uint8_t* rgba4444, int w, int h, int stride);

  // Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
  // Returns true if alpha[] plane has non-trivial values different from 0xff.
  extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
                                  int width, int height,
                                  uint8_t* dst, int dst_stride);

  // Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
  // A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
  extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
                                          int width, int height,
                                          uint32_t* dst, int dst_stride);

  // Extract the alpha values from 32b values in argb[] and pack them into alpha[]
  // (this is the opposite of WebPDispatchAlpha).
  // Returns true if there's only trivial 0xff alpha values.
  extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
                                 int width, int height,
                                 uint8_t* alpha, int alpha_stride);

  // Extract the green values from 32b values in argb[] and pack them into alpha[]
  // (this is the opposite of WebPDispatchAlphaToGreen).
  extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);

  // Pre-Multiply operation transforms x into x * A / 255  (where x=Y,R,G or B).
  // Un-Multiply operation transforms x into x * 255 / A.

  // Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row.
  extern void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);

  // Same as WebPMultARGBRow(), but for several rows.
  void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
                        int inverse);

  // Same for a row of single values, with side alpha values.
  extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
                             int width, int inverse);

  // Same as WebPMultRow(), but for several 'num_rows' rows.
  void WebPMultRows(uint8_t* ptr, int stride,
                    const uint8_t* alpha, int alpha_stride,
                    int width, int num_rows, int inverse);

  // Plain-C versions, used as fallback by some implementations.
  void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
                     int width, int inverse);
  void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);

  #ifdef WORDS_BIGENDIAN
  // ARGB packing function: a/r/g/b input is rgba or bgra order.
  extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r,
                              const uint8_t* g, const uint8_t* b, int len,
                              uint32_t* out);
  #endif

  // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
  extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
                             int len, int step, uint32_t* out);

  // This function returns true if src[i] contains a value different from 0xff.
  extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
  // This function returns true if src[4*i] contains a value different from 0xff.
  extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);

  // To be called first before using the above.
  void WebPInitAlphaProcessing(void);

  //------------------------------------------------------------------------------
  // Filter functions

  typedef enum {     // Filter types.
    WEBP_FILTER_NONE = 0,
    WEBP_FILTER_HORIZONTAL,
    WEBP_FILTER_VERTICAL,
    WEBP_FILTER_GRADIENT,
    WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1,  // end marker
    WEBP_FILTER_BEST,    // meta-types
    WEBP_FILTER_FAST
  } WEBP_FILTER_TYPE;

  typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
                                 int stride, uint8_t* out);
  // In-place un-filtering.
  // Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
  typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds,
                                   uint8_t* cur_line, int width);

  // Filter the given data using the given predictor.
  // 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
  // in raster order.
  // 'stride' is number of bytes per scan line (with possible padding).
  // 'out' should be pre-allocated.
  extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];

  // In-place reconstruct the original data from the given filtered data.
  // The reconstruction will be done for 'num_rows' rows starting from 'row'
  // (assuming rows up to 'row - 1' are already reconstructed).
  // NOTE(review): WebPUnfilterFunc has no 'num_rows' / 'row' parameters; this
  // description looks like it predates the current signature -- confirm.
  extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];

  // To be called first before using the above.
  void VP8FiltersInit(void);

  557. #ifdef __cplusplus
  558. } // extern "C"
  559. #endif
  560. #endif // WEBP_DSP_DSP_H_