astcenc_mathlib.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2021 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. /*
  18. * This module implements a variety of mathematical data types and library
  19. * functions used by the codec.
  20. */
  21. #ifndef ASTC_MATHLIB_H_INCLUDED
  22. #define ASTC_MATHLIB_H_INCLUDED
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
  26. #ifndef ASTCENC_POPCNT
  27. #if defined(__POPCNT__)
  28. #define ASTCENC_POPCNT 1
  29. #else
  30. #define ASTCENC_POPCNT 0
  31. #endif
  32. #endif
  33. #ifndef ASTCENC_F16C
  34. #if defined(__F16C__)
  35. #define ASTCENC_F16C 1
  36. #else
  37. #define ASTCENC_F16C 0
  38. #endif
  39. #endif
  40. #ifndef ASTCENC_SSE
  41. #if defined(__SSE4_2__)
  42. #define ASTCENC_SSE 42
  43. #elif defined(__SSE4_1__)
  44. #define ASTCENC_SSE 41
  45. #elif defined(__SSE2__)
  46. #define ASTCENC_SSE 20
  47. #else
  48. #define ASTCENC_SSE 0
  49. #endif
  50. #endif
  51. #ifndef ASTCENC_AVX
  52. #if defined(__AVX2__)
  53. #define ASTCENC_AVX 2
  54. #elif defined(__AVX__)
  55. #define ASTCENC_AVX 1
  56. #else
  57. #define ASTCENC_AVX 0
  58. #endif
  59. #endif
  60. #ifndef ASTCENC_NEON
  61. #if defined(__aarch64__)
  62. #define ASTCENC_NEON 1
  63. #else
  64. #define ASTCENC_NEON 0
  65. #endif
  66. #endif
  67. #if ASTCENC_AVX
  68. #define ASTCENC_VECALIGN 32
  69. #else
  70. #define ASTCENC_VECALIGN 16
  71. #endif
  72. #if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
  73. #include <immintrin.h>
  74. #endif
  75. /* ============================================================================
  76. Fast math library; note that many of the higher-order functions in this set
  77. use approximations which are less accurate, but faster, than <cmath> standard
  78. library equivalents.
  79. Note: Many of these are not necessarily faster than simple C versions when
  80. used on a single scalar value, but are included for testing purposes as most
  81. have an option based on SSE intrinsics and therefore provide an obvious route
  82. to future vectorization.
  83. ============================================================================ */
  84. // Union for manipulation of float bit patterns
  85. typedef union
  86. {
  87. uint32_t u;
  88. int32_t s;
  89. float f;
  90. } if32;
  91. // These are namespaced to avoid colliding with C standard library functions.
  92. namespace astc
  93. {
  94. static const float PI = 3.14159265358979323846f;
  95. static const float PI_OVER_TWO = 1.57079632679489661923f;
  96. /**
  97. * @brief SP float absolute value.
  98. *
  99. * @param v The value to make absolute.
  100. *
  101. * @return The absolute value.
  102. */
  103. static inline float fabs(float v)
  104. {
  105. return std::fabs(v);
  106. }
  107. /**
  108. * @brief Test if a float value is a nan.
  109. *
  110. * @param v The value test.
  111. *
  112. * @return Zero is not a NaN, non-zero otherwise.
  113. */
  114. static inline bool isnan(float v)
  115. {
  116. return v != v;
  117. }
  118. /**
  119. * @brief Return the minimum of two values.
  120. *
  121. * For floats, NaNs are turned into @c q.
  122. *
  123. * @param p The first value to compare.
  124. * @param q The second value to compare.
  125. *
  126. * @return The smallest value.
  127. */
  128. template<typename T>
  129. static inline T min(T p, T q)
  130. {
  131. return p < q ? p : q;
  132. }
  133. /**
  134. * @brief Return the minimum of three values.
  135. *
  136. * For floats, NaNs are turned into @c r.
  137. *
  138. * @param p The first value to compare.
  139. * @param q The second value to compare.
  140. * @param r The third value to compare.
  141. *
  142. * @return The smallest value.
  143. */
  144. template<typename T>
  145. static inline T min(T p, T q, T r)
  146. {
  147. return min(min(p, q), r);
  148. }
  149. /**
  150. * @brief Return the minimum of four values.
  151. *
  152. * For floats, NaNs are turned into @c s.
  153. *
  154. * @param p The first value to compare.
  155. * @param q The second value to compare.
  156. * @param r The third value to compare.
  157. * @param s The fourth value to compare.
  158. *
  159. * @return The smallest value.
  160. */
  161. template<typename T>
  162. static inline T min(T p, T q, T r, T s)
  163. {
  164. return min(min(p, q), min(r, s));
  165. }
  166. /**
  167. * @brief Return the maximum of two values.
  168. *
  169. * For floats, NaNs are turned into @c q.
  170. *
  171. * @param p The first value to compare.
  172. * @param q The second value to compare.
  173. *
  174. * @return The largest value.
  175. */
  176. template<typename T>
  177. static inline T max(T p, T q)
  178. {
  179. return p > q ? p : q;
  180. }
  181. /**
  182. * @brief Return the maximum of three values.
  183. *
  184. * For floats, NaNs are turned into @c r.
  185. *
  186. * @param p The first value to compare.
  187. * @param q The second value to compare.
  188. * @param r The third value to compare.
  189. *
  190. * @return The largest value.
  191. */
  192. template<typename T>
  193. static inline T max(T p, T q, T r)
  194. {
  195. return max(max(p, q), r);
  196. }
  197. /**
  198. * @brief Return the maximum of four values.
  199. *
  200. * For floats, NaNs are turned into @c s.
  201. *
  202. * @param p The first value to compare.
  203. * @param q The second value to compare.
  204. * @param r The third value to compare.
  205. * @param s The fourth value to compare.
  206. *
  207. * @return The largest value.
  208. */
  209. template<typename T>
  210. static inline T max(T p, T q, T r, T s)
  211. {
  212. return max(max(p, q), max(r, s));
  213. }
  214. /**
  215. * @brief Clamp a value value between @c mn and @c mx.
  216. *
  217. * For floats, NaNs are turned into @c mn.
  218. *
  219. * @param v The value to clamp.
  220. * @param mn The min value (inclusive).
  221. * @param mx The max value (inclusive).
  222. *
  223. * @return The clamped value.
  224. */
  225. template<typename T>
  226. inline T clamp(T v, T mn, T mx)
  227. {
  228. // Do not reorder; correct NaN handling relies on the fact that comparison
  229. // with NaN returns false and will fall-though to the "min" value.
  230. if (v > mx) return mx;
  231. if (v > mn) return v;
  232. return mn;
  233. }
  234. /**
  235. * @brief Clamp a float value between 0.0f and 1.0f.
  236. *
  237. * NaNs are turned into 0.0f.
  238. *
  239. * @param v The value to clamp.
  240. *
  241. * @return The clamped value.
  242. */
  243. static inline float clamp1f(float v)
  244. {
  245. return astc::clamp(v, 0.0f, 1.0f);
  246. }
  247. /**
  248. * @brief Clamp a float value between 0.0f and 255.0f.
  249. *
  250. * NaNs are turned into 0.0f.
  251. *
  252. * @param v The value to clamp.
  253. *
  254. * @return The clamped value.
  255. */
  256. static inline float clamp255f(float v)
  257. {
  258. return astc::clamp(v, 0.0f, 255.0f);
  259. }
  260. /**
  261. * @brief SP float round-down.
  262. *
  263. * @param v The value to round.
  264. *
  265. * @return The rounded value.
  266. */
  267. static inline float flt_rd(float v)
  268. {
  269. return std::floor(v);
  270. }
  271. /**
  272. * @brief SP float round-to-nearest and convert to integer.
  273. *
  274. * @param v The value to round.
  275. *
  276. * @return The rounded value.
  277. */
  278. static inline int flt2int_rtn(float v)
  279. {
  280. return static_cast<int>(v + 0.5f);
  281. }
  282. /**
  283. * @brief SP float round down and convert to integer.
  284. *
  285. * @param v The value to round.
  286. *
  287. * @return The rounded value.
  288. */
  289. static inline int flt2int_rd(float v)
  290. {
  291. return static_cast<int>(v);
  292. }
  293. /**
  294. * @brief SP float bit-interpreted as an integer.
  295. *
  296. * @param v The value to bitcast.
  297. *
  298. * @return The converted value.
  299. */
  300. static inline int float_as_int(float v)
  301. {
  302. union { int a; float b; } u;
  303. u.b = v;
  304. return u.a;
  305. }
  306. /**
  307. * @brief Integer bit-interpreted as an SP float.
  308. *
  309. * @param v The value to bitcast.
  310. *
  311. * @return The converted value.
  312. */
  313. static inline float int_as_float(int v)
  314. {
  315. union { int a; float b; } u;
  316. u.a = v;
  317. return u.b;
  318. }
  319. /**
  320. * @brief Fast approximation of 1.0 / sqrt(val).
  321. *
  322. * @param v The input value.
  323. *
  324. * @return The approximated result.
  325. */
  326. static inline float rsqrt(float v)
  327. {
  328. return 1.0f / std::sqrt(v);
  329. }
  330. /**
  331. * @brief Fast approximation of sqrt(val).
  332. *
  333. * @param v The input value.
  334. *
  335. * @return The approximated result.
  336. */
  337. static inline float sqrt(float v)
  338. {
  339. return std::sqrt(v);
  340. }
  341. /**
  342. * @brief Extract mantissa and exponent of a float value.
  343. *
  344. * @param v The input value.
  345. * @param[out] expo The output exponent.
  346. *
  347. * @return The mantissa.
  348. */
  349. static inline float frexp(float v, int* expo)
  350. {
  351. if32 p;
  352. p.f = v;
  353. *expo = ((p.u >> 23) & 0xFF) - 126;
  354. p.u = (p.u & 0x807fffff) | 0x3f000000;
  355. return p.f;
  356. }
  357. /**
  358. * @brief Initialize the seed structure for a random number generator.
  359. *
 * Important note: For the purposes of ASTC we want sets of random numbers to
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
  365. *
  366. * @param state The state structure to initialize.
  367. */
  368. void rand_init(uint64_t state[2]);
  369. /**
  370. * @brief Return the next random number from the generator.
  371. *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
  373. * public-domain implementation given by David Blackman & Sebastiano Vigna at
  374. * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
  375. *
  376. * @param state The state structure to use/update.
  377. */
  378. uint64_t rand(uint64_t state[2]);
  379. }
  380. /* ============================================================================
  381. Softfloat library with fp32 and fp16 conversion functionality.
  382. ============================================================================ */
  383. #if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
  384. /* narrowing float->float conversions */
  385. uint16_t float_to_sf16(float val);
  386. float sf16_to_float(uint16_t val);
  387. #endif
  388. /*********************************
  389. Vector library
  390. *********************************/
  391. #include "astcenc_vecmathlib.h"
  392. /*********************************
  393. Declaration of line types
  394. *********************************/
  395. // parametric line, 2D: The line is given by line = a + b * t.
  396. struct line2
  397. {
  398. vfloat4 a;
  399. vfloat4 b;
  400. };
  401. // parametric line, 3D
  402. struct line3
  403. {
  404. vfloat4 a;
  405. vfloat4 b;
  406. };
  407. struct line4
  408. {
  409. vfloat4 a;
  410. vfloat4 b;
  411. };
  412. struct processed_line2
  413. {
  414. vfloat4 amod;
  415. vfloat4 bs;
  416. };
  417. struct processed_line3
  418. {
  419. vfloat4 amod;
  420. vfloat4 bs;
  421. };
  422. struct processed_line4
  423. {
  424. vfloat4 amod;
  425. vfloat4 bs;
  426. };
  427. #endif