FloatingPoint.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. /* Various predicates and operations on IEEE-754 floating point types. */
  6. #ifndef mozilla_FloatingPoint_h
  7. #define mozilla_FloatingPoint_h
  8. #include "mozilla/Assertions.h"
  9. #include "mozilla/Attributes.h"
  10. #include "mozilla/Casting.h"
  11. #include "mozilla/MathAlgorithms.h"
  12. #include "mozilla/Types.h"
  13. #include <stdint.h>
  14. namespace mozilla {
  15. /*
  16. * It's reasonable to ask why we have this header at all. Don't isnan,
  17. * copysign, the built-in comparison operators, and the like solve these
  18. * problems? Unfortunately, they don't. We've found that various compilers
  19. * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
  20. * the standard methods in various situations, so we can't use them. Some of
  21. * these compilers even have problems compiling seemingly reasonable bitwise
  22. * algorithms! But with some care we've found algorithms that seem to not
  23. * trigger those compiler bugs.
  24. *
  25. * For the aforementioned reasons, be very wary of making changes to any of
  26. * these algorithms. If you must make changes, keep a careful eye out for
  27. * compiler bustage, particularly PGO-specific bustage.
  28. */
  29. struct FloatTypeTraits
  30. {
  31. typedef uint32_t Bits;
  32. static const unsigned kExponentBias = 127;
  33. static const unsigned kExponentShift = 23;
  34. static const Bits kSignBit = 0x80000000UL;
  35. static const Bits kExponentBits = 0x7F800000UL;
  36. static const Bits kSignificandBits = 0x007FFFFFUL;
  37. };
  38. struct DoubleTypeTraits
  39. {
  40. typedef uint64_t Bits;
  41. static const unsigned kExponentBias = 1023;
  42. static const unsigned kExponentShift = 52;
  43. static const Bits kSignBit = 0x8000000000000000ULL;
  44. static const Bits kExponentBits = 0x7ff0000000000000ULL;
  45. static const Bits kSignificandBits = 0x000fffffffffffffULL;
  46. };
  47. template<typename T> struct SelectTrait;
  48. template<> struct SelectTrait<float> : public FloatTypeTraits {};
  49. template<> struct SelectTrait<double> : public DoubleTypeTraits {};
  50. /*
  51. * This struct contains details regarding the encoding of floating-point
  52. * numbers that can be useful for direct bit manipulation. As of now, the
  53. * template parameter has to be float or double.
  54. *
  55. * The nested typedef |Bits| is the unsigned integral type with the same size
  56. * as T: uint32_t for float and uint64_t for double (static assertions
  57. * double-check these assumptions).
  58. *
  59. * kExponentBias is the offset that is subtracted from the exponent when
  60. * computing the value, i.e. one plus the opposite of the mininum possible
  61. * exponent.
  62. * kExponentShift is the shift that one needs to apply to retrieve the
  63. * exponent component of the value.
  64. *
  65. * kSignBit contains a bits mask. Bit-and-ing with this mask will result in
  66. * obtaining the sign bit.
  67. * kExponentBits contains the mask needed for obtaining the exponent bits and
  68. * kSignificandBits contains the mask needed for obtaining the significand
  69. * bits.
  70. *
  71. * Full details of how floating point number formats are encoded are beyond
  72. * the scope of this comment. For more information, see
  73. * http://en.wikipedia.org/wiki/IEEE_floating_point
  74. * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
  75. */
  76. template<typename T>
  77. struct FloatingPoint : public SelectTrait<T>
  78. {
  79. typedef SelectTrait<T> Base;
  80. typedef typename Base::Bits Bits;
  81. static_assert((Base::kSignBit & Base::kExponentBits) == 0,
  82. "sign bit shouldn't overlap exponent bits");
  83. static_assert((Base::kSignBit & Base::kSignificandBits) == 0,
  84. "sign bit shouldn't overlap significand bits");
  85. static_assert((Base::kExponentBits & Base::kSignificandBits) == 0,
  86. "exponent bits shouldn't overlap significand bits");
  87. static_assert((Base::kSignBit | Base::kExponentBits | Base::kSignificandBits) ==
  88. ~Bits(0),
  89. "all bits accounted for");
  90. /*
  91. * These implementations assume float/double are 32/64-bit single/double
  92. * format number types compatible with the IEEE-754 standard. C++ don't
  93. * require this to be the case. But we required this in implementations of
  94. * these algorithms that preceded this header, so we shouldn't break anything
  95. * if we keep doing so.
  96. */
  97. static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T");
  98. };
  99. /** Determines whether a float/double is NaN. */
  100. template<typename T>
  101. static MOZ_ALWAYS_INLINE bool
  102. IsNaN(T aValue)
  103. {
  104. /*
  105. * A float/double is NaN if all exponent bits are 1 and the significand
  106. * contains at least one non-zero bit.
  107. */
  108. typedef FloatingPoint<T> Traits;
  109. typedef typename Traits::Bits Bits;
  110. return (BitwiseCast<Bits>(aValue) & Traits::kExponentBits) == Traits::kExponentBits &&
  111. (BitwiseCast<Bits>(aValue) & Traits::kSignificandBits) != 0;
  112. }
  113. /** Determines whether a float/double is +Infinity or -Infinity. */
  114. template<typename T>
  115. static MOZ_ALWAYS_INLINE bool
  116. IsInfinite(T aValue)
  117. {
  118. /* Infinities have all exponent bits set to 1 and an all-0 significand. */
  119. typedef FloatingPoint<T> Traits;
  120. typedef typename Traits::Bits Bits;
  121. Bits bits = BitwiseCast<Bits>(aValue);
  122. return (bits & ~Traits::kSignBit) == Traits::kExponentBits;
  123. }
  124. /** Determines whether a float/double is not NaN or infinite. */
  125. template<typename T>
  126. static MOZ_ALWAYS_INLINE bool
  127. IsFinite(T aValue)
  128. {
  129. /*
  130. * NaN and Infinities are the only non-finite floats/doubles, and both have
  131. * all exponent bits set to 1.
  132. */
  133. typedef FloatingPoint<T> Traits;
  134. typedef typename Traits::Bits Bits;
  135. Bits bits = BitwiseCast<Bits>(aValue);
  136. return (bits & Traits::kExponentBits) != Traits::kExponentBits;
  137. }
  138. /**
  139. * Determines whether a float/double is negative or -0. It is an error
  140. * to call this method on a float/double which is NaN.
  141. */
  142. template<typename T>
  143. static MOZ_ALWAYS_INLINE bool
  144. IsNegative(T aValue)
  145. {
  146. MOZ_ASSERT(!IsNaN(aValue), "NaN does not have a sign");
  147. /* The sign bit is set if the double is negative. */
  148. typedef FloatingPoint<T> Traits;
  149. typedef typename Traits::Bits Bits;
  150. Bits bits = BitwiseCast<Bits>(aValue);
  151. return (bits & Traits::kSignBit) != 0;
  152. }
  153. /** Determines whether a float/double represents -0. */
  154. template<typename T>
  155. static MOZ_ALWAYS_INLINE bool
  156. IsNegativeZero(T aValue)
  157. {
  158. /* Only the sign bit is set if the value is -0. */
  159. typedef FloatingPoint<T> Traits;
  160. typedef typename Traits::Bits Bits;
  161. Bits bits = BitwiseCast<Bits>(aValue);
  162. return bits == Traits::kSignBit;
  163. }
  164. /** Determines wether a float/double represents +0. */
  165. template<typename T>
  166. static MOZ_ALWAYS_INLINE bool
  167. IsPositiveZero(T aValue)
  168. {
  169. /* All bits are zero if the value is +0. */
  170. typedef FloatingPoint<T> Traits;
  171. typedef typename Traits::Bits Bits;
  172. Bits bits = BitwiseCast<Bits>(aValue);
  173. return bits == 0;
  174. }
  175. /**
  176. * Returns 0 if a float/double is NaN or infinite;
  177. * otherwise, the float/double is returned.
  178. */
  179. template<typename T>
  180. static MOZ_ALWAYS_INLINE T
  181. ToZeroIfNonfinite(T aValue)
  182. {
  183. return IsFinite(aValue) ? aValue : 0;
  184. }
  185. /**
  186. * Returns the exponent portion of the float/double.
  187. *
  188. * Zero is not special-cased, so ExponentComponent(0.0) is
  189. * -int_fast16_t(Traits::kExponentBias).
  190. */
  191. template<typename T>
  192. static MOZ_ALWAYS_INLINE int_fast16_t
  193. ExponentComponent(T aValue)
  194. {
  195. /*
  196. * The exponent component of a float/double is an unsigned number, biased
  197. * from its actual value. Subtract the bias to retrieve the actual exponent.
  198. */
  199. typedef FloatingPoint<T> Traits;
  200. typedef typename Traits::Bits Bits;
  201. Bits bits = BitwiseCast<Bits>(aValue);
  202. return int_fast16_t((bits & Traits::kExponentBits) >> Traits::kExponentShift) -
  203. int_fast16_t(Traits::kExponentBias);
  204. }
  205. /** Returns +Infinity. */
  206. template<typename T>
  207. static MOZ_ALWAYS_INLINE T
  208. PositiveInfinity()
  209. {
  210. /*
  211. * Positive infinity has all exponent bits set, sign bit set to 0, and no
  212. * significand.
  213. */
  214. typedef FloatingPoint<T> Traits;
  215. return BitwiseCast<T>(Traits::kExponentBits);
  216. }
  217. /** Returns -Infinity. */
  218. template<typename T>
  219. static MOZ_ALWAYS_INLINE T
  220. NegativeInfinity()
  221. {
  222. /*
  223. * Negative infinity has all exponent bits set, sign bit set to 1, and no
  224. * significand.
  225. */
  226. typedef FloatingPoint<T> Traits;
  227. return BitwiseCast<T>(Traits::kSignBit | Traits::kExponentBits);
  228. }
  229. /**
  230. * Computes the bit pattern for a NaN with the specified sign bit and
  231. * significand bits.
  232. */
  233. template<typename T,
  234. int SignBit,
  235. typename FloatingPoint<T>::Bits Significand>
  236. struct SpecificNaNBits
  237. {
  238. using Traits = FloatingPoint<T>;
  239. static_assert(SignBit == 0 || SignBit == 1, "bad sign bit");
  240. static_assert((Significand & ~Traits::kSignificandBits) == 0,
  241. "significand must only have significand bits set");
  242. static_assert(Significand & Traits::kSignificandBits,
  243. "significand must be nonzero");
  244. static constexpr typename Traits::Bits value =
  245. (SignBit * Traits::kSignBit) | Traits::kExponentBits | Significand;
  246. };
  247. /**
  248. * Constructs a NaN value with the specified sign bit and significand bits.
  249. *
  250. * There is also a variant that returns the value directly. In most cases, the
  251. * two variants should be identical. However, in the specific case of x86
  252. * chips, the behavior differs: returning floating-point values directly is done
  253. * through the x87 stack, and x87 loads and stores turn signaling NaNs into
  254. * quiet NaNs... silently. Returning floating-point values via outparam,
  255. * however, is done entirely within the SSE registers when SSE2 floating-point
  256. * is enabled in the compiler, which has semantics-preserving behavior you would
  257. * expect.
  258. *
  259. * If preserving the distinction between signaling NaNs and quiet NaNs is
  260. * important to you, you should use the outparam version. In all other cases,
  261. * you should use the direct return version.
  262. */
  263. template<typename T>
  264. static MOZ_ALWAYS_INLINE void
  265. SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand, T* result)
  266. {
  267. typedef FloatingPoint<T> Traits;
  268. MOZ_ASSERT(signbit == 0 || signbit == 1);
  269. MOZ_ASSERT((significand & ~Traits::kSignificandBits) == 0);
  270. MOZ_ASSERT(significand & Traits::kSignificandBits);
  271. BitwiseCast<T>((signbit ? Traits::kSignBit : 0) |
  272. Traits::kExponentBits |
  273. significand,
  274. result);
  275. MOZ_ASSERT(IsNaN(*result));
  276. }
  277. template<typename T>
  278. static MOZ_ALWAYS_INLINE T
  279. SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand)
  280. {
  281. T t;
  282. SpecificNaN(signbit, significand, &t);
  283. return t;
  284. }
  285. /** Computes the smallest non-zero positive float/double value. */
  286. template<typename T>
  287. static MOZ_ALWAYS_INLINE T
  288. MinNumberValue()
  289. {
  290. typedef FloatingPoint<T> Traits;
  291. typedef typename Traits::Bits Bits;
  292. return BitwiseCast<T>(Bits(1));
  293. }
  294. /**
  295. * If aValue is equal to some int32_t value, set *aInt32 to that value and
  296. * return true; otherwise return false.
  297. *
  298. * Note that negative zero is "equal" to zero here. To test whether a value can
  299. * be losslessly converted to int32_t and back, use NumberIsInt32 instead.
  300. */
  301. template<typename T>
  302. static MOZ_ALWAYS_INLINE bool
  303. NumberEqualsInt32(T aValue, int32_t* aInt32)
  304. {
  305. /*
  306. * XXX Casting a floating-point value that doesn't truncate to int32_t, to
  307. * int32_t, induces undefined behavior. We should definitely fix this
  308. * (bug 744965), but as apparently it "works" in practice, it's not a
  309. * pressing concern now.
  310. */
  311. return aValue == (*aInt32 = int32_t(aValue));
  312. }
  313. /**
  314. * If d can be converted to int32_t and back to an identical double value,
  315. * set *aInt32 to that value and return true; otherwise return false.
  316. *
  317. * The difference between this and NumberEqualsInt32 is that this method returns
  318. * false for negative zero.
  319. */
  320. template<typename T>
  321. static MOZ_ALWAYS_INLINE bool
  322. NumberIsInt32(T aValue, int32_t* aInt32)
  323. {
  324. return !IsNegativeZero(aValue) && NumberEqualsInt32(aValue, aInt32);
  325. }
  326. /**
  327. * Computes a NaN value. Do not use this method if you depend upon a particular
  328. * NaN value being returned.
  329. */
  330. template<typename T>
  331. static MOZ_ALWAYS_INLINE T
  332. UnspecifiedNaN()
  333. {
  334. /*
  335. * If we can use any quiet NaN, we might as well use the all-ones NaN,
  336. * since it's cheap to materialize on common platforms (such as x64, where
  337. * this value can be represented in a 32-bit signed immediate field, allowing
  338. * it to be stored to memory in a single instruction).
  339. */
  340. typedef FloatingPoint<T> Traits;
  341. return SpecificNaN<T>(1, Traits::kSignificandBits);
  342. }
  343. /**
  344. * Compare two doubles for equality, *without* equating -0 to +0, and equating
  345. * any NaN value to any other NaN value. (The normal equality operators equate
  346. * -0 with +0, and they equate NaN to no other value.)
  347. */
  348. template<typename T>
  349. static inline bool
  350. NumbersAreIdentical(T aValue1, T aValue2)
  351. {
  352. typedef FloatingPoint<T> Traits;
  353. typedef typename Traits::Bits Bits;
  354. if (IsNaN(aValue1)) {
  355. return IsNaN(aValue2);
  356. }
  357. return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2);
  358. }
  359. namespace detail {
  360. template<typename T>
  361. struct FuzzyEqualsEpsilon;
  362. template<>
  363. struct FuzzyEqualsEpsilon<float>
  364. {
  365. // A number near 1e-5 that is exactly representable in a float.
  366. static float value() { return 1.0f / (1 << 17); }
  367. };
  368. template<>
  369. struct FuzzyEqualsEpsilon<double>
  370. {
  371. // A number near 1e-12 that is exactly representable in a double.
  372. static double value() { return 1.0 / (1LL << 40); }
  373. };
  374. } // namespace detail
  375. /**
  376. * Compare two floating point values for equality, modulo rounding error. That
  377. * is, the two values are considered equal if they are both not NaN and if they
  378. * are less than or equal to aEpsilon apart. The default value of aEpsilon is
  379. * near 1e-5.
  380. *
  381. * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
  382. * as it is more reasonable over the entire range of floating point numbers.
  383. * This additive version should only be used if you know the range of the
  384. * numbers you are dealing with is bounded and stays around the same order of
  385. * magnitude.
  386. */
  387. template<typename T>
  388. static MOZ_ALWAYS_INLINE bool
  389. FuzzyEqualsAdditive(T aValue1, T aValue2,
  390. T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
  391. {
  392. static_assert(IsFloatingPoint<T>::value, "floating point type required");
  393. return Abs(aValue1 - aValue2) <= aEpsilon;
  394. }
  395. /**
  396. * Compare two floating point values for equality, allowing for rounding error
  397. * relative to the magnitude of the values. That is, the two values are
  398. * considered equal if they are both not NaN and they are less than or equal to
  399. * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two
  400. * argument values.
  401. *
  402. * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
  403. * this function effectively masks out differences in the bottom few bits of
  404. * the floating point numbers being compared, regardless of what order of
  405. * magnitude those numbers are at.
  406. */
  407. template<typename T>
  408. static MOZ_ALWAYS_INLINE bool
  409. FuzzyEqualsMultiplicative(T aValue1, T aValue2,
  410. T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
  411. {
  412. static_assert(IsFloatingPoint<T>::value, "floating point type required");
  413. // can't use std::min because of bug 965340
  414. T smaller = Abs(aValue1) < Abs(aValue2) ? Abs(aValue1) : Abs(aValue2);
  415. return Abs(aValue1 - aValue2) <= aEpsilon * smaller;
  416. }
  417. /**
  418. * Returns true if the given value can be losslessly represented as an IEEE-754
  419. * single format number, false otherwise. All NaN values are considered
  420. * representable (notwithstanding that the exact bit pattern of a double format
  421. * NaN value can't be exactly represented in single format).
  422. *
  423. * This function isn't inlined to avoid buggy optimizations by MSVC.
  424. */
  425. MOZ_MUST_USE
  426. extern MFBT_API bool
  427. IsFloat32Representable(double aFloat32);
  428. } /* namespace mozilla */
  429. #endif /* mozilla_FloatingPoint_h */